下载全国城市空气质量历史数据
发布时间:2020-05-24 23:18:16 所属栏目:Python 来源:互联网
导读:下载全国城市空气质量历史数据
下面是脚本之家 jb51.cc 通过网络收集整理的代码片段。 脚本之家小编现在分享给大家,也给大家做个参考。
import os
import shutil
import re
import urllib.request
# Base URL of the site being scraped; the "/aqi/..." page paths are appended to it.
home = "http://www.tianqihoubao.com"
def _decode_page(raw):
    """Decode downloaded bytes as GBK, falling back to UTF-8."""
    try:
        return raw.decode("gbk")
    except UnicodeDecodeError:
        return raw.decode("utf-8")


def _table_rows(html, columns):
    """Return the non-empty text cells of the first <table>, grouped by *columns*."""
    start = html.find("<table")
    if start == -1:
        return []
    end = html.find("</table>", start)
    table = html[start:end] if end != -1 else html[start:]
    # Drop the opening tag's name and attributes but keep its closing ">",
    # which the cell pattern below uses as the first delimiter.
    table = table[table.find(">"):]
    # Line breaks are removed because "." in the pattern does not match them;
    # <b> wrappers are removed so bold cells yield plain text.
    table = table.replace("\r\n", "").replace("</b>", "").replace("<b>", "")
    cells = [text.strip(" ") for text in re.findall(r">(.+?)<", table)]
    cells = [text for text in cells if text]
    # A trailing group with fewer than `columns` cells is discarded.
    usable = len(cells) - len(cells) % columns
    return [cells[i:i + columns] for i in range(0, usable, columns)]


def down2txt(code, tit, url, columns=9):
    """Download one page and save its first HTML table as a text file.

    The table is written to ``<cwd>/data/<code>/<tit>.txt``, one table row per
    line, with every cell followed by a single space. If that file already
    exists nothing is downloaded and the file is left untouched.

    Args:
        code: Sub-directory name under ``data/``.
        tit: File name (without the ``.txt`` extension).
        url: Address of the page to fetch.
        columns: Number of non-empty cells that make up one output line.

    Raises:
        ValueError: If *columns* is smaller than 1.
        urllib.error.URLError: If the page cannot be fetched.
    """
    if columns < 1:
        raise ValueError("columns must be at least 1")

    target_dir = os.path.join(os.getcwd(), "data", code)
    os.makedirs(target_dir, exist_ok=True)

    # Check before downloading so an already-saved page costs no request.
    file = os.path.join(target_dir, tit + ".txt")
    if os.path.exists(file):
        print("文件已存在:" + tit + ".txt")
        return

    page = _decode_page(urllib.request.urlopen(url).read())
    rows = _table_rows(page, columns)

    with open(file, "w") as f:
        f.writelines("".join(cell + " " for cell in row) + "\n" for row in rows)
def down_city(name, code):
    """Download every history page linked from one city's index page.

    Fetches ``<home>/aqi/<code>.html``, collects every single-quoted link of
    the form ``/aqi/<code>-...html`` and passes each one to ``down2txt``,
    using the link's base name (without ``/aqi/`` and ``.html``) as the
    output file title.

    Args:
        name: Display name of the city. Not used by this function; kept so
            existing callers keep working.
        code: City identifier as it appears in the site's URLs.
    """
    url = home + "/aqi/" + code + ".html"
    print(url)
    raw = urllib.request.urlopen(url).read()
    try:
        page = raw.decode("gbk")
    except UnicodeDecodeError:
        page = raw.decode("utf-8")

    # Escape the code so characters special to regexes cannot alter the pattern.
    link_pattern = re.compile(r"href='(/aqi/" + re.escape(code) + r"-.+?html)'")
    for link in link_pattern.findall(page):
        page_url = home + link
        tit = link.replace("/aqi/", "").replace(".html", "")
        print(page_url)
        # down2txt takes (code, tit, url); the title was previously omitted,
        # which made every call fail with a TypeError.
        down2txt(code, tit, page_url)
if __name__ == "__main__":
    # Entry point: read the site's city list and download each city in turn.
    # Finished cities are recorded one per line as "<code> <name>" in
    # dataindex.txt so an interrupted run can be resumed.
    index_path = "dataindex.txt"

    url = home + "/aqi/"
    raw = urllib.request.urlopen(url).read()
    try:
        page = raw.decode("gbk")
    except UnicodeDecodeError:
        page = raw.decode("utf-8")

    # Load the download record once instead of re-reading it for every city.
    done = set()
    if os.path.exists(index_path):
        with open(index_path, "r") as f:
            done = {line.rstrip("\n") for line in f}

    ls = re.findall(r'href="/aqi/(.+?)</a>', page)
    index = 0
    for l in ls:
        try:
            # Each match is expected to look like '<code>.html"><name>';
            # anything that does not split into exactly two parts is skipped.
            ls2 = l.replace(" ", "").replace('.html">', " ").strip(" ").split(" ")
            if len(ls2) != 2:
                continue
            code, name = ls2
            index += 1
            print(str(index) + "/" + str(len(ls)) + ": " + code + " " + name)

            record = code + " " + name
            if record in done:
                print(name + " 已下载")
                continue

            down_city(name, code)

            # Record the city only after all of its pages were processed.
            with open(index_path, "a") as f:
                f.write(record + "\n")
            done.add(record)
        except Exception as exc:
            # Best effort: report the failure and carry on with the next city.
            print("error!", exc)
    print("finished!")
以上是脚本之家(jb51.cc)为你收集整理的全部代码内容,希望文章能够帮你解决所遇到的程序开发问题。 如果觉得脚本之家网站内容还不错,欢迎将脚本之家网站推荐给程序员好友。 (编辑:安卓应用网) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容!
