|
复制代码 代码如下:
import socket
import re
''' 广东省公安厅出入境政务服务网护照,通行证办理进度查询。 分析网址格式为 http://www.gdcrj.com/wsyw/tcustomer/tcustomer.do?&method=find&applyid=身份证号码 构造socket请求网页html,利用正则匹配出查询结果 ''' def gethtmlbyidentityid(identityid): s = socket.socket(socket.AF_INET,socket.SOCK_STREAM) host = 'www.gdcrj.com'; suburl = '/wsyw/tcustomer/tcustomer.do?&method=find&applyid={0}' port = 80;
remote_ip = socket.gethostbyname(host) s.connect((remote_ip,port))
print('【INFO】:socket连接成功')
message = 'GET '+ suburl.format(identityid) +' HTTP/1.1rnHost: '+ host +'rnrn'
# str 2 bytes m_bytes = message.encode('utf-8')
# send bytes s.sendall(m_bytes)
print('【INFO】:远程下载中...')
recevstr = '' while True: # return bytes recev = s.recv(4096) # bytes 2 str recevstr += recev.decode(encoding = 'utf-8',errors = 'ignore') if not recev: s.close() print('【INFO】:远程下载网页完成') break return recevstr
''' 利用正则表达式从上步获取的网页html内容里找出查询结果 ''' def getresultfromhtml(htmlstr): linebreaks = re.compile(r'ns*') space = re.compile('( )+') resultReg = re.compile(r'<td class="news_font">([^<td]+)</td>',re.MULTILINE)
#去除换行符和空格 htmlstr = linebreaks.sub('',htmlstr) htmlstr = space.sub(' ',htmlstr)
#匹配出查询结果 result = resultReg.findall(htmlstr) for res in result: print(res.strip())
if __name__ == '__main__': identityid = input('输入您的身份证号码(仅限广东省居民查询):') try: identityid = int(identityid) print('【INFO】:开始查询') html = gethtmlbyidentityid(identityid) getresultfromhtml(html) print('【INFO】:查询成功') except: print('【WARN】:输入非法')
input('【INFO】:按任意键退出')
(编辑:安卓应用网)
【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容!
|