抓取简单游全站数据
发布时间:2020-05-25 16:55:33 所属栏目:Python 来源:互联网
导读:抓取简单游全站数据
|
下面是脚本之家(jb51.cc)通过网络收集整理的代码片段。脚本之家小编现在分享给大家,也给大家做个参考。
#!/usr/bin/python
# -*- coding:utf-8 -*-
from public import gethtml2
from dbconfig_waigua import *
from scrapy.selector import Selector
import MySQLdb,MySQLdb.cursors,datetime,re,inspect
class getGames:
    """Crawl http://www.jdyou.com/ and record the games and scripts it lists.

    Rows are written to two MySQL tables through a ``MySQLdb`` connection:
    ``fz_games`` (name, updated) and ``fz_name`` (gameid, name, updated).
    """

    # Root URL that every relative link found on the site is appended to.
    _BASE_URL = 'http://www.jdyou.com/'

    conn = None
    db = None

    def __init__(self):
        """Open the MySQL connection and a dict-returning cursor.

        ``dbname``, ``dbuser``, ``dbpasswd`` and ``dbhost`` come from the
        star import of ``dbconfig_waigua``.
        """
        self.conn = MySQLdb.connect(
            db=dbname,
            user=dbuser,
            passwd=dbpasswd,
            host=dbhost,
            charset="utf8",
            cursorclass=MySQLdb.cursors.DictCursor,
        )
        self.db = self.conn.cursor()

    @staticmethod
    def _timestamp():
        """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
        return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    def _fetch(self, url):
        """Download *url* and return its body decoded as GBK text.

        Undecodable bytes are dropped ('ignore').
        """
        # NOTE(review): assumes gethtml2() returns an object whose read()
        # yields a byte string - confirm against the `public` module.
        # bytes.decode() is used instead of unicode() so the call works on
        # both Python 2 and Python 3.
        return gethtml2(url).read().decode('gbk', 'ignore')

    def saveGame(self, name):
        """Return the ``fz_games`` id for *name*, inserting the row if missing.

        Args:
            name: game name to look up.

        Returns:
            The ``id`` of the existing row, or the cursor's ``lastrowid``
            after inserting a new one.
        """
        # Parameters must be a real sequence: "(name)" is just a string, and
        # drivers that iterate the argument would then see single characters.
        self.db.execute('select id from fz_games where name = %s', (name,))
        row = self.db.fetchone()
        if row:
            return row['id']
        self.db.execute(
            'insert into fz_games (name,updated) values (%s,%s)',
            (name, self._timestamp()),
        )
        self.conn.commit()
        return self.db.lastrowid

    def savefuzhu(self, gametype, title, info, updated):
        """Store one script entry unless a ``fz_name`` row named *title* exists.

        Args:
            gametype: game name; resolved to a ``fz_games`` id (created on
                demand).
            title: script title, stored in ``fz_name.name``.
            info: script description. NOTE(review): the column list visible
                in the source is (gameid,name,updated), so this value is not
                persisted - confirm against the real table schema.
            updated: update-time text taken from the page.
        """
        gameid = self.saveGame(gametype)
        self.db.execute('select id from fz_name where name = %s', (title,))
        if self.db.fetchone():
            return
        self.db.execute(
            'insert into fz_name (gameid,name,updated) values (%s,%s,%s)',
            (gameid, title, updated),
        )
        self.conn.commit()

    # Parse a single jdyou script page.
    def parsejdy(self, url):
        """Extract ``(gametype, title, info, updated)`` from a script page.

        Args:
            url: absolute URL of the script detail page.

        Returns:
            A 4-tuple of whitespace-stripped strings.

        Raises:
            IndexError: when the keywords meta tag has fewer than two
                comma-separated values or an expected element is missing.
        """
        sel = Selector(text=self._fetch(url))
        keywords = sel.xpath('//meta[@name="keywords"]/@content').extract()[0]
        fields = keywords.split(',')
        # The first two keywords are used as the game name and script title.
        gametype = fields[0]
        title = fields[1]
        updated = sel.xpath(
            '//div[@class="detailall"]/div[@class="syall"][position()=2]'
            '/div[@class="rights"]/text()'
        ).extract()[0]
        info = sel.xpath(
            '//div[@class="abstract"][position()=1]/text()'
        ).extract()[0]
        # Progress output, encoded explicitly so non-ASCII text prints safely
        # on Python 2.
        print(u" ".join(
            (updated.strip(), gametype.strip(), title.strip(), info)
        ).encode('utf-8'))
        return (gametype.strip(), title.strip(), info.strip(), updated.strip())

    def _getgamelistjdy(self, url):
        """Parse and store every script linked from one game's list page.

        Args:
            url: absolute URL of a script list page.
        """
        sel = Selector(text=self._fetch(url))
        hrefs = sel.xpath('//a[contains(@href,"scripts")]/@href').extract()
        for href in hrefs:
            # The first two characters of each href are dropped before it is
            # joined to the site root (presumably a relative-path prefix -
            # TODO confirm).
            page_url = self._BASE_URL + href[2:]
            try:
                gametype, title, info, updated = self.parsejdy(page_url)
            except IndexError:
                # One malformed page should not lose the rest of the list.
                print("skipping %s: expected page elements not found" % page_url)
                continue
            self.savefuzhu(gametype, title, info, updated)

    # Crawl the whole jdyou site.
    def getgamefromjdy(self):
        """Walk every script list linked from the home page and store it.

        Failures in one list are reported and do not stop the crawl.
        """
        # Collect the links to all per-game script lists.
        sel = Selector(text=self._fetch(self._BASE_URL))
        hrefs = sel.xpath(
            '//div[@class="clearfixs menulist"]'
            '/a[contains(@href,"ScriptList")]/@href'
        ).extract()
        for href in hrefs:
            list_url = self._BASE_URL + href
            try:
                self._getgamelistjdy(list_url)
            except Exception as exc:
                # Best effort: report the failure instead of hiding it, and
                # let KeyboardInterrupt/SystemExit propagate.
                print("failed to crawl %s: %r" % (list_url, exc))
# Script entry point: build the crawler and run the full-site crawl.
if __name__ == "__main__":
    getGames().getgamefromjdy()
以上是脚本之家(jb51.cc)为你收集整理的全部代码内容,希望文章能够帮你解决所遇到的程序开发问题。如果觉得脚本之家网站内容还不错,欢迎将脚本之家网站推荐给程序员好友。(编辑:安卓应用网)【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容!
