加入收藏 | 设为首页 | 会员中心 | 我要投稿 安卓应用网 (https://www.0791zz.com/)- 科技、建站、经验、云计算、5G、大数据,站长网!
当前位置: 首页 > 编程开发 > Python > 正文

python爬虫程序 小猪短租北京房子300个详情页爬取

发布时间:2020-05-25 01:29:55 所属栏目:Python 来源:互联网
导读:python爬虫程序 小猪短租北京房子300个详情页爬取

import requests
from bs4 import BeautifulSoup

# Search-result pages 1 through 14 of the Beijing short-term rental listing.
urls = [
    'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(page)
    for page in range(1, 15)
]
def spider_prepare(urls, limit=300):
    """Collect detail-page links from a sequence of search-result pages.

    Pages are fetched in order and the ``href`` of every anchor matching
    ``a[class="resule_img_a"]`` is collected. Fetching stops as soon as
    *limit* links have been gathered.

    Args:
        urls: Iterable of search-result page URLs to fetch.
        limit: Maximum number of links to return. Defaults to 300.

    Returns:
        A list of at most *limit* ``href`` strings, in the order found.
    """
    spider_urls = []
    for url in urls:
        if len(spider_urls) >= limit:
            # Enough links collected; do not fetch the remaining pages.
            break
        # Progress output: number of links collected so far.
        print(len(spider_urls))
        # A timeout keeps one unresponsive page from hanging the crawl.
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.text, 'lxml')
        # Selector string is kept verbatim from the original script.
        for link in soup.select('a[class="resule_img_a"]'):
            new_link = link.get('href')
            # Anchors without an href would otherwise add None to the list.
            if new_link:
                spider_urls.append(new_link)
    # The last page fetched may push the total past the cap; trim it.
    return spider_urls[:limit]

def gender(name_gender):
    """Translate the host's gender-icon class list into a label.

    Args:
        name_gender: The value returned by ``.get('class')`` for the gender
            icon element; only its first entry is inspected. May be ``None``
            or empty when the element carries no class attribute.

    Returns:
        ``'boy'``, ``'girl'``, or ``'未知'`` when the class is missing or
        not one of the two recognised icon classes.
    """
    if not name_gender:
        return '未知'
    if name_gender[0] == "member_boy_ico":
        return 'boy'
    if name_gender[0] == "member_girl_ico":
        return 'girl'
    return '未知'


# Crawl the search-result pages once and reuse the list below; calling
# spider_prepare() a second time would repeat every listing request.
detail_urls = spider_prepare(urls)

# Check that the detail-page links were collected successfully.
print(detail_urls)

data = {}
for url in detail_urls:
    print(url)
    # A timeout keeps one unresponsive page from hanging the crawl.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, 'lxml')

    # Listing fields.
    title = soup.select('div.pho_info>h4')[0].text
    address = soup.select('div.con_l>div.pho_info>p')[0].get('title')
    price = soup.select('#pricePart>div.day_l>span')[0].text
    pic = soup.select('#curBigImage')[0].get('src')
    # Host fields.
    name = soup.select('a.lorder_name')[0].text
    name_pic = soup.select('div.member_pic>a>img')[0].get('src')
    name_gender = soup.select('div.w_240>h6>span')[0].get('class')

    data = {
        'title': title,
        'address': address,
        'price': price,
        'pic': pic,
        'name': name,
        'name_pic': name_pic,
        'name_gender': gender(name_gender),
    }
    # Show the extracted record.
    print(data)

    # NOTE(review): the backslashes in this path (and in the tab/newline
    # escapes below) were missing from the published listing and have been
    # reconstructed; confirm the intended output location.
    with open(r'D:\python3\tripadvisor_spider\xiaozhu.txt', 'a+',
              encoding='utf-8') as file_text:
        # One tab-separated record per line; newline characters around the
        # title are stripped so each record stays on a single line.
        file_text.write(
            '标题:{}\t地址:{}\t价格:{}\t照片:{}\t屋主姓名:{}\t屋主照片:{}\t屋主性别:{}\n'.format(
                data['title'].strip('\n'),
                data['address'],
                data['price'],
                data['pic'],
                data['name'],
                data['name_pic'],
                data['name_gender'],
            )
        )


(编辑:安卓应用网)

【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容!

    推荐文章
      热点阅读