<div id="wmd-preview-section-3461" class="wmd-preview-section preview-content markdown_views">
<h2 id="phpspider-的简单使用">phpspider 的简单使用</h2>
// GitHub download method: load the autoloader that sits one directory above
// this script. __DIR__ makes the path independent of the current working directory.
require_once __DIR__ . '/../autoloader.php';

// Import the spider class so it can be referenced below as `phpspider`.
use phpspider\core\phpspider;

/* Do NOT delete this comment */
/* Do NOT delete this comment (translated from the Chinese original) */
// Spider configuration passed to the phpspider constructor.
//
// NOTE(review): this block was reconstructed from a copy damaged by text
// extraction (closing parentheses, backslashes and parts of some selectors
// were lost). Entries marked TODO below were not fully visible in the damaged
// text and must be confirmed against the upstream demo before use.
$configs = array(
    'name' => '糗事百科',
    'log_show' => true,
    'tasknum' => 1,
    'save_running_state' => false,
    'domains' => array(
        'qiushibaike.com',
        'www.qiushibaike.com',
    ),
    'scan_urls' => array(
        'https://www.qiushibaike.com/',
    ),
    // Single-quoted so the regex escapes (\d, \?) reach the regex engine verbatim.
    'list_url_regexes' => array(
        'https://www.qiushibaike.com/8hr/page/\d+\?s=\d+',
    ),
    'content_url_regexes' => array(
        'https://www.qiushibaike.com/article/\d+',
    ),
    'max_try' => 5,
    // Optional: route requests through a proxy.
    //'proxies' => array(
    //    'http://H784U84R444YABQD:57A8B0B743F9B4D2@proxy.abuyun.com:9010'
    //),
    // Optional: export to CSV instead of a database.
    //'export' => array(
    //    'type' => 'csv',
    //    'file' => '../data/qiushibaike.csv',
    //),
    // Optional: export as SQL statements instead of a database.
    //'export' => array(
    //    'type' => 'sql',
    //    'file' => '../data/qiushibaike.sql',
    //    'table' => 'content',
    //),
    'export' => array(
        'type' => 'db',
        'table' => 'content',
    ),
    // NOTE(review): placed at the top level of $configs; the damaged text did
    // not show where the 'export' array closed -- confirm against phpspider docs.
    'db_config' => array(
        'host' => '127.0.0.1',
        'port' => 3306,
        'user' => 'root',
        'pass' => 'root',
        'name' => 'test',
    ),
    // Optional: queue connection settings.
    // 'queue_config' => array(
    //     'host' => '127.0.0.1',
    //     'port' => 6379,
    //     'pass' => 'foobared',
    //     'db' => 5,
    //     'prefix' => 'phpspider',
    //     'timeout' => 30,
    // ),
    'fields' => array(
        array(
            'name' => "article_title",
            'selector' => "//*[@id='single-next-link']//div[contains(@class,'content')]/text()[1]",
            'required' => true,
        ),
        array(
            'name' => "article_author",
            'selector' => "//div[contains(@class,'author')]//h2",
        ),
        array(
            'name' => "article_headimg",
            // TODO confirm: only the tail "'author')]//a[1]" survived; the prefix
            // is assumed to match the article_author selector.
            'selector' => "//div[contains(@class,'author')]//a[1]",
        ),
        array(
            'name' => "article_content",
            // TODO confirm: only the tail "'content')]" survived; the prefix is
            // assumed to match the article_title selector.
            'selector' => "//*[@id='single-next-link']//div[contains(@class,'content')]",
        ),
        array(
            'name' => "article_publish_time",
            // TODO confirm: the selector was entirely lost; this is a placeholder.
            'selector' => "//div[contains(@class,'author')]//h2",
        ),
        array(
            'name' => "url",
            // Set arbitrarily here; it is replaced in the on_extract_field callback.
            // TODO confirm: the original selector text was lost; this is a placeholder.
            'selector' => "//div[contains(@class,'author')]//h2",
            'required' => true,
        ),
    ),
);
$spider = new phpspider($configs);
$spider->on_handle_img = function($fieldname,$img)
{
$regex = '/src="(https?://.*?)"/i';
preg_match($regex,$img,$rs);
if (!$rs)
{
return $img;
}
$url = $rs[1];
$img = $url;
//$pathinfo = pathinfo($url);
//$fileext = $pathinfo['extension'];
//if (strtolower($fileext) == 'jpeg')
//{
//$fileext = 'jpg';
/ (编辑:安卓应用网)
【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容!