|
大家好,依然是我 Bygones。又到了中午休息时间,继续来写资源网抓取。
下面看代码,这次是对 QQ 业务乐园进行抓取,大家看一下。每天更新一个资源网,敬请期待吧。
代码回帖可见
<?php
/**
* 资源网文章获取
* isshw.cn 话不多说 爱上生活网
* @author Bygones
* @Time 2018年8月29日
* 站长开发交流群:179981870
* 代码仅供参考 如果需要,请自行改写
*/
// Fetch the qqyewu.com front page and list the entries stamped with today's date.
// For each matching entry the script prints the title, the link exactly as found
// in the page, and a ready-to-click anchor combining the two.
$get_url = "http://www.qqyewu.com/";
$UserAgent = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; .NET CLR 3.0.04506; .NET CLR 3.5.21022; .NET CLR 1.0.3705; .NET CLR 1.1.4322)';

// A single handle is enough; creating a second one would orphan the first.
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, $get_url);
curl_setopt($curl, CURLOPT_HEADER, 0);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
// NOTE(review): certificate and host verification are switched off. The start URL is
// plain http, but redirects are followed and may land on https — confirm this is intended.
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);
// Empty string = advertise every content encoding this cURL build can decode.
curl_setopt($curl, CURLOPT_ENCODING, '');
curl_setopt($curl, CURLOPT_USERAGENT, $UserAgent);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
// Bound the request so an unresponsive host cannot hang the script forever.
curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($curl, CURLOPT_TIMEOUT, 30);

$data = curl_exec($curl);
if ($data === false) {
    // Report the transport failure instead of silently producing an empty page,
    // then carry on with an empty body so the rest of the script finds no entries.
    error_log('qqyewu fetch failed: ' . curl_error($curl));
    $data = '';
}

// Normalise the page to UTF-8; the source encoding is detected from the listed candidates.
$res = mb_convert_encoding($data, 'UTF-8', 'UTF-8,GBK,GB2312,BIG5');

// Capture groups: 1 = category href, 2 = category text, 3 = article href,
// 4 = article title attribute, 5 = article link text.
// Attribute values use [^"]* and element text uses a lazy .*? so that one match
// cannot swallow several <li> entries that happen to share a line.
$preg = '#<li><span class="fcr today"><em class="newDate">'.date("m-d").'</em></span><a class="type" target="_blank" href="([^"]*)">(.*?)</a><a class="titname" target="_blank" href="([^"]*)" title="([^"]*)">(.*?)</a></li>#';
preg_match_all($preg, $res, $qqyewu);

// Guard against a missing capture group (e.g. if the regex engine reported an error).
$links = isset($qqyewu[3]) ? $qqyewu[3] : [];
foreach ($links as $key => $value) {
    // The values come from a remote page, so escape them before echoing into HTML.
    // double_encode is false because attribute values scraped from HTML may already
    // contain entities such as &amp;.
    $title = htmlspecialchars($qqyewu[4][$key], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);

    // Absolute and protocol-relative links are used as-is; anything else is joined
    // to the base URL with exactly one slash in between.
    if (preg_match('#^(?:https?:)?//#i', $value)) {
        $href = $value;
    } else {
        $href = rtrim($get_url, '/') . '/' . ltrim($value, '/');
    }

    echo '<br>';
    echo $title; // title
    echo '<br>';
    echo htmlspecialchars($value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false); // link as found in the page
    echo '<br>';
    echo '<a href="' . htmlspecialchars($href, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false) . '">' . $title . '</a>'; // combined anchor
}
|
|