<?php
set_time_limit(0); // Remove the script execution time limit
// Proxy addresses ("host:port") that requests are routed through; meant to be used together with Burp
const PROXY_LIST = [
'127.0.0.1:8080',
];
/**
 * Fetch a URL through a proxy and return the response body.
 *
 * Redirects are followed, the connect timeout is 3 s and the overall
 * transfer timeout is 5 s. TLS peer and host-name verification are both
 * disabled, so any certificate presented on the connection is accepted.
 *
 * @param string   $url        Address to request.
 * @param string   $proxy_ip   Proxy address, normally "host:port". The value is
 *                             handed to libcurl unchanged, so a bare "host"
 *                             (libcurl's default proxy port is then used),
 *                             "[ipv6]:port" and "scheme://host:port" work too.
 * @param string[] $headers    Extra request headers, one "name:value" string each.
 * @param string   $user_agent Value for the User-Agent option.
 * @param string   $method     HTTP method sent as the request verb.
 *
 * @return string|false The response body, or false when the handle could not be
 *                      created or cURL reported an error.
 */
function curl_via_proxy($url,$proxy_ip,$headers = [],$user_agent = 'curl',$method = 'GET')
{
    $ch = curl_init($url);
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_CUSTOMREQUEST, $method);
    curl_setopt($ch, CURLOPT_HEADER, 0);              // keep response headers out of the returned body
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);      // return the body instead of printing it
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 3);      // connect timeout in seconds
    curl_setopt($ch, CURLOPT_TIMEOUT, 5);             // whole-transfer timeout in seconds
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);  // do not verify the certificate chain
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);  // do not check the certificate's host name
    // libcurl parses "host:port" itself; splitting on ':' by hand broke on a
    // missing port (undefined index) and on IPv6 / scheme-prefixed proxies.
    curl_setopt($ch, CURLOPT_PROXY, $proxy_ip);
    curl_setopt($ch, CURLOPT_ENCODING, 'gzip');
    curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
    // Attach the caller's extra headers, if any
    if (!empty($headers)) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    }

    $res = curl_exec($ch);
    $failed = curl_errno($ch) !== 0;
    curl_close($ch);

    return $failed ? false : $res;
}
// Request headers sent with every page fetch (the user-agent line is a desktop Chrome string).
$headers = [
    'authority:www.mrwu.red',
    'upgrade-insecure-requests:1',
    'user-agent:Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3355.4 Safari/537.36',
    'accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'accept-encoding:gzip, deflate, br',
    'accept-language:zh-CN,zh;q=0.9,en;q=0.8',
];

// Walk list pages 1..3637 in order; each page is fetched through a randomly
// chosen proxy and its body is handed to the phone-number extractor.
$lastPage = 3637;
$page = 1;
while ($page <= $lastPage) {
    $url = 'https://www.mrwu.red/xxxxx/user_list/p/'.$page.'.html';
    $proxy = PROXY_LIST[array_rand(PROXY_LIST,1)];
    $body = curl_via_proxy($url, $proxy, $headers);
    $nums = findThePhoneNumbers($body);
    $page++;
}
/**
 * Extract 11-digit phone numbers from a block of text, append them to a file
 * and return them.
 *
 * The text is first split on "-" and re-joined without the hyphen wherever the
 * two sides look like halves of one number: the left side ends in 2 digits and
 * the right starts with 11 (e.g. "86-13800000000"), or the left ends in 3-4
 * digits and the right starts with 7-8. Every run of exactly 11 digits,
 * optionally preceded by "86", is then collected.
 *
 * @param string|false|null $oldStr     Text to scan. false/null (a failed fetch)
 *                                      and blank strings yield an empty result.
 * @param string|null       $outputFile File the numbers are appended to, one per
 *                                      line. Pass null or '' to skip writing.
 *
 * @return array List of ['a' => number] entries in order of appearance;
 *               empty when nothing was found.
 */
function findThePhoneNumbers($oldStr = "", $outputFile = 'E:\test\test\test\public\txt.txt')
{
    $nums = [];

    // curl_via_proxy() hands back false on failure; treat that as "no text".
    if ($oldStr === false || $oldStr === null) {
        return $nums;
    }
    $oldStr = trim((string) $oldStr);
    if (empty($oldStr)) {
        return $nums;
    }

    // Re-join numbers that were written with a hyphen inside them.
    $strArr = explode("-", $oldStr);
    $newStr = array_shift($strArr);
    foreach ($strArr as $piece) {
        // The end-anchored patterns need at most 4 digits plus an optional
        // trailing newline, so testing the last 5 bytes gives the same answer
        // without rescanning the whole (growing) buffer for every piece.
        $tail = substr($newStr, -5);
        $isPrefixedMobile = preg_match('/\d{2}$/', $tail) && preg_match('/^\d{11}/', $piece);
        $isAreaCodeNumber = preg_match('/\d{3,4}$/', $tail) && preg_match('/^\d{7,8}/', $piece);
        $newStr .= ($isPrefixedMobile || $isAreaCodeNumber) ? $piece : "-".$piece;
    }

    // Lookarounds instead of \D...\D: the delimiters are not consumed, so a
    // number at the very start/end of the text, or two numbers separated by a
    // single character, are no longer missed.
    preg_match_all('/(?<!\d)(?:86)?(\d{11})(?!\d)/', $newStr, $result);
    foreach ($result[1] as $value) {
        $nums[] = ['a' => $value];
    }

    // Append everything in one go instead of reopening the file per number.
    if ($nums !== [] && $outputFile !== null && $outputFile !== '') {
        $myfile = fopen($outputFile, "a"); // "a" appends, "w" would overwrite
        if ($myfile === false) {
            die("Unable to open file!");
        }
        foreach ($nums as $entry) {
            // PHP_EOL rather than a bare "\r", which most tools do not treat as a line break.
            fwrite($myfile, $entry['a'].PHP_EOL);
        }
        fclose($myfile);
    }

    return $nums;
}
网页手机号采集脚本
- Mr.Wu
- 0
最近遇到个网站,我想获得后台的手机号,但是手工复制太累,而且那个后台还用了JS判断登录,虽然可以利用BURP修改响应包跳过限制,但是采集工具什么的都没办法配合BURP实现采集,另外那些采集工具写得太花里胡哨,原谅我用不来.
基于此,昨晚写出了这么个小脚本,工具的原理是,获得目标页响应包,然后利用正则提取出所有的手机号存在本地,很轻松地配合BURP实现了我要的效果.