单线程备份文件扫描
脚本
"""Single-threaded scanner for exposed website backup archives.

Usage:
    List the targets in URL_FILE, one per line, each including its
    ``http://`` or ``https://`` scheme. Set URL_FILE to ``"url.txt"`` to
    read the list from the current working directory instead.
"""
import re

# Optional proxy settings. NOTE: this is not passed to requests by default;
# add ``proxies=proxy`` to the requests.get() call in fetch_status() to use it.
proxy = {"http": "127.0.0.1:8080"}

# File listing the targets to scan.
URL_FILE = r'D:\Desktop\url.txt'

# Seconds to wait for a connection / for response data before giving up.
TIMEOUT = 10

# Fixed backup paths probed on every target.
inurl = (
    "/www.zip", "/www.rar", "/www.tar.gz",
    "/wwwroot.zip", "/wwwroot.rar", "/wwwroot.tar.gz",
    "/web.zip", "/web.rar", "/web.tar.gz",
    "/.svn",
)

# Archive suffixes appended to the host name, e.g. ``xxx.com.zip``.
hz = (".zip", ".rar", ".tar.gz")


def candidate_urls(base):
    """Return every backup URL to probe for the target *base*.

    The fixed paths from ``inurl`` come first, followed by the archives
    named after the host (``<base>/<host><suffix>``). Host-named archives
    are only generated when *base* starts with ``http://`` or ``https://``.
    """
    # A trailing slash would otherwise yield "//www.zip" and "host/.zip".
    base = base.rstrip('/')
    urls = [base + path for path in inurl]
    for scheme in ("https://", "http://"):
        if base.startswith(scheme):
            host = base[len(scheme):]
            urls.extend(base + '/' + host + suffix for suffix in hz)
            break
    return urls


def read_targets(path):
    """Return the whitespace-separated targets listed in the file *path*."""
    with open(path) as f:
        return [target for line in f for target in line.split()]


def fetch_status(url):
    """Return the HTTP status code for *url*, or None if the request failed."""
    # Imported lazily so the URL helpers above stay usable without the
    # third-party dependency installed.
    import requests

    try:
        # stream=True: only the headers are needed, so do not download the
        # (possibly huge) archive body.
        with requests.get(url, allow_redirects=False, timeout=TIMEOUT,
                          stream=True) as response:
            return response.status_code
    except requests.RequestException:
        return None


def scan(targets, fetch=fetch_status):
    """Probe every candidate URL of every target and report progress.

    Args:
        targets: Iterable of base URLs.
        fetch: Callable mapping a URL to its status code, or to None when
            the request failed.

    Returns:
        A ``(found, unreachable)`` pair of URL lists: URLs answered with
        status 200, and URLs whose request failed.
    """
    plan = [candidate_urls(target) for target in targets]
    total = sum(len(urls) for urls in plan)
    done = 0
    found = []
    unreachable = []
    for urls in plan:
        for position, url in enumerate(urls, start=1):
            done += 1
            status = fetch(url)
            if status is None:
                unreachable.append(url)
                print('进程:(%s/%s) ----- 状态:%s ----- 无法访问 ----- 目标:%s'
                      % (done, total, '-', url))
                # The remaining URLs of this target are skipped; count
                # them as done so the progress still reaches the total.
                done += len(urls) - position
                break
            if status == 200:
                found.append(url)
                print('进程:(%s/%s) ----- 状态:%s ----- 文件存在 ----- 目标:%s'
                      % (done, total, status, url))
            else:
                print('进程:(%s/%s) ----- 状态:%s ----- 不存在 ----- 目标:%s'
                      % (done, total, status, url))
    return found, unreachable


def main():
    """Scan the targets in URL_FILE and print a summary of the results."""
    found, unreachable = scan(read_targets(URL_FILE))
    print('扫描完成')
    print('-----------成功的结果-----------')
    print("\n".join(found))
    print('-----------无法访问的-----------')
    print("\n".join(unreachable))


if __name__ == '__main__':
    main()
说明
常见的备份文件命名如下:
www.zip
www.rar
www.tar.gz
wwwroot.zip
wwwroot.rar
wwwroot.tar.gz
web.zip
web.rar
web.tar.gz
.svn
www.rar
www.tar.gz
wwwroot.zip
wwwroot.rar
wwwroot.tar.gz
web.zip
web.rar
web.tar.gz
.svn
本脚本的作用是扫描如上备份文件以及带域名的备份文件(www.xx.com/www.xx.com.zip等)是否存在,喜欢的可以收藏~
多进程备份文件扫描
脚本
# -*- coding: utf-8 -*-
# @Author: mrwu
# @Date:   2022-03-07 09:43:19
# @Last Modified by:   mrwu
# @Last Modified time: 2022-07-01 18:08:38
"""Multi-process scanner for exposed website backup archives."""
from multiprocessing import Pool
import argparse
import re

header = {"User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1"}

# Fixed backup paths probed on every target.
inurl = (
    "/www.zip", "/www.rar", "/www.tar.gz", "/www.gz",
    "/wwwroot.zip", "/wwwroot.rar", "/wwwroot.tar.gz", "/wwwroot.gz",
    "/web.zip", "/web.rar", "/web.tar.gz", "/web.gz",
    "/七月修复weipan.zip", "/weipan.zip", "/weipan.rar", "/weipan.tar.gz",
)

# Archive suffixes appended to the host name, e.g. ``xxx.com.zip``.
hz = (".zip", ".rar", ".tar.gz")

urllist = []


def banner():
    """Print the start-up banner."""
    # Raw string: the art is full of backslashes that must not be treated
    # as escape sequences.
    print(r"""
  ____             _                        _____
 |  _ \           | |                      / ____|
 | |_) | __ _  ___| | ___   _ _ __  ___   | (___   ___ __ _ _ __
 |  _ < / _` |/ __| |/ / | | | '_ \/ __|   \___ \ / __/ _` | '_ \
 | |_) | (_| | (__|   <| |_| | |_) \__ \   ____) | (_| (_| | | | |
 |____/ \__,_|\___|_|\_\\__,_| .__/|___/  |_____/ \___\__,_|_| |_|
                             | |
                             |_|
                 Author:MrWu  Blog:www.mrwu.red
""")


def save(data):
    """Append *data* as one line to ``fail_url.txt`` in the working directory."""
    with open('fail_url.txt', 'a', encoding='utf-8') as f:
        f.write(data + '\n')


def _candidate_urls(target):
    """Return the backup URLs to probe for one target, host-named archives first."""
    # A trailing slash would otherwise yield "//www.zip" and "host/.zip".
    base = target.rstrip('/')
    for scheme in ("https://", "http://"):
        if base.startswith(scheme):
            host = base[len(scheme):]
            break
    else:
        raise ValueError(
            "URL must start with http:// or https://: %r" % target)
    urls = [base + '/' + host + suffix for suffix in hz]
    urls.extend(base + path for path in inurl)
    return urls


def open_url(url):
    """Build the list of backup URLs to probe from a target file.

    Args:
        url: Path of a text file holding whitespace-separated targets,
            each including its ``http://`` or ``https://`` scheme.

    Returns:
        A new list with, for each target in file order, the archives named
        after the host (one per ``hz`` suffix) followed by the fixed
        ``inurl`` paths.

    Raises:
        ValueError: If a target does not start with ``http://`` or
            ``https://``.
    """
    targets = []
    with open(url) as f:
        for line in f:
            for target in line.split():
                targets.extend(_candidate_urls(target))
    return targets


def _init_worker():
    """Silence urllib3 TLS warnings in a worker (requests use verify=False)."""
    import requests

    requests.packages.urllib3.disable_warnings()


def run(url):
    """Probe *url* and report it when the response looks like a file download.

    A hit is any response whose Content-Type contains ``application`` but
    not ``application/json``. URLs whose request fails are appended to
    ``fail_url.txt`` via save().
    """
    # Imported lazily so the URL-building helpers stay usable without the
    # third-party dependency installed.
    import requests

    try:
        # To scan through a proxy, uncomment the two proxy lines below and
        # comment out the plain requests.get() call that follows them.
        # SOCKS proxies need the "requests[socks]" extra installed.
        # proxy = {"http": "socks5://127.0.0.1:7890", "https": "socks5://127.0.0.1:7890"}
        # response = requests.get(url, headers=header, verify=False, timeout=(2, 2), stream=True, proxies=proxy)
        # stream=True: only the headers are inspected, so do not download
        # the (possibly huge) archive body.
        response = requests.get(url, headers=header, verify=False, timeout=(2, 2), stream=True)
        with response:
            code = response.status_code
            # A response without Content-Type is reachable, just not a hit.
            content_type = response.headers.get('Content-Type', '')
    except Exception:
        # Worker boundary: any failure marks the target as unreachable.
        save(url)
        return
    if "application" in content_type and "application/json" not in content_type:
        print('\033[1;31m[!] 目标:%s ----- 文件存在 ----- 状态码:%s\033[0m' % (url, code))


if __name__ == '__main__':
    from tqdm import tqdm

    banner()
    parser = argparse.ArgumentParser()
    parser.add_argument('-u', '--url', default='url.txt', help="URL文件路径")
    parser.add_argument('-t', '--threads', default=20, type=int, help="进程数,不要超过60")
    args = parser.parse_args()
    urllist = open_url(args.url)

    with Pool(args.threads, initializer=_init_worker) as po, \
            tqdm(total=len(urllist)) as pbar:
        pbar.set_description('正在扫描')

        def update(_result):
            """Advance the progress bar by one finished task."""
            pbar.update()

        for url in urllist:
            # error_callback keeps the bar moving even if a task raises.
            po.apply_async(run, (url,), callback=update, error_callback=update)
        po.close()
        po.join()
说明
- -u 参数指定要扫描的url字典文件路径,默认脚本文件目录下的 url.txt 文件,一行一个
- -t 参数指定扫描的进程数,默认20,不要超过60,进程数太高可能导致扫描不准确
- URL.txt 文件中的URL地址一定得带协议头,如:https://www.baidu.com 否则会报错
- 无法访问的URL地址会自动存放到脚本目录下的 fail_url.txt 文件中
- 如果需要增加备份文件字典,修改代码中的 inurl 变量
- 感觉用不到代理,所以就没详细写代理功能;如果需要用到代理,取消 run 函数中两行代理代码(proxy 的定义和带 proxies=proxy 的 requests.get)的注释,并注释掉其后不带代理的 requests.get 那一行;反之如果不需要代理,则保持这两行代理代码为注释状态,使用不带代理的 requests.get
- 扫描成功的结果是实时加红显示
- 用到的模块:
Pool
tqdm
requests
argparse
自行安装
本文作者为Mr.Wu,转载请注明,尊重博主劳动成果!
由于经常折腾代码,可能会导致个别文章内容显示错位或者别的 BUG 影响阅读; 如发现请在该文章下留言告知于我,thank you !
哈大神。我是安装模块 pip install tqdm 总是提示 File "<stdin>", line 1
求教!
@小白你的报错信息没贴完整呢
C:\Users\Administrator\Desktop\新建文件夹>1.py
Traceback (most recent call last):
File "C:\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 706, in urlopen
chunked=chunked,
File "C:\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 382, in _make_request
self._validate_conn(conn)
File "C:\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 1010, in _validate_conn
conn.connect()
File "C:\Python\Python37\lib\site-packages\urllib3\connection.py", line 464, in connect
_match_hostname(cert, self.assert_hostname or server_hostname)
File "C:\Python\Python37\lib\site-packages\urllib3\connection.py", line 508, in _match_hostname
match_hostname(cert, asserted_hostname)
File "C:\Python\Python37\lib\ssl.py", line 323, in match_hostname
% (hostname, ', '.join(map(repr, dnsnames))))
ssl.SSLCertVerificationError: ("hostname '106.3.87.70' doesn't match either of '35458dj.com', 'www.35458dj.com'",)
During handling of the above exception, another exception occurred:
@1ssl.SSLCertVerificationError: ("hostname '106.3.87.70' doesn't match either of '35458dj.com', 'www.35458dj.com'",)
这不是很明显的标注了吗? ssl证书错误
@Mr.Wu怎么样才能联系到你呢
@win留言页面有我的群呢~
@1重新改了下,你可以使用第二个版本的[aru_3]