@barnett2010 简单写了个下载 PDF 的 demo，单纯用 requests 发请求，可以自己再优化一下。
import requests
# Request headers sent with every call.
# NOTE(review): presumably the site rejects requests without this Referer --
# not verifiable from this script alone.
headers = {
    "Referer": "https://www.dpm.org.cn/Public/static/pdfwrap/js/pdf.worker.js",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.33",
}
url = "https://www.dpm.org.cn/Uploads/File/2022/08/29/u630c38230f984.pdf"

# Number of bytes requested per Range call.
range_int = 65535


def byte_ranges(total_len: int, chunk_size: int = range_int) -> list:
    """Split ``total_len`` bytes into inclusive ``(start, end)`` offsets.

    The pairs are contiguous, start at offset 0 and finish at
    ``total_len - 1``, so each one can be used directly as
    ``Range: bytes=start-end`` (HTTP byte ranges are inclusive on both ends).

    Args:
        total_len: Total size of the resource in bytes.
        chunk_size: Maximum number of bytes covered by one pair.

    Returns:
        A list of ``(start, end)`` tuples; empty when ``total_len`` is 0.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    return [
        # Clamp the final chunk so it never points past the last valid byte.
        (start, min(start + chunk_size, total_len) - 1)
        for start in range(0, total_len, chunk_size)
    ]


def download_pdf(url, headers, dest="a.pdf", chunk_size=range_int):
    """Download ``url`` to ``dest`` piece by piece using HTTP Range requests.

    Args:
        url: Address of the file to fetch.
        headers: Base request headers; this mapping is not modified.
        dest: Path of the output file. It is truncated before writing.
        chunk_size: Number of bytes requested per Range call.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the size probe response has no ``Content-Length`` header.
        RuntimeError: If a ranged request is not answered with 206.
    """
    # Only the headers are needed here; stream=True avoids pulling the whole
    # body just to learn its size.
    with requests.get(url, headers=headers, stream=True) as probe:
        probe.raise_for_status()
        total_len = int(probe.headers["Content-Length"])

    # Opening once in "wb" truncates any stale file, including when the
    # download fits in a single chunk.
    with open(dest, "wb") as f:
        for start, end in byte_ranges(total_len, chunk_size):
            # Build a per-request copy instead of mutating the shared dict.
            ranged_headers = {**headers, "Range": f"bytes={start}-{end}"}
            part = requests.get(url, headers=ranged_headers)
            part.raise_for_status()
            if part.status_code != 206:
                # A 200 means the server ignored Range and sent the full
                # body; appending it per chunk would corrupt the output.
                raise RuntimeError(
                    f"expected 206 Partial Content for bytes={start}-{end}, "
                    f"got {part.status_code}"
                )
            f.write(part.content)


if __name__ == "__main__":
    download_pdf(url, headers)