#!/usr/bin/env python
# -*- coding:utf-8 -*-
import requests
import re
import os
def getHTMLText(url, timeout=10):
    """Fetch ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The response body as text, or ``None`` when the request fails
        (connection problem, timeout, or a 4xx/5xx status). The error is
        printed before ``None`` is returned.
    """
    # A browser-like User-Agent is sent with the request.
    headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36"}
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        r.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(e)
        return None
    else:
        return r.text
def getURLList(html):
    """Extract image URLs ending in .jpg, .gif or .png from ``html``.

    Args:
        html: Page source to scan. ``None`` or an empty string is accepted
            and yields an empty list.

    Returns:
        A list of the matched URLs with duplicates removed, in order of
        first appearance.
    """
    if not html:
        return []
    # No leading space is required before the scheme, so URLs that directly
    # follow a quote (src="http://...") are found. The character class keeps
    # the set of allowed URL characters: '/', '|', '.', word characters,
    # whitespace and '-'.
    pattern = r"https?:[/|.\w\s-]*\.(?:jpg|gif|png)"
    # The pattern has no capturing groups, so findall returns whole matches.
    found = re.findall(pattern, html)
    # dict.fromkeys keeps first-seen order while dropping duplicates in O(n).
    return list(dict.fromkeys(found))
def download(lst, filepath='img', timeout=10):
    """Download every URL in ``lst`` into the directory ``filepath``.

    Each file is named after the last ``/``-separated component of its URL.
    A URL whose request fails is reported and skipped; no file is created
    for it.

    Args:
        lst: List of URLs to download.
        filepath: Target directory; created if missing. An empty value
            falls back to ``'img'``.
        timeout: Seconds to wait for each server response.

    Returns:
        None. Progress and errors are printed.
    """
    # An empty path would make os.makedirs fail, so use the default instead.
    filepath = filepath or 'img'
    os.makedirs(filepath, exist_ok=True)
    # Built once: the headers are the same for every request.
    headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36"}
    total = len(lst)
    for position, url in enumerate(lst, 1):
        name = url.split('/')[-1]
        filename = os.path.join(filepath, name)
        print("Downloading {}/{} file name:{}".format(position, total, name))
        try:
            img = requests.get(url, headers=headers, timeout=timeout)
            img.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(e)
            continue
        # Open the file only after a successful response so that a failed
        # download does not leave an empty file behind.
        with open(filename, 'wb') as f:
            f.write(img.content)
        print("{} saved".format(filename))
if __name__ == '__main__':
    # Script entry point: ask for a page URL and a target directory, then
    # download every image URL found on that page.
    url = input('please input the image url:').strip()
    filepath = input('please input the download path:').strip()
    html = getHTMLText(url)
    if html is None:
        # getHTMLText already printed the error; there is nothing to scan.
        raise SystemExit(1)
    lst = getURLList(html)
    if filepath:
        download(lst, filepath)
    else:
        # No path entered: let download() use its own default directory.
        download(lst)
这是一个专为移动设备优化的页面(即为了让你能够在 Google 搜索结果里秒开这个页面),如果你希望参与 V2EX 社区的讨论,你可以继续到 V2EX 上打开本讨论主题的完整版本。
V2EX 是创意工作者们的社区,是一个分享自己正在做的有趣事物、交流想法,可以遇见新朋友甚至新机会的地方。
V2EX is a community of developers, designers and creative people.