#coding:utf-8
import urllib.request
import re
import time
import sys
import os
from imp import reload
reload(sys)
print ('#'*50)
print ('This program is mainly collecting watercress <Do not be shy> group picture')
print ('#'*50)
print ('Collected before the need to enter a proxy server address, so we can prevent the douban shielding.')
print ('Recommend a proxy address:
http://cn-proxy.com/')
print ('Only need to input the server address and port number, do not need to input HTTP')
print ('Demo:127.0.0.1:8080')
print ('#'*50)
proxy_input = input('127.0.0.1:8087:')
proxy_handler = urllib.request.ProxyHandler({'http':'%s'%proxy_input})
opener = urllib.request.build_opener(proxy_handler)
urllib.request.install_opener(opener)
img_LuJ = input('input path:')
img_LuJ2 = os.path.abspath(img_LuJ)
print(img_LuJ2)
def gethtml2(url2):
req = urllib.request.Request(url2)
html2 = urllib.request.urlopen(req).read()
return html2
def gettoimg(html2):
reg2 = r'
http://www.douban.com/group/topic/\d+'
html2 = html2.decode('utf-8')
toplist = re.findall(reg2,html2)
x = 0
for topicurl in toplist:
x+=1
return topicurl
def download(topic_page):
reg3 = r'
http://img3.douban.com/view/group_topic/large/public/.+\.jpg'
imglist = re.findall(b'reg3',topic_page)
i = 1
download_img = None
for imgurl in imglist:
img_numlist = re.findall(r'p\d{7}',imgurl)
for img_num in img_numlist:
download_img = urllib.request.urlretrieve(imgurl,img_LuJ2 + '/%s.jpg'%img_num)
time.sleep(1)
i+=1
print (imgurl)
return download_img
page_end = int(input('Please enter the page number:'))
num_end = page_end*25
num = 0
page_num = 1
while num<=num_end:
html2 = gethtml2('
http://www.douban.com/group/haixiuzu/discussion?start=%d'%num)
topicurl = gettoimg(html2)
topic_page = gethtml2(topicurl)
download_img=download(topic_page)
num = page_num*25
page_num+=1
else:
print('Program to collect complete')
这个是我修改你的代码 python3下面跑成功但是没有获得图片 能不能帮忙看一下