#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests,urllib2
from bs4 import BeautifulSoup
import sys
def get_img(url):
wb = requests.get(url)
wb.encoding = "utf-8"
soup = BeautifulSoup(wb.text,'lxml')
img_url = soup.select('#Zoom img')
if img_url == []:
return False
else:
return img_url[0].get('src')
def get_title(url):
wb = requests.get(url)
wb.encoding = "utf-8"
soup = BeautifulSoup(wb.text,'lxml')
title = soup.select("a[href='#']")
return title
def get_url(Url):
wb = requests.get(Url)
wb.encoding = 'gb2312'
soup = BeautifulSoup(wb.text,'lxml')
title = soup.select('.ulink')
url = soup.select('.ulink')
titles_urls = []
for x,y in zip(title,url):
reload(sys)
sys.setdefaultencoding('utf-8')
data = {
'title': x.get_text().split("《")[1].split("/")[0].split("》")[0],
'url':y.get('href'),
}
titles_urls.append(data)
return titles_urls
for z in range(1,100):
url = '
http://www.ygdy8.net/html/gndy/dyzz/list_23_%d.html' %z
for x in get_url(url):
u = get_img("
http://www.ygdy8.net"+str(x['url']))
if u != False:
print u
print x['title']
y = str(x['title'])
with open('imgs/'+str(y)+'.jpg', "wb") as f:
f.write(requests.get(u).content)
print "第%d 页" %z
运行结果:
琼斯的自由国度
Traceback (most recent call last): with open('imgs/'+str(y)+'.jpg', "wb") as f:
IOError: [Errno 2] No such file or directory: 'imgs/\xe7\x90\xbc\xe6\x96\xaf\xe7\x9a\x84\xe8\x87\xaa\xe7\x94\xb1\xe5\x9b\xbd\xe5\xba\xa6.jpg'
[Finished in 0.7s with exit code 1]
这是一个专为移动设备优化的页面(即为了让你能够在 Google 搜索结果里秒开这个页面),如果你希望参与 V2EX 社区的讨论,你可以继续到 V2EX 上打开本讨论主题的完整版本。
https://www.v2ex.com/t/306308
V2EX 是创意工作者们的社区,是一个分享自己正在做的有趣事物、交流想法,可以遇见新朋友甚至新机会的地方。
V2EX is a community of developers, designers and creative people.