Python 新手爬虫的新手问题～～～求大神解惑！！！

def handle_request(self, url):
	"""构建请求对象：return"""
	headers = {
		'User-Agent':' Mozilla / 5.0 （ Windows; U; Windows NT 6.1; en-us ） AppleWebKit / 534.50 （ KHTML，类似 Gecko ）版本 / 5.1 Safari / 534.50',
		}
	return urllib.request.Request(url=url, headers=headers)

def send_request(self, request):
	"""发送请求获取内容"""
	return urllib.request.urlopen(request).read().decode('gbk')
def down_picture(self, response):
	#根据内容形成 tree 对象
	tree = etree.HTML(response)
	#根据数据形成对应 xpath
	pic_href = tree.xpath('//div[@class="main"]/dl/dd/a/img/@src') #图片链接
	pic_text = tree.xpath('//div[@class="main"]/dl/dd/a[@target="_blank"]/text()') #图片文本

	#pic_src://div[@class="main"]/dl/dd/a/img/@src
	#pic_text()://div[@class="main"]/dl/dd/a[@target="_blank"]/text()

	# try:
	for img in zip(pic_text,pic_href): 
		# request = self.handle_request(img[1]) # 再次构建图片请求对象
		# response = self.send_request(request) # 发送对象返回响应 error403
		dirname = './tupian';filename = img[0]+'.jpg'
		filepath = os.path.join(dirname, filename)
		if not os.path.exists(dirname):
			os.mkdir(dirname)
		# with open(filepath, 'wb') as fp:
			# fp.write(response.read())
		urllib.request.urlretrieve(img[1],filepath) #图片下载

	# except Exception as e:
		# print(e)

UnicodeEncodeError: 'latin-1' codec can't encode character '\uff08' in position 14: ordinal not in range(256) 有时间的话，顺便把 httperror403 错误也解了吧，新手，也没找到办法

img

request

dirname

div

6 replies • 2018-07-30 10:10:38 +08:00