主要 py 文件码源:
import scrapy
from myfirst.items import MyfirstItem
class TestSpider(scrapy.Spider):
name = 'test'
part_url = 'https://www.qiushibaike.com/imgrank/'
page = 'page/'
def start_requests(self):
for page_number in range(1, 2, 1):
if page_number == 1:
yield self.make_requests_from_url(self.part_url)
else:
yield self.make_requests_from_url(self.part_url + self.page + str(page_number) + "/")
def parse(self, response):
item = MyfirstItem()
item["img_url"] = response.xpath("//div[@class='thumb']/a/img/@src")
for url in item["img_url"]:
print(url)
url 打印出来:
<Selector xpath="//div[@class='thumb']/a/img/@src" data='//pic.qiushibaike.com/system/pictures/11'>
实际地址比 data 的要长
import scrapy
from myfirst.items import MyfirstItem
class TestSpider(scrapy.Spider):
name = 'test'
part_url = 'https://www.qiushibaike.com/imgrank/'
page = 'page/'
def start_requests(self):
for page_number in range(1, 2, 1):
if page_number == 1:
yield self.make_requests_from_url(self.part_url)
else:
yield self.make_requests_from_url(self.part_url + self.page + str(page_number) + "/")
def parse(self, response):
item = MyfirstItem()
item["img_url"] = response.xpath("//div[@class='thumb']/a/img/@src")
for url in item["img_url"]:
print(url)
url 打印出来:
<Selector xpath="//div[@class='thumb']/a/img/@src" data='//pic.qiushibaike.com/system/pictures/11'>
实际地址比 data 的要长