代码如下:先在主页获取子页面(大概 64 个),然后并发访问这些子页面,总用时大概 36s,感觉还是有点慢。
# Endpoints used by the crawler: 'home_page' is the industry index that is
# fetched first, 'base' is the prefix put in front of each industry href.
URL_MAP = dict(
    home_page='https://xxx/stocks/industry',
    base='https://xxx.com',
)
class App(BaseService):
    """Crawl the industry index page and each industry page it links to."""

    def __init__(self):
        super().__init__()

    async def home_page(self):
        """Fetch the index page, then fetch every linked industry page concurrently.

        One ``aiohttp.ClientSession`` is shared by all requests. When every
        detail page has been handled, the elapsed time is printed.
        """
        # perf_counter is monotonic, so the measured duration cannot be skewed
        # by system clock adjustments the way time.time() can.
        start = time.perf_counter()
        async with aiohttp.ClientSession() as session:
            async with session.get(url=URL_MAP['home_page'], headers=headers) as response:
                # Awaiting suspends this coroutine until the body has been read.
                html = await response.text()
            # The index response is released above, before the fan-out, so it
            # is not held open while the detail pages are being downloaded.
            industries = Selector(text=html).xpath('//ul[@class="list-unstyled"]/a')

            task_list = []
            for industry in industries:
                industry_url = industry.xpath('.//@href').extract_first()
                if not industry_url:
                    # NOTE(review): extract_first() presumably yields None when
                    # the anchor has no href; concatenating None to the base
                    # URL in detail_list would raise TypeError, so skip it.
                    continue
                json_data = {
                    'industry_url': industry_url,
                    'industry_name': industry.xpath('.//li/text()').extract_first(),
                }
                task_list.append(
                    asyncio.ensure_future(self.detail_list(session, industry_url, json_data))
                )

            # The session must stay open until every detail request finishes.
            await asyncio.gather(*task_list)
        end = time.perf_counter()
        print(f'time used {end-start}')

    async def detail_list(self, session, url, json_data):
        """Download one industry page and pass its HTML to ``parse_detail``.

        Args:
            session: The shared client session used to issue the request.
            url: Path of the industry page; appended to ``URL_MAP['base']``.
            json_data: Dict carrying ``industry_url`` and ``industry_name``.
        """
        async with session.get(URL_MAP['base'] + url, headers=headers) as response:
            # Keep the body under its own name instead of rebinding `response`.
            html = await response.text()
        self.parse_detail(html, json_data)

    def parse_detail(self, html, json_data=None):
        """Print the ``<title>`` text of an industry page.

        Args:
            html: Raw HTML of the page.
            json_data: Metadata collected for the page; currently unused.
        """
        title = Selector(text=html).xpath('//title/text()').extract_first()
        print(title)
# Script entry: build the crawler and run its top-level coroutine to completion.
# The snippet defines no `Holdle`; the class declared above with `home_page`
# is `App`, so instantiate that instead of an undefined name.
app = App()
# asyncio.run creates a fresh event loop and closes it afterwards, replacing
# the deprecated get_event_loop()/run_until_complete pairing.
asyncio.run(app.home_page())
这是一个专为移动设备优化的页面(即为了让你能够在 Google 搜索结果里秒开这个页面),如果你希望参与 V2EX 社区的讨论,你可以继续到 V2EX 上打开本讨论主题的完整版本。
V2EX 是创意工作者们的社区,是一个分享自己正在做的有趣事物、交流想法,可以遇见新朋友甚至新机会的地方。
V2EX is a community of developers, designers and creative people.