from bs4 import BeautifulSoup

# Parse the saved page. The parser consumes the file object while the
# BeautifulSoup instance is constructed, so the handle can be closed right after.
with open('tmp1.txt') as page:
    soup = BeautifulSoup(page, 'lxml')

# The CSS selector 'a[href]' matches only <a> elements that carry an href
# attribute, so every entry in list_a is an actual link target.
list_a = [tag.get('href') for tag in soup.select('a[href]')]
list_a 中保存的即为页面中全部的超链接
# 求各种爬虫兼职
xlzd
2016-04-11 14:13:06 +08:00
from bs4 import BeautifulSoup

# Parse inside a context manager so the file handle is closed afterwards.
with open('tmp1.txt') as page:
    # href=True keeps only anchors that have an href attribute, so the
    # subscript below cannot raise KeyError on a bare <a name="..."> tag.
    anchors = BeautifulSoup(page, 'lxml').find_all('a', href=True)

# Parallel lists: list_a[i] is the link target and list_a_text[i] is the
# visible text of the same anchor.
list_a = [anchor['href'] for anchor in anchors]
list_a_text = [anchor.get_text() for anchor in anchors]