import time

import requests
from bs4 import BeautifulSoup
def get_links():
    """Collect detail-page URLs for tech job listings on sz.58.com.

    Walks list pages 1-70, extracts each listing's anchor from
    ``div.job_name a`` and rebuilds the detail URL from the listing id
    embedded in the anchor's ``urlparams`` attribute.

    Returns:
        list[str]: detail-page URLs, e.g. 'http://sz.58.com/tech/<id>x.shtml'.

    NOTE(review): relies on a module-level ``headers`` dict for the HTTP
    requests — it must be defined elsewhere in this module.
    """
    urls = []
    # Loop-invariant URL pieces: hoisted out of the page loop.
    url_prefix = 'http://sz.58.com/tech/'
    url_suffix = 'x.shtml'
    for page in range(1, 71):
        list_view = 'http://sz.58.com/tech/pn{}'.format(page)
        wb_data = requests.get(list_view, headers=headers)
        soup = BeautifulSoup(wb_data.text, 'html.parser')
        for link in soup.select('div.job_name a'):
            # The listing id is the last '='-separated token of urlparams,
            # with a trailing '_q' marker stripped.
            listing_id = str(link.get('urlparams').split('=')[-1].strip('_q'))
            urls.append(url_prefix + listing_id + url_suffix)
    return urls
def get_info():
    """Fetch every detail page from get_links() and print its job data.

    For each URL, requests the page, parses the title, salary, condition
    and experience fields, and prints them as a dict. Errors on one page
    (missing elements, connection failures) skip only that page instead of
    aborting the whole crawl.

    NOTE(review): relies on a module-level ``headers`` dict — confirm it is
    defined elsewhere in this module.
    """
    urls = get_links()
    for url in urls:
        # try/except is per-URL: in the original, one failure silently
        # abandoned every remaining URL.
        try:
            wb_data = requests.get(url, headers=headers)
            soup = BeautifulSoup(wb_data.text, 'html.parser')
            time.sleep(2)  # throttle between requests to avoid blocking
            data = {
                'job': soup.select('.pos_title')[0].text,
                'salary': soup.select('.pos_salary')[0].text,
                'condition': soup.select('.item_condition')[1].text,
                'exprience': soup.select('.item_condition')[2].text
            }
            print(data)
        except IndexError:
            # Page layout differs or an element is missing — skip this listing.
            continue
        except requests.exceptions.ConnectionError:
            # Transient network failure — skip this listing and keep crawling.
            continue
# Entry guard: avoid firing 70+ HTTP requests as an import side effect.
if __name__ == '__main__':
    get_info()
这是一个专为移动设备优化的页面(即为了让你能够在 Google 搜索结果里秒开这个页面),如果你希望参与 V2EX 社区的讨论,你可以继续到 V2EX 上打开本讨论主题的完整版本。
https://www.v2ex.com/t/463108
V2EX 是创意工作者们的社区,是一个分享自己正在做的有趣事物、交流想法,可以遇见新朋友甚至新机会的地方。
V2EX is a community of developers, designers and creative people.