class JobsSpider(scrapy.Spider):
    """Spider that collects job postings from zhaopin.com for the keyword 'Python'.

    A Selenium-driven Chrome session submits the search on the home page to
    obtain the result-list URL; the rest of the crawl (result list and job
    detail pages) goes through ordinary Scrapy requests.
    """

    name = 'jobs'
    allowed_domains = ['zhaopin.com']
    start_urls = ['https://www.zhaopin.com/']

    def start_requests(self):
        """Yield a single request for the search-result page.

        Opens Chrome, types 'Python' into the search box, clicks the search
        button, and reads the URL of the window that ends up focused.
        """
        # Local import: `find_element(By..., ...)` works on both old and new
        # Selenium releases, unlike the removed `find_element_by_*` helpers.
        from selenium.webdriver.common.by import By

        browser = webdriver.Chrome()
        try:
            browser.get("https://zhaopin.com")
            home_handle = browser.current_window_handle

            search_box = browser.find_element(By.CLASS_NAME, 'zp-search__input')
            search_box.send_keys('Python')
            time.sleep(1)
            browser.find_element(By.CLASS_NAME, 'zp-search__btn').click()

            # The click may open the results in another window; switch to any
            # window that is not the one we started in. If no new window
            # appeared, the current window's URL is used as-is.
            for handle in browser.window_handles:
                if handle != home_handle:
                    browser.switch_to.window(handle)
            url = browser.current_url
        finally:
            # Always shut the browser down so no Chrome/driver process leaks.
            browser.quit()

        yield scrapy.Request(url, callback=self.parse)

    def parse(self, response):
        """Follow every link found inside the result-list item containers."""
        le = LinkExtractor(
            restrict_css='div.contentpile__content__wrapper__item.clearfix'
        )
        for link in le.extract_links(response):
            yield scrapy.Request(link.url, callback=self.parse_job)

    def parse_job(self, response):
        """Extract one JobItem from a job detail page.

        Fields that are not found on the page are set to None.
        """
        job = JobItem()
        sel = response.css('div.main')

        # NOTE(review): the original selector was 'hi.l.info-h3'; 'hi' is not
        # an HTML element, so 'h1' is assumed -- confirm against page markup.
        job['jobname'] = sel.css('h1.l.info-h3::text').extract_first()

        # NOTE(review): the original selector was 'div.company 1::text', which
        # is not valid CSS. The class is assumed to be 'l' (as in the other
        # selectors here); normalize-space(.) returns the element's trimmed
        # text whether or not the name is wrapped in a child tag.
        job['Cname'] = (
            sel.css('div.company.l').xpath('normalize-space(.)').extract_first()
        )

        job['salary'] = sel.css('div.l.info-money strong::text').extract_first()
        job['joblocation'] = sel.css('span.icon-address::text').extract_first()

        # NOTE(review): the original class was written '.1', an invalid CSS
        # identifier; '.l' is assumed -- confirm against page markup.
        details = sel.css('div.info-three.l')
        # XPath selects text nodes with '/text()', not '.text()'.
        job['experience'] = details.xpath('(.//span)[1]/text()').extract_first()
        job['education'] = details.xpath('(.//span)[2]/text()').extract_first()
        job['count'] = details.xpath('(.//span)[3]/text()').extract_first()

        # extract() must be called; storing the bound method is useless.
        job['jobintro'] = sel.css('div.pos-ul').extract()
        yield job
1
huisezhiyin 2019-04-10 15:13:00 +08:00
你这个代码格式贴的 让人很难看得懂啊
|
2
idotfish OP @huisezhiyin 不好意思,刚刚入门 Python,不太懂这些东西,把代码直接截图出来可以吗?
|
3
huisezhiyin 2019-04-10 16:17:04 +08:00 1
@idotfish 你这随便搜一下 ERROR 就有答案啊
随便搜一下 “error:ssl_client_socket_impl.cc(964)] handshake failed”,这是 Stack Overflow 上的一个答案:https://stackoverflow.com/questions/37883759/errorssl-client-socket-openssl-cc1158-handshake-failed-with-chromedriver-chr 。不行的话就试试其他的答案。 |