根据提示信息,找不到 chrome 执行文件,需要下载 chromedriver,然后通过 executable_path=chromedriver 参数指定其路径。
chromedriver 下载地址:
http://chromedriver.storage.googleapis.com/index.html
推荐填写完整的绝对路径,例如:
chromedriver = '/usr/local/bin/chromedriver'
browser = webdriver.Chrome(executable_path=chromedriver) # 打开 Chrome 浏览器
给出的完整代码如下:
from selenium import webdriver
from selenium.webdriver.common.proxy import *
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from pyvirtualdisplay import Display
# from xvfbwrapper import Xvfb
import bs4, os, re, time, zipfile
from base64 import b64encode
import sys
from posix import unlink
# Python 2-only idiom: `reload` is a builtin there, and reloading `sys`
# makes `sys.setdefaultencoding` callable again so the process-wide default
# str/unicode codec can be switched to UTF-8 (used implicitly by the
# str(content) calls in the functions below).
# NOTE(review): neither name exists in Python 3 -- confirm the target interpreter.
reload(sys)
sys.setdefaultencoding('utf8')
## webdriver + chrome (no proxy; fetch the page directly)
def spider_url_chrome(url):
    """Load ``url`` in Chrome on a virtual display and print its page source.

    A pyvirtualdisplay ``Display`` (800x600, ``visible=0``) is started first,
    then Chrome is launched through the chromedriver binary located at
    ``/usr/local/bin/chromedriver``.  The page source is written to stdout
    prefixed with ``"content: "``; the function returns nothing.

    Args:
        url: Address passed unchanged to ``webdriver.Chrome.get``.
    """
    driver_path = '/usr/local/bin/chromedriver'
    virtual_display = None
    driver = None
    try:
        virtual_display = Display(visible=0, size=(800, 600))
        virtual_display.start()
        driver = webdriver.Chrome(executable_path=driver_path)  # launch Chrome
        driver.get(url)
        page_source = driver.page_source
        print("content: " + str(page_source))
    finally:
        # Only tear down what was actually created: browser first, then display.
        if driver:
            driver.quit()
        if virtual_display:
            virtual_display.stop()
## webdriver + chrome + proxy + whiteip (no password, or whitelisted-IP authorization)
## Mimvp proxy: https://proxy.mimvp.com
def spider_url_chrome_by_whiteip(url):
    """Load ``url`` in Chrome through an HTTP(S) proxy and print its page source.

    The proxy is passed to Chrome with the ``--proxy-server`` command-line
    argument, so no credentials are sent; the original notes say this works
    when the proxy needs no password or the caller's IP is whitelisted.
    A pyvirtualdisplay ``Display`` (800x600, ``visible=0``) is started first,
    Chrome is launched through ``/usr/local/bin/chromedriver``, and the page
    source is written to stdout prefixed with ``"content: "``.  Nothing is
    returned.

    Args:
        url: Address passed unchanged to ``webdriver.Chrome.get``.
    """
    browser = None
    display = None

    ## Whitelisted IPs are managed in the Mimvp proxy member center:
    ## https://proxy.mimvp.com/usercenter/userinfo.php?p=whiteip
    mimvp_proxy = {
        'ip': '140.143.62.84',         # ip
        'port_https': 62288,           # http, https
        'port_socks': 62287,           # socks5
        'username': 'mimvp-user',      # not read by this function
        'password': 'mimvp-pass',      # not read by this function
    }

    try:
        display = Display(visible=0, size=(800, 600))
        display.start()

        # The original created Options() and immediately overwrote it with
        # webdriver.ChromeOptions() (both marked "ok"); only the instance
        # that is actually used is kept.
        chrome_options = webdriver.ChromeOptions()

        # http / https proxy (no password, or whitelisted IP): reported working.
        proxy_https_argument = '--proxy-server=http://{ip}:{port}'.format(
            ip=mimvp_proxy['ip'], port=mimvp_proxy['port_https'])
        chrome_options.add_argument(proxy_https_argument)

        # socks5 proxy (no password, or whitelisted IP): reported failing,
        # kept here for reference only.
        # proxy_socks_argument = '--proxy-server=socks5://{ip}:{port}'.format(ip=mimvp_proxy['ip'], port=mimvp_proxy['port_socks'])
        # chrome_options.add_argument(proxy_socks_argument)

        chromedriver = '/usr/local/bin/chromedriver'
        browser = webdriver.Chrome(executable_path=chromedriver,
                                   chrome_options=chrome_options)  # launch Chrome
        browser.get(url)
        content = browser.page_source
        print("content: " + str(content))
    finally:
        # Only tear down what was actually created.
        if browser:
            browser.quit()
        if display:
            display.stop()