自己写了一个脚本：代码里的 60 是要抓取的最大页数，请根据收藏夹的实际页数酌情修改；另外需要把代码里的“收藏夹编号”换成自己收藏夹的编号。
import datetime
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    WebDriverException,
)
from selenium.webdriver.common.by import By
# Export every entry of a Zhihu collection to an Excel workbook: each page of
# the collection is opened in Selenium-driven Chrome, and the title plus the
# HTML of the content element of each entry is recorded.

# Placeholder: replace with the ID of the collection to export (the path
# segment that follows /collection/ in the collection's URL).
COLLECTION_ID = "收藏夹编号"
# Highest page number to request; adjust to the collection's real page count.
MAX_PAGES = 60
# Fixed pause after each navigation (presumably so the page can finish
# rendering before elements are looked up).
PAGE_LOAD_WAIT_SECONDS = 3
# Workbook that receives the collected rows.
OUTPUT_PATH = 'res.xlsx'

XPATH_ITEMS = "//*[@class='CollectionDetailPageItem-innerContainer']"
XPATH_TITLE = ".//h2"
XPATH_READ_ALL = ".//button[text()='阅读全文']"
XPATH_CONTENT = ".//div[@class='RichContent-inner']"


def build_page_url(collection_id, page):
    """Return the URL of page number *page* of the given collection.

    Both values are inserted positionally, so the collection ID can never be
    mistaken for a named ``str.format`` field.
    """
    return "https://www.zhihu.com/collection/{}?page={}".format(
        collection_id, page
    )


def _create_driver():
    """Start Chrome with the same command-line switches the script always used."""
    options = webdriver.ChromeOptions()
    options.add_argument('--ignore-ssl-errors=yes')
    options.add_argument('--ignore-certificate-errors')
    options.add_argument("--disable-blink-features=AutomationControlled")
    return webdriver.Chrome(options=options)


def _extract_item(item):
    """Return ``{'title': ..., 'content': ...}`` for one collection entry.

    Raises a ``WebDriverException`` subclass when the title or the content
    element cannot be located, or when the expand button cannot be clicked.
    """
    title = item.find_element(By.XPATH, XPATH_TITLE).text
    try:
        expand_button = item.find_element(By.XPATH, XPATH_READ_ALL)
    except NoSuchElementException:
        # NOTE(review): presumably entries that are already shown in full have
        # no expand button; they are kept instead of being silently dropped.
        pass
    else:
        expand_button.click()
    content = item.find_element(By.XPATH, XPATH_CONTENT).get_attribute(
        'outerHTML'
    )
    return {'title': title, 'content': content}


def _save_results(rows, path=OUTPUT_PATH):
    """Write all rows collected so far to the Excel workbook at *path*."""
    pd.DataFrame(rows).to_excel(path, index=False)


def main():
    """Walk pages 1..MAX_PAGES of the collection and save every entry found."""
    driver = _create_driver()
    results = []
    try:
        for page in range(1, MAX_PAGES + 1):
            driver.get(build_page_url(COLLECTION_ID, page))
            time.sleep(PAGE_LOAD_WAIT_SECONDS)
            for item in driver.find_elements(By.XPATH, XPATH_ITEMS):
                try:
                    results.append(_extract_item(item))
                except WebDriverException as exc:
                    # Best effort: one unreadable entry must not stop the run,
                    # but the skip is reported instead of being hidden.
                    print("page {}: skipped an item ({})".format(
                        page, type(exc).__name__))
            # Checkpoint after every page so an interrupted run keeps its data.
            # (The original listed the save twice; this is read as a per-page
            # checkpoint plus the final save below.)
            _save_results(results)
        # Final save; also produces the workbook if the loop body never ran.
        _save_results(results)
        # Keep the browser window open until the user presses Enter.
        input("please wait...")
    finally:
        # Always shut down the browser and its driver process.
        driver.quit()


if __name__ == "__main__":
    main()