#!/usr/bin env python3 import requests import os import execjs,json,time
class qimingpian(object):
def __init__(self):
self.s=requests.session()
self.js_file=os.getcwd()+"/"+"js_decrypt.js"
def get_content(self):
cookies={}
cookies_str='Hm_lvt_d1cdd45a1d449d32c7b4dbab4915de60=1532161260; Hm_lpvt_d1cdd45a1d449d32c7b4dbab4915de60=1532161260; gr_user_id=0ac1c623-6d25-4c89-b0eb-beaccb4ed35c; time_token=1532254367533; unionid=ETXncbCRyisjw/hr0zeTaonhpvkz/81ntwbBWAKYE4wdmhbtHCwxkjwb+0gjVdRzeJWqqIs6kiQsM8IbOYgM5A==; Hm_lvt_1e712c5331439bcf163b46f3d208f00b=1532161262,1532252857,1532254027,1532254368; Hm_lpvt_1e712c5331439bcf163b46f3d208f00b=1532254368; userinfo={%22nickname%22:%22Wing%E3%80%82%22%2C%22headimgurl%22:%22http://thirdwx.qlogo.cn/mmopen/vi_32/Q0j4TwGTfTJzmBzIeVHkjp6IVAl3uWAgB4FYIC96KygBjBvY2qAHycK1OctdAcODsWMh8zJia3j9GCBOzR5Truw/132%22%2C%22coin%22:%2250%22%2C%22applySubmit%22:%220%22%2C%22team_flag%22:%220%22%2C%22team_uuid%22:%22%22%2C%22vip_out_date%22:%22%22%2C%22usernum%22:%22226256331%22%2C%22team_enterprise%22:%220%22%2C%22enterprise_coin%22:%220%22%2C%22is_admin%22:%220%22%2C%22is_manager%22:%220%22%2C%22first_shenqing%22:%220%22%2C%22phone%22:%2213161346498%22%2C%22apply_phone%22:%2213161346498%22%2C%22scope%22:%22qmp%22%2C%22apply_state%22:3%2C%22liyou%22:%22%22%2C%22is_certify%22:1%2C%22ip%22:%22106.37.197.194%22%2C%22person_role%22:%22%22%2C%22claim_type%22:0%2C%22expireinfo%22:false%2C%22inneruser%22:false%2C%22apply_pro_state%22:3%2C%22person_id%22:%22%22}'
for line in cookies_str.split(';'): # 按照字符:进行划分读取
# 其设置为 1 就会把字符串拆分成 2 份
name, value = line.strip().split('=', 1)
cookies[name] = value # 为字典 cookies 添加内容
url='http://pdf.api.qimingpian.com/t/getFileByPage1'
headers={"Referer": "http://vip.qimingpian.com/","User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36","Host": "pdf.api.qimingpian.com","Accept": "application/json, text/plain, */*","Accept-Encoding": "gzip, deflate","Accept-Language": "en-US,en;q=0.9","Connection": "keep-alive","Content-Length": "183","Content-Type": "application/x-www-form-urlencoded","Origin": "http://vip.qimingpian.com"}
for i in range(1,101):
form_data={"page":"i","num":"40","w":"" ,"ptype": "qmp_pc","version": "2.0","unionid": "ETXncbCRyisjw/hr0zeTaonhpvkz/81ntwbBWAKYE4wdmhbtHCwxkjwb+0gjVdRzeJWqqIs6kiQsM8IbOYgM5A==","jtype": "vip","time_token": "1532254367533"}
response=self.s.post(url=url,data=form_data,headers=headers,cookies=cookies)
print(self.s.cookies)
print(response.headers)
print(response.text)
json_data=json.loads(response.text)
_js=open(self.js_file,'r').read()
data=execjs.compile(_js).call('n',json_data['data1'])
print(data)
for j in range(0,len(data['items'])):
name=data['items'][j]['name']
report_source=data['items'][j]['report_source']
update_time=data['items'][j]['update_time']
url=data['items'][j]['url']
print(name)
print(report_source)
print(update_time)
print(url)
print('\n')
if name=='main': qimingpian().get_content()
代码很少,js 加密破解,但是现在的问题,这个网站每请求一页就 set-cookie 重新设置 sessionid,我这里用的是 session 应该是动态的变化,为啥还是报错呢?现在的情况只能访问第一页 到第二页就报以下错误
<RequestsCookieJar[<cookie phpsessid="3khddv90nbg11lu1ia8eld8ol3" for="" <a="" href="<a href=" http:="" pdf.api.qimingpian.com"="" rel="nofollow">http://pdf.api.qimingpian.com" rel="nofollow">pdf.api.qimingpian.com=""/>]> {'Content-Type': 'text/html', 'Connection': 'keep-alive', 'Content-Length': '254', 'Via': 'kunlun6.cn24[,0]', 'Timing-Allow-Origin': '*', 'Date': 'Sun, 22 Jul 2018 16:49:17 GMT', 'EagleId': '7ae1224615322781579372751e', 'Server': 'Tengine', 'X-Tengine-Error': 'non-existent domain'}
<html> <head><title>403 Forbidden</title></head> <body bgcolor="white">You don't have permission to access the URL on this server.
http://vip.qimingpian.com/#/finos/investment/ireport 进去之后 创投数据 报告库里面的数据 不知道自己错在哪里?
这是一个专为移动设备优化的页面(即为了让你能够在 Google 搜索结果里秒开这个页面),如果你希望参与 V2EX 社区的讨论,你可以继续到 V2EX 上打开本讨论主题的完整版本。
V2EX 是创意工作者们的社区,是一个分享自己正在做的有趣事物、交流想法,可以遇见新朋友甚至新机会的地方。
V2EX is a community of developers, designers and creative people.