想要用curl的post模拟登录,然后再抓取想要的页面
post的数据和url的格式如何写?试了很多都没有实现
请大神们帮忙看看,谢谢了。代码写的不好看,还请各位见谅
// Endpoints: the sign-in form, and the page to fetch once logged in.
$url = 'http://www.v2ex.com/signin';
$url2 = 'http://www.v2ex.com/notes';

// Cookie file stored next to this script.
$cookie = __DIR__ . '/cookie.txt';

// Form fields sent with the login request.
$post = array('u' => '***', 'p' => '***');

// Step 1: simulate the login so the session cookies land in $cookie.
login_post($url, $cookie, $post);

// Step 2: request the second URL, sending the cookies saved in step 1.
$content = get_content($url2, $cookie);

// Step 3: remove the cookie file; "@" silences any error from unlink().
@unlink($cookie);
/**
 * Submit a login form via HTTP POST and save the cookies the server sets.
 *
 * The fields are encoded with http_build_query(), so cURL sends them as a
 * URL-encoded string (application/x-www-form-urlencoded) rather than as
 * multipart/form-data, which is what passing a raw array would produce.
 *
 * @param string $url    Address the login form is submitted to.
 * @param string $cookie Path of the file the received cookies are written to.
 * @param array  $post   Form fields as name => value pairs.
 * @return string|false  Response body on success, false if the handle could
 *                       not be created or the request failed.
 */
function login_post($url, $cookie, $post) {
    $curl = curl_init();
    if ($curl === false) {
        return false;
    }

    curl_setopt($curl, CURLOPT_URL, $url);
    // Keep response headers out of the returned body.
    curl_setopt($curl, CURLOPT_HEADER, 0);
    // 1 = hand the response back as a string. The previous value 0 made
    // curl_exec() echo the login page straight to output.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    // Cookies are written to this file when the handle is closed.
    curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie);
    curl_setopt($curl, CURLOPT_POST, 1);
    curl_setopt($curl, CURLOPT_POSTFIELDS, http_build_query($post));

    $response = curl_exec($curl);
    curl_close($curl);

    // Returned so the caller can inspect the page and verify the login.
    return $response;
}
/**
 * Fetch a page with HTTP GET, sending the cookies stored in a cookie file.
 *
 * @param string $url    Address of the page to fetch.
 * @param string $cookie Path of the cookie file to read cookies from.
 * @return string|false  Page body on success, false if the handle could not
 *                       be created or the request failed.
 */
function get_content($url, $cookie) {
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    // Keep response headers out of the returned body.
    curl_setopt($ch, CURLOPT_HEADER, 0);
    // 1 = return the body as a string. With the previous value 0,
    // curl_exec() printed the page and returned only true/false, so the
    // caller never received the content.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Load cookies from the file written during login.
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie);

    $rs = curl_exec($ch);
    curl_close($ch);

    return $rs;
}
1
lilydjwg 2015-06-07 11:25:05 +08:00
man CURLOPT_POSTFIELDS 里有说。你没有设置 Content-Type 啊。
|
2
LeoQ 2015-06-07 11:35:50 +08:00 via Android 1
PHP的手册我觉得是很好看的,为什么你不看?
|
3
dai269619118 2015-06-07 11:39:56 +08:00
抓包工具看看header里面的参数是不是一样。
应该就是content-type类型没对 mac抓包httpscoop 参考下我最近写的一个论坛的 $data = $_POST; //请求登陆地址 $url = "http://passport.uc108.com/login.aspx?mode=1"; $data = array( 'username' => trim($_POST['username']), 'password' => trim($_POST['password']), 'verifyCode' => trim($_POST['yzm']), 'verifycodeid' => trim($_SESSION['code']), 'remember' => 1, ); $ch = curl_init($url); $array = Array( "Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", "Accept-Encoding:gzip, deflate", "Accept-Language:zh-CN,zh;q=0.8", "Cache-Control:max-age=0", "Connection:keep-alive", "Host:passport.uc108.com", "Origin:http://shangyu.108sq.com", "Referer:http://shangyu.108sq.com/User/Login?url=http%3A%2F%2Fshangyu.108sq.com%2F", "User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36", "DNT:1", ); $ch = curl_init($url); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_HEADER, true); curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 120); curl_setopt($ch, CURLOPT_POST, true); curl_setopt($ch, CURLOPT_POSTFIELDS, $data); curl_setopt($ch, CURLOPT_HTTPHEADER, $array); curl_setopt($ch, CURLINFO_HEADER_OUT, true); curl_setopt($ch, CURLOPT_COOKIEFILE, $_SESSION['file']); curl_setopt($ch, CURLOPT_COOKIEJAR, $_SESSION['file']); $result = curl_exec($ch); curl_close($ch); $pattern = "/apps.*?\"(.*?)\"/m"; preg_match_all($pattern, $result, $match); if(!empty($match[1])) { return $match[1]; } else { $flag = array('flag' => '101', 'msg' => '账号或验证码错误'); echo json_encode($flag); exit; } |
4
l12ab 2015-06-07 12:21:28 +08:00
和contenttype没什么关系,会自动url encode
用$cookie_jar = tempnam($tmpdir,'cookie'); 产生cookie文件 登录和抓取的时候最好同时设置一下CURLOPT_COOKIEJAR和CURLOPT_COOKIEFILE 给你的登录函数 设置一个返回值,看看是否登录成功 |
5
lilydjwg 2015-06-07 13:29:53 +08:00
@l12ab Content-Type 是对服务器说的。有可能服务器只有在 Content-Type 指明为表单提交时才会按表单的格式解码处理。
|
8
yangmls 2015-06-07 19:16:17 +08:00
|
9
Yien 2015-06-07 21:32:30 +08:00
mark
|
10
lijinma 2015-06-07 22:26:35 +08:00 via iPhone
不用 Guzzle?
|