python 爬虫问题

2014-12-25 09:46:36 +08:00
 LINAICAI

想爬一个网站,发现用了函数翻页,地址URL不变,怎样爬?
(function(){

/**
 * Scroll handler that assigns the real image source to thumbnails that have
 * come into view.
 *
 * Each "div.thumbnail img" without a "src" whose top offset is above the
 * bottom edge of the viewport gets "src" copied from "data-src"; when the
 * image finishes loading its "min-height" is set to "0".
 * The handler is wrapped with _.throttle, passing 200 as the wait argument.
 */
var loadImageOnScroll = _.throttle(function () {
    // Use the window's scroll offset when the body's offset is 0/falsy.
    var scrolled = $("body").scrollTop() || $(window).scrollTop();
    var viewportBottom = scrolled + $(window).height();

    $("div.thumbnail img").each(function () {
        var $img = $(this);
        var needsSource = !$img.attr("src");
        if (needsSource && $img.offset().top < viewportBottom) {
            $img.attr("src", $img.attr("data-src"));
            $img[0].onload = function () {
                $img.css("min-height", "0");
            };
        }
    });
}, 200);

/**
 * Page bootstrap: runs the initialisers in a fixed order, schedules the
 * "show big image from URL" check on non-mobile clients, and attaches the
 * lazy-load scroll handler to the window.
 */
function domReady() {
    var startup = [initPagenation, initSelectPic, initStar, ajax_get_star_info];
    for (var idx = 0; idx < startup.length; idx++) {
        var run = startup[idx];
        run();
    }

    var onDesktop = !_u.isMobile();
    if (onDesktop) {
        // Delayed by 500 ms before opening the image requested via the URL.
        setTimeout(showBigImgIfUrlIndicate, 500);
    }

    $(window).scroll(loadImageOnScroll);
}

/**
 * Opens the big-image dialog automatically when the URL carries a
 * "showbig" parameter: "last" opens the last thumbnail on the page,
 * "next" opens the first one. Any other value, or a page without
 * thumbnails, does nothing.
 */
function showBigImgIfUrlIndicate() {
    var mode = url_param("showbig");
    if (mode != "last" && mode != "next") {
        return;
    }

    var thumbs = $("div.thumbnail img");
    if (!(thumbs.length > 0)) {
        return;
    }

    var index = (mode == "last") ? thumbs.length - 1 : 0;
    showBigImg(thumbs[index]);
}

/**
 * Fetches like information for every thumbnail on the page that has a
 * "data-id" attribute and forwards a successful response to set_img_stars.
 *
 * @returns {*} the chained $.ajax object, or undefined when the page has
 *              no thumbnails with an id (no request is sent in that case).
 */
function ajax_get_star_info() {
    var thumbs = $("div.thumbnail[data-id]");
    var ids = $.map(thumbs, function (el) {
        return $(el).attr("data-id");
    });
    if (ids.length == 0) {
        return;
    }

    function applyStars(resp) {
        if (!resp || resp.code != 0) {
            return;
        }
        set_img_stars(resp.likes, resp.user_imgs);
    }

    // Request failures are intentionally ignored by this handler.
    function ignoreFailure() {
    }

    var request = $.ajax({
        dataType: "json",
        url: '/ajax/imgs/like/' + ids.join(","),
        cache: false
    });
    return request.done(applyStars).fail(ignoreFailure);
}

/**
 * Applies like information to every thumbnail that has a "data-id".
 *
 * For each thumbnail the like counter text is set to likes[id] (0 when
 * there is no entry) and the star icon is switched to the liked or
 * not-liked state depending on whether the id appears in user_imgs.
 *
 * @param {Object} likes      map of image id -> like count; a missing
 *                            value is treated as an empty map.
 * @param {Array}  user_imgs  ids of the images the current user likes; a
 *                            missing value is treated as an empty list.
 *                            Entries are compared as strings, so numeric
 *                            ids match the string "data-id" attribute too.
 */
function set_img_stars(likes, user_imgs) {
    var likeCounts = likes || {};
    var userList = user_imgs || [];

    // Build a string-keyed lookup once so that membership does not depend
    // on whether the server sent the ids as numbers or as strings.
    var likedIds = {};
    for (var i = 0; i < userList.length; i++) {
        likedIds[String(userList[i])] = true;
    }

    $("div.thumbnail[data-id]").each(function () {
        var $thumb = $(this);
        var id = $thumb.attr("data-id");
        $thumb.find("div.bottombar span.starcount").text(likeCounts[id] || 0);

        var $star = $thumb.find("div.bottombar span.star");
        if (Object.prototype.hasOwnProperty.call(likedIds, id)) {
            set_img_liked($star);
        } else {
            set_img_disliked($star);
        }
    });
}

/**
 * Binds the click handler of every star icon inside a thumbnail's bottom
 * bar. A click on an "empty" star likes the image, a click on a filled
 * star removes the like; both actions are reported through _hmt.
 *
 * The icon class defaults to "icon-star" and can be overridden per icon
 * with a "data-star-class" attribute; the empty variant is that class
 * with an "-empty" suffix.
 */
function initStar() {
    $("div.thumbnail .bottombar span.star").click(function () {
        var $star = $(this);
        var imgId = $star.closest("div.thumbnail").find("img").attr("data-id");
        var starClass = $star.attr("data-star-class") || "icon-star";
        var isEmpty = $star.hasClass(starClass + "-empty");

        if (isEmpty) {
            like_img_by_id(imgId, this);
        } else {
            dislike_img_by_id(imgId, this);
        }
        _hmt.push(['_trackEvent', 'image', isEmpty ? 'like' : 'dislike', imgId]);
    });
}

/**
 * Switches a star icon to its "liked" appearance and optionally bumps the
 * like counter shown next to it.
 *
 * @param {*}       obj   the star icon (element or jQuery object).
 * @param {boolean} incr  when truthy, the sibling "span.starcount" text is
 *                        parsed as a base-10 integer (0 if it is not a
 *                        number) and increased by one.
 */
function set_img_liked(obj, incr) {
    var $star = $(obj);
    var starClass = $star.attr("data-star-class") || "icon-star";
    $star.removeClass(starClass + "-empty").addClass(starClass);

    if (incr) {
        var $count = $star.parent().find("span.starcount");
        // Explicit radix: the counter is always decimal text.
        var current = parseInt($count.text(), 10);
        $count.text((isNaN(current) ? 0 : current) + 1);
    }
}

/**
 * Switches a star icon to its "not liked" (empty) appearance and
 * optionally lowers the like counter shown next to it.
 *
 * @param {*}       obj   the star icon (element or jQuery object).
 * @param {boolean} desc  when truthy, the sibling "span.starcount" text is
 *                        parsed as a base-10 integer (0 if it is not a
 *                        number) and decreased by one, never below 0.
 */
function set_img_disliked(obj, desc) {
    var $star = $(obj);
    var starClass = $star.attr("data-star-class") || "icon-star";
    $star.removeClass(starClass).addClass(starClass + "-empty");

    if (desc) {
        var $count = $star.parent().find("span.starcount");
        // Explicit radix: the counter is always decimal text.
        var current = parseInt($count.text(), 10);
        var next = (isNaN(current) ? 0 : current) - 1;
        $count.text(next <= 0 ? 0 : next);
    }
}
/**
 * Removes the current user's like from an image via
 * POST /ajax/pic/<id>/dislike.
 *
 * Users that are not logged in are sent to the login flow instead. On a
 * response with code 0 the star icon is switched to the not-liked state
 * and its counter is decreased; a "need login" code redirects to login;
 * anything else shows an error alert.
 *
 * @param {*} id   image id used in the request URL.
 * @param {*} obj  star icon passed on to set_img_disliked.
 * @returns {*} the chained $.ajax object, or the result of
 *              Core.gotoLogin(true) when not logged in.
 */
function dislike_img_by_id(id, obj) {
    if (!Core.isLogedIn()) {
        return Core.gotoLogin(true);
    }

    function handleResponse(resp) {
        if (!resp) {
            alert("取消喜欢图片失败");
            return;
        }
        if (resp.code == 0) {
            set_img_disliked(obj, true);
            return;
        }
        if (resp.code == RESP_CODE.need_login) {
            return Core.gotoLogin(true);
        }
        alert("取消喜欢图片失败");
    }

    function handleFailure() {
        alert("服务器出差中,请等待它归来");
    }

    var request = $.ajax({
        type: "POST",
        dataType: "json",
        url: "/ajax/pic/" + id + "/dislike",
        cache: false
    });
    return request.done(handleResponse).fail(handleFailure);
}

/**
 * Likes an image for the current user via POST /ajax/pic/<id>/like.
 *
 * Users that are not logged in are sent to the login flow instead. On a
 * response with code 0 the star icon (if one was given) is switched to
 * the liked state with its counter increased, then the optional callback
 * runs; a "need login" code redirects to login; anything else shows an
 * error alert.
 *
 * @param {*}        id   image id used in the request URL.
 * @param {*}        obj  optional star icon passed on to set_img_liked.
 * @param {Function} fn   optional callback invoked after a successful like.
 * @returns {*} the chained $.ajax object, or the result of
 *              Core.gotoLogin(true) when not logged in.
 */
function like_img_by_id(id, obj, fn) {
    if (!Core.isLogedIn()) {
        return Core.gotoLogin(true);
    }

    function handleResponse(resp) {
        if (!resp) {
            alert("喜欢图片失败..");
            return;
        }
        if (resp.code == 0) {
            if (obj) {
                set_img_liked(obj, true);
            }
            if (fn) {
                fn();
            }
            return;
        }
        if (resp.code == RESP_CODE.need_login) {
            return Core.gotoLogin(true);
        }
        alert("喜欢图片失败..");
    }

    function handleFailure() {
        alert("服务器出差中,请等待它归来");
    }

    var request = $.ajax({
        type: "POST",
        dataType: "json",
        url: "/ajax/pic/" + id + "/like",
        // IE9 caches the response, so a cache-busting parameter is added here.
        cache: false
    });
    return request.done(handleResponse).fail(handleFailure);
}

/**
 * Finds the thumbnail that follows the one whose "data-bigimg" equals url.
 *
 * @param {string} url  big-image URL of the currently shown picture.
 * @returns {*} the next thumbnail element, or undefined when the picture
 *              is the last one on the page or is not found.
 */
function findNextImg(url) {
    var thumbs = $("div.thumbnail img");
    // The last thumbnail has no successor, so it never needs to be checked.
    var lastIndex = thumbs.length - 1;
    for (var idx = 0; idx < lastIndex; idx++) {
        if ($(thumbs[idx]).attr('data-bigimg') == url) {
            return thumbs[idx + 1];
        }
    }
}

/**
 * Finds the thumbnail that precedes the one whose "data-bigimg" equals url.
 *
 * @param {string} url  big-image URL of the currently shown picture.
 * @returns {*} the previous thumbnail element, or undefined when the
 *              picture is the first one on the page or is not found.
 */
function findLastImg(url) {
    var thumbs = $("div.thumbnail img");
    // The first thumbnail has no predecessor, so scanning starts at index 1.
    for (var idx = 1; idx < thumbs.length; idx++) {
        if ($(thumbs[idx]).attr('data-bigimg') == url) {
            return thumbs[idx - 1];
        }
    }
}

/**
 * Navigates to the following page and asks it to open its first picture
 * by setting "p" to the current page plus one and "showbig" to "next".
 */
function goNextPage() {
    var nextPageUrl = url_param("p", getCurrentPage() + 1);
    window.location = url_param("showbig", "next", nextPageUrl);
}

/**
 * Navigates to the preceding page and asks it to open its last picture
 * by setting "p" to the current page minus one and "showbig" to "last".
 * Does nothing unless the current page is greater than 0.
 */
function goLastPage() {
    var current = getCurrentPage();
    if (!(current > 0)) {
        return;
    }
    var previousPageUrl = url_param("p", current - 1);
    window.location = url_param("showbig", "last", previousPageUrl);
}

/**
 * Wires up keyboard and mouse interaction for the big-image dialog.
 *
 * Keys handled on body keydown: right arrow (39) next picture, left arrow
 * (37) previous picture, Enter (13) opens the topic in a new window,
 * Space (32) likes the picture. The previous/next buttons inside the
 * dialog navigate as well, and the tips element follows the mouse over
 * the image and is shown briefly the first time the mouse enters it.
 *
 * @param {string} url       big-image URL of the picture being shown.
 * @param {string} topicurl  URL opened when Enter is pressed.
 * @param {*}      imgid     image id used when liking with Space.
 * @param {*}      dialog    dialog object handed over by the modal plugin
 *                           (not used by this function).
 */
function onDialogOpen(url, topicurl, imgid, dialog) {

    // Shared navigation step: show the neighbouring picture on this page
    // if there is one, otherwise move to the neighbouring page.
    function navigate(evt, findNeighbour, turnPage) {
        Core.PageAlert.close();
        var neighbour = findNeighbour(url);
        if (neighbour) {
            $.modal.close();
            showBigImg(neighbour);
        } else {
            turnPage();
        }
        evt.preventDefault();
    }

    function goNextPic(k) {
        navigate(k, findNextImg, goNextPage);
    }

    function goPrePic(k) {
        navigate(k, findLastImg, goLastPage);
    }

    $("body").keydown(function (k) {
        var key = k.which;

        // Right arrow key.
        if (key == 39) {
            goNextPic(k);
            return false;
        }
        // Left arrow key.
        if (key == 37) {
            goPrePic(k);
            return false;
        }
        // Enter: open the topic detail page.
        if (key == 13) {
            window.open(topicurl, "_blank");
            return false;
        }
        // Space: add the picture to the user's likes.
        if (key == 32) {
            like_img_by_id(imgid, null, function () {
                Core.PageAlert.success("喜欢图片成功.");
            });
            return false;
        }
    });

    $(".big_img_div .prepic").click(function (k) {
        goPrePic(k);
    });

    $(".big_img_div .nextpic").click(function (k) {
        goNextPic(k);
    });

    $(".big_img_div img").mousemove(function (k) {
        var $tips = $(".big_img_tips");
        $tips.css("left", k.clientX).css("top", k.clientY);
    });

    $(".big_img_div img").one("mouseenter", function (k) {
        setTimeout(function () {
            $(".big_img_tips").show().delay(3000).fadeOut();
        }, 500);
    });
}

/**
 * Cleans up after the big-image dialog by removing the keydown handlers
 * bound on body.
 *
 * @param {string} url  big-image URL of the closed picture (not used).
 */
function onDialogClose(url) {
    var $body = $("body");
    $body.unbind("keydown");
}

/**
 * Opens the big-image modal dialog for a thumbnail.
 *
 * Reads the picture's metadata from the thumbnail's data-* attributes,
 * renders the "view_big_img" template with it, shows the result through
 * the modal plugin and reports a "viewbig" event through _hmt.
 *
 * The dialog is vertically centred in the window with a 20px allowance;
 * the top offset is never negative and falls back to 0 when the
 * "data-height" attribute is missing or not numeric.
 *
 * @param {*} obj  thumbnail image (element or jQuery object) carrying the
 *                 data-bigimg, data-width, data-height, data-url,
 *                 data-title and data-id attributes.
 */
function showBigImg(obj) {
    var $img = $(obj);
    var url = $img.attr("data-bigimg");
    var width = $img.attr("data-width");
    var height = $img.attr("data-height");
    var topicurl = $img.attr("data-url");
    var topictitle = $img.attr("data-title");
    var imgid = $img.attr("data-id");

    var opt = {
        zIndex: 10000,
        close: true,
        escClose: true,
        height: height,
        width: width,
        opacity: 70,
        containerCss: {"margin-top": "20px"},
        onOpen: function (dialog) {
            onDialogOpen(url, topicurl, imgid, dialog);
            dialog.overlay.show();
            dialog.container.show();
            dialog.data.fadeIn('slow');
        },
        onClose: function (dialog) {
            $.modal.close(); // must call this!
            onDialogClose(url);
        }
    };

    // height is attribute text; the subtraction coerces it to a number.
    // Anything that is not a positive number (negative, NaN) becomes 0.
    var fixTop = Math.floor(($(window).height() - height) / 2 - 20);
    if (!(fixTop > 0)) {
        fixTop = 0;
    }
    opt = fixPopPosition(opt, fixTop);

    var tpl = new Templet($("script[data-tpl='view_big_img']").html());
    $(tpl.render({
        url: url,
        width: width,
        height: height,
        topicurl: topicurl,
        topictitle: topictitle
    })).modal(opt);

    _hmt.push(['_trackEvent', 'image', 'viewbig', '']);
}

/**
 * Hides the ".topcorner" elements and, on non-mobile clients only, makes
 * every thumbnail image open the big-image dialog when clicked.
 */
function initSelectPic() {
    $(".topcorner").hide();
    // The click-to-enlarge behaviour is only added on the non-mobile version.
    if (!_u.isMobile()) {
        $("div.thumbnail img").click(function () {
            showBigImg(this);
        });
    }
}

/**
 * Adds positioning information to a modal options object.
 *
 * Sets "docDem" to [body height, body width] and "position" to [t] on the
 * given object (the object is modified in place) or on a new object when
 * none is given.
 *
 * @param {Object} opt  modal options to extend; may be omitted.
 * @param {*}      t    value stored as the only entry of "position".
 * @returns {Object} the extended options object.
 */
function fixPopPosition(opt, t) {
    var $body = $("body");
    var target = opt ? opt : {};
    target.docDem = [$body.height(), $body.width()];
    target.position = [t];
    return target;
}

/**
 * Hides a picture via POST /ajax/pic/<id>/hide and, on a response with
 * code 0, removes the enclosing "div.thumbnail" from the page. Any other
 * response or a failed request shows an alert about the hide action.
 *
 * @param {*} id   image id used in the request URL.
 * @param {*} obj  element inside the thumbnail that triggered the action.
 * @returns {*} the chained $.ajax object.
 */
function hidePic(id, obj) {
    var url = "/ajax/pic/" + id + "/hide";
    return $.ajax({
        type: "POST",
        dataType: "json",
        url: url,
        // IE9 caches the response, so a cache-busting parameter is added here.
        cache: false
    }).done(function (resp) {
        if (resp && resp.code == 0) {
            $(obj).closest("div.thumbnail").remove();
        }
        else {
            // Message names the hide action (it used to repeat the
            // "set as visible" text of selectPic).
            alert("隐藏图片出错");
        }
    }).fail(function () {
        alert("隐藏图片失败");
    });
}

/**
 * Marks a picture as selected via POST /ajax/pic/<id>/select and, on a
 * response with code 0, removes the enclosing "div.thumbnail" from the
 * page. Any other response or a failed request shows an alert.
 *
 * @param {*} id   image id used in the request URL.
 * @param {*} obj  element inside the thumbnail that triggered the action.
 * @returns {*} the chained $.ajax object.
 */
function selectPic(id, obj) {
    function handleResponse(resp) {
        var succeeded = resp && resp.code == 0;
        if (!succeeded) {
            alert("选择为可见出错");
            return;
        }
        $(obj).closest("div.thumbnail").remove();
    }

    function handleFailure() {
        alert("设置为可见失败");
    }

    var request = $.ajax({
        type: "POST",
        dataType: "json",
        url: "/ajax/pic/" + id + "/select",
        // IE9 caches the response, so a cache-busting parameter is added here.
        cache: false
    });
    return request.done(handleResponse).fail(handleFailure);
}

// Empty placeholder: the body contains no statements, so calling login()
// has no effect. No caller is visible in this section of the file.
function login(){

}

// Empty placeholder: the body contains no statements, so calling regist()
// has no effect. No caller is visible in this section of the file.
function regist(){

}

/**
 * Reads or writes a query-string parameter.
 *
 * Getter (value omitted): returns the decoded value of "name" found in
 * url (or in location.search when url is not given), with "+" treated as
 * a space, or null when the parameter is absent or empty.
 *
 * Setter (value given): returns url (or window.location.href when url is
 * not given) with "name" set to the URI-encoded value. Existing
 * occurrences are replaced, otherwise the pair is appended with "?" or
 * "&". Anything after "#" is left untouched.
 *
 * The parameter name is matched literally and only as a whole parameter
 * (at the start of the string or right after "?" or "&").
 *
 * @param {string} name   parameter name.
 * @param {*}      value  new value; omit (undefined) to read instead.
 * @param {string} url    optional URL / query string to work on.
 * @returns {string|null} the parameter value (getter) or the new URL (setter).
 */
function url_param(name, value, url) {
    var paramName = String(name);
    // Escape regex metacharacters so names like "a.b" are matched literally.
    var escapedName = paramName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

    if (typeof value == "undefined") {
        var getter = new RegExp('[?&]' + escapedName + '=' + '([^&;]+?)(&|#|;|$)');
        var found = getter.exec(url || location.search);
        var raw = found ? found[1] : "";
        return decodeURIComponent(raw.replace(/\+/g, '%20')) || null;
    }

    url = url || window.location.href;
    var pair = paramName + "=" + encodeURIComponent(value.toString());
    var setter = new RegExp("(^|[?&])" + escapedName + "=[^&]*", "g");
    var parts = url.split("#");

    if (parts[0].search(setter) !== -1) {
        // A replacer function keeps "$" sequences in the name from being
        // interpreted as replacement patterns.
        parts[0] = parts[0].replace(setter, function (match, prefix) {
            return prefix + pair;
        });
    } else {
        parts[0] = parts[0] + (parts[0].indexOf("?") == -1 ? "?" : "&") + pair;
    }
    return parts.join("#");
}

/**
 * Returns the current page index taken from the "p" URL parameter.
 *
 * @returns {number} a non-negative integer; 0 when the parameter is
 *                   missing, not numeric or negative.
 */
function getCurrentPage() {
    var page = parseInt(url_param("p"), 10);
    // Guard against NaN (e.g. "?p=abc") leaking into pagination arithmetic.
    if (isNaN(page) || page < 0) {
        return 0;
    }
    return page;
}

/**
 * Renders the pagination bar.
 *
 * Builds page entries for the window from two pages before the current
 * page up to (but not including) two pages after it, clipped to
 * [0, cgidata.page_count), renders them with the "pagination" template
 * into "div.pagination ul", and then either sets the href of the
 * previous/next links or removes their parent element when there is no
 * such page.
 */
function initPagenation() {
    var template = new Templet($("script[data-tpl='pagination']").html());
    var total = cgidata.page_count;
    var current = getCurrentPage();

    var first = current - 2;
    if (first < 0) {
        first = 0;
    }
    var stop = current + 2;
    if (stop > total) {
        stop = total;
    }

    var pages = [];
    for (var idx = first; idx < stop; idx++) {
        pages.push({
            url: url_param("p", idx),
            page: idx + 1,
            "class": (current == idx) ? "active" : ""
        });
    }
    var markup = template.render({pages: pages, next: stop < total});
    $('div.pagination ul').html(markup);

    // Point an arrow link at its target page, or drop it when unavailable.
    function wireArrow(type, enabled, targetPage) {
        var $link = $('div.pagination ul a[data-type="' + type + '"]');
        if (enabled) {
            $link.attr('href', url_param("p", targetPage));
        } else {
            $link.parent().remove();
        }
    }

    wireArrow("pre", current - 1 >= 0, current - 1);
    wireArrow("next", current + 1 < total, current + 1);
}

// Entry point of this script: hands the page bootstrap function to
// Core.domReady. Core is defined outside this section; presumably it
// invokes the callback once the DOM is ready — confirm against Core.
Core.domReady(domReady)

})();

我希望迭代出每一页的数据,当然我爬数据的正则已经准备好了,问题在于怎样取得每一页的 URL?

3291 次点击
所在节点    问与答
22 条回复
LINAICAI
2014-12-25 15:40:34 +08:00
@icedx 忽然觉得web开发比移动端开发酷多了 python好爽啊
不错的IDE加语言能力 居然还可以直接操作远程数据库
比OC用起来舒服
icedx
2014-12-25 16:15:15 +08:00
@LINAICAI
其实坑挺深的,有可能一生也不理解 Python 的哲学...

这是一个专为移动设备优化的页面(即为了让你能够在 Google 搜索结果里秒开这个页面),如果你希望参与 V2EX 社区的讨论,你可以继续到 V2EX 上打开本讨论主题的完整版本。

https://www.v2ex.com/t/156503

V2EX 是创意工作者们的社区,是一个分享自己正在做的有趣事物、交流想法,可以遇见新朋友甚至新机会的地方。

V2EX is a community of developers, designers and creative people.

© 2021 V2EX