报错我能理解,但是这种情况怎么解决呢? 本人才开始玩 python,各种没头绪啊 traceback 如下:
Traceback (most recent call last):
File "/usr/local/lib/python2.7/site-packages/flask/app.py", line 1994, in __call__
return self.wsgi_app(environ, start_response)
File "/usr/local/lib/python2.7/site-packages/flask/app.py", line 1985, in wsgi_app
response = self.handle_exception(e)
File "/usr/local/lib/python2.7/site-packages/flask/app.py", line 1540, in handle_exception
reraise(exc_type, exc_value, tb)
File "/usr/local/lib/python2.7/site-packages/flask/app.py", line 1982, in wsgi_app
response = self.full_dispatch_request()
File "/usr/local/lib/python2.7/site-packages/flask/app.py", line 1614, in full_dispatch_request
rv = self.handle_user_exception(e)
File "/usr/local/lib/python2.7/site-packages/flask/app.py", line 1517, in handle_user_exception
reraise(exc_type, exc_value, tb)
File "/usr/local/lib/python2.7/site-packages/flask/app.py", line 1612, in full_dispatch_request
rv = self.dispatch_request()
File "/usr/local/lib/python2.7/site-packages/flask/app.py", line 1598, in dispatch_request
return self.view_functions[rule.endpoint](**req.view_args)
File "/Users/chenchen/code/flask_scrapy/webapp/run.py", line 91, in run
crawler = CrawlerProcess(settings)
File "/usr/local/lib/python2.7/site-packages/scrapy/crawler.py", line 239, in __init__
install_shutdown_handlers(self._signal_shutdown)
File "/usr/local/lib/python2.7/site-packages/scrapy/utils/ossignal.py", line 21, in install_shutdown_handlers
reactor._handleSignals()
File "/usr/local/lib/python2.7/site-packages/twisted/internet/posixbase.py", line 295, in _handleSignals
_SignalReactorMixin._handleSignals(self)
File "/usr/local/lib/python2.7/site-packages/twisted/internet/base.py", line 1154, in _handleSignals
signal.signal(signal.SIGINT, self.sigInt)
ValueError: signal only works in main thread
附上核心代码
@app.route('/run')
def run():
project = dict()
project['name'] = 'test'
project['mod'] = 'debug'
project['script'] = """
# -*- coding: utf-8 -*-
import scrapy
class TiebaCategorySpider(scrapy.Spider):
name = "tieba_category"
start_url = 'http://tieba.baidu.com/f/index/forumclass'
def start_requests(self):
yield scrapy.Request(self.start_url)
def parse(self, response):
try:
links = response.xpath('//ul[@class="item-list-ul clearfix"]/li/a')
for i in links:
a = i.xpath('@href').extract_first()
name = i.xpath('text()').extract_first()
yield scrapy.Request(self.repair_url(a), callback=self.parse_category, meta={'sub_category': name})
except Exception as e:
print e
return
def parse_category(self, response):
a_list = response.xpath('//a[@class="ba_href clearfix"]')
category = response.xpath('//div[@class="ba_class_title"]/text()').extract_first()
for i in a_list:
item = CategoryItem()
item['img'] = i.xpath('img[@class="ba_pic"]/@src').extract_first()
item['name'] = i.xpath('div[@class="ba_content"]/p[@class="ba_name"]/text()').extract_first()
item['member_count'] = i.xpath('div[@class="ba_content"]//span[@class="ba_m_num"]/text()').extract_first()
item['post_count'] = i.xpath('div[@class="ba_content"]//span[@class="ba_p_num"]/text()').extract_first()
item['sub_category'] = response.meta.get('sub_category')
item['desc'] = i.xpath('div[@class="ba_content"]//p[@class="ba_desc"]/text()').extract_first()
item['category'] = category
yield item
next_url = response.xpath('//div[@class="pagination"]/a[@class="next"]/@href').extract_first()
if next_url:
yield scrapy.Request(self.repair_url(next_url), callback=self.parse_category,
meta={'sub_category': response.meta.get('sub_category')})
@staticmethod
def repair_url(url):
if url.startswith('http'):
pass
else:
url = ''. join(['http://tieba.baidu.com', url])
return url
"""
loader = ProjectLoader(project)
module = loader.load_module('test_spider')
a = module.__dict__
for each in list(six.itervalues(module.__dict__)):
if inspect.isclass(each) and issubclass(each, scrapy.Spider):
module.__dict__['__handler_cls__'] = each
_class = module.__dict__.get('__handler_cls__')
assert _class is not None, "need BaseHandler in project module"
spider = _class()
settings = get_project_settings()
crawler = CrawlerProcess(settings)
crawler.crawl(spider)
# crawler.start()
return repr(module.__dict__)
这是一个专为移动设备优化的页面(即为了让你能够在 Google 搜索结果里秒开这个页面),如果你希望参与 V2EX 社区的讨论,你可以继续到 V2EX 上打开本讨论主题的完整版本。
V2EX 是创意工作者们的社区,是一个分享自己正在做的有趣事物、交流想法,可以遇见新朋友甚至新机会的地方。
V2EX is a community of developers, designers and creative people.