Python's multiprocessing

2014-08-04 14:59:45 +08:00
 wangfeng3769
Python's multiprocessing won't run properly on Windows (4 cores); adding freeze_support() still doesn't fix it. Hoping fellow V2EX users can help out.
11736 clicks
Node: Q&A
9 replies
wangfeng3769
2014-08-04 15:00:14 +08:00
multiprocessing.freeze_support()
# lock = multiprocessing.Lock()
w = multiprocessing.Process(target=work, args=(url, ))
nw = multiprocessing.Process(target=download, args=())
w.start()
nw.start()
w.join()
nw.join()
wangfeng3769
2014-08-04 16:34:09 +08:00
You have to input a complete URL
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "D:\Python27\lib\multiprocessing\forking.py", line 380, in main
    prepare(preparation_data)
  File "D:\Python27\lib\multiprocessing\forking.py", line 495, in prepare
    '__parents_main__', file, path_name, etc
  File "D:\sworkspace\weixinspider.py", line 160, in <module>
    main(url)
  File "D:\sworkspace\weixinspider.py", line 147, in main
    w.start()
  File "D:\Python27\lib\multiprocessing\process.py", line 130, in start
    self._popen = Popen(self)
  File "D:\Python27\lib\multiprocessing\forking.py", line 258, in __init__
    cmd = get_command_line() + [rhandle]
  File "D:\Python27\lib\multiprocessing\forking.py", line 358, in get_command_line
    is not going to be frozen to produce a Windows executable.''')
RuntimeError:
    Attempt to start a new process before the current process
    has finished its bootstrapping phase.

    This probably means that you are on Windows and you have
    forgotten to use the proper idiom in the main module:

        if __name__ == '__main__':
            freeze_support()
            ...

    The "freeze_support()" line can be omitted if the program
    is not going to be frozen to produce a Windows executable.
wangfeng3769
2014-08-04 16:35:22 +08:00
Could the experts please chime in?
wingao
2014-08-04 18:02:21 +08:00
if __name__ == '__main__':
    w = multiprocessing.Process(target=work, args=(url, ))
    nw = multiprocessing.Process(target=download, args=())
    w.start()
    nw.start()
    w.join()
    nw.join()
wingao
2014-08-04 18:03:41 +08:00
This error most likely means you started processes again from inside a child process.
Move the process creation under if __name__ == '__main__'.
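For reference, a minimal runnable sketch of the idiom described above (Python 2.7; work and download here are trivial placeholders, not the OP's real functions). On Windows there is no fork(), so multiprocessing launches a child by re-importing the main module; any unguarded Process(...).start() at module level runs again inside each child and raises the bootstrapping RuntimeError shown earlier.

# coding: utf-8
# Minimal Windows-safe multiprocessing idiom (Python 2.7 sketch).
# work/download are placeholders standing in for the real workers.
import multiprocessing

def work(url):
    print 'working on', url

def download():
    print 'downloading'

if __name__ == '__main__':
    # On Windows the child re-imports this module, so process creation
    # must live under this guard or spawning recurses endlessly.
    multiprocessing.freeze_support()  # only matters for frozen .exe builds
    w = multiprocessing.Process(target=work, args=('http://example.com',))
    nw = multiprocessing.Process(target=download)
    w.start()
    nw.start()
    w.join()
    nw.join()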
wangfeng3769
2014-08-05 15:53:10 +08:00
#coding:utf-8
import re
import os
import requests as R
from BeautifulSoup import BeautifulSoup as BS
import multiprocessing
import urlparse
import time

# Pretend to be the WeChat in-app browser so the target serves its mobile pages.
opt = "Mozilla/5.0 (Linux; U; Android 4.1.2; zh-cn; GT-I9300 Build/JZO54K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30 MicroMessenger/5.2.380"
headers = {'User-Agent': opt}
# Pipe intended to link work() and download(); note it is created at module level.
a, b = multiprocessing.Pipe()
domain_url = "66365.m.weimob.com"
G_queue_url = []     # URLs discovered and queued for crawling
G_spidered_url = []  # URLs already crawled


def is_existed(file_real_path):
    # Pick a non-colliding file name: path.htm, then path_1.htm, path_2.htm, ...
    i = 1
    while 1:
        if i == 1:
            file_real_path_tem = file_real_path + '%s.htm' % ""
        if os.path.isfile(file_real_path_tem):
            file_real_path_tem = file_real_path + '_%s.htm' % str(i)
        else:
            return file_real_path_tem
        i = i + 1


def get_web_page(url):
    try:
        r = R.get(url, headers=headers)
        html = r.text
    except:
        return None

    if html:
        return html
    else:
        return None


def btree(O):
    if O:
        return BS(O, fromEncoding="utf-8")
    else:
        return None

def download():
    url = "http://66365.m.weimob.com/weisite/home?pid=66365&bid=135666&wechatid=oAdrCtzBdLhgpyIOYtBNELkWXJ68&wxref=mp.weixin.qq.com"
    print 'download'
    checked_list = []

    while True:
        print 'I am busy'

        recv_data = b.recv()  # block until work() sends a batch of URLs
        if type(recv_data) != type([]):
            if recv_data == 0:  # 0 is the shutdown sentinel sent by work()
                break

        for url in recv_data:
            print url
            if url in checked_list:
                continue
            else:
                checked_list.append(url)

            if re.search(domain_url, url):
                # Mirror the page under <domain>/<path> on the local disk.
                url_list = urlparse.urlparse(url)
                domain_folder = url_list[1]
                file_path = url_list.path
                real_path_r = os.path.sep.join([domain_folder, file_path])
                real_path_l = re.split(r'/|\\', real_path_r)
                if len(real_path_l) == 2:
                    if not real_path_l[-1]:
                        continue
                real_path_f = os.path.sep.join(real_path_l[0:-1])
                real_path_r = is_existed(real_path_r)
                try:
                    if not os.path.exists(real_path_f):
                        os.makedirs(real_path_f)
                        try:
                            f = open(real_path_r, 'w')
                        except:
                            open(real_path_r).close()
                            f = open(real_path_r, 'w')
                    else:
                        try:
                            f = open(real_path_r, 'w')
                        except:
                            open(real_path_r).close()
                            f = open(real_path_r, 'w')
                    r = R.get(url)
                    content = unicode(r.text).encode(r.encoding, 'ignore')
                    if not content:
                        continue
                    f.write(content)
                    f.close()
                except:
                    pass
            else:
                pass

def get_links(html):
    # Extract in-site links; relative hrefs are made absolute with domain_url.
    soup = btree(html)
    if not soup:
        return []
    a_links = soup.findAll('a')
    if not a_links:
        return []
    link_list = []
    for link in a_links:
        try:
            link = link.get('href')
            if not link:
                continue
        except:
            continue

        if not re.search(domain_url, link) and not re.search('http', link):
            link_list.append("http://" + domain_url + link)
    return link_list

def work(url):
    # Recursive crawler: fetch the page, send newly discovered URLs to
    # download() over the pipe, then recurse into the queue.
    global G_spidered_url
    global G_queue_url
    G_spidered_url.append(url)
    html = get_web_page(url)
    all_links = get_links(html)
    send_list = []
    if G_queue_url and all_links:
        for slink in all_links:
            if slink not in G_queue_url:
                send_list.append(slink)
                G_queue_url.append(slink)
        a.send(send_list)
    elif not G_queue_url and all_links:
        G_queue_url = all_links
        a.send(all_links)

    for url in G_queue_url:
        if url in G_spidered_url:
            continue
        else:
            G_spidered_url.append(url)
            work(url)
    a.send(0)  # note: this fires at every recursion level, not just once

def main(url):
    multiprocessing.freeze_support()
    lock = multiprocessing.Lock()
    w = multiprocessing.Process(target=work, args=(url,))
    nw = multiprocessing.Process(target=download, args=())
    w.start()
    nw.start()
    w.join()
    nw.join()


if __name__ == '__main__':
    url = "http://66365.m.weimob.com/weisite/home?pid=66365&bid=135666&wechatid=oAdrCtzBdLhgpyIOYtBNELkWXJ68&wxref=mp.weixin.qq.com"

    import sys
    try:
        url = sys.argv[1]
    except:
        print "You have to input a complete URL"
    # main(url)
    multiprocessing.freeze_support()
    lock = multiprocessing.Lock()
    w = multiprocessing.Process(target=work, args=(url,))
    nw = multiprocessing.Process(target=download, args=())
    w.start()
    nw.start()
    w.join()
    nw.join()


Just to note: on Windows the download process never runs; please take a look at what's going wrong. This crawler scrapes someone else's site outright. Feel free to copy it and give it a try. Good luck, everyone.
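A likely reason download never runs on Windows: a, b = multiprocessing.Pipe() executes at module level, and since Windows children re-import the module on spawn, each process ends up holding the ends of its own freshly created pipe rather than a shared one, so b.recv() in the child blocks forever. A sketch of the usual fix, passing the pipe ends to the workers as arguments (the work/download bodies below are placeholders, and the reworked signatures are an assumption about how the rest of the code would be adapted):

# coding: utf-8
# Sketch: share one Pipe on Windows by passing its ends as Process args
# instead of relying on module-level globals (Python 2.7). The worker
# bodies are placeholders for the real work()/download().
import multiprocessing

def work(url, conn):
    # conn is the sending end handed over by the parent process
    conn.send([url + '/page1', url + '/page2'])
    conn.send(0)  # shutdown sentinel

def download(conn):
    while True:
        data = conn.recv()
        if data == 0:  # sentinel: stop downloading
            break
        for u in data:
            print 'would download', u

if __name__ == '__main__':
    multiprocessing.freeze_support()
    a, b = multiprocessing.Pipe()  # created once, in the parent only
    w = multiprocessing.Process(target=work, args=('http://example.com', a))
    nw = multiprocessing.Process(target=download, args=(b,))
    w.start()
    nw.start()
    w.join()
    nw.join()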
wangfeng3769
2014-08-05 15:54:05 +08:00
It's a bit shady, I know, but that's how it stands for now.
wangfeng3769
2014-08-05 16:10:11 +08:00
There's a small problem when it exits; I can't tell where it goes wrong.
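One plausible source of that exit hiccup: work() is recursive and a.send(0) sits at the end of its body, so the sentinel 0 is sent once per recursion level rather than once in total; download() then breaks on the first 0 while later batches (and further 0s) may still be queued. A sketch of one way to restructure it, under the assumption that the recursive part is split into a hypothetical crawl_one helper:

def crawl_one(url, conn):
    # hypothetical rename of the recursive body of work():
    # fetch the page, conn.send(new_links), recurse into the queue
    pass

def work(url, conn):
    crawl_one(url, conn)  # all recursion happens in here
    conn.send(0)          # sentinel goes out exactly once, at the top level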
