import requests
from bs4 import BeautifulSoup
import multiprocessing
# Build the list of forum pages to scrape: the board's first page, then
# pages 1-49 (the original kept a separate counter `n` that always equaled
# the loop variable, and its URL literals were broken across lines).
BASE_URL = 'http://ubuntuforums.org/forumdisplay.php?f=333'
url_list = [BASE_URL] + [BASE_URL + '&page=%d' % page for page in range(1, 50)]
def running(url, q, lock):
    """Fetch one forum listing page and append its thread links to a file.

    Parameters:
        url: the forum page URL to fetch.
        q: shared queue — unused by this function; kept so the
           ``apply_async`` call sites remain compatible.
        lock: shared lock guarding concurrent appends to the output file.

    Side effects: appends one "<link-url> <title>" line per thread link to
    /home/zhg/Pictures/cao.txt.
    """
    # Timeout prevents a worker from hanging forever on a dead socket.
    resp = requests.get(url, timeout=30)
    if resp.status_code != 200:
        return
    # Explicit parser avoids bs4's "no parser specified" warning and an
    # interpreter-dependent choice of parser.
    soup = BeautifulSoup(resp.text, 'html.parser')
    # Collect output lines first so the lock is held only for the write.
    out_lines = []
    for link in soup.find_all('a', 'title'):
        href = str(link.get('href'))
        # Write the text directly instead of str(text.encode('utf-8')),
        # which produces a literal b'...' prefix on Python 3.
        out_lines.append('http://ubuntuforums.org/' + href + ' '
                         + link.get_text() + '\n')
    # The original acquired the lock for the entire function, serializing
    # every worker's fetch+parse; only the shared-file append needs it.
    # `with` also guarantees release even if the write raises.
    with lock:
        with open('/home/zhg/Pictures/cao.txt', 'a+', encoding='utf-8') as f:
            f.writelines(out_lines)
if __name__ == '__main__':
    # Manager-backed Queue/Lock are picklable proxies, so they can be
    # shared with pool worker processes.
    manager = multiprocessing.Manager()
    q = manager.Queue()
    lock = manager.Lock()
    # The original spawned one process per URL (50 processes) for
    # network-bound work; a small pool saturates the network just as well.
    workers = min(len(url_list), max(4, multiprocessing.cpu_count()))
    pool = multiprocessing.Pool(processes=workers)
    for page_url in url_list:
        pool.apply_async(running, args=(page_url, q, lock))
    pool.close()
    pool.join()
    # print() call: the original Python-2 `print "..."` statement is a
    # syntax error on Python 3.
    print("process ended")