翻出来了原来写的代码。
```python
def goubanjia_com(self, *args):
    """Scrape free proxies from goubanjia.com and queue them for verification.

    Fetches http://www.goubanjia.com/free/index<N>.shtml pages in sequence
    until the site returns 404, parses each table row into a proxy() object,
    and puts it on self.wait_for_verify. One verification thread is spawned
    per successfully parsed page.

    Args:
        *args: unused; kept for the thread-entry calling convention.
    """
    logger.info('goubanjia.com start')
    page = 1
    self.THREAD_ID += 1
    while True:
        url = 'http://www.goubanjia.com/free/index%d.shtml' % page
        r = requests.get(url, headers=self.http_headers())
        if r.status_code == 404:
            # Past the last page -- stop crawling.
            break
        try:
            html = BeautifulSoup(r.text, 'lxml')
            tbody = html.tbody
            for tr in tbody.find_all('tr'):
                p = proxy()
                # Remove decoy <p> tags the site injects to confuse scrapers.
                for decoy in tr.find_all('p'):
                    decoy.extract()
                try:
                    ip_port = tr.find_all('td', {'class': 'ip'})[0].text.split(':')
                    p.ip = ip_port[0]
                    p.port = int(ip_port[1])
                    # Query the <td> list once instead of once per field.
                    tds = tr.find_all('td')
                    p.safe = self._strip_ws(tds[1].text)
                    p.type = self._strip_ws(tds[2].text)
                    # Place field also carries \r and non-breaking spaces.
                    p.place = self._strip_ws(tds[3].text, extra='\r\xa0')
                    p.net = self._strip_ws(tds[4].text)
                except IndexError:
                    print(tr)
                    logger.error('%s is index error' % p)
                    # Row did not parse fully -- do NOT queue the
                    # incomplete proxy (the original fell through here).
                    continue
                logger.debug('<get>%s' % p)
                self.wait_for_verify.put(p)
            self.THREAD_ID += 1
            self.add_thread(self.verify_proxy_thread, self.THREAD_ID)
            logger.debug('%s ok' % url)
            gevent.sleep(1)
        except AttributeError as e:
            # Page had no <tbody> (blocked / malformed response):
            # back off and retry the same page.
            print(e)
            gevent.sleep(10)
            logger.error('%s Error, sleep 10s' % url)
            continue
        page += 1

@staticmethod
def _strip_ws(s, extra=''):
    """Remove spaces, newlines, tabs (plus any *extra* chars) from *s*."""
    for ch in ' \n\t' + extra:
        s = s.replace(ch, '')
    return s
```