首先,Python 中"在函数内部定义函数"的一种常见用法是装饰器。这一点我在上一个提问中已经了解了,也详细了解了装饰器的作用:主要是动态地为函数增加功能,在代码测试、日志处理等场景中作用明显。
今天在读 sqlmap 的源码时,看到了一个名为 crawler.py 的爬虫文件,其中有一部分代码就是在函数内部定义函数,而且并不是用作装饰器。我看了代码,这个内部函数完全可以放到外部定义,这样做也没有问题。我想知道的是:为什么要这样定义?主要的作用是什么?比如共享变量、隐藏函数?
下面贴出其中一部分代码:
def crawl(target):
    """
    Crawls the site starting at `target` and registers discovered URLs
    that carry a query string as scan targets in kb.targets.

    The crawl runs for at most conf.crawlDepth rounds; each round processes
    the URLs collected during the previous one. Optionally (after asking
    the user) the site's /sitemap.xml is used to seed the crawl. A
    KeyboardInterrupt stops the crawl; whatever has been collected so far
    is still registered.

    NOTE(review): the nesting below was reconstructed from control flow
    because the available copy of this function had lost its leading
    whitespace - confirm against the upstream file.

    @param target: URL at which crawling starts
    @return: None (results are stored in kb.targets)
    """

    try:
        # URLs already taken by a worker; shared with crawlThread() through
        # the closure
        visited = set()
        threadData = getCurrentThreadData()

        # Collects the URLs (with a query string) that end up in kb.targets
        threadData.shared.value = oset()

        def crawlThread():
            """
            Worker body handed to runThreads(): repeatedly takes one URL
            from threadData.shared.unprocessed, fetches it and stores the
            links found in threadData.shared.deeper / .value
            """

            threadData = getCurrentThreadData()

            while kb.threadContinue:
                # Taking the next URL and marking it as visited is done
                # while holding kb.locks.limit
                with kb.locks.limit:
                    if threadData.shared.unprocessed:
                        current = threadData.shared.unprocessed.pop()
                        if current in visited:
                            continue
                        elif conf.crawlExclude and re.search(conf.crawlExclude, current):
                            dbgMsg = "skipping '%s'" % current
                            logger.debug(dbgMsg)
                            continue
                        else:
                            visited.add(current)
                    else:
                        break

                content = None
                try:
                    if current:
                        # First element of the result is used as page content
                        content = Request.getPage(url=current, crawling=True, raise404=False)[0]
                except SqlmapConnectionException as ex:
                    errMsg = "connection exception detected (%s). skipping " % ex
                    errMsg += "URL '%s'" % current
                    logger.critical(errMsg)
                except SqlmapSyntaxException:
                    errMsg = "invalid URL detected. skipping '%s'" % current
                    logger.critical(errMsg)
                except httplib.InvalidURL as ex:
                    errMsg = "invalid URL detected (%s). skipping " % ex
                    errMsg += "URL '%s'" % current
                    logger.critical(errMsg)

                if not kb.threadContinue:
                    break

                # Only textual (unicode) responses are parsed for links
                if isinstance(content, unicode):
                    try:
                        # Keep only the <html>...</html> part when present
                        match = re.search(r"(?si)<html[^>]*>(.+)</html>", content)
                        if match:
                            content = "<html>%s</html>" % match.group(1)

                        soup = BeautifulSoup(content)
                        tags = soup('a')

                        # Regex fallback when the parser returned no anchors
                        if not tags:
                            tags = re.finditer(r'(?si)<a[^>]+href="(?P<href>[^>"]+)"', content)

                        for tag in tags:
                            # Parser tags expose .get(); regex matches expose .group()
                            href = tag.get("href") if hasattr(tag, "get") else tag.group("href")

                            if href:
                                # If the redirect recorded on threadData carries the UID
                                # of the last request, resolve links against its URL
                                if threadData.lastRedirectURL and threadData.lastRedirectURL[0] == threadData.lastRequestUID:
                                    current = threadData.lastRedirectURL[1]
                                url = urlparse.urljoin(current, href)

                                # flag to know if we are dealing with the same target host
                                # (host names are compared with any port part removed)
                                urlHost = urlparse.urlparse(url).netloc.split(':')[0]
                                targetHost = urlparse.urlparse(target).netloc.split(':')[0]
                                sameHost = urlHost == targetHost

                                # An explicit scope regex takes precedence over the
                                # same-host restriction
                                if conf.scope:
                                    if not re.search(conf.scope, url, re.I):
                                        continue
                                elif not sameHost:
                                    continue

                                # Text after the last '.' is treated as the extension
                                if url.split('.')[-1].lower() not in CRAWL_EXCLUDE_EXTENSIONS:
                                    with kb.locks.value:
                                        threadData.shared.deeper.add(url)
                                        # Only URLs with a query string are kept as results
                                        if re.search(r"(.*?)\?(.+)", url):
                                            threadData.shared.value.add(url)
                    except UnicodeEncodeError:  # for non-HTML files
                        pass
                    finally:
                        # Runs even if link extraction failed above
                        if conf.forms:
                            findPageForms(content, current, False, True)

                # Progress line is printed only for verbosity levels 1 and 2
                # NOTE(review): the counter is updated without holding a lock -
                # presumably acceptable for progress output; confirm
                if conf.verbose in (1, 2):
                    threadData.shared.count += 1
                    status = '%d/%d links visited (%d%%)' % (
                        threadData.shared.count,
                        threadData.shared.length,
                        round(100.0 * threadData.shared.count / threadData.shared.length),
                    )
                    dataToStdout("\r[%s] [INFO] %s" % (time.strftime("%X"), status), True)

        # deeper: links found in the current round (input of the next one)
        # unprocessed: links still to be fetched in the current round
        threadData.shared.deeper = set()
        threadData.shared.unprocessed = set([target])

        if not conf.sitemapUrl:
            message = "do you want to check for the existence of "
            message += "site's sitemap(.xml) [y/N] "
            test = readInput(message, default="n")
            if test[0] in ("y", "Y"):
                found = True
                items = None
                url = urlparse.urljoin(target, "/sitemap.xml")
                try:
                    items = parseSitemap(url)
                except SqlmapConnectionException as ex:
                    if "page not found" in getSafeExString(ex):
                        found = False
                        logger.warn("'sitemap.xml' not found")
                except Exception:
                    # Best effort: other sitemap failures are ignored. Not a bare
                    # 'except' so that KeyboardInterrupt reaches the handler at
                    # the bottom of this function instead of being swallowed
                    pass
                finally:
                    if found:
                        if items:
                            for item in items:
                                if re.search(r"(.*?)\?(.+)", item):
                                    threadData.shared.value.add(item)
                            # Sitemap entries are crawled only for depth > 1
                            if conf.crawlDepth > 1:
                                threadData.shared.unprocessed.update(items)
                        logger.info("%s links found" % ("no" if not items else len(items)))

        infoMsg = "starting crawler"
        if conf.bulkFile:
            infoMsg += " for target URL '%s'" % target
        logger.info(infoMsg)

        for i in xrange(conf.crawlDepth):
            threadData.shared.count = 0
            threadData.shared.length = len(threadData.shared.unprocessed)

            # Never request more threads than there are URLs to process
            numThreads = min(conf.threads, len(threadData.shared.unprocessed))

            if not conf.bulkFile:
                logger.info("searching for links with depth %d" % (i + 1))

            runThreads(numThreads, crawlThread, threadChoice=(i > 0))
            clearConsoleLine(True)

            # Links found in this round become the work of the next one;
            # stop early when nothing new has been found
            if threadData.shared.deeper:
                threadData.shared.unprocessed = set(threadData.shared.deeper)
            else:
                break

    except KeyboardInterrupt:
        warnMsg = "user aborted during crawling. sqlmap "
        warnMsg += "will use partial list"
        logger.warn(warnMsg)

    finally:
        clearConsoleLine(True)

        # Register whatever has been collected, including partial results
        if not threadData.shared.value:
            warnMsg = "no usable links found (with GET parameters)"
            logger.warn(warnMsg)
        else:
            for url in threadData.shared.value:
                kb.targets.add((url, None, None, None, None))
这是一个专为移动设备优化的页面(即为了让你能够在 Google 搜索结果里秒开这个页面),如果你希望参与 V2EX 社区的讨论,你可以继续到 V2EX 上打开本讨论主题的完整版本。
V2EX 是创意工作者们的社区,是一个分享自己正在做的有趣事物、交流想法,可以遇见新朋友甚至新机会的地方。
V2EX is a community of developers, designers and creative people.