wwttc
2014-08-04 10:29:38 +08:00
# Scan "largefile" line by line and find, for every line, which entries of
# `topics` it contains.
#
# Each line is split on the first three commas: the third field supplies the
# post time (its first whitespace-separated token) and the last field is the
# post text. `topics` must be defined before this point; each entry is a
# string of one or more whitespace-separated words, and a post contains a
# topic only when every one of those words occurs in the post text.

# Split each topic into its words once, instead of once per line of the file.
topic_words = [(topic, topic.split()) for topic in topics]

# `with` closes the file even if processing raises part-way through.
with open("largefile") as f:
    for line in f:
        fields = line.split(',', 3)
        try:
            # Time the post was published.
            tweet_time = fields[2].split()[0]
        except IndexError:
            # Fewer than three fields, or an empty time field: skip the line.
            continue
        tweet = fields[-1]  # Post content.

        for topic, words in topic_words:
            # Decide only after checking every word of the topic; a topic
            # with no words matches trivially.
            if all(word in tweet for word in words):
                pass  # This post contains this topic.