phus
2012-03-01 16:28:15 +08:00
# Sample markup used by main(): a single <div class="c"> comment block holding
# an author <span class="cmt">, the message as bare text directly under the
# div, and a timestamp <span class="ct"> followed by two action links.
# The backslash right after the opening quotes keeps the literal from
# starting with a newline.
HTML = u'''\
<div class="c">
<span class="cmt"><a href="...">游完1200才閃</a> 对 我 说:</span>
你好,转发的赠书大概什么时候送到,上海的,谢谢
<span class="ct">2011-09-16 21:17:35</span> <a href="....." class="cc">回复他 </a> <a href="......." class="cc">共3条对话</a>
</div>
'''
def main():
    """Print the message text of the sample comment in ``HTML``.

    Parses ``HTML`` with lxml's HTML parser, selects the text nodes that are
    direct children of ``<div class="c">`` (text inside the nested ``<span>``
    and ``<a>`` elements is not selected), strips each node and prints the
    pieces joined into one string.
    """
    # NOTE(review): relies on ``lxml.etree`` being imported elsewhere in the
    # file; no import is visible in this section -- confirm.
    tree = lxml.etree.fromstring(HTML, lxml.etree.HTMLParser())
    text_nodes = tree.xpath('//div[@class="c"]/text()')
    # A single-argument parenthesised print behaves the same as the Python 2
    # print statement and the Python 3 print function.
    print(''.join(node.strip() for node in text_nodes))