imn1
2017-08-31 07:03:17 +08:00
>>> import html.parser
>>> h = html.parser.HTMLParser()
>>> s = h.unescape('&copy; 2010')
>>> s
u'\xa9 2010'
>>> print s
© 2010
>>> s = h.unescape('&#169; 2010')
>>> s
u'\xa9 2010'
>>> '袈'.encode("unicode-escape")
b'\\u8888'
>>> chr(int('8888', 16))
'袈'
>>> h.unescape('&hearts;')
'♥'
>>> h.unescape('&#9829;')
'♥'
>>> h.unescape('&#x2665;')
'♥'
>>> '♥'.encode("unicode-escape")
b'\\u2665'
>>> chr(int('2665', 16))
'♥'
>>> import html.entities as h
>>> h.name2codepoint['hearts']
9829
>>> a='汉字먀니'.encode('utf-8')
>>> b=re.findall(b'\xe4[\xb8-\xff][\x00-\xff]|[\xe5-\xe8][\x00-\xff][\x00-\xff]|\xe9[\x00-\xbe][\x00-\xff]', a)
>>> b
[b'\xe6\xb1\x89', b'\xe5\xad\x97']