GitHub: https://github.com/gaojiuli/htmlparsing
import requests
from htmlparsing import Element, HTMLParsing, Text, Attr, Parse, HTML, Markdown
# Example 1: fetch the Hacker News front page and extract its stories.
url = 'https://news.ycombinator.com/'
# Bound the wait so a stalled connection cannot hang the script forever,
# and fail loudly on an HTTP error instead of parsing an error page.
r = requests.get(url, timeout=10)
r.raise_for_status()
# `.list` receives a CSS selector plus a mapping of field name -> extractor.
# Presumably it yields one record per element matching '.athing' -- confirm
# against the htmlparsing documentation.
# NOTE(review): the selectors depend on the site's current markup.
article_list = HTMLParsing(r.text).list(
    '.athing',
    {
        'title': Text('a.storylink'),  # css selector
        'link': Attr('a.storylink', 'href'),
    },
)
print(article_list)
import requests
from htmlparsing import Element, HTMLParsing, Text, Attr, Parse
# Example 2: fetch a single Hacker News item page and extract its details.
url = 'https://news.ycombinator.com/item?id=16476454'
# Bound the wait so a stalled connection cannot hang the script forever,
# and fail loudly on an HTTP error instead of parsing an error page.
r = requests.get(url, timeout=10)
r.raise_for_status()
# `.detail` receives a mapping of field name -> extractor for the page.
# `Parse` is given a CSS selector and a '{}' template; presumably the text
# matched by '{}' becomes the field value -- confirm against the
# htmlparsing documentation.
article_detail = HTMLParsing(r.text).detail(
    {
        'title': Text('a.storylink'),
        'points': Parse('span.score', '>{} points'),
        'link': Attr('a.storylink', 'href'),
    },
)
print(article_detail)