赞!确实需要一个简洁界面的 English Dictionary。为了学英语,我的观点是尽量用英文的解释来解释英文,所以我比较喜欢 Collins 词典。
http://www.collinsdictionary.com/ 它有两个词典:一个是 English,一个是 English for Learners(后者的解释很易懂)。我现在正在用 Python 写一个抓取程序。我把程序的大致内容贴在下面,楼主如果觉得有用,可以把它加到在线翻译的选项里。希望以后能在 GitHub 上合作~
# -*- coding: utf-8 -*-
"""
Lookup word in Collins website
version 0.1
"""
import urllib, urllib2
import re
import codecs
from BeautifulSoup import BeautifulSoup
def selectDic(dic='american'):
    """Return the name of the dictionary dataset to query.

    Args:
        dic: Dataset name. Must be one of the four names listed in
            ``datasets`` below. Defaults to ``'american'``, which is the
            value this function returned before it accepted an argument.

    Returns:
        The validated dataset name, unchanged.

    Raises:
        ValueError: If ``dic`` is not one of the known dataset names.
    """
    # The four dataset names this script knows about.
    datasets = ('american', 'american-cobuild-learners',
                'english', 'english-cobuild-learners')
    if dic not in datasets:
        raise ValueError('unknown dictionary %r; expected one of: %s'
                         % (dic, ', '.join(datasets)))
    return dic
def lookupWord(word='world'):
    """Return the word to look up.

    Args:
        word: The word to search for. Defaults to ``'world'``, the sample
            word this script used when the value was hard-coded.

    Returns:
        ``word``, unchanged.
    """
    return word
def getQuery(dic, word):
    """Build the query parameters for a dictionary search.

    Args:
        dic: Dataset name, stored under the ``'dataset'`` key.
        word: Word to look up, stored under the ``'q'`` key.

    Returns:
        A new dict with exactly the keys ``'dataset'`` and ``'q'``.
    """
    return {'dataset': dic, 'q': word}
def parsePage(content, word):
    """Extract the entry for ``word`` from a result page and wrap it as HTML.

    Args:
        content: Markup of the search-result page; handed to BeautifulSoup.
        word: The looked-up word. The entry is taken from every ``<div>``
            whose id equals ``word + "_1"``.

    Returns:
        A complete HTML document whose body holds the matching ``<div>``
        elements (joined by newlines) with all ``<img>`` tags removed.
        The body is empty when no matching ``<div>`` is found.
    """
    soup = BeautifulSoup(content)
    # The explanation part is the div whose id is derived from the word.
    idname = word + "_1"
    entries = soup.findAll('div', id=idname)
    # Serialize as unicode so phonetic symbols are kept intact.
    rel = u'\n'.join(unicode(entry) for entry in entries)
    # Remove the pronunciation icons. Each <img ...> tag is matched on its
    # own ([^>]* cannot run past the tag's closing '>'), and re.sub removes
    # every occurrence, so text between two icons is never deleted.
    rel = re.sub(r'<img\b[^>]*>', u'', rel)
    # The result is wrapped in a minimal UTF-8 HTML page so it can be saved
    # and opened in a browser to check the output.
    head = '<html><head><title>Page title</title><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head><body>'
    tail = '</body></html>'
    return head + rel + tail
def savePage(rel, path='dic.html'):
    """Write the page text to a UTF-8 encoded file.

    Args:
        rel: The text to write (the HTML page).
        path: Destination file. Defaults to ``'dic.html'``, the file name
            this function used when it was hard-coded. An existing file is
            overwritten.

    Raises:
        IOError: If the file cannot be opened or written.
    """
    # The with-block closes the file even if write() raises.
    with codecs.open(path, 'w', 'utf-8') as f:
        f.write(rel)
def queryPost():
    """Look up the configured word online and save the resulting page.

    Sends the query built by ``getQuery`` to the search URL, passes the
    response to ``parsePage`` and writes the outcome with ``savePage``.
    Failures are reported on standard output instead of being raised, so
    the caller always regains control.
    """
    baseurl = "http://www.collinsdictionary.com/search"
    timeout = 10  # seconds
    try:
        dic = selectDic()
        word = lookupWord()
        query = getQuery(dic, word)
        # Passing ``data`` makes urlopen send the query as a POST body.
        data = urllib.urlencode(query)
        page = urllib2.urlopen(baseurl, data, timeout)
        try:
            content = page.read()
        finally:
            # Release the connection even if reading fails.
            page.close()
        rel = parsePage(content, word)
        savePage(rel)
    except Exception as exc:
        # Best-effort script: report the failure and its cause, but let
        # KeyboardInterrupt / SystemExit propagate (a bare except would
        # swallow them too). %r avoids encoding errors on odd messages.
        print("Error: %r" % (exc,))
if __name__ == "__main__":
    # Script entry point: run one lookup, then signal completion.
    queryPost()
    print("Done")