查辞典和例句

软件和网站开发以及相关技术探讨
回复
头像
roylez
帖子: 1928
注册时间: 2005-10-04 10:59
来自: 上海

查辞典和例句

#1

帖子 roylez » 2007-11-22 10:18

忘了谁的源程序了,不能查例句,偏偏我例句查得很频繁。昨天晚上终于耐着性子给改写了。

代码: 全选

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import sys, urllib2, optparse
import cElementTree as ElementTree

def parseDict(xml):
    """Parse the XML reply of a dictionary lookup.

    Returns a ``(word, translist)`` tuple: ``word`` is the text of the
    ``original-query`` element, and ``translist`` holds one
    ``[source_name, [translation_text, ...]]`` entry for every
    ``custom-translation`` child of the root.
    """
    root = ElementTree.fromstring(xml)
    word = root.find('original-query').text
    translist = []
    for entry in root.findall('custom-translation'):
        # Gather the text of every translation content node of this source.
        contents = [hit.text
                    for hit in deepFindAll(entry, 'translation/content')]
        source_name = entry.find('source/name').text
        translist.append([source_name, contents])
    return word, translist

def parseSentence(xml):
    """Parse the XML reply of an example-sentence lookup.

    Returns a list of ``[sentence, translation]`` pairs, one for every
    ``sentence-pair`` element located by ``deepFindAll``.
    """
    root = ElementTree.fromstring(xml)
    return [
        [pair.find('sentence').text, pair.find('sentence-translation').text]
        for pair in deepFindAll(root, 'example-sentences/sentence-pair')
    ]

def _findDescendants(element, name):
    """Return every descendant of ``element`` whose tag matches ``name``.

    Direct children come first, followed by the deeper matches found while
    walking each child in turn.
    """
    found = list(element.findall(name))
    for child in element:
        found.extend(_findDescendants(child, name))
    return found


def deepFindAll(element, tag):
    """Find elements below ``element`` that match a multi-level tag path.

    Each path component may match at any depth beneath the elements
    matched by the previous component, so ``'a/b'`` yields every ``b``
    that has an ``a`` ancestor located below ``element``.

    Args:
        element: an ElementTree-style element (iterable over its children
            and providing ``findall``).
        tag: a '/'-separated path string such as ``'a/b'``, or a list of
            path components.

    Returns:
        A list of matching elements without duplicates.  An empty
        component list returns ``[element]``.
    """
    if isinstance(tag, str):
        tag = tag.split('/')

    current = [element]
    for name in tag:
        matched = []
        seen = set()
        for parent in current:
            for hit in _findDescendants(parent, name):
                # Nested matches of the previous component can reach the
                # same node more than once; keep the first occurrence only.
                if id(hit) not in seen:
                    seen.add(id(hit))
                    matched.append(hit)
        current = matched
    return current

if __name__=='__main__': 
    parser = optparse.OptionParser()
    parser.add_option('-w', dest='word',action='store_true',
            default=False, help='print the translation of the word.')
    parser.add_option('-s', dest='sent',action='store_true',
            default=False, help='print sample sentences.')
    options, args = parser.parse_args(sys.argv[1:])
    xml1= urllib2.urlopen("http://dict.yodao.com/search?keyfrom=dict.python&q=" + '+'.join(args) + "&xmlDetail=true&doctype=xml").read()
    xml2= urllib2.urlopen("http://dict.yodao.com/search?keyfrom=dict.python&q=lj:" + '+'.join(args) + "&xmlDetail=true&doctype=xml").read()
    word, translist = parseDict(xml1)
    senlist = parseSentence(xml2)
    BOLD='\033[1m'
    DEFAULT='\033[m'
    UNDERLINE='\033[4m'
    MAGENTA='\033[35m'
    YELLOW='\033[33m'
    GREEN='\033[32m'
    RED='\033[31m'
    WHITE='\033[37m'
    BGWHITE='\033[47m'
    if options.word:
        print RED+BOLD+word+DEFAULT
        for item in translist:
            print MAGENTA+BGWHITE+item[0]+DEFAULT +': '\
                +GREEN+BOLD+ '; '.join(item[1]) + DEFAULT
    if options.sent:
        for item in senlist:
            print item[0].replace('<b>', YELLOW).replace('</b>', DEFAULT)
            print WHITE+UNDERLINE+item[1]+DEFAULT
    if not options.word and not options.sent:
        print RED+BOLD+word+DEFAULT
        for item in translist:
            print MAGENTA+BGWHITE+item[0]+DEFAULT +': '\
                +GREEN+BOLD+ '; '.join(item[1]) + DEFAULT
        for item in senlist[:7]:
            print item[0].replace('<b>', YELLOW).replace('</b>', DEFAULT)
            print WHITE+UNDERLINE+item[1]+DEFAULT

附件
screenshot.png
上次由 roylez 在 2007-11-22 12:57,总共编辑 1 次。
头像
bones7456
帖子: 8495
注册时间: 2006-04-12 20:05
来自: 杭州
联系:

#2

帖子 bones7456 » 2007-11-22 12:17

代码: 全选

lily@LLY:~$ ai python-celementtree
正在读取软件包列表... 完成
正在分析软件包的依赖关系树       
Reading state information... 完成      
python-celementtree 已经是最新的版本了。
共升级了 0 个软件包,新安装了 0 个软件包,要卸载 0 个软件包,有 2 个软件未被升级。
lily@LLY:~$ dict ubuntu
Traceback (most recent call last):
  File "/home/lily/bin/dict", line 4, in <module>
    import cElementTree as ElementTree
ImportError: No module named cElementTree
lily@LLY:~$
为何啊?
关注我的blog: ε==3
头像
roylez
帖子: 1928
注册时间: 2005-10-04 10:59
来自: 上海

#3

帖子 roylez » 2007-11-22 12:44

我加了celementtree依赖,debian etch底下写的。也许ubuntu里面import的时候语句有所不同。
试试把
import cElementTree as ElementTree
改成
from elementtree import ElementTree

有点错,刚刚改了。
头像
fortruth
帖子: 1795
注册时间: 2005-11-06 1:51
来自: 七彩云世界
联系:

#4

帖子 fortruth » 2007-11-22 13:08

按ls的。仍有错。

代码: 全选

gonroad@ubuntu:~/linux/bin$ sudo apt-get install python-celementtree
正在读取软件包列表... 完成
正在分析软件包的依赖关系树
Reading state information... 完成
python-celementtree 已经是最新的版本了。
共升级了 0 个软件包,新安装了 0 个软件包,要卸载 0 个软件包,有 1 个软件未被升级。
gonroad@ubuntu:~/linux/bin$ dict unix
Traceback (most recent call last):
  File "/home/gonroad/linux/bin/dict", line 5, in <module>
    import cElementTree as ElementTree
ImportError: No module named cElementTree
gonroad@ubuntu:~/linux/bin$  
佛出寺,求索真世界 For_Truth:Free_Open_Share
OPEN GPG KEY:03D18D95
anbutu
帖子: 6
注册时间: 2007-10-31 7:51

#5

帖子 anbutu » 2008-01-03 17:05

代码: 全选

try:
    from xml.etree import ElementTree # for Python 2.5 users
except ImportError:
    # Older interpreters: fall back to the standalone elementtree package.
    from elementtree import ElementTree
anbutu
帖子: 6
注册时间: 2007-10-31 7:51

#6

帖子 anbutu » 2008-01-03 17:08

再附上一个我以前写的吧 :-)
www.dict.cn 取的数据。

代码: 全选

#!/usr/bin/env python
# -*- coding:utf-8 -*-

import httplib
from xml.dom import minidom

class PyDict:
    def __init__(self):
        self.server = 'www.dict.cn'
        self.port   = 80
        self.url    = '/ws.php'
        self.method = 'POST'
        self.header = {'Content-Type':'application/x-www-form-urlencoded', 'User-Agent':'PyDict'}

    def query(self, word):
        word = str(word) # be sure a string
        if len(word.strip()) == 0:
            print 'Invalid input'
            return {}

        conn = httplib.HTTPConnection(self.server, self.port)
        # we make UTF-8 as default
        # TODO may raise an exception
        conn.request(self.method, self.url, 'utf8=true&q=' + word, self.header)
        res = conn.getresponse()
        # function parse just need a file like object
        xmldoc = minidom.parse(res)
        conn.close()

        # only one <def> element
        _def = xmldoc.getElementsByTagName('def')[0].firstChild.data
        # no such word
        if _def == 'Not Found':
            print _def
            return

        _pron = xmldoc.getElementsByTagName('pron')
        if _pron:
            _pron = _pron[0].firstChild.data
            _pron = _pron.split(', ')
            _pron = ', '.join(['[' + p +']' for p in _pron])
        else:
            _pron = ''

        # process sentences
        _sentences = xmldoc.getElementsByTagName('sent')
        if _sentences:
            # a empty list to store sentences
            _sent = [];

            for _sentence in _sentences:
                _orig = _sentence.firstChild.firstChild.data
                _tran = _sentence.lastChild.firstChild.data
                _sent.append((_orig, _tran))
            
            _sent = '\n'.join([sent[0] + '\n' + sent[1] for sent in _sent])
        else:
            _sent = ''

        _word = {}
        _word['pron'] = _pron
        _word['def'] = _def
        _word['sent'] = _sent

        return _word

if __name__ == '__main__':
    dict = PyDict()
    word = raw_input('->')
    while word != '#':
#       print '\n'.join(["%s:\n%s" % v for v in dict.query(word).items()])
        if len(word.strip()) != 0:
            result = dict.query(word)
            print result['pron']
            print result['def']
            print '例句:'
            print result['sent']

        word = raw_input('->')
头像
heihaier
帖子: 168
注册时间: 2008-09-29 22:16
联系:

Re: 查辞典和例句

#7

帖子 heihaier » 2009-07-02 15:45

Keep it simple, stupid!
https://heiher.info
回复