• 欢迎访问开心洋葱网站,在线教程,推荐使用最新版火狐浏览器和Chrome浏览器访问本网站,欢迎加入开心洋葱 QQ群
  • 为方便开心洋葱网用户,开心洋葱官网已经开启复制功能!
  • 欢迎访问开心洋葱网站,手机也能访问哦~欢迎加入开心洋葱多维思维学习平台 QQ群
  • 如果您觉得本站非常有看点,那么赶紧使用Ctrl+D 收藏开心洋葱吧~~~~~~~~~~~~~!
  • 由于近期流量激增,小站的ECS没能经得起亲们的访问,本站依然没有盈利,如果各位觉着文字不错,还请看官给小站打个赏~~~~~~~~~~~~~!

xapian创建索引和搜索的简单范例程序

python 水墨上仙 1598次浏览

xapian创建索引和搜索的简单范例程序,代码使用python实现

创建索引代码


import sys
import os
import errno
from contextlib import closing

import xapian as _x

def main(files):
    """Index every sonnet file in *files* into ``./xdb/sonnets.db``.

    Each file becomes one Xapian document whose data is the full file text.
    The document is stored under the unique term ``Q<path>``, so indexing
    the same path again replaces the earlier document instead of adding a
    duplicate.

    Args:
        files: iterable of paths to text files, one sonnet per file.

    Raises:
        OSError / IOError: if ``./xdb/`` cannot be created (for a reason
            other than already existing) or a file cannot be read.
    """
    # try to make a db directory in pwd; one that already exists is fine
    try:
        os.mkdir('./xdb/')
    except (OSError, IOError) as e:
        if e.errno != errno.EEXIST:
            raise

    author = 'William Shakespeare'

    with closing(_x.WritableDatabase('./xdb/sonnets.db',
                                     _x.DB_CREATE_OR_OPEN)) as x_db:
        # setup our indexer once; set_document() points it at each new doc
        indexer = _x.TermGenerator()
        indexer.set_stemmer(_x.Stem("english"))

        for path in files:
            # read-only access is enough, so do not demand write permission
            with open(path, 'r') as sonnet_file:
                sonnet = sonnet_file.read()

            # splitlines() does not count a phantom empty line after a
            # trailing newline, unlike split('\n')
            num_lines = len(sonnet.splitlines())

            # make a new document
            x_doc = _x.Document()

            # set sonnet text as data, and file path as id
            x_id = 'Q%s' % path
            x_doc.set_data(sonnet)
            x_doc.add_term(x_id)

            indexer.set_document(x_doc)

            # make author searchable in main text
            indexer.index_text(author)
            # do not keep going from author to text, separate them
            indexer.increase_termpos()
            # index author into 'A' prefix, separately
            indexer.index_text(author, 1, 'A')

            # index sonnet text
            indexer.index_text(sonnet)

            # add XLINES as number of lines
            x_doc.add_term('XLINES%s' % num_lines)

            # save, replacing any document already stored under this id
            x_db.replace_document(x_id, x_doc)

if __name__ == '__main__':
    # every command-line argument is passed on as a file to index
    cli_files = sys.argv[1:]
    exit_status = main(cli_files)
    sys.exit(exit_status)

查询代码


import sys
from contextlib import closing
import xapian as _x

def _parseq(x_db, query, prefix=''):
    """Parse *query* with a QueryParser bound to *x_db* and return the result.

    The parser uses an English stemmer with the STEM_SOME strategy, no
    parser flags (0), and *prefix* as the default term prefix.
    """
    parser = _x.QueryParser()
    parser.set_database(x_db)
    parser.set_stemmer(_x.Stem("english"))
    parser.set_stemming_strategy(_x.QueryParser.STEM_SOME)
    parsed = parser.parse_query(query, 0, prefix)
    return parsed

def _joinq(op, first, sec):
    if not first:
        return sec
    return _x.Query(op, first, sec)

def main(query, author_q, num_lines):
    """Search ``./xdb/sonnets.db`` and print the data of every match.

    The given criteria are combined with AND; criteria that are falsy
    (``None`` or empty) are skipped.

    Args:
        query: free-text query parsed without a prefix, or None.
        author_q: author query parsed under the 'A' prefix, or None.
        num_lines: line count as a string, matched against the
            ``XLINES<n>`` term, or None.
    """
    x_query = None
    with closing(_x.Database('./xdb/sonnets.db')) as x_db:
        # setup the query
        if query:
            x_query = _x.Query(_parseq(x_db, query))
        if author_q:
            # parse the author text itself, not the free-text query
            x_query = _joinq(_x.Query.OP_AND, x_query,
                             _parseq(x_db, author_q, 'A'))
        if num_lines:
            x_query = _joinq(_x.Query.OP_AND, x_query,
                             _x.Query('XLINES%s' % num_lines.strip()))
        if not x_query:
            # NOTE(review): a default-constructed Query presumably matches
            # nothing -- confirm whether "match everything" was intended
            x_query = _x.Query()

        # setup the enquire object to perform the query
        enq = _x.Enquire(x_db)
        enq.set_query(x_query)
        # ask for up to get_doccount() results, i.e. do not truncate
        for res in enq.get_mset(0, x_db.get_doccount(), None, None):
            # parenthesised single-argument print works on Python 2 and 3
            print(res.document.get_data())
            print('')

if __name__ == '__main__':
    # pad argv with None up to four entries so main() always receives
    # three arguments (query, author_q, num_lines)
    shortfall = 4 - len(sys.argv)
    if shortfall > 0:
        sys.argv.extend([None] * shortfall)
    query_args = sys.argv[-3:]
    sys.exit(main(*query_args))


开心洋葱 , 版权所有丨如未注明 , 均为原创丨未经授权请勿修改 , 转载请注明xapian创建索引和搜索的简单范例程序
喜欢 (0)
加载中……