这段 Python 代码对 Xapian 的部分操作进行了简单的封装，希望对大家有用。
import xapian

import config
from mmseg.search import seg_txt_2_dict


class Xapian(object):
    """Thin wrapper around a writable Xapian database.

    Text is split into terms with ``seg_txt_2_dict`` before it is indexed or
    searched. Matches returned by :meth:`search` are sorted by value slot 1.
    """

    # Passed as the third argument of ``Enquire.get_mset`` (the minimum
    # number of documents to check, per the Xapian API).
    _CHECK_AT_LEAST = 10000

    def __init__(self):
        """Open (or create) the index stored at ``config.xapian_index_dir``."""
        self.db = xapian.WritableDatabase(
            config.xapian_index_dir, xapian.DB_CREATE_OR_OPEN)
        self.enquire = xapian.Enquire(self.db)
        # Sort matches by value slot 1 with the "reverse" flag set.
        self.enquire.set_sort_by_value(1, True)

    @staticmethod
    def _add_terms(doc, text):
        """Add every term segmented from *text* to *doc*."""
        # Only the segmented words are indexed; the mapped values are unused.
        for word in seg_txt_2_dict(text):
            doc.add_term(word)

    @staticmethod
    def _add_values(doc, values):
        """Store each ``slot -> value`` pair of *values* on *doc*."""
        # Values are what results get sorted on; slot keys must be numbers.
        for slot, value in values.items():
            doc.add_value(slot, value)

    def get_document(self, id):
        """Return the document stored under *id*.

        Errors raised by the underlying database (for example when the
        document does not exist) propagate to the caller.
        """
        return self.db.get_document(id)

    def delete_document(self, id):
        """Remove the document stored under *id* from the index.

        Best effort: any error raised by the database is swallowed and
        ``None`` is returned.
        """
        try:
            return self.db.delete_document(id)
        except Exception:
            return None

    def update_index(self, id, text=None, values=None, data=None):
        """Update parts of an existing document in place.

        :id: id of the document to update
        :text: if truthy, replaces all terms with the terms segmented from it
        :values: if truthy, mapping of slot -> value replacing all values
        :data: if truthy, replaces the document data
        :returns: True on success, False if the document could not be
                  loaded or written back
        """
        try:
            doc = self.get_document(id)
        except Exception:
            return False

        if text:
            doc.clear_terms()
            self._add_terms(doc, text)
        if values:
            doc.clear_values()
            self._add_values(doc, values)
        if data:
            doc.set_data(data)

        try:
            self.db.replace_document(id, doc)
        except Exception:
            return False
        return True

    def index(self, id, text, values=None, data=''):
        """Index *text* under *id*, replacing any existing document.

        :id: document id
        :text: content to make searchable (UTF-8)
        :values: optional mapping of slot -> value, used for sorting
        :data: optional document data
        :returns: True on success, False if the database rejected the write
        """
        doc = xapian.Document()
        self._add_terms(doc, text)
        if values:
            self._add_values(doc, values)
        if data:
            doc.set_data(data)

        try:
            self.db.replace_document(id, doc)
        except Exception:
            return False
        return True

    def search(self, keywords, offset=0, limit=10):
        """Search for documents containing every term of *keywords*.

        :keywords: search phrase; text is encoded to UTF-8, byte strings are
                   used as they are
        :offset: index of the first match to return
        :limit: maximum number of matches to return
        :returns: the MSet produced by ``Enquire.get_mset``
        """
        # Encode only real text: calling .encode() on a non-ASCII byte
        # string raises UnicodeDecodeError on Python 2.
        if not isinstance(keywords, bytes):
            keywords = keywords.encode('utf-8')

        queries = [xapian.Query(word) for word in seg_txt_2_dict(keywords)]
        if len(queries) == 1:
            query = queries[0]
        else:
            # All terms must match.
            query = xapian.Query(xapian.Query.OP_AND, queries)

        self.enquire.set_query(query)
        return self.enquire.get_mset(offset, limit, self._CHECK_AT_LEAST)

    def flush(self):
        """Flush pending changes to disk and return the result of the call."""
        return self.db.flush()
# Shared module-level instance. Constructing it at import time opens (or
# creates) the writable database at config.xapian_index_dir.
search = Xapian()