Lucene example code walkthrough, suitable for a typical enterprise search service.
Reposted from: http://blog.csdn.net/sd0902/article/details/8466608
package com.lucene.util;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.wltea.analyzer.lucene.IKQueryParser;
import org.wltea.analyzer.lucene.IKSimilarity;

import com.lucene.LuceneConfig;
import com.lucene.data.LuceneData;
import com.model.Model;
import com.model.Novel;
import com.service.NovelService;

/**
 * Lucene utility class.
 *
 * @author Administrator
 */
public class LuceneUtil {

    /** Logger. */
    static Logger logger = Logger.getLogger(LuceneUtil.class);

    /** Total number of hits of the last search. */
    public static Integer totalNum = 0;

    /**
     * Create an index entry.
     *
     * @param data the record to add to the index
     * @return true on success
     */
    public static synchronized boolean createIndex(LuceneData data) {
        IndexWriter indexWriter = null;
        Directory d = null;
        try {
            d = FSDirectory.open(new File(LuceneConfig.INDEX_PATH));
            IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, AnalyzerUtil.getIkAnalyzer());
            // optimize() is not recommended since 3.6; use a LogMergePolicy instead
            conf.setMergePolicy(optimizeIndex());
            // Index open mode: CREATE overwrites any existing index; APPEND adds to it
            File file = new File(LuceneConfig.INDEX_PATH);
            File[] f = file.listFiles();
            if (f == null || f.length == 0) {
                conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            } else {
                conf.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
            }
            indexWriter = new IndexWriter(d, conf);
            // The id is unique, so delete any previously indexed document with this id before adding the new one
            Term term = new Term("id", data.getId());
            indexWriter.deleteDocuments(term);
            Document doc = getDocument(data);
            indexWriter.addDocument(doc);
            logger.debug("索引结束,共有索引{}个" + indexWriter.numDocs());
            // System.out.println("索引结束,共有索引{}个" + indexWriter.numDocs() + ":" + doc.get("id") + ":" + doc.get("author"));
            // Automatic merging: optimize() is deprecated since 3.6, the LogMergePolicy handles it
            // indexWriter.optimize();
            indexWriter.commit();
            return true;
        } catch (CorruptIndexException e) {
            e.printStackTrace(); logger.error("索引添加异常", e);
        } catch (LockObtainFailedException e) {
            e.printStackTrace(); logger.error("索引添加异常", e);
        } catch (IOException e) {
            e.printStackTrace(); logger.error("索引不存在", e);
        } catch (Exception e) {
            e.printStackTrace(); logger.error("索引添加异常", e);
        } finally {
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                } catch (CorruptIndexException e) {
                    e.printStackTrace(); logger.error("索引关闭异常", e);
                } catch (IOException e) {
                    e.printStackTrace(); logger.error("索引关闭异常", e);
                } finally {
                    try {
                        if (d != null && IndexWriter.isLocked(d)) {
                            IndexWriter.unlock(d);
                        }
                    } catch (IOException e) {
                        e.printStackTrace(); logger.error("解锁异常", e);
                    }
                }
            }
        }
        return false;
    }

    /**
     * Update an index entry.
     *
     * @param data the record to update
     * @return true on success
     */
    public static boolean updateIndex(LuceneData data) {
        IndexWriter indexWriter = null;
        Directory d = null;
        try {
            d = FSDirectory.open(new File(LuceneConfig.INDEX_PATH));
            while (d != null && IndexWriter.isLocked(d)) { // if the index is locked, wait for it to be released
                Thread.sleep(1000);
                logger.error("索引已经锁住,正在等待....");
            }
            IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, AnalyzerUtil.getIkAnalyzer());
            // optimize() is not recommended since 3.6; use a LogMergePolicy instead
            conf.setMergePolicy(optimizeIndex());
            indexWriter = new IndexWriter(d, conf);
            Term term = new Term("id", data.getId());
            // Delete the old document first, whether or not it changed
            indexWriter.deleteDocuments(term);
            Document doc = getDocument(data);
            indexWriter.addDocument(doc);
            // indexWriter.optimize();
            indexWriter.commit();
            logger.debug("更新索引,文章ID为{}" + data.getId());
            logger.debug("共有索引{}个" + indexWriter.numDocs());
            return true;
        } catch (CorruptIndexException e) {
            e.printStackTrace(); logger.error("索引添加异常", e);
        } catch (LockObtainFailedException e) {
            e.printStackTrace(); logger.error("索引添加异常", e);
        } catch (IOException e) {
            e.printStackTrace(); logger.error("索引不存在", e);
        } catch (Exception e) {
            e.printStackTrace(); logger.error("索引添加异常", e);
        } finally {
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                } catch (CorruptIndexException e) {
                    e.printStackTrace(); logger.error("索引关闭异常", e);
                } catch (IOException e) {
                    e.printStackTrace(); logger.error("索引关闭异常", e);
                } finally {
                    try {
                        if (d != null && IndexWriter.isLocked(d)) {
                            IndexWriter.unlock(d);
                        }
                    } catch (IOException e) {
                        e.printStackTrace(); logger.error("解锁异常", e);
                    }
                }
            }
        }
        return false;
    }

    /**
     * Delete the document that matches the given id.
     *
     * @param id the document id
     * @return true on success
     */
    public static boolean deleteIndex(String id) {
        IndexWriter indexWriter = null;
        Directory d = null;
        try {
            d = FSDirectory.open(new File(LuceneConfig.INDEX_PATH));
            while (d != null && IndexWriter.isLocked(d)) { // if the index is locked, wait for it to be released
                Thread.sleep(1000);
                logger.error("索引已经锁住,正在等待....");
            }
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_36, AnalyzerUtil.getIkAnalyzer());
            indexWriter = new IndexWriter(d, indexWriterConfig);
            Term term = new Term("id", id);
            indexWriter.deleteDocuments(term);
            indexWriter.optimize(); // deprecated since 3.6; the merge policy can handle merging instead
            indexWriter.commit();
            logger.debug("删除文章ID:{}的索引..." + id);
            logger.debug("共有索引{}个" + indexWriter.numDocs());
            indexWriter.close();
            return true;
        } catch (CorruptIndexException e) {
            e.printStackTrace(); logger.error("索引删除异常", e);
        } catch (LockObtainFailedException e) {
            e.printStackTrace(); logger.error("索引删除异常", e);
        } catch (IOException e) {
            e.printStackTrace(); logger.error("索引不存在", e);
        } catch (Exception e) {
            e.printStackTrace(); logger.error("索引删除异常", e);
        } finally {
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                } catch (CorruptIndexException e) {
                    e.printStackTrace(); logger.error("索引关闭异常", e);
                } catch (IOException e) {
                    e.printStackTrace(); logger.error("索引关闭异常", e);
                } finally {
                    try {
                        if (d != null && IndexWriter.isLocked(d)) {
                            IndexWriter.unlock(d);
                        }
                    } catch (IOException e) {
                        e.printStackTrace(); logger.error("解锁异常", e);
                    }
                }
            }
        }
        return false;
    }

    /**
     * @param fileds   fields to search across, e.g. new String[]{ "contentTitle", "contentContext", "keywords" }
     * @param occurs   how each field may occur, e.g. new Occur[]{ Occur.SHOULD, Occur.SHOULD, Occur.SHOULD }
     * @param keyWord  the keyword to search for
     * @param page     current page number
     * @param pageSize page size
     * @return
     */
    public static ArrayList<LuceneData> search(String[] fileds, Occur[] occurs, String keyWord, Integer page, Integer pageSize) {
        return search(fileds, occurs, keyWord, "", "", page, pageSize);
    }

    /**
     * @param fileds   fields to search across, e.g. new String[]{ "contentTitle", "contentContext", "keywords" }
     * @param occurs   how each field may occur, e.g. new Occur[]{ Occur.SHOULD, Occur.SHOULD, Occur.SHOULD }
     * @param keyWord  the keyword to search for
     * @param bigtype  main category
     * @param subType  sub-category under the main category
     * @param page     current page number
     * @param pageSize page size
     * @return
     */
    public static ArrayList<LuceneData> search(String[] fileds, Occur[] occurs, String keyWord, String bigtype, String subType, Integer page, Integer pageSize) {
        try {
            // ---------- initialization ----------
            IndexReader reader = IndexReader.open(FSDirectory.open(new File(LuceneConfig.INDEX_PATH)));
            IndexSearcher searcher = new IndexSearcher(reader);
            // use the IKSimilarity relevance scorer in the searcher
            searcher.setSimilarity(new IKSimilarity());

            // ---------- build the query conditions ----------
            BooleanQuery booleanquery = new BooleanQuery();
            // multi-field keyword query (condition 1)
            Query likequery = IKQueryParser.parseMultiField(fileds, keyWord, occurs);
            booleanquery.add(likequery, Occur.MUST);
            // filter by main category (condition 2)
            if (bigtype.length() > 0) {
                Query subquery = IKQueryParser.parse("bigtype", bigtype);
                booleanquery.add(subquery, Occur.MUST);
            }
            // filter by sub-category (condition 3)
            if (subType.length() > 0) {
                Query subquery = IKQueryParser.parse("type", subType);
                booleanquery.add(subquery, Occur.MUST);
            }
            // numeric range filter
            // NumericRangeQuery<Integer> spanquery = NumericRangeQuery.newIntRange("id", begin, end, true, true);
            // booleanquery.add(spanquery, Occur.MUST);
            // date range filter (compare getTime() values)
            // NumericRangeQuery<Long> spanquery = NumericRangeQuery.newLongRange("id", begin, end, true, true);
            // booleanquery.add(spanquery, Occur.MUST);

            // ---------- filters ----------
            // ---------- boosting (one option: call field.setBoost when building the Document) ----------
            // ---------- sorting ----------
            /* Multi-field sort; fields listed first take precedence. true = descending, false = ascending.
             * SortField[] sortFields = new SortField[3];
             * SortField top = new SortField("isTop", SortField.INT, true);
             * SortField hits = new SortField("contentHits", SortField.INT, true);
             * SortField pubtime = new SortField("publishTime", SortField.LONG, true);
             * sortFields[0] = top;
             * sortFields[1] = hits;
             * sortFields[2] = pubtime;
             * Sort sort = new Sort(sortFields);
             */

            // ---------- search ----------
            // Paging: Lucene has no built-in paging, but queries are fast, so we simply cap the number of hits collected
            TopScoreDocCollector topCollector = TopScoreDocCollector.create(page * pageSize, false); // upper bound
            searcher.search(booleanquery, topCollector);
            // total number of hits
            totalNum = topCollector.getTotalHits();
            ScoreDoc[] docs = topCollector.topDocs((page - 1) * pageSize, pageSize).scoreDocs; // the slice for the requested page

            // highlighting
            SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
            Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(booleanquery));
            highlighter.setTextFragmenter(new SimpleFragmenter(100));

            ArrayList<LuceneData> list = new ArrayList<LuceneData>();
            LuceneData data = null;
            for (ScoreDoc scdoc : docs) {
                Document document = searcher.doc(scdoc.doc);
                data = new LuceneData();
                // apply highlighting to the stored fields
                TokenStream tokenStream = null;

                String name = document.get("name");
                tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), scdoc.doc, "name", AnalyzerUtil.getIkAnalyzer());
                name = highlighter.getBestFragment(tokenStream, name);
                if (name == null) name = document.get("name");

                String author = document.get("author");
                tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), scdoc.doc, "author", AnalyzerUtil.getIkAnalyzer());
                author = highlighter.getBestFragment(tokenStream, author);
                if (author == null) author = document.get("author");

                String outline = document.get("outline");
                tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), scdoc.doc, "outline", AnalyzerUtil.getIkAnalyzer());
                outline = highlighter.getBestFragment(tokenStream, outline);
                if (outline == null) outline = document.get("outline");

                String type = document.get("type");
                tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), scdoc.doc, "type", AnalyzerUtil.getIkAnalyzer());
                type = highlighter.getBestFragment(tokenStream, type);
                if (type == null) type = document.get("type");

                data.setId(document.get("id"));
                data.setName(name);
                data.setAuthor(author);
                data.setOutline(outline);
                data.setType(type);
                data.setTypeid(document.get("typeid"));
                data.setBigtype(document.get("bigtype"));
                data.setUpdateTime(document.get("updateTime"));
                data.setImgPath(document.get("imgPath"));
                data.setImgUrlPath(document.get("imgUrlPath"));
                data.setContent(document.get("content"));
                data.setLink_url(document.get("link_url"));
                data.setHot(Long.parseLong(document.get("hot")));
                data.setClickPoint(Long.parseLong(document.get("clickPoint")));
                list.add(data);
            }
            return list;
        } catch (Exception e) {
            e.printStackTrace();
            logger.error("搜索异常", e);
            return new ArrayList<LuceneData>();
        }
    }

    /**
     * Convert the given record into a Lucene Document.
     *
     * @param data
     * @return
     */
    private static Document getDocument(LuceneData data) {
        Document doc = new Document();
        doc.add(new Field("id", data.getId(), Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("name", data.getName(), Store.YES, Index.ANALYZED));
        doc.add(new Field("author", data.getAuthor(), Store.YES, Index.ANALYZED));
        doc.add(new Field("outline", data.getOutline(), Store.YES, Index.ANALYZED));
        doc.add(new Field("type", data.getType(), Store.YES, Index.ANALYZED));
        doc.add(new Field("updateTime", data.getUpdateTime(), Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("imgPath", data.getImgPath(), Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("imgUrlPath", data.getImgUrlPath() == null ? "" : data.getImgUrlPath(), Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("content", data.getContent() == null ? "" : data.getContent(), Store.YES, Index.ANALYZED));
        doc.add(new Field("link_url", data.getLink_url(), Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("hot", Long.toString(data.getHot()), Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("clickPoint", Long.toString(data.getClickPoint()), Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("bigtype", data.getBigtype(), Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("typeid", data.getTypeid(), Store.YES, Index.NOT_ANALYZED));
        return doc;
    }

    /**
     * Returns the merge policy used to keep the index optimized.
     *
     * @return
     */
    private static LogMergePolicy optimizeIndex() {
        LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
        // Merge frequency when documents are added to a segment:
        // a smaller value slows indexing down, a larger value speeds it up (>10 suits batch indexing).
        // Here segments are merged once 50 of them accumulate.
        mergePolicy.setMergeFactor(50);
        // Maximum number of documents per merged segment:
        // a smaller value favors incremental indexing speed,
        // a larger value suits batch indexing and faster searching.
        mergePolicy.setMaxMergeDocs(5000);
        // Use the compound file format when merging segments
        mergePolicy.setUseCompoundFile(true);
        return mergePolicy;
    }

    /**
     * Convert domain objects into LuceneData records.
     *
     * @param list
     * @return
     */
    public static ArrayList<LuceneData> transformation_Novel(ArrayList<Novel> list) {
        ArrayList<LuceneData> transforlist = new ArrayList<LuceneData>();
        LuceneData data = new LuceneData();
        for (Model model : list) {
            if (model instanceof Novel) {
                data = new LuceneData();
                Novel novel = (Novel) model;
                data.setId(novel.getId() + "");
                data.setName(novel.getName());
                data.setAuthor(novel.getAuthor());
                data.setOutline(novel.getOutline());
                data.setType(novel.getNovelType().getName());
                data.setTypeid(novel.getNovelType().getId() + "");
                data.setBigtype("小说");
                data.setUpdateTime(novel.getUpdateTime() + "");
                data.setImgPath(novel.getImgPath());
                data.setImgUrlPath(novel.getImgUrlPath());
                data.setContent(novel.getContent());
                data.setLink_url(novel.getLink_url());
                data.setHot(novel.getHot());
                data.setClickPoint(novel.getClickPoint());
                transforlist.add(data);
            }
        }
        return transforlist;
    }

    /**
     * Test entry point.
     *
     * @param args
     */
    public static void main(String[] args) {
        // --------------------- indexing
        // ApplicationContext springContext = new ClassPathXmlApplicationContext(new String[]{"classpath:com/springResource/*.xml"});
        // NovelService novelService = (NovelService) springContext.getBean("novelService");
        // System.out.println("novelService" + novelService);
        //
        // ArrayList<Novel> list = novelService.getNovelList(21, 100);
        // ArrayList<LuceneData> transforlist = LuceneService.transformation(list);
        // for (LuceneData data : transforlist) {
        //     System.out.println("in" + LuceneService.createIndex(data));
        // }

        // --------------------- searching
        String[] fileds = new String[] { "name", "author", "outline", "type" };
        Occur[] occurs = new Occur[] { Occur.SHOULD, Occur.SHOULD, Occur.SHOULD, Occur.SHOULD };
        ArrayList<LuceneData> list = LuceneUtil.search(fileds, occurs, "初雪", 1, 10);
        for (LuceneData data : list) {
            System.out.println(data);
            System.out.println(data.getId() + ":" + data.getAuthor());
        }
        System.out.println(list.size());
    }
}
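LuceneUtil reads its index directory from com.lucene.LuceneConfig, which the original post does not include. A minimal sketch of what such a class might look like is shown below; the INDEX_PATH value is an assumption and should point at whatever directory the application actually uses for its index files.

package com.lucene;

/**
 * Minimal sketch of the configuration class referenced by LuceneUtil.
 * Only INDEX_PATH is required by the code above; the path shown here is
 * a placeholder and must be adapted to the actual deployment.
 */
public class LuceneConfig {

    /** Directory where the Lucene index files are stored (placeholder value). */
    public static final String INDEX_PATH = "/data/lucene/index";
}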
package com.lucene.util;

import org.apache.lucene.analysis.Analyzer;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Analyzer utility: provides the shared tokenizer used for indexing and searching.
 *
 * @author Administrator
 */
public class AnalyzerUtil {

    private static Analyzer analyzer;

    public static Analyzer getIkAnalyzer() {
        if (analyzer == null) {
            // true: the analyzer performs smart (maximum word length) segmentation;
            // false: the analyzer performs finest-grained segmentation
            analyzer = new IKAnalyzer(true);
        }
        return analyzer;
    }
}
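The true/false flag passed to IKAnalyzer switches between smart (maximum word length) and finest-grained segmentation. A quick way to see the difference is to run a string through the analyzer and print the tokens it produces. The snippet below is a minimal sketch using the Lucene 3.x TokenStream API; the class name TokenDemo and the sample text are illustrative only and not part of the original project.

package com.lucene.util;

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

/**
 * Illustrative helper (not in the original post): prints the tokens produced by
 * the shared IK analyzer so the effect of the segmentation flag can be inspected.
 */
public class TokenDemo {

    public static void main(String[] args) throws IOException {
        Analyzer analyzer = AnalyzerUtil.getIkAnalyzer();
        // feed the sample text through the analyzer, as Lucene would do when indexing the "name" field
        TokenStream ts = analyzer.tokenStream("name", new StringReader("初雪"));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(term.toString());
        }
        ts.end();
        ts.close();
    }
}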
package com.lucene.data;

/**
 * Data class used for indexing and for carrying search results.
 *
 * @author Administrator
 */
public class LuceneData {

    private String id;
    private String name;
    private String author;
    private String imgPath;
    private String outline;    // summary / description
    private String type;       // type
    private String typeid;     // type id
    private String bigtype;    // main category
    private String updateTime;
    private String imgUrlPath;
    private String content;
    private String link_url;
    private Long hot = 0L;
    private Long clickPoint = 0L;

    public String getId() { return id; }
    public void setId(String id) { this.id = id; }
    public String getName() { return name; }
    public void setName(String name) { this.name = name; }
    public String getAuthor() { return author; }
    public void setAuthor(String author) { this.author = author; }
    public String getImgPath() { return imgPath; }
    public void setImgPath(String imgPath) { this.imgPath = imgPath; }
    public String getOutline() { return outline; }
    public void setOutline(String outline) { this.outline = outline; }
    public String getType() { return type; }
    public void setType(String type) { this.type = type; }
    public String getUpdateTime() { return updateTime; }
    public void setUpdateTime(String updateTime) { this.updateTime = updateTime; }
    public String getImgUrlPath() { return imgUrlPath; }
    public void setImgUrlPath(String imgUrlPath) { this.imgUrlPath = imgUrlPath; }
    public String getContent() { return content; }
    public void setContent(String content) { this.content = content; }
    public String getLink_url() { return link_url; }
    public void setLink_url(String linkUrl) { link_url = linkUrl; }
    public Long getHot() { return hot; }
    public void setHot(Long hot) { this.hot = hot; }
    public Long getClickPoint() { return clickPoint; }
    public void setClickPoint(Long clickPoint) { this.clickPoint = clickPoint; }
    public String getBigtype() { return bigtype; }
    public void setBigtype(String bigtype) { this.bigtype = bigtype; }
    public String getTypeid() { return typeid; }
    public void setTypeid(String typeid) { this.typeid = typeid; }

    @Override
    public String toString() {
        return "LuceneData [author=" + author + ", bigtype=" + bigtype
                + ", clickPoint=" + clickPoint + ", content=" + content
                + ", hot=" + hot + ", id=" + id + ", imgPath=" + imgPath
                + ", imgUrlPath=" + imgUrlPath + ", link_url=" + link_url
                + ", name=" + name + ", outline=" + outline + ", type=" + type
                + ", updateTime=" + updateTime + "]";
    }
}