搭建环境
搭建Lucene的开发环境只需要加入Lucene的Jar包,要加入的jar包至少要有:
lucene-core-3.0.1.jar(核心包)
contrib\analyzers\common\lucene-analyzers-3.0.1.jar(分词器)
contrib\highlighter\lucene-highlighter-3.0.1.jar(高亮)
contrib\memory\lucene-memory-3.0.1.jar(高亮)
Article.java
1 package cn.itcast._domain; 2 public class Article { 3 4 private Integer id; 5 private String title; 6 private String content; 7 8 public Integer getId() { 9 return id;10 }11 12 public void setId(Integer id) {13 this.id = id;14 }15 16 public String getTitle() {17 return title;18 }19 20 public void setTitle(String title) {21 this.title = title;22 }23 24 public String getContent() {25 return content;26 }27 28 public void setContent(String content) {29 this.content = content;30 }31 32 }
HelloWorld.java
1 package cn.itcast.helloworld; 2 3 import java.io.File; 4 import java.io.IOException; 5 import java.util.ArrayList; 6 import java.util.List; 7 8 import org.apache.lucene.analysis.Analyzer; 9 import org.apache.lucene.analysis.standard.StandardAnalyzer; 10 import org.apache.lucene.document.Document; 11 import org.apache.lucene.document.Field; 12 import org.apache.lucene.document.Field.Index; 13 import org.apache.lucene.document.Field.Store; 14 import org.apache.lucene.index.IndexWriter; 15 import org.apache.lucene.index.IndexWriter.MaxFieldLength; 16 import org.apache.lucene.queryParser.QueryParser; 17 import org.apache.lucene.search.IndexSearcher; 18 import org.apache.lucene.search.Query; 19 import org.apache.lucene.search.ScoreDoc; 20 import org.apache.lucene.search.TopDocs; 21 import org.apache.lucene.store.Directory; 22 import org.apache.lucene.store.FSDirectory; 23 import org.apache.lucene.util.Version; 24 import org.junit.Test; 25 26 import cn.itcast._domain.Article; 27 28 public class HelloWorld { 29 30 private static Directory directory; // 索引库目录 31 private static Analyzer analyzer; // 分词器 32 33 static { 34 try { 35 directory = FSDirectory.open(new File("./indexDir")); 36 analyzer = new StandardAnalyzer(Version.LUCENE_30); 37 } catch (IOException e) { 38 throw new RuntimeException(e); 39 } 40 } 41 42 // 建立索引 43 @Test 44 public void testCreateIndex() throws Exception { 45 // 准备数据 46 Article article = new Article(); 47 article.setId(2); 48 article.setTitle("准备Lucene的开发环境"); 49 article.setContent("如果信息检索系统在用户发出了检索请求后再去互联网上找答案,根本无法在有限的时间内返回结果。"); 50 51 // 放到索引库中 52 // 1, 把Article转为Document 53 Document doc = new Document(); 54 String idStr = article.getId().toString(); //这个使用的话效率降低 被遗弃了 55 String idStr = NumericUtils.intToPrefixCoded(article.getId()); // 一定要使用Lucene的工具类把数字转为字符串! 56 //目录区域 和 数据区 57 doc.add(new Field("id", idStr, Store.YES, Index.ANALYZED)); 58 doc.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED)); 59 doc.add(new Field("content", article.getContent(), Store.NO, Index.ANALYZED)); 60 61 // 2, 把Document放到索引库中 在目录中的长度 源码 Integer.Max_Value 62 IndexWriter indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.UNLIMITED); 63 indexWriter.addDocument(doc); 64 indexWriter.close(); 65 } 66 67 // 搜索 68 @Test 69 public void testSearch() throws Exception { 70 // 准备查询条件 71 String queryString = "lucene的"; 72 // String queryString = "hibernate"; 73 74 // 执行搜索 75 Listlist = new ArrayList (); 76 77 // ========================================================================================== 78 79 // 1,把查询字符串转为Query对象(默认只从title中查询) 80 QueryParser queryParser = new QueryParser(Version.LUCENE_30, "title", analyzer); 81 Query query = queryParser.parse(queryString); 82 83 // 2,执行查询,得到中间结果 84 IndexSearcher indexSearcher = new IndexSearcher(directory); // 指定所用的索引库 85 TopDocs topDocs = indexSearcher.search(query, 100); // 最多返回前n条结果 86 87 int count = topDocs.totalHits; 88 ScoreDoc[] scoreDocs = topDocs.scoreDocs; 89 90 // 3,处理结果 91 for (int i = 0; i < scoreDocs.length; i++) { 92 ScoreDoc scoreDoc = scoreDocs[i]; 93 float score = scoreDoc.score; // 相关度得分 94 int docId = scoreDoc.doc; // Document的内部编号 95 96 // 根据编号拿到Document数据 97 Document doc = indexSearcher.doc(docId); 98 99 // 把Document转为Article100 String idStr = doc.get("id"); //101 String title = doc.get("title");102 String content = doc.get("content"); // 等价于 doc.getField("content").stringValue();103 104 Article article = new Article(); Integer id = NumericUtils.prefixCodedToInt(doc.get("id")); // 一定要使用Lucene的工具类把字符串转为数字!105 article.setId(id);106 article.setTitle(title);107 article.setContent(content);108 109 list.add(article);110 }111 indexSearcher.close();112 113 // ==========================================================================================114 115 // 显示结果116 System.out.println("总结果数:" + list.size());117 for (Article a : list) {118 System.out.println("------------------------------");119 System.out.println("id = " + a.getId());120 System.out.println("title = " + a.getTitle());121 System.out.println("content = " + a.getContent());122 }123 }124 }
1-_搜索互联网资源的程序结构.PNG
索引库的内部结构
建立索引的执行过程
搜索的执行过程
分词器要保持一致