无干货，仅供复制：用 Lucene 的 SpanTermQuery 查找关键词第一次出现的位置。
位置信息类
package player.kent.chen.temp.lucene.span;

import org.apache.commons.lang.builder.ToStringBuilder;
import org.apache.commons.lang.builder.ToStringStyle;

/**
 * Mutable value object pairing a file with a position in a token stream.
 *
 * <p>Instances are obtained through {@link #createInstance(String, int)}; the
 * constructor is private.
 */
public class KeywordLocation {

    /** The file this location refers to, as supplied by the caller. */
    private String file;

    /**
     * position in the token stream
     */
    private int position;

    private KeywordLocation() {
    }

    /**
     * Creates a location with both fields populated.
     *
     * @param file     the file value to store (stored as given, not validated)
     * @param position the position in the token stream
     * @return a new {@code KeywordLocation}
     */
    public static final KeywordLocation createInstance(String file, int position) {
        KeywordLocation instance = new KeywordLocation();
        instance.file = file;
        instance.position = position;
        return instance;
    }

    /** @return the stored file value */
    public String getFile() {
        return file;
    }

    /** @param file the file value to store */
    public void setFile(String file) {
        this.file = file;
    }

    /** @return the position in the token stream */
    public int getPosition() {
        return position;
    }

    /** @param position the position in the token stream */
    public void setPosition(int position) {
        this.position = position;
    }

    /**
     * Renders all fields reflectively using commons-lang's
     * {@code ToStringStyle.SHORT_PREFIX_STYLE}.
     */
    @Override
    public String toString() {
        return ToStringBuilder.reflectionToString(this, ToStringStyle.SHORT_PREFIX_STYLE);
    }
}
搜索器
package player.kent.chen.temp.lucene.span;

import java.io.File;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.Locale;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Command-line sample that looks up the first span match of a keyword in the
 * "contents" field of a Lucene index and prints where it was found.
 */
public class FindFirstOccurenceSearcher {

    /** Root directory used when no argument is given; the index lives in its "index" sub-directory. */
    private static final String DEFAULT_ROOT_DIR =
            "/home/kent/diskD/home-kent-dev/workspace/kent-temp/data/lucene-sanguo";

    /** Keyword searched for when no second argument is given. */
    private static final String DEFAULT_KEYWORD = "Brotherhood";

    /**
     * Opens the index, searches for the keyword and prints {@code "keyword":location}
     * (the location prints as {@code null} when there is no match).
     *
     * @param args optional: {@code args[0]} overrides the root directory,
     *             {@code args[1]} overrides the keyword
     * @throws Exception if the index cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String rootDir = args.length > 0 ? args[0] : DEFAULT_ROOT_DIR;
        String keyword = args.length > 1 ? args[1] : DEFAULT_KEYWORD;

        Directory indexDir = FSDirectory.open(new File(rootDir, "index"));
        try {
            IndexSearcher searcher = new IndexSearcher(indexDir);
            try {
                KeywordLocation kl = findFirstOccurence(searcher, keyword);
                System.out.println(MessageFormat.format("\"{0}\":{1}", keyword, kl));
            } finally {
                // Release the searcher even when the lookup fails.
                searcher.close();
            }
        } finally {
            indexDir.close();
        }
    }

    /**
     * Finds the first span match of {@code keyword} in the "contents" field.
     *
     * <p>"First" means the first match returned by the span enumeration, which
     * Lucene documents as ordered by document number and then by start position.
     *
     * @param searcher searcher whose underlying reader is queried
     * @param keyword  the term to look for; it is lower-cased before the lookup
     * @return the stored "filepath" of the matching document plus the span's start
     *         position, or {@code null} when the term has no match
     * @throws IOException           if the index cannot be read
     * @throws CorruptIndexException if the index is corrupt
     */
    private static KeywordLocation findFirstOccurence(IndexSearcher searcher, String keyword)
            throws IOException, CorruptIndexException {
        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
        // NOTE(review): assumes indexed terms are lower-cased by the analyzer — confirm.
        Term term = new Term("contents", keyword.toLowerCase(Locale.ROOT));
        SpanTermQuery spanTermQuery = new SpanTermQuery(term);

        IndexReader indexReader = searcher.getIndexReader();
        Spans spans = spanTermQuery.getSpans(indexReader);

        // next() itself reports whether a match exists, so no separate search is needed.
        if (!spans.next()) {
            return null;
        }

        Document doc = indexReader.document(spans.doc());
        String file = doc.get("filepath");
        int position = spans.start();
        return KeywordLocation.createInstance(file, position);
    }
}
另附索引器
package player.kent.chen.temp.lucene.span;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Command-line sample that rebuilds a Lucene index from the files in a content
 * directory, one document per file.
 */
public class LearnSpanLuceneIndexer {

    /** Root directory used when no argument is given. */
    private static final String DEFAULT_ROOT_DIR =
            "/home/kent/diskD/home-kent-dev/workspace/kent-temp/data/lucene-sanguo";

    /**
     * Deletes and recreates {@code <root>/index}, indexes every file in
     * {@code <root>/content} and prints the elapsed time.
     *
     * @param args optional: {@code args[0]} overrides the root directory
     * @throws Exception if the index directory cannot be reset or indexing fails
     */
    public static void main(String[] args) throws Exception {
        String rootDir = args.length > 0 ? args[0] : DEFAULT_ROOT_DIR;
        File contentDir = new File(rootDir, "content");
        File indexDir = new File(rootDir, "index");

        // Start from an empty index directory on every run.
        FileUtils.deleteDirectory(indexDir);
        indexDir.mkdirs();

        long begin = now();
        doIndex(contentDir, indexDir);
        System.out.println("Done in miliseconds of : " + (now() - begin));
    }

    /**
     * Indexes each regular file directly inside {@code cd} into the index at {@code id}.
     *
     * <p>Each document gets a "contents" field read from the file and a stored
     * "filepath" field holding the file's absolute path.
     *
     * @param cd directory whose files are indexed (sub-directories are skipped)
     * @param id directory in which the index is created
     * @throws IOException if {@code cd} cannot be listed, or on any read/write failure
     */
    private static void doIndex(File cd, File id) throws IOException {
        File[] files = cd.listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a readable directory.
            throw new IOException("Cannot list content directory: " + cd.getAbsolutePath());
        }

        Directory indexDir = FSDirectory.open(id);
        try {
            IndexWriter writer = new IndexWriter(indexDir, new StandardAnalyzer(Version.LUCENE_30),
                    true, IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                for (File file : files) {
                    if (!file.isFile()) {
                        // FileReader cannot open a directory; skip anything that is not a regular file.
                        continue;
                    }
                    indexFile(writer, file);
                }
            } finally {
                // Always close the writer so it is not left open after a failure.
                writer.close();
            }
        } finally {
            indexDir.close();
        }
    }

    /** Adds one document for {@code file} to {@code writer}, closing the file reader afterwards. */
    private static void indexFile(IndexWriter writer, File file) throws IOException {
        System.out.println("Indexing ... " + file.getAbsolutePath());
        // NOTE(review): FileReader decodes with the platform default charset — confirm
        // the content files match it.
        FileReader contents = new FileReader(file);
        try {
            Document doc = new Document();
            doc.add(new Field("contents", contents));
            doc.add(new Field("filepath", file.getAbsolutePath(), Field.Store.YES,
                    Field.Index.ANALYZED));
            writer.addDocument(doc);
        } finally {
            // Closing an already-closed reader has no effect, so this is safe either way.
            contents.close();
        }
    }

    /** @return the current time in milliseconds */
    private static long now() {
        return System.currentTimeMillis();
    }
}