仅供拷贝
<!--pom.xml-->
<!-- Lucene core: indexing and searching APIs used by both example classes. -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>3.0.0</version>
</dependency>
<!-- Commons IO: the indexer calls org.apache.commons.io.FileUtils.deleteDirectory. -->
<dependency>
    <groupId>commons-io</groupId>
    <artifactId>commons-io</artifactId>
    <version>1.4</version>
</dependency>
package player.kent.chen.temp.lucene; import java.io.File; import java.io.FileReader; import java.io.IOException; import org.apache.commons.io.FileUtils; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; public class MyLuceneIndexer { public static void main(String[] args) throws Exception { String rootDir = "/home/kent/diskD/home-kent-dev/workspace/kent-temp/data/lucene"; File contentDir = new File(rootDir, "content"); File indexDir = new File(rootDir, "index"); FileUtils.deleteDirectory(indexDir); indexDir.mkdirs(); long begin = now(); doIndex(contentDir, indexDir); System.out.println("Done in miliseconds of : " + (now() - begin)); } private static void doIndex(File cd, File id) throws IOException { Directory indexDir = FSDirectory.open(id); IndexWriter writer = new IndexWriter(indexDir, new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED); File[] files = cd.listFiles(); for (File file : files) { System.out.println("Indexing ... " + file.getAbsolutePath()); Document doc = new Document(); doc.add(new Field("contents", new FileReader(file))); doc.add(new Field("filepath", file.getAbsolutePath(), Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.addDocument(doc); } writer.numDocs(); writer.close(); } private static long now() { return System.currentTimeMillis(); } }
package player.kent.chen.temp.lucene; import java.io.File; import java.text.MessageFormat; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; public class MyLuceneSearcher { public static void main(String[] args) throws Exception { String rootDir = "/home/kent/diskD/home-kent-dev/workspace/kent-temp/data/lucene"; File id = new File(rootDir, "index"); String keyword = "搜索"; Directory indexDir = FSDirectory.open(id); IndexSearcher is = new IndexSearcher(indexDir); QueryParser qp = new QueryParser(Version.LUCENE_30, "contents", new StandardAnalyzer( Version.LUCENE_30)); Query query = qp.parse(keyword); long begin = now(); TopDocs hits = is.search(query, 10); System.out.println(MessageFormat.format("Found {0} matches in {1} milliseconds", hits.totalHits, now() - begin)); System.out.println("They are:"); for (ScoreDoc scoreDoc : hits.scoreDocs) { Document doc = is.doc(scoreDoc.doc); String file = doc.get("filepath"); String grepCmd = MessageFormat.format("cat {0} | grep -5 {1}", file, keyword); System.out.println("Please do: " + grepCmd); } is.close(); } private static long now() { return System.currentTimeMillis(); } }