Lucene indexer/searcher 简单代码示例

仅供拷贝

<!--pom.xml-->

		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-core</artifactId>
			<version>3.0.0</version>
		</dependency>
		<!-- Required by MyLuceneIndexer, which calls org.apache.commons.io.FileUtils.
		     The version is a suggestion; any release providing FileUtils.deleteDirectory works. -->
		<dependency>
			<groupId>commons-io</groupId>
			<artifactId>commons-io</artifactId>
			<version>2.4</version>
		</dependency>
 

package player.kent.chen.temp.lucene;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Minimal Lucene 3.0 indexing example.
 *
 * <p>Rebuilds the index under {@code <root>/index} from scratch, adding one document per
 * regular file found directly inside {@code <root>/content}. Each document carries the file
 * text in the {@code contents} field and the absolute path in the stored {@code filepath}
 * field.
 */
public class MyLuceneIndexer {

    /** Data root used when no command-line argument is supplied. */
    private static final String DEFAULT_ROOT_DIR =
            "/home/kent/diskD/home-kent-dev/workspace/kent-temp/data/lucene";

    /**
     * Wipes the index directory, re-indexes the content directory and prints the elapsed time.
     *
     * @param args optional; {@code args[0]} overrides the default data root directory
     * @throws Exception if the index directory cannot be reset or indexing fails
     */
    public static void main(String[] args) throws Exception {
        String rootDir = (args != null && args.length > 0) ? args[0] : DEFAULT_ROOT_DIR;

        File contentDir = new File(rootDir, "content");
        File indexDir = new File(rootDir, "index");

        // Start from an empty index directory on every run.
        FileUtils.deleteDirectory(indexDir);
        indexDir.mkdirs();

        long begin = now();
        doIndex(contentDir, indexDir);
        System.out.println("Done in miliseconds of : " + (now() - begin));

    }

    /**
     * Indexes every regular file directly inside {@code cd} into a new index located at
     * {@code id}.
     *
     * @param cd directory holding the files to index
     * @param id directory in which the index is created
     * @throws IOException if {@code cd} cannot be listed, a file cannot be read, or the index
     *                     cannot be written
     */
    private static void doIndex(File cd, File id) throws IOException {
        // listFiles() returns null when cd is missing or is not a directory; fail with a
        // clear message instead of a NullPointerException in the loop below.
        File[] files = cd.listFiles();
        if (files == null) {
            throw new IOException("Cannot list content directory: " + cd.getAbsolutePath());
        }

        Directory indexDir = FSDirectory.open(id);
        try {
            IndexWriter writer = new IndexWriter(indexDir,
                    new StandardAnalyzer(Version.LUCENE_30), true,
                    IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                for (File file : files) {
                    // Sub-directories cannot be opened with FileReader; skip them.
                    if (!file.isFile()) {
                        continue;
                    }
                    System.out.println("Indexing ... " + file.getAbsolutePath());
                    indexFile(writer, file);
                }
                System.out.println("Indexed documents: " + writer.numDocs());
            } finally {
                // Always release the writer, even when a file fails to index.
                writer.close();
            }
        } finally {
            indexDir.close();
        }

    }

    /**
     * Adds a single file to the index as one document.
     *
     * @param writer open index writer receiving the document
     * @param file   regular file whose text is indexed
     * @throws IOException if the file cannot be read or the document cannot be added
     */
    private static void indexFile(IndexWriter writer, File file) throws IOException {
        // NOTE(review): FileReader decodes with the JVM default charset; confirm it matches
        // the encoding of the content files.
        FileReader reader = new FileReader(file);
        try {
            Document doc = new Document();
            doc.add(new Field("contents", reader));
            doc.add(new Field("filepath", file.getAbsolutePath(), Field.Store.YES,
                    Field.Index.NOT_ANALYZED));
            writer.addDocument(doc);
        } finally {
            // Guarantees the file handle is released when addDocument fails; closing an
            // already-closed FileReader is a no-op.
            reader.close();
        }
    }

    /** Returns the current wall-clock time in milliseconds. */
    private static long now() {
        return System.currentTimeMillis();
    }

}


package player.kent.chen.temp.lucene;

import java.io.File;
import java.text.MessageFormat;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Minimal Lucene 3.0 search example.
 *
 * <p>Opens the index under {@code <root>/index}, runs a query against the {@code contents}
 * field and, for each of the top 10 hits, prints a shell command built from the stored
 * {@code filepath} field and the keyword.
 */
public class MyLuceneSearcher {

    /** Data root used when no command-line argument is supplied. */
    private static final String DEFAULT_ROOT_DIR =
            "/home/kent/diskD/home-kent-dev/workspace/kent-temp/data/lucene";

    /** Query text used when no second command-line argument is supplied. */
    private static final String DEFAULT_KEYWORD = "搜索";

    /**
     * Runs the search and prints the results to standard output.
     *
     * @param args optional; {@code args[0]} overrides the data root directory and
     *             {@code args[1]} overrides the keyword
     * @throws Exception if the index cannot be opened, the keyword cannot be parsed, or the
     *                   search fails
     */
    public static void main(String[] args) throws Exception {
        String rootDir = (args != null && args.length > 0) ? args[0] : DEFAULT_ROOT_DIR;
        String keyword = (args != null && args.length > 1) ? args[1] : DEFAULT_KEYWORD;
        File id = new File(rootDir, "index");

        Directory indexDir = FSDirectory.open(id);
        try {
            IndexSearcher is = new IndexSearcher(indexDir);
            try {
                QueryParser qp = new QueryParser(Version.LUCENE_30, "contents",
                        new StandardAnalyzer(Version.LUCENE_30));
                Query query = qp.parse(keyword);

                // Only the search call itself is timed.
                long begin = now();
                TopDocs hits = is.search(query, 10);
                System.out.println(MessageFormat.format("Found {0} matches in {1} milliseconds",
                        hits.totalHits, now() - begin));

                System.out.println("They are:");

                // scoreDocs holds at most the 10 hits requested above, while totalHits
                // counts every match.
                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document doc = is.doc(scoreDoc.doc);
                    String file = doc.get("filepath");
                    String grepCmd = MessageFormat.format("cat {0} | grep -5 {1}", file, keyword);
                    System.out.println("Please do: " + grepCmd);
                }
            } finally {
                // Release the searcher even when parsing or searching throws.
                is.close();
            }
        } finally {
            indexDir.close();
        }

    }

    /** Returns the current wall-clock time in milliseconds. */
    private static long now() {
        return System.currentTimeMillis();
    }

}


Leave a Comment

Your email address will not be published.

This site uses Akismet to reduce spam. Learn how your comment data is processed.