热门关键字:  ubuntu  分区  函数  linux系统进程  Fedora

当前位置 :| 主页>Linux教程>编程开发>JAVA>

lucene入门

来源: 作者: 时间:2008-07-03 Tag: 点击:
先到http://www.apache.org/dist/lucene/java/下载luce-2.3.2.zip 找到lucene-core-2.3.2.jar,lucene-demos-2.3.2.jar加到Class_path中
就可以使用
下面是具体的使用:
---建立索引目录
package cn.jane.main;
import java.io.File;
import java.io.FileReader;
import java.io.Reader;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
public class TxtFileIndexer {
 public static void main(String[] args) throws Exception{
  //indexDir is the directory that hosts Lucene's index files
        File   indexDir = new File("D:\\luceneIndex"); //***********
        //dataDir is the directory that hosts the text files that to be indexed
        File   dataDir  = new File("D:\\luceneData");  //**************
        Analyzer luceneAnalyzer = new StandardAnalyzer();
        File[] dataFiles  = dataDir.listFiles();
        IndexWriter indexWriter = new IndexWriter(indexDir,luceneAnalyzer,true);
        long startTime = new Date().getTime();
        for(int i = 0; i < dataFiles.length; i++){
         if(dataFiles[i].isFile() && dataFiles[i].getName().endsWith(".txt")){
          System.out.println("Indexing file " + dataFiles[i].getCanonicalPath());
          Document document = new Document();
          Reader txtReader = new FileReader(dataFiles[i]);
//        document.add(Field.Text("path",dataFiles[i].getCanonicalPath()));  版本为1.9的时候用的方法
//           document.add(Field.Text("contents",txtReader));
                document.add(new Field("path",dataFiles[i].getCanonicalPath(),Field.Store.YES,Field.Index.TOKENIZED));
                document.add(new Field("contents",txtReader));
          
          indexWriter.addDocument(document);
         }
        }
        indexWriter.optimize();
        indexWriter.close();
        long endTime = new Date().getTime();
       
        System.out.println("It takes " + (endTime - startTime)
                           + " milliseconds to create index for the files in directory "
                     + dataDir.getPath());       
 }
}
 
 
---查找的方法
package cn.jane.main;
import java.io.File;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;
public class TxtFileSearcher {
 public static void main(String[] args) throws Exception{
     String queryStr = "lucene";
     //This is the directory that hosts the Lucene index
        File indexDir = new File("D:\\luceneIndex");
        FSDirectory directory = FSDirectory.getDirectory(indexDir,false);
        IndexSearcher searcher = new IndexSearcher(directory);
        if(!indexDir.exists()){
         System.out.println("The Lucene index is not exist");
         return;
        }
        Term term = new Term("contents",queryStr.toLowerCase());
        TermQuery luceneQuery = new TermQuery(term);
        Hits hits = searcher.search(luceneQuery);
        for(int i = 0; i < hits.length(); i++){
         Document document = hits.doc(i);
         System.out.println("File: " + document.get("path"));
        }
        System.out.println("success!");
 }
}
-----------------基本已经完成

上一篇:没有了
下一篇:没有了
最新评论共有 0 位网友发表了评论
发表评论
评论内容:不能超过250字,需审核,请自觉遵守互联网相关政策法规。
用户名: 密码:
匿名?
注册
栏目列表