import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
// Prompt for the search keywords on standard input and echo them back.
System.out.println("请输入关键字: ");
Scanner scanner = new Scanner(System.in);
String queryStr = scanner.nextLine();
System.out.println("您输入的关键字是: " + queryStr);
// Declared outside the try so the finally clause can release the index reader
// even when parsing or searching fails.
IndexSearcher searcher = null;
try {
    searcher = new IndexSearcher(FSDirectory.open(new File(strIndexPath)));
    String strField = "content";
    // One analyzer instance is enough for both query parsing and snippet
    // tokenization; building it once avoids re-creating it for every hit.
    SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_CURRENT);
    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, strField, analyzer);
    Query query = parser.parse(queryStr);
    System.out.println(query.toString());
    // Only the 10 best-scoring documents are requested.
    TopDocs hits = searcher.search(query, 10);
    ScoreDoc[] scoreDoc = hits.scoreDocs;

    // Highlighting setup: matched terms are wrapped in the tags given here.
    SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
            "<read>", "</read>");
    Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
    // 100 is the fragment size used for the context around the keyword;
    // it is adjustable, since returning the full body text is not practical.
    highlighter.setTextFragmenter(new SimpleFragmenter(100));

    DBConfigure dbConfigure = new DBConfigure();
    dbConfigure.OpenConn();
    // NOTE(review): the connection opened above is never closed in this block;
    // DBConfigure's close method is not visible here - confirm and release it.

    for (int index = 0; index < scoreDoc.length; index++) {
        Document doc = searcher.doc(scoreDoc[index].doc);
        String url = doc.getField("URL").stringValue();
        System.out.println(url);
        System.out.println(doc.getField("title").stringValue());

        // Look up the page body in the database by the MD5 of the URL.
        String urlMd5 = UrlToFilePath.strMd5(url.trim(), 1);
        String sql = "select content from page where url_md5='"
                + urlMd5 + "'";

        // Reset per hit: a document without a database row must not inherit
        // the body text of the previous hit.
        String content = "";
        ResultSet rs = dbConfigure.executeQuery(sql);
        try {
            if (rs.next()) {
                content = rs.getString("content");
            }
        } finally {
            // Release the cursor of this hit before issuing the next query.
            if (rs != null) {
                rs.close();
            }
        }

        // Print the highlighted summary. getBestFragment yields null when no
        // query term occurs in the text, in which case "null" is printed.
        TokenStream tokenStream = analyzer.tokenStream("token",
                new StringReader(content));
        System.out.println(highlighter.getBestFragment(tokenStream, content));
    }
} catch (Exception e) {
    e.printStackTrace();
} finally {
    // Close the searcher so the index reader it opened is released.
    if (searcher != null) {
        try {
            searcher.close();
        } catch (Exception closeError) {
            closeError.printStackTrace();
        }
    }
}
}