在项目中使用Lucene进行全文检索
博客专区 > SPPan 的博客 > 博客详情
在项目中使用Lucene进行全文检索
SPPan 发表于7个月前
在项目中使用Lucene进行全文检索
  • 发表于 7个月前
  • 阅读 18
  • 收藏 1
  • 点赞 0
  • 评论 0

本文主要讲解在maven项目中使用Lucene进行全文检索的基本步骤。

一、首先需要引入依赖,使用的版本为4.7.2。

<!-- Lucene core library -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>4.7.2</version>
</dependency>
<!-- Keyword highlighting of search results -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-highlighter</artifactId>
    <version>4.7.2</version>
</dependency>
<!-- Word segmentation (IK Analyzer) -->
<dependency>
    <groupId>com.janeluo</groupId>
    <artifactId>ikanalyzer</artifactId>
    <version>2012_u6</version>
</dependency>

二、创建一个JavaBean作为数据传输对象

/**
 * Data-transfer object exchanged between the application and the Lucene index.
 *
 * <p>The indexing and search code that uses this bean addresses some properties under
 * different names ({@code id}, {@code summary}, {@code createAt}) and additionally needs
 * {@code authorName} and {@code views}. Those accessors are provided here; the first three
 * are plain aliases of {@code sid}, {@code description} and {@code created}, so both
 * naming schemes read and write the same state.
 */
public class SearcherBean {

	private String sid;
	private String title;
	private String description;
	private String content;
	private String url;
	private Date created;
	private String authorName;
	private int views;

	private Object data; // carries extra information; not meant to be searched

	/** Creates an empty bean; populate it through the setters. */
	public SearcherBean() {
	}

	/**
	 * Creates a bean with the core properties set.
	 *
	 * @param sid         identifier of the bean
	 * @param title       title text
	 * @param description summary / description text
	 * @param content     main body text
	 * @param url         URL associated with the bean
	 * @param created     creation time
	 * @param data        arbitrary extra payload, not meant to be searched
	 */
	public SearcherBean(String sid, String title, String description, String content, String url, Date created,
			Object data) {
		this.sid = sid;
		this.title = title;
		this.description = description;
		this.content = content;
		this.url = url;
		this.created = created;
		this.data = data;
	}

	public String getSid() {
		return sid;
	}

	public void setSid(String sid) {
		this.sid = sid;
	}

	/** Alias of {@link #getSid()}. */
	public String getId() {
		return sid;
	}

	/** Alias of {@link #setSid(String)}. */
	public void setId(String id) {
		this.sid = id;
	}

	public String getTitle() {
		return title;
	}

	public void setTitle(String title) {
		this.title = title;
	}

	public String getDescription() {
		return description;
	}

	public void setDescription(String description) {
		this.description = description;
	}

	/** Alias of {@link #getDescription()}. */
	public String getSummary() {
		return description;
	}

	/** Alias of {@link #setDescription(String)}. */
	public void setSummary(String summary) {
		this.description = summary;
	}

	public String getContent() {
		return content;
	}

	public void setContent(String content) {
		this.content = content;
	}

	public String getUrl() {
		return url;
	}

	public void setUrl(String url) {
		this.url = url;
	}

	public Date getCreated() {
		return created;
	}

	public void setCreated(Date created) {
		this.created = created;
	}

	/** Alias of {@link #getCreated()}. */
	public Date getCreateAt() {
		return created;
	}

	/** Alias of {@link #setCreated(Date)}. */
	public void setCreateAt(Date createAt) {
		this.created = createAt;
	}

	public String getAuthorName() {
		return authorName;
	}

	public void setAuthorName(String authorName) {
		this.authorName = authorName;
	}

	public int getViews() {
		return views;
	}

	public void setViews(int views) {
		this.views = views;
	}

	public Object getData() {
		return data;
	}

	public void setData(Object data) {
		this.data = data;
	}

}

三、增加索引

/**
 * Adds one document, built from {@code bean} by {@code createDoc}, to the index.
 *
 * <p>Failures are logged and not rethrown. Both the {@code IndexWriter} and the
 * {@code Directory} opened here are closed before the method returns; closing the
 * writer alone does not release the directory it was opened on.
 */
public void addBean() {
    Directory directory = null;
    IndexWriter writer = null;
    try {
        // Location of the index on disk.
        // NOTE(review): open(File) is a static factory inherited from FSDirectory, so the
        // concrete implementation it picks may not be NIOFSDirectory - confirm if it matters.
        directory = NIOFSDirectory.open(new File("c://"));
        // Analyzer used to tokenize the text fields.
        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);
        writer = new IndexWriter(directory, iwc);
        // Build the document and hand it to the writer; it is committed when the writer closes.
        Document doc = createDoc(bean);
        writer.addDocument(doc);
    } catch (Exception e) {
        logger.error("add bean to lucene error", e);
    } finally {
        // Close the writer first (commits and releases the write lock), then the directory.
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            logger.error("close failed", e);
        }
        try {
            if (directory != null) {
                directory.close();
            }
        } catch (IOException e) {
            logger.error("close failed", e);
        }
    }
}
/**
 * Converts a bean into the Lucene document stored in the index.
 *
 * <p>Every field is stored so it can be read back from search hits. {@code id},
 * {@code authorName} and {@code createdAt} are added as {@code StringField}s,
 * {@code title}, {@code summary} and {@code content} as {@code TextField}s, and
 * {@code views} as an {@code IntField}. The creation time is encoded with
 * {@code DateTools} at millisecond resolution.
 *
 * @param bean the bean to index
 * @return a new document holding the bean's fields
 */
private Document createDoc(SearcherBean bean) {
    final Field.Store stored = Field.Store.YES;
    final String createdAt = DateTools.dateToString(bean.getCreateAt(), DateTools.Resolution.MILLISECOND);

    // Declared in the order they are added to the document.
    Field[] fields = {
        new StringField("id", bean.getId(), stored),
        new TextField("title", bean.getTitle(), stored),
        new TextField("summary", bean.getSummary(), stored),
        new TextField("content", bean.getContent(), stored),
        new StringField("authorName", bean.getAuthorName(), stored),
        new IntField("views", bean.getViews(), stored),
        new StringField("createdAt", createdAt, stored)
    };

    Document document = new Document();
    for (Field field : fields) {
        document.add(field);
    }
    return document;
}

四、删除索引

/**
 * Deletes every indexed document whose {@code id} term equals {@code beanId}.
 *
 * <p>Failures are logged and not rethrown. The writer is always closed, which also
 * commits the deletion.
 *
 * @param beanId value of the {@code id} field of the document(s) to remove
 */
@Override
public void deleteBean(String beanId) {
    IndexWriter writer = null;
    try {
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);
        writer = new IndexWriter(directory, iwc);
        writer.deleteDocuments(new Term("id", beanId));
    } catch (IOException e) {
        // Only IOException can be thrown by the calls above; a catch clause for a checked
        // exception that the try body cannot throw does not compile.
        logger.error("delete bean to lucene error,beanId:"+beanId,e);
    } finally {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            logger.error("close failed", e);
        }
    }
}
    
/**
 * Removes every document from the index.
 *
 * <p>Failures are logged and not rethrown. The writer is always closed, which also
 * commits the deletion.
 */
@Override
public void deleteAllBean() {
	IndexWriter writer = null;
	try {
		IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);
		writer = new IndexWriter(directory, iwc);
		writer.deleteAll();
	} catch (IOException e) {
		// Only IOException can be thrown by the calls above; a catch clause for a checked
		// exception that the try body cannot throw does not compile.
		logger.error("delete allBean to lucene error",e);
	} finally {
		try {
			if (writer != null) {
				writer.close();
			}
		} catch (IOException e) {
			logger.error("close failed", e);
		}
	}
}

五、检索

/**
 * Converts search hits into {@code SearcherBean}s.
 *
 * <p>{@code title}, {@code summary} and {@code content} are passed through
 * {@code setHighlighter}; the remaining fields are copied from the stored document.
 * If the stored {@code createdAt} value cannot be parsed, the bean is still returned
 * with its creation time left unset.
 *
 * @param query    the query that produced the hits; used for highlighting
 * @param searcher the searcher used to load the stored documents
 * @param topDocs  the hits to convert
 * @return one bean per hit, in hit order
 * @throws IOException if a stored document cannot be read
 */
private List<SearcherBean> getSearcherBeans(Query query, IndexSearcher searcher, TopDocs topDocs) throws IOException {
    List<SearcherBean> searcherBeans = new ArrayList<>(topDocs.scoreDocs.length);
    for (ScoreDoc item : topDocs.scoreDocs) {
        Document doc = searcher.doc(item.doc);
        SearcherBean searcherBean = new SearcherBean();
        searcherBean.setId(doc.get("id"));

        searcherBean.setTitle(setHighlighter(query, doc, "title"));
        searcherBean.setSummary(setHighlighter(query, doc, "summary"));
        searcherBean.setContent(setHighlighter(query, doc, "content"));

        searcherBean.setViews(Integer.parseInt(doc.get("views")));
        searcherBean.setAuthorName(doc.get("authorName"));

        try {
            searcherBean.setCreateAt(DateTools.stringToDate(doc.get("createdAt")));
        } catch (java.text.ParseException e) {
            // A malformed date does not invalidate the hit; keep the bean without it.
            e.printStackTrace();
        }
        searcherBeans.add(searcherBean);
    }
    return searcherBeans;
}
/**
 * Builds the query used for keyword search.
 *
 * <p>The keyword is parsed once per searched field ({@code content}, {@code title},
 * {@code summary}) and the resulting queries are combined as optional clauses of a
 * {@code BooleanQuery} of which at least one must match.
 *
 * @param keyword the user's search text, in query-parser syntax
 * @return the combined query, or {@code null} if the keyword cannot be parsed
 */
private Query getQuery(String keyword) {
    final String[] searchedFields = {"content", "title", "summary"};
    try {
        BooleanQuery combined = new BooleanQuery();
        for (String fieldName : searchedFields) {
            QueryParser parser = new QueryParser(Version.LUCENE_47, fieldName, analyzer);
            combined.add(parser.parse(keyword), BooleanClause.Occur.SHOULD);
        }
        combined.setMinimumNumberShouldMatch(1);
        return combined;
    } catch (ParseException e) {
        e.printStackTrace();
        return null;
    }
}
/**
 * Searches by keyword and returns up to the first 50 hits as a single page.
 *
 * <p>The index reader opened for the search is closed before returning. Failures are
 * reported on standard error instead of being silently discarded.
 *
 * @param keyword the search text
 * @return the page of results, or {@code null} if the keyword cannot be parsed or the
 *         search fails
 */
@Override
public Page search(String keyword) {
    // try-with-resources: the reader must be closed or its file handles leak on every call.
    try (IndexReader aIndexReader = DirectoryReader.open(directory)) {
        IndexSearcher searcher = new IndexSearcher(aIndexReader);
        Query query = getQuery(keyword);
        if (query == null) {
            // getQuery has already reported the parse failure.
            return null;
        }
        TopDocs topDocs = searcher.search(query, 50);
        List searcherBeans = getSearcherBeans(query, searcher, topDocs);
        // NOTE(review): the paging figures below are fixed constants, not derived from the
        // actual hits (compare with the paged search overload) - confirm they are intended.
        Page searcherBeanPage = new Page<>(searcherBeans, 1, 10, 100, 1000);
        return searcherBeanPage;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return null;
}
/**
 * Searches by keyword and returns one page of results.
 *
 * <p>Paging is done with {@code searchAfter}, starting from the last hit of the previous
 * page. The index reader opened for the search is closed before returning.
 *
 * @param pageNum     page to return; the first page is 1
 * @param pageSize    number of results per page
 * @param queryString the search text
 * @return the requested page, or {@code null} if the search text cannot be parsed or an
 *         I/O error occurs
 */
@Override
public Page search(int pageNum, int pageSize, String queryString) {
    // try-with-resources: the reader must be closed or its file handles leak on every call.
    try (IndexReader aIndexReader = DirectoryReader.open(directory)) {
        IndexSearcher searcher = new IndexSearcher(aIndexReader);
        Query query = getQuery(queryString);
        if (query == null) {
            // getQuery has already reported the parse failure; searching with a null
            // query would only fail with a NullPointerException.
            return null;
        }
        ScoreDoc lastScoreDoc = getLastScoreDoc(pageNum, pageSize, query, searcher);
        TopDocs topDocs = searcher.searchAfter(lastScoreDoc, query, pageSize);
        List searcherBeans = getSearcherBeans(query, searcher, topDocs);
        int totalRow = searchTotalRecord(searcher, query);
        // Round up so that a partially filled last page is counted.
        int totalPages = totalRow / pageSize;
        if ((totalRow % pageSize) != 0) {
            totalPages++;
        }
        Page searcherBeanPage = new Page<>(searcherBeans, pageNum, pageSize, totalPages, totalRow);
        return searcherBeanPage;
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;
}
/**
 * Finds the hit after which the requested page starts, for use with {@code searchAfter}.
 *
 * <p>Returns {@code null} for the first page (and for any page index below 1), which makes
 * {@code searchAfter} start from the top. When the requested page lies beyond the available
 * hits, the last available hit is returned, so the subsequent {@code searchAfter} yields an
 * empty page instead of this method failing with an index-out-of-bounds error.
 *
 * @param pageIndex     page being requested; the first page is 1
 * @param pageSize      number of results per page
 * @param query         the query being paged
 * @param indexSearcher the searcher to run the query on
 * @return the last hit of the preceding pages, or {@code null} if there is none
 * @throws IOException if the search fails
 */
private ScoreDoc getLastScoreDoc(int pageIndex, int pageSize, Query query, IndexSearcher indexSearcher) throws IOException {
    if (pageIndex <= 1) {
        return null;
    }
    // Number of hits that precede the requested page.
    int num = pageSize * (pageIndex - 1);
    if (num <= 0) {
        return null;
    }
    TopDocs tds = indexSearcher.search(query, num);
    ScoreDoc[] hits = tds.scoreDocs;
    if (hits.length == 0) {
        return null;
    }
    // Fewer than num hits exist when the page is past the end of the results.
    return hits[Math.min(num, hits.length) - 1];
}
/**
 * Counts the documents that match a query.
 *
 * <p>Only the hit count is needed, so a single top hit is requested and
 * {@code TopDocs.totalHits} is read, rather than collecting every matching document
 * and measuring the resulting array.
 *
 * @param searcher the searcher to run the query on
 * @param query    the query to count matches for
 * @return the number of matching documents; 0 if there are none
 * @throws IOException if the search fails
 */
public int searchTotalRecord(IndexSearcher searcher, Query query) throws IOException {
    TopDocs topDocs = searcher.search(query, 1);
    if (topDocs == null) {
        return 0;
    }
    return topDocs.totalHits;
}

六、设置关键字高亮

/**
 * Returns the stored value of a field with the terms matched by the query highlighted.
 *
 * <p>Matched terms are wrapped in {@code <em>}...{@code </em>}. With empty pre/post tags
 * the highlighter would mark nothing visibly, so explicit tags are supplied.
 * NOTE(review): the exact markup originally intended is not known - adjust the tags to
 * whatever the view layer expects.
 *
 * <p>If the highlighter finds no fragment, or highlighting fails, the unmodified stored
 * value is returned so the caller still has text to display.
 *
 * @param query the query whose terms should be highlighted
 * @param doc   the stored document
 * @param field name of the field to read and highlight
 * @return the highlighted fragment, the raw field value as a fallback, or {@code null}
 *         if the document has no stored value for the field
 */
private String setHighlighter(Query query,Document doc,String field){
    String fieldValue = null;
    try {
        fieldValue = doc.get(field);
        if (fieldValue == null) {
            // Nothing stored for this field, so there is nothing to highlight.
            return null;
        }
        SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<em>", "</em>");
        Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
        String highlighterStr = highlighter.getBestFragment(analyzer, field, fieldValue);
        return highlighterStr == null ? fieldValue : highlighterStr;
    } catch (Exception e) {
        e.printStackTrace();
    }
    // Highlighting failed: fall back to the raw value (null if it could not be read).
    return fieldValue;
}

标签: Lucene
共有 人打赏支持
粉丝 6
博文 30
码字总数 15436
×
SPPan
如果觉得我的文章对您有用,请随意打赏。您的支持将鼓励我继续创作!
* 金额(元)
¥1 ¥5 ¥10 ¥20 其他金额
打赏人
留言
* 支付类型
微信扫码支付
打赏金额:
已支付成功
打赏金额: