折腾了好几天,终于把东西都搬到新住处了,累死我了.现在是光着膀子坐在电脑前码字.
前几天发表了一篇文章,写的是关于lucene(见文章我封装的全文检索之lucene篇),对于这篇文章大家什么看法都有,有好有坏,不管好坏,都谢谢大家,我会继续努力写下去的,我也会参考你们的建议去修改一下,争取写出更好的!
今天准备写的是关于solr的,solr相信大家有的已经很熟悉了,具体是什么玩意,怎么用啊,我就不写了.浪费oschina服务器硬盘空间.我就写写,我封装的这套所谓的框架(好多人都说仅仅只是一个对索引的创建,更新,删除以及查询的几个操作而已,不过确实是这样的.名字起的有点大了.)
啥也不说,先浪费点oschina的硬盘再说(贴代码):
package com.message.base.search.engine;
import com.message.base.pagination.PaginationSupport;
import com.message.base.pagination.PaginationUtils;
import com.message.base.search.SearchBean;
import com.message.base.search.SearchInitException;
import com.message.base.utils.StringUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.BeanUtils;
import java.net.MalformedURLException;
import java.util.*;
/**
 * Search engine implementation backed by a Solr server.
 *
 * <p>Each {@link SearchBean} is stored as one Solr document. A fixed set of
 * fields (pkId, keyword, owerId, owerName, link, createDate, indexType) is
 * written for every bean; the bean's own index fields are written under the
 * name {@code <field>_message}.
 *
 * <p>All index-modifying methods are {@code synchronized} on this instance;
 * {@link #doSearch} is not.
 *
 * @author sunhao(sunhao.java@gmail.com)
 * @version V1.0
 * @createTime 13-5-5 9:36 PM
 */
public class SolrSearchEngine extends AbstractSearchEngine {
    private static final Logger logger = LoggerFactory.getLogger(SolrSearchEngine.class);

    /** Suffix appended to a bean field name to obtain the Solr field name. */
    private static final String DYNAMIC_FIELD_SUFFIX = "_message";

    /** Characters that have a meaning in the Solr/Lucene query syntax and must be escaped. */
    private static final String QUERY_SPECIAL_CHARS = "\\+-!():^[]\"{}~*?|&;/";

    /** Guards {@link #server} and {@link #cachedServer}; separate from {@code this} so searches never wait for an indexing run. */
    private final Object serverLock = new Object();

    private String server = "http://localhost:8080/solr";

    /** Lazily created client for {@link #server}; reset whenever the URL changes. */
    private SolrServer cachedServer;

    /**
     * Returns the Solr client for the configured server URL, creating it on first use.
     *
     * <p>The client is cached instead of being rebuilt on every call: SolrJ documents
     * the HTTP client as thread-safe and recommends reusing one instance.
     *
     * @return the shared Solr client
     * @throws SearchInitException if the server URL is empty or malformed
     */
    private SolrServer getSolrServer() {
        synchronized (serverLock) {
            if (StringUtils.isEmpty(server)) {
                logger.error("null solr server path!");
                throw new SearchInitException("Give a null solr server path");
            }
            if (cachedServer == null) {
                try {
                    cachedServer = new CommonsHttpSolrServer(server);
                } catch (MalformedURLException e) {
                    // Log the cause here so the underlying error is not lost.
                    logger.error("malformed solr server url '" + server + "'", e);
                    throw new SearchInitException("Connect to solr server error use server '" + server + "'");
                }
            }
            return cachedServer;
        }
    }

    /**
     * Adds (or overwrites) the index documents for the given beans and commits.
     *
     * <p>{@code null} elements are skipped. If nothing remains to be indexed, no
     * request is sent to Solr.
     *
     * @param searchBeans beans to index; may be {@code null} or empty
     * @throws SearchInitException if a bean lacks one of the mandatory fields
     * @throws Exception           if talking to Solr fails
     */
    public synchronized void doIndex(List<SearchBean> searchBeans) throws Exception {
        if (searchBeans == null || searchBeans.isEmpty()) {
            logger.debug("given search beans is empty!");
            return;
        }

        List<SolrInputDocument> documents = new ArrayList<SolrInputDocument>(searchBeans.size());
        for (SearchBean sb : searchBeans) {
            if (sb == null) {
                // Skip only this element; the remaining beans must still be indexed.
                logger.debug("give SearchBean is null!");
                continue;
            }
            documents.add(toDocument(sb));
        }
        if (documents.isEmpty()) {
            return;
        }

        SolrServer solr = getSolrServer();
        solr.add(documents);
        solr.commit();
    }

    /**
     * Converts one bean into a Solr input document, validating the mandatory fields.
     *
     * @param sb the bean to convert, never {@code null}
     * @return the populated document
     * @throws SearchInitException if a mandatory field is empty
     */
    private SolrInputDocument toDocument(SearchBean sb) throws Exception {
        // initialise the bean's common fields
        sb.initPublicFields();

        // Validate the id before it is used to build the unique key.
        if (StringUtils.isEmpty(sb.getId())) {
            throw new SearchInitException("you must give a id");
        }
        SolrInputDocument sid = new SolrInputDocument();
        // Unique per (index type, primary key), so a bean's document can be located from its key.
        sid.addField("id", "uniqueKey-" + sb.getIndexType() + "-" + sb.getId());
        sid.addField("pkId", sb.getId());

        if (StringUtils.isEmpty(sb.getKeyword())) {
            throw new SearchInitException("you must give a keyword");
        }
        sid.addField("keyword", sb.getKeyword());

        if (StringUtils.isEmpty(sb.getOwerId())) {
            throw new SearchInitException("you must give a owerId");
        }
        sid.addField("owerId", sb.getOwerId());

        if (StringUtils.isEmpty(sb.getOwerName())) {
            throw new SearchInitException("you must give a owerName");
        }
        sid.addField("owerName", sb.getOwerName());

        if (StringUtils.isEmpty(sb.getLink())) {
            throw new SearchInitException("you must give a link");
        }
        sid.addField("link", sb.getLink());

        if (StringUtils.isEmpty(sb.getCreateDate())) {
            throw new SearchInitException("you must give a createDate");
        }
        sid.addField("createDate", sb.getCreateDate());

        sid.addField("indexType", getIndexType(sb));

        String[] doIndexFields = sb.getDoIndexFields();
        Map<String, String> values = sb.getIndexFieldValues();
        if (doIndexFields != null && values != null) {
            for (String f : doIndexFields) {
                String value = values.get(f);
                if (value != null) {
                    // stored under the dynamic-field name
                    sid.addField(f + DYNAMIC_FIELD_SUFFIX, value);
                }
            }
        }
        return sid;
    }

    /**
     * Deletes every document whose {@code pkId} equals the bean's id and commits.
     * Does nothing (apart from a warning) when the bean or its id is empty.
     *
     * @param bean the bean whose index entry should be removed
     * @throws Exception if talking to Solr fails
     */
    public synchronized void deleteIndex(SearchBean bean) throws Exception {
        if (bean == null) {
            logger.warn("Get search bean is empty!");
            return;
        }
        String id = bean.getId();
        if (StringUtils.isEmpty(id)) {
            logger.warn("get id and id value from bean is empty!");
            return;
        }
        deleteByQueryAndCommit("pkId:" + escapeQueryChars(id));
    }

    /**
     * Deletes the index entries of all given beans, one {@link #deleteIndex} call per bean.
     *
     * @param beans beans to remove; {@code null} is ignored with a warning
     * @throws Exception if talking to Solr fails
     */
    public synchronized void deleteIndexs(List<SearchBean> beans) throws Exception {
        if (beans == null) {
            logger.warn("Get beans is empty!");
            return;
        }
        for (SearchBean bean : beans) {
            this.deleteIndex(bean);
        }
    }

    /**
     * Runs a wildcard search ({@code *keyword*}) over the search fields of every given bean.
     *
     * @param beans         beans supplying the keyword and the fields to search
     * @param isHighlighter whether matched text should be wrapped in the highlight prefix/suffix
     * @param start         offset of the first hit; {@code -1} means 0
     * @param num           maximum number of hits; {@code -1} means effectively unlimited
     * @return the page of matching beans, or the null pagination when there is nothing to search for
     * @throws Exception if talking to Solr fails
     */
    public PaginationSupport doSearch(List<SearchBean> beans, boolean isHighlighter, int start, int num) throws Exception {
        if (beans == null || beans.isEmpty()) {
            logger.debug("given search beans is empty!");
            return PaginationUtils.getNullPagination();
        }

        String queryText = buildQueryText(beans);
        if (StringUtils.isEmpty(queryText)) {
            logger.warn("query string is null!");
            return PaginationUtils.getNullPagination();
        }

        SolrQuery query = new SolrQuery();
        query.setQuery(queryText);
        query.setStart(start == -1 ? 0 : start);
        query.setRows(num == -1 ? 100000000 : num);
        query.setFields("*", "score");
        if (isHighlighter) {
            query.setHighlight(true).setHighlightSimplePre(getHtmlPrefix()).setHighlightSimplePost(getHtmlSuffix());
            query.setHighlightSnippets(2);
            query.setHighlightFragsize(1000);
            query.setParam("hl.fl", "*");
        }

        QueryResponse response = getSolrServer().query(query);
        SolrDocumentList hits = response.getResults();
        Map<String, Map<String, List<String>>> highlighting = isHighlighter ? response.getHighlighting() : null;

        // Kept as a raw List because the parameter type of makePagination is not visible here.
        List queryResults = new ArrayList();
        for (SolrDocument doc : hits) {
            String indexType = doc.get("indexType").toString();
            SearchBean result = super.getSearchBean(indexType, beans);
            if (result == null) {
                logger.warn("no search bean found for index type '{}'!", indexType);
                continue;
            }
            try {
                populate(result, doc, highlighting);
            } catch (Exception e) {
                // Best effort: a hit that cannot be fully populated is still returned.
                logger.error(e.getMessage(), e);
            }
            queryResults.add(result);
        }

        return PaginationUtils.makePagination(queryResults, (int) hits.getNumFound(), num, start);
    }

    /**
     * Builds the Solr query: one {@code (field_message:*keyword*)} clause per search
     * field of every bean, all joined with {@code OR}.
     *
     * @param beans beans supplying keywords and search fields
     * @return the query text, empty when no clause could be built
     */
    private String buildQueryText(List<SearchBean> beans) {
        StringBuilder text = new StringBuilder();
        for (SearchBean bean : beans) {
            if (bean == null) {
                continue;
            }
            // fields to search in
            String[] doSearchFields = bean.getDoSearchFields();
            if (doSearchFields == null || doSearchFields.length == 0) {
                continue;
            }
            String keyword = bean.getKeyword();
            if (keyword == null) {
                // Would otherwise be searched for as the literal text "null".
                continue;
            }
            String escaped = escapeQueryChars(keyword);
            for (String f : doSearchFields) {
                // Separator goes before every clause but the first, across bean boundaries too.
                if (text.length() > 0) {
                    text.append(" OR ");
                }
                text.append("(").append(f).append(DYNAMIC_FIELD_SUFFIX).append(":*").append(escaped).append("*").append(")");
            }
        }
        return text.toString();
    }

    /**
     * Copies the stored fields of a hit into the result bean, preferring highlighted
     * snippets over the stored value where available.
     *
     * @param result       bean to fill
     * @param doc          the Solr hit
     * @param highlighting highlighting section of the response, or {@code null} when disabled
     */
    private void populate(SearchBean result, SolrDocument doc, Map<String, Map<String, List<String>>> highlighting) {
        result.setId(fieldAsString(doc, "pkId"));
        result.setLink(fieldAsString(doc, "link"));
        result.setOwerId(fieldAsString(doc, "owerId"));
        result.setOwerName(fieldAsString(doc, "owerName"));
        result.setCreateDate(fieldAsString(doc, "createDate"));
        result.setIndexType(fieldAsString(doc, "indexType"));

        String docId = fieldAsString(doc, "id");

        String keyword = firstHighlight(highlighting, docId, "keyword");
        if (StringUtils.isEmpty(keyword)) {
            keyword = fieldAsString(doc, "keyword");
        }
        result.setKeyword(keyword);

        // fields that were searched in
        String[] doSearchFields = result.getDoSearchFields();
        if (doSearchFields == null || doSearchFields.length == 0) {
            return;
        }
        Map<String, String> extendValues = new HashMap<String, String>();
        for (String field : doSearchFields) {
            String solrField = field + DYNAMIC_FIELD_SUFFIX;
            String value = firstHighlight(highlighting, docId, solrField);
            if (value == null) {
                value = fieldAsString(doc, solrField);
            }
            extendValues.put(field, value);
        }
        result.setSearchValues(extendValues);
    }

    /** Returns the string form of a stored field, or {@code null} when the document has no such field. */
    private static String fieldAsString(SolrDocument doc, String name) {
        Object value = doc.getFieldValue(name);
        return value == null ? null : value.toString();
    }

    /** Returns the first highlight snippet for the given document and field, or {@code null} when there is none. */
    private static String firstHighlight(Map<String, Map<String, List<String>>> highlighting, String docId, String field) {
        if (highlighting == null || docId == null) {
            return null;
        }
        Map<String, List<String>> perDocument = highlighting.get(docId);
        if (perDocument == null) {
            return null;
        }
        List<String> snippets = perDocument.get(field);
        if (snippets == null || snippets.isEmpty()) {
            return null;
        }
        return snippets.get(0);
    }

    /**
     * Backslash-escapes query-syntax characters and whitespace so the value is
     * treated as literal text inside a Solr query.
     */
    private static String escapeQueryChars(String raw) {
        StringBuilder escaped = new StringBuilder(raw.length() + 8);
        for (int i = 0; i < raw.length(); i++) {
            char c = raw.charAt(i);
            if (QUERY_SPECIAL_CHARS.indexOf(c) >= 0 || Character.isWhitespace(c)) {
                escaped.append('\\');
            }
            escaped.append(c);
        }
        return escaped.toString();
    }

    /** Deletes all documents matching the query, logs the response, and commits. */
    private void deleteByQueryAndCommit(String query) throws Exception {
        SolrServer solr = getSolrServer();
        UpdateResponse ur = solr.deleteByQuery(query);
        logger.debug("delete indexs by query '{}'! UpdateResponse is '{}'! execute for '{}'ms!",
                new Object[]{query, ur, ur.getElapsedTime()});
        solr.commit();
    }

    /**
     * Deletes every document of the index type that belongs to the given bean class.
     *
     * @param clazz bean class; instantiated to resolve its index type
     * @throws Exception if talking to Solr fails
     */
    public synchronized void deleteIndexsByIndexType(Class<? extends SearchBean> clazz) throws Exception {
        String indexType = getIndexType(BeanUtils.instantiate(clazz));
        this.deleteIndexsByIndexType(indexType);
    }

    /**
     * Deletes every document whose {@code indexType} field equals the given value and commits.
     *
     * @param indexType the index type to clear
     * @throws Exception if talking to Solr fails
     */
    public synchronized void deleteIndexsByIndexType(String indexType) throws Exception {
        deleteByQueryAndCommit("indexType:" + escapeQueryChars(String.valueOf(indexType)));
    }

    /**
     * Deletes every document in the index and commits.
     *
     * @throws Exception if talking to Solr fails
     */
    public synchronized void deleteAllIndexs() throws Exception {
        deleteByQueryAndCommit("*:*");
    }

    /**
     * Updates the index entry of a single bean; see {@link #updateIndexs(java.util.List)}.
     *
     * @param searchBean the bean to update
     * @throws Exception if talking to Solr fails
     */
    public void updateIndex(SearchBean searchBean) throws Exception {
        this.updateIndexs(Collections.singletonList(searchBean));
    }

    /**
     * Updates index entries.<br/>
     * In Solr an update is the same operation as an add: a document with an existing
     * id is replaced, otherwise a new one is created.<br/>
     * {@link SolrSearchEngine#doIndex(java.util.List)}
     *
     * @param searchBeans the beans to update
     * @throws Exception if talking to Solr fails
     */
    public void updateIndexs(List<SearchBean> searchBeans) throws Exception {
        this.doIndex(searchBeans);
    }

    /**
     * Sets the base URL of the Solr server. The cached client is discarded so the
     * next operation connects to the new URL.
     *
     * @param server Solr base URL, e.g. {@code http://localhost:8080/solr}
     */
    public void setServer(String server) {
        synchronized (serverLock) {
            this.server = server;
            this.cachedServer = null;
        }
    }
}
关于solr服务端的配置,我想说的就是那个schema.xml文件的配置:
1.这里我配置了几个共有的字段,如下:
<!-- start my solr -->
<field name="pkId" type="string" indexed="true" stored="true"/>
<field name="keyword" type="string" indexed="true" stored="true"/>
<field name="owerId" type="string" indexed="true" stored="true"/>
<field name="owerName" type="string" indexed="true" stored="true"/>
<field name="link" type="string" indexed="true" stored="true"/>
<field name="createDate" type="string" indexed="true" stored="true"/>
<field name="indexType" type="string" indexed="true" stored="true"/>
<!-- end my solr -->
这些是一些固定字段,也是每个对象都通用的.
<!-- a dynamic field, matches all fields that end with _message -->
<dynamicField name="*_message" type="paodingAnalyzer" indexed="true" stored="true"/>
这个是动态匹配字段,比如说我有一个对象,其中一个字段是真实姓名(truename),那么在solr索引中的字段名称就叫(truename_message).这样就能匹配起来了,so easy!
再谈谈solr使用分词,暂时我使用的是庖丁分词(paoding),需要的可以去网上找找,osc上就有的.
需要在solr的schema.xml添加一个字段类型:
<!-- paoding -->
<fieldType name="paodingAnalyzer" class="solr.TextField">
<analyzer class="net.paoding.analysis.analyzer.PaodingAnalyzer"></analyzer>
</fieldType>
然后在你需要使用分词的字段的配置上,修改type="paodingAnalyzer".跟上面的动态字段一致.
可以检查一下是否配置正确:
访问http://192.168.1.118/solr/admin/analysis.jsp?highlight=on
按照以下图片说明操作:
好了,over here.具体对索引的新增.删除.更新.以及查询的操作见上面的代码,相信对于沉浸在oschina这么多年的你们,这些都是小case了.