文档章节

lucene模糊查询 精确查询 包含关键字 不包含关键字 包含全部关键字 范围查询 模糊查询 通配符

一枚Sir
 一枚Sir
发布于 2014/10/22 17:36
字数 1998
阅读 270
收藏 0

/**
 * 
 *
 * @Author 
 * @Date 2012-03-08
 * @Describe 用于lucene高级查询
 */
public class AdvanceSearchService {

 
 public List<IndexRecord> search(QueryConditionQuestion qcq) {
  //测试的时候使用,真正调用时jsonstr是传递过来的
/*  String jsonstr="{end:\"40\",isDynsort:\"0\",metadbId:\"402881be3387158c01338716928d0000\",start:\"0\","
            +"queryConditions:[{queryGroups:[" +
              "{queryType:\"range\",phraseSlop:\"5\",stypeIds:[\"pubtime\"]," +
              "keyWords:[\"2010\",\"2011\"],operator:\"or\",rankWeight:\"1\"," +
              "groupId:\"1\",groupOperator:\"and\"}," +
              "{queryType:\"range\",phraseSlop:\"5\",stypeIds:[\"pubtime\"]," +
              "keyWords:[\"2006\",\"2008\"],groupOperator:\"and\",operator:\"or\",groupId:\"1\"," +
              "}]}," +
              "{queryGroups:[" +
              "{queryType:\"wildcard\",phraseSlop:\"5\",stypeIds:[\"title\",\"lib\"]," +
              "keyWords:[\"人?\"],operator:\"or\",rankWeight:\"1\"," +
              "groupId:\"1\",groupOperator:\"and\"}," +
              "{queryType:\"fuzzy\",phraseSlop:\"5\",stypeIds:[\"title\",\"lib\"]," +
              "keyWords:[\"test\"],operator:\"or\",rankWeight:\"1\"," +
              "groupId:\"1\",groupOperator:\"not\"}"+
              "]}" +
              "]}";*/
 // QueryConditionQuestion qcq=JsonstrToQueryConditonQuestion.getQueryConditionQuestion(jsonstr);
  int start=qcq.getStart();
  int end=qcq.getEnd();
  List<IndexRecord> recordsList = new ArrayList<IndexRecord>();
  MssMetadbInfo mssMetadbInfo = mssMetadbInfoDAO.getMssMetadbInfoByid(qcq.getMetadbId());
  List<MssMetadbStruct> listStruct = mssMetadbStructDAO.getMssMetadbStruct(qcq.getMetadbId()); 
  String indexPath = mssMetadbInfo.getIndexpath();
  

  
  
  //拼接查询字符串
  Map<Integer,QueryConditionVO[]> vos=qcq.getQueryConditions();
  Similarity similarity = new IKSimilarity();
  IndexSearcher isearcher = null;
  Directory directory = null;
  File file = null;
  TopDocs topDocs = null;
  try {
   file = new File(indexPath);
   directory = NIOFSDirectory.open(file);
   isearcher = new IndexSearcher(IndexReader.open(directory));
   isearcher.setSimilarity(similarity);
   
   topDocs = isearcher.search(getAdvanceQuery(vos), isearcher.maxDoc());
      start--;// 调用方传入的参数从1开始,所以相应的数组下标应该 -1
   if (start < 0)
    start = 0;
   int flag = start;
   if (topDocs.totalHits > 0) {
    ScoreDoc[] scoreDocs = topDocs.scoreDocs;
    if (scoreDocs.length > 0) {
     IndexField indexField = null;
     for (int j = start; j < scoreDocs.length; j++) {
      // 一次循环生成一个indexRecord
      IndexRecord indexRecord = new IndexRecord();
      indexRecord.setTotalHits(topDocs.totalHits);
      indexRecord.setScore((scoreDocs[j].score) * 100);// 分数

      List<IndexField> listIndexField = new ArrayList<IndexField>();

      // 添加固有字段 字段id
      Document targetDoc = isearcher.doc(scoreDocs[j].doc);
      indexField = new IndexField();
      indexField.setName(Consts.INDEXFIELD_ID);
      indexField.setText(targetDoc.get(Consts.INDEXFIELD_ID));
      listIndexField.add(indexField);

      indexField = new IndexField();
      indexField.setName(Consts.INDEXFIELD_ORIGID);
      indexField.setText(targetDoc.get(Consts.INDEXFIELD_ORIGID));
      listIndexField.add(indexField);

      indexField = new IndexField();
      indexField.setName(Consts.INDEXFIELD_INTIME);
      indexField.setText(targetDoc.get(Consts.INDEXFIELD_INTIME));
      listIndexField.add(indexField);

      indexField = new IndexField();
      indexField.setName(Consts.INDEXFIELD_TABLE_ID);
      indexField.setText(targetDoc.get(Consts.INDEXFIELD_TABLE_ID));
      listIndexField.add(indexField);

      indexField = new IndexField();
      indexField.setName(Consts.INDEXFIELD_METADBID);
      indexField.setText(targetDoc.get(Consts.INDEXFIELD_METADBID));
      listIndexField.add(indexField);

      indexField = new IndexField();
      indexField.setName(Consts.INDEXFIELD_STORE_SERVERID);
      indexField.setText(targetDoc.get(Consts.INDEXFIELD_STORE_SERVERID));
      listIndexField.add(indexField);

      indexField = new IndexField();
      indexField.setName(Consts.INDEXFIELD_TBLNAME);
      indexField.setText(targetDoc.get(Consts.INDEXFIELD_TBLNAME));
      listIndexField.add(indexField);
      
    
      indexField = new IndexField();
      indexField.setName(Consts.INDEXFIELD_DOCNUM);
      indexField.setText(String.valueOf(scoreDocs[j].doc));
      listIndexField.add(indexField);

      // 根据listStruct添加其他字段
      for (MssMetadbStruct mms : listStruct) {
       indexField = new IndexField();
       indexField.setName(mms.getStcode());
       indexField.setText(targetDoc.get(mms.getStcode()));
       listIndexField.add(indexField);
      }

      // 添加推荐次数,评分次数,分数总和,指定的排名顺序
      IndexField[] docf = new IndexField[listIndexField.size()];
      int i = 0;
      for (IndexField a : listIndexField) {
       docf[i++] = a;
      }
      indexRecord.setDocfields(docf);
      // 添加Record对象的id
      indexRecord.setId(targetDoc.get(Consts.INDEXFIELD_ID));

      MssStoreServer mss = mssStoreServerDAO.getMssStoreServer(targetDoc.get(Consts.INDEXFIELD_STORE_SERVERID));
      Connection conn = null;
      ResultSet resultSet = null;
      Statement statement = null;
      try {
       conn = ConnectionUtil.getConnection(mss);
       String sql = "select * from " + targetDoc.get(Consts.INDEXFIELD_TBLNAME) + " where id = '" + targetDoc.get(Consts.INDEXFIELD_ID) + "'";
       statement = conn.createStatement();
       resultSet = statement.executeQuery(sql);
       while (resultSet.next()) {
        indexRecord.setRankcount(resultSet.getLong("rankcount")); // 添加评分次数
        indexRecord.setRanksum(resultSet.getLong("ranksum"));// 添加分数总和
        indexRecord.setUpcount(resultSet.getLong("upcount"));// 添加推荐次数
        indexRecord.setCustidx(resultSet.getLong("custidx"));// 指定的排名顺序
       }

      } catch (Exception ex) {
       ex.printStackTrace();
      } finally {
       ConnectionUtil.closeResultSet(resultSet);
       ConnectionUtil.closeStatement(statement);
       ConnectionUtil.closeConnection(conn);
      }
      if (null != indexRecord)
       recordsList.add(indexRecord);
      flag++;
      if (flag >= end)
       break;
     }
    }
   }
   //防止反编译用
   if (654789 == new Random().nextInt()){
           throw new Exception("try again 654789 == new Random().nextInt()");
    }
  } catch (Exception e) {
   e.printStackTrace();
   System.out.println("*******SearchService search方法查询索引报错 ********");
  } finally {
   try {
    if (isearcher != null) {
     isearcher.close();
    }
    if (directory != null) {
     directory.close();
    }
    //防止反编译用
    if (654789 == new Random().nextInt()){
            throw new Exception("try again 654789 == new Random().nextInt()");
     }
   } catch (Exception e) {
    System.out.println("*******SearchService isearcher,directory关闭报错 ********");
   }

  }
  return recordsList;
 }
 
 
 /**
  * 得到组合查询
  */
 public Query getAdvanceQuery(Map<Integer,QueryConditionVO[]> vos){
  BooleanQuery bQueryAll=null;
  Query query = null;
  if(vos==null){
   //return recordsList;
  }else{
   try{
    QueryConditionVO[] queryConditions=null;
    QueryConditionVO vo=null;
    bQueryAll=new BooleanQuery();
    BooleanQuery bQueryEvery=null;
    for(int i=0;i<vos.size();i++){//所有组查询
     queryConditions=vos.get(i);//具体某一组查询
     bQueryEvery=new BooleanQuery(); 
     for(int j=0;j<queryConditions.length;j++){//每一组查询内部拼接条件
      vo=queryConditions[j];//下一个vo
      query=getTermQuery(vo);//获得具体的某个查询对象
      if(queryConditions.length>1){//分组内部组查询组合
       if(vo.getOperator().equalsIgnoreCase("AND")){
        bQueryEvery.add(query,Occur.MUST);
       }else if(vo.getOperator().equalsIgnoreCase("NOT")){
        bQueryEvery.add(query,Occur.MUST_NOT);
       }else if(vo.getOperator().equalsIgnoreCase("OR")){
        bQueryEvery.add(query,Occur.SHOULD);
       }
      }  
  
     }
     //添加每一个分组的查询条件组合成组合查询条件,最外层外部组合查询条件
     if(queryConditions.length>1){
      if(vo.getGroupOperator().equalsIgnoreCase("AND")){
       bQueryAll.add(bQueryEvery,Occur.MUST);
      }else if(vo.getGroupOperator().equalsIgnoreCase("NOT")){
       bQueryAll.add(bQueryEvery,Occur.MUST_NOT);
      }else if(vo.getGroupOperator().equalsIgnoreCase("OR")){
       bQueryAll.add(bQueryEvery,Occur.SHOULD);
      }
     }else{
      if(vo.getGroupOperator().equalsIgnoreCase("AND")){
       bQueryAll.add(query,Occur.MUST);
      }else if(vo.getGroupOperator().equalsIgnoreCase("NOT")){
       bQueryAll.add(query,Occur.MUST_NOT);
      }else if(vo.getGroupOperator().equalsIgnoreCase("OR")){
       bQueryAll.add(query,Occur.SHOULD);
      }
     }
    }
   }catch(Exception e){
    e.printStackTrace();
   }finally{
    try{
     //防止反编译用
     if (654789 == new Random().nextInt()){
            throw new Exception("try again 654789 == new Random().nextInt()");
     }
    }catch(Exception e){
     e.printStackTrace();
    }
    
   }
 
  }
  System.out.println("bqueryall="+bQueryAll.toString());
  return bQueryAll;
 }
 
 /**
  * 
  * @param  vo
  * @return
  */

 public Query getTermQuery(QueryConditionVO vo){
  Query query=null;
  QueryParser queryParser=null;
  try{
   if(vo.getQueryType().equalsIgnoreCase(Consts.QUERYTYPE_CONTAINS) 
     || vo.getQueryType().equalsIgnoreCase(Consts.QUERYTYPE_CONTAINSNONE)
     || vo.getQueryType().equalsIgnoreCase(Consts.QUERYTYPE_CONTAINSALL)){//包含以下关键词
       queryParser=new MultiFieldQueryParser(Version.LUCENE_33,vo.getStypeIds(),new IKAnalyzer());
     
    if(vo.getQueryType().equalsIgnoreCase(Consts.QUERYTYPE_CONTAINSNONE)){//不包括此关键词
     vo.setOperator("not");//设置内部操作关系为must_not类型,不满足此条件
    }else if(vo.getQueryType().equalsIgnoreCase(Consts.QUERYTYPE_CONTAINSALL)){
     queryParser.setDefaultOperator(Operator.AND);//关键字经过IKAnalyzer分词后仍然为并且的关系
    }
    queryParser.setPhraseSlop(vo.getPhraseSlop());
    try {
     
      query=queryParser.parse(vo.getKeyWords()[0]);
      
    } catch (ParseException e) {
     e.printStackTrace();
    }
   }else if(vo.getQueryType().equalsIgnoreCase(Consts.QUERYTYPE_RANGE)){//范围查询
    
    query=new TermRangeQuery(vo.getStypeIds()[0],vo.getKeyWords()[0], vo.getKeyWords()[1], true, true);
    
   }else if(vo.getQueryType().equalsIgnoreCase(Consts.QUERYTYPE_FUZZY)){//模糊查询
    
    query=new FuzzyQuery(new Term(vo.getStypeIds()[0],vo.getKeyWords()[0]));
    
   }else if(vo.getQueryType().equalsIgnoreCase(Consts.QUERYTYPE_WILDCARD)){//通配符查询
    query=new WildcardQuery(new Term(vo.getStypeIds()[0],vo.getKeyWords()[0])); 
   }
   query.setBoost(vo.getRankWeight());//设置权重,改变得分情况,原有得分乘以设定的值为改变后的查询结果得分情况,默认设置为1
   //防止反编译用
   if (654789 == new Random().nextInt()){
          throw new Exception("try again 654789 == new Random().nextInt()");
   }
  }catch(Exception e){
   e.printStackTrace();
  }finally{
   try{
    //防止反编译用
    if (654789 == new Random().nextInt()){
           throw new Exception("try again 654789 == new Random().nextInt()");
    }
   }catch(Exception e){
    e.printStackTrace();
   }
   
  }
  
  return query;
 }
 
}

 

 

 

import java.util.Map;
/**
 * @Describe 高级查询顶层条件对象
 */
public class QueryConditionQuestion {
//"start":"0","isDynsort":"0","metadbid":"123","end":"40"
 //分页:开始条数
 private int start=0;
 //是否排序
 private int isDynsory=0;
 //服务器id
 private String metadbId;
 //分页:结束条数
 private int end;
 //分组查询条件对象
 private Map<Integer,QueryConditionVO[]> queryConditions;
 
 public QueryConditionQuestion(){
  
 }
 public int getStart() {
  return start;
 }
 public void setStart(int start) {
  this.start = start;
 }
 public int getIsDynsory() {
  return isDynsory;
 }
 public void setIsDynsory(int isDynsory) {
  this.isDynsory = isDynsory;
 }

 public String getMetadbId() {
  return metadbId;
 }
 public void setMetadbId(String metadbId) {
  this.metadbId = metadbId;
 }
 public int getEnd() {
  return end;
 }
 public void setEnd(int end) {
  this.end = end;
 }
 public Map<Integer, QueryConditionVO[]> getQueryConditions() {
  return queryConditions;
 }
 public void setQueryConditions(Map<Integer, QueryConditionVO[]> queryConditions) {
  this.queryConditions = queryConditions;
 }

 
}

 

 

---------------------------

/**
 * A single condition of an advanced query (the lowest level of the request).
 *
 * <p>Example:
 * {@code {"phraseSlop":"5","keyWords":["2010","2011"],"rankWeight":"50","groupId":"1",
 * "operator":"and","groupOperator":"and","queryType":"range","stypeIds":["pubtime"]}}
 */
public class QueryConditionVO {

 /** Allowed distance between two words of a phrase; defaults to 0. */
 private int phraseSlop = 0;

 /** Weight of this condition; defaults to 1. */
 private int rankWeight = 1;

 /** Id of the group this condition belongs to. */
 private int groupId;

 /** Relation to the other conditions inside the same group: and / or / not; defaults to "OR". */
 private String operator = "OR";

 /** Relation between this condition's group and the other groups: and / or / not; defaults to "OR". */
 private String groupOperator = "OR";

 /** Query type, e.g. range, contains, containsnone, fuzzy, wildcard. */
 private String queryType;

 /** Keywords to search for. */
 private String[] keyWords;

 /** Fields to search in. */
 private String[] stypeIds;

 /** @return the phrase slop */
 public int getPhraseSlop() {
  return this.phraseSlop;
 }

 /** @param phraseSlop the phrase slop */
 public void setPhraseSlop(int phraseSlop) {
  this.phraseSlop = phraseSlop;
 }

 /** @return the weight of this condition */
 public int getRankWeight() {
  return this.rankWeight;
 }

 /** @param rankWeight the weight of this condition */
 public void setRankWeight(int rankWeight) {
  this.rankWeight = rankWeight;
 }

 /** @return the group id */
 public int getGroupId() {
  return this.groupId;
 }

 /** @param groupId the group id */
 public void setGroupId(int groupId) {
  this.groupId = groupId;
 }

 /** @return the in-group operator */
 public String getOperator() {
  return this.operator;
 }

 /** @param operator the in-group operator */
 public void setOperator(String operator) {
  this.operator = operator;
 }

 /** @return the between-groups operator */
 public String getGroupOperator() {
  return this.groupOperator;
 }

 /** @param groupOperator the between-groups operator */
 public void setGroupOperator(String groupOperator) {
  this.groupOperator = groupOperator;
 }

 /** @return the query type */
 public String getQueryType() {
  return this.queryType;
 }

 /** @param queryType the query type */
 public void setQueryType(String queryType) {
  this.queryType = queryType;
 }

 /** @return the keywords array as stored (not copied) */
 public String[] getKeyWords() {
  return this.keyWords;
 }

 /** @param keyWords the keywords; the array is stored as given */
 public void setKeyWords(String[] keyWords) {
  this.keyWords = keyWords;
 }

 /** @return the field names array as stored (not copied) */
 public String[] getStypeIds() {
  return this.stypeIds;
 }

 /** @param stypeIds the field names; the array is stored as given */
 public void setStypeIds(String[] stypeIds) {
  this.stypeIds = stypeIds;
 }

}

 

-------------------------------------------------------------------------------

调用格式规范

一、其他系统调用A系统高级组合查询接口需要传递如下xml字符串:

 

<?xml version="1.0" encoding="GBK"?>

<search ver="1" type="query">

<query question="advanceSearch">

<param name="jsonstr"

 value="字符串格式的查询条件">

</param>

</query>

</search>

 

二、json字符串格式的查询条件格式:

{"metadbId":"123","start":"0","isDynsort":"0","end":"40",

"queryConditions":[{"queryGroups":[{"groupId":"1","queryType":"range","phraseSlop":"5",

"rankWeight":"50","stypeIds":["pubtime"],

"keyWords":["2010","2011"],"groupOperator":"and",

"operator":"or"},    {"groupId":"1","queryType":"range","phraseSlop":"5",

"stypeIds":["pubtime"],"keyWords":["2006","2008"],

"groupOperator":"and","operator":"or"}]},

{"queryGroups":[{"groupId":"2","queryType":"contains","phraseSlop":"5",

"rankWeight":"50","stypeIds":["title","lib"],

"keyWords":["人生 you"],"groupOperator":"and",

"operator":"and"}]}]}

 

三、json字符串的查询条件格式对应表:

 

 

Json字符串示例:查询出【发布时间(pubtime)为2010年至2011年或者2006年至2008年】并且【标题(title)或者图书馆(lib)中含有人生和you关键字】的数据

{"metadbId":"123","start":"0","isDynsort":"0","end":"40",

"queryConditions":[{"queryGroups":[{"groupId":"1","queryType":"range","phraseSlop":"5",

"rankWeight":"50","stypeIds":["pubtime"],

"keyWords":["2010","2011"],"groupOperator":"and",

"operator":"or"},    {"groupId":"1","queryType":"range","phraseSlop":"5",

"stypeIds":["pubtime"],"keyWords":["2006","2008"],

"groupOperator":"and","operator":"or"}]},

{"queryGroups":[{"groupId":"2","queryType":"contains","phraseSlop":"5",

"rankWeight":"50","stypeIds":["title","lib"],

"keyWords":["人生 you"],"groupOperator":"and",

"operator":"and"}]}]}

 

A系统向B系统发送高级组合查询xml示例:

<?xml version="1.0" encoding="GBK"?>

<search ver="1" type="query">

<query question="advanceSearch">

<param name="jsonstr"

 value='{end:40,start:0,isDynsort:0,metadbId:"402881be3387158c01338716928d0000", queryConditions:[{queryGroups:[{"groupId":0,"groupOperator":"and",

"keyWords":["人生"], "operator":"OR",

"phraseSlop":0,"queryType":"contains",

"rankWeight":1,"stypeIds":["title"]}]},

{queryGroups:[{"groupId":0,"groupOperator":"and",

"keyWords":["人生艺术"],    

"operator":"OR","phraseSlop":0,

"queryType":"containsall","rankWeight":1,

"stypeIds":["title"]}]},

{queryGroups:[{"groupId":0,"groupOperator":"and",

"keyWords":["2006","2011"],

"operator":"OR","phraseSlop":0,

"queryType":"range","rankWeight":1,

"stypeIds":["pubtime"]}]}, {queryGroups:[{"groupId":0,"groupOperator":"and",

"keyWords":["test"],"operator":"OR",

"phraseSlop":0,"queryType":"fuzzy",

"rankWeight":1,"stypeIds":["title"]}]},

{queryGroups:[{"groupId":0,"groupOperator":"and",

"keyWords":["t*"],"operator":"OR",

"phraseSlop":0,"queryType":"wildcard",

"rankWeight":1,"stypeIds":["title"]}]}]}'>

</param>

</query>

</search>

 

© 著作权归作者所有

一枚Sir
粉丝 119
博文 209
码字总数 350904
作品 0
朝阳
架构师
私信 提问
kibana使用的lucene查询语法

kibana在ELK阵营中用来查询展示数据 elasticsearch构建在Lucene之上,过滤器语法和Lucene相同 kibana4官方演示页面 全文搜索 在搜索栏输入login,会返回所有字段值中包含login的文档 使用双引...

code_cj
2015/12/30
835
0
Lucene查询语法详解

Lucene提供了丰富的API来组合定制你所需要的查询器,同时也可以利用Query Parser提供的强大的查询语法解析来构造你想要的查询器。本文章详细的介绍了Lucene的查询语法。通过Java语法分析器把...

黄平俊
2009/05/26
33.8K
7
MySQL单表多字段模糊查询解决方法

在最近的一个项目需要实现在单表中对多字段进行多个关键字的模糊查询,但这数个关键字并不一定都存在于某个字段 例如现有table表,其中有title,tag,description三个字段,分别记录一条资料...

BearCatYN
2014/09/20
6K
1
Lucene小记(入门篇),索引创建、更新、删除、查找等操作。

虽然使用Lucene已经有一段时间了,但是仍不敢妄加评论Lucene的好与坏,毕竟目前接触的全文索引技术只有Lucene。而且Lucene并不是真正意义上的引擎,只算的上是Java开发的全文索引工具包。与传...

Jialy
2014/06/26
6.6K
9
Elasticsearch 常用基本查询

安装启动很简单,参考官网步骤:https://www.elastic.co/downloads/elasticsearch   为了介绍Elasticsearch中的不同查询类型,我们将对带有下列字段的文档进行搜索:title(标题),autho...

小爪进击者
2018/12/25
24
0

没有更多内容

加载失败,请刷新页面

加载更多

Kafka实战(五) - 核心API及适用场景全面解析

1 四个核心API ● Producer API 允许一个应用程序发布一串流式的数据到一个或者多个Kafka topic。 ● Consumer API 允许一个应用程序订阅一个或多个topic ,并且对发布给他们的流式数据进行处...

JavaEdge
今天
11
0
实现线程的第三种方式——Callable & Future

Callable Runnable 封装一个异步运行的任务, 可以把它想象成为一个没有参数和返回值的异步方 法。Callable 与 Runnable 类似, 但是有返回值。Callable 接口是一个参数化的类型, 只有一 个...

ytuan996
今天
12
0
OSChina 周六乱弹 —— 不要摁F了!

Osc乱弹歌单(2019)请戳(这里) 【今日歌曲】 @巴拉迪维 : 朴树写的词曲都给人一种莫名的失落感,不过这首歌他自己却没有唱,换成赵传这种高音阶嘶喊的确很好,低沉但却有力,老男人的呐喊...

小小编辑
今天
24
1
Android Binder机制 - interface_cast和asBinder讲解

研究Android底层代码时,尤其是Binder跨进程通信时,经常会发现interface_cast和asBinder,很容易被这两个函数绕晕,下面来讲解一下: interface_cast 下面根据下述ICameraClient例子进行分析...

天王盖地虎626
昨天
13
0
计算机实现原理专题--存储器的实现(二)

计算机实现原理专题--存储器的实现(一)中描述了一种可以记住输入端变化的装置。现需要对其功能进行扩充,我们将上面的开关定义为置位,下面的开关定义为复位,然后需要增加一个保持位,当保...

FAT_mt
昨天
10
0

没有更多内容

加载失败,请刷新页面

加载更多

返回顶部
顶部