elasticsearch批量导入数据重复问题

原创
2017/06/14 09:13
阅读数 6.1K
package com.zhidou.es.export;

import com.zhidou.es.config.ExporES;
import com.zhidou.es.config.InitES;
import com.zhidou.es.util.DateUtil;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHits;

import java.io.*;
import java.util.Date;


/**
 * description
 * Created by 08101 on 2017/6/5.
 */
/**
 * Exports every document of an Elasticsearch index to a local file (one JSON
 * document per line) and bulk re-imports that file back into the index.
 * Created by 08101 on 2017/6/5.
 */
public class ExportES {

    /** Index / type operated on by both export and import. */
    private static final String INDEX_NAME = "i_his_adas_data";
    private static final String TYPE_NAME = "his_adas_data";
    /** Local dump file, one JSON source per line. */
    private static final String DUMP_FILE = "eseses";
    /** Number of documents per bulk request on import. */
    private static final int BULK_SIZE = 10000;

    /**
     * Scrolls through the whole index and appends each hit's JSON source as
     * one line to {@link #DUMP_FILE}.
     */
    public void getESinformatation() {
        Client client = InitES.initClient();
        // SCAN tells ES we need no scoring/sorting, just raw results;
        // setScroll keeps the scroll cursor alive between requests.
        SearchResponse response = client.prepareSearch(INDEX_NAME).setTypes(TYPE_NAME)
                .setQuery(QueryBuilders.matchAllQuery()).setSize(50000)
                .setScroll(new TimeValue(600000))
                .setSearchType(SearchType.SCAN).execute().actionGet();
        String scrollId = response.getScrollId();
        // try-with-resources guarantees the writer is closed even if a
        // scroll request fails mid-export (the old code leaked it on error).
        try (BufferedWriter out = new BufferedWriter(new FileWriter(DUMP_FILE, true))) {
            // Keep pulling pages (up to setSize docs each) until the scroll
            // returns an empty page, which marks the end of the index.
            while (true) {
                SearchResponse page = client.prepareSearchScroll(scrollId)
                        .setScroll(new TimeValue(1000000))
                        .execute().actionGet();
                SearchHits hits = page.getHits();
                if (hits.getHits().length == 0) {
                    break; // scroll exhausted — all documents exported
                }
                // Always continue with the scroll id of the LATEST response;
                // Elasticsearch may return a new id for the next page.
                scrollId = page.getScrollId();
                System.out.println("查询数量 :" + hits.getHits().length);
                for (int i = 0; i < hits.getHits().length; i++) {
                    out.write(hits.getHits()[i].getSourceAsString());
                    out.write("\r\n");
                }
            }
            System.out.println("查询结束");
        } catch (IOException e) {
            // FileNotFoundException is an IOException, one catch suffices
            e.printStackTrace();
        }
    }

    /**
     * Reads {@link #DUMP_FILE} line by line and bulk-indexes each line back
     * into the index, flushing every {@link #BULK_SIZE} documents.
     */
    public void insertESInformation() {
        Client client = ExporES.initClient();
        try (BufferedReader br = new BufferedReader(new FileReader(DUMP_FILE))) {
            String json;
            int count = 0;
            BulkRequestBuilder bulkRequest = client.prepareBulk();
            while ((json = br.readLine()) != null) {
                bulkRequest.add(client.prepareIndex(INDEX_NAME, TYPE_NAME).setSource(json));
                // Increment BEFORE the modulo test so the first flush happens
                // after BULK_SIZE docs, not after the very first one.
                count++;
                if (count % BULK_SIZE == 0) {
                    bulkRequest.execute().actionGet();
                    // BUG FIX: execute() does NOT clear a BulkRequestBuilder.
                    // A fresh builder must be created, otherwise every later
                    // flush re-sends all previously submitted actions — this
                    // was the cause of the duplicated documents.
                    bulkRequest = client.prepareBulk();
                    System.out.println(DateUtil.format(new Date()) + "提交了:" + count);
                }
            }
            // Flush the final partial batch, but only if it holds anything —
            // executing an empty bulk request throws on some ES versions.
            if (bulkRequest.numberOfActions() > 0) {
                bulkRequest.execute().actionGet();
            }
            System.out.println("插入完毕");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}

 

 

 

/**
* description
* Created by 08101 on 2017/6/13.
*/
public class mainEs {

    public static void main(String[] args) {

        ExportES exportES=new ExportES();
        exportES.getESinformatation();
        exportES.insertESInformation();

    }
}

 

展开阅读全文
打赏
0
0 收藏
分享
加载中
如果还有重复的你试一下如下代码:
BulkRequestBuilder bulkRequest = client.prepareBulk();

while ((json = br.readLine()) != null) {
    bulkRequest.add(client.prepareIndex("bigdata", "student").setSource(json));
    //每一千条提交一次
    if (count % 1000 == 0) {
        bulkRequest.execute().actionGet();
        //关键:execute() 不会清空 builder,必须重新创建一个,否则已提交的数据会被重复提交
        bulkRequest = client.prepareBulk();
        System.out.println("提交了:" + count);
    }
    count++;
}
2017/06/27 21:01
回复
举报
你这导入的数据还重复吗?
2017/06/27 20:58
回复
举报
更多评论
打赏
2 评论
0 收藏
0
分享
返回顶部
顶部