文档章节

flume-ng 自定义拦截器,对header中的字段进行正则匹配分离出更多header

c
 chunhei2008
发布于 2015/03/17 17:03
字数 529
阅读 4.2K
收藏 0


代码如下:

package com.wy.flume.interceptor;

import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;
import org.apache.flume.interceptor.RegexExtractorInterceptorPassThroughSerializer;
import org.apache.flume.interceptor.RegexExtractorInterceptorSerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;

/**
 * Flume interceptor that applies a regular expression to either the event body or the value of a
 * single event header, and writes each capture group into a header of its own.
 *
 * <p>Configuration keys read by {@link Builder}:
 * <ul>
 *   <li>{@code regex} - required; the pattern whose capture groups are extracted.</li>
 *   <li>{@code serializers} - required; whitespace-separated serializer ids. The i-th id handles
 *       the i-th capture group. Each id needs {@code serializers.<id>.name} (target header name)
 *       and may set {@code serializers.<id>.type} (serializer class name, {@code DEFAULT} when
 *       omitted).</li>
 *   <li>{@code extractorHeader} - optional boolean, default {@code false}. When {@code true} the
 *       regex is matched against a header value instead of the body.</li>
 *   <li>{@code extractorHeaderKey} - name of the header to match; required when
 *       {@code extractorHeader} is {@code true}.</li>
 * </ul>
 */
public class RegexExtractorHeaderInterceptor implements Interceptor {

    static final String REGEX = "regex";
    static final String SERIALIZERS = "serializers";

    static final String EXTRACTOR_HEADER = "extractorHeader";
    static final boolean DEFAULT_EXTRACTOR_HEADER = false;
    static final String EXTRACTOR_HEADER_KEY = "extractorHeaderKey";

    private static final Logger logger = LoggerFactory
        .getLogger(RegexExtractorHeaderInterceptor.class);

    private final Pattern regex;
    private final List<NameAndSerializer> serializers;

    /** When true the regex input is a header value; otherwise it is the event body. */
    private final boolean extractorHeader;
    /** Header whose value is matched; only consulted when {@link #extractorHeader} is true. */
    private final String extractorHeaderKey;

    private RegexExtractorHeaderInterceptor(Pattern regex,
        List<NameAndSerializer> serializers, boolean extractorHeader, String extractorHeaderKey) {
        this.regex = regex;
        this.serializers = serializers;
        this.extractorHeader = extractorHeader;
        this.extractorHeaderKey = extractorHeaderKey;
    }

    @Override
    public void initialize() {
        // NO-OP...
    }

    @Override
    public void close() {
        // NO-OP...
    }

    /**
     * Matches the configured regex against the selected input and copies every capture group that
     * has a serializer into the event headers.
     *
     * <p>The event is returned unchanged when the input is absent (for example the configured
     * header is not set on this event) or when the regex does not match.
     *
     * @param event the event to enrich; its header map is modified in place
     * @return the same event instance
     */
    @Override
    public Event intercept(Event event) {
        Map<String, String> headers = event.getHeaders();

        String input = selectInput(event, headers);
        if (input == null) {
            // Pattern.matcher(null) would throw a NullPointerException; an event that simply
            // lacks the source header must pass through untouched instead.
            logger.debug("No value available for header {}; leaving event unchanged",
                extractorHeaderKey);
            return event;
        }

        Matcher matcher = regex.matcher(input);
        if (!matcher.find()) {
            return event;
        }

        int groupCount = matcher.groupCount();
        // Capture groups beyond the number of configured serializers have no target header.
        int usable = Math.min(groupCount, serializers.size());
        for (int i = 0; i < usable; i++) {
            NameAndSerializer target = serializers.get(i);
            logger.debug("Serializing {} using {}", target.headerName, target.serializer);
            // Capture groups are 1-based; group 0 is the whole match.
            headers.put(target.headerName, target.serializer.serialize(matcher.group(i + 1)));
        }
        if (usable < groupCount) {
            logger.debug("Skipping group {} to {} due to missing serializer", usable, groupCount);
        }
        return event;
    }

    /**
     * Picks the text the regex is applied to.
     *
     * @return the header value or the UTF-8 decoded body, or {@code null} when it is absent
     */
    private String selectInput(Event event, Map<String, String> headers) {
        if (extractorHeader) {
            return headers.get(extractorHeaderKey);
        }
        byte[] body = event.getBody();
        return body == null ? null : new String(body, Charsets.UTF_8);
    }

    /**
     * Applies {@link #intercept(Event)} to every event and collects the non-null results in order.
     *
     * @param events the batch to process
     * @return a new list holding the intercepted events
     */
    @Override
    public List<Event> intercept(List<Event> events) {
        List<Event> intercepted = Lists.newArrayListWithCapacity(events.size());
        for (Event event : events) {
            Event interceptedEvent = intercept(event);
            if (interceptedEvent != null) {
                intercepted.add(interceptedEvent);
            }
        }
        return intercepted;
    }

    /** Reads the interceptor configuration and creates {@link RegexExtractorHeaderInterceptor}. */
    public static class Builder implements Interceptor.Builder {

        private Pattern regex;
        private List<NameAndSerializer> serializerList;

        private boolean extractorHeader;
        private String extractorHeaderKey;

        private final RegexExtractorInterceptorPassThroughSerializer defaultSerializer =
            new RegexExtractorInterceptorPassThroughSerializer();

        /**
         * Parses and validates the configuration.
         *
         * @param context the interceptor's configuration properties
         * @throws IllegalArgumentException if a required property is missing or empty
         * @throws java.util.regex.PatternSyntaxException if {@code regex} is not a valid pattern
         */
        @Override
        public void configure(Context context) {
            String regexString = context.getString(REGEX);
            Preconditions.checkArgument(!StringUtils.isEmpty(regexString),
                "Must supply a valid regex string");
            // Pattern.compile itself rejects an invalid expression; no extra probing is needed.
            regex = Pattern.compile(regexString);
            configureSerializers(context);

            extractorHeader = context.getBoolean(EXTRACTOR_HEADER, DEFAULT_EXTRACTOR_HEADER);
            if (extractorHeader) {
                extractorHeaderKey = context.getString(EXTRACTOR_HEADER_KEY);
                Preconditions.checkArgument(!StringUtils.isEmpty(extractorHeaderKey),
                    "Must supply " + EXTRACTOR_HEADER_KEY + " when " + EXTRACTOR_HEADER
                        + " is true");
            } else {
                // Do not keep a key left over from an earlier configure() call.
                extractorHeaderKey = null;
            }
        }

        /** Builds the ordered serializer list from the {@code serializers} sub-properties. */
        private void configureSerializers(Context context) {
            String serializerListStr = context.getString(SERIALIZERS);
            Preconditions.checkArgument(!StringUtils.isEmpty(serializerListStr),
                "Must supply at least one name and serializer");

            // Trim first so leading whitespace does not produce an empty serializer id.
            String[] serializerNames = serializerListStr.trim().split("\\s+");

            Context serializerContexts =
                new Context(context.getSubProperties(SERIALIZERS + "."));

            serializerList = Lists.newArrayListWithCapacity(serializerNames.length);
            for (String serializerName : serializerNames) {
                Context serializerContext = new Context(
                    serializerContexts.getSubProperties(serializerName + "."));
                String type = serializerContext.getString("type", "DEFAULT");
                String name = serializerContext.getString("name");
                Preconditions.checkArgument(!StringUtils.isEmpty(name),
                    "Supplied name cannot be empty.");

                if ("DEFAULT".equals(type)) {
                    serializerList.add(new NameAndSerializer(name, defaultSerializer));
                } else {
                    serializerList.add(new NameAndSerializer(name, getCustomSerializer(
                        type, serializerContext)));
                }
            }
        }

        /**
         * Instantiates and configures a serializer by class name.
         *
         * @param clazzName fully qualified name of a {@link RegexExtractorInterceptorSerializer}
         * @param context configuration handed to the new serializer
         * @return the configured serializer
         * @throws RuntimeException if the class cannot be loaded, instantiated or configured
         */
        private RegexExtractorInterceptorSerializer getCustomSerializer(
            String clazzName, Context context) {
            try {
                RegexExtractorInterceptorSerializer serializer =
                    (RegexExtractorInterceptorSerializer) Class.forName(clazzName).newInstance();
                serializer.configure(context);
                return serializer;
            } catch (Exception e) {
                logger.error("Could not instantiate event serializer.", e);
                // Throwing the propagated exception makes the failure path explicit; there is
                // no fallback to the default serializer.
                throw Throwables.propagate(e);
            }
        }

        /**
         * Creates the interceptor from the state collected by {@link #configure(Context)}.
         *
         * @return a new interceptor instance
         * @throws IllegalArgumentException if {@code configure} has not produced a regex and at
         *     least one serializer
         */
        @Override
        public Interceptor build() {
            Preconditions.checkArgument(regex != null,
                "Regex pattern was misconfigured");
            Preconditions.checkArgument(serializerList != null && serializerList.size() > 0,
                "Must supply a valid group match id list");
            return new RegexExtractorHeaderInterceptor(regex, serializerList, extractorHeader,
                extractorHeaderKey);
        }
    }

    /** Pairs a target header name with the serializer that produces its value. */
    static class NameAndSerializer {
        private final String headerName;
        private final RegexExtractorInterceptorSerializer serializer;

        public NameAndSerializer(String headerName,
            RegexExtractorInterceptorSerializer serializer) {
            this.headerName = headerName;
            this.serializer = serializer;
        }
    }
}

应用配置:

hdp2.sources.s1.interceptors = i2
hdp2.sources.s1.interceptors.i2.type = com.wy.flume.interceptor.RegexExtractorHeaderInterceptor$Builder
hdp2.sources.s1.interceptors.i2.regex = ([^_]+)_(\\d{8}).*
hdp2.sources.s1.interceptors.i2.extractorHeader = true
hdp2.sources.s1.interceptors.i2.extractorHeaderKey = basename
hdp2.sources.s1.interceptors.i2.serializers = s1 s2
hdp2.sources.s1.interceptors.i2.serializers.s1.name = log_type
hdp2.sources.s1.interceptors.i2.serializers.s2.name = log_day

© 著作权归作者所有

c

chunhei2008

粉丝 5
博文 27
码字总数 8480
作品 2
广州
高级程序员
私信 提问
加载中

评论(0)

Flume学习系列(四)---- Interceptors(拦截器)

前言:flume通过使用Interceptors(拦截器)实现修改和过滤事件的功能。举个栗子,一个网站每天产生海量数据,但是可能会有很多数据是不完整的(缺少重要字段),或冗余的,如果不对这些数据...

小北觅
2018/08/21
0
0
阿里大数据工程师教你怎样理解Flume

Flume是干什么的? 收集日志的 flume如何搜集日志? 我们把flume比作情报人员 (1)搜集信息 (2)获取记忆信息 (3)传递报告间谍信息 flume是怎么完成上面三件事情的,三个组件: source: ...

JAVA丶学习
2018/04/14
0
0
【翻译】Flume 1.8.0 User Guide(用户指南) Processors

翻译自官网flume1.8用户指南,原文地址:Flume 1.8.0 User Guide 篇幅限制,分为以下5篇: 【翻译】Flume 1.8.0 User Guide(用户指南) 【翻译】Flume 1.8.0 User Guide(用户指南) source 【翻...

osc_bgoqj0sj
2019/02/13
1
0
Flume NG 学习笔记(八)Interceptors(拦截器)测试

版权声明:本文为博主原创文章,未经博主允许不得转载。 目录(?)[+] 拦截器主要是对事件的header信息信息操作,要么直接忽略他,要么修改他的数据 一、Event Serializers file_roll sink 和h...

jackwxh
2018/06/29
0
0
flume-拦截器、channel选择器、sink组合sink处理器

Flume Interceptors Flume有能力修改/删除流程中的events。这是在拦截器(interceptor)的帮助下完成的。拦截器(Interceptors)是实现org.apache.flume.interceptor.Interceptor接口的类。一...

osc_ngi4bcdo
2018/06/08
2
0

没有更多内容

加载失败,请刷新页面

加载更多

直接显示StackOverflow的答题日期, 增加评论区回复的时间显示 ,修改时间显示到小时分。

// ==UserScript==// @name 直接显示StackOverflow的答题日期, 增加评论区回复的时间显示 ,修改时间显示到小时分。// @namespace http://tampermonkey.net/// @version ...

FalconChen
今天
36
0
Shader笔记_005 纹理

纹理最初的目的就是使用一张图片来控制模型的外观,通过纹理映射技术 我们可以把一张图粘贴在物体表面,逐纹素的控制模型的颜色。 通常美术建模的时候也会在软件里利用纹理展开技术把纹理展开成...

STONE-CITY
今天
12
0
iOS MVVM 与RAC结合使用

MVVM配合 RAC 更能发挥的淋漓尽致。 我们把 MVVM 第一篇的例子 KVO 的事件 替换成 配合RAC 框架使用, OC的话直接导入 : pod 'ReactiveObjC' Swift 直接用 RXSwift就可以。 把 ViewModel里加...

T型人才追梦者
今天
22
1
OSChina 周一乱弹 —— 影响心情的三座大山

Osc乱弹歌单(2020)请戳(这里) 【今日歌曲】 @薛定谔的兄弟 :分享洛神有语创建的歌单「我喜欢的音乐」: 《浮生(inst.)》- 忘乡 / 墨凡悦 手机党少年们想听歌,请使劲儿戳(这里) @凝小紫...

小小编辑
今天
37
0
Unity中头发渲染

头发与普通PBR 材质最大的区别是 头发是各项异性的高光, 参考实现主要为下面文章 http://web.engr.oregonstate.edu/~mjb/cs519/Projects/Papers/HairRendering.pdf 头发包含 2个高光,以及高...

liyong2
今天
20
0

没有更多内容

加载失败,请刷新页面

加载更多

返回顶部
顶部