文档章节

flink统计根据账号每30秒 金额的平均值

o
 osc_x4h57ch8
发布于 2018/04/24 15:20
字数 434
阅读 0
收藏 0

精选30+云产品,助力企业轻松上云!>>>

package com.zetyun.streaming.flink;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import org.apache.flink.streaming.util.serialization.JSONDeserializationSchema;
import org.apache.flink.util.Collector;

import javax.annotation.Nullable;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Iterator;
import java.util.Properties;

/**
* Created by jyt on 2018/4/10.
* 基于账号计算每30秒 金额的平均值
*/
public class EventTimeAverage {

public static void main(String[] args) throws Exception {
final ParameterTool parameterTool = ParameterTool.fromArgs(args);
String topic = parameterTool.get("topic", "accountId-avg");
Properties properties = parameterTool.getProperties();
properties.setProperty("bootstrap.servers", "192.168.44.101:9092");
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
ObjectMapper objectMapper = new ObjectMapper();
SingleOutputStreamOperator<ObjectNode> source = env.addSource(new FlinkKafkaConsumer010(
topic,
new JSONDeserializationSchema(),
properties));
//设置WaterMarks方式一
/*SingleOutputStreamOperator<ObjectNode> objectNodeOperator = source.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ObjectNode>(Time.seconds(15)) {
@Override
public long extractTimestamp(ObjectNode element) {
SimpleDateFormat format = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
Date eventTime = null;
try {
eventTime = format.parse(element.get("eventTime").asText());
} catch (ParseException e) {
e.printStackTrace();
}
return eventTime.getTime();
}
});*/
//设置WaterMarks方式二
SingleOutputStreamOperator<ObjectNode> objectNodeOperator = source.assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<ObjectNode>() {
public long currentMaxTimestamp = 0L;
public static final long maxOutOfOrderness = 10000L;//最大允许的乱序时间是10s
Watermark a = null;
SimpleDateFormat format = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");


@Nullable
@Override
public Watermark getCurrentWatermark() {
a = new Watermark(currentMaxTimestamp - maxOutOfOrderness);
return a;
}

@Override
public long extractTimestamp(ObjectNode jsonNodes, long l) {
String time = jsonNodes.get("eventTime").asText();
long timestamp = 0;
try {
timestamp = format.parse(time).getTime();
} catch (ParseException e) {
e.printStackTrace();
}
currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp);
return timestamp;
}
});
KeyedStream<Tuple3<String, Double, String>, Tuple> keyBy = objectNodeOperator.map(new MapFunction<ObjectNode, Tuple3<String, Double, String>>() {
@Override
public Tuple3<String, Double, String> map(ObjectNode jsonNodes) throws Exception {
System.out.println(jsonNodes.get("accountId").asText() + "==map====" + jsonNodes.get("amount").asDouble() + "===map===" + jsonNodes.get("eventTime").asText());
return new Tuple3<String, Double, String>(jsonNodes.get("accountId").asText(), jsonNodes.get("amount").asDouble(), jsonNodes.get("eventTime").asText());
}
}).keyBy(0);


SingleOutputStreamOperator<Object> apply = keyBy.window(TumblingEventTimeWindows.of(Time.seconds(30))).apply(new WindowFunction<Tuple3<String,Double,String>, Object, Tuple, TimeWindow>() {
@Override
public void apply(Tuple tuple, TimeWindow timeWindow, Iterable<Tuple3<String, Double, String>> iterable, Collector<Object> collector) throws Exception {
Iterator<Tuple3<String, Double, String>> iterator = iterable.iterator();
int count =0;
double num = 0.0;
///Tuple2<String, Double> result = null;
Tuple3<String, Double, String> next = null;
String accountId = null ;
while (iterator.hasNext()) {
next = iterator.next();
System.out.println(next);
accountId=next.f0;
num += next.f1;
count++;
}
if (next != null) {

collector.collect(new Tuple2<String, Double>(accountId,num/count));
}
}
});


apply.print();
//apply.addSink(new FlinkKafkaProducer010<String>("192.168.44.101:9092","wiki-result",new SimpleStringSchema()));
env.execute("AverageDemo");
}

}
o
粉丝 0
博文 500
码字总数 0
作品 0
私信 提问
加载中
请先登录后再评论。

暂无文章

pyhon

cython 相关的帖子. http://blog.behnel.de/categories/cython.html https://www.nexedi.com/

MtrS
9分钟前
9
0
多处理与线程Python - Multiprocessing vs Threading Python

问题: I am trying to understand the advantages of multiprocessing over threading . 我试图了解多处理优于线程的优势。 I know that multiprocessing gets around the Global Interpret......

法国红酒甜
14分钟前
9
0
格式编号始终显示2个小数位 - Format number to always show 2 decimal places

问题: I would like to format my numbers to always display 2 decimal places, rounding where applicable. 我想将数字格式化为始终显示2个小数位,并在适用的情况下四舍五入。 Examples...

富含淀粉
今天
22
0
Docker可视化工具Portainer

1 前言 从没想到Docker也有可视化的工具,因为它的命令还是非常清晰简单的。无聊搜了一下,原来已经有很多Docker可视化工具了。如DockerUI、Shipyard、Rancher、Portainer等。查看对比了一番...

南瓜慢说
今天
20
0
日志系统新贵 Loki,真香!!

最近,在对公司容器云的日志方案进行设计的时候,发现主流的ELK或者EFK比较重,再加上现阶段对于ES复杂的搜索功能很多都用不上最终选择了Grafana开源的Loki日志系统,下面介绍下Loki的背景。...

庞陆阳
今天
14
0

没有更多内容

加载失败,请刷新页面

加载更多

返回顶部
顶部