Hive script

元禛慎独
Published 2016/05/06 17:11
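
The script below is a daily ETL driver: it pulls the previous day's log files from a remote host with rsync, loads each file into a date-partitioned table in the superhero_bi Hive database, and runs a few cleanup transforms (rolling-partition maintenance, per-user deduplication, and filtering out internal GS accounts).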

#!/bin/sh
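# Optional $1: target date as YYYYMMDD. Defaults to yesterday when omitted.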


date_yes=$1

if [ -z "${date_yes}" ]; then

    date_yes=`date -d "-1 days" +"%Y%m%d"`

fi

echo ${date_yes}
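
# Also derive the previous day and the date 7 days back; both are used for
# rolling-partition cleanup in the paylog block below.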

date_bef_yes=`date -d "${date_yes} -1 day" +"%Y%m%d"`

date_bef_7=`date -d "${date_yes} -7 day" +"%Y%m%d"`

echo ${date_bef_yes} ${date_bef_7}
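
# Remote source host and path, local staging path, and target Hive database.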


remote_host="hadoop@192.168.1.27"

sour_addr="/home/data/superhero/"

local_addr="/data/superhero"

data_base="superhero_bi"
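
# Field delimiters for parsing the viso_config dumps at the end of the script.
# Each is doubly escaped: the shell turns \\\" into \", and HiveQL's string
# quoting then turns \" into a plain quote, so split()/str_to_map() receive
# '", "' and '": "'.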


ser="\\\", \\\""

map1="\\\", \\\""

map2="\\\": \\\""

ios="ios"

pub="pub"
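
# Fetch yesterday's viso_config dumps. The shell skips the blank and comment
# lines that follow '&&', so the first hive block below runs only if this
# rsync succeeds.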


rsync -arvzP ${remote_host}:${sour_addr}/viso_config/*_${date_yes} /data/superhero/bi_tool/viso_config/ &&\

#rsync --progress -arvzP ${remote_host}:${sour_addr}/reg_act/act_${date_yes} ${local_addr}/act/act_${date_yes} && \

#rsync --progress -arvzP ${remote_host}:${sour_addr}/reg_act/reg_${date_yes} ${local_addr}/reg/reg_${date_yes} && \

#rsync --progress -arvzP ${remote_host}:${sour_addr}/log_redis/info_${date_yes} ${local_addr}/info/info_${date_yes} && \

#rsync --progress -arvzP ${remote_host}:${sour_addr}/log_redis/all_pet_${date_yes} ${local_addr}/pet/all_pet_${date_yes} && \

#rsync --progress -arvzP ${remote_host}:${sour_addr}/log_redis/equip_${date_yes} ${local_addr}/equip/equip_${date_yes} && \

#rsync --progress -arvzP ${remote_host}:${sour_addr}/log_redis/item_${date_yes} ${local_addr}/item/item_${date_yes} && \

#rsync --progress -arvzP ${remote_host}:${sour_addr}/log_redis/card_${date_yes} ${local_addr}/card/card_${date_yes} && \

#rsync --progress -arvzP ${remote_host}:${sour_addr}/log_redis/card_super_step_${date_yes} ${local_addr}/super_step/card_super_step_${date_yes} && \

#rsync --progress -arvzP ${remote_host}:${sour_addr}/log_redis/all_scores_${date_yes} ${local_addr}/scores/all_scores_${date_yes} && \

#rsync --progress -arvzP ${remote_host}:${sour_addr}/paylog/paylog_${date_yes} ${local_addr}/paylog/paylog_${date_yes} && \

#rsync --progress -arvzP ${remote_host}:${sour_addr}/action_log/action_log_${date_yes} ${local_addr}/action_log/action_log_${date_yes} && \

#rsync --progress -arvzP ${remote_host}:${sour_addr}/reg_act/mix_hc_${date_yes} ${local_addr}/mix_hc/mix_hc_${date_yes} && \

#rsync --progress -arvzP ${remote_host}:${sour_addr}/reg_act/ios_hc_${date_yes} ${local_addr}/ios_hc/ios_hc_${date_yes} && \

#rsync --progress -arvzP ${remote_host}:${sour_addr}/log_redis/vip_info_${date_yes} ${local_addr}/vip_info/vip_info_${date_yes} && \

#rsync --progress -arvzP ${remote_host}:${sour_addr}/spendlog/spendlog_${date_yes} ${local_addr}/spendlog/spendlog_${date_yes} && \

hive -S -e "

use ${data_base};
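
-- Load yesterday's paylog (no LOCAL keyword, so the path is read from HDFS),
-- then rebuild the cumulative paylog through the scratch table
-- mid_paylog_all_ext: the previous cumulative partition plus the new raw rows
-- become today's partition, and only the last 7 daily snapshots are kept.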

load data  inpath '${local_addr}/paylog/paylog_${date_yes}' overwrite into table ${data_base}.raw_paylog partition (ds='${date_yes}');

insert overwrite table mid_paylog_all_ext partition(ds='${date_bef_yes}') select order_id,admin,gift_coin,level,old_coin,order_coin,order_money,order_time,platform_2,product_id,raw_data,reason,scheme_id,user_id from mid_paylog_all where ds='${date_bef_yes}';


insert overwrite table mid_paylog_all_ext partition(ds='${date_yes}') select order_id,admin,gift_coin,level,old_coin,order_coin,order_money,order_time,platform_2,product_id,raw_data,reason,scheme_id,user_id from raw_paylog where ds='${date_yes}';

insert overwrite table mid_paylog_all partition(ds='${date_yes}') select order_id,admin,gift_coin,level,old_coin,order_coin,order_money,order_time,platform_2,product_id,raw_data,reason,scheme_id,user_id from mid_paylog_all_ext;
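
-- Record accounts whose platform_2 is admin_test (internal GS users);
-- mid_gs_user is used further down to filter the spend log.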

insert overwrite table mid_gs_user partition(ds='${date_yes}') select distinct user_id from mid_paylog_all where ds='${date_yes}' and lower(platform_2) = 'admin_test';

alter table mid_paylog_all_ext drop partition(ds='${date_bef_yes}');

alter table mid_paylog_all_ext drop partition(ds='${date_yes}');

alter table mid_paylog_all drop partition(ds='${date_bef_7}');

"

hive -S -e "

use ${data_base};

load data  inpath '${local_addr}/act/act_${date_yes}' overwrite into table ${data_base}.raw_act partition (ds='${date_yes}');

"

hive -S -e "

use ${data_base};

load data  inpath '${local_addr}/reg/reg_${date_yes}' overwrite into table ${data_base}.raw_reg partition (ds='${date_yes}');

"

hive -S -e "

use ${data_base};

load data  inpath '${local_addr}/info/info_${date_yes}' overwrite into table ${data_base}.raw_info partition (ds='${date_yes}');
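
-- Same scratch-table pattern as the paylog block, but deduplicated:
-- row_number() keeps only the freshest row per uid (latest fresh_time), so
-- mid_info_all's new partition is a current snapshot of every known user.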

insert overwrite table mid_info_all_ext partition (ds='${date_bef_yes}') select uid,account,nick,platform_2,device,create_time,fresh_time,vip_level,level,zhandouli,food,metal,energy,nengjing,zuanshi,qiangnengzhichen,chaonengzhichen,xingdongli,xingling,jinbi,lianjingshi,shenen,gaojishenen,gaojinengjing,jingjichangdianshu from mid_info_all where ds='${date_bef_yes}';

insert overwrite table mid_info_all_ext partition (ds='${date_yes}') select uid,account,nick,platform_2,device,create_time,fresh_time,vip_level,level,zhandouli,food,metal,energy,nengjing,zuanshi,qiangnengzhichen,chaonengzhichen,xingdongli,xingling,jinbi,lianjingshi,shenen,gaojishenen,gaojinengjing,jingjichangdianshu from raw_info where ds='${date_yes}';


insert overwrite table mid_info_all partition(ds='${date_yes}') select t.uid,t.account,t.nick,t.platform_2,t.device,t.create_time,t.fresh_time,t.vip_level,t.level,t.zhandouli,t.food,t.metal,t.energy,t.nengjing,t.zuanshi,t.qiangnengzhichen,t.chaonengzhichen,t.xingdongli,t.xingling,t.jinbi,t.lianjingshi,t.shenen,t.gaojishenen,t.gaojinengjing,t.jingjichangdianshu from (select *, row_number() over (distribute by uid sort by fresh_time desc ) as rn from mid_info_all_ext ) t where t.rn<2;


alter table mid_info_all_ext drop partition(ds='${date_bef_yes}');

alter table mid_info_all_ext drop partition(ds='${date_yes}');


"

hive -S -e "

use ${data_base};

load data  inpath '${local_addr}/pet/all_pet_${date_yes}' overwrite into table ${data_base}.raw_pet partition (ds='${date_yes}');

"

hive -S -e "

use ${data_base};

load data  inpath '${local_addr}/equip/equip_${date_yes}' overwrite into table ${data_base}.raw_equip partition (ds='${date_yes}');

"

hive -S -e "

use ${data_base};

load data  inpath '${local_addr}/item/item_${date_yes}' overwrite into table ${data_base}.raw_item partition (ds='${date_yes}');

"

hive -S -e "

use ${data_base};

load data  inpath '${local_addr}/card/card_${date_yes}' overwrite into table ${data_base}.raw_card partition (ds='${date_yes}');

"

hive -S -e "

use ${data_base};

load data  inpath '${local_addr}/super_step/card_super_step_${date_yes}' overwrite into table ${data_base}.raw_super_step partition (ds='${date_yes}');

"

hive -S -e "

use ${data_base};

load data  inpath '${local_addr}/scores/all_scores_${date_yes}' overwrite into table ${data_base}.raw_scores partition (ds='${date_yes}');

"

hive -S -e "

use ${data_base};

load data  inpath '${local_addr}/action_log/action_log_${date_yes}' overwrite into table ${data_base}.raw_action_log partition (ds='${date_yes}');

"

hive -S -e "

use ${data_base};

load data  inpath '${local_addr}/mix_hc/mix_hc_${date_yes}' overwrite into table ${data_base}.raw_mix_hc partition (ds='${date_yes}');

"

hive -S -e "

use ${data_base};

load data  inpath '${local_addr}/ios_hc/ios_hc_${date_yes}' overwrite into table ${data_base}.raw_ios_hc partition (ds='${date_yes}');

"

hive -S -e "

use ${data_base};

load data  inpath '${local_addr}/vip_info/vip_info_${date_yes}' overwrite into table ${data_base}.raw_vip_info partition (ds='${date_yes}');

"

hive -S -e "

use ${data_base};

load data  inpath '${local_addr}/spendlog/spendlog_${date_yes}' overwrite into table ${data_base}.raw_spendlog partition (ds='${date_yes}');
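
-- Anti-join: keep only spendlog rows whose user_id has no match in
-- mid_gs_user, i.e. drop the internal admin_test accounts.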

insert overwrite table mid_spendlog_no_gs partition(ds='${date_yes}') select a.order_id,a.user_id,a.level,a.subtime,a.coin_num,a.coin_1st,a.coin_2nd,a.goods_type,a.goods_subtype,a.goods_name,a.goods_num,a.goods_price,a.goods_cnname,a.args from (select * from raw_spendlog where ds='${date_yes}')a left outer join (select user_id from mid_gs_user where ds='${date_yes}')b on a.user_id=b.user_id where b.user_id is null ;

"

hive -S -e "

use ${data_base};

load data local inpath '/data/superhero/bi_tool/viso_config/ios_ser_list_${date_yes}' overwrite into table ${data_base}.raw_ser_list partition(ds='${date_yes}', plat='${ios}');
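
-- The dump appears to hold one list literal, e.g. ["s1", "s2", ...]:
-- substring() strips the leading [" and trailing "], split() cuts on '", "',
-- and explode() emits one server id per row.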

insert overwrite table mid_ser_list partition(ds='${date_yes}', plat='${ios}') select explode(split(substring(ser,3,length(ser)-4),'${ser}')) from ${data_base}.raw_ser_list where ds='${date_yes}' and plat='${ios}';

"

hive -S -e "

use ${data_base};

load data local inpath '/data/superhero/bi_tool/viso_config/ios_father_server_map_${date_yes}' overwrite into table ${data_base}.raw_ser_map partition(ds='${date_yes}',plat='${ios}');
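
-- Same idea for the map dump: strip the outer braces and quotes, then
-- str_to_map() splits entries on '", "' and keys from values on '": "'.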

insert overwrite table mid_ser_map partition(ds='${date_yes}',plat='${ios}') select explode(str_to_map(substring(ser,3,length(ser)-4),'${map1}','${map2}')) from ${data_base}.raw_ser_map where ds='${date_yes}' and plat='${ios}';

"

hive -S -e "

use ${data_base};

load data local inpath '/data/superhero/bi_tool/viso_config/pub_ser_list_${date_yes}' overwrite into table ${data_base}.raw_ser_list partition(ds='${date_yes}', plat='${pub}');

insert overwrite table mid_ser_list partition(ds='${date_yes}', plat='${pub}') select explode(split(substring(ser,3,length(ser)-4),'${ser}')) from ${data_base}.raw_ser_list where ds='${date_yes}' and plat='${pub}';

"

hive -S -e "

use ${data_base};

load data local inpath '/data/superhero/bi_tool/viso_config/pub_father_server_map_${date_yes}' overwrite into table ${data_base}.raw_ser_map partition(ds='${date_yes}',plat='${pub}');

insert overwrite table mid_ser_map partition(ds='${date_yes}',plat='${pub}') select explode(str_to_map(substring(ser,3,length(ser)-4),'${map1}','${map2}')) from ${data_base}.raw_ser_map where ds='${date_yes}' and plat='${pub}';

"

##-- insert overwrite table mid_gs_user partition(ds='${date_yes}') select distinct user_id from raw_paylog where ds='${date_yes}' and lower(platform_2) = 'admin_test'; -- collect GS accounts

##--

##-- insert overwrite table mid_spendlog_no_gs partition(ds='${date_yes}') select a.* from (select * from raw_spendlog where ds='${date_yes}')a left outer join (select user_id from mid_gs_user where ds='${date_yes}')b on a.user_id=b.user_id where b.user_id is null; -- drop GS accounts from spendlog

##--

##-- -- build total_info

##-- insert overwrite table mid_info_all_ext partition (ds='${date_bef_yes}') select uid,account,nick,platform_2,device,create_time,fresh_time,vip_level,level,zhandouli,food,metal,energy,nengjing,zuanshi,qiangnengzhichen,chaonengzhichen,xingdongli,xingling,jinbi,lianjingshi,shenen,gaojishenen,gaojinengjing,jingjichangdianshu from mid_info_all where ds='${date_bef_yes}';

##-- insert overwrite table mid_info_all_ext partition (ds='${date_yes}') select uid,account,nick,platform_2,device,create_time,fresh_time,vip_level,level,zhandouli,food,metal,energy,nengjing,zuanshi,qiangnengzhichen,chaonengzhichen,xingdongli,xingling,jinbi,lianjingshi,shenen,gaojishenen,gaojinengjing,jingjichangdianshu from raw_info where ds='${date_yes}';

##--

##-- insert overwrite table mid_info_all partition(ds='${date_yes}') select t.uid,t.account,t.nick,t.platform_2,t.device,t.create_time,t.fresh_time,t.vip_level,t.level,t.zhandouli,t.food,t.metal,t.energy,t.nengjing,t.zuanshi,t.qiangnengzhichen,t.chaonengzhichen,t.xingdongli,t.xingling,t.jinbi,t.lianjingshi,t.shenen,t.gaojishenen,t.gaojinengjing,t.jingjichangdianshu from (select *, row_number() over (distribute by uid sort by fresh_time desc ) as rn from mid_info_all_ext ) t where t.rn<2;

##--

##-- alter table mid_info_all_ext drop partition(ds='${date_bef_yes}');

##-- alter table mid_info_all_ext drop partition(ds='${date_yes}');

##--

##-- -- build the active-account list

##-- insert overwrite table mid_act_account partition(ds='${date_yes}') select info_all.account, act.plat, act.platform_2,act.ser from (select *, substring(uid,1,length(uid)-7) as ser from raw_act where ds='${date_yes}') act join (select uid, account from mid_info_all where ds='${date_yes}') info_all on act.uid=info_all.uid;

##--

##-- -- build the new-account list

##-- -- match accounts from raw_reg:  select info_all.account, reg.plat, reg.platform_2,reg.ser  from (select *, substring(uid,1,length(uid)-7) as ser from raw_reg where ds='20160406')reg join (select uid, account from mid_info_all where ds='20160406')info_all on reg.uid=info_all.uid;

##--

##-- insert overwrite table mid_new_account_ext partition (ds='${date_yes}')select acct.account, acct.plat, acct.platform_2, acct.server  from (select * from mid_act_account where ds='${date_yes}')acct left join (select account from mid_info_all where ds='${date_bef_yes}') info_all on acct.account=info_all.account where info_all.account is null; -- parameters still need adjusting

##-- insert overwrite table mid_new_account partition (ds='${date_yes}') select ext.account, ext.plat, ext.platform_2, ext.server from (select *, row_number() over (distribute by account sort by server) as rn from mid_new_account_ext)ext where ext.rn=1;

##-- alter table mid_new_account_ext drop partition(ds='${date_yes}');

##"

