Hive基本操作(数据导入导出)

原创
2017/01/08 21:45
阅读数 441

1、创建数据库(mydb)

CREATE DATABASE IF NOT EXISTS mydb;
use mydb;

2、创建表(t_loginfo)

CREATE TABLE IF NOT EXISTS t_loginfo(
	sdate string,
	stime array<string>,
	level string,
	class string,
	info1 string,
	info2 string,
	info3 string
) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' '
  COLLECTION ITEMS TERMINATED BY ','
  MAP KEYS TERMINATED BY ':';

   可将上述HQL语句保存到一个文本中,如/usr/script/loginfo.hql,然后执行

hive -f /usr/script/loginfo.hql
--或 hive> source /usr/script/loginfo.hql;

hadoop的日志格式:日期,时间,级别,类信息,提示信息。

2017-01-08 01:51:13,445 INFO org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetAsyncDiskService: Deleted BP-1727771939-127.0.0.1-1483765767408 blk_1073741836_1012 file /usr/local/install_path/hadoop-2.7.3/hdfs/data/current/BP-1727771939-127.0.0.1-1483765767408/current/finalized/subdir0/subdir0/blk_1073741836
2017-01-08 01:51:13,458 INFO org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetAsyncDiskService: Deleted BP-1727771939-127.0.0.1-1483765767408 blk_1073741837_1013 file /usr/local/install_path/hadoop-2.7.3/hdfs/data/current/BP-1727771939-127.0.0.1-1483765767408/current/finalized/subdir0/subdir0/blk_1073741837
2017-01-08 01:51:13,459 INFO org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetAsyncDiskService: Deleted BP-1727771939-127.0.0.1-1483765767408 blk_1073741838_1014 file /usr/local/install_path/hadoop-2.7.3/hdfs/data/current/BP-1727771939-127.0.0.1-1483765767408/current/finalized/subdir0/subdir0/blk_1073741838
2017-01-08 01:51:13,460 INFO org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetAsyncDiskService: Deleted BP-1727771939-127.0.0.1-1483765767408 blk_1073741839_1015 file /usr/local/install_path/hadoop-2.7.3/hdfs/data/current/BP-1727771939-127.0.0.1-1483765767408/current/finalized/subdir0/subdir0/blk_1073741839
2017-01-08 06:11:34,368 INFO org.apache.hadoop.hdfs.server.datanode.DirectoryScanner: BlockPool BP-1727771939-127.0.0.1-1483765767408 Total blocks: 6, missing metadata files:0, missing block files:0, missing blocks in memory:0, mismatched blocks:0
2017-01-08 07:16:37,859 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Successfully sent block report 0x3f6343edc1a7,  containing 1 storage report(s), of which we sent 1. The reports had 6 total blocks and used 1 RPC(s). This took 0 msec to generate and 8 msecs for RPC and NN processing. Got back one command: FinalizeCommand/5.
2017-01-08 07:16:37,860 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Got finalize command for block pool BP-1727771939-127.0.0.1-1483765767408
2017-01-08 10:05:01,087 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Receiving BP-1727771939-127.0.0.1-1483765767408:blk_1073741846_1022 src: /192.168.241.129:47726 dest: /192.168.241.129:50010
2017-01-08 10:05:01,176 INFO org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /192.168.241.129:47726, dest: /192.168.241.129:50010, bytes: 227, op: HDFS_WRITE, cliID: DFSClient_NONMAPREDUCE_-1528398405_1, offset: 0, srvID: 0d460e03-63da-42ea-88da-ac4499dda1f1, blockid: BP-1727771939-127.0.0.1-1483765767408:blk_1073741846_1022, duration: 5938769

3、导入本地数据,若无LOCAL关键字,则从hdfs里面加载,无OVERWRITE关键字,则追加而不是覆盖

LOAD DATA LOCAL INPATH '/usr/local/hadoop-2.7.3/logs/hadoop.log' OVERWRITE INTO TABLE t_loginfo;

4、查看数据

SELECT * FROM t_loginfo;

5、基本操作语句

--列出所有的数据库名称
SHOW DATABASES;

--使用数据库
USE dbName;

--列出所有表
SHOW TABLES;

--列出指定数据库所有表
SHOW TABLES IN dbName;

--正则表达式搜索表
SHOW TABLES LIKE 't%';

--获取建表语句
SHOW CREATE TABLE tableName;

--查看表结构
DESCRIBE tableName;

--复制表结构
CREATE TABLE new_table LIKE old_table;

--复制表结构和内容
CREATE TABLE new_table AS SELECT * FROM old_table;

--查询结果输出到文件中,-S静默执行
hive -S -e "SELECT * FROM tableName" > /tmp/myquery

--从文件中执行Hive查询,将查询语句保存为.q或者是.hql后缀的文件(其它也可)
--脚本 SELECT * FROM tableName;
--方式1:
       hive -f /path/to/file/myquery.hql
--方式2:
       hive> source /path/to/file/myquery.hql;

--执行shell命令,在命令前面加(!),并以分号(;)结束
hive> ! pwd;

--执行hadoop命令只需要将hadoop去掉,并以分号结束
hive> dfs -ls / ;
hive> dfs -help;

--设置显示查询字段名称
hive> set hive.cli.print.header=true;
hive> SELECT * FROM tableName

--删除数据库,不允许删除有表的数据库,若要删除需带上cascade
DROP DATABASE IF EXISTS dbName CASCADE;

--设置数据的属性,其中数据库名和数据库所在目录
hive> ALTER DATABASE daName SET DBPROPERTIES('key' = 'value');

--导出数据
INSERT OVERWRITE LOCAL DIRECTORY '/tmp/t_loginof.txt' SELECT * FROM tableName;

--导出数据到多个文件
FROM tableName t
  INSERT OVERWRITE LOCAL DIRECTORY '/tmp/t1.txt' SELECT * WHERE t.col = 'xx1' 
  INSERT OVERWRITE LOCAL DIRECTORY '/tmp/t2.txt' SELECT * WHERE t.col = 'xx2' 
  INSERT OVERWRITE LOCAL DIRECTORY '/tmp/t3.txt' SELECT * WHERE t.col = 'xx3';

 

展开阅读全文
打赏
0
0 收藏
分享
加载中
更多评论
打赏
0 评论
0 收藏
0
分享
返回顶部
顶部