Compiling Impala

Yulong_ · Published 2016/12/11 17:39

Install Dependencies

As the root user:

Install RPMs

yum install -y libevent-devel automake libtool flex bison gcc-c++ openssl-devel  make cmake doxygen glib-devel python-devel bzip2-devel svn libevent-devel cyrus-sasl-devel wget git unzip openldap-devel db4-devel lsb

Install pip

cd /tmp
wget https://bootstrap.pypa.io/get-pip.py --no-check-certificate
python get-pip.py
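
A quick sanity check (optional, not part of the original notes) confirms pip is usable:

# confirm pip landed on the PATH and reports a version
pip --version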

Build Environment

As the hadoop user:

Build Dirs Tree

[hadoop@localhost ~]$ tree -L 2 /opt/beh/
    /opt/beh/
    ├── buildenv
    │   ├── beh_env
    │   └── impala_env
    └── core
        ├── impala -> impala-2.5.0-cdh5.7.1
        ├── impala-2.5.0-cdh5.7.1
        ├── jdk -> jdk1.7.0_79
        ├── jdk1.7.0_79
        ├── maven -> apache-maven-3.3.9
        └── apache-maven-3.3.9
    7 directories, 2 files

impala_env

[hadoop@localhost ~]$ cat /opt/beh/buildenv/impala_env
export LANG=zh_CN.UTF-8
export BEH_HOME=/opt/beh
export JAVA_HOME=/opt/beh/core/jdk
export MVN_HOME=/opt/beh/core/maven
export IMPALA_HOME=$BEH_HOME/core/impala
export CLASSPATH=.:$JAVA_HOME/lib:$JAVA_HOME/jre/lib:$CLASSPATH
export PATH=$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$MVN_HOME/bin:$PATH
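
Before building, it is worth confirming the environment resolves as expected; a minimal check, assuming the directory layout above:

source /opt/beh/buildenv/impala_env
java -version    # expect 1.7.0_79
mvn -version     # expect Apache Maven 3.3.9
echo $IMPALA_HOME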

Build

cd /opt/beh/core/impala
source /opt/beh/buildenv/impala_env
source bin/impala-config.sh
${IMPALA_HOME}/buildall.sh -noclean -skiptests -so -release
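
If the build succeeds, the daemon binaries should be under be/build; a quick check (paths assume the usual Impala layout, where be/build/latest links to the release output — the same layout BUILD_TYPE=latest in the start scripts expects):

ls -l ${IMPALA_HOME}/be/build/latest/service/impalad
ls -l ${IMPALA_HOME}/be/build/latest/statestore/statestored
ls -l ${IMPALA_HOME}/be/build/latest/catalog/catalogd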

Once the build succeeds, copy the Impala build tree to the Hadoop cluster and adjust the environment as described below.


Modify Cluster Environment

Modify env

  • modify /opt/beh/conf/beh_env
#IMPALA 2.5.0
    export IMPALA_HOME=$BEH_HOME/core/impala
    export IMPALA_CONF_DIR=$IMPALA_HOME/conf
    export CLUSTER_DIR=/opt/beh/core/impala/testdata/cluster
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$IMPALA_HOME/so/:/opt/beh/core/hadoop/lib/native:/opt/beh/core/jdk/jre/lib/amd64/server
    
    export PATH=$IMPALA_HOME/bin:$PATH

Modify hdfs

  • modify /opt/beh/core/hadoop/etc/hadoop/hdfs-site.xml
    <property>
        <name>dfs.client.read.shortcircuit</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.client.read.shortcircuit.skip.checksum</name>
        <value>false</value>
    </property>
    <property>
        <name>dfs.domain.socket.path</name>
        <value>/opt/beh/data/domain/sc_socket</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir.perm</name>
        <value>755</value>
    </property>
    <property>
        <name>dfs.block.local-path-access.user</name>
        <value>hadoop</value>
    </property>
    <property>
        <name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.client.file-block-storage-locations.timeout</name>
        <value>10000</value>
    </property>
  • restart HDFS so the new settings take effect (a sketch for preparing the socket directory first follows this list)
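
The parent directory of dfs.domain.socket.path must exist on every DataNode before HDFS comes back up, or short-circuit reads will fail. A minimal sketch; the stop/start commands assume the stock Hadoop sbin scripts, so substitute your own cluster tooling if needed:

# run on every DataNode: create the domain socket directory
# (hadoop:hadoop ownership is an assumption; match your service user)
mkdir -p /opt/beh/data/domain
chown hadoop:hadoop /opt/beh/data/domain

# restart HDFS -- assumes the stock sbin scripts
/opt/beh/core/hadoop/sbin/stop-dfs.sh
/opt/beh/core/hadoop/sbin/start-dfs.sh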

Modify impala

cd /opt/beh/core/impala
mkdir conf
cd conf
ln -s /opt/beh/core/hadoop/etc/hadoop/core-site.xml ./
ln -s /opt/beh/core/hadoop/etc/hadoop/hdfs-site.xml ./
ln -s /opt/beh/core/hadoop/etc/hadoop/yarn-site.xml ./
ln -s /opt/beh/core/hive/conf/hive-site.xml ./


cd /opt/beh/core/impala
mkdir so
find . -name '*.so'  | xargs -i cp {} ./so
cd so
ln -s libstdc++.so libstdc++.so.6
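
To check that the collected libraries satisfy the daemons' runtime dependencies, point ldd at the freshly built impalad; a hedged check, assuming the release build path from above:

# any "not found" line means a library is still missing from ./so
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/beh/core/impala/so
ldd /opt/beh/core/impala/be/build/latest/service/impalad | grep 'not found'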
  • modify bin/set-classpath.sh
[hadoop@localhost impala]$ cat bin/set-classpath.sh 
    #!/bin/bash
    
    CLASSPATH=\
    $IMPALA_HOME/conf:\
    $IMPALA_HOME/fe/src/test/resources:\
    $IMPALA_HOME/fe/target/classes:\
    $IMPALA_HOME/fe/target/dependency:\
    $IMPALA_HOME/fe/target/test-classes:\
    ${HIVE_HOME}/lib/datanucleus-api-jdo-3.2.1.jar:\
    ${HIVE_HOME}/lib/datanucleus-core-3.2.2.jar:\
    ${HIVE_HOME}/lib/datanucleus-rdbms-3.2.1.jar:
    
    for jar in `ls ${IMPALA_HOME}/fe/target/dependency/*.jar`; do
      CLASSPATH=${CLASSPATH}:$jar
    done
    
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$IMPALA_HOME/so/:/opt/beh/core/hadoop/lib/native:/opt/beh/core/jdk/jre/lib/amd64/server
    
    #for jar in `ls ${IMPALA_HOME}/testdata/target/dependency/*.jar`; do
    #  CLASSPATH=${CLASSPATH}:$jar      
    #done
    export CLASSPATH
  • add the following line to bin/start-statestored.sh, just before its last line:

    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$IMPALA_HOME/so/

  • ln -s /opt/beh/core/hive/lib/mysql-connector-java-5.1.31.jar $IMPALA_HOME/fe/target/dependency/

  • source /opt/beh/conf/beh_env

Modify scripts

Create impala conf file

vim $IMPALA_HOME/conf/impala
IMPALA_CATALOG_SERVICE_HOST=hadoop003
IMPALA_STATE_STORE_HOST=hadoop003
IMPALA_STATE_STORE_PORT=24000
IMPALA_BACKEND_PORT=22000
IMPALA_LOG_DIR=/opt/beh/logs/impala

IMPALA_CATALOG_ARGS=" -log_dir=${IMPALA_LOG_DIR} \
    -load_catalog_in_background=true" 

IMPALA_STATE_STORE_ARGS=" -log_dir=${IMPALA_LOG_DIR} \
    -state_store_port=${IMPALA_STATE_STORE_PORT}"

IMPALA_SERVER_ARGS=" \
    -log_dir=${IMPALA_LOG_DIR} \
    -catalog_service_host=${IMPALA_CATALOG_SERVICE_HOST} \
    -state_store_port=${IMPALA_STATE_STORE_PORT} \
    -use_statestore \
    -state_store_host=${IMPALA_STATE_STORE_HOST} \
    -mem_limit=70% \
    -default_pool_max_requests=-1 \
    -be_port=${IMPALA_BACKEND_PORT} " 

ENABLE_CORE_DUMPS=false

modify start-statestored.sh

vim $IMPALA_HOME/bin/start-statestored.sh
...
BUILD_TYPE=latest
#STATESTORED_ARGS=""
source $IMPALA_HOME/conf/impala
STATESTORED_ARGS=${STATESTORED_ARGS:-$IMPALA_STATE_STORE_ARGS}
BINARY_BASE_DIR=${IMPALA_HOME}/be/build
...

modify start-catalogd.sh

vim $IMPALA_HOME/bin/start-catalogd.sh
...
BUILD_TYPE=latest
#CATALOGD_ARGS=""
source $IMPALA_HOME/conf/impala
CATALOGD_ARGS=${CATALOGD_ARGS:-$IMPALA_CATALOG_ARGS}
BINARY_BASE_DIR=${IMPALA_HOME}/be/build
...

modify start-impalad.sh

vim $IMPALA_HOME/bin/start-impalad.sh
...
BUILD_TYPE=latest
#IMPALAD_ARGS=""
source $IMPALA_HOME/conf/impala
IMPALAD_ARGS=${IMPALAD_ARGS:-$IMPALA_SERVER_ARGS}
BINARY_BASE_DIR=${IMPALA_HOME}/be/build
...
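
All three start scripts receive the same two-line patch, so a quick grep (not part of the original notes) confirms none was missed:

cd $IMPALA_HOME/bin
# each script should now source the shared conf file
grep -n 'conf/impala' start-statestored.sh start-catalogd.sh start-impalad.sh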

Start impala

  • cd /opt/beh/core/impala

  • start statestored (only one instance per cluster)

./bin/start-statestored.sh

  • start catalogd (only one instance per cluster)

./bin/start-catalogd.sh

  • start an impalad on every DataNode host

./bin/start-impalad.sh -state_store_host={ip address of statestore}

  • connect with impala-shell

./bin/impala-shell.sh -i {ip address of one impalad}
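
Once the daemons are up, a one-shot query exercises the whole chain; hadoop003 is used here as an example host, and the web UI ports are the Impala defaults:

# run a trivial query against one impalad
./bin/impala-shell.sh -i hadoop003 -q 'select version();'

# default debug web UIs, useful for a quick health check:
#   impalad     http://hadoop003:25000
#   statestored http://hadoop003:25010
#   catalogd    http://hadoop003:25020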

# Impala installation notes

# Problem log

## Problem 1

Symptom: with impala-2.7.0-cdh5.9.0, the services always loaded the HDFS schema as localhost:20500 on startup and could not resolve the correct HDFS paths; no direct fix was found.

Workaround: build and install impala-2.5.0-cdh5.7.1 instead, which compiled without any errors.

## Problem 2

Symptom:

[hadoop@hadoop004 impala]$ ./bin/impala-shell.sh --help
Traceback (most recent call last):
  File "/opt/beh/core/impala/infra/python/bootstrap_virtualenv.py", line 279, in <module>
kudu_client_dir = find_kudu_client_install_dir()
  File "/opt/beh/core/impala/infra/python/bootstrap_virtualenv.py", line 210, in find_kudu_client_install_dir
custom_client_dir = os.environ["KUDU_CLIENT_DIR"]
  File "/usr/lib64/python2.6/UserDict.py", line 22, in __getitem__
raise KeyError(key)
KeyError: 'KUDU_CLIENT_DIR'
Error in /opt/beh/core/impala/bin/impala-python at line 25:

The error comes from a check for a Kudu installation. Since Kudu is not being tested here, the relevant checks can be commented out.

Fix:

[hadoop@hadoop003 impala]$ vim ${IMPALA_HOME}/infra/python/bootstrap_virtualenv.py
if __name__ == "__main__":
  parser = optparse.OptionParser()
  parser.add_option("-l", "--log-level", default="INFO",
      choices=("DEBUG", "INFO", "WARN", "ERROR"))
  parser.add_option("-r", "--rebuild", action="store_true", help="Force a rebuild of"
      " the virtualenv even if it exists and appears to be completely up-to-date.")
  parser.add_option("--print-ld-library-path", action="store_true", help="Print the"
      " LD_LIBRARY_PATH that should be used when running python from the virtualenv.")
  options, args = parser.parse_args()
#  if options.print_ld_library_path:
#    kudu_client_dir = find_kudu_client_install_dir()
#    print os.path.pathsep.join([os.path.join(kudu_client_dir, 'lib'),
#                                os.path.join(kudu_client_dir, 'lib64')])
#    sys.exit()
#
  logging.basicConfig(level=getattr(logging, options.log_level))
  if options.rebuild:
    delete_virtualenv_if_exist()
  setup_virtualenv_if_not_exists()
#  install_kudu_client_if_possible()
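
After commenting out the Kudu checks, re-running the command that originally failed confirms the fix:

# previously died with KeyError: 'KUDU_CLIENT_DIR'
./bin/impala-shell.sh --help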

## Problem 3

Symptom:

[hadoop@hadoop003 impala]$ ./bin/start-catalogd.sh --help > /tmp/start-catalogd.sh   
/opt/beh/core/impala/testdata/cluster/admin: line 46: CDH_MAJOR_VERSION: unbound variable
/opt/beh/core/impala/testdata/cluster/admin: line 57: TARGET_FILESYSTEM: unbound variable
/opt/beh/core/impala/testdata/cluster/admin: line 67: KUDU_IS_SUPPORTED: unbound variable
/opt/beh/core/impala/testdata/cluster/admin: line 97: IMPALA_CLUSTER_LOGS_DIR: unbound variable
/opt/beh/core/impala/testdata/cluster/admin: line 87: IS_OSX: unbound variable

Fix:

[hadoop@hadoop003 impala]$ vim ${IMPALA_HOME}/testdata/cluster/admin
...
IS_OSX=false
IMPALA_CLUSTER_LOGS_DIR=$IMPALA_HOME/logs/cluster
KUDU_IS_SUPPORTED=false
TARGET_FILESYSTEM=hdfs
CDH_MAJOR_VERSION=5
DIR=$(dirname $0)
NODES_DIR="$DIR/cdh$CDH_MAJOR_VERSION"
NODE_COUNT=3
NODE_PREFIX=node-
...
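
Re-running the command from the error report verifies that the formerly unbound variables are now set:

# should no longer print "unbound variable" errors
./bin/start-catalogd.sh --help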

# Alluxio read/write support

vim $IMPALA_HOME/bin/set-classpath.sh
...
CLASSPATH=\
$IMPALA_HOME/conf:\
$IMPALA_HOME/fe/src/test/resources:\
$IMPALA_HOME/fe/target/classes:\
$IMPALA_HOME/fe/target/dependency:\
$IMPALA_HOME/fe/target/test-classes:\
${HIVE_HOME}/lib/datanucleus-api-jdo-3.2.1.jar:\
${HIVE_HOME}/lib/datanucleus-core-3.2.2.jar:\
${HIVE_HOME}/lib/datanucleus-rdbms-3.2.1.jar:\
${HIVE_HOME}/lib/alluxio-core-client-1.2.0-jar-with-dependencies.jar:
...
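
This assumes the Alluxio client jar is already present under ${HIVE_HOME}/lib; a quick existence check before restarting the daemons:

# the jar referenced in set-classpath.sh must actually exist
ls -l ${HIVE_HOME}/lib/alluxio-core-client-1.2.0-jar-with-dependencies.jar

Depending on the Alluxio version, core-site.xml may also need fs.alluxio.impl set to alluxio.hadoop.FileSystem so that alluxio:// paths resolve.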
