编译impala
博客专区 > Yulong_ 的博客 > 博客详情
编译impala
Yulong_ 发表于11个月前
编译impala
  • 发表于 11个月前
  • 阅读 126
  • 收藏 0
  • 点赞 0
  • 评论 0

Install Dependencies

for root user

Install Rpms

yum install -y libevent-devel automake libtool flex bison gcc-c++ openssl-devel  make cmake doxygen glib-devel python-devel bzip2-devel svn libevent-devel cyrus-sasl-devel wget git unzip openldap-devel db4-devel lsb

Install pip

cd /tmp
wget https://bootstrap.pypa.io/get-pip.py --no-check-certificate
python get-pip.py

Build Environment

for hadoop user

Build Dirs Tree

[hadoop@localhost ~]$ tree -L 2 /opt/beh/
    /opt/beh/
    ├── buildenv
    │   ├── beh_env
    │   └── impala_env
    └── core
        ├── impala -> impala-2.5.0-cdh5.7.1
        ├── impala-2.5.0-cdh5.7.1
        ├── jdk -> jdk1.7.0_79
        ├── jdk1.7.0_79
        ├── maven -> apache-maven-3.3.9
        └── apache-maven-3.3.9
    7 directories, 2 files

impala_env

[hadoop@localhost ~]$ cat /opt/beh/buildenv/impala_env
export LANG=zh_CN.UTF-8
export BEH_HOME=/opt/beh
export JAVA_HOME=/opt/beh/core/jdk
export MVN_HOME=/opt/beh/core/maven
export IMPALA_HOME=$BEH_HOME/core/impala
export CLASSPATH=.:$JAVA_HOME/lib:$JAVA_HOME/jre/lib:$CLASSPATH
export PATH=$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$MVN_HOME/bin:$PATH

Build

cd /opt/beh/core/impala
source /opt/beh/buildenv/impala_env
source bin/impala-config.sh
${IMPALA_HOME}/buildall.sh -noclean -skiptests -so -release

After the build reports BUILD SUCCESS:
Move the built Impala directory to the Hadoop cluster.
Modify the environment as described below.


Modify Cluster Environment

Modify env

  • MODIFY /opt/beh/conf/beh_env
#IMPALA 2.5.0
    export IMPALA_HOME=$BEH_HOME/core/impala
    export IMPALA_CONF_DIR=$IMPALA_HOME/conf
    export CLUSTER_DIR=/opt/beh/core/impala/testdata/cluster
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$IMPALA_HOME/so/:/opt/beh/core/hadoop/lib/native:/opt/beh/core/jdk/jre/lib/amd64/server
    
    export PATH=$IMPALA_HOME/bin:$PATH

Modify hdfs

  • modify /opt/beh/core/hadoop/etc/hadoop/hdfs-site.xml
<property>
       <name>dfs.client.read.shortcircuit</name>
       <value>true</value>
    </property>   
    <property>
       <name>dfs.client.read.shortcircuit.skip.checksum</name>
       <value>false</value>
    </property>   
    <property>
       <name>dfs.domain.socket.path</name>
       <value>/opt/beh/data/domain/sc_socket</value>
    </property>   
    <property>
       <name>dfs.datanode.data.dir.perm</name>
       <value>755</value>
    </property>   
    <property>
       <name>dfs.block.local-path-access.user</name>
       <value>hadoop</value>
    </property>   
    <property>
       <name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
       <value>true</value>
    </property>   
    <property>  
       <name>dfs.client.file-block-storage-locations.timeout</name>  
       <value>10000</value>  
    </property>
  • restart hdfs

Modify impala

cd /opt/beh/core/impala
mkdir conf
cd conf
ln -s /opt/beh/core/hadoop/etc/hadoop/core-site.xml ./
ln -s /opt/beh/core/hadoop/etc/hadoop/hdfs-site.xml ./
ln -s /opt/beh/core/hadoop/etc/hadoop/yarn-site.xml ./
ln -s /opt/beh/core/hive/conf/hive-site.xml ./


cd /opt/beh/core/impala
mkdir so
find . -name '*.so'  | xargs -i cp {} ./so
cd so
ln -s libstdc++.so libstdc++.so.6
  • modify bin/set-classpath.sh
[hadoop@localhost impala]$ cat bin/set-classpath.sh 
    #!/bin/bash
    
    CLASSPATH=\
    $IMPALA_HOME/conf:\
    $IMPALA_HOME/fe/src/test/resources:\
    $IMPALA_HOME/fe/target/classes:\
    $IMPALA_HOME/fe/target/dependency:\
    $IMPALA_HOME/fe/target/test-classes:\
    ${HIVE_HOME}/lib/datanucleus-api-jdo-3.2.1.jar:\
    ${HIVE_HOME}/lib/datanucleus-core-3.2.2.jar:\
    ${HIVE_HOME}/lib/datanucleus-rdbms-3.2.1.jar:
    
    for jar in `ls ${IMPALA_HOME}/fe/target/dependency/*.jar`; do
      CLASSPATH=${CLASSPATH}:$jar
    done
    
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$IMPALA_HOME/so/:/opt/beh/core/hadoop/lib/native:/opt/beh/core/jdk/jre/lib/amd64/server
    
    #for jar in `ls ${IMPALA_HOME}/testdata/target/dependency/*.jar`; do
    #  CLASSPATH=${CLASSPATH}:$jar      
    #done
    export CLASSPATH
  • modify bin/start-statestored.sh: add the following on the second-to-last line.

    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$IMPALA_HOME/so/

  • ln -s /opt/beh/core/hive/lib/mysql-connector-java-5.1.31.jar $IMPALA_HOME/fe/target/dependency/

  • source /opt/beh/conf/beh_env

modify script

create impala conf file

vim $IMPALA_HOME/conf/impala
IMPALA_CATALOG_SERVICE_HOST=hadoop003
IMPALA_STATE_STORE_HOST=hadoop003
IMPALA_STATE_STORE_PORT=24000
IMPALA_BACKEND_PORT=22000
IMPALA_LOG_DIR=/opt/beh/logs/impala

IMPALA_CATALOG_ARGS=" -log_dir=${IMPALA_LOG_DIR} \
    -load_catalog_in_background=true" 

IMPALA_STATE_STORE_ARGS=" -log_dir=${IMPALA_LOG_DIR} \
    -state_store_port=${IMPALA_STATE_STORE_PORT}"

IMPALA_SERVER_ARGS=" \
    -log_dir=${IMPALA_LOG_DIR} \
    -catalog_service_host=${IMPALA_CATALOG_SERVICE_HOST} \
    -state_store_port=${IMPALA_STATE_STORE_PORT} \
    -use_statestore \
    -state_store_host=${IMPALA_STATE_STORE_HOST} \
    -mem_limit=70% \
    -default_pool_max_requests=-1 \
    -be_port=${IMPALA_BACKEND_PORT} " 

ENABLE_CORE_DUMPS=false

modify start-statestored.sh

vim $IMPALA_HOME/bin/start-statestored.sh
...
BUILD_TYPE=latest
#STATESTORED_ARGS=""
source $IMPALA_HOME/conf/impala
STATESTORED_ARGS=${STATESTORED_ARGS:-$IMPALA_STATE_STORE_ARGS}
BINARY_BASE_DIR=${IMPALA_HOME}/be/build
...

modify start-catalogd.sh

vim $IMPALA_HOME/bin/start-catalogd.sh
...
BUILD_TYPE=latest
#CATALOGD_ARGS=""
source $IMPALA_HOME/conf/impala
CATALOGD_ARGS=${CATALOGD_ARGS:-$IMPALA_CATALOG_ARGS}
BINARY_BASE_DIR=${IMPALA_HOME}/be/build
...

modify start-impalad.sh

vim $IMPALA_HOME/bin/start-impalad.sh
...
BUILD_TYPE=latest
#IMPALAD_ARGS=""
source $IMPALA_HOME/conf/impala
IMPALAD_ARGS=${IMPALAD_ARGS:-$IMPALA_SERVER_ARGS}
BINARY_BASE_DIR=${IMPALA_HOME}/be/build
...

start impala

  • cd /opt/beh/core/impala

  • start statestored (run only one instance in the cluster)

./bin/start-statestored.sh
  • start catalogd (run only one instance in the cluster)
./bin/start-catalogd.sh
  • start impalad on every node that runs a DataNode
./bin/start-impalad.sh -state_store_host={ip address of statestore}
  • connect impala
./bin/impala-shell.sh -i {ip address of one impalad}

#impala安装记录

#问题记录

##错误一

错误描述:impala-2.7.0-cdh5.9.0 启动相关服务时,总是将 HDFS 的地址加载为 localhost:20500,无法获取正确的 HDFS 路径,暂未找到针对该版本的直接解决办法。

规避办法:改用 impala-2.5.0-cdh5.7.1 版本进行编译安装,无任何报错。

##错误二

错误描述:

[hadoop@hadoop004 impala]$ ./bin/impala-shell.sh --help
Traceback (most recent call last):
  File "/opt/beh/core/impala/infra/python/bootstrap_virtualenv.py", line 279, in <module>
    kudu_client_dir = find_kudu_client_install_dir()
  File "/opt/beh/core/impala/infra/python/bootstrap_virtualenv.py", line 210, in find_kudu_client_install_dir
    custom_client_dir = os.environ["KUDU_CLIENT_DIR"]
  File "/usr/lib64/python2.6/UserDict.py", line 22, in __getitem__
    raise KeyError(key)
KeyError: 'KUDU_CLIENT_DIR'
Error in /opt/beh/core/impala/bin/impala-python at line 25:

错误原因:脚本在启动时会检查 Kudu 客户端是否存在(读取环境变量 KUDU_CLIENT_DIR),而该变量未设置。由于暂不测试 Kudu,故注释掉相关检查代码。

解决办法:

[hadoop@hadoop003 impala]$ vim ${IMPALA_HOME}/infra/python/bootstrap_virtualenv.py
if __name__ == "__main__":
  parser = optparse.OptionParser()
  parser.add_option("-l", "--log-level", default="INFO",
      choices=("DEBUG", "INFO", "WARN", "ERROR"))
  parser.add_option("-r", "--rebuild", action="store_true", help="Force a rebuild of"
      " the virtualenv even if it exists and appears to be completely up-to-date.")
  parser.add_option("--print-ld-library-path", action="store_true", help="Print the"
      " LD_LIBRARY_PATH that should be used when running python from the virtualenv.")
  options, args = parser.parse_args()
#  if options.print_ld_library_path:
#    kudu_client_dir = find_kudu_client_install_dir()
#    print os.path.pathsep.join([os.path.join(kudu_client_dir, 'lib'),
#                                os.path.join(kudu_client_dir, 'lib64')])
#    sys.exit()
#
  logging.basicConfig(level=getattr(logging, options.log_level))
  if options.rebuild:
    delete_virtualenv_if_exist()
  setup_virtualenv_if_not_exists()
#  install_kudu_client_if_possible()

##错误三

错误描述:

[hadoop@hadoop003 impala]$ ./bin/start-catalogd.sh --help > /tmp/start-catalogd.sh   
/opt/beh/core/impala/testdata/cluster/admin: line 46: CDH_MAJOR_VERSION: unbound variable
/opt/beh/core/impala/testdata/cluster/admin: line 57: TARGET_FILESYSTEM: unbound variable
/opt/beh/core/impala/testdata/cluster/admin: line 67: KUDU_IS_SUPPORTED: unbound variable
/opt/beh/core/impala/testdata/cluster/admin: line 97: IMPALA_CLUSTER_LOGS_DIR: unbound variable
/opt/beh/core/impala/testdata/cluster/admin: line 87: IS_OSX: unbound variable

解决办法:

[hadoop@hadoop003 impala]$ vim ${IMPALA_HOME}/testdata/cluster/admin
...
IS_OSX=false
IMPALA_CLUSTER_LOGS_DIR=$IMPALA_HOME/logs/cluster
KUDU_IS_SUPPORTED=false
TARGET_FILESYSTEM=hdfs
CDH_MAJOR_VERSION=5
DIR=$(dirname $0)
NODES_DIR="$DIR/cdh$CDH_MAJOR_VERSION"
NODE_COUNT=3
NODE_PREFIX=node-
...

#对于alluxio的读写支持

vim $IMPALA_HOME/bin/set-classpath.sh
...
CLASSPATH=\
$IMPALA_HOME/conf:\
$IMPALA_HOME/fe/src/test/resources:\
$IMPALA_HOME/fe/target/classes:\
$IMPALA_HOME/fe/target/dependency:\
$IMPALA_HOME/fe/target/test-classes:\
${HIVE_HOME}/lib/datanucleus-api-jdo-3.2.1.jar:\
${HIVE_HOME}/lib/datanucleus-core-3.2.2.jar:\
${HIVE_HOME}/lib/datanucleus-rdbms-3.2.1.jar:\
${HIVE_HOME}/lib/alluxio-core-client-1.2.0-jar-with-dependencies.jar:
...
共有 人打赏支持
粉丝 9
博文 79
码字总数 169741
×
Yulong_
如果觉得我的文章对您有用,请随意打赏。您的支持将鼓励我继续创作!
* 金额(元)
¥1 ¥5 ¥10 ¥20 其他金额
打赏人
留言
* 支付类型
微信扫码支付
打赏金额:
已支付成功
打赏金额: