Sentry for Impala Without Kerberos (CDH 5.10)


Introduction

    In Impala 1.1 and higher, the Sentry project can be used to authorize access to Impala. Sentry adds a fine-grained authorization framework to Hadoop: authorization determines which users may access which resources, and which operations they may perform on them. By default (with authorization disabled), Impala performs all reads and writes as the impala user, which is fine for development and test environments but unsuitable for a secure production environment. With authorization enabled, Impala uses the OS identity of the user running impala-shell or another client program and associates privileges with each such user.
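
    Without Kerberos there is no authentication step: Impala simply trusts the client's OS user. A quick way to see whose privileges apply (a minimal sketch; the host name follows the environment below):

# The OS user running impala-shell is the identity Sentry authorizes against
whoami                                        # e.g. "admin"
impala-shell -i hadoop1 -q "SHOW DATABASES;"  # results are filtered by this user's privileges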

Environment

Host      Sentry         Hive           Impala
hadoop1   -              -              impala-server
hadoop2   -              -              impala-server
hadoop3   -              -              impala-server
hadoop4   -              -              impala-server
hadoop5   -              hive-server2   impala-server
hadoop6   sentry-store   -              impala-catalog / impala-server
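
    To confirm which daemons actually run where, a quick check on each host (a sketch; the process names are the stock CDH ones):

# List Impala/Sentry daemons running on this host
ps aux | grep -E 'impalad|catalogd|statestored|sentry' | grep -v grep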

Configuration

    hadoop1-6:/etc/default/impala

IMPALA_CATALOG_SERVICE_HOST=172.31.217.156
IMPALA_STATE_STORE_HOST=172.31.217.156
IMPALA_STATE_STORE_PORT=24000
IMPALA_BACKEND_PORT=22000
IMPALA_LOG_DIR=/var/log/impala

IMPALA_CATALOG_ARGS=" \
    -sentry_config=/etc/impala/conf/sentry-site.xml \
    -log_dir=${IMPALA_LOG_DIR}  \
    -state_store_port=${IMPALA_STATE_STORE_PORT}  \
    -state_store_host=${IMPALA_STATE_STORE_HOST}"

IMPALA_STATE_STORE_ARGS=" -log_dir=${IMPALA_LOG_DIR} -state_store_port=${IMPALA_STATE_STORE_PORT}"

IMPALA_SERVER_ARGS=" \
    -sentry_config=/etc/impala/conf/sentry-site.xml \
    -server_name=hadoop6 \
    -use_local_tz_for_unix_timestamp_conversions=true \
    -convert_legacy_hive_parquet_utc_timestamps=true \
    -log_dir=${IMPALA_LOG_DIR} \
    -catalog_service_host=${IMPALA_CATALOG_SERVICE_HOST} \
    -state_store_port=${IMPALA_STATE_STORE_PORT} \
    -use_statestore \
    -state_store_host=${IMPALA_STATE_STORE_HOST} \
    -enable_rm=true \
    -rm_always_use_defaults=true \
    -llama_host=172.31.217.156 \
    -llama_port=15000 \
    -cgroup_hierarchy_path=/cgroup/cpu \
    -be_port=${IMPALA_BACKEND_PORT}" 

ENABLE_CORE_DUMPS=false

# LIBHDFS_OPTS=-Djava.library.path=/usr/lib/impala/lib
# MYSQL_CONNECTOR_JAR=/usr/share/java/mysql-connector-java.jar
# IMPALA_BIN=/usr/lib/impala/sbin
# IMPALA_HOME=/usr/lib/impala
# HIVE_HOME=/usr/lib/hive
# HBASE_HOME=/usr/lib/hbase
# IMPALA_CONF_DIR=/etc/impala/conf
# HADOOP_CONF_DIR=/etc/impala/conf
# HIVE_CONF_DIR=/etc/impala/conf
# HBASE_CONF_DIR=/etc/impala/conf

        Note: append -server_name and -sentry_config to IMPALA_SERVER_ARGS, and -sentry_config to IMPALA_CATALOG_ARGS; alternatively, point -authorization_policy_file at a policy file (on HDFS).
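
    After a restart, the flags can be verified through the impalad debug web UI (default port 25000; the /varz page lists all startup flags). A minimal check, assuming the debug UI is enabled:

# Confirm -sentry_config and -server_name were picked up by impalad
curl -s http://hadoop1:25000/varz | grep -E 'sentry_config|server_name'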

    hadoop1-6:/etc/impala/conf/hive-site.xml

<configuration>

    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://172.31.217.156:3306/metastore</value>
    </property>

    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
    </property>

    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>hive</value>
    </property>

    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>123456</value>
    </property>
    
    <property>
        <name>datanucleus.readOnlyDatastore</name>
        <value>false</value>
    </property>

    <property> 
        <name>datanucleus.fixedDatastore</name>
        <value>false</value> 
    </property>

    <property>
        <name>datanucleus.autoCreateSchema</name>
        <value>true</value>
    </property>

    <property>
        <name>datanucleus.autoCreateTables</name>
        <value>true</value>
    </property>

    <property>
        <name>datanucleus.autoCreateColumns</name>
        <value>true</value>
    </property>

    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>

    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>hadoop1:23125,hadoop6:23125</value>
    </property>

    <property>
        <name>hive.auto.convert.join</name>
        <value>true</value>
    </property>

    <property>
        <name>hive.metastore.schema.verification</name>
        <value>false</value>
    </property>

    <property>
        <name>hive.metastore.warehouse.dir</name>
        <value>/user/hive/warehouse</value>
    </property>

    <property>
        <name>hive.warehouse.subdir.inherit.perms</name>
        <value>true</value>
    </property>

    <property>
        <name>hive.metastore.uris</name>
        <value>thrift://hadoop5:9083</value>
    </property>

    <property>
        <name>hive.metastore.client.socket.timeout</name>
        <value>36000</value>
    </property>

    <property>
        <name>hive.zookeeper.quorum</name>
        <value>hadoop2:2181,hadoop3:2181,hadoop4:2181</value>
    </property>

    <property>
        <name>hive.server2.thrift.port</name>
        <value>10000</value>
    </property>

    <property>
        <name>hive.server2.thrift.bind.host</name>
        <value>0.0.0.0</value>
    </property>

    <property>
        <name>hive.server2.thrift.min.worker.threads</name>
        <value>2</value>
    </property>

    <property>
        <name>hive.server2.thrift.max.worker.threads</name>
        <value>10</value>
    </property>

    <property>  
        <name>hive.metastore.authorization.storage.checks</name>  
        <value>true</value>  
    </property>  

    <property>
        <name>dfs.client.read.shortcircuit</name>
        <value>true</value>
    </property>

    <property>
        <name>dfs.domain.socket.path</name>
        <value>/var/lib/hadoop-hdfs/dn_socket</value>
    </property>

    <property>
        <name>hive.execution.engine</name>
        <value>spark</value>
    </property>

    <property>
        <name>hive.enable.spark.execution.engine</name>
        <value>true</value>
    </property>

    <property>
        <name>spark.home</name>
        <value>/opt/programs/spark_1.6.0</value>
    </property>


    <!-- Sentry Hiveserver2 config-->
    <property> 
        <name>hive.sentry.conf.url</name>  
        <value>file:///etc/hive/conf/sentry-site.xml</value> 
    </property> 

    <property>
        <name>hive.server2.session.hook</name>
        <value>org.apache.sentry.binding.hive.HiveAuthzBindingSessionHook</value>
    </property>

    <property>
        <name>hive.security.authorization.task.factory</name>
        <value>org.apache.sentry.binding.hive.SentryHiveAuthorizationTaskFactoryImpl</value>
    </property>

    <!-- Sentry hivemeastore config -->
    <property>
        <name>hive.metastore.filter.hook</name>
        <value>org.apache.sentry.binding.metastore.SentryMetaStoreFilterHook</value>
    </property>

    <property>  
        <name>hive.metastore.pre.event.listeners</name>  
        <value>org.apache.sentry.binding.metastore.MetastoreAuthzBinding</value>  
    </property>

    <property>
        <name>hive.metastore.event.listeners</name>  
        <value>org.apache.sentry.binding.metastore.SentryMetastorePostEventListener</value>  
    </property>


</configuration>
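
    hive.sentry.conf.url above points HiveServer2 at /etc/hive/conf/sentry-site.xml, which this listing omits. Below is a minimal sketch of what that file typically contains in a kerberos-less Sentry setup; the values mirror this cluster's layout and are assumptions, not the author's exact file:

# Write a minimal Hive-side sentry-site.xml (sketch)
cat > /etc/hive/conf/sentry-site.xml <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
    <property>
        <name>sentry.service.client.server.rpc-address</name>
        <value>hadoop6</value>
    </property>
    <property>
        <name>sentry.service.client.server.rpc-port</name>
        <value>8038</value>
    </property>
    <property>
        <name>sentry.service.security.mode</name>
        <value>none</value>
    </property>
    <property>
        <name>sentry.hive.provider.backend</name>
        <value>org.apache.sentry.provider.db.SimpleDBProviderBackend</value>
    </property>
    <property>
        <!-- should match the -server_name passed to impalad so grants are shared -->
        <name>sentry.hive.server</name>
        <value>hadoop6</value>
    </property>
    <property>
        <!-- required when running Sentry without Kerberos -->
        <name>sentry.hive.testing.mode</name>
        <value>true</value>
    </property>
</configuration>
EOF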

    hadoop1-6:/etc/impala/conf/sentry-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<configuration>

    <property>
        <name>sentry.service.client.server.rpc-port</name>
        <value>8038</value>
    </property>

    <property>
        <name>sentry.service.client.server.rpc-address</name>
        <value>hadoop6</value>
    </property>

    <property>
        <name>sentry.service.client.server.rpc-connection-timeout</name>
        <value>200000</value>
    </property>

    <property>
        <name>sentry.service.security.mode</name>
        <value>none</value>
    </property>

</configuration>
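
    Before restarting anything, it is worth confirming that every node can reach the Sentry store RPC port (a sketch using nc; 8038 matches sentry.service.client.server.rpc-port above):

# Run from each hadoop node
nc -z hadoop6 8038 && echo "sentry store reachable"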

Startup

    Restart impala-catalog

/etc/init.d/impala-catalog restart

    Restart sentry-store

/etc/init.d/sentry-store restart

    Restart impala-server (hadoop1-6)

/etc/init.d/impala-server restart
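
    Restarting all six impala-servers by hand is tedious; a small loop does the same (a sketch, assuming passwordless ssh as a user allowed to run the init script):

# Restart impala-server across the cluster
for h in hadoop1 hadoop2 hadoop3 hadoop4 hadoop5 hadoop6; do
    ssh "$h" /etc/init.d/impala-server restart
done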

Testing

    Default privileges

        Note: when logging in as the admin user, only the default database is visible; the two databases created through Hive+Sentry do not show up until privileges are granted in Impala.

    Granting privileges in Impala

CREATE ROLE impala_admin_role;
GRANT ROLE impala_admin_role TO GROUP admin;
GRANT ALL ON SERVER hadoop6 TO ROLE impala_admin_role;
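
    The object in GRANT ALL ON SERVER must match the -server_name flag passed to impalad (hadoop6 in the configuration above); a grant against any other name never applies. A sketch of running and verifying the grants from impala-shell, assuming the admin user belongs to the OS group admin:

# Apply the grants and confirm they took effect
impala-shell -i hadoop6 -q "CREATE ROLE impala_admin_role"
impala-shell -i hadoop6 -q "GRANT ROLE impala_admin_role TO GROUP admin"
impala-shell -i hadoop6 -q "GRANT ALL ON SERVER hadoop6 TO ROLE impala_admin_role"
impala-shell -i hadoop6 -q "SHOW GRANT ROLE impala_admin_role"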

    Impala test

        Note: the tables created through Hive+Sentry are now visible.

    Logging in again as the test user

        Note: again only the default database is visible, because test has not been granted anything.
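
    To open up a specific database to test, the same pattern applies; db1 below is a hypothetical database name, and test is assumed to exist as an OS group:

# Grant the test group read access to one database (sketch)
impala-shell -i hadoop6 -q "CREATE ROLE test_read_role"
impala-shell -i hadoop6 -q "GRANT ROLE test_read_role TO GROUP test"
impala-shell -i hadoop6 -q "GRANT SELECT ON DATABASE db1 TO ROLE test_read_role"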

 

 
