Hue reference: https://docs.gethue.com/administrator/configuration/connectors/
1. The Spark build bundled with CDH does not ship the Spark Thrift service, so download the Spark version you need from the official site (http://spark.apache.org/downloads.html); a download/extract sketch follows below.
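A minimal sketch of the download and extraction, assuming a Spark 2.4.x release prebuilt for Hadoop 2.7 and /opt as the install location (the version and paths are examples, not taken from the original; pick whatever matches your cluster):
# download a prebuilt Spark release from the Apache archive (version here is only an example)
wget https://archive.apache.org/dist/spark/spark-2.4.8/spark-2.4.8-bin-hadoop2.7.tgz
# extract it and move it to a shorter path
tar -zxvf spark-2.4.8-bin-hadoop2.7.tgz -C /opt/
mv /opt/spark-2.4.8-bin-hadoop2.7 /opt/spark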
2. After downloading and extracting it, configure the environment variables (JDK installation is not covered here):
vim /etc/profile
export JAVA_HOME=/usr/java/jdk1.8.0_181-cloudera
export PATH=$PATH:$JAVA_HOME/bin
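It is also convenient to export SPARK_HOME for the new installation and reload the profile; a sketch assuming the /opt/spark path from the example above:
export SPARK_HOME=/opt/spark
export PATH=$PATH:$SPARK_HOME/bin
# reload the profile and sanity-check the install
source /etc/profile
spark-submit --version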
3. Edit the configuration files
Edit spark-env.sh (this is essentially a direct copy of the CDH Spark configuration):
export SPARK_LAUNCH_WITH_SCALA=0
export SPARK_LOG_DIR=/var/log/spark
export SPARK_PID_DIR='/var/run/spark/'
if [ -n "$HADOOP_HOME" ]; then
export LD_LIBRARY_PATH=:/usr/lib/hadoop/lib/native
fi
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop/conf}
if [[ -d $SPARK_HOME/python ]]
then
  # Note: the list of entries for this loop was lost in the original snippet,
  # so as written the loop body never runs; it is meant to append additional
  # entries (presumably from under $SPARK_HOME/python) to SPARK_DIST_CLASSPATH.
  for i in
  do
    SPARK_DIST_CLASSPATH=${SPARK_DIST_CLASSPATH}:$i
  done
fi
SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:$HADOOP_CONF_DIR"
SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:/usr/lib/hadoop/lib/*"
SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:/usr/lib/hadoop/*"
SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:/usr/lib/hadoop-hdfs/lib/*"
SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:/usr/lib/hadoop-hdfs/*"
SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:/usr/lib/hadoop-mapreduce/lib/*"
SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:/usr/lib/hadoop-mapreduce/*"
SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:/usr/lib/hadoop-yarn/lib/*"
SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:/usr/lib/hadoop-yarn/*"
SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:/usr/lib/hive/lib/*"
SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:/usr/lib/flume-ng/lib/*"
SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:/usr/lib/parquet/lib/*"
SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:/usr/lib/avro/lib/*"
Copy hive-site.xml and hbase-site.xml into Spark's conf directory, as sketched below.
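For example, assuming the standard CDH client configuration directories and /opt/spark as the Spark home (these paths are illustrative, not taken from the original):
# hive-site.xml carries the metastore settings, hbase-site.xml the HBase connection settings
cp /etc/hive/conf/hive-site.xml /opt/spark/conf/
cp /etc/hbase/conf/hbase-site.xml /opt/spark/conf/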
4. Start the Spark Thrift service
Run the following from the sbin directory (drop any options you find unnecessary; the --jars list is there mainly because some of the jars are required for Hive-on-HBase queries). In fact, start-thriftserver.sh simply runs spark-submit --class org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 --name "Thrift JDBC/ODBC Server":
./start-thriftserver.sh --hiveconf hive.server2.thrift.port=14001 --hiveconf hive.exec.mode.local.auto=true --hiveconf hive.auto.convert.join=true --hiveconf hive.mapjoin.smalltable.filesize=50000000 --name thriftserver --master yarn --deploy-mode client --driver-cores 1 --driver-memory 3G --num-executors 5 --executor-cores 4 --executor-memory 8G --conf spark.driver.maxResultSize=2G --conf spark.sql.filesourceTableRelationCacheSize=0 --conf spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.default.parallelism=100 --conf spark.sql.shuffle.partitions=100 --jars /opt/cloudera/parcels/CDH/jars/hive-exec-2.1.1-cdh6.2.1-core.jar,/opt/cloudera/parcels/CDH/jars/hive-exec-2.1.1-cdh6.2.1.jar,/opt/cloudera/parcels/CDH/lib/hbase/lib/hbase-protocol-2.1.0-cdh6.2.1.jar,/opt/cloudera/parcels/CDH/lib/hbase/lib/hbase-shaded-netty-2.2.1.jar,/opt/cloudera/parcels/CDH/lib/hbase/lib/hbase-shaded-protobuf-2.2.1.jar,/opt/cloudera/parcels/CDH/lib/hbase/lib/hbase-protocol-shaded-2.1.0-cdh6.2.1.jar,/opt/cloudera/parcels/CDH/lib/hbase/lib/hbase-shaded-miscellaneous-2.2.1.jar,/opt/cloudera/parcels/CDH/lib/hive/lib/guava-14.0.1.jar,/opt/cloudera/parcels/CDH/lib/hbase/hbase-common-2.1.0-cdh6.2.1.jar,/opt/cloudera/parcels/CDH/lib/hbase/lib/hbase-mapreduce-2.1.0-cdh6.2.1.jar,/opt/cloudera/parcels/CDH/lib/hbase/hbase-client.jar,/opt/cloudera/parcels/CDH/lib/hive/lib/hive-hbase-handler-2.1.1-cdh6.2.1.jar,/opt/cloudera/parcels/CDH/lib/hive/lib/antlr-runtime-3.5.2.jar,/opt/cloudera/parcels/CDH/lib/hive/lib/javax.jdo-3.2.0-m3.jar,/opt/cloudera/parcels/CDH/lib/hive/lib/hk2-api-2.5.0-b32.jar,/opt/cloudera/parcels/CDH/lib/hive/lib/joda-time-2.9.9.jar,/opt/cloudera/parcels/CDH/lib/hive/lib/jdo-api-3.0.1.jar,/opt/cloudera/parcels/CDH/lib/hive/lib/datanucleus-rdbms-4.1.7.jar,/opt/cloudera/parcels/CDH/lib/hive/lib/datanucleus-core-4.1.6.jar,/opt/cloudera/parcels/CDH/lib/hive/lib/datanucleus-api-jdo-4.2.1.jar,/opt/cloudera/parcels/CDH/lib/hive/lib/commons-dbcp-1.4.jar,/opt/cloudera/parcels/CDH/lib/hive/lib/hive-metastore-2.1.1-cdh6.2.1.jar,/opt/cloudera/parcels/CDH/jars/htrace-core4-4.2.0-incubating.jar,/usr/share/java/mysql-connector-java.jar
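Once the service is up, you can sanity-check the JDBC endpoint with beeline before wiring up Hue (the hostname below is taken from the Hue configuration in the next step; use whichever host runs the Thrift server):
# connect to the Spark Thrift Server on the port configured above
beeline -u jdbc:hive2://hadoop202:14001
# then try a trivial statement such as: show databases;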
5. Configure Hue
Edit the hue_safety_valve.ini configuration snippet as follows:
[desktop]
app_blacklist=
[spark]
livy_server_host=hadoop202
livy_server_port=8998
livy_server_session_kind=yarn
sql_server_host=hadoop202
# sql_server_port is required because SparkSql connects through this port
sql_server_port=14001
[notebook]
show_notebooks=true
enable_external_statements=true
enable_batch_execute=true
[[interpreters]]
# Define the name and how to connect and execute the language.
[[[hive]]]
# The name of the snippet.
name=Hive
# The backend connection to use to communicate with the server.
interface=hiveserver2
[[[impala]]]
name=Impala
interface=hiveserver2
[[[sparksql]]]
name=SparkSql
interface=hiveserver2
#interface=sqlalchemy
#options='{"url": "hive://hdfs@192.168.40.201:10000/default"}'
[[[spark]]]
name=Spark
interface=livy
[[[pyspark]]]
name=PySpark
interface=livy
[[[r]]]
name=R
interface=livy
[[[jar]]]
name=Spark Submit Jar
interface=livy-batch
6. Restart Hue and everything should work.