DevOps - Big Data - Pseudo-Distributed Cluster Configuration

夹谷成仁
2023-12-01

Component versions

flume-1.9.0-bin
hadoop-2.7.7
hbase-1.4.13
hive-2.3.6-bin
jdk1.8.0_241
spark-2.4.5-bin-hadoop2.7
sqoop-1.4.7
tez-0.9.2-bin
zookeeper-3.5.7-bin

Preparation

Initialization commands

Disable the firewall

[root@localhost ~]# systemctl stop firewalld.service
[root@localhost ~]# systemctl status firewalld.service
● firewalld.service - firewalld - dynamic firewall daemon
   Loaded: loaded (/usr/lib/systemd/system/firewalld.service; enabled; vendor preset: enabled)
   Active: inactive (dead) since 五 2020-10-23 22:26:04 CST; 26s ago
     Docs: man:firewalld(1)
  Process: 1228 ExecStart=/usr/sbin/firewalld --nofork --nopid $FIREWALLD_ARGS (code=exited, status=0/SUCCESS)
 Main PID: 1228 (code=exited, status=0/SUCCESS)

10月 23 22:11:36 localhost.localdomain systemd[1]: Starting firewalld - dynamic firewall daemon...
10月 23 22:11:36 localhost.localdomain systemd[1]: Started firewalld - dynamic firewall daemon.
10月 23 22:26:03 localhost.localdomain systemd[1]: Stopping firewalld - dynamic firewall daemon...
10月 23 22:26:04 localhost.localdomain systemd[1]: Stopped firewalld - dynamic firewall daemon.
[root@localhost ~]# systemctl disable firewalld.service
Removed symlink /etc/systemd/system/multi-user.target.wants/firewalld.service.
Removed symlink /etc/systemd/system/dbus-org.fedoraproject.FirewallD1.service.

Passwordless SSH to localhost for root

ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys

chmod 700 ~/.ssh
chmod 644 ~/.ssh/authorized_keys

vi /etc/ssh/sshd_config
# Authentication:
LoginGraceTime 120
#PermitRootLogin without-password
PermitRootLogin yes
StrictModes yes

systemctl restart sshd.service
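
After restarting sshd, confirm that root can reach localhost without a password prompt (a quick check, assuming the key generated above):

ssh localhost hostname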

Environment variables

export JAVA_HOME=/opt/jdk
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH="$PATH:$JAVA_HOME/bin"

export MAVEN_HOME=/opt/maven
export PATH=$MAVEN_HOME/bin:$PATH
export MAVEN_OPTS="-Xms2g -Xmx2g"

export HADOOP_HOME=/opt/hadoop
#export HADOOP_CLASSPATH=`hadoop classpath`
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"


export HIVE_HOME=/opt/hive
export PATH=$PATH:$HIVE_HOME/bin
export HIVE_CONF_DIR=${HIVE_HOME}/conf
export HCAT_HOME=$HIVE_HOME/hcatalog

export SCALA_HOME=/opt/scala
export PATH=$PATH:$SCALA_HOME/bin

export SPARK_HOME=/opt/spark
export PATH=$PATH:$SPARK_HOME/bin


export NODE_HOME=/opt/node
export PATH=$PATH:$NODE_HOME/bin
export NODE_PATH=$NODE_HOME/lib/node_modules

export ZOOKEEPER_HOME=/opt/zookeeper
export PATH=$PATH:$ZOOKEEPER_HOME/bin

export HBASE_HOME=/opt/hbase
export PATH=$HBASE_HOME/bin:$PATH

export PHOENIX_HOME=/opt/phoenix
export PHOENIX_CLASSPATH=$PHOENIX_HOME
export PATH=$PATH:$PHOENIX_HOME/bin

export SQOOP_HOME=/opt/sqoop
export PATH=$PATH:$SQOOP_HOME/bin

export KYLIN_HOME=/opt/kylin
export PATH=$PATH:$KYLIN_HOME/bin

export KAFKA_HOME=/opt/kafka
export PATH=$PATH:$KAFKA_HOME/bin

export PHANTOMJS_HOME=/opt/phantomjs
export PATH=$PATH:$PHANTOMJS_HOME/bin

export FLINK_HOME=/opt/flink
export PATH=$PATH:$FLINK_HOME/bin

export CONDA_HOME=/root/anaconda3
export PATH=$PATH:$CONDA_HOME/bin
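
These exports need to be visible in every shell that starts the services. A common approach (an assumption here, not required by any of the components) is to append them to /etc/profile and re-source it:

vi /etc/profile        # paste the export lines above at the end
source /etc/profile
java -version          # sanity check that JAVA_HOME/bin is on PATH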

zookeeper

zoo.cfg

# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial 
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between 
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just 
# example sakes.
dataDir=/opt/zookeeper/data
dataLogDir=/opt/zookeeper/logs
# the port at which the clients will connect
clientPort=2181
#
# Be sure to read the maintenance section of the 
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
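
dataDir and dataLogDir point under /opt/zookeeper, so those directories must exist before the first start. A minimal single-node bring-up, assuming ZOOKEEPER_HOME/bin is on PATH as configured above:

mkdir -p /opt/zookeeper/data /opt/zookeeper/logs
zkServer.sh start
zkServer.sh status    # should report standalone mode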

hadoop

core-site.xml

<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/home/tmp/hadoop</value>
    </property>
    <property>
        <name>hadoop.proxyuser.root.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.root.hosts</name>
        <value>*</value>
    </property>
    <!-- enable WebHDFS in the NameNode and DataNodes -->
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>
    <!-- Hue WebHDFS proxy user setting -->
    <property>
        <name>hadoop.proxyuser.hue.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.hue.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.httpfs.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.httpfs.groups</name>
        <value>*</value>
    </property>
</configuration>
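
hadoop.tmp.dir is redirected away from /tmp, so the target should exist and be writable by the user running the daemons; creating it up front is optional (the NameNode format normally creates it) but avoids permission surprises:

mkdir -p /home/tmp/hadoop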

hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>fs.hdfs.impl.disable.cache</name>
        <value>true</value>
    </property>
    <property>
        <name>fs.file.impl.disable.cache</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.datanode.max.transfer.threads</name>
        <value>4096</value>
    </property>
</configuration>
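
With dfs.webhdfs.enabled set, the REST endpoint gives a quick health check once the daemons are started later; a sketch, assuming the default Hadoop 2.x NameNode HTTP port 50070:

curl "http://localhost:50070/webhdfs/v1/?op=LISTSTATUS&user.name=root"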

httpfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<configuration>
    <property>
        <name>httpfs.proxyuser.httpfs.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>httpfs.proxyuser.httpfs.groups</name>
        <value>*</value>
    </property>
</configuration>
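
HttpFS is a separate gateway process and only matters if Hue (or another client) goes through it instead of WebHDFS. A sketch of starting and probing it, assuming the httpfs.sh script shipped with Hadoop 2.7.7 and its default port 14000:

$HADOOP_HOME/sbin/httpfs.sh start
curl "http://localhost:14000/webhdfs/v1/?op=LISTSTATUS&user.name=root"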

mapred-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.application.classpath</name>
        <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>

</configuration>

yarn-site.xml

<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.env-whitelist</name>
        <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
    </property>
	
    <property>
        <name>yarn.resourcemanager.scheduler.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
    </property>
    <property>
        <name>yarn.scheduler.fair.preemption</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.scheduler.fair.user-as-default-queue</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.scheduler.fair.allow-undeclared-pools</name>
        <value>false</value>
    </property>
    <property>
        <name>yarn.nodemanager.pmem-check-enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>yarn.resourcemanager.am.max-attempts</name>
        <value>4</value>
        <description>
            The maximum number of application master execution attempts.
        </description>
    </property>
</configuration>
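
Because the FairScheduler is selected and yarn.scheduler.fair.allow-undeclared-pools is false, any queue other than default has to be declared in an allocations file (fair-scheduler.xml in the Hadoop conf directory by default). A minimal sketch with one declared queue — the resource figures are placeholders, not values taken from this setup:

<?xml version="1.0"?>
<allocations>
    <queue name="default">
        <minResources>512 mb,1 vcores</minResources>
        <weight>1.0</weight>
        <schedulingPolicy>fair</schedulingPolicy>
    </queue>
</allocations>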

hadoop-env.sh

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Set Hadoop-specific environment variables here.

# The only required environment variable is JAVA_HOME.  All others are
# optional.  When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.

# The java implementation to use.
export JAVA_HOME=/opt/jdk1.8.0_241

# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol.  Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
#export JSVC_HOME=${JSVC_HOME}

export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}

# Extra Java CLASSPATH elements.  Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
  if [ "$HADOOP_CLASSPATH" ]; then
    export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
  else
    export HADOOP_CLASSPATH=$f
  fi
done

# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""

# Extra Java runtime options.  Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"

# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"

export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"

export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"

# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"

# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol.  This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}

# Where log files are stored.  $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER

# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}

###
# HDFS Mover specific parameters
###
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_MOVER_OPTS=""

###
# Advanced Users Only!
###

# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by 
#       the user that will run the hadoop daemons.  Otherwise there is the
#       potential for a symlink attack.
export HADOOP_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}

# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER

hive

hive-env.sh

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Set Hive and Hadoop environment variables here. These variables can be used
# to control the execution of Hive. It should be used by admins to configure
# the Hive installation (so that users do not have to set environment variables
# or set command line parameters to get correct behavior).
#
# The hive service being invoked (CLI etc.) is available via the environment
# variable SERVICE


# Hive Client memory usage can be an issue if a large number of clients
# are running at the same time. The flags below have been useful in 
# reducing memory usage:
#
# if [ "$SERVICE" = "cli" ]; then
#   if [ -z "$DEBUG" ]; then
#     export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:+UseParNewGC -XX:-UseGCOverheadLimit"
#   else
#     export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:-UseGCOverheadLimit"
#   fi
# fi

# The heap size of the jvm stared by hive shell script can be controlled via:
#
# export HADOOP_HEAPSIZE=1024
#
# Larger heap size may be required when running queries over large number of files or partitions. 
# By default hive shell scripts use a heap size of 256 (MB).  Larger heap size would also be 
# appropriate for hive server.


# Set HADOOP_HOME to point to a specific hadoop install directory
 HADOOP_HOME=/opt/hadoop-2.7.7

# Hive Configuration Directory can be controlled by:
 export HIVE_CONF_DIR=/opt/hive-2.3.6-bin/conf

# Folder containing extra libraries required for hive compilation/execution can be controlled by:
 export HIVE_AUX_JARS_PATH=/opt/hive-2.3.6-bin/lib

hive-site.xml

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://localhost:3306/hive?createDatabaseIfNotExist=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
 
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
 
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>Username to use against metastore database</description>
</property>
 
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>root</value>
<description>password to use against metastore database</description>
</property>

<property>
<name>hive.server2.webui.host</name>
<value>192.168.79.12</value>
</property>

<property>
<name>hive.server2.webui.port</name>
<value>10002</value>
</property>

<property>
<name>hive.exec.local.scratchdir</name>
<value>/opt/hive-2.3.6-bin/tmp/job/</value>
<description>Local scratch space for Hive jobs</description>
</property>
 
<property>
<name>hive.downloaded.resources.dir</name>
<value>/opt/hive-2.3.6-bin/tmp/resource/</value>
<description>Temporary local directory for added resources in the remote file system.</description>
</property>
 
<property>
<name>hive.querylog.location</name>
<value>/opt/hive-2.3.6-bin/tmp/querylog/</value>
<description>Location of Hive run time structured log file</description>
</property>
 
<property>
<name>hive.server2.logging.operation.log.location</name>
<value>/opt/hive-2.3.6-bin/tmp/oplog/</value>
<description>Top level directory where operation logs are stored if logging functionality is enabled</description>
</property>

<property>
   <name>datanucleus.autoStartMechanism</name>
   <value>SchemaTable</value>
</property>
<property>
   <name>hive.metastore.schema.verification</name>
   <value>false</value>
   <description>  
    Enforce metastore schema version consistency.  
    True: Verify that version information stored in metastore matches with one from Hive jars.  Also disable automatic schema migration attempt. Users are required to manually migrate schema after Hive upgrade which ensures proper metastore schema migration. (Default)
    False: Warn if the version information stored in metastore doesn't match with one from in Hive jars.  
    </description>  
</property>

<property>
   <name>system:java.io.tmpdir</name>
   <value>/home/tmp/hive</value>
</property>
 
<property>
     <name>system:user.name</name>
     <value>root</value>
</property>

<property>
  <name>hive.metastore.uris</name>
  <value>thrift://192.168.79.12:9083</value>
</property>
<property>
  <name>hive.server2.thrift.port</name>
  <value>10000</value>
  <description>Port number of HiveServer2 Thrift interface. Can be overridden by setting $HIVE_SERVER2_THRIFT_PORT</description>
</property>
<property>
  <name>hive.server2.thrift.bind.host</name>
  <value>192.168.79.12</value>
  <description>Bind host on which to run the HiveServer2 Thrift interface. Can be overridden by setting $HIVE_SERVER2_THRIFT_BIND_HOST</description>
</property>

<property>
  <name>spark.yarn.jars</name>
  <value>hdfs://192.168.79.12:9000/spark-jars/*</value>
</property>

<!-- <property> -->
  <!-- <name>hive.execution.engine</name> -->
  <!-- <value>spark</value> -->
<!-- </property> -->

<!-- <property> -->
  <!-- <name>hive.enable.spark.execution.engine</name> -->
  <!-- <value>true</value> -->
<!-- </property> -->

<!-- <property> -->
  <!-- <name>spark.home</name> -->
  <!-- <value>/opt/spark-2.4.5-bin-hadoop2.7</value> -->
<!-- </property> -->
<!-- <property> -->
  <!-- <name>spark.master</name> -->
  <!-- <value>yarn-client</value> -->
<!-- </property> -->
<!-- <property> -->
  <!-- <name>spark.eventLog.enabled</name> -->
  <!-- <value>true</value> -->
<!-- </property> -->
<!-- <property> -->
  <!-- <name>spark.eventLog.dir</name> -->
  <!-- <value>hdfs://192.168.79.12:9000/spark-log</value> -->
<!-- </property> -->
<!-- <property> -->
  <!-- <name>spark.serializer</name> -->
  <!-- <value>org.apache.spark.serializer.KryoSerializer</value> -->
<!-- </property> -->
<!-- <property> -->
  <!-- <name>spark.executor.memory</name> -->
  <!-- <value>1g</value> -->
<!-- </property> -->
<!-- <property> -->
  <!-- <name>spark.driver.memory</name> -->
  <!-- <value>1g</value> -->
<!-- </property> -->
<!-- <property> -->
  <!-- <name>spark.executor.extraJavaOptions</name> -->
  <!-- <value>-XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"</value> -->
<!-- </property> -->
<!-- <property> -->
  <!-- <name>hive.server2.transport.mode</name> -->
  <!-- <value>http</value> -->
<!-- </property> -->
<property>
  <name>hive.server2.enable.doAs</name>
  <value>true</value>
</property>
<property>
    <name>hive.server2.authentication</name>
    <value>NOSASL</value>
    <description>
      Expects one of [nosasl, none, ldap, kerberos, pam, custom].
      Client authentication types.
        NONE: no authentication check
        LDAP: LDAP/AD based authentication
        KERBEROS: Kerberos/GSSAPI authentication
        CUSTOM: Custom authentication provider
                (Use with property hive.server2.custom.authentication.class)
        PAM: Pluggable authentication module
        NOSASL:  Raw transport
    </description>
</property>
</configuration> 
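
The metastore settings above use the MySQL driver class com.mysql.jdbc.Driver, which does not ship with Hive; the connector jar has to be placed in $HIVE_HOME/lib before running schematool or starting the metastore. The jar name below matches the one referenced later in spark-env.sh and is otherwise an assumption:

cp mysql-connector-java-5.1.41-bin.jar /opt/hive-2.3.6-bin/lib/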

hbase

hbase-env.sh

#
#/**
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements.  See the NOTICE file
# * distributed with this work for additional information
# * regarding copyright ownership.  The ASF licenses this file
# * to you under the Apache License, Version 2.0 (the
# * "License"); you may not use this file except in compliance
# * with the License.  You may obtain a copy of the License at
# *
# *     http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */

# Set environment variables here.

# This script sets variables multiple times over the course of starting an hbase process,
# so try to keep things idempotent unless you want to take an even deeper look
# into the startup scripts (bin/hbase, etc.)

# The java implementation to use.  Java 1.7+ required.
export JAVA_HOME=/opt/jdk1.8.0_241

# Extra Java CLASSPATH elements.  Optional.
export HBASE_CLASSPATH=/opt/hbase-1.4.13/conf

# The maximum amount of heap to use. Default is left to JVM default.
# export HBASE_HEAPSIZE=1G

# Uncomment below if you intend to use off heap cache. For example, to allocate 8G of 
# offheap, set the value to "8G".
# export HBASE_OFFHEAPSIZE=1G

# Extra Java runtime options.
# Below are what we set by default.  May only work with SUN JVM.
# For more on why as well as other possible settings,
# see http://wiki.apache.org/hadoop/PerformanceTuning
export HBASE_OPTS="-XX:+UseConcMarkSweepGC"

# Configure PermSize. Only needed in JDK7. You can safely remove it for JDK8+
export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -XX:PermSize=128m -XX:MaxPermSize=128m -XX:ReservedCodeCacheSize=256m"
export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -XX:PermSize=128m -XX:MaxPermSize=128m -XX:ReservedCodeCacheSize=256m"

# Uncomment one of the below three options to enable java garbage collection logging for the server-side processes.

# This enables basic gc logging to the .out file.
# export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps"

# This enables basic gc logging to its own file.
# If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
# export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH>"

# This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+.
# If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
# export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH> -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M"

# Uncomment one of the below three options to enable java garbage collection logging for the client processes.

# This enables basic gc logging to the .out file.
# export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps"

# This enables basic gc logging to its own file.
# If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
# export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH>"

# This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+.
# If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
# export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH> -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M"

# See the package documentation for org.apache.hadoop.hbase.io.hfile for other configurations
# needed setting up off-heap block caching. 

# Uncomment and adjust to enable JMX exporting
# See jmxremote.password and jmxremote.access in $JRE_HOME/lib/management to configure remote password access.
# More details at: http://java.sun.com/javase/6/docs/technotes/guides/management/agent.html
# NOTE: HBase provides an alternative JMX implementation to fix the random ports issue, please see JMX
# section in HBase Reference Guide for instructions.

# export HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false"
# export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10101"
# export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10102"
# export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10103"
# export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10104"
# export HBASE_REST_OPTS="$HBASE_REST_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10105"

# File naming hosts on which HRegionServers will run.  $HBASE_HOME/conf/regionservers by default.
# export HBASE_REGIONSERVERS=${HBASE_HOME}/conf/regionservers

# Uncomment and adjust to keep all the Region Server pages mapped to be memory resident
#HBASE_REGIONSERVER_MLOCK=true
#HBASE_REGIONSERVER_UID="hbase"

# File naming hosts on which backup HMaster will run.  $HBASE_HOME/conf/backup-masters by default.
# export HBASE_BACKUP_MASTERS=${HBASE_HOME}/conf/backup-masters

# Extra ssh options.  Empty by default.
# export HBASE_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HBASE_CONF_DIR"

# Where log files are stored.  $HBASE_HOME/logs by default.
# export HBASE_LOG_DIR=${HBASE_HOME}/logs

# Enable remote JDWP debugging of major HBase processes. Meant for Core Developers 
# export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8070"
# export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8071"
# export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8072"
# export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8073"

# A string representing this instance of hbase. $USER by default.
# export HBASE_IDENT_STRING=$USER

# The scheduling priority for daemon processes.  See 'man nice'.
# export HBASE_NICENESS=10

# The directory where pid files are stored. /tmp by default.
# export HBASE_PID_DIR=/var/hadoop/pids

# Seconds to sleep between slave commands.  Unset by default.  This
# can be useful in large clusters, where, e.g., slave rsyncs can
# otherwise arrive faster than the master can service them.
# export HBASE_SLAVE_SLEEP=0.1

# Tell HBase whether it should manage its own instance of Zookeeper or not.
export HBASE_MANAGES_ZK=false

# The default log rolling policy is RFA, where the log file is rolled as per the size defined for the 
# RFA appender. Please refer to the log4j.properties file to see more details on this appender.
# In case one needs to do log rolling on a date change, one should set the environment property
# HBASE_ROOT_LOGGER to "<DESIRED_LOG LEVEL>,DRFA".
# For example:
# HBASE_ROOT_LOGGER=INFO,DRFA
# The reason for changing default to RFA is to avoid the boundary case of filling out disk space as 
# DRFA doesn't put any cap on the log size. Please refer to HBase-5655 for more context.

hbase-policy.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-->

<configuration>
  <property>
    <name>security.client.protocol.acl</name>
    <value>*</value>
    <description>ACL for ClientProtocol and AdminProtocol implementations (ie. 
    clients talking to HRegionServers)
    The ACL is a comma-separated list of user and group names. The user and 
    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
    A special value of "*" means all users are allowed.</description>
  </property>

  <property>
    <name>security.admin.protocol.acl</name>
    <value>*</value>
    <description>ACL for HMasterInterface protocol implementation (ie. 
    clients talking to HMaster for admin operations).
    The ACL is a comma-separated list of user and group names. The user and 
    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
    A special value of "*" means all users are allowed.</description>
  </property>

  <property>
    <name>security.masterregion.protocol.acl</name>
    <value>*</value>
    <description>ACL for HMasterRegionInterface protocol implementations
    (for HRegionServers communicating with HMaster)
    The ACL is a comma-separated list of user and group names. The user and 
    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
    A special value of "*" means all users are allowed.</description>
  </property>
</configuration>

hbase-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-->
<configuration>
    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://localhost:9000/hbase</value>
    </property>
    <property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
    </property>
    <property>
        <name>hbase.tmp.dir</name>
        <value>/opt/hbase-1.4.13/tmp</value>
    </property>
    <property>
        <name>hbase.master.info.port</name>
        <value>60010</value>
    </property>
    <property>
        <name>hbase.table.sanity.checks</name>
        <value>false</value>
    </property>
    <!-- <property> -->
    <!-- <name>phoenix.schema.isNamespaceMappingEnabled</name> -->
    <!-- <value>true</value> -->
    <!-- </property> -->
    <!-- <property> -->
    <!-- <name>phoenix.schema.mapSystemTablesToNamespace</name> -->
    <!-- <value>true</value> -->
    <!-- </property> -->
    <property>
        <name>hbase.regionserver.wal.codec</name>
        <value>org.apache.hadoop.hbase.regionserver.wal.IndexedWALEditCodec</value>
    </property>
    <property>
        <name>hbase.region.server.rpc.scheduler.factory.class</name>
        <value>org.apache.hadoop.hbase.ipc.PhoenixRpcSchedulerFactory</value>
    </property>
    <property>
        <name>hbase.rpc.controllerfactory.class</name>
        <value>org.apache.hadoop.hbase.ipc.controller.ServerRpcControllerFactory</value>
    </property>
    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>localhost</value>
    </property>
    <property>
        <name>hbase.zookeeper.property.dataDir</name>
        <value>/opt/zookeeper-3.5.7-bin/data</value>
    </property>
    <property>
        <name>hbase.zookeeper.property.clientPort</name>
        <value>2181</value>
    </property>
    <property>
        <name>zookeeper.session.timeout</name>
        <value>90000</value>
    </property>
    <property>
        <name>hbase.regionserver.restart.on.zk.expire</name>
        <value>true</value>
    </property>
    <property>
        <name>hbase.regionserver.handler.count</name>
        <value>10</value>
    </property>
    <property>
        <name>hbase.unsafe.stream.capability.enforce</name>
        <value>false</value>
    </property>
    <property>
        <name>hbase.wal.provider</name>
        <value>filesystem</value>
    </property>
</configuration>
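
The WAL codec and RPC factory classes configured above come from Phoenix, so the Phoenix server jar must be on the region server classpath or HBase will fail to start with ClassNotFoundException. A sketch — the jar name is an assumption, since the Phoenix version is not pinned in this document:

cp /opt/phoenix/phoenix-*-HBase-1.4-server.jar /opt/hbase-1.4.13/lib/
start-hbase.sh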

spark

spark-env.sh

#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# This file is sourced when running various Spark programs.
# Copy it as spark-env.sh and edit that to configure Spark for your site.


export SPARK_LAUNCH_WITH_SCALA=0
export SPARK_WORKER_MEMORY=1g
export SPARK_DRIVER_MEMORY=1g


export JAVA_HOME=/opt/jdk1.8.0_241
export HADOOP_HOME=/opt/hadoop-2.7.7

export HIVE_HOME=/opt/hive-2.3.6-bin
export SCALA_HOME=/opt/scala-2.11.12
export SPARK_HOME=/opt/spark-2.4.5-bin-hadoop2.7

export SPARK_DIST_CLASSPATH=$(/opt/hadoop-2.7.7/bin/hadoop classpath)
export SPARK_CLASSPATH=$SPARK_CLASSPATH:$HIVE_HOME/lib/mysql-connector-java-5.1.41-bin.jar
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HADOOP_HOME/lib/native/

# Options read when launching programs locally with
# ./bin/run-example or ./bin/spark-submit
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program

# Options read by executors and drivers running inside the cluster
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos

# Options read in YARN client/cluster mode
# - SPARK_CONF_DIR, Alternate conf dir. (Default: ${SPARK_HOME}/conf)
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - YARN_CONF_DIR, to point Spark towards YARN configuration files when you use YARN
# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).
# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)
# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)

export SPARK_CONF_DIR=${SPARK_HOME}/conf
export HADOOP_CONF_DIR=/opt/hadoop-2.7.7/etc/hadoop
export YARN_CONF_DIR=/opt/hadoop-2.7.7/etc/hadoop
export SPARK_EXECUTOR_CORES=1
export SPARK_EXECUTOR_MEMORY=1G
export SPARK_DRIVER_MEMORY=1G


export HIVE_SERVER2_THRIFT_PORT=10001
export HIVE_SERVER2_THRIFT_BIND_HOST=192.168.79.12

# Options for the daemons used in the standalone deploy mode
# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname
# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
# - SPARK_WORKER_DIR, to set the working directory of worker processes
# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).
# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y")
# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons
# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers

# Generic options for the daemons used in the standalone deploy mode
# - SPARK_CONF_DIR      Alternate conf dir. (Default: ${SPARK_HOME}/conf)
# - SPARK_LOG_DIR       Where log files are stored.  (Default: ${SPARK_HOME}/logs)
# - SPARK_PID_DIR       Where the pid file is stored. (Default: /tmp)
# - SPARK_IDENT_STRING  A string representing this instance of spark. (Default: $USER)
# - SPARK_NICENESS      The scheduling priority for daemons. (Default: 0)
# - SPARK_NO_DAEMONIZE  Run the proposed command in the foreground. It will not output a PID file.
# Options for native BLAS, like Intel MKL, OpenBLAS, and so on.
# You might get better performance to enable these options if using native BLAS (see SPARK-21305).
# - MKL_NUM_THREADS=1        Disable multi-threading of Intel MKL
# - OPENBLAS_NUM_THREADS=1   Disable multi-threading of OpenBLAS

# export SPARK_DIST_CLASSPATH=$(hadoop classpath)

#export SPARK_MASTER_IP=192.168.117.51
#export SPARK_LIBRARY_PATH=/root/spark-without-hive/lib
#export SPARK_MASTER_WEBUI_PORT=18080
#export SPARK_WORKER_DIR=/root/spark-without-hive/work
#export SPARK_MASTER_PORT=7077
#export SPARK_WORKER_PORT=7078
#export SPARK_LOG_DIR=/root/spark-without-hive/log
#export SPARK_PID_DIR='/root/spark-without-hive/run'



##################################
#spark.yarn.dist.archives
#spark.yarn.dist.files
#spark.yarn.dist.jars


#export SPARK_MASTER_IP=192.168.79.12
#export SPARK_MASTER_PORT=7077
#export PYSPARK_PYTHON=/usr/bin/python2.7
#export PYSPARK_DRIVER_PYTHON=/usr/bin/python2.7
#export SPARK_LOCAL_IP=192.168.79.12
#export SPARK_YARN_QUEUE=root
#export SPARK_WORKER_CORES=4
#export SPARK_WORKER_INSTANCES=1
#export SPARK_WORKER_MEMORY=2G
#export SPARK_WORKER_WEBUI_PORT=8081
#export SPARK_EXECUTOR_CORES=1
#export SPARK_EXECUTOR_MEMORY=2G

spark-defaults.conf

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Default system properties included when running spark-submit.
# This is useful for setting default environmental settings.

# Example:
# spark.master                     spark://master:7077
# spark.eventLog.enabled           true
# spark.eventLog.dir               hdfs://namenode:8021/directory
# spark.serializer                 org.apache.spark.serializer.KryoSerializer
# spark.driver.memory              5g
# spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"


#spark.eventLog.enabled false
#spark.eventLog.dir hdfs://192.168.79.12:9000/spark-log 

#spark.executor.memory 512m 
#spark.serializer org.apache.spark.serializer.KryoSerializer
#spark.driver.memory 1g
#spark.executor.extraJavaOptions  -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled
#spark.driver.extraJavaOptions    -Dspark.driver.log.level=INFO -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=512M

           
spark.master                     yarn
spark.home                       /opt/spark-2.4.5-bin-hadoop2.7 
spark.eventLog.enabled           true
spark.eventLog.dir               hdfs://localhost:9000/spark-log
spark.serializer                 org.apache.spark.serializer.KryoSerializer
spark.executor.memory            1g
spark.driver.memory              1g
spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
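
spark.eventLog.dir here and spark.yarn.jars in hive-site.xml both point at HDFS paths that do not exist on a fresh cluster; create them and upload the Spark jars once HDFS is up (a sketch, using the paths configured above):

hdfs dfs -mkdir -p /spark-log /spark-jars
hdfs dfs -put /opt/spark-2.4.5-bin-hadoop2.7/jars/* /spark-jars/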

sqoop

sqoop-env.sh

#Set path to where bin/hadoop is available
export HADOOP_COMMON_HOME=/opt/hadoop-2.7.7

#Set path to where hadoop-*-core.jar is available
export HADOOP_MAPRED_HOME=/opt/hadoop-2.7.7

#set the path to where bin/hbase is available
export HBASE_HOME=/opt/hbase-1.4.13

#Set the path to where bin/hive is available
export HIVE_HOME=/opt/hive-2.3.6-bin

#Set the path for where zookeeper config dir is
export ZOOCFGDIR=/opt/zookeeper-3.5.7-bin/conf

export HADOOP_CONF_DIR=/opt/hadoop-2.7.7/etc/hadoop
export HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_HOME}/lib/native
export HADOOP_OPTS="-Djava.library.path=${HADOOP_HOME}/lib"

export HIVE_CONF_DIR=${HIVE_HOME}/conf
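
Sqoop also needs the MySQL connector jar in its own lib directory to reach MariaDB; a quick smoke test once it is in place (jar name assumed, matching the one used for Hive):

cp mysql-connector-java-5.1.41-bin.jar /opt/sqoop/lib/
sqoop list-databases --connect jdbc:mysql://localhost:3306/ --username root -P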

Initialization

hadoop

/opt/hadoop/bin/hdfs namenode -format

mariadb

yum install -y mariadb mariadb-server
systemctl start mariadb
systemctl enable mariadb
mysql_secure_installation

grant all privileges on *.* to 'root'@'%' identified by 'mysql' with grant option;
flush privileges;

hive

/opt/hive/bin/schematool -initSchema -dbType mysql

Startup

hadoop

start-all.sh
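
Once start-all.sh returns, jps should list the HDFS and YARN daemons (NameNode, DataNode, SecondaryNameNode, ResourceManager, NodeManager), and a small example job confirms that YARN accepts work — the jar path below is the stock Hadoop 2.7.7 examples jar:

jps
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.7.jar pi 2 10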

hive

nohup hive --service metastore &
nohup hiveserver2 &
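
Because hive.server2.authentication is NOSASL, beeline must pass auth=noSasl in the JDBC URL; a sketch of a connection test against the bind host configured in hive-site.xml:

beeline -u "jdbc:hive2://192.168.79.12:10000/default;auth=noSasl" -n root -e "show databases;"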
