Oozie英文翻译为:驯象人。一个基于工作流引擎的开源框架,由Cloudera公司贡献给Apache,提供对Hadoop MapReduce、Pig Jobs的任务调度与协调。Oozie需要部署到Java Servlet容器中运行。主要用于定时调度任务,多任务可以按照执行的逻辑顺序调度。
顺序执行流程节点,支持fork(分支多个节点),join(合并多个节点为一个)
定时触发workflow
绑定多个Coordinator
控制流节点一般都是定义在工作流开始或者结束的位置,比如start,end,kill等。以及提供工作流的执行路径机制,如decision,fork,join等。
负责执行具体动作的节点,比如:拷贝文件,执行某个Shell脚本等等。
#打开配置文件
vim /opt/software/maven384/conf/settings.xml
#创建本地仓库
mkdir /opt/software/maven384/ck
<!--本地maven仓库地址-->
<localRepository>/opt/software/maven384/ck</localRepository>
<!--配置阿里的maven仓库-->
<mirror>
<id>alimaven</id>
<name>aliyun maven</name>
<url>https://maven.aliyun.com/nexus/content/groups/public/</url>
<mirrorOf>central</mirrorOf>
</mirror>
<!--配置华为的maven仓库-->
<mirror>
<id>huaweicloud</id>
<mirrorOf>*</mirrorOf>
<url>https://mirrors.huaweicloud.com/repository/maven/</url>
</mirror>
#查看maven版本信息,验证maven是否安装成功
mvn -v
#--------------------------------------------------------------------------------------
Apache Maven 3.8.4 (9b656c72d54e5bacbed989b64718c159fe39b537)
Maven home: /opt/software/maven384
Java version: 1.8.0_171, vendor: Oracle Corporation, runtime: /opt/software/jdk8/jre
Default locale: en_US, platform encoding: UTF-8
OS name: "linux", version: "3.10.0-1160.45.1.el7.x86_64", arch: "amd64", family: "unix"
#--------------------------------------------------------------------------------------
#oozie源码下载
wget http://archive.apache.org/dist/oozie/4.3.1/oozie-4.3.1.tar.gz
#解压Oozie
tar -zxvf oozie-4.3.1.tar.gz
#切换到Oozie文件夹中
cd oozie-4.3.1
#编辑pom.xml文件修改hadoop等版本信息
vim pom.xml
#切换到bin文件夹中
cd bin
#执行手工编译命令,指定hadoop的版本信息为3.1.3
./mkdistro.sh -DskipTests -Dhadoop.version=3.1.3 -Puber
#解压编译生成的安装包(mkdistro.sh编译完成后位于源码目录的distro/target/下)
tar -zxvf distro/target/oozie-4.3.1-distro.tar.gz -C /opt/software/
#配置环境变量
vim /etc/profile.d/my.sh
#------------------------------------------
#OOZIE
export OOZIE_HOME=/opt/software/oozie-4.3.1
export PATH=$PATH:$OOZIE_HOME/bin
source /etc/profile.d/my.sh
#------------------------------------------
<!-- Oozie Server的Hostname -->
<property>
<name>hadoop.proxyuser.atguigu.hosts</name>
<value>*</value>
</property>
<!-- 允许被Oozie代理的用户组 -->
<property>
<name>hadoop.proxyuser.atguigu.groups</name>
<value>*</value>
</property>
<!-- 配置 MapReduce JobHistory Server 地址 ,默认端口10020 -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop102:10020</value>
</property>
<!-- 配置 MapReduce JobHistory Server web ui 地址, 默认端口19888 -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop102:19888</value>
</property>
<!-- 任务历史服务 -->
<property>
<name>yarn.log.server.url</name>
<value>http://hadoop102:19888/jobhistory/logs/</value>
</property>
#切换到Oozie文件夹中
cd /opt/software/oozie-4.3.1
#创建文件夹
mkdir libext/
#将hadoop的libs文件夹的文件拷贝到libext文件中
cp ${HADOOP_HOME}/share/hadoop/*/*.jar libext/
cp ${HADOOP_HOME}/share/hadoop/*/lib/*.jar libext/
#拷贝Ext JS库(Oozie Web控制台依赖)
cp ext-2.2.zip libext/
#拷贝mysql驱动包
cp mysql-connector-java-5.1.35-bin.jar libext/
<!--
需要配置如下的内容:
属性:oozie.service.JPAService.jdbc.driver
属性值:com.mysql.jdbc.Driver
解释:JDBC的驱动
属性:oozie.service.JPAService.jdbc.url
属性值:jdbc:mysql://single01:3306/oozie
解释:oozie所需的数据库地址
属性:oozie.service.JPAService.jdbc.username
属性值:root
解释:数据库用户名
属性:oozie.service.JPAService.jdbc.password
属性值:ok
解释:数据库密码
属性:oozie.service.HadoopAccessorService.hadoop.configurations
属性值:*=/opt/software/hadoop313/etc/hadoop/
解释:让Oozie引用Hadoop的配置文件
-->
<property>
<name>oozie.service.JPAService.jdbc.driver</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>oozie.service.JPAService.jdbc.url</name>
<value>jdbc:mysql://single01:3306/oozie</value>
</property>
<property>
<name>oozie.service.JPAService.jdbc.username</name>
<value>root</value>
</property>
<property>
<name>oozie.service.JPAService.jdbc.password</name>
<value>ok</value>
</property>
<property>
<name>oozie.service.HadoopAccessorService.hadoop.configurations</name>
<value>*=/opt/software/hadoop313/etc/hadoop/</value>
</property>
#登录mysql
mysql -uroot -pok
#创建Oozie数据库
mysql> create database oozie;
vim oozie-4.3.1/oozie-server/conf/server.xml
#取消注释
<Listener className="org.apache.catalina.mbeans.ServerLifecycleListener" />
#打包
./oozie-setup.sh prepare-war
#开启Oozie
oozied.sh start
#关闭Oozie
oozied.sh stop
#访问Oozie页面的URL地址
http://single01:11000/oozie
#解压官方案例模板
tar -zxvf oozie-examples.tar.gz
#创建工作目录
mkdir oozie-apps/
#拷贝模板
cp -r examples/apps/shell/ oozie-apps
#编写脚本p1.sh
vim oozie-apps/shell/p1.sh
#----------------------------------
#!/bin/bash
/sbin/ifconfig > /opt/module/p1.log
#----------------------------------
#HDFS地址
nameNode=hdfs://single01:8020
#ResourceManager地址
jobTracker=single01:8032
#队列名称
queueName=default
examplesRoot=oozie-apps
oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/shell
EXEC=p1.sh
<!-- Single-step shell workflow: runs the script named by ${EXEC} (p1.sh) via the Oozie shell action. -->
<workflow-app xmlns="uri:oozie:workflow:0.4" name="shell-wf">
<start to="shell-node"/>
<action name="shell-node">
<shell xmlns="uri:oozie:shell-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
</configuration>
<exec>${EXEC}</exec>
<!-- <argument>my_output=Hello Oozie</argument> -->
<!-- Ship the script from HDFS into the task's working directory (hdfs-path#symlink-name). -->
<file>/user/atguigu/oozie-apps/shell/${EXEC}#${EXEC}</file>
<!-- Capture the script's stdout (key=value lines) as action data for wf:actionData(). -->
<capture-output/>
</shell>
<!-- NOTE(review): ok transitions straight to "end", so the "check-output" decision below
     is unreachable; the upstream Oozie example routes ok to "check-output" instead. -->
<ok to="end"/>
<error to="fail"/>
</action>
<decision name="check-output">
<switch>
<case to="end">
${wf:actionData('shell-node')['my_output'] eq 'Hello Oozie'}
</case>
<default to="fail-output"/>
</switch>
</decision>
<kill name="fail">
<message>Shell action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<kill name="fail-output">
<message>Incorrect output, expected [Hello Oozie] but was [${wf:actionData('shell-node')['my_output']}]</message>
</kill>
<end name="end"/>
</workflow-app>
#上传任务配置
hadoop fs -put oozie-apps/ /user
#执行任务
oozie job -oozie http://single01:11000/oozie -config oozie-apps/shell/job.properties -run
#杀死任务
oozie job -oozie http://single01:11000/oozie -kill 0000004-170425105153692-oozie-z-W
#解压官方案例模板
tar -zxvf oozie-examples.tar.gz
#创建工作目录
mkdir oozie-apps/
#拷贝模板
cp -r examples/apps/shell/ oozie-apps
#编写脚本p2.sh
vim oozie-apps/shell/p2.sh
#----------------------------------
#!/bin/bash
/sbin/ifconfig > /opt/module/p2.log
#----------------------------------
nameNode=hdfs://single01:8020
jobTracker=single01:8032
queueName=default
examplesRoot=oozie-apps
oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/shell
EXEC1=p1.sh
EXEC2=p2.sh
<!-- Two-step shell workflow: runs p1.sh, then p2.sh, chained via the ok transition. -->
<workflow-app xmlns="uri:oozie:workflow:0.4" name="shell-wf">
<start to="p1-shell-node"/>
<action name="p1-shell-node">
<shell xmlns="uri:oozie:shell-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
</configuration>
<exec>${EXEC1}</exec>
<!-- Ship the script from HDFS into the task's working directory (hdfs-path#symlink-name). -->
<file>/user/atguigu/oozie-apps/shell/${EXEC1}#${EXEC1}</file>
<!-- <argument>my_output=Hello Oozie</argument>-->
<capture-output/>
</shell>
<ok to="p2-shell-node"/>
<error to="fail"/>
</action>
<action name="p2-shell-node">
<shell xmlns="uri:oozie:shell-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
</configuration>
<exec>${EXEC2}</exec>
<!-- NOTE(review): this path uses /user/admin while p1 uses /user/atguigu —
     confirm which HDFS user directory actually holds the scripts. -->
<file>/user/admin/oozie-apps/shell/${EXEC2}#${EXEC2}</file>
<!-- <argument>my_output=Hello Oozie</argument>-->
<capture-output/>
</shell>
<ok to="end"/>
<error to="fail"/>
</action>
<!-- Unreachable as wired (no transition targets "check-output"); kept for parity with the
     upstream example. Fix: the EL below previously referenced a nonexistent action
     "shell-node"; it now references the real "p2-shell-node" action. -->
<decision name="check-output">
<switch>
<case to="end">
${wf:actionData('p2-shell-node')['my_output'] eq 'Hello Oozie'}
</case>
<default to="fail-output"/>
</switch>
</decision>
<kill name="fail">
<message>Shell action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<kill name="fail-output">
<message>Incorrect output, expected [Hello Oozie] but was [${wf:actionData('p2-shell-node')['my_output']}]</message>
</kill>
<end name="end"/>
</workflow-app>
#上传任务配置
hadoop fs -put oozie-apps/ /user
#执行任务
oozie job -oozie http://single01:11000/oozie -config oozie-apps/shell/job.properties -run
#杀死任务
oozie job -oozie http://single01:11000/oozie -kill 0000004-170425105153692-oozie-z-W
#解压官方案例模板
tar -zxvf oozie-examples.tar.gz
#创建工作目录
mkdir oozie-apps/
#拷贝模板
cp -r examples/apps/shell/ oozie-apps
#编写脚本p3.sh
vim oozie-apps/shell/p3.sh
#----------------------------------
#!/bin/bash
/sbin/ifconfig > /opt/module/p3.log
#----------------------------------
nameNode=hdfs://single01:8020
jobTracker=single01:8032
queueName=default
examplesRoot=oozie-apps
#hdfs://hadoop102:8020/user/admin/oozie-apps/map-reduce/workflow.xml
oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/map-reduce/workflow.xml
outputDir=map-reduce
<!-- Map-reduce workflow: schedules the Hadoop WordCount example through Oozie's map-reduce action. -->
<workflow-app xmlns="uri:oozie:workflow:0.2" name="map-reduce-wf">
<start to="mr-node"/>
<action name="mr-node">
<map-reduce>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<prepare>
<!-- Delete any stale output directory so the MR job does not fail on an existing path. -->
<delete path="${nameNode}/output/"/>
</prepare>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
<!-- Use the new MapReduce API when scheduling the MR task -->
<property>
<name>mapred.mapper.new-api</name>
<value>true</value>
</property>
<property>
<name>mapred.reducer.new-api</name>
<value>true</value>
</property>
<!-- Job output key class -->
<property>
<name>mapreduce.job.output.key.class</name>
<value>org.apache.hadoop.io.Text</value>
</property>
<!-- Job output value class -->
<property>
<name>mapreduce.job.output.value.class</name>
<value>org.apache.hadoop.io.IntWritable</value>
</property>
<!-- Input path -->
<property>
<name>mapred.input.dir</name>
<value>/input/</value>
</property>
<!-- Output path (matches the <prepare> delete above) -->
<property>
<name>mapred.output.dir</name>
<value>/output/</value>
</property>
<!-- Mapper class (inner class of the WordCount example) -->
<property>
<name>mapreduce.job.map.class</name>
<value>org.apache.hadoop.examples.WordCount$TokenizerMapper</value>
</property>
<!-- Reducer class -->
<property>
<name>mapreduce.job.reduce.class</name>
<value>org.apache.hadoop.examples.WordCount$IntSumReducer</value>
</property>
<property>
<name>mapred.map.tasks</name>
<value>1</value>
</property>
</configuration>
</map-reduce>
<ok to="end"/>
<error to="fail"/>
</action>
<kill name="fail">
<message>Map/Reduce failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<end name="end"/>
</workflow-app>
#上传任务配置
hadoop fs -put oozie-apps/ /user
#执行任务
oozie job -oozie http://single01:11000/oozie -config oozie-apps/shell/job.properties -run
#杀死任务
oozie job -oozie http://single01:11000/oozie -kill 0000004-170425105153692-oozie-z-W
#检查系统当前时区
date -R
#删除时区(时区不对时执行)
rm -rf /etc/localtime
#重新设置时区
ln -s /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
#同步时间
ntpdate pool.ntp.org
#修改NTP配置文件
vim /etc/ntp.conf
#--------------------------------------------------------
#去掉下面这行前面的# ,并把网段修改成自己的网段:
restrict 192.168.122.0 mask 255.255.255.0 nomodify notrap
#注释掉以下几行:
#server 0.centos.pool.ntp.org
#server 1.centos.pool.ntp.org
#server 2.centos.pool.ntp.org
#把下面两行前面的#号去掉,如果没有这两行内容,需要手动添加
server 127.127.1.0 # local clock
fudge 127.127.1.0 stratum 10
#--------------------------------------------------------
#重启NTP服务(使上面的配置修改生效)
systemctl restart ntpd.service
#解压官方案例模板
tar -zxvf oozie-examples.tar.gz
#创建工作目录
mkdir oozie-apps/
#拷贝模板
cp -r examples/apps/shell/ oozie-apps
#编写脚本p3.sh
vim oozie-apps/shell/p3.sh
#----------------------------------
#!/bin/bash
/sbin/ifconfig > /opt/module/p3.log
#----------------------------------
nameNode=hdfs://single01:8020
jobTracker=single01:8032
queueName=default
examplesRoot=oozie-apps
#hdfs://hadoop102:8020/user/admin/oozie-apps/map-reduce/workflow.xml
oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/map-reduce/workflow.xml
outputDir=map-reduce
EXEC3=p3.sh
<!-- Coordinator: triggers the workflow at ${workflowAppUri} every 5 minutes between
     ${start} and ${end}, forwarding cluster parameters into the workflow.
     NOTE(review): timezone "GMT+0800" — Java accepts this form, but an Olson id such
     as "Asia/Shanghai" is less ambiguous; confirm against the deployment. -->
<coordinator-app name="cron-coord" frequency="${coord:minutes(5)}" start="${start}" end="${end}" timezone="GMT+0800" xmlns="uri:oozie:coordinator:0.2">
<action>
<workflow>
<app-path>${workflowAppUri}</app-path>
<configuration>
<property>
<name>jobTracker</name>
<value>${jobTracker}</value>
</property>
<property>
<name>nameNode</name>
<value>${nameNode}</value>
</property>
<property>
<name>queueName</name>
<value>${queueName}</value>
</property>
</configuration>
</workflow>
</action>
</coordinator-app>
<!-- Single-step shell workflow launched by the cron coordinator: runs p3.sh. -->
<workflow-app xmlns="uri:oozie:workflow:0.5" name="one-op-wf">
<start to="p3-shell-node"/>
<action name="p3-shell-node">
<shell xmlns="uri:oozie:shell-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
</configuration>
<exec>${EXEC3}</exec>
<!-- Ship the script from HDFS into the task's working directory (hdfs-path#symlink-name). -->
<file>/user/atguigu/oozie-apps/cron/${EXEC3}#${EXEC3}</file>
<!-- <argument>my_output=Hello Oozie</argument>-->
<capture-output/>
</shell>
<ok to="end"/>
<error to="fail"/>
</action>
<kill name="fail">
<message>Shell action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<!-- Unreachable (no transition targets "fail-output"); kept for parity with the upstream
     example. Fix: the EL below previously referenced a nonexistent action "shell-node";
     it now references the real "p3-shell-node" action. -->
<kill name="fail-output">
<message>Incorrect output, expected [Hello Oozie] but was [${wf:actionData('p3-shell-node')['my_output']}]</message>
</kill>
<end name="end"/>
</workflow-app>
#上传任务配置
hadoop fs -put oozie-apps/ /user
#执行任务
oozie job -oozie http://single01:11000/oozie -config oozie-apps/shell/job.properties -run
#杀死任务
oozie job -oozie http://single01:11000/oozie -kill 0000004-170425105153692-oozie-z-W