当前位置: 首页 > 工具软件 > Oozie > 使用案例 >

Oozie 集成 Hive

董俊
2023-12-01

1) prepare hive2 case
$ cd ~/work/oozie-5.2.1
$ tree oozie/apps/hive2

oozie/apps/hive2
├── job.properties
├── script.q
└── workflow.xml

$ cat oozie/apps/hive2/script.q

-- Recreate the demo table as an EXTERNAL table whose data lives in the
-- HDFS directory passed in by the workflow as ${INPUT}.
DROP TABLE IF EXISTS test;
CREATE EXTERNAL TABLE test (a INT) STORED AS TEXTFILE LOCATION '${INPUT}';
-- Single multi-row INSERT (Hive >= 0.14): launches one job instead of the
-- three separate jobs that three single-row INSERTs would each spawn.
INSERT INTO test VALUES (10), (20), (30);
-- INSERT OVERWRITE DIRECTORY '${OUTPUT}' SELECT * FROM test;

$ cat oozie/apps/hive2/job.properties

# HDFS NameNode endpoint; resolves every hdfs:// path referenced below
nameNode=hdfs://localhost:9000
# YARN ResourceManager address that the launched action jobs are submitted to
resourceManager=localhost:8032
# YARN queue used by the hive2 action (passed through in workflow.xml)
queueName=default
# HiveServer2 JDBC endpoint the hive2 (Beeline) action connects to
jdbcURL=jdbc:hive2://localhost:10000/default
# Base HDFS directory (relative to /) holding this user's Oozie apps and data
oozieRoot=user/${user.name}/oozie

# Make the Oozie sharelib (including the hive2 action jars) available to the job
oozie.use.system.libpath=true

# HDFS directory containing workflow.xml for this application
oozie.wf.application.path=${nameNode}/${oozieRoot}/apps/hive2
# Input/output directories, relative to /${oozieRoot}, passed as script params
inputDir=data/hive2/table
outputDir=data/hive2/output

$ cat oozie/apps/hive2/workflow.xml

<!-- Oozie workflow with a single hive2 action: runs script.q on HiveServer2
     via Beeline, then transitions to end on success or fail on error. -->
<workflow-app xmlns="uri:oozie:workflow:1.0" name="hive2-wf">
    <start to="hive2-node"/>

    <action name="hive2-node">
        <hive2 xmlns="uri:oozie:hive2-action:1.0">
            <resource-manager>${resourceManager}</resource-manager>
            <name-node>${nameNode}</name-node>
            <!-- Clean and recreate the output directory before each run so
                 reruns do not fail on an existing path. -->
            <prepare>
                <delete path="/${oozieRoot}/${outputDir}"/>
                <mkdir path="/${oozieRoot}/${outputDir}"/>
            </prepare>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <!-- HiveServer2 connection; the script below is executed there. -->
            <jdbc-url>${jdbcURL}</jdbc-url>
            <script>script.q</script>
            <!-- Substituted into script.q as ${INPUT} / ${OUTPUT}. -->
            <param>INPUT=/${oozieRoot}/${inputDir}</param>
            <param>OUTPUT=/${oozieRoot}/${outputDir}</param>
        </hive2>
        <ok to="end"/>
        <error to="fail"/>
    </action>

    <kill name="fail">
        <message>Hive2 (Beeline) action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <end name="end"/>
</workflow-app>

2) upload to hdfs
$ hdfs dfs -put oozie/apps/hive2 oozie/apps/

3) run and check
First, make sure the Hive Metastore and HiveServer2 services have been started.
$ bin/oozie job -config oozie/apps/hive2/job.properties -run  
job: 0000000-220630151459153-oozie-sun_-W
$ bin/oozie job -info 0000000-220630151459153-oozie-sun_-W

Job ID : 0000000-220630151459153-oozie-sun_-W
------------------------------------------------------------------------------------------------------------------------------------
Workflow Name : hive2-wf
App Path      : hdfs://localhost:9000/user/sun_xo/oozie/apps/hive2
Status        : SUCCEEDED
Run           : 0
User          : sun_xo
Group         : -
Created       : 2022-06-30 09:58 GMT
Started       : 2022-06-30 09:58 GMT
Last Modified : 2022-06-30 09:59 GMT
Ended         : 2022-06-30 09:59 GMT
CoordAction ID: -

Actions
------------------------------------------------------------------------------------------------------------------------------------
ID                                                                            Status    Ext ID                 Ext Status Err Code  
------------------------------------------------------------------------------------------------------------------------------------
0000000-220630151459153-oozie-sun_-W@:start:                                  OK        -                      OK         -         
------------------------------------------------------------------------------------------------------------------------------------
0000000-220630151459153-oozie-sun_-W@hive2-node                               OK        application_1656559415643_0018 SUCCEEDED  -         
------------------------------------------------------------------------------------------------------------------------------------
0000000-220630151459153-oozie-sun_-W@end                                      OK        -                      OK         -         
------------------------------------------------------------------------------------------------------------------------------------

$ hdfs dfs -text "oozie/data/hive2/table/*"

10
20
30

And you can get relevant job log as following:
$ hdfs dfs -get /tmp/logs/sun_xo/logs/application_1656559415643_0018 logs/

 类似资料: