首先在 pom 文件中导入依赖。Hadoop 和 Hive 相关依赖的版本应与自己机器上安装的版本一致;Hive 除了 hive-service、hive-exec 之外,记得还要导入 hive-jdbc。
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.6.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-service</artifactId>
    <version>1.2.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-jdbc</artifactId>
    <version>1.2.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-exec</artifactId>
    <version>1.2.1</version>
</dependency>
新建UDF的Java类
package com.hive.udf; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.io.Text; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Date; /** * Created by lenovo on 2017/10/26. */ public class DateTransforUDF extends UDF{ public static final SimpleDateFormat inputFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); //匹配讲要格式化的日期格式 public static final SimpleDateFormat outputFormat=new SimpleDateFormat("yyyyMMddHHmmss"); //要格式化的最终格式 /* 2013-02-17 10:38:34 20130217103834 */ public Text evaluate(final Text input){ Text output=new Text(); if (null==input){ return null; } String inputDate = input.toString().trim(); if(null==inputDate){ return null; } try { Date parseDate = inputFormat.parse(inputDate); String outputDate = outputFormat.format(parseDate); output.set(outputDate); } catch (ParseException e) { e.printStackTrace(); return output; } return output; } public static void main(String[] args) { System.out.println(new DateTransforUDF().evaluate(new Text("2013-02-17 10:38:34"))); } }
打 jar 包,上传到 Linux 服务器。
hive (register_info)> add jar /root/hive_udf/BaseTest-1.0-SNAPSHOT.jar > ; Added [/root/hive_udf/BaseTest-1.0-SNAPSHOT.jar] to class path Added resources: [/root/hive_udf/BaseTest-1.0-SNAPSHOT.jar] hive (register_info)> create temporary function Tdate as 'com.hive.udf.DateTransforUDF'; OK Time taken: 0.006 seconds