首先Pom文件:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.example</groupId>
    <artifactId>ApacheBeamModel</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <beam.version>2.38.0</beam.version>
        <hive.version>3.1.2</hive.version>
        <jackson.version>2.13.0</jackson.version>
        <mysql.version>8.0.28</mysql.version>
        <kafka.version>2.3.1</kafka.version>
    </properties>

    <dependencies>
        <!-- Beam direct runner (local execution only, hence runtime scope) -->
        <dependency>
            <groupId>org.apache.beam</groupId>
            <artifactId>beam-runners-direct-java</artifactId>
            <version>${beam.version}</version>
            <scope>runtime</scope>
        </dependency>
        <!-- JdbcIO used by BeamJdbcModel for MySQL read/write -->
        <dependency>
            <groupId>org.apache.beam</groupId>
            <artifactId>beam-sdks-java-io-jdbc</artifactId>
            <version>${beam.version}</version>
        </dependency>
        <!-- JDBC driver; declared once (was duplicated) -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>${mysql.version}</version>
        </dependency>
        <!-- HDFS support. NOTE(review): this artifact replaces the old
             pre-2.0 incubating beam-sdks-java-io-hdfs:0.6.0, which was
             removed here because it pulls incompatible 0.x SDK classes
             alongside Beam ${beam.version}. -->
        <dependency>
            <groupId>org.apache.beam</groupId>
            <artifactId>beam-sdks-java-io-hadoop-file-system</artifactId>
            <version>${beam.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.beam</groupId>
            <artifactId>beam-sdks-java-io-hadoop-common</artifactId>
            <version>${beam.version}</version>
        </dependency>
        <!-- Hive access via HCatalog -->
        <dependency>
            <groupId>org.apache.beam</groupId>
            <artifactId>beam-sdks-java-io-hcatalog</artifactId>
            <version>${beam.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive.hcatalog</groupId>
            <artifactId>hive-hcatalog-core</artifactId>
            <version>${hive.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>${hive.version}</version>
        </dependency>
        <!-- Jackson, pinned to one version across all three modules -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-core</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-annotations</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <!-- Kafka source/sink -->
        <dependency>
            <groupId>org.apache.beam</groupId>
            <artifactId>beam-sdks-java-io-kafka</artifactId>
            <version>${beam.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>${kafka.version}</version>
        </dependency>
    </dependencies>
</project>
然后主类:
package com.yss.beam.jdbc;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.jdbc.JdbcIO;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.*;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.TypeDescriptors;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.Iterator;
/**
 * Example pipeline that reads rows from a MySQL table with Apache Beam,
 * counts occurrences per name (second column), scales each count by 10,
 * prints every result, and writes the (name, count) pairs back to MySQL.
 *
 * @author Edwards Wang
 */
public class BeamJdbcModel {

    /**
     * Connector/J 8.x driver class. The old name {@code com.mysql.jdbc.Driver}
     * is a deprecated alias in 8.x and only works via a redirect warning.
     */
    private static final String JDBC_DRIVER = "com.mysql.cj.jdbc.Driver";
    private static final String JDBC_URL = "jdbc:mysql://192.168.165.34:3306/development2_3";
    // NOTE(review): credentials are hard-coded for the demo; in real code pass
    // them via PipelineOptions or environment configuration, never in source.
    private static final String JDBC_USER = "root";
    private static final String JDBC_PASSWORD = "1q2w3eROOT!";

    public static void main(String[] args) {
        PipelineOptions options = PipelineOptionsFactory.create();
        Pipeline pipeline = Pipeline.create(options);

        // Shared by the read and write stages (was duplicated inline twice).
        JdbcIO.DataSourceConfiguration dataSource =
                JdbcIO.DataSourceConfiguration.create(JDBC_DRIVER, JDBC_URL)
                        .withUsername(JDBC_USER)
                        .withPassword(JDBC_PASSWORD);

        // Read every row of TestBeam and emit KV(name, 1) per row.
        PCollection<KV<String, Integer>> resultCollection = pipeline
                .apply(JdbcIO.<KV<String, Integer>>read()
                        .withDataSourceConfiguration(dataSource)
                        .withQuery("select * from TestBeam")
                        .withRowMapper(new JdbcIO.RowMapper<KV<String, Integer>>() {
                            @Override
                            public KV<String, Integer> mapRow(ResultSet resultSet) throws Exception {
                                String id = resultSet.getString(1);
                                String name = resultSet.getString(2);
                                System.out.println(id + ":" + name);
                                return KV.of(name, 1);
                            }
                        }))
                // Collect all the per-row 1s under each name.
                .apply(GroupByKey.<String, Integer>create())
                // Sum each group's values and multiply the total by 10.
                .apply(MapElements
                        .into(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers()))
                        .via(e -> {
                            Iterable<Integer> values = e.getValue();
                            if (values == null) {
                                throw new NullPointerException("null value iterable for key " + e.getKey());
                            }
                            int sum = 0; // primitive accumulator: avoids Integer autoboxing per iteration
                            for (Integer v : values) {
                                sum += v;
                            }
                            return KV.of(e.getKey(), sum * 10);
                        }))
                // Print each aggregated element and pass it through unchanged.
                .apply(ParDo.of(new DoFn<KV<String, Integer>, KV<String, Integer>>() {
                    @ProcessElement
                    public void processElement(ProcessContext context) {
                        KV<String, Integer> element = context.element();
                        System.out.println(element);
                        if (element != null) {
                            context.output(element);
                        }
                    }
                }));

        // Write the aggregated counts back to MySQL.
        resultCollection.apply(JdbcIO.<KV<String, Integer>>write()
                .withDataSourceConfiguration(dataSource)
                .withStatement("insert into TestBeamCount values(?,?)")
                .withPreparedStatementSetter(new JdbcIO.PreparedStatementSetter<KV<String, Integer>>() {
                    @Override
                    public void setParameters(KV<String, Integer> element, PreparedStatement preparedStatement) throws Exception {
                        preparedStatement.setString(1, element.getKey());
                        preparedStatement.setInt(2, element.getValue());
                    }
                }));

        // Block until the pipeline finishes so the JVM does not exit early
        // on runners whose run() is non-blocking.
        pipeline.run().waitUntilFinish();
    }
}