scala连接mongodb_Spark连接MongoDB之Scala

公冶才
2023-12-01

MongoDB Connector for Spark

Spark Connector Scala Guide

spark-shell --jars "mongo-spark-connector_2.11-2.0.0.jar,mongo-hadoop-core-2.0.2.jar,mongo-java-driver-3.4.2.jar"

import org.apache.spark.sql.SparkSession

import com.mongodb.spark._

import com.mongodb.spark.config._

import org.bson.Document

val spark = SparkSession.builder()

.master("local")

.appName("MongoSparkConnector")

.config("spark.some.config.option", "some-value")

.getOrCreate()

val uri = "mongodb://172.1.1.1:27017"

val userDF = spark.sql("""

select

uid,

name,

current_date() version

from test_table

limit 100

""").repartition(8)

// Write to MongoDB

userDF.write.mode("overwrite").format("com.mongodb.spark.sql").options(

Map(

"uri" -> uri,

"database" -> "test",

"collection" -> "test_table")).save()

// Read From MongoDB

val df = spark.read.format("com.mongodb.spark.sql").options(

Map(

"uri" -> uri,

"database" -> "test",

"collection" -> "test_table")).load()

// 其他方式

userDF.write.mode("overwrite").format("com.mongodb.spark.sql").options(

Map(

"spark.mongodb.input.uri" -> uri,

"spark.mongodb.output.uri" -> uri,

"spark.mongodb.output.database" -> "test",

"spark.mongodb.output.collection" -> "test_table")).save()

MongoSpark.save(

userDF.write.mode("overwrite").options(

Map(

"spark.mongodb.input.uri" -> uri,

"spark.mongodb.output.uri" -> uri,

"spark.mongodb.output.database" -> "test",

"spark.mongodb.output.collection" -> "test_table")))

MongoSpark.save(

userDF.write.mode("overwrite").options(

Map(

"uri" -> uri,

"database" -> "test",

"collection" -> "test_table")))

spark.stop()

 类似资料: