当前位置: 首页 > 工具软件 > Spark Kernel > 使用案例 >

【spark】spark+kafka

罗智志
2023-12-01
:启动kafka
MobaXterm_Personal_8.5.exe
D:/Develop/kafka_2.10-0.8.2.1/bin/windows/zookeeper-server-start.bat   D:/Develop/kafka_2.10-0.8.2.1/config/zookeeper.properties
D:/Develop/kafka_2.10-0.8.2.1/bin/windows/kafka-server-start.bat   D:/Develop/kafka_2.10-0.8.2.1/config/server.properties
D:/Develop/kafka_2.10-0.8.2.1/bin/windows/kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test
D:/Develop/kafka_2.10-0.8.2.1/bin/windows/kafka-topics.bat --list --zookeeper localhost:2181
D:/Develop/kafka_2.10-0.8.2.1/bin/windows/kafka-console-producer.bat --broker-list localhost:9092 --topic test
D:/Develop/kafka_2.10-0.8.2.1/bin/windows/kafka-console-consumer.bat --zookeeper localhost:2181 --topic test --from-beginning

D:/Develop/kafka_2.10-0.8.2.1/bin/windows/kafka-topics.bat --describe --zookeeper localhost:2181 --topic test

:连接kafka

package com.baibaiw5.ml.antispam

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{ Seconds, StreamingContext }
import org.apache.spark.Logging

/**
 * Word-count demo that consumes a Kafka topic through Spark Streaming.
 *
 * All connection settings are hard-coded in [[main]]: ZooKeeper at `localhost:2181`,
 * consumer group `my-group`, topic list `test`, a `local[2]` master and a
 * 5-second batch interval.
 */
object EmailSpamStreamingApp extends Logging {

  /**
   * Builds the streaming pipeline, starts it and blocks until the streaming
   * context terminates.
   *
   * @param args command-line arguments; not read by this method
   */
  def main(args: Array[String]): Unit = {
    logInfo("start EmailSpamStreamingApp")

    val zk = "localhost:2181"
    val group = "my-group"
    // Comma-separated list of topic names.
    val topics = "test"

    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("EmailSpamStreamingApp")
    val ssc = new StreamingContext(sparkConf, Seconds(5))

    // Original author's note: this should be less than the number of CPU cores
    // and less than the thread count given to local[2].
    val numThreads = 1
    // Associate every topic name with the same thread count.
    val topicMap = topics.split(",").map(topic => (topic, numThreads)).toMap

    logInfo("create kafka consumer")
    // Keep only the second element of each received pair
    // (presumably the message payload rather than the key — confirm against the KafkaUtils docs).
    val lines = KafkaUtils
      .createStream(ssc, zk, group, topicMap, StorageLevel.MEMORY_AND_DISK_SER)
      .map(_._2)

    // Split each line on single spaces, then count occurrences of each word per batch.
    val wordCounts = lines
      .flatMap(_.split(" "))
      .map(word => (word, 1))
      .reduceByKey(_ + _)
    wordCounts.print()

    logInfo("start streamnig")
    ssc.start()
    ssc.awaitTermination()
  }
}



 类似资料: