// D:/Develop/kafka_2.10-0.8.2.1/bin/windows/kafka-topics.bat --describe --zookeeper localhost:2181 --topic test
// Connect to Kafka:
package com.baibaiw5.ml.antispam
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{ Seconds, StreamingContext }
import org.apache.spark.Logging
/**
 * Streaming word count over messages consumed from Kafka.
 *
 * Connects through ZooKeeper at `localhost:2181` as consumer group `my-group`,
 * reads the `test` topic in 5-second batches, splits every message on single
 * spaces and prints the word counts of each batch.
 */
object EmailSpamStreamingApp extends Logging {

  /**
   * Builds the streaming pipeline, starts it and blocks until the streaming
   * context terminates.
   *
   * @param args command-line arguments; currently unused
   */
  def main(args: Array[String]): Unit = {
    logInfo("start EmailSpamStreamingApp")

    val zk = "localhost:2181"
    val group = "my-group"
    val topics = "test"

    // NOTE(review): the master is hard-coded to two local threads; presumably one is
    // taken by the Kafka receiver and the other processes the batches — confirm
    // before running this anywhere but locally.
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("EmailSpamStreamingApp")
    val ssc = new StreamingContext(sparkConf, Seconds(5))

    // Consumer threads per topic; should be less than the number of CPU cores
    // and less than the N in local[N].
    val numThreads = 1
    val topicMap = topics.split(",").map(topic => topic -> numThreads).toMap

    logInfo("create kafka consumer")
    // Each stream element is a pair; only its second component (the message
    // payload) is used from here on.
    val lines = KafkaUtils
      .createStream(ssc, zk, group, topicMap, StorageLevel.MEMORY_AND_DISK_SER)
      .map(_._2)

    // split(" ") yields empty tokens for blank messages and for leading or
    // repeated spaces; drop them so "" is never counted as a word.
    val words = lines.flatMap(_.split(" ")).filter(_.nonEmpty)
    val wordCounts = words.map(word => (word, 1)).reduceByKey(_ + _)
    wordCounts.print()

    logInfo("start streamnig")
    ssc.start()
    ssc.awaitTermination()
  }
}