// NOTE(review): the following lines were non-code artifacts scraped from a web
// page ("1,258", "社区成员" [community members], "发帖" [post], "与我相关"
// [related to me], "我的任务" [my tasks], "分享" [share]) and have been
// converted to this comment so the file compiles.
object AdsClick1 {

  /** Entry point: consumes ad-click events from a Kafka topic via the 0.8
    * direct-stream API, counts clicks per ad id over a sliding window
    * (10s window, 2s slide), prints the counts, and commits consumed
    * offsets back to Kafka so the group's position survives restarts.
    *
    * Expected record format (CSV in the Kafka message value):
    *   field(0) = timestamp, field(1) = ad id, field(2)/field(3) = extra
    *   dimensions (presumably province/city — TODO confirm with producer).
    *
    * @param args unused command-line arguments
    */
  def main(args: Array[String]): Unit = {
    val kafkaBrokerList = "192.168.1.114:9092,192.168.1.115:9092,192.168.1.116:9092"
    val groupId = "SparkStreaming-ads-test"
    val topicSet = Set[String]("ads_test")

    // "largest" = start from the latest offset when no committed offset exists.
    val kafkaParamMap = Map[String, String](
      "group.id" -> groupId,
      "metadata.broker.list" -> kafkaBrokerList,
      "auto.offset.reset" -> "largest")

    val conf = new SparkConf()
      // FIX: original source had a garbled, unterminated master URL here
      // (`.setMaster("local- "` / `- )`), which is a syntax error.
      .setMaster("local[*]")
      .setAppName(s"${this.getClass.getSimpleName}")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .set("spark.task.maxFailures", "16")
    val sc = new SparkContext(conf)
    sc.setLogLevel("WARN")

    // 2-second batch interval; checkpointing is required by reduceByKeyAndWindow
    // with an inverse function.
    val streamingContext = new StreamingContext(sc, Seconds(2))
    streamingContext.checkpoint("checkpoint-ads")

    val directStream: InputDStream[(String, String)] =
      KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
        streamingContext, kafkaParamMap, topicSet)

    // Driver-side helper for committing offsets; create it once rather than
    // once per batch (FIX: original constructed it inside foreachRDD).
    val kc = new KafkaCluster(kafkaParamMap)

    // Commit the until-offset of every partition after each batch is formed.
    // NOTE(review): offsets are committed independently of the windowed
    // processing below, giving at-most-once semantics — confirm acceptable.
    directStream.foreachRDD(rdd => {
      val offsetsList = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      // FIX: original used `val` with an immutable Map and then `+=`, which
      // does not compile; `var` makes `+=` a reassignment with an updated map.
      var offsetMap = Map[TopicAndPartition, Long]()
      for (offsets <- offsetsList) {
        val tp = TopicAndPartition(offsets.topic, offsets.partition)
        offsetMap += (tp -> offsets.untilOffset)
      }
      kc.setConsumerOffsets(groupId, offsetMap)
    })

    // (adId, (timestamp, dim1, dim2, 1)) — the trailing 1 is the click count.
    val adsPairRDD: DStream[(String, (String, String, String, Integer))] =
      directStream.map(item => {
        val fields: Array[String] = item._2.split(",")
        (fields(1), (fields(0), fields(2), fields(3), 1))
      })

    // Sliding window count per ad id: add counts entering the window and
    // subtract counts leaving it (inverse function keeps this incremental).
    val reduceWindowRDD: DStream[(String, (String, String, String, Integer))] =
      adsPairRDD.reduceByKeyAndWindow(
        (a, b) => (a._1, a._2, a._3, a._4 + b._4),
        (a, b) => (a._1, a._2, a._3, a._4 - b._4),
        Seconds(10),
        Seconds(2))

    // Debug sink: pull each windowed batch to the driver and print it.
    reduceWindowRDD.foreachRDD(rdd => {
      rdd.collect().foreach(item => {
        println(item)
      })
    })

    streamingContext.start()
    streamingContext.awaitTermination()
  }
}