1,258
社区成员
发帖
与我相关
我的任务
分享
// NOTE(review): illustrative snippet only — the literal "..." inside
// KafkaUtils.createStream(...) is a placeholder, so this does not compile as-is.
// Purpose: parallelize Kafka consumption by creating several receiver-based
// input streams and merging them into a single DStream for processing.
int numStreams = 5;
// Pre-sized to numStreams; holds one receiver stream per slot.
List<JavaPairDStream<String, String>> kafkaStreams = new ArrayList<JavaPairDStream<String, String>>(numStreams);
for (int i = 0; i < numStreams; i++) {
kafkaStreams.add(KafkaUtils.createStream(...));
}
// JavaStreamingContext.union(first, rest) takes the first stream plus a list
// of the remaining ones — hence get(0) + subList(1, size).
JavaPairDStream<String, String> unifiedStream = streamingContext.union(kafkaStreams.get(0), kafkaStreams.subList(1, kafkaStreams.size()));
unifiedStream.print();
// Command-line arguments: args[0] = Kafka consumer group id,
// args[1] = ZooKeeper connection string, args[3] = threads/partitions per topic.
// NOTE(review): args[2] is never read while the topic names are hard-coded
// ("tpsN5a"/"tpsN5b") — presumably args[2] once carried the topic list; verify.
String groupId = args[0];
String zookeepers = args[1];
String topics = "tpsN5a";
Integer numPartitions = Integer.parseInt(args[3]);
// Map of topic name -> number of consumer threads for that topic.
Map<String, Integer> topicsMap = new HashMap<String, Integer>();
for (String topic : topics.split(",")) {
topicsMap.put(topic, numPartitions);
}
// Batch interval: how often a micro-batch is cut and processed (every 2s).
Duration batchInterval = Durations.seconds(2);
SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaConsumerWordCount");
JavaStreamingContext ssc = new JavaStreamingContext(sparkConf,
batchInterval);
// First receiver-based Kafka stream (topic "tpsN5a"), storing received
// blocks serialized in memory with disk spill-over.
JavaPairReceiverInputDStream<String, String> kafkaStream = KafkaUtils
.createStream(ssc, zookeepers, groupId, topicsMap, StorageLevel.MEMORY_AND_DISK_SER());
// Second receiver for a second topic ("tpsN5b"), same group id and settings.
String topics2 = "tpsN5b";
Map<String, Integer> topicsMap2 = new HashMap<String, Integer>();
topicsMap2.put(topics2, numPartitions);
JavaPairReceiverInputDStream<String, String> kafkaStream2 = KafkaUtils
.createStream(ssc, zookeepers, groupId, topicsMap2, StorageLevel.MEMORY_AND_DISK_SER());
// Collect both receiver streams so they can be unioned below.
List<JavaPairDStream<String, String>> kafkaStreams = new ArrayList<JavaPairDStream<String, String>>(2);
kafkaStreams.add(kafkaStream);
kafkaStreams.add(kafkaStream2);
// Enable checkpointing to the given path.
ssc.checkpoint("/spark/stream/checkpoint/d1");
// Merge the two receiver streams into one DStream; union(first, rest) takes
// the first stream plus a list of the remaining ones.
JavaPairDStream<String, String> unifiedStream = ssc.union(kafkaStreams.get(0), kafkaStreams.subList(1, kafkaStreams.size()));
// Map each (key, message) pair to the message payload (_2 / _2(); _1 is the
// Kafka key), logging every record with the handling thread's name.
JavaDStream<String> lines = unifiedStream//kafkaStream
.map(new Function<Tuple2<String, String>, String>() {
@Override
public String call(Tuple2<String, String> arg0)
throws Exception {
// NOTE(review): warn-level logging of every record is very noisy —
// presumably left over from debugging; confirm before production use.
logger.warn(Thread.currentThread().getName() + " msg1:" + arg0._1 + "|msg2:" + arg0._2);
return arg0._2();
}
});