51,408
社区成员
发帖
与我相关
我的任务
分享
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
public class SimpleApp {
public static void main(String[] args) {
// 创建一个Java版本的Spark Context
SparkConf conf = new SparkConf().setAppName("wordCount").setMaster("local");
JavaSparkContext sc = new JavaSparkContext(conf); // 读取我们的输入数据
JavaRDD<String> input = sc.textFile("/bigdata/softwares/spark-2.1.0-bin-hadoop2.7/testdata/a.txt");
System.out.println();
Long bCount = input.filter(new Function<String,Boolean>(){
public Boolean call(String s){return s.contains("yes");}
}).count();
Long cCount = input.filter(new Function<String,Boolean>(){
public Boolean call(String s){return s.contains("ywq");}
}).count();
System.out.println("yes:"+bCount+" ywq:"+cCount+" 总:");
// sc.stop();
}
}
<dependencies>
<dependency> <!-- Spark dependency: the Spark core library that must be added for the application -->
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.1.0</version>
</dependency>
</dependencies>
<build>
<plugins> <!-- Maven shade plugin, used to build an uber (fat) JAR -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.3</version>
<executions>
<execution>
<!-- Run the shade goal during the package phase -->
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>

