安装 Maven 项目构建与依赖管理工具,版本要求为 3.2 及以上。新建 Maven 项目,在 pom.xml 中配置 Spark 相关依赖,并在代码中导入必要的包。下面依次给出 Scala 版和 Java 版的 WordCount 示例。
- 1.// Word count on Spark (Scala): build a configuration with master "local" and app name "scala-wc".
- 2.val conf = new SparkConf().setMaster("local").setAppName("scala-wc")
- 3.val sc = new SparkContext(conf)
- 4.// Read the input file as an RDD of lines, then split every line on single spaces into words.
- 5.val lines = sc.textFile("./data/words")
- 6.val words = lines.flatMap(_.split(" "))
- 7.// Pair each word with an initial count of 1, then sum the counts that share the same word.
- 8.val pairWords = words.map(word => (word, 1))
- 9.val result = pairWords.reduceByKey(_ + _)
- 10.// Print every (word, count) pair.
- 11.result.foreach(println)
- 12.// Stop the SparkContext once the job is done so its resources are released (the Java listing does the same).
- 13.sc.stop()
- 1.// Word count on Spark (Java), written with lambdas instead of anonymous inner classes.
- 2.SparkConf conf = new SparkConf().setMaster("local").setAppName("java-wc");
- 3.JavaSparkContext sc = new JavaSparkContext(conf);
- 4.// Load the input file as lines, then split every line on single spaces into words.
- 5.JavaRDD<String> lines = sc.textFile("./data/words");
- 6.JavaRDD<String> words = lines.flatMap(line -> Arrays.asList(line.split(" ")).iterator());
- 7.// Emit (word, 1) for each word and add up the counts that share a key.
- 8.JavaPairRDD<String, Integer> pairWords = words.mapToPair(word -> new Tuple2<>(word, 1));
- 9.JavaPairRDD<String, Integer> result = pairWords.reduceByKey((left, right) -> left + right);
- 10.// Print every (word, count) tuple. An explicit lambda is used rather than System.out::println
- 11.// so that the PrintStream instance is not captured by the function object.
- 12.result.foreach(pair -> System.out.println(pair));
- 13.// Shut the context down once the job has finished.
- 14.sc.stop();