// Implementation as follows:

package com.scala.my

import org.apache.spark.streaming.StreamingContext
import org.apache.spark.SparkConf
import org.apache.spark.streaming.Durations
/**
 * Scala version of a streaming word count over files landing in an HDFS directory.
 *
 * Watches `hdfs://master:8020/wordcount_dir` for newly created text files,
 * splits each line on commas, and prints per-batch word counts to stdout.
 * Runs locally with two threads (`local[2]`: one receiver + one processor).
 */
object HdfsWordCount {
  def main(args: Array[String]): Unit = {
    // Create the StreamingContext with a 5-second batch interval.
    // (A previous comment claimed 6s; the code has always used 5s — comment fixed.)
    val ssc = new StreamingContext(
      new SparkConf().setAppName("hdfsCount").setMaster("local[2]"),
      Durations.seconds(5))
    // Monitor the HDFS directory; each new file's lines form the input DStream.
    val lines = ssc.textFileStream("hdfs://master:8020/wordcount_dir")
    // Flatten each line into individual words, splitting on commas.
    val words = lines.flatMap(_.split(","))
    // Pair every word with an initial count of 1.
    val pairs = words.map((_, 1))
    // Sum the counts per word within each batch.
    val counts = pairs.reduceByKey(_ + _)
    // print() shows the first 10 elements of each batch's RDD.
    counts.print()
    // Start the streaming computation.
    ssc.start()
    // Block until the context is stopped (error or external stop request).
    // NOTE: the original trailing stop() call after awaitTermination() was
    // unreachable dead code (awaitTermination only returns once the context
    // has already stopped), so it has been removed.
    ssc.awaitTermination()
  }
}