We need to develop a Spark project; the development language is Java.
2 Steps
2.1 Create the project
Create a Maven project; how to do that is not covered in this document.
2.2 Add the pom dependencies
<properties>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <spark.version>2.3.3</spark.version>
    <commons-codec.version>1.15</commons-codec.version>
</properties>

<dependencies>
    <!-- spark -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>${spark.version}</version>
        <exclusions>
            <exclusion>
                <groupId>commons-codec</groupId>
                <artifactId>commons-codec</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <!-- commons-codec -->
    <dependency>
        <groupId>commons-codec</groupId>
        <artifactId>commons-codec</artifactId>
        <version>${commons-codec.version}</version>
    </dependency>
</dependencies>
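The exclusion above strips the commons-codec that spark-core pulls in transitively, so the explicitly declared 1.15 dependency is the one that gets used. If you want to confirm at runtime which commons-codec jar actually ends up on the classpath, a small check like the sketch below can help (the class name CodecVersionCheck is only an illustrative name, not part of the project above):

import org.apache.commons.codec.binary.Base64;

public class CodecVersionCheck {

    public static void main(String[] args) {
        // Print the location of the jar that the commons-codec classes were loaded from;
        // with the exclusion in place it should point at commons-codec-1.15.jar.
        System.out.println(Base64.class.getProtectionDomain().getCodeSource().getLocation());
    }
}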
2.3 Create a class (named SparkDemo here) and a main method
import java.util.ArrayList;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class SparkDemo {

    public static void main(String[] args) {
        // Simple demo: build a list containing the three numbers 1, 2, 3
        List<Integer> array = new ArrayList<>();
        array.add(1);
        array.add(2);
        array.add(3);
        // Set up the Spark environment
        SparkConf conf = new SparkConf().setAppName("map-reduce").setMaster("local");
        JavaSparkContext javaSparkContext = new JavaSparkContext(conf);
        // Simple Spark processing: distribute the list as an RDD and count its elements
        JavaRDD<Integer> parallelize = javaSparkContext.parallelize(array);
        long count = parallelize.count();
        System.out.println(count);
        // Release Spark resources
        javaSparkContext.close();
    }
}
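Running this main method with setMaster("local") starts an embedded local Spark and prints 3, the number of elements in the RDD. Since the application is named map-reduce, a natural extension is an actual map/reduce pair. The lines below are a minimal sketch of that, meant to be added inside the main method above, after the count and before javaSparkContext.close(); the variable names doubled and sum are just for illustration:

// Double each element with a map transformation, then add the results up with reduce;
// for the input list 1, 2, 3 this prints 12.
JavaRDD<Integer> doubled = parallelize.map(x -> x * 2);
Integer sum = doubled.reduce((a, b) -> a + b);
System.out.println(sum);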