-------------------------------------------------------VM模版--------------------------------------------------------------
1.)
安装VM (虚拟机设置-》硬件-》网络适配器-》桥接模式) 所有主机root 123456
vi /etc/sysconfig/network-scripts/ifcfg-eth0
-------设置固定IP-------
DEVICE=eth0
TYPE=Ethernet
ONBOOT=yes
BOOTPROTO=static
IPADDR=192.168.0.55
NETMASK=255.255.255.0
DNS1="114.114.114.114"
GATEWAY=192.168.0.1
---end-设置固定IP-------
2.) ssh-开启 防火墙-关闭 windows 关闭防火墙
#---------------------------------------SSH链接-----------------------------------------------
a.) 关掉防火墙 service iptables status(查看状态) service iptables stop(临时关掉) chkconfig iptables off(永久关掉需要reboot 重启)
修改/etc/selinux/config 文件 将SELINUX=enforcing改为SELINUX=disabled 重启机器即可
b.) 打开sshd服务 service sshd status(查看状态) service sshd restart chkconfig sshd on (永久开启需要reboot)
c.) 查看ip ifconfig eth0 inet addr:(ip)
#---------------------------------end---SSH链接-----------------------------------------------
3.)host映射
linux
/etc/hosts 文件配置本地IP到host映射
增加一行 192.168.0.55 spark001
windows
C:\Windows\System32\drivers\etc
增加一行 192.168.0.55 spark001
4.)配置DNS服务器 ,替换repo文件,配置yum
vi /etc/resolv.conf
nameserver 114.114.114.114
ping www.baidu.com
-----------------------centOS6-Base-163.repo---------------------
# CentOS-Base.repo
#
# The mirror system uses the connecting IP address of the client and the
# update status of each mirror to pick mirrors that are updated to and
# geographically close to the client. You should use this for CentOS updates
# unless you are manually picking other mirrors.
#
# If the mirrorlist= does not work for you, as a fall back you can try the
# remarked out baseurl= line instead.
#
#
[base]
name=CentOS-$releasever - Base - 163.com
baseurl=http://mirrors.163.com/centos/$releasever/os/$basearch/
#mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=os
gpgcheck=0
gpgkey=http://mirror.centos.org/centos/RPM-GPG-KEY-CentOS-6
#released updates
[updates]
name=CentOS-$releasever - Updates - 163.com
baseurl=http://mirrors.163.com/centos/$releasever/updates/$basearch/
#mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=updates
gpgcheck=0
gpgkey=http://mirror.centos.org/centos/RPM-GPG-KEY-CentOS-6
#additional packages that may be useful
[extras]
name=CentOS-$releasever - Extras - 163.com
baseurl=http://mirrors.163.com/centos/$releasever/extras/$basearch/
#mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=extras
gpgcheck=0
gpgkey=http://mirror.centos.org/centos/RPM-GPG-KEY-CentOS-6
#additional packages that extend functionality of existing packages
[centosplus]
name=CentOS-$releasever - Plus - 163.com
baseurl=http://mirrors.163.com/centos/$releasever/centosplus/$basearch/
#mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=centosplus
gpgcheck=0
enabled=0
gpgkey=http://mirror.centos.org/centos/RPM-GPG-KEY-CentOS-6
#contrib - packages by Centos Users
[contrib]
name=CentOS-$releasever - Contrib - 163.com
baseurl=http://mirrors.163.com/centos/$releasever/contrib/$basearch/
#mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=contrib
gpgcheck=0
enabled=0
gpgkey=http://mirror.centos.org/centos/RPM-GPG-KEY-CentOS-6
-----------------end---centOS6-Base-163.repo---------------------
cd /usr/local/
rm -rf *
将centOS6-Base-163.repo上传到centos中的/usr/local目录下
cd /etc/yum.repos.d/
rm -rf *
cp /usr/local/centOS6-Base-163.repo /etc/yum.repos.d/
修改 centOS6-Base-163.repo 里面 gpgcheck 为 0
配置yum
yum clean all
yum makecache
yum install telnet
5.)安装jdk
配置方案一
a.) 修改 vim /etc/profile
在文件最下面增加(也是unset -f pathmunge 下方)
JAVA_HOME=/root/lzmInstall/jdk/jdk1.7.0_17
CLASSPATH=$JAVA_HOME/lib;$JAVA_HOME/jre/lib
PATH=$PATH:$JAVA_HOME/bin;$JAVA_HOME/jre/bin
export PATH CLASSPATH JAVA_HOME
b.) reboot 重启动
配置方案二
a.)
centos版本 新增加 vi /etc/profile.d/java.sh
JAVA_HOME=/root/lzmInstall/jdk/jdk1.7.0_17
CLASSPATH=$JAVA_HOME/lib;$JAVA_HOME/jre/lib
PATH=$PATH:$JAVA_HOME/bin;$JAVA_HOME/jre/bin
export PATH CLASSPATH JAVA_HOME
b.)不用重新启动用 source /etc/profile
安装好了确认
echo $JAVA_HOME
echo $PATH
--------------------------------------------end-----VM模版-----------------------------------------------------------------
windows linux hosts 设置
192.168.0.55 spark001 (主节点)
192.168.0.56 spark002 (从节点)
192.168.0.57 spark003 (从节点)
配置集群 SSH 免密码登录
-------------三台分别执行-----------
ssh-keygen -t rsa
cd /root/.ssh/
cp id_rsa.pub authorized_keys
-------end---三台分别执行-----------
spark001上执行
ssh-copy-id -i spark002
ssh-copy-id -i spark003
spark002上执行
ssh-copy-id -i spark001
ssh-copy-id -i spark003
spark003上执行
ssh-copy-id -i spark001
ssh-copy-id -i spark002
CDH hadoop安装
------------------------------------spark001 CDH hadoop配置-------------------------------------------------------
http://archive.cloudera.com/cdh5/cdh/5/ 下载 hadoop-2.5.0-cdh5.3.6.tar.gz
上传到spark001 /usr/local/
cd /usr/local/
tar -zxvf hadoop-2.5.0-cdh5.3.6.tar.gz
rm -rf hadoop-2.5.0-cdh5.3.6.tar.gz
mv hadoop-2.5.0-cdh5.3.6 hadoop 重命名
mkdir data (存放数据)
配置hadoop 相关环境变量 vi /etc/profile.d/java.sh
JAVA_HOME=/root/module/jdk1.7.0_17
HADOOP_HOME=/usr/local/hadoop
CLASSPATH=$JAVA_HOME/lib;$JAVA_HOME/jre/lib
PATH=$PATH:$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export PATH CLASSPATH JAVA_HOME
不用重新启动用 source /etc/profile
修改hadoop 配置文件 core-site.xml /usr/local/hadoop/etc/hadoop/core-site.xml
<!-- hdfs 地址 -->
<property>
<name>fs.default.name</name>
<value>hdfs://spark001:9000</value>
</property>
修改hadoop 配置文件 hdfs-site.xml /usr/local/hadoop/etc/hadoop/hdfs-site.xml
<!-- hdfs namenode进程存放的目录 -->
<property>
<name>dfs.name.dir</name>
<value>/usr/local/data/namenode</value>
</property>
<!-- hdfs datanode进程存放的目录 -->
<property>
<name>dfs.data.dir</name>
<value>/usr/local/data/datanode</value>
</property>
<!-- hdfs 临时进程存放的目录 -->
<property>
<name>dfs.tmp.dir</name>
<value>/usr/local/data/tmp</value>
</property>
<!-- 每一份hdfs文件 它的每个一block副本数 -->
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
修改hadoop 配置文件 mapred-site.xml /usr/local/hadoop/etc/hadoop/mapred-site.xml
mv mapred-site.xml.template mapred-site.xml
<!-- mapreduce框架用yarn 不用mapreduce1 -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
修改hadoop 配置文件 yarn-site.xml /usr/local/hadoop/etc/hadoop/yarn-site.xml
<!-- resourcemanager位置 -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>spark001</value>
</property>
<!-- -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
修改hadoop 配置文件 slaves (从节点) /usr/local/hadoop/etc/hadoop/slaves
spark002
spark003
-------------------------end--------spark001 CDH hadoop配置-------------------------------------------------------
在spark002 spark003 配置hadoop
scp 将spark001 hadoop安装包 data jdk(source /etc/profile)
scp -r /usr/local/hadoop root@spark002:/usr/local
scp -r /usr/local/hadoop root@spark003:/usr/local
scp -r /usr/local/data root@spark002:/usr/local
scp -r /usr/local/data root@spark003:/usr/local
scp -r /etc/profile.d/java.sh root@spark002:/etc/profile.d/
source /etc/profile
scp -r /etc/profile.d/java.sh root@spark003:/etc/profile.d/
source /etc/profile
启动hdfs集群
spark001 格式化namenode hdfs namenode -format
spark001启动hdfs start-dfs.sh
jps查看进程 多
spark001 NameNode SecondaryNameNode (spark001:50070 查看web hdfs管理页面)
spark002 DataNode
spark003 DataNode
测试上传文件到HDFS上
vi hello.txt
hdfs dfs -put hello.txt /hello.txt 到spark001:50070 查看
启动yarn集群
spark001启动start-yarn.sh
jps查看进程 (多)
spark001 ResourceManager (spark001:8088 查看web yarn管理页面)
spark002 NodeManager
spark003 NodeManager
hive安装 cdh hive-0.13.1-cdh5.3.6
http://archive.cloudera.com/cdh5/cdh/5/ 下载 hive-0.13.1-cdh5.3.6
上传到spark001 /usr/local/
cd /usr/local/
tar -zxvf hive-0.13.1-cdh5.3.6.tar.gz
rm -rf hive-0.13.1-cdh5.3.6.tar.gz
mv hive-0.13.1-cdh5.3.6 hive 重命名
配置hive 相关环境变量 vi /etc/profile.d/java.sh
JAVA_HOME=/root/module/jdk1.7.0_17
HADOOP_HOME=/usr/local/hadoop
HIVE_HOME=/usr/local/hive
CLASSPATH=$JAVA_HOME/lib;$JAVA_HOME/jre/lib
PATH=$PATH:$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin
export PATH CLASSPATH JAVA_HOME
不用重新启动用 source /etc/profile
安装mysql (存hive元数据 存spark项目输出数据)
在spark001安装mysql
使用yum安装mysql server
yum install -y mysql-server
service mysqld start
chkconfig mysqld on
使用yum安装mysql connector
yum install -y mysql-connector-java
将mysql connector拷贝到hive的lib包中
cp /usr/share/java/mysql-connector-java-5.1.17.jar /usr/local/hive/lib
在mysql上创建hive元数据库,创建hive账号,并对hive进行授权
create database if not exists hive_metadata;
grant all privileges on hive_metadata.* to 'hive'@'%' identified by 'hive';
grant all privileges on hive_metadata.* to 'hive'@'localhost' identified by 'hive';
grant all privileges on hive_metadata.* to 'hive'@'spark001' identified by 'hive';
flush privileges;
use hive_metadata;
hive 配置
cd /usr/local/hive/conf
mv hive-default.xml.template hive-site.xml
<!-- mysql 连接地址 -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://spark001:3306/hive_metadata?createDatabaseIfNotExist=true</value>
</property>
<!-- mysql 驱动 -->
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<!-- mysql 用户名 -->
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
<description>username to use against metastore database</description>
</property>
<!-- mysql 密码 -->
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>hive</value>
<description>password to use against metastore database</description>
</property>
配置hive-env.sh hive-config.sh
mv hive-env.sh.template hive-env.sh
vi /usr/local/hive/bin/hive-config.sh
export JAVA_HOME=/root/module/jdk1.7.0_17
export HIVE_HOME=/usr/local/hive
export HADOOP_HOME=/usr/local/hadoop
验证hive是否安装成功
vi /usr/local/user.txt
1
张在
2
name
create table if not exists user(
id int,
name string
)row format delimited
fields terminated by '\t'
stored as textfile;
load data local inpath '/usr/local/user.txt' into table user;
select * from user;
drop table user;(删除) hive进入命令行 exit; 退出
zookeeper zookeeper-3.4.5-cdh5.3.6(kafka消息队列)
http://archive.cloudera.com/cdh5/cdh/5/ 下载 zookeeper-3.4.5-cdh5.3.6
上传到spark001 /usr/local/
cd /usr/local/
tar -zxvf zookeeper-3.4.5-cdh5.3.6.tar.gz
rm -rf zookeeper-3.4.5-cdh5.3.6.tar.gz
mv zookeeper-3.4.5-cdh5.3.6 zk 重命名
配置zookeeper 相关环境变量 vi /etc/profile.d/java.sh
JAVA_HOME=/root/module/jdk1.7.0_17
HADOOP_HOME=/usr/local/hadoop
HIVE_HOME=/usr/local/hive
ZOOKEEPER_HOME=/usr/local/zk
CLASSPATH=$JAVA_HOME/lib;$JAVA_HOME/jre/lib
PATH=$PATH:$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin:$ZOOKEEPER_HOME/bin
export PATH CLASSPATH JAVA_HOME
不用重新启动用 source /etc/profile
配置zoo.cfg
cd zk/conf
mv zoo_sample.cfg zoo.cfg
mkdir /usr/local/zk/data
cd /usr/local/zk/data
vi myid 写入0
vi zoo.cfg
修改: dataDir=/usr/local/zk/data
新增:
server.0=spark001:2888:3888
server.1=spark002:2888:3888
server.2=spark003:2888:3888
搭建zk集群
将spark001 zk包 jdk配置 拷贝到spark002 spark003
scp -r /usr/local/zk root@spark002:/usr/local/
scp -r /usr/local/zk root@spark003:/usr/local/
scp -r /etc/profile.d/java.sh root@spark002:/etc/profile.d/
source /etc/profile
scp -r /etc/profile.d/java.sh root@spark003:/etc/profile.d/
source /etc/profile
将spark002 /usr/local/zk/data/myid 文件内容修改成1
将spark003 /usr/local/zk/data/myid 文件内容修改成2
启动ZK集群
spark001 spark002 spark003
zkServer.sh start
检查zkServer.sh status状态
jps spark001 spark002 spark003 都会增加 QuorumPeerMain进程
安装scala-2.11.4.tgz
上传到spark001 /usr/local/
cd /usr/local/
tar -zxvf scala-2.11.4.tgz
rm -rf scala-2.11.4.tgz
mv scala-2.11.4 scala 重命名
配置scala 相关环境变量 vi /etc/profile.d/java.sh
JAVA_HOME=/root/module/jdk1.7.0_17
HADOOP_HOME=/usr/local/hadoop
HIVE_HOME=/usr/local/hive
ZOOKEEPER_HOME=/usr/local/zk
SCALA_HOME=/usr/local/scala
CLASSPATH=$JAVA_HOME/lib;$JAVA_HOME/jre/lib
PATH=$PATH:$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin:$ZOOKEEPER_HOME/bin:$SCALA_HOME/bin
export PATH CLASSPATH JAVA_HOME
不用重新启动用 source /etc/profile
scala -version
拷贝到spark002 spark003
scp -r /usr/local/scala root@spark002:/usr/local/
scp -r /usr/local/scala root@spark003:/usr/local/
scp -r /etc/profile.d/java.sh root@spark002:/etc/profile.d/
source /etc/profile
scp -r /etc/profile.d/java.sh root@spark003:/etc/profile.d/
source /etc/profile
安装kafka_2.9.2-0.8.1.tgz集群 (依赖zk)
上传到spark001 /usr/local/
cd /usr/local/
tar -zxvf kafka_2.9.2-0.8.1.tgz
rm -rf kafka_2.9.2-0.8.1.tgz
mv kafka_2.9.2-0.8.1 kafka 重命名
配置kafka
vi /usr/local/kafka/config/server.properties
broker.id:依次增长的整数,0,1,2,3,4, 集群中Broker的唯一id
zookeeper.connect=192.168.0.55:2181,192.168.0.56:2181,192.168.0.57:2181
解决kafka Unrecognized VM option 'UseCompressedOops'问题
vi bin/kafka-run-class.sh
# JVM performance options
if [ -z "$KAFKA_JVM_PERFORMANCE_OPTS" ]; then
KAFKA_JVM_PERFORMANCE_OPTS="-server -XX:+UseCompressedOops -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:+CMSScavengeBeforeRemark -XX:+DisableExplicitGC -Djava.awt.headless=true"
fi
去掉 -XX:+UseCompressedOops
安装slf4j
slf4j-1.7.6.zip window下解压 找出 slf4j-nop-1.7.6.jar拷贝到kafka的libs目录下
集群
将spark001 配置复制到spark002 spark003
scp -r /usr/local/kafka root@spark002:/usr/local/
scp -r /usr/local/kafka root@spark003:/usr/local/
spark002 vi /usr/local/kafka/config/server.properties 修改broker.id=1
spark003 vi /usr/local/kafka/config/server.properties 修改broker.id=2
启动kafka集群 (spark001 spark002 spark003 )
nohup /usr/local/kafka/bin/kafka-server-start.sh /usr/local/kafka/config/server.properties &
cat /usr/local/kafka/nohup.out(进入kafka目录里执行) cat /usr/local/zk/data/nohup.out
jps (spark001 spark002 spark003) 增加 Kafka 进程
测试kafka集群
创建队列(spark001)
bin/kafka-topics.sh --zookeeper 192.168.0.55:2181,192.168.0.56:2181,192.168.0.57:2181 --topic TestTopic --replication-factor 1 --partitions 1 --create
创建生产者(spark001)
bin/kafka-console-producer.sh --broker-list 192.168.0.55:9092,192.168.0.56:9092,192.168.0.57:9092 --topic TestTopic
消费者(重新打开一个spark001)
bin/kafka-console-consumer.sh --zookeeper 192.168.0.55:2181,192.168.0.56:2181,192.168.0.57:2181 --topic TestTopic --from-beginning
在生产者上打 hello world 查看消费者是否接收
flume-ng-1.5.0-cdh5.3.6安装
上传到spark001 /usr/local/
cd /usr/local/
tar -zxvf flume-ng-1.5.0-cdh5.3.6.tar.gz
rm -rf flume-ng-1.5.0-cdh5.3.6.tar.gz
mv apache-flume-1.5.0-cdh5.3.6-bin flume 重命名
配置flume 相关环境变量 vi /etc/profile.d/java.sh
JAVA_HOME=/root/module/jdk1.7.0_17
HADOOP_HOME=/usr/local/hadoop
HIVE_HOME=/usr/local/hive
ZOOKEEPER_HOME=/usr/local/zk
SCALA_HOME=/usr/local/scala
FLUME_HOME=/usr/local/flume
FLUME_HOME_CONF=$FLUME_HOME/conf
CLASSPATH=$JAVA_HOME/lib;$JAVA_HOME/jre/lib
PATH=$PATH:$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin:$ZOOKEEPER_HOME/bin:$SCALA_HOME/bin:$FLUME_HOME/bin
export PATH CLASSPATH JAVA_HOME
不用重新启动用 source /etc/profile
修改flume配置文件
mv /usr/local/flume/conf/flume-conf.properties.template /usr/local/flume/conf/flume-conf.properties
vi /usr/local/flume/conf/flume-conf.properties
agent1.sources=source1
agent1.sinks=sink1
agent1.channels=channel1
# 监控 /usr/local/logs 文件
agent1.sources.source1.type=spooldir
agent1.sources.source1.spoolDir=/usr/local/logs
agent1.sources.source1.channels=channel1
agent1.sources.source1.fileHeader=false
agent1.sources.source1.interceptors=i1
agent1.sources.source1.interceptors.i1.type=timestamp
agent1.channels.channel1.type=file
agent1.channels.channel1.checkpointDir=/usr/local/logs_tmp_cp
agent1.channels.channel1.dataDirs=/usr/local/logs_tmp
agent1.sinks.sink1.type=hdfs
agent1.sinks.sink1.hdfs.path=hdfs://spark001:9000/logs
agent1.sinks.sink1.hdfs.fileType=DataStream
agent1.sinks.sink1.hdfs.writeFormat=TEXT
agent1.sinks.sink1.hdfs.rollInterval=1
agent1.sinks.sink1.channel=channel1
agent1.sinks.sink1.hdfs.filePrefix=%Y-%m-%d
本地文件夹 mkdir /usr/local/logs
HDFS文件夹 hdfs dfs -mkdir /logs
启动flume-agent
flume-ng agent -n agent1 -c conf -f conf/flume-conf.properties -Dflume.root.logger=DEBUG,console
再启动spark001终端 在/usr/local/logs目录下 增加一个文件 testflume.txt
查看 hdfs dfs -ls /logs 查看内容hdfs dfs -text /logs/xxxx-xx-xxx.xxx
安装spark客户端 spark-1.5.1-bin-hadoop2.4.tgz
上传到spark001 /usr/local/
cd /usr/local/
tar -zxvf spark-1.5.1-bin-hadoop2.4.tgz
rm -rf spark-1.5.1-bin-hadoop2.4.tgz
mv spark-1.5.1-bin-hadoop2.4 spark 重命名
配置spark 相关环境变量 vi /etc/profile.d/java.sh
JAVA_HOME=/root/module/jdk1.7.0_17
HADOOP_HOME=/usr/local/hadoop
HIVE_HOME=/usr/local/hive
ZOOKEEPER_HOME=/usr/local/zk
SCALA_HOME=/usr/local/scala
FLUME_HOME=/usr/local/flume
FLUME_HOME_CONF=$FLUME_HOME/conf
SPARK_HOME=/usr/local/spark
CLASSPATH=$JAVA_HOME/lib;$JAVA_HOME/jre/lib
PATH=$PATH:$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin:$ZOOKEEPER_HOME/bin:$SCALA_HOME/bin:$FLUME_HOME/bin:$SPARK_HOME/bin
export PATH CLASSPATH JAVA_HOME
不用重新启动用 source /etc/profile
修改spark-env.sh
cp /usr/local/spark/conf/spark-env.sh.template /usr/local/spark/conf/spark-env.sh
vi /usr/local/spark/conf/spark-env.sh
export JAVA_HOME=/root/module/jdk1.7.0_17
SCALA_HOME=/usr/local/scala
HADOOP_HOME=/usr/local/hadoop
HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
用yarn-client模式提交spark作业
mkdir /usr/local/spark-study
cd /usr/local/spark-study
vi spark_pi.sh
------------------内容-------------------(待测试)
/usr/local/spark/bin/spark-submit \
--class org.apache.spark.examples.JavaSparkPi \
--master yarn-client \
--num-executors 1 \
--driver-memory 10m \
--executor-memory 10m \
--executor-cores 1 \
/usr/local/spark/lib/spark-examples-1.5.1-hadoop2.4.0.jar \
-----------end----内容-------------------
chmod 777 spark_pi.sh
用yarn-cluster模式提交spark作业
mkdir /usr/local/spark-study
cd /usr/local/spark-study
vi spark_pi.sh
------------------内容-------------------(没问题)
/usr/local/spark/bin/spark-submit \
--class org.apache.spark.examples.JavaSparkPi \
--master yarn-cluster \
--num-executors 1 \
--driver-memory 10m \
--executor-memory 10m \
--executor-cores 1 \
/usr/local/spark/lib/spark-examples-1.5.1-hadoop2.4.0.jar \
-----------end----内容-------------------
chmod 777 spark_pi.sh
集群时间同步
crontab -u root -l 查看集群指定用户(默认当前用户)所有定时任务
crontab -e
spark提交客户端和集群有什么区别
转载本文章为转载内容,我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题,欢迎原作者联系我们进行内容更正或删除文章。
提问和评论都可以,用心的回复会被更多人看到
评论
发布评论
相关文章
-
详解spark任务提交至yarn的集群和客户端模式spark 大数据