Tags (space-separated): kubernetes series
1: System Environment
OS: CentOS 7.9 x64
Kubernetes: v1.26.5, with KubeSphere as the web console
Calico: v3.26.1
containerd: 1.6.24
Hadoop: 3.3.6
Helm: 3.9.0
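These versions can be double-checked against a live cluster (an optional sanity check):
# node OS, kubelet, and container runtime (containerd) versions
kubectl get nodes -o wide
# Helm client version
helm version --short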
2: Deploying Spark
2.1 Software Preparation
mkdir -p /root/bigdata/spark && cd /root/bigdata/spark
Download:
wget https://archive.apache.org/dist/spark/spark-3.4.1/spark-3.4.1-bin-hadoop3.tgz
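Optionally verify the archive against the SHA-512 checksum Apache publishes next to it (the URL follows the standard dist layout; if sha512sum rejects the file's format, compare the printed digest by eye):
wget https://archive.apache.org/dist/spark/spark-3.4.1/spark-3.4.1-bin-hadoop3.tgz.sha512
sha512sum -c spark-3.4.1-bin-hadoop3.tgz.sha512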
2.2 Spark User Authorization
Create the namespace, service account, and RBAC binding that Spark will submit jobs with:
kubectl create ns spark
kubectl get ns
kubectl create serviceaccount spark -n spark
kubectl get sa -n spark
kubectl create clusterrolebinding spark-role --clusterrole=edit --serviceaccount=spark:spark
kubectl get ClusterRole,ClusterRoleBinding | grep spark
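A quick way to confirm the binding works is to let kubectl impersonate the service account; it should answer yes:
kubectl auth can-i create pods --as=system:serviceaccount:spark:spark -n spark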
2.3 Building the Spark Image
mkdir -p /root/bigdata/spark/image/
cat > /root/bigdata/spark/image/Dockerfile << 'EOF'
# Start from the official Spark base image
FROM apache/spark:3.4.1
# Bake the Spark application jar into the image
COPY hello_word-1.0-SNAPSHOT.jar /opt/spark/examples/jars/hello_word-1.0-SNAPSHOT.jar
# Input file the Spark job reads (passed as an argument at submit time)
COPY A.txt /opt/spark/examples/jars/A.txt
EOF
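The Dockerfile expects hello_word-1.0-SNAPSHOT.jar and A.txt to sit in the build context; copy them in before building (the source paths below are assumptions, adjust to wherever your artifacts live):
# hypothetical source locations; replace with your own
cp /root/bigdata/spark/hello_word-1.0-SNAPSHOT.jar /root/bigdata/spark/image/
cp /root/bigdata/spark/A.txt /root/bigdata/spark/image/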
cd /root/bigdata/spark/image/
# Build the image with BuildKit (requires a running buildkitd) and import it into containerd
buildctl build --frontend=dockerfile.v0 --local context=. --local dockerfile=. --output type=docker,name=bigdata/spark:3.4.1 | ctr -n k8s.io image import -
# Verify the image
nerdctl -n k8s.io images
Export the image for the other nodes:
nerdctl -n k8s.io save -o spark-image-341.tar.gz bigdata/spark:3.4.1
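Copy the tarball to each remaining node first; scp is one option (the host name below is a placeholder):
scp spark-image-341.tar.gz node02:/root/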
Import the image (run on each target node):
nerdctl -n k8s.io load -i spark-image-341.tar.gz
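On each node, confirm the image landed:
nerdctl -n k8s.io images | grep bigdata/spark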
Check the Kubernetes API server URL; the control-plane URL it prints (https://lb.kubesphere.local:6443 in this cluster) is the endpoint that spark-submit's --master k8s://... flag targets:
kubectl cluster-info
2.4 Submitting a Test Job
cd /root/bigdata/spark/
tar -zxvf spark-3.4.1-bin-hadoop3.tgz
cd spark-3.4.1-bin-hadoop3/
bin/spark-submit \
--master k8s://https://lb.kubesphere.local:6443 \
--deploy-mode cluster \
--name spark-pi \
--conf spark.executor.instances=3 \
--conf spark.kubernetes.namespace=spark \
--conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \
--conf spark.kubernetes.container.image=bigdata/spark:3.4.1 \
--class Spark_Test1 \
local:///opt/spark/examples/jars/hello_word-1.0-SNAPSHOT.jar file:///opt/spark/examples/jars/A.txt
kubectl get pod -n spark
View the driver logs (the pod name suffix varies with each submission):
kubectl logs -f spark-pi-4af08a8b83afb474-driver -n spark
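Since the suffix changes on every submission, the driver pod can also be found through the spark-role=driver label that Spark sets on driver pods (assuming only this job is running in the namespace):
kubectl logs -f -n spark $(kubectl get pod -n spark -l spark-role=driver -o name | tail -n 1)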
2.5 HDFS Integration Test
Create the Spark event-log directory on HDFS:
kubectl exec -it hadoop-hadoop-hdfs-nn-0 -n hadoop -- bash
hdfs dfs -ls /
hdfs dfs -mkdir /sparkhistory
hdfs dfs -chmod 777 /sparkhistory
hdfs dfs -ls /
Create a Service for the Spark History Server:
cd /root/bigdata/spark
cat > svc.yaml << 'EOF'
apiVersion: v1
kind: Service
metadata:
  name: spark-hs-svc
  namespace: spark
spec:
  type: NodePort
  ports:
  - port: 18080
    protocol: TCP
    targetPort: 18080
    nodePort: 31180
  selector:
    run: spark-history-server
status:
  loadBalancer: {}
EOF
kubectl apply -f svc.yaml
kubectl get svc -n spark
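At this point the Service has no endpoints because the matching Deployment does not exist yet; that is expected and can be confirmed with:
kubectl get endpoints spark-hs-svc -n spark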
Create the History Server Deployment:
cat > pod.yaml << 'EOF'
apiVersion: apps/v1
kind: Deployment
metadata:
  name: spark-history-server
  namespace: spark
spec:
  selector:
    matchLabels:
      run: spark-history-server
  replicas: 1
  template:
    metadata:
      labels:
        run: spark-history-server
    spec:
      containers:
      - image: apache/spark:3.4.1
        name: spark-history-server
        args: ["/opt/spark/bin/spark-class", "org.apache.spark.deploy.history.HistoryServer"]
        ports:
        - containerPort: 18080
          name: http
        env:
        - name: SPARK_HISTORY_OPTS
          value: "-Dspark.history.fs.logDirectory=hdfs://hadoop-hadoop-hdfs-nn.hadoop:9000/sparkhistory"
EOF
kubectl apply -f pod.yaml
kubectl get pod -n spark -o wide
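Before opening the UI, tail the History Server log to confirm it bound port 18080 and can reach the HDFS log directory:
kubectl logs -f deployment/spark-history-server -n spark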
Open the web UI:
http://172.16.10.11:31180/
Spark job submission test (part 2)
Resubmit with the event-log configuration added (spark.eventLog.enabled and spark.eventLog.dir), so the run shows up in the History Server:
cd /root/bigdata/spark/spark-3.4.1-bin-hadoop3/
bin/spark-submit \
--master k8s://https://lb.kubesphere.local:6443 \
--deploy-mode cluster \
--name spark-pi \
--conf spark.executor.instances=3 \
--conf spark.kubernetes.namespace=spark \
--conf spark.eventLog.enabled=true \
--conf spark.eventLog.dir=hdfs://hadoop-hadoop-hdfs-nn.hadoop:9000/sparkhistory \
--conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \
--conf spark.kubernetes.container.image=bigdata/spark:3.4.1 \
--class Spark_Test1 \
local:///opt/spark/examples/jars/hello_word-1.0-SNAPSHOT.jar file:///opt/spark/examples/jars/A.txt
kubectl logs -f spark-pi-0e9d4e8b8455fc99-driver -n spark
Check the Spark job history: the completed application should now appear in the History Server UI at http://172.16.10.11:31180/.
Run with the jar stored on HDFS:
cd /root/bigdata/spark
kubectl cp ./hello_word-1.0-SNAPSHOT.jar hadoop-hadoop-hdfs-nn-0:/tmp/ -n hadoop
kubectl exec -it hadoop-hadoop-hdfs-nn-0 -n hadoop -- bash
hdfs dfs -put /tmp/hello_word-1.0-SNAPSHOT.jar hdfs://hadoop-hadoop-hdfs-nn.hadoop:9000/sparkhistory/
hdfs dfs -ls /sparkhistory
Submit the job:
cd /root/bigdata/spark/spark-3.4.1-bin-hadoop3/
bin/spark-submit \
--master k8s://https://lb.kubesphere.local:6443 \
--deploy-mode cluster \
--name spark-pi \
--conf spark.executor.instances=3 \
--conf spark.kubernetes.namespace=spark \
--conf spark.eventLog.enabled=true \
--conf spark.eventLog.dir=hdfs://hadoop-hadoop-hdfs-nn.hadoop:9000/sparkhistory \
--conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \
--conf spark.kubernetes.container.image=bigdata/spark:3.4.1 \
--class Spark_Test1 \
hdfs://hadoop-hadoop-hdfs-nn.hadoop:9000/sparkhistory/hello_word-1.0-SNAPSHOT.jar file:///opt/spark/examples/jars/A.txt
3: Removing Spark
cd /root/bigdata/spark
kubectl delete -f pod.yaml
kubectl get pod -n spark -o wide
kubectl delete -f svc.yaml
kubectl get svc -n spark
kubectl delete pod --all -n spark
kubectl get pod -n spark
kubectl delete clusterrolebinding spark-role
kubectl get ClusterRole,ClusterRoleBinding | grep spark
kubectl delete serviceaccount spark -n spark
kubectl get sa -n spark
kubectl delete ns spark
kubectl get ns