
Installing a Spark 1+3 and Spark On Yarn mode cluster with docker-compose

李跃
2023-12-01

The example here is adapted from someone else's setup; I then put together a usable 1+3 cluster of my own. You can swap the Spark image for whichever one you prefer from Docker Hub.

Note: logging is not enabled here, so you will not find any logs in the web UI. If you need them, add the relevant parameters yourself or go into the containers and change the configuration by hand.
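Purely as an illustration of one possible approach (my assumption, not part of the original setup), Spark event logging could be enabled through the mounted conf directories, since each service below sets SPARK_CONF_DIR=/conf:

  # Illustrative only: add a spark-defaults.conf under the mounted ./conf/master directory
  # (mounted into the container as /conf, which is used as SPARK_CONF_DIR)
  echo "spark.eventLog.enabled true"   >> ./conf/master/spark-defaults.conf
  echo "spark.eventLog.dir /tmp/data"  >> ./conf/master/spark-defaults.conf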

Related article: Installing a Hadoop 1+3 mode cluster with docker-compose.

1. docker-compose.yml

version: "2.2"
services:
  master:
    image: gettyimages/spark:1.6.0-hadoop-2.6
    container_name: master
    hostname: master
    command: bin/spark-class org.apache.spark.deploy.master.Master -h master
    network_mode: 'host'
    restart: always
    environment:
      MASTER: spark://master:7077
      SPARK_CONF_DIR: /conf
      # This sets the master web UI port; 8080 is heavily used (the Hadoop setup above seems to take it), so it is moved to 8090 here. Change it back to 8080 if you prefer.
      SPARK_MASTER_WEBUI_PORT: 8090
    volumes:
      - ./conf/master:/conf
      - ./data:/tmp/data
      - ./jars:/root/jars
    env_file:
      - ./spark.env

  worker1:
    image: gettyimages/spark:1.6.0-hadoop-2.6
    container_name: worker1
    hostname: worker1
    command: bin/spark-class org.apache.spark.deploy.worker.Worker spark://master:7077
    network_mode: 'host'
    restart: always
    environment:
      SPARK_CONF_DIR: /conf
      SPARK_WORKER_CORES: 8
      SPARK_WORKER_MEMORY: 8g
      SPARK_WORKER_PORT: 8881
      SPARK_WORKER_WEBUI_PORT: 8091
    volumes:
      - ./conf/worker1:/conf
      - ./data/worker1:/tmp/data
    env_file:
      - ./spark.env

  worker2:
    image: gettyimages/spark:1.6.0-hadoop-2.6
    container_name: worker2
    hostname: worker2
    command: bin/spark-class org.apache.spark.deploy.worker.Worker spark://master:7077
    network_mode: 'host'
    restart: always
    environment:
      SPARK_CONF_DIR: /conf
      SPARK_WORKER_CORES: 8
      SPARK_WORKER_MEMORY: 8g
      SPARK_WORKER_PORT: 8882
      SPARK_WORKER_WEBUI_PORT: 8092
    volumes:
      - ./conf/worker2:/conf
      - ./data/worker2:/tmp/data
    env_file:
      - ./spark.env

  worker3:
    image: gettyimages/spark:1.6.0-hadoop-2.6
    container_name: worker3
    hostname: worker3
    command: bin/spark-class org.apache.spark.deploy.worker.Worker spark://master:7077
    network_mode: 'host'
    restart: always
    environment:
      SPARK_CONF_DIR: /conf
      SPARK_WORKER_CORES: 8
      SPARK_WORKER_MEMORY: 8g
      SPARK_WORKER_PORT: 8883
      SPARK_WORKER_WEBUI_PORT: 8093
    volumes:
      - ./conf/worker3:/conf
      - ./data/worker3:/tmp/data
    env_file:
      - ./spark.env
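With the file above saved as docker-compose.yml (together with the conf, data and jars directories it mounts), the cluster can be brought up roughly like this:

  docker-compose up -d   # start the master and the three workers in the background
  docker-compose ps      # check that all four containers are running
  # The master web UI should then be reachable on port 8090 of the host,
  # and the worker UIs on ports 8091-8093.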

2. spark.env

# Spark on YARN mode is configured here
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
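As a rough sketch (not from the original article), with the resource manager reachable under the hostname resourcemanager, a job could then be submitted in yarn mode from the master container; the example class and jar path below are only placeholders, and this assumes the container's working directory is the Spark home, as the bin/spark-class commands above suggest:

  docker exec -it master bin/spark-submit \
    --master yarn \
    --deploy-mode cluster \
    --class org.apache.spark.examples.SparkPi \
    your-app.jar 100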

Notes:

  • Don't forget to add the host-name mappings to your hosts file (see the example after these notes).

  • In my own testing, remote invocation from Java with Spark on YARN seems to have problems: Hadoop cannot extract the Spark packages when it reads them. (A real headache, since the project team requires this mode to be invoked remotely from Java, with all parameters submitted from the Java side...)
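For the first note, the hosts entries might look like the following; the IP addresses are placeholders for your own machines:

  # /etc/hosts (illustrative addresses)
  192.168.1.10  master
  192.168.1.11  worker1
  192.168.1.12  worker2
  192.168.1.13  worker3
  192.168.1.20  resourcemanager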
