当前位置: 首页 > 工具软件 > Crawlab > 使用案例 >

Crawlab主从分布式部署

穆宏胜
2023-12-01

一、在生产环境中,我们通常不满足于单机的简单部署,而是希望在多台服务器上分别部署 Crawlab,并让它们连接同一套公共的 MongoDB 和 Redis。这就需要把 MongoDB 和 Redis 独立出来单独部署,避免它们与 Crawlab 耦合在一起启动。
二、 Docker-Compose主节点
docker-compose.yml

version: '3.3'
services:
  # Crawlab master node. MongoDB and Redis are not defined in this file; the
  # node connects to external instances through the addresses given below.
  master:
    image: tikazyq/crawlab:latest
    container_name: master
    environment:
      # Backend API address; applies to HTTPS or source-code deployments.
      CRAWLAB_API_ADDRESS: "http://39.115.24.224:8000"
      # Whether this is the master node: "Y" for master, "N" for worker.
      CRAWLAB_SERVER_MASTER: "Y"
      # MongoDB host address. Inside a single docker compose network the
      # service name (e.g. "mongo") would be used instead of an IP.
      CRAWLAB_MONGO_HOST: "39.115.24.224"
      # MongoDB port.
      CRAWLAB_MONGO_PORT: "27017"
      # MongoDB database.
      CRAWLAB_MONGO_DB: "crawlab"
      # MongoDB username.
      CRAWLAB_MONGO_USERNAME: "admin"
      # MongoDB password. Taken from CRAWLAB_MONGO_PASSWORD in the shell or
      # .env file when set; otherwise falls back to the sample value.
      CRAWLAB_MONGO_PASSWORD: "${CRAWLAB_MONGO_PASSWORD:-123456}"
      # MongoDB auth source.
      CRAWLAB_MONGO_AUTHSOURCE: "admin"
      # Redis host address. Inside a single docker compose network the
      # service name would be used instead of an IP.
      CRAWLAB_REDIS_ADDRESS: "39.115.24.224"
      # Redis port.
      CRAWLAB_REDIS_PORT: "6379"
      # Redis database.
      CRAWLAB_REDIS_DATABASE: "1"
      # Redis password. Taken from CRAWLAB_REDIS_PASSWORD in the shell or
      # .env file when set; otherwise falls back to the sample value.
      CRAWLAB_REDIS_PASSWORD: "${CRAWLAB_REDIS_PASSWORD:-root}"
      # Log level; defaults to info.
      CRAWLAB_LOG_LEVEL: "info"
      # Whether to periodically delete log files; not deleted by default.
      # CRAWLAB_LOG_ISDELETEPERIODICALLY: "N"
      # Frequency of deleting log files; hourly by default.
      # CRAWLAB_LOG_DELETEFREQUENCY: "@hourly"
      # Node register type; defaults to the MAC address, can be set to "ip"
      # to avoid MAC address conflicts.
      CRAWLAB_SERVER_REGISTER_TYPE: "ip"
      # Node register IP; the node's unique identifier. Only takes effect
      # when CRAWLAB_SERVER_REGISTER_TYPE is "ip".
      CRAWLAB_SERVER_REGISTER_IP: "172.19.0.1"
      # Number of task executors (tasks run in parallel).
      # CRAWLAB_TASK_WORKERS: 8
      # Number of RPC worker coroutines.
      # CRAWLAB_RPC_WORKERS: 16
      # Whether to pre-install the Node.js language environment.
      # CRAWLAB_SERVER_LANG_NODE: "Y"
      # Whether to pre-install the Java language environment.
      # CRAWLAB_SERVER_LANG_JAVA: "Y"
      # Whether to allow user registration.
      # CRAWLAB_SETTING_ALLOWREGISTER: "N"
      # Whether to enable the tutorial.
      # CRAWLAB_SETTING_ENABLETUTORIAL: "N"
      # SMTP server address.
      # CRAWLAB_NOTIFICATION_MAIL_SERVER: smtp.example.com
      # SMTP server port.
      # CRAWLAB_NOTIFICATION_MAIL_PORT: 465
      # Sender email.
      # CRAWLAB_NOTIFICATION_MAIL_SENDEREMAIL: admin@example.com
      # Sender ID.
      # CRAWLAB_NOTIFICATION_MAIL_SENDERIDENTITY: admin@example.com
      # SMTP username.
      # CRAWLAB_NOTIFICATION_MAIL_SMTP_USER: username
      # SMTP password.
      # CRAWLAB_NOTIFICATION_MAIL_SMTP_PASSWORD: password
    ports:
      - "8080:8080"  # frontend port mapping
      - "8000:8000"  # backend port mapping

Docker-Compose从节点

version: '3.3'
services:
  # Crawlab worker node. MongoDB and Redis are not defined in this file; the
  # node connects to external instances through the addresses given below.
  worker:
    image: tikazyq/crawlab:latest
    container_name: worker
    environment:
      # Whether this is the master node: "Y" for master, "N" for worker.
      CRAWLAB_SERVER_MASTER: "N"
      # MongoDB host address. Inside a single docker compose network the
      # service name would be used instead of an IP.
      CRAWLAB_MONGO_HOST: "39.115.24.224"
      # MongoDB port.
      CRAWLAB_MONGO_PORT: "27017"
      # MongoDB database.
      CRAWLAB_MONGO_DB: "crawlab"
      # MongoDB username.
      CRAWLAB_MONGO_USERNAME: "admin"
      # MongoDB password. Taken from CRAWLAB_MONGO_PASSWORD in the shell or
      # .env file when set; otherwise falls back to the sample value.
      CRAWLAB_MONGO_PASSWORD: "${CRAWLAB_MONGO_PASSWORD:-123456}"
      # MongoDB auth source.
      CRAWLAB_MONGO_AUTHSOURCE: "admin"
      # Redis host address. Inside a single docker compose network the
      # service name would be used instead of an IP.
      CRAWLAB_REDIS_ADDRESS: "39.115.24.224"
      # Redis port.
      CRAWLAB_REDIS_PORT: "6379"
      # Redis database.
      CRAWLAB_REDIS_DATABASE: "1"
      # Redis password. Taken from CRAWLAB_REDIS_PASSWORD in the shell or
      # .env file when set; otherwise falls back to the sample value.
      CRAWLAB_REDIS_PASSWORD: "${CRAWLAB_REDIS_PASSWORD:-root}"
      # Log level; defaults to info.
      # CRAWLAB_LOG_LEVEL: "info"
      # Whether to periodically delete log files; not deleted by default.
      # CRAWLAB_LOG_ISDELETEPERIODICALLY: "N"
      # Frequency of deleting log files; hourly by default.
      # CRAWLAB_LOG_DELETEFREQUENCY: "@hourly"
      # Node register type; defaults to the MAC address, can be set to "ip"
      # to avoid MAC address conflicts.
      CRAWLAB_SERVER_REGISTER_TYPE: "ip"
      # Node register IP; the node's unique identifier. Only takes effect
      # when CRAWLAB_SERVER_REGISTER_TYPE is "ip".
      CRAWLAB_SERVER_REGISTER_IP: "172.19.0.2"
      # Number of task executors (tasks run in parallel).
      # CRAWLAB_TASK_WORKERS: 8
      # Number of RPC worker coroutines.
      # CRAWLAB_RPC_WORKERS: 16
      # Whether to pre-install the Node.js language environment.
      # CRAWLAB_SERVER_LANG_NODE: "Y"
      # Whether to pre-install the Java language environment.
      # CRAWLAB_SERVER_LANG_JAVA: "Y"
      # Whether to allow user registration.
      # CRAWLAB_SETTING_ALLOWREGISTER: "N"
      # Whether to enable the tutorial.
      # CRAWLAB_SETTING_ENABLETUTORIAL: "N"
      # SMTP server address.
      # CRAWLAB_NOTIFICATION_MAIL_SERVER: smtp.example.com
      # SMTP server port.
      # CRAWLAB_NOTIFICATION_MAIL_PORT: 465
      # Sender email.
      # CRAWLAB_NOTIFICATION_MAIL_SENDEREMAIL: admin@example.com
      # Sender ID.
      # CRAWLAB_NOTIFICATION_MAIL_SENDERIDENTITY: admin@example.com
      # SMTP username.
      # CRAWLAB_NOTIFICATION_MAIL_SMTP_USER: username
      # SMTP password.
      # CRAWLAB_NOTIFICATION_MAIL_SMTP_PASSWORD: password
    # Optional service-level settings, kept outside the environment mapping so
    # they can be uncommented as-is.
    # Volume mapping format is "host:container".
    # volumes:
    #   # Keep the container's time zone and time in sync with the host.
    #   - "/etc/localtime:/etc/localtime"
    #   # Persist log data on the host.
    #   - "/opt/docker/crawlab/logs/crawlab/:/var/logs/crawlab/"
    #   # Persist spider project files on the host.
    #   - "/opt/docker/crawlab/spiders/:/app/spiders/"
    #   # Persist the temporary file directory on the host.
    #   - "/opt/docker/crawlab/tmp/:/tmp/"
    # Fixed ipv4_address; assign it according to your own needs.
    # networks:
    #   default:
    #     ipv4_address: "172.31.16.100"

三、感慨
在网上找了很久,官方文档写得也含含糊糊,让人摸不着头脑,照着做只能完成最简单的部署,根本达不到生产环境的要求。使用 Kubernetes 进行多节点部署时,服务器配置至少需要 2 核 CPU、2 GB 内存;作者的服务器条件不足,因此没有尝试成功。

 类似资料: