一、生产环境上我们不希望仅仅是简单部署,我们可以在多台服务器上分别部署Crawlab然后连接公共的MongoDB及Redis。这时候就需要让Mongo及Redis独立出来,避免耦合启动。
二、 Docker-Compose主节点
docker-compose.yml
version: '3.3'
services:
master:
image: tikazyq/crawlab:latest
container_name: master
environment:
CRAWLAB_API_ADDRESS: "http://39.115.24.224:8000" # backend API address 后端 API 地址. 适用于 https 或者源码部署
CRAWLAB_SERVER_MASTER: "Y" # whether to be master node 是否为主节点,主节点为 Y,工作节点为 N
# CRAWLAB_MONGO_HOST: "mongo"
CRAWLAB_MONGO_HOST: "39.115.24.224"
# MongoDB host address MongoDB 的地址,在 docker compose 网络中,直接引用服务名称
CRAWLAB_MONGO_PORT: "27017" # MongoDB port MongoDB 的端口
CRAWLAB_MONGO_DB: "crawlab" # MongoDB database MongoDB 的数据库
CRAWLAB_MONGO_USERNAME: "admin" # MongoDB username MongoDB 的用户名
CRAWLAB_MONGO_PASSWORD: "123456" # MongoDB password MongoDB 的密码
CRAWLAB_MONGO_AUTHSOURCE: "admin" # MongoDB auth source MongoDB 的验证源
CRAWLAB_REDIS_ADDRESS: "39.115.24.224" # Redis host address Redis 的地址,在 docker compose 网络中,直接引用服务名称
CRAWLAB_REDIS_PORT: "6379" # Redis port Redis 的端口
CRAWLAB_REDIS_DATABASE: "1" # Redis database Redis 的数据库
CRAWLAB_REDIS_PASSWORD: "root" # Redis password Redis 的密码
CRAWLAB_LOG_LEVEL: "info" # log level 日志级别. 默认为 info
# CRAWLAB_LOG_ISDELETEPERIODICALLY: "N" # whether to periodically delete log files 是否周期性删除日志文件. 默认不删除
# CRAWLAB_LOG_DELETEFREQUENCY: "@hourly" # frequency of deleting log files 删除日志文件的频率. 默认为每小时
CRAWLAB_SERVER_REGISTER_TYPE: "ip"
# node register type 节点注册方式. 默认为 mac 地址,也可设置为 ip(防止 mac 地址冲突)
CRAWLAB_SERVER_REGISTER_IP: "172.19.0.1" # node register ip 节点注册IP. 节点唯一识别号,只有当 CRAWLAB_SERVER_REGISTER_TYPE 为 "ip" 时才生效
# CRAWLAB_TASK_WORKERS: 8 # number of task executors 任务执行器个数(并行执行任务数)
# CRAWLAB_RPC_WORKERS: 16 # number of RPC workers RPC 工作协程个数
# CRAWLAB_SERVER_LANG_NODE: "Y" # whether to pre-install Node.js 预安装 Node.js 语言环境
# CRAWLAB_SERVER_LANG_JAVA: "Y" # whether to pre-install Java 预安装 Java 语言环境
# CRAWLAB_SETTING_ALLOWREGISTER: "N" # whether to allow user registration 是否允许用户注册
# CRAWLAB_SETTING_ENABLETUTORIAL: "N" # whether to enable tutorial 是否启用教程
# CRAWLAB_NOTIFICATION_MAIL_SERVER: smtp.exmaple.com # STMP server address STMP 服务器地址
# CRAWLAB_NOTIFICATION_MAIL_PORT: 465 # STMP server port STMP 服务器端口
# CRAWLAB_NOTIFICATION_MAIL_SENDEREMAIL: admin@exmaple.com # sender email 发送者邮箱
# CRAWLAB_NOTIFICATION_MAIL_SENDERIDENTITY: admin@exmaple.com # sender ID 发送者 ID
# CRAWLAB_NOTIFICATION_MAIL_SMTP_USER: username # SMTP username SMTP 用户名
# CRAWLAB_NOTIFICATION_MAIL_SMTP_PASSWORD: password # SMTP password SMTP 密码
ports:
- "8080:8080" # frontend port mapping 前端端口映射
- "8000:8000" # backend
Docker-Compose从节点
version: '3.3'
services:
worker:
image: tikazyq/crawlab:latest
container_name: worker
environment:
CRAWLAB_SERVER_MASTER: "N" # whether to be master node 是否为主节点,主节点为 Y,工作节点为 N
CRAWLAB_MONGO_HOST: "39.115.24.224" # MongoDB host address MongoDB 的地址,在 docker compose 网络中,直接引用服务名称
CRAWLAB_MONGO_PORT: "27017" # MongoDB port MongoDB 的端口
CRAWLAB_MONGO_DB: "crawlab" # MongoDB database MongoDB 的数据库
CRAWLAB_MONGO_USERNAME: "admin" # MongoDB username MongoDB 的用户名
CRAWLAB_MONGO_PASSWORD: "123456" # MongoDB password MongoDB 的密码
CRAWLAB_MONGO_AUTHSOURCE: "admin" # MongoDB auth source MongoDB 的验证源
CRAWLAB_REDIS_ADDRESS: "39.115.24.224" # Redis host address Redis 的地址,在 docker compose 网络中,直接引用服务名称
CRAWLAB_REDIS_PORT: "6379" # Redis port Redis 的端口
CRAWLAB_REDIS_DATABASE: "1" # Redis database Redis 的数据库
CRAWLAB_REDIS_PASSWORD: "root" # Redis password Redis 的密码
# CRAWLAB_LOG_LEVEL: "info" # log level 日志级别. 默认为 info
# CRAWLAB_LOG_ISDELETEPERIODICALLY: "N" # whether to periodically delete log files 是否周期性删除日志文件. 默认不删除
# CRAWLAB_LOG_DELETEFREQUENCY: "@hourly"
# frequency of deleting log files 删除日志文件的频率. 默认为每小时
CRAWLAB_SERVER_REGISTER_TYPE: "ip" # node register type 节点注册方式. 默认为 mac 地址,也可设置为 ip(防止 mac 地址冲突)
# volumes: # 卷映射格式 "宿主:容器"
# - "/etc/localtime:/etc/localtime" # 使容器时区与时间和宿主同步
# - "/opt/docker/crawlab/logs/crawlab/:/var/logs/crawlab/" # 持久化日志数据到宿主机本地
# - "/opt/docker/crawlab/spiders/:/app/spiders/" # 持久化爬虫项目文件到宿主机本地
# - "/opt/docker/crawlab/tmp/:/tmp/" # 持久化临时文件目录到宿主机本地
# networks: # 固定 ipv4_address, 请根据自己的需求分配
# default:
# ipv4_address: "172.31.16.100"
CRAWLAB_SERVER_REGISTER_IP: "172.19.0.2" # node register ip 节点注册IP. 节点唯一识别号,只有当 CRAWLAB_SERVER_REGISTER_TYPE 为 "ip" 时才生效
# CRAWLAB_TASK_WORKERS: 8 # number of task executors 任务执行器个数(并行执行任务数)
# CRAWLAB_RPC_WORKERS: 16 # number of RPC workers RPC 工作协程个数
# CRAWLAB_SERVER_LANG_NODE: "Y" # whether to pre-install Node.js 预安装 Node.js 语言环境
# CRAWLAB_SERVER_LANG_JAVA: "Y" # whether to pre-install Java 预安装 Java 语言环境
# CRAWLAB_SETTING_ALLOWREGISTER: "N" # whether to allow user registration 是否允许用户注册
# CRAWLAB_SETTING_ENABLETUTORIAL: "N" # whether to enable tutorial 是否启用教程
# CRAWLAB_NOTIFICATION_MAIL_SERVER: smtp.exmaple.com # STMP server address STMP 服务器地址
# CRAWLAB_NOTIFICATION_MAIL_PORT: 465 # STMP server port STMP 服务器端口
# CRAWLAB_NOTIFICATION_MAIL_SENDEREMAIL: admin@exmaple.com # sender email 发送者邮箱
# CRAWLAB_NOTIFICATION_MAIL_SENDERIDENTITY: admin@exmaple.com # sender ID 发送者 ID
# CRAWLAB_NOTIFICATION_MAIL_SMTP_USER: username # SMTP username SMTP 用户名
# CRAWLAB_NOTIFICATION_MAIL_SMTP_PASSWORD: password # SMTP password SMTP 密码
三、感慨
网上找了好久,官方文档写的也含含糊糊,让用户只能摸不着头脑,只能简简单单的部署,根本用不到生产环境。使用Kubernetes 多节点部署对服务器要求至少2核2G。作者条件不足也没有尝试成功。