近期在做视频流量的统计,通过定时拉取云厂商的视频播放统计数据。由于数据比较多,而且每天都要处理,这样数据膨胀的非常快,每年的数据量达到了千万级别,因此有必要采取分库分表的方案进行数据分片。当然有很多的NewSQL数据库方案,比如TiDB或OceanBase等分布式存储的数据库,但是对于目前我们的维护成本是不可接受的。故而还是采取 Middleware + MySQL的方式满足现在的业务需求。
├── conf
│ ├── config-database-discovery.yaml
│ ├── config-encrypt.yaml
│ ├── config-readwrite-splitting.yaml
│ ├── config-shadow.yaml
│ ├── config-sharding.yaml
│ ├── logback.xml
│ └── server.yaml
├── docker-compose.yml
└── ext-lib
└── mysql-connector-java-8.0.11.jar
为了最终验证分库分表后能够满足日常开发的需求,需要做相应的测试,本篇文章采用 订单和子订单表作为数据表,并录入基础的数据进行CRUD的测试。
CREATE TABLE `t_order_0` (
`order_id` bigint(20) UNSIGNED NOT NULL COMMENT '主键ID',
`user_id` bigint(20) UNSIGNED NOT NULL COMMENT '用户ID',
`total_money` int(10) UNSIGNED NOT NULL COMMENT '订单总金额',
PRIMARY KEY (`order_id`),
KEY `idx_user_id` (`user_id`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='订单总表';
CREATE TABLE `t_order_1` (
`order_id` bigint(20) UNSIGNED NOT NULL COMMENT '主键ID',
`user_id` bigint(20) UNSIGNED NOT NULL COMMENT '用户ID',
`total_money` int(10) UNSIGNED NOT NULL COMMENT '订单总金额',
PRIMARY KEY (`order_id`),
KEY `idx_user_id` (`user_id`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='订单总表';
CREATE TABLE `t_order_item_0` (
`order_item_id` bigint(20) UNSIGNED NOT NULL COMMENT '子订单ID',
`order_id` bigint(20) UNSIGNED NOT NULL COMMENT '主键ID',
`user_id` bigint(20) UNSIGNED NOT NULL COMMENT '用户ID',
`money` int(10) UNSIGNED NOT NULL COMMENT '子订单金额',
PRIMARY KEY (`order_item_id`),
KEY `idx_order_id` (`order_id`) USING BTREE,
KEY `idx_user_id` (`user_id`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='订单子表';
CREATE TABLE `t_order_item_1` (
`order_item_id` bigint(20) UNSIGNED NOT NULL COMMENT '子订单ID',
`order_id` bigint(20) UNSIGNED NOT NULL COMMENT '主键ID',
`user_id` bigint(20) UNSIGNED NOT NULL COMMENT '用户ID',
`money` int(10) UNSIGNED NOT NULL COMMENT '子订单金额',
PRIMARY KEY (`order_item_id`),
KEY `idx_order_id` (`order_id`) USING BTREE,
KEY `idx_user_id` (`user_id`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='订单子表';
version: "3"
services:
ShardingSphereProxy:
image: apache/shardingsphere-proxy
container_name: shardingsphere-proxy
network_mode: "host"
restart: always
command: server /data
ports:
- 13307:3307
volumes:
- ./conf:/opt/shardingsphere-proxy/conf
- ./ext-lib:/opt/shardingsphere-proxy/ext-lib
environment:
- JVM_OPTS="-Djava.awt.headless=true"
######################################################################################################
#
# 用于配置:数据分片规则
#
######################################################################################################
schemaName: data-center_db
dataSources:
ds_0:
url: jdbc:mysql://127.0.0.1:3306/data-center_0?serverTimezone=UTC&useSSL=false
username: proxy
password: 123654
connectionTimeoutMilliseconds: 30000
idleTimeoutMilliseconds: 60000
maxLifetimeMilliseconds: 1800000
maxPoolSize: 50
minPoolSize: 1
ds_1:
url: jdbc:mysql://127.0.0.1:3306/data-center_1?serverTimezone=UTC&useSSL=false
username: proxy
password: 123654
connectionTimeoutMilliseconds: 30000
idleTimeoutMilliseconds: 60000
maxLifetimeMilliseconds: 1800000
maxPoolSize: 50
minPoolSize: 1
rules:
- !SHARDING
tables: # 数据分片规则配置
t_order: # 订单逻辑表名称
actualDataNodes: ds_${0..1}.t_order_${0..1}
databaseStrategy: # 配置分库策略
standard:
shardingColumn: user_id
shardingAlgorithmName: database_user_inline
tableStrategy: # 分表策略
standard:
shardingColumn: order_id
shardingAlgorithmName: t_order_inline
keyGenerateStrategy:
column: order_id
keyGeneratorName: snowflake
t_order_item: # 子订单逻辑表名称
actualDataNodes: ds_${0..1}.t_order_item_${0..1}
databaseStrategy: # 配置分库策略
standard:
shardingColumn: user_id
shardingAlgorithmName: database_user_inline
tableStrategy: # 分表策略
standard:
shardingColumn: order_id
shardingAlgorithmName: t_order_item_inline
keyGenerateStrategy:
column: order_item_id
keyGeneratorName: snowflake
bindingTables: # 绑定表规则列表
- t_order,t_order_item
# 分片算法配置
shardingAlgorithms:
database_user_inline:
type: INLINE
props:
algorithm-expression: ds_${user_id % 2}
t_order_inline: # 订单表分片算法名称
type: INLINE
props:
algorithm-expression: t_order_${order_id % 2}
allow-range-query-with-inline-sharding: true
t_order_item_inline: # 子订单表分片算法名称
type: INLINE
props:
algorithm-expression: t_order_item_${order_id % 2}
allow-range-query-with-inline-sharding: true
# 分布式序列算法配置
keyGenerators:
snowflake:
type: SNOWFLAKE
props:
worker-id: 1
######################################################################################################
#
# 用于配置:数据接入迁移&弹性伸缩、分布式治理模式、权限、代理属性.
#
######################################################################################################
#scaling:
# blockQueueSize: 10000 # 数据传输通道队列大小
# workerThread: 40 # 工作线程池大小,允许同时运行的迁移任务线程数
# clusterAutoSwitchAlgorithm:
# type: IDLE
# props:
# incremental-task-idle-minute-threshold: 30
# dataConsistencyCheckAlgorithm:
# type: DEFAULT
#
#mode:
# type: Cluster
# repository:
# type: ZooKeeper
# props:
# namespace: governance_ds
# server-lists: localhost:2181
# retryIntervalMilliseconds: 500
# timeToLiveSeconds: 60
# maxRetries: 3
# operationTimeoutMilliseconds: 500
# overwrite: false
#
rules:
- !AUTHORITY
users:
- root@%:123654
- sharding@:sharding
provider:
type: ALL_PRIVILEGES_PERMITTED
- !TRANSACTION
defaultType: XA
providerType: Atomikos
props:
max-connections-size-per-query: 1
kernel-executor-size: 16 # Infinite by default.
proxy-frontend-flush-threshold: 128 # The default value is 128.
# proxy-opentracing-enabled: false
# proxy-hint-enabled: false
sql-show: true
# check-table-metadata-enabled: false
# show-process-list-enabled: false
# # Proxy backend query fetch size. A larger value may increase the memory usage of ShardingSphere Proxy.
# # The default value is -1, which means set the minimum value for different JDBC drivers.
# proxy-backend-query-fetch-size: -1
check-duplicate-table-enabled: true
# sql-comment-parse-enabled: false
# proxy-frontend-executor-size: 0 # Proxy frontend executor size. The default value is 0, which means let Netty decide.
# # Available options of proxy backend executor suitable: OLAP(default), OLTP. The OLTP option may reduce time cost of writing packets to client, but it may increase the latency of SQL execution
# # if client connections are more than proxy-frontend-netty-executor-size, especially executing slow SQL.
# proxy-backend-executor-suitable: OLAP
# proxy-frontend-max-connections: 0 # Less than or equal to 0 means no limitation.
# sql-federation-enabled: false
根据需要自行配置
根据需要自行配置
根据需要自行配置
docker-compose up -d
起容器
docker-compose ps
查看运行情况
docker-compose logs
查看日志,现实如下则proxy启动成功
o.a.s.p.v.ShardingSphereProxyVersion - Database name is `MySQL`, version is `8.0.27`
shardingsphere-proxy | [INFO ] 2022-04-15 06:00:08.454 [main] o.a.s.p.frontend.ShardingSphereProxy - ShardingSphere-Proxy Memory mode started successfully
进入容器执行:
//1.先备份
cp /etc/apt/sources.list /etc/apt/sources.list.bak
//2.清空
echo " " > /etc/apt/sources.list
//3.写阿里源
echo "deb https://mirrors.aliyun.com/debian stable main contrib non-free">>/etc/apt/sources.list
echo "deb https://mirrors.aliyun.com/debian stable-updates main contrib non-free">>/etc/apt/sources.list
//4.清空缓存
apt-get clean
apt-get update
有时虽然容器启动成功,但是服务确不能正常启动,很可能因为网络不通的问题导致的,需要进行排查。
# 安装telnet
apt-get install telnet
# 安装curl
apt-get install curl
# 安装ifconfig
apt-get install net-tools
# 安装vim
apt-get install vim
# 安装ping
apt-get install inetutils-ping
# 订单主表数据
INSERT INTO t_order (user_id,total_money) VALUES(1,111);
INSERT INTO t_order (user_id,total_money) VALUES(2,222);
INSERT INTO t_order (user_id,total_money) VALUES(3,333);
INSERT INTO t_order (user_id,total_money) VALUES(4,444);
INSERT INTO t_order (user_id,total_money) VALUES(5,555);
INSERT INTO t_order (user_id,total_money) VALUES(6,666);
INSERT INTO t_order (user_id,total_money) VALUES(7,777);
INSERT INTO t_order (user_id,total_money) VALUES(8,888);
INSERT INTO t_order (user_id,total_money) VALUES(9,999);
INSERT INTO t_order (user_id,total_money) VALUES(10,1000);
# 订单子表数据(通过主表的订单ID生成)
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552199578025985,2,111);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552199578025985,2,111);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552199653523457,4,200);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552199653523457,4,244);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552199724826625,6,300);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552199724826625,6,366);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552199800324097,8,444);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552199800324097,8,444);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552203344510977,10,200);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552203344510977,10,400);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552203344510977,10,400);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552198902743040,1,50);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552198902743040,1,61);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552199607386112,3,133);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552199607386112,3,200);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552199682883584,5,255);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552199682883584,5,300);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552199770963968,7,177);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552199770963968,7,200);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552199770963968,7,100);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552199770963968,7,300);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552199825489920,9,333);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552199825489920,9,333);
INSERT INTO t_order_item (order_id,user_id,money) VALUES(721552199825489920,9,333);
可以在服务器端通过MySQL的客户端工具进行连接,命令如下:
mysql -h127.0.0.1 -P13307 -uroot -p123654
连接上之后可以安装正常的mysql命令行进行crud的SQL语句,还有ShardingSphere-Proxy的DistSQL。
具体支持的SQL可查阅 官方文档
查看分片规则:show sharding table rules\G;
*************************** 1. row ***************************
table: t_order
actual_data_nodes: ds_${0..1}.t_order_${0..1}
actual_data_sources:
database_strategy_type: INLINE
database_sharding_column: user_id
database_sharding_algorithm_type: INLINE
database_sharding_algorithm_props: algorithm-expression=ds_${user_id % 2}
table_strategy_type: INLINE
table_sharding_column: order_id
table_sharding_algorithm_type: INLINE
table_sharding_algorithm_props: algorithm-expression=t_order_${order_id % 2},allow-range-query-with-inline-sharding=true
key_generate_column: order_id
key_generator_type: SNOWFLAKE
key_generator_props: worker-id=1
*************************** 2. row ***************************
table: t_order_item
actual_data_nodes: ds_${0..1}.t_order_item_${0..1}
actual_data_sources:
database_strategy_type: INLINE
database_sharding_column: user_id
database_sharding_algorithm_type: INLINE
database_sharding_algorithm_props: algorithm-expression=ds_${user_id % 2}
table_strategy_type: INLINE
table_sharding_column: order_id
table_sharding_algorithm_type: INLINE
table_sharding_algorithm_props: algorithm-expression=t_order_item_${order_id % 2},allow-range-query-with-inline-sharding=true
key_generate_column: order_item_id
key_generator_type: SNOWFLAKE
key_generator_props: worker-id=1
基本查询:
MySQL [data-center_db]> SELECT * FROM t_order;
+--------------------+---------+-------------+
| order_id | user_id | total_money |
+--------------------+---------+-------------+
| 721552199578025985 | 2 | 222 |
| 721552199653523457 | 4 | 444 |
| 721552199724826625 | 6 | 666 |
| 721552199800324097 | 8 | 888 |
| 721552203344510977 | 10 | 1000 |
| 721552198902743040 | 1 | 111 |
| 721552199607386112 | 3 | 333 |
| 721552199682883584 | 5 | 555 |
| 721552199770963968 | 7 | 777 |
| 721552199825489920 | 9 | 999 |
+--------------------+---------+-------------+
10 rows in set (0.00 sec)
联表查询:
MySQL [data-center_db]> SELECT * FROM t_order o INNER JOIN t_order_item i ON o.order_id = i.order_id WHERE o.order_id = 721552199825489920;
+--------------------+---------+-------------+--------------------+--------------------+---------+-------+
| order_id | user_id | total_money | order_item_id | order_id | user_id | money |
+--------------------+---------+-------------+--------------------+--------------------+---------+-------+
| 721552199825489920 | 9 | 999 | 721555653729976321 | 721552199825489920 | 9 | 333 |
| 721552199825489920 | 9 | 999 | 721555653776113664 | 721552199825489920 | 9 | 333 |
| 721552199825489920 | 9 | 999 | 721555656959590401 | 721552199825489920 | 9 | 333 |
+--------------------+---------+-------------+--------------------+--------------------+---------+-------+
3 rows in set (0.00 sec)
本文通过docker-compose 部署了数据库中间 ShardingSphere-Proxy,主要测试了数据分片的功能。当然后期以下的工作: