From 98d8e61aa177220bbae5a93acf15996e31f8e459 Mon Sep 17 00:00:00 2001
From: Eva Yu
Date: Fri, 27 Mar 2026 17:37:29 -0700
Subject: [PATCH] Add multidisk-jbod-balancing.md

---
 .../multidisk-jbod-balancing.md | 104 ++++++++++++++++++
 1 file changed, 104 insertions(+)
 create mode 100644 content/en/altinity-kb-setup-and-maintenance/multidisk-jbod-balancing.md

diff --git a/content/en/altinity-kb-setup-and-maintenance/multidisk-jbod-balancing.md b/content/en/altinity-kb-setup-and-maintenance/multidisk-jbod-balancing.md
new file mode 100644
index 0000000000..3008a6f2a3
--- /dev/null
+++ b/content/en/altinity-kb-setup-and-maintenance/multidisk-jbod-balancing.md
@@ -0,0 +1,104 @@
+---
+title: "MultiDisk (JBOD) Balancing"
+linkTitle: "MultiDisk (JBOD) Balancing"
+---
+
+ClickHouse provides two options for balancing inserts across the disks of a volume that has more than one disk: `round_robin` and `least_used`.
+
+## **Round Robin (Default):**
+
+ClickHouse writes each newly created part to the next disk in round-robin order.
+
+This is the default setting and is most effective when the parts created on insert are roughly the same size.
+
+Drawbacks: may lead to disk skew when part sizes vary.
+
+## **Least Used:**
+
+ClickHouse selects the disk with the most available space and writes to that disk.
+
+Change to `least_used` when even disk space consumption is desirable or when you have a JBOD volume with differing disk sizes. To prevent hot-spots, it is best to set this policy on a fresh volume or on a volume that has already been (re)balanced.
+
+Drawbacks: may lead to hot-spots, because new parts keep going to the emptiest disk.
+
+## Configurations
+
+Settings that can affect which disk is selected:
+
+- Storage policy volume setting: `least_used_ttl_ms`. Applies only to the `least_used` policy; the default is 60000 ms (60 s).
+- Disk settings: `keep_free_space_bytes`, `keep_free_space_ratio`.
+
+Setting that assists rebalancing:
+
+- MergeTree setting: `min_bytes_to_rebalance_partition_over_jbod`. This setting does not control where data is written on insert.
Instead, it governs redistribution of parts across the disks of the same volume during merges.
+
+> Note: setting `min_bytes_to_rebalance_partition_over_jbod` does not guarantee balanced partitions or balanced disk usage.
+>
+
+Example of a `least_used` policy:
+
+```xml
+<clickhouse> <!-- NOTE(review): element names were missing from this example and were restored from the values; confirm the disk, policy and volume names -->
+    <storage_configuration>
+        <disks>
+            <disk0> <!-- NOTE(review): original disk name unknown; if this is meant to be the built-in `default` disk, verify whether <path> is accepted here -->
+                <path>/var/lib/clickhouse/</path>
+                <keep_free_space_bytes>10737418240</keep_free_space_bytes>
+            </disk0>
+            <disk1>
+                <path>/mnt/disk1/</path>
+                <keep_free_space_bytes>10737418240</keep_free_space_bytes>
+            </disk1>
+            <disk2>
+                <path>/mnt/disk2/</path>
+                <keep_free_space_bytes>10737418240</keep_free_space_bytes>
+            </disk2>
+        </disks>
+        <policies>
+            <jbod_policy>
+                <volumes>
+                    <jbod_volume>
+                        <disk>disk1</disk>
+                        <disk>disk2</disk>
+                        <load_balancing>least_used</load_balancing>
+                        <least_used_ttl_ms>60000</least_used_ttl_ms>
+                    </jbod_volume>
+                </volumes>
+            </jbod_policy>
+        </policies>
+    </storage_configuration>
+</clickhouse>
+```
+
+## Manual Rebalancing Parts over JBOD Disks
+
+```sql
+WITH -- Emits one ALTER TABLE ... MOVE PART statement per large active part; review the output before running any of it.
+    '%' AS target_tables,   -- LIKE pattern for the tables to rebalance
+    '%' AS target_databases -- LIKE pattern for the databases to rebalance
+SELECT sub.q FROM
+(
+    SELECT
+        'ALTER TABLE `' || parts.database || '`.`' || parts.`table` || '` MOVE PART \'' || parts.name || '\' TO DISK \'' || other_disk_candidate || '\';' AS q, -- identifiers are backtick-quoted so names with special characters stay valid
+        parts.database AS db,
+        parts.`table` AS t,
+        parts.name AS part_name,
+        parts.disk_name AS part_disk_name, -- this and the columns below are not emitted; select them from sub to inspect the plan
+        parts.bytes_on_disk AS part_bytes_on_disk,
+        sp.storage_policy AS part_storage_policy,
+        arrayJoin(arrayRemove(v.disks, parts.disk_name)) AS other_disk_candidate, -- one row per other disk of the volume
+        candidate_disks.free_space AS candidate_disk_free_space
+    FROM system.parts AS parts
+    INNER JOIN ( SELECT database, `table`, storage_policy FROM system.tables WHERE (name LIKE target_tables) AND (database LIKE target_databases) GROUP BY database, `table`, storage_policy ) AS sp ON sp.`table` = parts.`table` AND sp.database = parts.database
+    INNER JOIN ( SELECT policy_name, volume_name, disks FROM system.storage_policies WHERE volume_type = 'JBOD' ) AS v ON sp.storage_policy = v.policy_name
+    INNER JOIN ( SELECT name, free_space FROM system.disks ) AS candidate_disks ON candidate_disks.name = other_disk_candidate
+    WHERE parts.active = 1 AND has(v.disks, parts.disk_name) -- only offer disks from the volume that currently holds the part
+        AND (parts.bytes_on_disk >= 10737418240) -- 10 GiB: prioritize larger parts
+        AND (parts.`table` LIKE target_tables)
+        AND (parts.database LIKE target_databases)
+        AND candidate_disks.free_space > parts.bytes_on_disk*2 -- 2x buffer
+    ORDER BY parts.bytes_on_disk DESC, candidate_disk_free_space DESC
+    LIMIT 1 BY db, t, part_name -- keep the candidate with the most free space per part; free space is read once, so run the moves in batches and re-run the query
+) AS sub
+FORMAT TSVRaw
+```