From 9b33ac83d115ed86dd1d518ea94224767d126a5a Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Tue, 6 May 2025 12:30:32 +0200 Subject: [PATCH 01/37] pass secondary_stg_name during bdev_distrib_create --- simplyblock_core/rpc_client.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index 40293e149..a30fac8b0 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -427,7 +427,9 @@ def bdev_alceml_create(self, alceml_name, nvme_name, uuid, pba_init_mode=3, def bdev_distrib_create(self, name, vuid, ndcs, npcs, num_blocks, block_size, jm_names, chunk_size, ha_comm_addrs=None, ha_inode_self=None, pba_page_size=2097152, distrib_cpu_mask="", ha_is_non_leader=True, jm_vuid=0, write_protection=False, - full_page_unmap=True): + full_page_unmap=False, storage_tiering_id=0, secondary_io_timeout_us=1 << 30, ghost_capacity=100, fifo_main_capacity=1000, fifo_small_capacity=100, + support_storage_tiering=False, secondary_stg_name="", disaster_recovery=False, + ): """" // Optional (not specified = no HA) // Comma-separated communication addresses, for each node, e.g. "192.168.10.1:45001,192.168.10.1:32768". @@ -443,7 +445,7 @@ def bdev_distrib_create(self, name, vuid, ndcs, npcs, num_blocks, block_size, jm ret = self.get_bdevs(name) if ret: return ret - except: + except Exception: pass params = { "name": name, @@ -469,6 +471,16 @@ def bdev_distrib_create(self, name, vuid, ndcs, npcs, num_blocks, block_size, jm params["write_protection"] = True if full_page_unmap: params["use_map_whole_page_on_1st_write"] = True + + if support_storage_tiering: + params['support_storage_tiering'] = support_storage_tiering + params['secondary_stg_name'] = secondary_stg_name + params['secondary_io_timeout_us'] = secondary_io_timeout_us + params['disaster_recovery'] = disaster_recovery + params['storage_tiering_id'] = storage_tiering_id + params['ghost_capacity'] = ghost_capacity + params['fifo_main_capacity'] = fifo_main_capacity + params['fifo_small_capacity'] = fifo_small_capacity return self._request("bdev_distrib_create", params) def bdev_lvol_delete_lvstore(self, name): From 67b66f2864401b657d258110e233ba63931cc125 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Tue, 6 May 2025 15:09:22 +0200 Subject: [PATCH 02/37] storage tiering --- simplyblock_core/models/cluster.py | 3 ++ simplyblock_core/rpc_client.py | 15 +++++++++ simplyblock_core/storage_node_ops.py | 43 +++++++++++++++++++++++-- simplyblock_web/blueprints/snode_ops.py | 2 ++ 4 files changed, 60 insertions(+), 3 deletions(-) diff --git a/simplyblock_core/models/cluster.py b/simplyblock_core/models/cluster.py index cdd45fd23..06931836f 100644 --- a/simplyblock_core/models/cluster.py +++ b/simplyblock_core/models/cluster.py @@ -61,6 +61,9 @@ class Cluster(BaseModel): strict_node_anti_affinity: bool = False tls: bool = False is_re_balancing: bool = False + storage_tiering: bool = True + s3_endpoint: str = "http://192.168.10.146:9000" + s3_bucket: str = "mybucket" def get_status_code(self): if self.status in self.STATUS_CODE_MAP: diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index a30fac8b0..313899d23 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -483,6 +483,21 @@ def bdev_distrib_create(self, name, vuid, ndcs, npcs, num_blocks, block_size, jm params['fifo_small_capacity'] = fifo_small_capacity return self._request("bdev_distrib_create", params) + def bdev_s3_create(self, name, uuid=None, bdb_lcpu_mask=0, s3_lcpu_mask=0, s3_thread_pool_size=32): + params = { 'name': name, 'bdb_lcpu_mask': bdb_lcpu_mask, 's3_lcpu_mask': s3_lcpu_mask, 's3_thread_pool_size': s3_thread_pool_size } + if uuid: + params['uuid'] = uuid + + return self._request("bdev_s3_create", params) + + def bdev_s3_delete(self, name): + params = { 'name': name } + return self._request("bdev_s3_delete", params) + + def bdev_s3_add_bucket(self, name, bucket_name): + params = { 'name': name, 'bucket_name': bucket_name } + return self._request("bdev_s3_add_bucket_name", params) + def bdev_lvol_delete_lvstore(self, name): params = {"lvs_name": name} return self._request2("bdev_lvol_delete_lvstore", params) diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index 1d9f44aea..57d55b4f8 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -3185,6 +3185,7 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo snode.jm_vuid = jm_vuid snode.write_to_db() + cluster = db_controller.get_cluster_by_id(snode.cluster_id) write_protection = False if ndcs > 1: write_protection = True @@ -3214,9 +3215,31 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo } ] ) + if cluster.storage_tiering: + lvstore_stack.append( + { + "type": "bdev_s3_create", + "params": { + 'name': 's3_{}'.format(distrib_name), + 'local_testing': True, + 'local_endpoint': cluster.s3_endpoint, + } + } + ) distrib_list.append(distrib_name) distrib_vuids.append(distrib_vuid) + if cluster.storage_tiering: + lvstore_stack.append( + { + "type": "bdev_s3_add_bucket_name", + "params": { + 'name': 's3_{}'.format(distrib_name), + 'bucket_name': cluster.s3_bucket, + } + } + ) + if len(distrib_list) == 1: raid_device = distrib_list[0] else: @@ -3259,7 +3282,7 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo snode.lvstore_status = "in_creation" snode.write_to_db() - ret, err = _create_bdev_stack(snode, lvstore_stack) + ret, err = _create_bdev_stack(snode, lvstore_stack, cluster.storage_tiering) if err: logger.error(f"Failed to create lvstore on node {snode.get_id()}") logger.error(err) @@ -3310,8 +3333,8 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo return True -def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None): - rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) +def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None, storage_tiering=False): + rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password ) created_bdevs = [] if not lvstore_stack: @@ -3334,12 +3357,26 @@ def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None): if name in node_bdev_names: continue + elif type == "bdev_s3_create": + if params['local_testing']: + params['local_endpoint'] = snode.s3_endpoint + ret = rpc_client.bdev_s3_create(**params) + if not ret: + return False, f"Failed to create S3 bdev: {name}" + + elif type == "bdev_s3_add_bucket_name": + ret = rpc_client.bdev_s3_add_bucket_name(**params) + if not ret: + return False, f"Failed to add bucket name: {name}" elif type == "bdev_distr": if primary_node: params['jm_names'] = get_node_jm_names(primary_node, remote_node=snode) else: params['jm_names'] = get_node_jm_names(snode) + if storage_tiering: + distrib_name = params['name'] + params['secondary_stg_name'] = 's3_{}'.format(distrib_name) if snode.distrib_cpu_cores: distrib_cpu_mask = utils.decimal_to_hex_power_of_2(snode.distrib_cpu_cores[snode.distrib_cpu_index]) params['distrib_cpu_mask'] = distrib_cpu_mask diff --git a/simplyblock_web/blueprints/snode_ops.py b/simplyblock_web/blueprints/snode_ops.py index 34367096a..43c4e4dca 100644 --- a/simplyblock_web/blueprints/snode_ops.py +++ b/simplyblock_web/blueprints/snode_ops.py @@ -188,6 +188,8 @@ def spdk_process_start(body: SPDKParams): f"ssd_pcie={ssd_pcie_params}", f"PCI_ALLOWED={ssd_pcie_list}", f"TOTAL_HP={total_mem_mib}", + f"AWS_ACCEESS_KEY_ID=foobar", + f"AWS_SECRET_ACCESS_KEY=barfoobarfoo", ] # restart_policy={"Name": "on-failure", "MaximumRetryCount": 99} ) From 7875a5d3038eeae6178f3b055bc209d833c39340 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Tue, 6 May 2025 15:19:29 +0200 Subject: [PATCH 03/37] pass parameters properly --- simplyblock_core/storage_node_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index 57d55b4f8..3f3bbe0db 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -3282,7 +3282,7 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo snode.lvstore_status = "in_creation" snode.write_to_db() - ret, err = _create_bdev_stack(snode, lvstore_stack, cluster.storage_tiering) + ret, err = _create_bdev_stack(snode, lvstore_stack, storage_tiering=cluster.storage_tiering) if err: logger.error(f"Failed to create lvstore on node {snode.get_id()}") logger.error(err) From 7d58069abfabf5c3ae707ba1423240b411e96f77 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Tue, 6 May 2025 15:31:15 +0200 Subject: [PATCH 04/37] create s3_dev before distrib --- simplyblock_core/storage_node_ops.py | 34 ++++++++++++++++------------ 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index 3f3bbe0db..fc5a14fac 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -3189,6 +3189,8 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo write_protection = False if ndcs > 1: write_protection = True + + storage_tiering_ops = [] for _ in range(snode.number_of_distribs): distrib_vuid = utils.get_random_vuid() while distrib_vuid in distrib_vuids: @@ -3216,7 +3218,7 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo ] ) if cluster.storage_tiering: - lvstore_stack.append( + storage_tiering_ops.append( { "type": "bdev_s3_create", "params": { @@ -3230,7 +3232,7 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo distrib_vuids.append(distrib_vuid) if cluster.storage_tiering: - lvstore_stack.append( + storage_tiering_ops.append( { "type": "bdev_s3_add_bucket_name", "params": { @@ -3282,6 +3284,22 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo snode.lvstore_status = "in_creation" snode.write_to_db() + if storage_tiering_ops: + for op in storage_tiering_ops: + type = op['type'] + if type == "bdev_s3_create": + params = op['params'] + if params['local_testing']: + params['local_endpoint'] = snode.s3_endpoint + ret = rpc_client.bdev_s3_create(**params) + if not ret: + return False, f"Failed to create S3 bdev: {params['name']}" + + elif type == "bdev_s3_add_bucket_name": + ret = rpc_client.bdev_s3_add_bucket_name(**params) + if not ret: + return False, f"Failed to add bucket name: {params['name']}" + ret, err = _create_bdev_stack(snode, lvstore_stack, storage_tiering=cluster.storage_tiering) if err: logger.error(f"Failed to create lvstore on node {snode.get_id()}") @@ -3356,18 +3374,6 @@ def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None, storage_tie if name in node_bdev_names: continue - - elif type == "bdev_s3_create": - if params['local_testing']: - params['local_endpoint'] = snode.s3_endpoint - ret = rpc_client.bdev_s3_create(**params) - if not ret: - return False, f"Failed to create S3 bdev: {name}" - - elif type == "bdev_s3_add_bucket_name": - ret = rpc_client.bdev_s3_add_bucket_name(**params) - if not ret: - return False, f"Failed to add bucket name: {name}" elif type == "bdev_distr": if primary_node: params['jm_names'] = get_node_jm_names(primary_node, remote_node=snode) From da9bf7637a38e4dcbe812cd1a25ee9c789536bb5 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Tue, 6 May 2025 16:10:27 +0200 Subject: [PATCH 05/37] remove support storage tiering default --- simplyblock_core/storage_node_ops.py | 1 + simplyblock_web/blueprints/snode_ops.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index fc5a14fac..e576b94eb 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -3383,6 +3383,7 @@ def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None, storage_tie if storage_tiering: distrib_name = params['name'] params['secondary_stg_name'] = 's3_{}'.format(distrib_name) + params['support_storage_tiering'] = True if snode.distrib_cpu_cores: distrib_cpu_mask = utils.decimal_to_hex_power_of_2(snode.distrib_cpu_cores[snode.distrib_cpu_index]) params['distrib_cpu_mask'] = distrib_cpu_mask diff --git a/simplyblock_web/blueprints/snode_ops.py b/simplyblock_web/blueprints/snode_ops.py index 43c4e4dca..772d58b12 100644 --- a/simplyblock_web/blueprints/snode_ops.py +++ b/simplyblock_web/blueprints/snode_ops.py @@ -188,8 +188,8 @@ def spdk_process_start(body: SPDKParams): f"ssd_pcie={ssd_pcie_params}", f"PCI_ALLOWED={ssd_pcie_list}", f"TOTAL_HP={total_mem_mib}", - f"AWS_ACCEESS_KEY_ID=foobar", - f"AWS_SECRET_ACCESS_KEY=barfoobarfoo", + "AWS_ACCEESS_KEY_ID=foobar", + "AWS_SECRET_ACCESS_KEY=barfoobarfoo", ] # restart_policy={"Name": "on-failure", "MaximumRetryCount": 99} ) From 95d666381a0d4c3476c7ada97be42b5284a2cc6f Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Tue, 6 May 2025 16:18:54 +0200 Subject: [PATCH 06/37] fix syntax --- simplyblock_core/storage_node_ops.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index e576b94eb..268111526 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -3289,13 +3289,12 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo type = op['type'] if type == "bdev_s3_create": params = op['params'] - if params['local_testing']: - params['local_endpoint'] = snode.s3_endpoint ret = rpc_client.bdev_s3_create(**params) if not ret: return False, f"Failed to create S3 bdev: {params['name']}" elif type == "bdev_s3_add_bucket_name": + params = op['params'] ret = rpc_client.bdev_s3_add_bucket_name(**params) if not ret: return False, f"Failed to add bucket name: {params['name']}" From d55937d4f88b45a8ed8ce96590bdcec29526a240 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Wed, 7 May 2025 04:27:35 +0200 Subject: [PATCH 07/37] initialize rpc client --- simplyblock_core/storage_node_ops.py | 1 + 1 file changed, 1 insertion(+) diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index 268111526..af50ac694 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -3285,6 +3285,7 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo snode.write_to_db() if storage_tiering_ops: + rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password ) for op in storage_tiering_ops: type = op['type'] if type == "bdev_s3_create": From 3bec750cba9ac02c10a2a7a194922249029609d2 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Wed, 7 May 2025 04:49:21 +0200 Subject: [PATCH 08/37] pass storage_tiering param at all usages --- simplyblock_core/storage_node_ops.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index af50ac694..e0fa27df5 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -2856,7 +2856,7 @@ def recreate_lvstore_on_sec(secondary_node): lvol_list.append(lv) ### 1- create distribs and raid - ret, err = _create_bdev_stack(secondary_node, primary_node.lvstore_stack, primary_node=primary_node) + ret, err = _create_bdev_stack(secondary_node, primary_node.lvstore_stack, primary_node=primary_node, storage_tiering=cluster.storage_tiering) if err: logger.error(f"Failed to recreate lvstore on node {secondary_node.get_id()}") logger.error(err) @@ -2920,9 +2920,10 @@ def recreate_lvstore(snode): snode = db_controller.get_storage_node_by_id(snode.get_id()) snode.remote_jm_devices = _connect_to_remote_jm_devs(snode) snode.write_to_db() + cluster = db_controller.get_cluster_by_id(snode.cluster_id) ### 1- create distribs and raid - ret, err = _create_bdev_stack(snode, []) + ret, err = _create_bdev_stack(snode, [], storage_tiering=cluster.storage_tiering) if err: logger.error(f"Failed to recreate lvstore on node {snode.get_id()}") logger.error(err) @@ -3321,7 +3322,7 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo # creating lvstore on secondary sec_node.remote_jm_devices = _connect_to_remote_jm_devs(sec_node) sec_node.write_to_db() - ret, err = _create_bdev_stack(sec_node, lvstore_stack, primary_node=snode) + ret, err = _create_bdev_stack(sec_node, lvstore_stack, primary_node=snode, storage_tiering=cluster.storage_tiering) if err: logger.error(f"Failed to create lvstore on node {sec_node.get_id()}") logger.error(err) From 322229ace7e6eb9d6ad67447ef6aa06fae4cc353 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Wed, 7 May 2025 05:10:41 +0200 Subject: [PATCH 09/37] use storage-tiering docker image --- simplyblock_core/env_var | 5 ++--- simplyblock_core/storage_node_ops.py | 2 ++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/simplyblock_core/env_var b/simplyblock_core/env_var index 5bac2d53c..5c7da6d66 100644 --- a/simplyblock_core/env_var +++ b/simplyblock_core/env_var @@ -1,6 +1,5 @@ SIMPLY_BLOCK_COMMAND_NAME=sbcli-dev SIMPLY_BLOCK_VERSION=18.0.63 - -SIMPLY_BLOCK_DOCKER_IMAGE=simplyblock/simplyblock:main -SIMPLY_BLOCK_SPDK_ULTRA_IMAGE=simplyblock/spdk:main-latest +SIMPLY_BLOCK_DOCKER_IMAGE=public.ecr.aws/simply-block/simplyblock:manohar-storage-tiering2 +SIMPLY_BLOCK_SPDK_ULTRA_IMAGE=public.ecr.aws/simply-block/ultra:main-latest diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index e0fa27df5..cba92ee5e 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -2855,6 +2855,7 @@ def recreate_lvstore_on_sec(secondary_node): if lv.status not in [LVol.STATUS_IN_DELETION, LVol.STATUS_IN_CREATION]: lvol_list.append(lv) + cluster = db_controller.get_cluster_by_id(primary_node.cluster_id) ### 1- create distribs and raid ret, err = _create_bdev_stack(secondary_node, primary_node.lvstore_stack, primary_node=primary_node, storage_tiering=cluster.storage_tiering) if err: @@ -3355,6 +3356,7 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None, storage_tiering=False): rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password ) + print("STORAGE TIERING: ", storage_tiering) created_bdevs = [] if not lvstore_stack: # Restart case From 55cc5d928d0cf3767bb294d49794340d31f50608 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Wed, 7 May 2025 05:21:54 +0200 Subject: [PATCH 10/37] use **kwargs --- simplyblock_core/rpc_client.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index 313899d23..f95b18813 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -2,7 +2,7 @@ import inspect import requests - +import inspect from simplyblock_core import constants, utils from requests.adapters import HTTPAdapter from urllib3 import Retry @@ -483,15 +483,10 @@ def bdev_distrib_create(self, name, vuid, ndcs, npcs, num_blocks, block_size, jm params['fifo_small_capacity'] = fifo_small_capacity return self._request("bdev_distrib_create", params) - def bdev_s3_create(self, name, uuid=None, bdb_lcpu_mask=0, s3_lcpu_mask=0, s3_thread_pool_size=32): - params = { 'name': name, 'bdb_lcpu_mask': bdb_lcpu_mask, 's3_lcpu_mask': s3_lcpu_mask, 's3_thread_pool_size': s3_thread_pool_size } - if uuid: - params['uuid'] = uuid - + def bdev_s3_create(self, **params): return self._request("bdev_s3_create", params) - def bdev_s3_delete(self, name): - params = { 'name': name } + def bdev_s3_delete(self, **params): return self._request("bdev_s3_delete", params) def bdev_s3_add_bucket(self, name, bucket_name): From 8a8b6b91c0fc0ec454bb5abbe75b2de2ce99b13b Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Wed, 7 May 2025 05:44:07 +0200 Subject: [PATCH 11/37] use seperate functions --- simplyblock_core/storage_node_ops.py | 54 +++++++++++++++++++--------- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index cba92ee5e..6ad874e7b 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -3286,22 +3286,6 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo snode.lvstore_status = "in_creation" snode.write_to_db() - if storage_tiering_ops: - rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password ) - for op in storage_tiering_ops: - type = op['type'] - if type == "bdev_s3_create": - params = op['params'] - ret = rpc_client.bdev_s3_create(**params) - if not ret: - return False, f"Failed to create S3 bdev: {params['name']}" - - elif type == "bdev_s3_add_bucket_name": - params = op['params'] - ret = rpc_client.bdev_s3_add_bucket_name(**params) - if not ret: - return False, f"Failed to add bucket name: {params['name']}" - ret, err = _create_bdev_stack(snode, lvstore_stack, storage_tiering=cluster.storage_tiering) if err: logger.error(f"Failed to create lvstore on node {snode.get_id()}") @@ -3353,6 +3337,42 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo return True +def s3_bdev_create(node_id, name, local_testing, local_endpoint): + db_controller = DBController() + + snode = db_controller.get_storage_node_by_id(node_id) + if not snode: + logger.error(f"Can not find storage node: {node_id}") + return False + + rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) + + return rpc_client.bdev_s3_create(name, local_testing, local_endpoint) + +def s3_bdev_delete(node_id, name): + db_controller = DBController() + + snode = db_controller.get_storage_node_by_id(node_id) + if not snode: + logger.error(f"Can not find storage node: {node_id}") + return False + + rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) + + return rpc_client.bdev_s3_delete(name) + +def s3_bdev_add_bucket_name(node_id, name, bucket_name): + db_controller = DBController() + + snode = db_controller.get_storage_node_by_id(node_id) + if not snode: + logger.error(f"Can not find storage node: {node_id}") + return False + + rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) + + return rpc_client.bdev_s3_add_bucket(name, bucket_name) + def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None, storage_tiering=False): rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password ) @@ -3387,6 +3407,8 @@ def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None, storage_tie distrib_name = params['name'] params['secondary_stg_name'] = 's3_{}'.format(distrib_name) params['support_storage_tiering'] = True + s3_bdev_create(snode.get_id(), params['secondary_stg_name'], True, "http://192.168.10.146:9000") + s3_bdev_add_bucket_name(snode.get_id(), params['secondary_stg_name'], "mys3bucket") if snode.distrib_cpu_cores: distrib_cpu_mask = utils.decimal_to_hex_power_of_2(snode.distrib_cpu_cores[snode.distrib_cpu_index]) params['distrib_cpu_mask'] = distrib_cpu_mask From 2cdef9f14f7f95ef6e4bf8c0b71068dcab3b050e Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Wed, 7 May 2025 05:51:46 +0200 Subject: [PATCH 12/37] pass params explicitly --- simplyblock_core/rpc_client.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index f95b18813..54b7e270c 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -483,7 +483,12 @@ def bdev_distrib_create(self, name, vuid, ndcs, npcs, num_blocks, block_size, jm params['fifo_small_capacity'] = fifo_small_capacity return self._request("bdev_distrib_create", params) - def bdev_s3_create(self, **params): + def bdev_s3_create(self, name, local_testing, local_endpoint): + params = { + "name": name, + "local_testing": local_testing, + "local_endpoint": local_endpoint, + } return self._request("bdev_s3_create", params) def bdev_s3_delete(self, **params): From 2a4e4efda58dfd4d2a888bc6602e38ffb75e74f4 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Wed, 7 May 2025 07:22:02 +0200 Subject: [PATCH 13/37] s3 bdev create only once per node --- simplyblock_core/rpc_client.py | 3 +++ simplyblock_core/storage_node_ops.py | 16 ++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index 54b7e270c..d11a99a47 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -3,6 +3,7 @@ import requests import inspect +import random from simplyblock_core import constants, utils from requests.adapters import HTTPAdapter from urllib3 import Retry @@ -473,6 +474,8 @@ def bdev_distrib_create(self, name, vuid, ndcs, npcs, num_blocks, block_size, jm params["use_map_whole_page_on_1st_write"] = True if support_storage_tiering: + # generate a random int + storage_tiering_id = random.randint(1, 1000000) params['support_storage_tiering'] = support_storage_tiering params['secondary_stg_name'] = secondary_stg_name params['secondary_io_timeout_us'] = secondary_io_timeout_us diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index 6ad874e7b..696f0c6c6 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -3347,7 +3347,15 @@ def s3_bdev_create(node_id, name, local_testing, local_endpoint): rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) - return rpc_client.bdev_s3_create(name, local_testing, local_endpoint) + resp = rpc_client.bdev_get_bdevs(name) + if resp is None: + print("s3 bdev does not exist. creating...") + rpc_client.bdev_s3_create(name, local_testing, local_endpoint) + rpc_client.bdev_s3_add_bucket(name, "mys3bucket") + else: + print("bdev already exists") + print(resp) + def s3_bdev_delete(node_id, name): db_controller = DBController() @@ -3404,11 +3412,11 @@ def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None, storage_tie params['jm_names'] = get_node_jm_names(snode) if storage_tiering: - distrib_name = params['name'] - params['secondary_stg_name'] = 's3_{}'.format(distrib_name) + snode_id = snode.get_id() + params['secondary_stg_name'] = 's3_{}'.format(snode_id.split("-")[0]) params['support_storage_tiering'] = True s3_bdev_create(snode.get_id(), params['secondary_stg_name'], True, "http://192.168.10.146:9000") - s3_bdev_add_bucket_name(snode.get_id(), params['secondary_stg_name'], "mys3bucket") + # s3_bdev_add_bucket_name(snode.get_id(), params['secondary_stg_name'], "mys3bucket") if snode.distrib_cpu_cores: distrib_cpu_mask = utils.decimal_to_hex_power_of_2(snode.distrib_cpu_cores[snode.distrib_cpu_index]) params['distrib_cpu_mask'] = distrib_cpu_mask From 9c86971c2e67391f127922f6ce43a97646153381 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Wed, 7 May 2025 07:35:30 +0200 Subject: [PATCH 14/37] add distrib bdev create --- simplyblock_core/rpc_client.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index d11a99a47..0ea5578a6 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -91,6 +91,10 @@ def _request2(self, method, params=None): def get_version(self): return self._request("spdk_get_version") + def bdev_get_bdevs(self, name): + params = { 'name': name } + return self._request("bdev_get_bdevs", params) + def subsystem_list(self, nqn_name=None): data = self._request("nvmf_get_subsystems") if data and nqn_name: From 0c019460ac1ea2bb62e6b0bc9e0615b34bbe9aaa Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Wed, 7 May 2025 07:42:47 +0200 Subject: [PATCH 15/37] limit to 2**16-2 --- simplyblock_core/rpc_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index 0ea5578a6..c65a399f1 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -479,7 +479,7 @@ def bdev_distrib_create(self, name, vuid, ndcs, npcs, num_blocks, block_size, jm if support_storage_tiering: # generate a random int - storage_tiering_id = random.randint(1, 1000000) + storage_tiering_id = random.randint(0, 2**16 - 2) params['support_storage_tiering'] = support_storage_tiering params['secondary_stg_name'] = secondary_stg_name params['secondary_io_timeout_us'] = secondary_io_timeout_us From 1b661274d459084437bfaa30a3a852aa88d81022 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Wed, 7 May 2025 09:17:02 +0200 Subject: [PATCH 16/37] fix typo --- simplyblock_core/env_var | 2 +- simplyblock_web/blueprints/snode_ops.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/simplyblock_core/env_var b/simplyblock_core/env_var index 5c7da6d66..db1d3a887 100644 --- a/simplyblock_core/env_var +++ b/simplyblock_core/env_var @@ -2,4 +2,4 @@ SIMPLY_BLOCK_COMMAND_NAME=sbcli-dev SIMPLY_BLOCK_VERSION=18.0.63 SIMPLY_BLOCK_DOCKER_IMAGE=public.ecr.aws/simply-block/simplyblock:manohar-storage-tiering2 -SIMPLY_BLOCK_SPDK_ULTRA_IMAGE=public.ecr.aws/simply-block/ultra:main-latest +SIMPLY_BLOCK_SPDK_ULTRA_IMAGE=public.ecr.aws/simply-block/ultra:storage-tiering-ha-latest diff --git a/simplyblock_web/blueprints/snode_ops.py b/simplyblock_web/blueprints/snode_ops.py index 772d58b12..0f5627c39 100644 --- a/simplyblock_web/blueprints/snode_ops.py +++ b/simplyblock_web/blueprints/snode_ops.py @@ -188,7 +188,7 @@ def spdk_process_start(body: SPDKParams): f"ssd_pcie={ssd_pcie_params}", f"PCI_ALLOWED={ssd_pcie_list}", f"TOTAL_HP={total_mem_mib}", - "AWS_ACCEESS_KEY_ID=foobar", + "AWS_ACCESS_KEY_ID=foobar", "AWS_SECRET_ACCESS_KEY=barfoobarfoo", ] # restart_policy={"Name": "on-failure", "MaximumRetryCount": 99} From 343f3d4bd074ced1201e59c1c369dec9d578ac63 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Wed, 7 May 2025 09:53:43 +0200 Subject: [PATCH 17/37] remove un used imports --- simplyblock_web/node_utils.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/simplyblock_web/node_utils.py b/simplyblock_web/node_utils.py index 356f60220..033ef70b6 100644 --- a/simplyblock_web/node_utils.py +++ b/simplyblock_web/node_utils.py @@ -9,11 +9,7 @@ import re import jc -from kubernetes.stream import stream -from kubernetes import client, config - from simplyblock_core import shell_utils -from simplyblock_web import utils From 3634fc72c1725cebc3c61201285d4978f04f52fe Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Thu, 8 May 2025 06:45:21 +0200 Subject: [PATCH 18/37] use sanitizer image --- simplyblock_core/env_var | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simplyblock_core/env_var b/simplyblock_core/env_var index db1d3a887..9e0903800 100644 --- a/simplyblock_core/env_var +++ b/simplyblock_core/env_var @@ -2,4 +2,4 @@ SIMPLY_BLOCK_COMMAND_NAME=sbcli-dev SIMPLY_BLOCK_VERSION=18.0.63 SIMPLY_BLOCK_DOCKER_IMAGE=public.ecr.aws/simply-block/simplyblock:manohar-storage-tiering2 -SIMPLY_BLOCK_SPDK_ULTRA_IMAGE=public.ecr.aws/simply-block/ultra:storage-tiering-ha-latest +SIMPLY_BLOCK_SPDK_ULTRA_IMAGE=simplyblock/spdk:storage-tiering-ha-sanitizer From 2076a53aed3058ab5d30824aecbb8299df95a5ba Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Wed, 14 May 2025 11:53:51 +0200 Subject: [PATCH 19/37] pass bucket name from cluster object --- simplyblock_core/storage_node_ops.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index 696f0c6c6..4c3b42d89 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -2857,7 +2857,7 @@ def recreate_lvstore_on_sec(secondary_node): cluster = db_controller.get_cluster_by_id(primary_node.cluster_id) ### 1- create distribs and raid - ret, err = _create_bdev_stack(secondary_node, primary_node.lvstore_stack, primary_node=primary_node, storage_tiering=cluster.storage_tiering) + ret, err = _create_bdev_stack(secondary_node, primary_node.lvstore_stack, primary_node=primary_node, storage_tiering=cluster.storage_tiering, endpoint=cluster.endpoint, bucket_name=cluster.s3_bucket) if err: logger.error(f"Failed to recreate lvstore on node {secondary_node.get_id()}") logger.error(err) @@ -2924,7 +2924,7 @@ def recreate_lvstore(snode): cluster = db_controller.get_cluster_by_id(snode.cluster_id) ### 1- create distribs and raid - ret, err = _create_bdev_stack(snode, [], storage_tiering=cluster.storage_tiering) + ret, err = _create_bdev_stack(snode, [], storage_tiering=cluster.storage_tiering, endpoint=cluster.endpoint, bucket_name=cluster.s3_bucket) if err: logger.error(f"Failed to recreate lvstore on node {snode.get_id()}") logger.error(err) @@ -3286,7 +3286,7 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo snode.lvstore_status = "in_creation" snode.write_to_db() - ret, err = _create_bdev_stack(snode, lvstore_stack, storage_tiering=cluster.storage_tiering) + ret, err = _create_bdev_stack(snode, lvstore_stack, storage_tiering=cluster.storage_tiering, endpoint=cluster.s3_endpoint, bucket_name=cluster.s3_bucket) if err: logger.error(f"Failed to create lvstore on node {snode.get_id()}") logger.error(err) @@ -3307,7 +3307,7 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo # creating lvstore on secondary sec_node.remote_jm_devices = _connect_to_remote_jm_devs(sec_node) sec_node.write_to_db() - ret, err = _create_bdev_stack(sec_node, lvstore_stack, primary_node=snode, storage_tiering=cluster.storage_tiering) + ret, err = _create_bdev_stack(sec_node, lvstore_stack, primary_node=snode, storage_tiering=cluster.storage_tiering, endpoint=cluster.s3_endpoint, bucket_name=cluster.s3_bucket) if err: logger.error(f"Failed to create lvstore on node {sec_node.get_id()}") logger.error(err) @@ -3337,7 +3337,7 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo return True -def s3_bdev_create(node_id, name, local_testing, local_endpoint): +def s3_bdev_create(node_id, name, local_testing, local_endpoint, bucket_name): db_controller = DBController() snode = db_controller.get_storage_node_by_id(node_id) @@ -3351,7 +3351,7 @@ def s3_bdev_create(node_id, name, local_testing, local_endpoint): if resp is None: print("s3 bdev does not exist. creating...") rpc_client.bdev_s3_create(name, local_testing, local_endpoint) - rpc_client.bdev_s3_add_bucket(name, "mys3bucket") + rpc_client.bdev_s3_add_bucket(name, bucket_name) else: print("bdev already exists") print(resp) @@ -3381,7 +3381,7 @@ def s3_bdev_add_bucket_name(node_id, name, bucket_name): return rpc_client.bdev_s3_add_bucket(name, bucket_name) -def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None, storage_tiering=False): +def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None, storage_tiering=False, endpoint=None, bucket_name=None): rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password ) print("STORAGE TIERING: ", storage_tiering) @@ -3415,8 +3415,7 @@ def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None, storage_tie snode_id = snode.get_id() params['secondary_stg_name'] = 's3_{}'.format(snode_id.split("-")[0]) params['support_storage_tiering'] = True - s3_bdev_create(snode.get_id(), params['secondary_stg_name'], True, "http://192.168.10.146:9000") - # s3_bdev_add_bucket_name(snode.get_id(), params['secondary_stg_name'], "mys3bucket") + s3_bdev_create(snode.get_id(), params['secondary_stg_name'], local_testing=True, local_endpoint=endpoint, bucket_name=bucket_name) if snode.distrib_cpu_cores: distrib_cpu_mask = utils.decimal_to_hex_power_of_2(snode.distrib_cpu_cores[snode.distrib_cpu_index]) params['distrib_cpu_mask'] = distrib_cpu_mask From ff35e7f9a6079fc05621737a4932c75531699771 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Wed, 14 May 2025 20:57:24 +0200 Subject: [PATCH 20/37] make cluster_sz multiple of ndcs --- simplyblock_core/storage_node_ops.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index 4c3b42d89..d090357ae 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -3169,10 +3169,8 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo distrib_list = [] distrib_vuids = [] size = max_size // snode.number_of_distribs - distr_page_size = page_size_in_blocks - # distr_page_size = (ndcs + npcs) * page_size_in_blocks - # cluster_sz = ndcs * page_size_in_blocks - cluster_sz = page_size_in_blocks + distr_page_size = ndcs * page_size_in_blocks + cluster_sz = ndcs * page_size_in_blocks strip_size_kb = int((ndcs + npcs) * 2048) strip_size_kb = utils.nearest_upper_power_of_2(strip_size_kb) jm_vuid = 1 From c9231f02a4fc64353f27d49d007fe748b827bb8e Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Thu, 15 May 2025 10:56:05 +0200 Subject: [PATCH 21/37] use vuid for storage_tiering_id --- simplyblock_core/rpc_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index c65a399f1..34bd41dd8 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -479,12 +479,12 @@ def bdev_distrib_create(self, name, vuid, ndcs, npcs, num_blocks, block_size, jm if support_storage_tiering: # generate a random int - storage_tiering_id = random.randint(0, 2**16 - 2) + # storage_tiering_id = random.randint(0, 2**16 - 2) params['support_storage_tiering'] = support_storage_tiering params['secondary_stg_name'] = secondary_stg_name params['secondary_io_timeout_us'] = secondary_io_timeout_us params['disaster_recovery'] = disaster_recovery - params['storage_tiering_id'] = storage_tiering_id + params['storage_tiering_id'] = params['vuid'] params['ghost_capacity'] = ghost_capacity params['fifo_main_capacity'] = fifo_main_capacity params['fifo_small_capacity'] = fifo_small_capacity From d08adbe20de7e3c2e2078141f5f98c8843716deb Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Fri, 16 May 2025 09:35:14 +0200 Subject: [PATCH 22/37] set addtional flags during lvol create & clone --- simplyblock_core/rpc_client.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index 34bd41dd8..120a15cd9 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -285,10 +285,22 @@ def create_lvstore(self, name, bdev_name, cluster_sz, clear_method, num_md_pages "lvs_name": name, "cluster_sz": cluster_sz, "clear_method": clear_method, + "not_evict_lvstore_md_pages": True, "num_md_pages_per_cluster_ratio": num_md_pages_per_cluster_ratio, } return self._request("bdev_lvol_create_lvstore", params) + def bdev_lvol_create_lvstore_persistent(self, name, bdev_name, cluster_sz, clear_method, num_md_pages_per_cluster_ratio, not_evict_lvstore_md_pages=True): + params = { + "bdev_name": bdev_name, + "lvs_name": name, + "cluster_sz": cluster_sz, + "clear_method": clear_method, + "num_md_pages_per_cluster_ratio": num_md_pages_per_cluster_ratio, + "not_evict_lvstore_md_pages": True + } + return self._request("bdev_lvol_create_lvstore_persistent", params) + def create_lvol(self, name, size_in_mib, lvs_name, lvol_priority_class=0): params = { "lvol_name": name, @@ -296,6 +308,7 @@ def create_lvol(self, name, size_in_mib, lvs_name, lvol_priority_class=0): "lvs_name": lvs_name, "thin_provision": True, "clear_method": "unmap", + "sync_fetch": True, "lvol_priority_class": lvol_priority_class, } return self._request("bdev_lvol_create", params) @@ -331,13 +344,17 @@ def lvol_read_only(self, name): def lvol_create_snapshot(self, lvol_id, snapshot_name): params = { "lvol_name": lvol_id, - "snapshot_name": snapshot_name} + "snapshot_name": snapshot_name, + "sync_fetch": True, + } return self._request("bdev_lvol_snapshot", params) def lvol_clone(self, snapshot_name, clone_name): params = { "snapshot_name": snapshot_name, - "clone_name": clone_name} + "clone_name": clone_name, + "sync_fetch": True, + } return self._request("bdev_lvol_clone", params) def lvol_compress_create(self, base_bdev_name, pm_path): From f6ce7abf9d2e89ade789c3205d94a0cf7d86d4b5 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Tue, 20 May 2025 16:28:34 +0200 Subject: [PATCH 23/37] set ghost and small to 100 and main queue to 1000 --- simplyblock_core/rpc_client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index 120a15cd9..55647656a 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -286,6 +286,7 @@ def create_lvstore(self, name, bdev_name, cluster_sz, clear_method, num_md_pages "cluster_sz": cluster_sz, "clear_method": clear_method, "not_evict_lvstore_md_pages": True, + # "disaster_recovery": True, "num_md_pages_per_cluster_ratio": num_md_pages_per_cluster_ratio, } return self._request("bdev_lvol_create_lvstore", params) From 291da452c0cb2ae0d08b4f92d8add7d408cc9fe8 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Wed, 21 May 2025 17:29:35 +0200 Subject: [PATCH 24/37] update endpoint --- simplyblock_core/models/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simplyblock_core/models/cluster.py b/simplyblock_core/models/cluster.py index 06931836f..ff75db468 100644 --- a/simplyblock_core/models/cluster.py +++ b/simplyblock_core/models/cluster.py @@ -62,7 +62,7 @@ class Cluster(BaseModel): tls: bool = False is_re_balancing: bool = False storage_tiering: bool = True - s3_endpoint: str = "http://192.168.10.146:9000" + s3_endpoint: str = "http://192.168.10.141:9000" s3_bucket: str = "mybucket" def get_status_code(self): From 3fa69587056a1ecaf3f58238574827c30d975eac Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Wed, 21 May 2025 20:46:31 +0200 Subject: [PATCH 25/37] Revert "update endpoint" This reverts commit fd5010ea72b70a895d23a9764d70df140f62c00b. --- simplyblock_core/models/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simplyblock_core/models/cluster.py b/simplyblock_core/models/cluster.py index ff75db468..06931836f 100644 --- a/simplyblock_core/models/cluster.py +++ b/simplyblock_core/models/cluster.py @@ -62,7 +62,7 @@ class Cluster(BaseModel): tls: bool = False is_re_balancing: bool = False storage_tiering: bool = True - s3_endpoint: str = "http://192.168.10.141:9000" + s3_endpoint: str = "http://192.168.10.146:9000" s3_bucket: str = "mybucket" def get_status_code(self): From 598379bdbc03ca188b97f79560fdd42285d6ce68 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Thu, 22 May 2025 22:48:25 +0200 Subject: [PATCH 26/37] set is_raid_base to true during storage_tiering --- simplyblock_core/rpc_client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index 55647656a..18e677c69 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -504,6 +504,7 @@ def bdev_distrib_create(self, name, vuid, ndcs, npcs, num_blocks, block_size, jm params['disaster_recovery'] = disaster_recovery params['storage_tiering_id'] = params['vuid'] params['ghost_capacity'] = ghost_capacity + params['is_raid_base'] = True params['fifo_main_capacity'] = fifo_main_capacity params['fifo_small_capacity'] = fifo_small_capacity return self._request("bdev_distrib_create", params) From a0ffa5d949d80b5fa7151a3f74fd66b09b2a3be9 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Sat, 24 May 2025 14:29:59 +0530 Subject: [PATCH 27/37] remove the usage of is_raid_base flag --- simplyblock_core/rpc_client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index 18e677c69..55647656a 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -504,7 +504,6 @@ def bdev_distrib_create(self, name, vuid, ndcs, npcs, num_blocks, block_size, jm params['disaster_recovery'] = disaster_recovery params['storage_tiering_id'] = params['vuid'] params['ghost_capacity'] = ghost_capacity - params['is_raid_base'] = True params['fifo_main_capacity'] = fifo_main_capacity params['fifo_small_capacity'] = fifo_small_capacity return self._request("bdev_distrib_create", params) From 3bd4f25bc4ad07042a2b290d796f9a2f15a8da03 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Thu, 29 May 2025 14:05:19 +0530 Subject: [PATCH 28/37] toggle disaster recovery during lvstore recover --- simplyblock_core/rpc_client.py | 2 +- simplyblock_core/storage_node_ops.py | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index 55647656a..3bb6e72d4 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -286,7 +286,7 @@ def create_lvstore(self, name, bdev_name, cluster_sz, clear_method, num_md_pages "cluster_sz": cluster_sz, "clear_method": clear_method, "not_evict_lvstore_md_pages": True, - # "disaster_recovery": True, + # "disaster_recovery": True, # toggle then when the node needs to be created in disaster recovery mode "num_md_pages_per_cluster_ratio": num_md_pages_per_cluster_ratio, } return self._request("bdev_lvol_create_lvstore", params) diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index d090357ae..be57de346 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -708,12 +708,13 @@ def _connect_to_remote_jm_devs(this_node, jm_ids=None): if this_node.lvstore_stack_secondary_1: org_node = db_controller.get_storage_node_by_id(this_node.lvstore_stack_secondary_1) - if org_node.jm_device and org_node.jm_device.status == JMDevice.STATUS_ONLINE: - remote_devices.append(org_node.jm_device) - for jm_id in org_node.jm_ids: - jm_dev = db_controller.get_jm_device_by_id(jm_id) - if jm_dev and jm_dev not in remote_devices: - remote_devices.append(jm_dev) + if org_node: + if org_node.jm_device and org_node.jm_device.status == JMDevice.STATUS_ONLINE: + remote_devices.append(org_node.jm_device) + for jm_id in org_node.jm_ids: + jm_dev = db_controller.get_jm_device_by_id(jm_id) + if jm_dev and jm_dev not in remote_devices: + remote_devices.append(jm_dev) if len(remote_devices) < 2: for node in db_controller.get_storage_nodes_by_cluster_id(this_node.cluster_id): From 36149ea020e7f20d267fddea311f428f36dd9645 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Thu, 29 May 2025 14:12:52 +0530 Subject: [PATCH 29/37] use full_page_unmap as true --- simplyblock_core/rpc_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index 3bb6e72d4..b21461da5 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -450,7 +450,7 @@ def bdev_alceml_create(self, alceml_name, nvme_name, uuid, pba_init_mode=3, def bdev_distrib_create(self, name, vuid, ndcs, npcs, num_blocks, block_size, jm_names, chunk_size, ha_comm_addrs=None, ha_inode_self=None, pba_page_size=2097152, distrib_cpu_mask="", ha_is_non_leader=True, jm_vuid=0, write_protection=False, - full_page_unmap=False, storage_tiering_id=0, secondary_io_timeout_us=1 << 30, ghost_capacity=100, fifo_main_capacity=1000, fifo_small_capacity=100, + full_page_unmap=True, storage_tiering_id=0, secondary_io_timeout_us=1 << 30, ghost_capacity=100, fifo_main_capacity=1000, fifo_small_capacity=100, support_storage_tiering=False, secondary_stg_name="", disaster_recovery=False, ): """" From fac55afeb4107061c84869f4663fa65300e470f7 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Tue, 3 Jun 2025 05:26:35 +0530 Subject: [PATCH 30/37] storage tiering --- simplyblock_core/env_var | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simplyblock_core/env_var b/simplyblock_core/env_var index 9e0903800..a7f99be24 100644 --- a/simplyblock_core/env_var +++ b/simplyblock_core/env_var @@ -1,5 +1,5 @@ SIMPLY_BLOCK_COMMAND_NAME=sbcli-dev SIMPLY_BLOCK_VERSION=18.0.63 -SIMPLY_BLOCK_DOCKER_IMAGE=public.ecr.aws/simply-block/simplyblock:manohar-storage-tiering2 +SIMPLY_BLOCK_DOCKER_IMAGE=public.ecr.aws/simply-block/simplyblock:manohar-storage-tiering3 SIMPLY_BLOCK_SPDK_ULTRA_IMAGE=simplyblock/spdk:storage-tiering-ha-sanitizer From 2f4a32f5f0913d31b24ead8f70c7b3fd8b480da1 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Tue, 3 Jun 2025 10:28:17 +0530 Subject: [PATCH 31/37] storage tiering --- simplyblock_core/env_var | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simplyblock_core/env_var b/simplyblock_core/env_var index a7f99be24..857416a28 100644 --- a/simplyblock_core/env_var +++ b/simplyblock_core/env_var @@ -2,4 +2,4 @@ SIMPLY_BLOCK_COMMAND_NAME=sbcli-dev SIMPLY_BLOCK_VERSION=18.0.63 SIMPLY_BLOCK_DOCKER_IMAGE=public.ecr.aws/simply-block/simplyblock:manohar-storage-tiering3 -SIMPLY_BLOCK_SPDK_ULTRA_IMAGE=simplyblock/spdk:storage-tiering-ha-sanitizer +SIMPLY_BLOCK_SPDK_ULTRA_IMAGE=simplyblock/spdk:storage-tiering-ha-tested-sanitizer From b73bd1f7983c0a55364cc26a452540025a2cdc95 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Tue, 3 Jun 2025 11:21:59 +0530 Subject: [PATCH 32/37] backup a snapshot --- simplyblock_cli/cli-reference.yaml | 7 +++++ simplyblock_cli/cli.py | 7 +++++ simplyblock_cli/clibase.py | 4 +++ .../controllers/snapshot_controller.py | 29 +++++++++++++++++++ simplyblock_core/rpc_client.py | 14 +++++++++ 5 files changed, 61 insertions(+) diff --git a/simplyblock_cli/cli-reference.yaml b/simplyblock_cli/cli-reference.yaml index 639acb764..5e39a50d8 100644 --- a/simplyblock_cli/cli-reference.yaml +++ b/simplyblock_cli/cli-reference.yaml @@ -1987,6 +1987,13 @@ commands: dest: resize type: size default: "0" + - name: backup + help: "Backs up a snapshot to a s3 storage using storage tiering" + arguments: + - name: "snapshot_id" + help: "Snapshot id" + dest: snapshot_id + type: str - name: "caching-node" help: "Caching node commands" aliases: diff --git a/simplyblock_cli/cli.py b/simplyblock_cli/cli.py index 5647d1d05..0334abd45 100644 --- a/simplyblock_cli/cli.py +++ b/simplyblock_cli/cli.py @@ -784,6 +784,7 @@ def init_snapshot(self): self.init_snapshot__list(subparser) self.init_snapshot__delete(subparser) self.init_snapshot__clone(subparser) + self.init_snapshot__backup(subparser) def init_snapshot__add(self, subparser): @@ -806,6 +807,10 @@ def init_snapshot__clone(self, subparser): subcommand.add_argument('lvol_name', help='Logical volume name', type=str) argument = subcommand.add_argument('--resize', help='New logical volume size: 10M, 10G, 10(bytes). Can only increase.', type=size_type(), default='0', dest='resize') + def init_snapshot__backup(self, subparser): + subcommand = self.add_sub_command(subparser, 'backup', 'Backs up a snapshot to a s3 storage using storage tiering') + subcommand.add_argument('snapshot_id', help='Snapshot id', type=str) + def init_caching_node(self): subparser = self.add_command('caching-node', 'Caching node commands', aliases=['cn',]) @@ -1212,6 +1217,8 @@ def run(self): ret = self.snapshot__delete(sub_command, args) elif sub_command in ['clone']: ret = self.snapshot__clone(sub_command, args) + elif sub_command in ['backup']: + ret = self.snapshot__backup(sub_command, args) else: self.parser.print_help() diff --git a/simplyblock_cli/clibase.py b/simplyblock_cli/clibase.py index 40e648eba..f5e3048fb 100644 --- a/simplyblock_cli/clibase.py +++ b/simplyblock_cli/clibase.py @@ -600,6 +600,10 @@ def snapshot__clone(self, sub_command, args): success, details = snapshot_controller.clone(args.snapshot_id, args.lvol_name, new_size) return details + def snapshot__backup(self, sub_command, args): + snapshot_id = args.snapshot_id + return snapshot_controller.backup(snapshot_id) + def caching_node__deploy(self, sub_command, args): return caching_node_controller.deploy(args.ifname) diff --git a/simplyblock_core/controllers/snapshot_controller.py b/simplyblock_core/controllers/snapshot_controller.py index e7e1780c0..fa22c71bc 100644 --- a/simplyblock_core/controllers/snapshot_controller.py +++ b/simplyblock_core/controllers/snapshot_controller.py @@ -230,6 +230,35 @@ def list(all=False): return utils.print_table(data) +def backup(snapshot_uuid): + snap = db_controller.get_snapshot_by_id(snapshot_uuid) + if not snap: + msg = f"Snapshot not found: {snapshot_uuid}" + logger.error(msg) + return False, msg + + snode = db_controller.get_storage_node_by_id(snap.lvol.node_id) + if not snode: + msg = f"Storage node not found: {snap.lvol.node_id}" + logger.error(msg) + return False, msg + + rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) + ret = rpc_client.bdev_lvol_backup_snapshot(snap.snap_uuid, timeout_us=1000000, dev_page_size=2 * 1024 * 1024, nmax_retries=4, nmax_flush_jobs=4) + if not ret: + msg = f"Failed to backup snapshot: {snap.snap_bdev}" + logger.error(msg) + return False, msg + + times = 5 + while times > 0: + resp = rpc_client.bdev_lvol_get_snapshot_backup_status(lvol_name=snap.snap_uuid) + time.sleep(3) + times = times - 1 + print(resp) + logger.info("Done") + return True, "" + def delete(snapshot_uuid, force_delete=False): snap = db_controller.get_snapshot_by_id(snapshot_uuid) if not snap: diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index b21461da5..7c6751a52 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -1042,6 +1042,20 @@ def bdev_lvol_clone_register(self, clone_name, snapshot_name, registered_uuid, b } return self._request("bdev_lvol_clone_register", params) + def bdev_lvol_backup_snapshot(self, lvol_name, timeout_us, dev_page_size, nmax_retries=4, nmax_flush_jobs=4): + params = { + 'lvol_name': lvol_name, + 'timeout_us': timeout_us, + 'dev_page_size': dev_page_size, + 'nmax_retries': nmax_retries, + 'nmax_flush_jobs': nmax_flush_jobs + } + return self._request("bdev_lvol_backup_snapshot", params) + + def bdev_lvol_get_snapshot_backup_status(self, lvol_name): + params = { 'lvol_name': lvol_name } + return self._request("bdev_lvol_get_snapshot_backup_status", params) + def distr_replace_id_in_map_prob(self, storage_ID_from, storage_ID_to): params = { "storage_ID_from": storage_ID_from, From 09cf2ba49ad6a53a69c83bfc1afc035f9a51dc90 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Tue, 3 Jun 2025 11:46:17 +0530 Subject: [PATCH 33/37] snapshot backedup at --- simplyblock_core/controllers/snapshot_controller.py | 10 +++++++--- simplyblock_core/models/snapshot.py | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/simplyblock_core/controllers/snapshot_controller.py b/simplyblock_core/controllers/snapshot_controller.py index fa22c71bc..a7412fd70 100644 --- a/simplyblock_core/controllers/snapshot_controller.py +++ b/simplyblock_core/controllers/snapshot_controller.py @@ -255,9 +255,13 @@ def backup(snapshot_uuid): resp = rpc_client.bdev_lvol_get_snapshot_backup_status(lvol_name=snap.snap_uuid) time.sleep(3) times = times - 1 - print(resp) - logger.info("Done") - return True, "" + if resp == "SUCCEEDED": + snap.backedup_at = str(datetime.datetime.now(datetime.timezone.utc)) + snap.write_to_db(db_controller.kv_store) + logger.info("Done") + return True, "" + + return False, f"Failed to backup snapshot: {snap.snap_bdev}, status: {resp}" def delete(snapshot_uuid, force_delete=False): snap = db_controller.get_snapshot_by_id(snapshot_uuid) diff --git a/simplyblock_core/models/snapshot.py b/simplyblock_core/models/snapshot.py index 300b984a2..853613455 100644 --- a/simplyblock_core/models/snapshot.py +++ b/simplyblock_core/models/snapshot.py @@ -22,3 +22,4 @@ class SnapShot(BaseModel): snap_ref_id: str = "" snap_uuid: str = "" vuid: int = 0 + backedup_at: str = "" From 2019d76eef8222df5892f49ecae5e75191d59a2c Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Tue, 3 Jun 2025 12:39:57 +0530 Subject: [PATCH 34/37] S3 Endpoint tests --- simplyblock_core/rpc_client.py | 1 - simplyblock_core/storage_node_ops.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index 7c6751a52..20a22b5cf 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -985,7 +985,6 @@ def listeners_del(self, nqn, trtype, traddr, trsvcid): } return self._request("nvmf_subsystem_remove_listener", params) - def bdev_distrib_force_to_non_leader(self, jm_vuid=0): params = None if jm_vuid: diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index be57de346..8bf5c2cf6 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -2858,7 +2858,7 @@ def recreate_lvstore_on_sec(secondary_node): cluster = db_controller.get_cluster_by_id(primary_node.cluster_id) ### 1- create distribs and raid - ret, err = _create_bdev_stack(secondary_node, primary_node.lvstore_stack, primary_node=primary_node, storage_tiering=cluster.storage_tiering, endpoint=cluster.endpoint, bucket_name=cluster.s3_bucket) + ret, err = _create_bdev_stack(secondary_node, primary_node.lvstore_stack, primary_node=primary_node, storage_tiering=cluster.storage_tiering, endpoint=cluster.s3_endpoint, bucket_name=cluster.s3_bucket) if err: logger.error(f"Failed to recreate lvstore on node {secondary_node.get_id()}") logger.error(err) @@ -2925,7 +2925,7 @@ def recreate_lvstore(snode): cluster = db_controller.get_cluster_by_id(snode.cluster_id) ### 1- create distribs and raid - ret, err = _create_bdev_stack(snode, [], storage_tiering=cluster.storage_tiering, endpoint=cluster.endpoint, bucket_name=cluster.s3_bucket) + ret, err = _create_bdev_stack(snode, [], storage_tiering=cluster.storage_tiering, endpoint=cluster.s3_endpoint, bucket_name=cluster.s3_bucket) if err: logger.error(f"Failed to recreate lvstore on node {snode.get_id()}") logger.error(err) From d5d15e71cce8f5c114574caf4e5d261a4183331c Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Tue, 3 Jun 2025 12:42:10 +0530 Subject: [PATCH 35/37] set all values to zero --- simplyblock_core/rpc_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index 20a22b5cf..ac7ec168b 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -450,7 +450,7 @@ def bdev_alceml_create(self, alceml_name, nvme_name, uuid, pba_init_mode=3, def bdev_distrib_create(self, name, vuid, ndcs, npcs, num_blocks, block_size, jm_names, chunk_size, ha_comm_addrs=None, ha_inode_self=None, pba_page_size=2097152, distrib_cpu_mask="", ha_is_non_leader=True, jm_vuid=0, write_protection=False, - full_page_unmap=True, storage_tiering_id=0, secondary_io_timeout_us=1 << 30, ghost_capacity=100, fifo_main_capacity=1000, fifo_small_capacity=100, + full_page_unmap=True, storage_tiering_id=0, secondary_io_timeout_us=1 << 30, ghost_capacity=1, fifo_main_capacity=1, fifo_small_capacity=1, support_storage_tiering=False, secondary_stg_name="", disaster_recovery=False, ): """" From 8affbd944421fee028b929140f59f861ca661a8f Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Tue, 3 Jun 2025 13:19:02 +0530 Subject: [PATCH 36/37] pass parameters to cluster create --- simplyblock_cli/cli-reference.yaml | 70 ++++++++++++++++++++++++++++ simplyblock_cli/cli.py | 14 ++++++ simplyblock_cli/clibase.py | 16 ++++++- simplyblock_core/models/cluster.py | 4 ++ simplyblock_core/storage_node_ops.py | 6 +++ 5 files changed, 109 insertions(+), 1 deletion(-) diff --git a/simplyblock_cli/cli-reference.yaml b/simplyblock_cli/cli-reference.yaml index 5e39a50d8..bc36e2550 100644 --- a/simplyblock_cli/cli-reference.yaml +++ b/simplyblock_cli/cli-reference.yaml @@ -1091,6 +1091,41 @@ commands: dest: strict_node_anti_affinity type: bool action: store_true + - name: "--support-storage-tiering" + help: "Enable storage tiering for the cluster" + dest: storage_tiering + type: bool + default: false + - name: "--s3-endpoint" + help: "S3 endpoint for storage tiering" + dest: s3_endpoint + type: str + default: "" + - name: "--s3-bucket" + help: "S3 bucket for storage tiering" + dest: s3_bucket + type: str + default: "" + - name: "--s3-access-key" + help: "S3 access key for storage tiering" + dest: s3_access_key + type: str + default: "" + - name: "--s3-secret-key" + help: "S3 secret key for storage tiering" + dest: s3_secret_key + type: str + default: "" + - name: "--s3-workerpool-mask" + help: "S3 workerpool mask for storage tiering" + dest: s3_workerpool_mask + type: + regex: "^(0x|0X)?[a-fA-F0-9]+$" + - name: "--s3-workerpool-size" + help: "S3 workerpool size for storage tiering" + dest: s3_workerpool_size + type: int + default: 32 - name: add help: "Adds a new cluster" arguments: @@ -1191,6 +1226,41 @@ commands: dest: strict_node_anti_affinity type: bool action: store_true + - name: "--support-storage-tiering" + help: "Enable storage tiering for the cluster" + dest: storage_tiering + type: bool + default: false + - name: "--s3-endpoint" + help: "S3 endpoint for storage tiering" + dest: s3_endpoint + type: str + default: "" + - name: "--s3-bucket" + help: "S3 bucket for storage tiering" + dest: s3_bucket + type: str + default: "" + - name: "--s3-access-key" + help: "S3 access key for storage tiering" + dest: s3_access_key + type: str + default: "" + - name: "--s3-secret-key" + help: "S3 secret key for storage tiering" + dest: s3_secret_key + type: str + default: "" + - name: "--s3-workerpool-mask" + help: "S3 workerpool mask for storage tiering" + dest: s3_workerpool_mask + type: + regex: "^(0x|0X)?[a-fA-F0-9]+$" + - name: "--s3-workerpool-size" + help: "S3 workerpool size for storage tiering" + dest: s3_workerpool_size + type: int + default: 32 - name: activate help: > Activates a cluster. diff --git a/simplyblock_cli/cli.py b/simplyblock_cli/cli.py index 0334abd45..9cdbcda9b 100644 --- a/simplyblock_cli/cli.py +++ b/simplyblock_cli/cli.py @@ -440,6 +440,13 @@ def init_cluster__create(self, subparser): if self.developer_mode: argument = subcommand.add_argument('--enable-qos', help='Enable qos bdev for storage nodes, true by default', type=bool, default=False, dest='enable_qos') argument = subcommand.add_argument('--strict-node-anti-affinity', help='Enable strict node anti affinity for storage nodes. Never more than one chunk is placed on a node. This requires a minimum of _data-chunks-in-stripe + parity-chunks-in-stripe + 1_ nodes in the cluster.', dest='strict_node_anti_affinity', action='store_true') + argument = subcommand.add_argument('--support-storage-tiering', help='Enable storage tiering for the cluster', type=bool, default=False, dest='storage_tiering') + argument = subcommand.add_argument('--s3-endpoint', help='S3 endpoint for storage tiering', type=str, default='', dest='s3_endpoint') + argument = subcommand.add_argument('--s3-bucket', help='S3 bucket for storage tiering', type=str, default='', dest='s3_bucket') + argument = subcommand.add_argument('--s3-access-key', help='S3 access key for storage tiering', type=str, default='', dest='s3_access_key') + argument = subcommand.add_argument('--s3-secret-key', help='S3 secret key for storage tiering', type=str, default='', dest='s3_secret_key') + argument = subcommand.add_argument('--s3-workerpool-mask', help='S3 workerpool mask for storage tiering', type=regex_type(r'^(0x|0X)?[a-fA-F0-9]+$'), dest='s3_workerpool_mask') + argument = subcommand.add_argument('--s3-workerpool-size', help='S3 workerpool size for storage tiering', type=int, default=32, dest='s3_workerpool_size') def init_cluster__add(self, subparser): subcommand = self.add_sub_command(subparser, 'add', 'Adds a new cluster') @@ -465,6 +472,13 @@ def init_cluster__add(self, subparser): if self.developer_mode: argument = subcommand.add_argument('--enable-qos', help='Enable qos bdev for storage nodes, default: true', type=bool, default=False, dest='enable_qos') argument = subcommand.add_argument('--strict-node-anti-affinity', help='Enable strict node anti affinity for storage nodes. Never more than one chunk is placed on a node. This requires a minimum of _data-chunks-in-stripe + parity-chunks-in-stripe + 1_ nodes in the cluster."', dest='strict_node_anti_affinity', action='store_true') + argument = subcommand.add_argument('--support-storage-tiering', help='Enable storage tiering for the cluster', type=bool, default=False, dest='storage_tiering') + argument = subcommand.add_argument('--s3-endpoint', help='S3 endpoint for storage tiering', type=str, default='', dest='s3_endpoint') + argument = subcommand.add_argument('--s3-bucket', help='S3 bucket for storage tiering', type=str, default='', dest='s3_bucket') + argument = subcommand.add_argument('--s3-access-key', help='S3 access key for storage tiering', type=str, default='', dest='s3_access_key') + argument = subcommand.add_argument('--s3-secret-key', help='S3 secret key for storage tiering', type=str, default='', dest='s3_secret_key') + argument = subcommand.add_argument('--s3-workerpool-mask', help='S3 workerpool mask for storage tiering', type=regex_type(r'^(0x|0X)?[a-fA-F0-9]+$'), dest='s3_workerpool_mask') + argument = subcommand.add_argument('--s3-workerpool-size', help='S3 workerpool size for storage tiering', type=int, default=32, dest='s3_workerpool_size') def init_cluster__activate(self, subparser): subcommand = self.add_sub_command(subparser, 'activate', 'Activates a cluster.') diff --git a/simplyblock_cli/clibase.py b/simplyblock_cli/clibase.py index f5e3048fb..7999bd45e 100644 --- a/simplyblock_cli/clibase.py +++ b/simplyblock_cli/clibase.py @@ -664,13 +664,19 @@ def cluster_add(self, args): distr_bs = args.distr_bs distr_chunk_bs = args.distr_chunk_bs ha_type = args.ha_type - enable_node_affinity = args.enable_node_affinity qpair_count = args.qpair_count max_queue_size = args.max_queue_size inflight_io_threshold = args.inflight_io_threshold enable_qos = args.enable_qos strict_node_anti_affinity = args.strict_node_anti_affinity + storage_tiering = args.storage_tiering + s3_endpoint = args.s3_endpoint + s3_bucket = args.s3_bucket + s3_access_key = args.s3_access_key + s3_secret_key = args.s3_secret_key + s3_workerpool_mask = args.s3_workerpool_mask + s3_workerpool_size = args.s3_workerpool_size return cluster_ops.add_cluster( blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, @@ -794,6 +800,14 @@ def cluster_create(self, args): enable_qos = args.enable_qos strict_node_anti_affinity = args.strict_node_anti_affinity + storage_tiering = args.storage_tiering + s3_endpoint = args.s3_endpoint + s3_bucket = args.s3_bucket + s3_access_key = args.s3_access_key + s3_secret_key = args.s3_secret_key + s3_workerpool_mask = args.s3_workerpool_mask + s3_workerpool_size = args.s3_workerpool_size + return cluster_ops.create_cluster( blk_size, page_size_in_blocks, CLI_PASS, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, diff --git a/simplyblock_core/models/cluster.py b/simplyblock_core/models/cluster.py index 06931836f..c23ecb966 100644 --- a/simplyblock_core/models/cluster.py +++ b/simplyblock_core/models/cluster.py @@ -62,8 +62,12 @@ class Cluster(BaseModel): tls: bool = False is_re_balancing: bool = False storage_tiering: bool = True + aws_access_key: str = "foobar" + aws_secret_key: str = "barfoobarfoo" s3_endpoint: str = "http://192.168.10.146:9000" s3_bucket: str = "mybucket" + s3_lcpu_mask: str = "0x00000000" + s3_thread_pool_size: int = 32 def get_status_code(self): if self.status in self.STATUS_CODE_MAP: diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index 8bf5c2cf6..026d02795 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -3414,6 +3414,12 @@ def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None, storage_tie snode_id = snode.get_id() params['secondary_stg_name'] = 's3_{}'.format(snode_id.split("-")[0]) params['support_storage_tiering'] = True + params['ghost_capacity'] = 1 + params['fifo_main_capacity'] = 1 + params['fifo_small_capacity'] = 1 + # params['ghost_capacity'] = (snode.total_capacity/snode.number_of_distrib) * 0.25 + # params['fifo_main_capacity'] = 0.1 * params['ghost_capacity'] + # params['fifo_small_capacity'] = 0.2 * params['ghost_capacity'] s3_bdev_create(snode.get_id(), params['secondary_stg_name'], local_testing=True, local_endpoint=endpoint, bucket_name=bucket_name) if snode.distrib_cpu_cores: distrib_cpu_mask = utils.decimal_to_hex_power_of_2(snode.distrib_cpu_cores[snode.distrib_cpu_index]) From f599815bca88f8d7101816e3ea7722661040eaa1 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Mon, 16 Jun 2025 17:33:53 +0530 Subject: [PATCH 37/37] take cluster params from cli --- simplyblock_cli/cli.py | 2 ++ simplyblock_cli/clibase.py | 18 ++++++++++-------- simplyblock_core/cluster_ops.py | 11 ++++++++++- .../controllers/snapshot_controller.py | 1 + simplyblock_core/models/cluster.py | 8 ++++---- simplyblock_core/rpc_client.py | 3 --- 6 files changed, 27 insertions(+), 16 deletions(-) diff --git a/simplyblock_cli/cli.py b/simplyblock_cli/cli.py index 78cd05d7a..761a63d6f 100644 --- a/simplyblock_cli/cli.py +++ b/simplyblock_cli/cli.py @@ -1233,6 +1233,8 @@ def run(self): ret = self.snapshot__delete(sub_command, args) elif sub_command in ['clone']: ret = self.snapshot__clone(sub_command, args) + elif sub_command in ['backup']: + ret = self.snapshot__backup(sub_command, args) else: self.parser.print_help() diff --git a/simplyblock_cli/clibase.py b/simplyblock_cli/clibase.py index a0318535e..f04aecbf0 100644 --- a/simplyblock_cli/clibase.py +++ b/simplyblock_cli/clibase.py @@ -709,7 +709,9 @@ def cluster_add(self, args): return cluster_ops.add_cluster( blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, distr_ndcs, distr_npcs, distr_bs, distr_chunk_bs, ha_type, enable_node_affinity, - qpair_count, max_queue_size, inflight_io_threshold, enable_qos, strict_node_anti_affinity) + qpair_count, max_queue_size, inflight_io_threshold, enable_qos, strict_node_anti_affinity, + storage_tiering, s3_endpoint, s3_bucket, s3_access_key, s3_secret_key, s3_workerpool_mask, + s3_workerpool_size) def cluster_deploy(self, args): grafana_endpoint = "" @@ -829,13 +831,13 @@ def cluster_create(self, args): enable_qos = args.enable_qos strict_node_anti_affinity = args.strict_node_anti_affinity - storage_tiering = args.storage_tiering - s3_endpoint = args.s3_endpoint - s3_bucket = args.s3_bucket - s3_access_key = args.s3_access_key - s3_secret_key = args.s3_secret_key - s3_workerpool_mask = args.s3_workerpool_mask - s3_workerpool_size = args.s3_workerpool_size + # storage_tiering = args.storage_tiering + # s3_endpoint = args.s3_endpoint + # s3_bucket = args.s3_bucket + # s3_access_key = args.s3_access_key + # s3_secret_key = args.s3_secret_key + # s3_workerpool_mask = args.s3_workerpool_mask + # s3_workerpool_size = args.s3_workerpool_size return cluster_ops.create_cluster( blk_size, page_size_in_blocks, diff --git a/simplyblock_core/cluster_ops.py b/simplyblock_core/cluster_ops.py index bbfe8f58a..9670bb8d4 100644 --- a/simplyblock_core/cluster_ops.py +++ b/simplyblock_core/cluster_ops.py @@ -458,7 +458,8 @@ def deploy_cluster(storage_nodes,test,ha_type,distr_ndcs,distr_npcs,enable_qos,i def add_cluster(blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, distr_ndcs, distr_npcs, distr_bs, distr_chunk_bs, ha_type, enable_node_affinity, qpair_count, - max_queue_size, inflight_io_threshold, enable_qos, strict_node_anti_affinity) -> str: + max_queue_size, inflight_io_threshold, enable_qos, strict_node_anti_affinity, storage_tiering, + s3_endpoint, s3_bucket, s3_access_key, s3_secret_key, s3_workerpool_mask, s3_workerpool_size) -> str: db_controller = DBController() clusters = db_controller.get_clusters() if not clusters: @@ -493,6 +494,14 @@ def add_cluster(blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn cluster.max_queue_size = max_queue_size cluster.inflight_io_threshold = inflight_io_threshold cluster.enable_qos = enable_qos + cluster.storage_tiering = storage_tiering + cluster.s3_endpoint = s3_endpoint + cluster.s3_bucket = s3_bucket + cluster.s3_access_key = s3_access_key + cluster.s3_secret_key = s3_secret_key + cluster.s3_workerpool_mask = s3_workerpool_mask + cluster.s3_workerpool_size = s3_workerpool_size + if cap_warn and cap_warn > 0: cluster.cap_warn = cap_warn if cap_crit and cap_crit > 0: diff --git a/simplyblock_core/controllers/snapshot_controller.py b/simplyblock_core/controllers/snapshot_controller.py index 96520044a..4e521cb8d 100644 --- a/simplyblock_core/controllers/snapshot_controller.py +++ b/simplyblock_core/controllers/snapshot_controller.py @@ -2,6 +2,7 @@ import logging as lg import time import uuid +import datetime from simplyblock_core.controllers import lvol_controller, snapshot_events, pool_controller diff --git a/simplyblock_core/models/cluster.py b/simplyblock_core/models/cluster.py index 4a39c2ccf..0efc60045 100644 --- a/simplyblock_core/models/cluster.py +++ b/simplyblock_core/models/cluster.py @@ -62,12 +62,12 @@ class Cluster(BaseModel): tls: bool = False is_re_balancing: bool = False storage_tiering: bool = True - aws_access_key: str = "foobar" - aws_secret_key: str = "barfoobarfoo" + s3_access_key: str = "foobar" + s3_secret_key: str = "barfoobarfoo" s3_endpoint: str = "http://192.168.10.146:9000" s3_bucket: str = "mybucket" - s3_lcpu_mask: str = "0x00000000" - s3_thread_pool_size: int = 32 + s3_workerpool_mask: str = "0x00000000" + s3_workerpool_size: int = 32 full_page_unmap: bool = True def get_status_code(self): diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index 65bcad81a..26b41f618 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -1,9 +1,6 @@ import json import inspect - import requests -import inspect -import random from simplyblock_core import constants, utils from requests.adapters import HTTPAdapter from urllib3 import Retry