diff --git a/python/understack-workflows/pyproject.toml b/python/understack-workflows/pyproject.toml index 4cd33eb29..cea4a543e 100644 --- a/python/understack-workflows/pyproject.toml +++ b/python/understack-workflows/pyproject.toml @@ -33,7 +33,6 @@ dependencies = [ bmc-kube-password = "understack_workflows.main.bmc_display_password:main" bmc-password = "understack_workflows.main.print_bmc_password:main" enroll-server = "understack_workflows.main.enroll_server:main" -get-raid-devices = "understack_workflows.main.get_raid_devices:main" netapp-configure-interfaces = "understack_workflows.main.netapp_configure_net:main" netapp-create-svm = "understack_workflows.main.netapp_create_svm:main" openstack-oslo-event = "understack_workflows.main.openstack_oslo_event:main" diff --git a/python/understack-workflows/tests/test_enroll_server.py b/python/understack-workflows/tests/test_enroll_server.py index 8e3f35306..bf596ee90 100644 --- a/python/understack-workflows/tests/test_enroll_server.py +++ b/python/understack-workflows/tests/test_enroll_server.py @@ -33,6 +33,7 @@ def make_node_inventory( interfaces: list[dict] | None = None, connected_interface_names: list[str] | None = None, serial_number: str = "FL6PC14", + storage_controllers: list[dict] | None = None, ) -> dict: """Build a realistic Ironic node inventory dict, suitable for mocking.""" if interfaces is None: @@ -56,6 +57,7 @@ def make_node_inventory( return { "inventory": { "interfaces": interfaces, + "storage_controllers": storage_controllers or [], "system_vendor": { "product_name": "PowerEdge R7615", "serial_number": serial_number, @@ -179,6 +181,15 @@ def test_enrol_happy_path_uses_virtual_media_inspect_and_flips_back(mocker): fake_bmc = make_bmc(mocker, fake_sushy=make_raid_hardware()) inventory = make_node_inventory( connected_interface_names=["NIC.Integrated.1-1", "NIC.Integrated.1-2"], + storage_controllers=[ + { + "id": "RAID.Integrated.1-1", + "drives": [ + {"id": "Disk1", "size": 479559942144}, + {"id": "Disk2", "size": 479559942144}, + ], + } + ], ) # port-enroll-config hook has run during agent inspection and flagged # pxe_enabled on LLDP-connected ports. port-bios-name (OOB) stamped @@ -305,7 +316,7 @@ def test_enrol_happy_path_uses_virtual_media_inspect_and_flips_back(mocker): {"interface": "raid", "step": "create_configuration"}, ], runbook=None, - disable_ramdisk=True, + disable_ramdisk=False, ), call( created_node.uuid, diff --git a/python/understack-workflows/tests/test_raid.py b/python/understack-workflows/tests/test_raid.py new file mode 100644 index 000000000..87083d28c --- /dev/null +++ b/python/understack-workflows/tests/test_raid.py @@ -0,0 +1,420 @@ +import pytest + +from understack_workflows.raid import PhysicalDisk +from understack_workflows.raid import _generate_raid_config +from understack_workflows.raid import _physical_disks_from_inventory + + +def raid_config_for_inventory(inventory: dict) -> dict: + return _generate_raid_config(_physical_disks_from_inventory(inventory)) + + +@pytest.fixture +def inventory_payload(): + return { + "inventory": { + "memory": {"physical_mb": 98304}, + "cpu": { + "count": 32, + "architecture": "x86_64", + "model_name": "AMD EPYC 9124 16-Core Processor", + "frequency": 4400, + }, + "disks": [ + {"name": "Solid State Disk 0:1:0", "size": 479559942144}, + {"name": "Solid State Disk 0:1:1", "size": 479559942144}, + ], + "storage_controllers": [ + { + "id": "RAID.SL.1-1", + "name": "PERC H755 Front", + "storage_controllers": [ + { + "member_id": "0", + "name": "PERC H755 Front", + "raid_types": [ + "RAID0", + "RAID1", + "RAID5", + "RAID6", + "RAID10", + "RAID50", + "RAID60", + ], + "speed_gbps": 12.0, + "controller_protocols": ["PCIe"], + "device_protocols": ["SAS", "SATA"], + "status": {"state": "Enabled"}, + } + ], + "drives": [ + { + "name": "Solid State Disk 0:1:0", + "size": 479559942144, + "id": "Disk.Bay.0:Enclosure.Internal.0-1:RAID.SL.1-1", + "media_type": "SSD", + "serial_number": "234043C8E9AE", + "manufacturer": "MICRON", + "model": "MTFDDAK480TGA-1B", + "revision": "D4DK003", + "protocol": "SATA", + "status": {"state": "Enabled"}, + }, + { + "name": "Solid State Disk 0:1:1", + "size": 479559942144, + "id": "Disk.Bay.1:Enclosure.Internal.0-1:RAID.SL.1-1", + "media_type": "SSD", + "serial_number": "234043C90EE1", + "manufacturer": "MICRON", + "model": "MTFDDAK480TGA-1B", + "revision": "D4DK003", + "protocol": "SATA", + "status": {"state": "Enabled"}, + }, + ], + }, + { + "id": "AHCI.Embedded.1-1", + "name": "FCH SATA Controller [AHCI mode]", + "storage_controllers": [ + { + "member_id": "0", + "name": "FCH SATA Controller [AHCI mode]", + "raid_types": [], + "speed_gbps": None, + "controller_protocols": ["PCIe"], + "device_protocols": [], + "status": {"state": "Enabled"}, + } + ], + "drives": [], + }, + { + "id": "CPU.1", + "name": "CPU.1", + "storage_controllers": [], + "drives": [], + }, + ], + }, + "plugin_data": {}, + } + + +def test_raid_config_from_inventory_payload(inventory_payload): + assert raid_config_for_inventory(inventory_payload) == { + "logical_disks": [ + { + "controller": "RAID.SL.1-1", + "physical_disks": [ + "Disk.Bay.0:Enclosure.Internal.0-1:RAID.SL.1-1", + "Disk.Bay.1:Enclosure.Internal.0-1:RAID.SL.1-1", + ], + "raid_level": "1", + "size_gb": "MAX", + "is_root_volume": True, + } + ] + } + + +def test_physical_disks_from_inventory_ignores_non_raid_controllers(inventory_payload): + assert _physical_disks_from_inventory(inventory_payload) == { + PhysicalDisk( + id="Disk.Bay.0:Enclosure.Internal.0-1:RAID.SL.1-1", + controller="RAID.SL.1-1", + size_gb=479, + ), + PhysicalDisk( + id="Disk.Bay.1:Enclosure.Internal.0-1:RAID.SL.1-1", + controller="RAID.SL.1-1", + size_gb=479, + ), + } + + +def test_raid_config_groups_disk_sizes_by_rounded_down_gb(): + assert raid_config_for_inventory( + { + "inventory": { + "storage_controllers": [ + { + "id": "RAID.SL.1-1", + "drives": [ + { + "id": "Disk.Bay.0:Enclosure.Internal.0-1:RAID.SL.1-1", + "size": 479559942144, + }, + { + "id": "Disk.Bay.2:Enclosure.Internal.0-1:RAID.SL.1-1", + "size": 960000000000, + }, + { + "id": "Disk.Bay.1:Enclosure.Internal.0-1:RAID.SL.1-1", + "size": 479999999999, + }, + { + "id": "Disk.Bay.3:Enclosure.Internal.0-1:RAID.SL.1-1", + "size": 960999999999, + }, + ], + }, + ], + }, + } + ) == { + "logical_disks": [ + { + "controller": "RAID.SL.1-1", + "physical_disks": [ + "Disk.Bay.0:Enclosure.Internal.0-1:RAID.SL.1-1", + "Disk.Bay.1:Enclosure.Internal.0-1:RAID.SL.1-1", + ], + "raid_level": "1", + "size_gb": "MAX", + "is_root_volume": True, + }, + { + "controller": "RAID.SL.1-1", + "physical_disks": [ + "Disk.Bay.2:Enclosure.Internal.0-1:RAID.SL.1-1", + "Disk.Bay.3:Enclosure.Internal.0-1:RAID.SL.1-1", + ], + "raid_level": "1", + "size_gb": "MAX", + "is_root_volume": False, + }, + ] + } + + +def test_raid_config_sets_one_root_volume_across_all_controllers(): + assert raid_config_for_inventory( + { + "inventory": { + "storage_controllers": [ + { + "id": "RAID.SL.1-1", + "drives": [ + { + "id": "Disk.Bay.0:Enclosure.Internal.0-1:RAID.SL.1-1", + "size": 960000000000, + }, + ], + }, + { + "id": "RAID.SL.2-1", + "drives": [ + { + "id": "Disk.Bay.0:Enclosure.Internal.0-1:RAID.SL.2-1", + "size": 479559942144, + }, + ], + }, + { + "id": "RAID.SL.3-1", + "drives": [ + { + "id": "Disk.Bay.0:Enclosure.Internal.0-1:RAID.SL.3-1", + "size": 479999999999, + }, + ], + }, + ], + }, + } + ) == { + "logical_disks": [ + { + "controller": "RAID.SL.2-1", + "physical_disks": [ + "Disk.Bay.0:Enclosure.Internal.0-1:RAID.SL.2-1", + ], + "raid_level": "0", + "size_gb": "MAX", + "is_root_volume": True, + }, + { + "controller": "RAID.SL.3-1", + "physical_disks": [ + "Disk.Bay.0:Enclosure.Internal.0-1:RAID.SL.3-1", + ], + "raid_level": "0", + "size_gb": "MAX", + "is_root_volume": False, + }, + { + "controller": "RAID.SL.1-1", + "physical_disks": [ + "Disk.Bay.0:Enclosure.Internal.0-1:RAID.SL.1-1", + ], + "raid_level": "0", + "size_gb": "MAX", + "is_root_volume": False, + }, + ] + } + + +def test_raid_config_uses_raid0_for_one_drive(): + assert _generate_raid_config( + { + PhysicalDisk( + id="Disk.Bay.0:Enclosure.Internal.0-1:RAID.SL.1-1", + controller="RAID.SL.1-1", + size_gb=479, + ) + } + ) == { + "logical_disks": [ + { + "controller": "RAID.SL.1-1", + "physical_disks": [ + "Disk.Bay.0:Enclosure.Internal.0-1:RAID.SL.1-1", + ], + "raid_level": "0", + "size_gb": "MAX", + "is_root_volume": True, + } + ] + } + + +def test_raid_config_uses_raid1_for_two_drives(): + assert _generate_raid_config( + { + PhysicalDisk( + id="Disk.Bay.0:Enclosure.Internal.0-1:RAID.SL.1-1", + controller="RAID.SL.1-1", + size_gb=479, + ), + PhysicalDisk( + id="Disk.Bay.1:Enclosure.Internal.0-1:RAID.SL.1-1", + controller="RAID.SL.1-1", + size_gb=479, + ), + } + ) == { + "logical_disks": [ + { + "controller": "RAID.SL.1-1", + "physical_disks": [ + "Disk.Bay.0:Enclosure.Internal.0-1:RAID.SL.1-1", + "Disk.Bay.1:Enclosure.Internal.0-1:RAID.SL.1-1", + ], + "raid_level": "1", + "size_gb": "MAX", + "is_root_volume": True, + } + ] + } + + +def test_raid_config_uses_raid5_for_three_drives(): + assert _generate_raid_config( + { + PhysicalDisk( + id="Disk.Bay.0:Enclosure.Internal.0-1:RAID.SL.1-1", + controller="RAID.SL.1-1", + size_gb=479, + ), + PhysicalDisk( + id="Disk.Bay.1:Enclosure.Internal.0-1:RAID.SL.1-1", + controller="RAID.SL.1-1", + size_gb=479, + ), + PhysicalDisk( + id="Disk.Bay.2:Enclosure.Internal.0-1:RAID.SL.1-1", + controller="RAID.SL.1-1", + size_gb=479, + ), + } + ) == { + "logical_disks": [ + { + "controller": "RAID.SL.1-1", + "physical_disks": [ + "Disk.Bay.0:Enclosure.Internal.0-1:RAID.SL.1-1", + "Disk.Bay.1:Enclosure.Internal.0-1:RAID.SL.1-1", + "Disk.Bay.2:Enclosure.Internal.0-1:RAID.SL.1-1", + ], + "raid_level": "5", + "size_gb": "MAX", + "is_root_volume": True, + } + ] + } + + +def test_raid_config_uses_raid5_for_four_drives(): + assert _generate_raid_config( + { + PhysicalDisk( + id="Disk.Bay.0:Enclosure.Internal.0-1:RAID.SL.1-1", + controller="RAID.SL.1-1", + size_gb=479, + ), + PhysicalDisk( + id="Disk.Bay.1:Enclosure.Internal.0-1:RAID.SL.1-1", + controller="RAID.SL.1-1", + size_gb=479, + ), + PhysicalDisk( + id="Disk.Bay.2:Enclosure.Internal.0-1:RAID.SL.1-1", + controller="RAID.SL.1-1", + size_gb=479, + ), + PhysicalDisk( + id="Disk.Bay.3:Enclosure.Internal.0-1:RAID.SL.1-1", + controller="RAID.SL.1-1", + size_gb=479, + ), + } + ) == { + "logical_disks": [ + { + "controller": "RAID.SL.1-1", + "physical_disks": [ + "Disk.Bay.0:Enclosure.Internal.0-1:RAID.SL.1-1", + "Disk.Bay.1:Enclosure.Internal.0-1:RAID.SL.1-1", + "Disk.Bay.2:Enclosure.Internal.0-1:RAID.SL.1-1", + "Disk.Bay.3:Enclosure.Internal.0-1:RAID.SL.1-1", + ], + "raid_level": "5", + "size_gb": "MAX", + "is_root_volume": True, + } + ] + } + + +def test_raid_config_is_empty_without_raid_disks(): + assert raid_config_for_inventory( + { + "inventory": { + "storage_controllers": [ + { + "id": "AHCI.Embedded.1-1", + "drives": [ + { + "id": ( + "Disk.Bay.0:Enclosure.Internal.0-1" + ":AHCI.Embedded.1-1" + ), + "size": 479559942144, + }, + ], + }, + { + "id": "CPU.1", + "drives": [], + }, + { + "id": "RAID.SL.1-1", + "drives": [], + }, + ], + }, + } + ) == {"logical_disks": []} diff --git a/python/understack-workflows/understack_workflows/main/enroll_server.py b/python/understack-workflows/understack_workflows/main/enroll_server.py index c9f34731d..27cc87d16 100644 --- a/python/understack-workflows/understack_workflows/main/enroll_server.py +++ b/python/understack-workflows/understack_workflows/main/enroll_server.py @@ -16,6 +16,7 @@ from understack_workflows.bmc_hostname import bmc_set_hostname from understack_workflows.bmc_settings import update_dell_drac_settings from understack_workflows.helpers import setup_logger +from understack_workflows.raid import configure_raid logger = logging.getLogger(__name__) @@ -99,6 +100,7 @@ def enroll( # and it bails out unless we have ports with pxe_enabled, local_link_info, # etc. ironic_node.inspect_out_of_band(node) + inventory = ironic_node.get_node_inventory(node) # Agent inspection gathers LLDP and full hardware inventory. # @@ -124,7 +126,7 @@ def enroll( update_dell_bios_settings(bmc, pxe_interface=pxe_interface) if raid_configure: - configure_raid(node, bmc) + configure_raid(node, inventory) # RAID reconfiguration changes the disk layout; refresh inventory. ironic_node.inspect_out_of_band(node) else: @@ -189,63 +191,6 @@ def device_name(device_info: ChassisInfo) -> str: return f"{device_info.manufacturer}-{device_info.serial_number}" -def configure_raid(node: Node, bmc: Bmc): - raid_details = discover_controller_details(bmc) - if not raid_details: - logger.info("%s No RAID hardware found in node", node.uuid) - return - - logger.info("%s Applying RAID configuration", node.uuid) - raid_config = build_raid_config(**raid_details) - ironic_node.set_target_raid_config(node, raid_config) - ironic_node.transition( - node, - target_state="clean", - expected_state="manageable", - clean_steps=[ - {"interface": "raid", "step": "delete_configuration"}, - {"interface": "raid", "step": "create_configuration"}, - ], - disable_ramdisk=True, - ) - - -def discover_controller_details(bmc: Bmc) -> dict | None: - """Parse available RAID controller details for execution.""" - system_objects = bmc.sushy().get_system_collection().get_members() - system = system_objects[0] - for controller in system.storage.get_members(): - if "RAID" in controller.identity: - return { - "controller": controller.identity, - "physical_disks": [d.identity for d in controller.drives], - } - return None - - -def build_raid_config(controller: str, physical_disks: list[str]): - """Return a raid config supported by ironic for cleanup tasks.""" - if len(physical_disks) < 2: - raid_level = "0" - elif len(physical_disks) > 2: - raid_level = "5" - else: - raid_level = "1" - - result = { - "logical_disks": [ - { - "controller": controller, - "is_root_volume": True, - "physical_disks": physical_disks, - "raid_level": raid_level, - "size_gb": "MAX", - } - ] - } - return result - - def parse_bool(value: str) -> bool: return value.lower() == "true" diff --git a/python/understack-workflows/understack_workflows/main/get_raid_devices.py b/python/understack-workflows/understack_workflows/main/get_raid_devices.py deleted file mode 100644 index c6b501020..000000000 --- a/python/understack-workflows/understack_workflows/main/get_raid_devices.py +++ /dev/null @@ -1,106 +0,0 @@ -import argparse -import json -import logging -import os - -from sushy import Sushy - -from understack_workflows.bmc import bmc_for_ip_address -from understack_workflows.helpers import setup_logger - -logger = logging.getLogger(__name__) - - -def main(): - """Export RAID details for a BMC using Sushy. - - - connect to the BMC using standard password - - - Using Sushy, gather controller details: - - controller name - - list of drive references for raid configuration. - - - output json object response. - """ - setup_logger() - args = argument_parser().parse_args() - - ip_address = args.ip_address - password = args.password or None - logger.debug("%s starting for ip_address=%s", __file__, ip_address) - - bmc = bmc_for_ip_address(ip_address=ip_address, password=password) - client = bmc.sushy() - - # argo workflows captures stdout as the results which we can use - # to return the device UUID - raid_config = parse_controller_details(client) - json_details = build_raid_config(raid_config) - print(json.dumps(json_details)) - - -def argument_parser(): - """Parse runtime arguments.""" - parser = argparse.ArgumentParser( - prog=os.path.basename(__file__), description="Gather RAID Device info." - ) - parser.add_argument("--ip-address", type=str, required=True, help="BMC IP") - parser.add_argument( - "--password", type=str, required=False, help="Custom Password", default=None - ) - return parser - - -def parse_controller_details(client: Sushy) -> dict: - """Parse available RAID controller details for execution.""" - result = {"controller": None, "physical_disks": []} - system_objects = client.get_system_collection().get_members() - system = system_objects.pop() - for c in system.storage.get_members(): - if "RAID" in c.identity.upper(): - result["controller"] = c.identity - for d in c.drives: - capacity = d.capacity_bytes / (10**9) - result["physical_disks"].append( - {"name": d.identity, "size_gb": f"{capacity:.0f}"} - ) - break - return result - - -def get_raid_type(disk_count: int) -> int: - if disk_count < 2: - return 0 - if disk_count > 2: - return 5 - return 1 - - -def build_raid_config(raid_config: dict): - """Return a raid config supported by ironic for cleanup tasks.""" - sizes = sorted({int(d["size_gb"]) for d in raid_config["physical_disks"]}) - base_config = {"logical_disks": []} - for size in sizes: - _root_vol = bool(size == sizes[0]) or bool( - len(sizes) == 1 - ) # First size or only size. - disks = [ - d["name"] - for d in raid_config["physical_disks"] - if int(d["size_gb"]) == size - ] - raid_level = get_raid_type(len(disks)) - base_config["logical_disks"].append( - { - "controller": raid_config["controller"], - "is_root_volume": _root_vol, - "physical_disks": disks, - "raid_level": str(raid_level), - "size_gb": "MAX", - } - ) - return base_config - - -if __name__ == "__main__": - main() diff --git a/python/understack-workflows/understack_workflows/raid.py b/python/understack-workflows/understack_workflows/raid.py new file mode 100644 index 000000000..b4794156c --- /dev/null +++ b/python/understack-workflows/understack_workflows/raid.py @@ -0,0 +1,120 @@ +import logging +from collections.abc import Iterable +from dataclasses import dataclass + +from ironicclient.v1.node import Node + +from understack_workflows import ironic_node + +logger = logging.getLogger(__name__) + + +@dataclass(frozen=True) +class PhysicalDisk: + id: str + controller: str + size_gb: int + + +def configure_raid(node: Node, inventory: dict): + """Set target RAID config && run clean steps to re-create the RAID array. + + We find any storage controllers that have "RAID" in the name and have one or + more disks attached. + + We group disks by size (creating an array of mismatched disks wastes space + and/or performance). + + We form a logical volume from each group of disks on each controller. + + The volume that has the smallest disks is marked as the "root volume". + """ + physical_disks = _physical_disks_from_inventory(inventory) + if not physical_disks: + logger.info("%s No RAID hardware found in node", node.uuid) + return + + raid_config = _generate_raid_config(physical_disks) + logger.info("%s Applying RAID configuration", node.uuid) + ironic_node.set_target_raid_config(node, raid_config) + + ironic_node.transition( + node, + target_state="clean", + expected_state="manageable", + clean_steps=[ + {"interface": "raid", "step": "delete_configuration"}, + {"interface": "raid", "step": "create_configuration"}, + ], + disable_ramdisk=False, + ) + + +def _generate_raid_config(physical_disks: set[PhysicalDisk]) -> dict: + """Return a raid config supported by Ironic's clean steps.""" + return {"logical_disks": list(_logical_disks(physical_disks))} + + +def _logical_disks(disks: set[PhysicalDisk]): + is_root_volume = True + for metadata, diskgroup in sorted(_group_by_size_and_controller(disks).items()): + (_, controller_id) = metadata + yield { + "controller": controller_id, + "physical_disks": sorted(disk.id for disk in diskgroup), + "raid_level": _raid_level(len(diskgroup)), + "size_gb": "MAX", + "is_root_volume": is_root_volume, + } + is_root_volume = False + + +def _physical_disks_from_inventory(inventory: dict) -> set[PhysicalDisk]: + """Parse Inventory data as returned by the redfish inspection. + + Answer the set of PhysicalDisks that are associated with RAID controllers in + this server. + """ + inventory_data = inventory.get("inventory", {}) + return { + disk + for controller in inventory_data.get("storage_controllers", []) + for disk in _physical_disks_for_controller(controller) + } + + +def _physical_disks_for_controller(storage_controller: dict) -> set[PhysicalDisk]: + controller_id = str(storage_controller.get("id")) + disks = storage_controller.get("drives", []) + + if "RAID" not in controller_id.upper(): + return set() + + return { + PhysicalDisk( + id=disk["id"], + controller=controller_id, + size_gb=disk["size"] // 10**9, + ) + for disk in disks + } + + +def _group_by_size_and_controller( + disks: Iterable[PhysicalDisk], +) -> dict[tuple, list[PhysicalDisk]]: + disks_by_size_and_controller = {} + for disk in disks: + key = (disk.size_gb, disk.controller) + disks_by_size_and_controller.setdefault(key, []).append(disk) + return disks_by_size_and_controller + + +def _raid_level(disk_count: int) -> str: + match disk_count: + case 1: + return "0" + case 2: + return "1" + case _: + return "5"