diff --git a/components/data_processing/__init__.py b/components/data_processing/__init__.py index 161238215..fb124dc92 100644 --- a/components/data_processing/__init__.py +++ b/components/data_processing/__init__.py @@ -5,4 +5,4 @@ """ # Auto-generated imports will be added here by scripts/update_init_imports.py -# Components will be imported dynamically based on subdirectories +# Components will be imported dynamically based on subdirectories \ No newline at end of file
diff --git a/components/data_processing/yoda_data_preparation.py b/components/data_processing/yoda_data_preparation.py
new file mode 100644
index 000000000..039742068
--- /dev/null
+++ b/components/data_processing/yoda_data_preparation.py
@@ -0,0 +1,106 @@
+from google.auth.exceptions import InvalidValue
+from typing import Any
+
+from kfp import dsl
+import kfp.compiler
+
+
+@dsl.component(
+    packages_to_install=["datasets"],
+)
+def prepare_yoda_dataset(
+    yoda_input_dataset: str,
+    yoda_train_dataset: dsl.Output[dsl.Dataset],
+    yoda_eval_dataset: dsl.Output[dsl.Dataset],
+    operation_map: dict[str, Any] = {"rename_column": {"sentence":"prompt"}},
+    train_split_ratio: float = 0.8,
+):
+    """Prepare the training and evaluation datasets by downloading and preprocessing.
+
+    Downloads the yoda_sentences dataset from HuggingFace, renames columns to match
+    the expected format for training (prompt/completion), splits into train/eval sets,
+    and saves them as output artifacts.
+
+    Args:
+        yoda_input_dataset (str): Dataset to download from HuggingFace
+        yoda_train_dataset (dsl.Output[dsl.Dataset]): Output dataset for training.
+        yoda_eval_dataset (dsl.Output[dsl.Dataset]): Output dataset for evaluation.
+        operation_map (dict): Specify list of operations you want to perform on the data set before splitting it e.g. {"rename_column": {"sentence":"prompt"}, "remove_columns": "translation"}
+        train_split_ratio (float): Ratio of data to use for training (0.0-1.0).
+            Defaults to 0.8 (80% train, 20% eval).
+
+    Raises:
+        ValueError: If train_split_ratio is not strictly between 0 and 1, an operation
+            name is not recognized, or there is no "prompt" column after the operations.
+        RuntimeError: If an operation is given a value of the wrong type.
+    """
+    # The compiled component ships only this function's source, so the body may use
+    # builtins and names imported inside the function, not module-level imports.
+    from datasets import load_dataset
+
+    if not 0.0 < train_split_ratio < 1.0:
+        raise ValueError(
+            f"train_split_ratio must be between 0 and 1 (exclusive), got {train_split_ratio}"
+        )
+
+    print(f"Downloading and loading the dataset from {yoda_input_dataset}")
+    dataset = load_dataset(yoda_input_dataset, split="train")
+
+    # operation_map is only read, never mutated, so the dict default is not shared state.
+    for operation_name, operation_value in (operation_map or {}).items():
+        print(f'Performing operation: "{operation_name}"')
+        if operation_name == "rename_column":
+            if not isinstance(operation_value, dict):
+                raise RuntimeError(f'Dict value is required to perform operation "{operation_name}"')
+            for old_name, new_name in operation_value.items():
+                dataset = dataset.rename_column(old_name, new_name)
+        elif operation_name == "remove_columns":
+            if not isinstance(operation_value, (str, list)):
+                raise RuntimeError(f'Only list and str type are allowed to perform "{operation_name}" operation')
+            # Remove the requested column(s); a single name or a list of names is accepted.
+            dataset = dataset.remove_columns(operation_value)
+        else:
+            raise ValueError(f'Unrecognized operation "{operation_name}"')
+
+    if "prompt" not in dataset.column_names:
+        raise ValueError(
+            'Dataset has no "prompt" column; rename the source column via operation_map'
+        )
+
+    # Add prefix to prompts
+    print("Adding Yoda speak prefix to prompts")
+
+    def add_yoda_prefix(example):
+        example["prompt"] = (
+            "Translate the following to Yoda speak: " + example["prompt"]
+        )
+        return example
+
+    dataset = dataset.map(add_yoda_prefix)
+
+    # Split the dataset into train and eval sets
+    print(
+        f"Splitting dataset with {len(dataset)} rows into train ({train_split_ratio:.1%}) and eval ({(1-train_split_ratio):.1%}) sets"
+    )
+    split_dataset = dataset.train_test_split(test_size=1 - train_split_ratio, seed=42)
+
+    train_dataset = split_dataset["train"]
+    eval_dataset = split_dataset["test"]
+
+    print(f"Train set: {len(train_dataset)} rows")
+    print(f"Eval set: {len(eval_dataset)} rows")
+
+    # Save both datasets
+    print(f"Saving train dataset to {yoda_train_dataset.path}")
+    train_dataset.save_to_disk(yoda_train_dataset.path)
+
+    print(f"Saving eval dataset to {yoda_eval_dataset.path}")
+    eval_dataset.save_to_disk(yoda_eval_dataset.path)
+
+
+if __name__ == "__main__":
+    # Re-run this module after changing the component to regenerate its YAML.
+    kfp.compiler.Compiler().compile(
+        prepare_yoda_dataset,
+        package_path=__file__.replace(".py", "_component.yaml"),
+    )
diff --git a/components/data_processing/yoda_data_preparation_component.yaml b/components/data_processing/yoda_data_preparation_component.yaml new file mode 100644 index 000000000..f7d133a9e --- /dev/null +++ b/components/data_processing/yoda_data_preparation_component.yaml @@ -0,0 +1,187 @@ +# PIPELINE DEFINITION +# Name: prepare-yoda-dataset +# Description: Prepare the training and evaluation datasets by downloading and preprocessing. +# Downloads the yoda_sentences dataset from HuggingFace, renames columns to match +# the expected format for training (prompt/completion), splits into train/eval sets, +# and saves them as output artifacts. +# Inputs: +# operation_map: dict [Default: {'rename_column': {'sentence': 'prompt'}}] +# train_split_ratio: float [Default: 0.8] +# yoda_input_dataset: str +# Outputs: +# yoda_eval_dataset: system.Dataset +# yoda_train_dataset: system.Dataset +components: + comp-prepare-yoda-dataset: + executorLabel: exec-prepare-yoda-dataset + inputDefinitions: + parameters: + operation_map: + defaultValue: + rename_column: + sentence: prompt + description: 'Specify list of operations you want to perform on the data + set before splitting it e.g. {"rename_column": {"sentence":"prompt"}, + "remove_columns": "translation"}' + isOptional: true + parameterType: STRUCT + train_split_ratio: + defaultValue: 0.8 + description: 'Ratio of data to use for training (0.0-1.0). + + Defaults to 0.8 (80% train, 20% eval).'
+ isOptional: true + parameterType: NUMBER_DOUBLE + yoda_input_dataset: + description: Dataset to download from HuggingFace + parameterType: STRING + outputDefinitions: + artifacts: + yoda_eval_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + yoda_train_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 +deploymentSpec: + executors: + exec-prepare-yoda-dataset: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - prepare_yoda_dataset + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'datasets' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.15.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef prepare_yoda_dataset(\n yoda_input_dataset: str,\n \ + \ yoda_train_dataset: dsl.Output[dsl.Dataset],\n yoda_eval_dataset:\ + \ dsl.Output[dsl.Dataset],\n operation_map: dict[str, Any] = {\"\ + rename_column\": {\"sentence\":\"prompt\"}},\n train_split_ratio:\ + \ float = 0.8,\n):\n \"\"\"Prepare the training and evaluation datasets\ + \ by downloading and preprocessing.\n\n Downloads the yoda_sentences\ + \ dataset from HuggingFace, renames columns to match\n the expected format\ + \ for training (prompt/completion), splits into train/eval sets,\n and\ + \ saves them as output artifacts.\n\n Args:\n yoda_input_dataset\ + \ (str): Dataset to download from HuggingFace\n 
yoda_train_dataset\ + \ (dsl.Output[dsl.Dataset]): Output dataset for training.\n yoda_eval_dataset\ + \ (dsl.Output[dsl.Dataset]): Output dataset for evaluation.\n operation_map\ + \ (dict): Specify list of operations you want to perform on the data set\ + \ before splitting it e.g. {\"rename_column\": {\"sentence\":\"prompt\"\ + }, \"remove_columns\": \"translation\"}\n train_split_ratio (float):\ + \ Ratio of data to use for training (0.0-1.0).\n \ + \ Defaults to 0.8 (80% train, 20% eval).\n \"\"\"\n from\ + \ datasets import load_dataset\n\n print(f\"Downloading and loading the\ + \ dataset from {yoda_input_dataset}\")\n dataset = load_dataset(yoda_input_dataset,\ + \ split=\"train\")\n if operation_map:\n for operation_name, operation_value\ + \ in operation_map.items():\n print(f'Performing operation: \"\ + {operation_name}\"')\n if operation_name == 'rename_column':\n\ + \ if type(operation_value) != dict:\n \ + \ raise RuntimeError(f'Dict value is required to perform operation \"{operation_name}\"\ + ')\n for key, value in operation_value.items():\n \ + \ dataset = dataset.rename_column(key, value)\n elif\ + \ operation_name == \"remove_columns\":\n if type(operation_value)\ + \ == str:\n dataset = dataset.remove_columns([\"translation\"\ + ])\n elif type(operation_value) == list:\n \ + \ dataset = dataset.remove_columns(\"translation\")\n \ + \ else:\n raise RuntimeError(f'Only list and str type\ + \ are allowed to perform \"{operation_name}\" operation')\n else:\n\ + \ raise InvalidValue(f'Unrecogonized operation value \"{operation_name}\"\ + ')\n\n # Add prefix to prompts\n print(\"Adding Yoda speak prefix\ + \ to prompts\")\n def add_yoda_prefix(example):\n example[\"prompt\"\ + ] = (\n \"Translate the following to Yoda speak: \" + example[\"\ + prompt\"]\n )\n return example\n\n dataset = dataset.map(add_yoda_prefix)\n\ + \n # Split the dataset into train and eval sets\n print(\n \ + \ f\"Splitting dataset with {len(dataset)} rows into train 
({train_split_ratio:.1%})\ + \ and eval ({(1-train_split_ratio):.1%}) sets\"\n )\n split_dataset\ + \ = dataset.train_test_split(test_size=1 - train_split_ratio, seed=42)\n\ + \n train_dataset = split_dataset[\"train\"]\n eval_dataset = split_dataset[\"\ + test\"]\n\n print(f\"Train set: {len(train_dataset)} rows\")\n print(f\"\ + Eval set: {len(eval_dataset)} rows\")\n\n # Save both datasets\n print(f\"\ + Saving train dataset to {yoda_train_dataset.path}\")\n train_dataset.save_to_disk(yoda_train_dataset.path)\n\ + \n print(f\"Saving eval dataset to {yoda_eval_dataset.path}\")\n eval_dataset.save_to_disk(yoda_eval_dataset.path)\n\ + \n" + image: python:3.11 +pipelineInfo: + name: prepare-yoda-dataset +root: + dag: + outputs: + artifacts: + yoda_eval_dataset: + artifactSelectors: + - outputArtifactKey: yoda_eval_dataset + producerSubtask: prepare-yoda-dataset + yoda_train_dataset: + artifactSelectors: + - outputArtifactKey: yoda_train_dataset + producerSubtask: prepare-yoda-dataset + tasks: + prepare-yoda-dataset: + cachingOptions: + enableCache: true + componentRef: + name: comp-prepare-yoda-dataset + inputs: + parameters: + operation_map: + componentInputParameter: operation_map + train_split_ratio: + componentInputParameter: train_split_ratio + yoda_input_dataset: + componentInputParameter: yoda_input_dataset + taskInfo: + name: prepare-yoda-dataset + inputDefinitions: + parameters: + operation_map: + defaultValue: + rename_column: + sentence: prompt + description: 'Specify list of operations you want to perform on the data set + before splitting it e.g. {"rename_column": {"sentence":"prompt"}, "remove_columns": + "translation"}' + isOptional: true + parameterType: STRUCT + train_split_ratio: + defaultValue: 0.8 + description: 'Ratio of data to use for training (0.0-1.0). + + Defaults to 0.8 (80% train, 20% eval).' 
+ isOptional: true + parameterType: NUMBER_DOUBLE + yoda_input_dataset: + description: Dataset to download from HuggingFace + parameterType: STRING + outputDefinitions: + artifacts: + yoda_eval_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + yoda_train_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.15.2
diff --git a/components/evaluation/evaluate_yoda_model.py b/components/evaluation/evaluate_yoda_model.py
new file mode 100644
index 000000000..7f7811f3e
--- /dev/null
+++ b/components/evaluation/evaluate_yoda_model.py
@@ -0,0 +1,389 @@
+from kfp import dsl
+import kfp.compiler
+
+
+@dsl.component(
+    base_image="registry.access.redhat.com/ubi9/python-311:latest",
+    packages_to_install=[
+        "transformers",
+        "torch",
+        "accelerate",
+        "lm-eval[vllm]",
+        "unitxt",
+        "sacrebleu",
+        "datasets",
+    ],
+)
+def evaluate_yoda_model(
+    model_path: str,
+    output_metrics: dsl.Output[dsl.Metrics],
+    output_results: dsl.Output[dsl.Artifact],
+    output_prompts: dsl.Output[dsl.Artifact],
+    lora_adapter: dsl.Input[dsl.Model] = None,
+    batch_size: int = 1,
+    limit: int = None,
+    max_model_len: int = 4096,
+    gpu_memory_utilization: float = 0.8,
+    dtype: str = "auto",
+    add_bos_token: bool = True,
+    include_classification_tasks: bool = True,
+    include_summarization_tasks: bool = True,
+    custom_translation_dataset: dsl.Input[dsl.Dataset] = None,
+    log_prompts: bool = True,
+    verbosity: str = "INFO",
+    max_batch_size: int = None,
+):
+    """Evaluate a model, optionally with a LoRA adapter, using lm-eval on the vLLM backend.
+
+    Runs the selected Unitxt classification/summarization tasks and, when a dataset
+    is supplied, a custom translation task scored with BLEU and exact match. The raw
+    results are written to ``output_results``, the prompt/response pairs of the
+    custom task to ``output_prompts``, and non-zero numeric metrics to ``output_metrics``.
+
+    Args:
+        model_path: Model identifier or path, passed to the backend as ``pretrained``.
+        output_metrics: Metrics artifact that receives one entry per task metric.
+        output_results: Artifact that receives the full results as JSON.
+        output_prompts: Artifact that receives the prompt/response log as JSON.
+        lora_adapter: Optional LoRA adapter; its path is passed as ``lora_local_path``.
+        batch_size: Evaluation batch size; must be positive.
+        limit: Optional limit forwarded to lm-eval's ``evaluate``; must be positive if set.
+        max_model_len: Maximum model length passed to the backend; must be positive.
+        gpu_memory_utilization: GPU memory fraction for the backend, between 0.0 and 1.0.
+        dtype: Model dtype passed to the backend.
+        add_bos_token: Passed to the backend as ``add_bos_token``.
+        include_classification_tasks: Whether to run the Unitxt classification tasks.
+        include_summarization_tasks: Whether to run the Unitxt summarization tasks.
+        custom_translation_dataset: Optional dataset, read with ``load_from_disk``, that
+            has ``prompt`` and ``completion`` columns.
+        log_prompts: Whether to record prompt/response pairs of the custom task.
+        verbosity: Name of a ``logging`` level, e.g. "INFO" or "DEBUG".
+        max_batch_size: Optional value forwarded to the backend as ``max_batch_size``.
+
+    Raises:
+        ValueError: If a parameter is invalid or CUDA is not available.
+        RuntimeError: If the model cannot be loaded.
+    """
+    import logging
+    import os
+    import json
+    import time
+    import random
+    from typing import Dict, Any, Optional
+
+    from lm_eval.tasks.unitxt import task
+    from lm_eval.api.registry import get_model
+    from lm_eval.api.model import LM
+    from lm_eval.evaluator import evaluate
+    from lm_eval.tasks import get_task_dict
+    from lm_eval.api.instance import Instance
+    from lm_eval import tasks
+    from lm_eval.api.task import TaskConfig
+    from lm_eval.api.metrics import mean
+    from datasets import load_from_disk
+    import torch
+    import sacrebleu
+
+    class TranslationTask(tasks.Task):
+        """
+        A custom lm-eval task for translation, using generate_until requests
+        and evaluating with the BLEU metric.
+        """
+
+        VERSION = 0
+
+        def __init__(self, dataset_path, task_name: str, log_prompts=False, prompts_log=None):
+            self.dataset_path = dataset_path
+            self.task_name = task_name
+            self.log_prompts = log_prompts
+            self.prompts_log = [] if prompts_log is None else prompts_log
+            config = TaskConfig(task=task_name, dataset_path=dataset_path)
+            super().__init__(config=config)
+            self.config.task = task_name
+            self.fewshot_rnd = random.Random()
+
+        def download(
+            self, data_dir=None, cache_dir=None, download_mode=None, **kwargs
+        ) -> None:
+            self.dataset = {"test": load_from_disk(self.dataset_path)}
+
+        def has_test_docs(self):
+            return "test" in self.dataset
+
+        def has_validation_docs(self):
+            return False
+
+        def has_training_docs(self):
+            return False
+
+        def test_docs(self):
+            return self.dataset["test"]
+
+        def doc_to_text(self, doc):
+            return doc["prompt"]
+
+        def doc_to_target(self, doc):
+            return doc["completion"]
+
+        def construct_requests(self, doc, ctx, **kwargs):
+            kwargs.pop("apply_chat_template", False)
+            kwargs.pop("chat_template", False)
+            return Instance(
+                request_type="generate_until",
+                doc=doc,
+                arguments=(ctx, {}),
+                idx=0,
+                **kwargs,
+            )
+
+        def process_results(self, doc, results):
+            (generated_text,) = results
+
+            prediction = generated_text.strip()
+
+            if self.log_prompts:
+                try:
+                    self.prompts_log.append(
+                        {"prompt": self.doc_to_text(doc), "response": prediction}
+                    )
+                except Exception:
+                    # Best-effort logging; avoid breaking evaluation if logging fails
+                    logging.getLogger(__name__).debug(
+                        "Could not record prompt/response pair", exc_info=True
+                    )
+
+            predictions = [prediction]
+            references = [[self.doc_to_target(doc).strip()]]
+
+            bleu_score = sacrebleu.corpus_bleu(predictions, references).score
+
+            exact_match = 1.0 if prediction == references[0][0] else 0.0
+
+            return {"bleu": bleu_score, "exact_match": exact_match}
+
+        def aggregation(self):
+            return {"bleu": mean, "exact_match": mean}
+
+        def should_decontaminate(self):
+            return False
+
+        def doc_to_prefix(self, doc):
+            return ""
+
+        def higher_is_better(self):
+            return {"bleu": True, "exact_match": True}
+
+    TASK_CONFIGS = {
+        "classification": [
+            {
+                "task": "classification_rte_simple",
+                "recipe": "card=cards.rte,template=templates.classification.multi_class.relation.simple",
+                "group": "classification",
+                "output_type": "generate_until",
+            },
+            {
+                "task": "classification_rte_default",
+                "recipe": "card=cards.rte,template=templates.classification.multi_class.relation.default",
+                "group": "classification",
+                "output_type": "generate_until",
+            },
+            {
+                "task": "classification_rte_wnli",
+                "recipe": "card=cards.wnli,template=templates.classification.multi_class.relation.simple",
+                "group": "classification",
+                "output_type": "generate_until",
+            },
+        ],
+        "summarization": [
+            {
+                "task": "summarization_xsum_formal",
+                "recipe": "card=cards.xsum,template=templates.summarization.abstractive.formal,num_demos=0",
+                "group": "summarization",
+                "output_type": "generate_until",
+            }
+        ],
+    }
+
+    # Resolve the level name up front so a bad value fails with a clear message.
+    log_level = getattr(logging, verbosity.upper(), None)
+    if not isinstance(log_level, int):
+        raise ValueError(f"verbosity must be a logging level name, got {verbosity!r}")
+
+    logging.basicConfig(
+        level=log_level,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    )
+    logger = logging.getLogger(__name__)
+
+    os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
+    os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
+
+    logger.info("Validating parameters...")
+
+    if not torch.cuda.is_available():
+        raise ValueError("CUDA is not available")
+
+    if not (0.0 <= gpu_memory_utilization <= 1.0):
+        raise ValueError("gpu_memory_utilization must be between 0.0 and 1.0")
+
+    if batch_size <= 0:
+        raise ValueError("batch_size must be positive")
+
+    if max_model_len <= 0:
+        raise ValueError("max_model_len must be positive")
+
+    if limit is not None and limit <= 0:
+        raise ValueError("limit must be positive or None")
+
+    if (
+        not include_classification_tasks
+        and not include_summarization_tasks
+        and not custom_translation_dataset
+    ):
+        raise ValueError(
+            "At least one of include_classification_tasks, include_summarization_tasks, or custom_translation_dataset must be provided"
+        )
+
+    logger.info("Parameter validation passed")
+
+    logger.info("Creating tasks...")
+    start_time = time.time()
+
+    eval_tasks = []
+    prompt_response_log = []
+
+    if custom_translation_dataset:
+        logger.info("Adding custom translation task...")
+        translation_task = TranslationTask(
+            custom_translation_dataset.path,
+            "custom_translation",
+            log_prompts=log_prompts,
+            prompts_log=prompt_response_log,
+        )
+        eval_tasks.append(translation_task)
+
+    if include_classification_tasks:
+        logger.info("Adding classification tasks...")
+        classification_configs = TASK_CONFIGS["classification"]
+
+        for config in classification_configs:
+            task_obj = task.Unitxt(config=config)
+            # TODO: Remove after https://github.com/EleutherAI/lm-evaluation-harness/pull/3225 is merged.
+            task_obj.config.task = config["task"]
+            eval_tasks.append(task_obj)
+
+    if include_summarization_tasks:
+        logger.info("Adding summarization tasks...")
+        summarization_configs = TASK_CONFIGS["summarization"]
+
+        for config in summarization_configs:
+            task_obj = task.Unitxt(config=config)
+            # TODO: Remove after https://github.com/EleutherAI/lm-evaluation-harness/pull/3225 is merged.
+            task_obj.config.task = config["task"]
+            eval_tasks.append(task_obj)
+
+    task_dict = get_task_dict(eval_tasks)
+    logger.info(f"Created {len(eval_tasks)} tasks in {time.time() - start_time:.2f}s")
+
+    logger.info("Loading model...")
+    start_time = time.time()
+
+    try:
+        model_args = {
+            "add_bos_token": add_bos_token,
+            "dtype": dtype,
+            "max_model_len": max_model_len,
+            "gpu_memory_utilization": gpu_memory_utilization,
+            "pretrained": model_path,
+            "trust_remote_code": True,
+        }
+
+        # Optionally provide LoRA adapter to lm-eval's VLLM backend
+        # The backend expects `lora_local_path` and internally constructs the LoRARequest.
+        if lora_adapter and lora_adapter.path:
+            logger.info("LoRA adapter provided; passing lora_local_path to VLLM backend")
+            model_args["lora_local_path"] = lora_adapter.path
+
+        model_class = get_model("vllm")
+        additional_config = {
+            "batch_size": batch_size,
+            "max_batch_size": max_batch_size,
+            "device": None,
+        }
+
+        loaded_model = model_class.create_from_arg_obj(model_args, additional_config)
+        logger.info(f"Model loaded successfully in {time.time() - start_time:.2f}s")
+    except Exception as e:
+        logger.exception(f"Failed to load model: {e}")
+        raise RuntimeError(f"Model loading failed: {e}") from e
+
+    logger.info("Starting evaluation...")
+    start_time = time.time()
+
+    results = evaluate(
+        lm=loaded_model,
+        task_dict=task_dict,
+        limit=limit,
+        verbosity=verbosity,
+    )
+
+    logger.info(f"Evaluation completed in {time.time() - start_time:.2f}s")
+
+    logger.info("Saving results...")
+
+    def clean_for_json(obj):
+        """Recursively convert obj into JSON-serializable values (tuples become lists)."""
+        if isinstance(obj, dict):
+            return {k: clean_for_json(v) for k, v in obj.items()}
+        elif isinstance(obj, (list, tuple)):
+            return [clean_for_json(item) for item in obj]
+        elif isinstance(obj, (int, float, str, bool, type(None))):
+            return obj
+        else:
+            # Convert non-serializable objects to string representation
+            return str(obj)
+
+    clean_results = clean_for_json(results)
+
+    output_results.name = "results.json"
+
+    with open(output_results.path, "w", encoding="utf-8") as f:
+        json.dump(clean_results, f, indent=2)
+    logger.info(f"Results saved to {output_results.path}")
+
+    # Save prompt/response log for custom TranslationTask only
+    if log_prompts and custom_translation_dataset and len(prompt_response_log) > 0:
+        try:
+            output_prompts.name = "prompts.json"
+            with open(output_prompts.path, "w", encoding="utf-8") as f:
+                json.dump(prompt_response_log, f, indent=2)
+            logger.info(f"Prompt/response log saved to {output_prompts.path}")
+        except Exception as e:
+            logger.warning(f"Failed to save prompt/response log: {e}")
+
+    logger.info("Logging metrics...")
+
+    for task_name, task_results in clean_results["results"].items():
+        for metric_name, metric_value in task_results.items():
+            if isinstance(metric_value, (int, float)):
+                # Skip metrics that are 0 due to a bug in the RHOAI UI.
+                # TODO: Fix RHOAI UI to handle 0 values.
+                # TODO: Ignore store_session_info from metrics in RHOAI UI.
+                if metric_value == 0:
+                    continue
+
+                metric_key = f"{task_name}_{metric_name}"
+                output_metrics.log_metric(metric_key, metric_value)
+                logger.debug(f"Logged metric: {metric_key} = {metric_value}")
+
+    logger.info("Metrics logged successfully")
+
+    logger.info("Pipeline completed successfully")
+
+
+if __name__ == "__main__":
+    # Re-run this module after changing the component to regenerate its YAML.
+    kfp.compiler.Compiler().compile(
+        evaluate_yoda_model,
+        package_path=__file__.replace(".py", "_component.yaml"),
+    )
diff --git a/components/evaluation/evaluate_yoda_model_component.yaml b/components/evaluation/evaluate_yoda_model_component.yaml new file mode 100644 index 000000000..d9f199a00 --- /dev/null +++ b/components/evaluation/evaluate_yoda_model_component.yaml @@ -0,0 +1,412 @@ +# PIPELINE DEFINITION +# Name: evaluate-yoda-model +# Inputs: +# add_bos_token: bool [Default: True] +# batch_size: int [Default: 1.0] +# custom_translation_dataset: system.Dataset +# dtype: str [Default: 'auto'] +# gpu_memory_utilization: float [Default: 0.8] +# include_classification_tasks: bool [Default: True] +# include_summarization_tasks: bool [Default: True] +# limit: int +# log_prompts: bool [Default: True] +# lora_adapter: system.Model +# max_batch_size: int +# max_model_len: int [Default: 4096.0] +# model_path: str +# verbosity: str [Default: 'INFO'] +# Outputs: +# output_metrics: system.Metrics +# output_prompts: system.Artifact +# output_results: system.Artifact +components: + comp-evaluate-yoda-model: + executorLabel: exec-evaluate-yoda-model + inputDefinitions: + artifacts: + custom_translation_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 +
isOptional: true + lora_adapter: + artifactType: + schemaTitle: system.Model + schemaVersion: 0.0.1 + isOptional: true + parameters: + add_bos_token: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + batch_size: + defaultValue: 1.0 + isOptional: true + parameterType: NUMBER_INTEGER + dtype: + defaultValue: auto + isOptional: true + parameterType: STRING + gpu_memory_utilization: + defaultValue: 0.8 + isOptional: true + parameterType: NUMBER_DOUBLE + include_classification_tasks: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + include_summarization_tasks: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + limit: + isOptional: true + parameterType: NUMBER_INTEGER + log_prompts: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + max_batch_size: + isOptional: true + parameterType: NUMBER_INTEGER + max_model_len: + defaultValue: 4096.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_path: + parameterType: STRING + verbosity: + defaultValue: INFO + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + output_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + output_prompts: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + output_results: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 +deploymentSpec: + executors: + exec-evaluate-yoda-model: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - evaluate_yoda_model + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'transformers'\ + \ 'torch' 'accelerate' 'lm-eval[vllm]' 'unitxt' 'sacrebleu' 'datasets' \ + \ && python3 -m pip install --quiet --no-warn-script-location 'kfp==2.15.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef evaluate_yoda_model(\n model_path: str,\n output_metrics:\ + \ dsl.Output[dsl.Metrics],\n output_results: dsl.Output[dsl.Artifact],\n\ + \ output_prompts: dsl.Output[dsl.Artifact],\n lora_adapter:\ + \ dsl.Input[dsl.Model] = None,\n batch_size: int = 1,\n limit:\ + \ int = None,\n max_model_len: int = 4096,\n gpu_memory_utilization:\ + \ float = 0.8,\n dtype: str = \"auto\",\n add_bos_token: bool\ + \ = True,\n include_classification_tasks: bool = True,\n include_summarization_tasks:\ + \ bool = True,\n custom_translation_dataset: dsl.Input[dsl.Dataset]\ + \ = None,\n log_prompts: bool = True,\n verbosity: str = \"\ + INFO\",\n max_batch_size: int = None,\n):\n import logging\n \ + \ import os\n import json\n import time\n import random\n \ + \ from typing import Dict, Any, Optional\n\n from lm_eval.tasks.unitxt\ + \ import task\n from lm_eval.api.registry import get_model\n from\ + \ lm_eval.api.model import LM\n from lm_eval.evaluator import evaluate\n\ + \ from lm_eval.tasks import get_task_dict\n from lm_eval.api.instance\ + \ import Instance\n from lm_eval import tasks\n from lm_eval.api.task\ + \ import TaskConfig\n from lm_eval.api.metrics import 
mean\n from\ + \ datasets import load_from_disk\n import torch\n import sacrebleu\n\ + \n class TranslationTask(tasks.Task):\n \"\"\"\n A custom\ + \ lm-eval task for translation, using the greedy_until method\n and\ + \ evaluating with the BLEU metric.\n \"\"\"\n\n VERSION =\ + \ 0\n\n def __init__(self, dataset_path, task_name: str, log_prompts=False,\ + \ prompts_log=None):\n self.dataset_path = dataset_path\n \ + \ self.task_name = task_name\n self.log_prompts = log_prompts\n\ + \ self.prompts_log = [] if prompts_log is None else prompts_log\n\ + \ config = TaskConfig(task=task_name, dataset_path=dataset_path)\n\ + \ super().__init__(config=config)\n self.config.task\ + \ = task_name\n self.fewshot_rnd = random.Random()\n\n \ + \ def download(\n self, data_dir=None, cache_dir=None, download_mode=None,\ + \ **kwargs\n ) -> None:\n self.dataset = {\"test\": load_from_disk(self.dataset_path)}\n\ + \n def has_test_docs(self):\n return \"test\" in self.dataset\n\ + \n def has_validation_docs(self):\n return False\n\n \ + \ def has_training_docs(self):\n return False\n\n \ + \ def test_docs(self):\n return self.dataset[\"test\"]\n\n\ + \ def doc_to_text(self, doc):\n return doc[\"prompt\"\ + ]\n\n def doc_to_target(self, doc):\n return doc[\"completion\"\ + ]\n\n def construct_requests(self, doc, ctx, **kwargs):\n \ + \ kwargs.pop(\"apply_chat_template\", False)\n kwargs.pop(\"\ + chat_template\", False)\n return Instance(\n request_type=\"\ + generate_until\",\n doc=doc,\n arguments=(ctx,\ + \ {}),\n idx=0,\n **kwargs,\n )\n\ + \n def process_results(self, doc, results):\n (generated_text,)\ + \ = results\n\n prediction = generated_text.strip()\n\n \ + \ if self.log_prompts:\n try:\n \ + \ self.prompts_log.append(\n {\"prompt\": self.doc_to_text(doc),\ + \ \"response\": prediction}\n )\n except\ + \ Exception:\n # Best-effort logging; avoid breaking\ + \ evaluation if logging fails\n pass\n\n predictions\ + \ = [prediction]\n references = 
[[self.doc_to_target(doc).strip()]]\n\ + \n bleu_score = sacrebleu.corpus_bleu(predictions, references).score\n\ + \n exact_match = 1.0 if prediction == references[0][0] else 0.0\n\ + \n return {\"bleu\": bleu_score, \"exact_match\": exact_match}\n\ + \n def aggregation(self):\n return {\"bleu\": mean, \"\ + exact_match\": mean}\n\n def should_decontaminate(self):\n \ + \ return False\n\n def doc_to_prefix(self, doc):\n \ + \ return \"\"\n\n def higher_is_better(self):\n return\ + \ {\"bleu\": True, \"exact_match\": True}\n\n TASK_CONFIGS = {\n \ + \ \"classification\": [\n {\n \"task\": \"\ + classification_rte_simple\",\n \"recipe\": \"card=cards.rte,template=templates.classification.multi_class.relation.simple\"\ + ,\n \"group\": \"classification\",\n \"output_type\"\ + : \"generate_until\",\n },\n {\n \"\ + task\": \"classification_rte_default\",\n \"recipe\": \"\ + card=cards.rte,template=templates.classification.multi_class.relation.default\"\ + ,\n \"group\": \"classification\",\n \"output_type\"\ + : \"generate_until\",\n },\n {\n \"\ + task\": \"classification_rte_wnli\",\n \"recipe\": \"card=cards.wnli,template=templates.classification.multi_class.relation.simple\"\ + ,\n \"group\": \"classification\",\n \"output_type\"\ + : \"generate_until\",\n },\n ],\n \"summarization\"\ + : [\n {\n \"task\": \"summarization_xsum_formal\"\ + ,\n \"recipe\": \"card=cards.xsum,template=templates.summarization.abstractive.formal,num_demos=0\"\ + ,\n \"group\": \"summarization\",\n \"output_type\"\ + : \"generate_until\",\n }\n ],\n }\n\n logging.basicConfig(\n\ + \ level=getattr(logging, verbosity.upper()),\n format=\"%(asctime)s\ + \ - %(name)s - %(levelname)s - %(message)s\",\n )\n logger = logging.getLogger(__name__)\n\ + \n os.environ[\"HF_HUB_DISABLE_TELEMETRY\"] = \"1\"\n os.environ[\"\ + HF_HUB_DISABLE_SYMLINKS_WARNING\"] = \"1\"\n\n logger.info(\"Validating\ + \ parameters...\")\n\n if not torch.cuda.is_available():\n raise\ + \ ValueError(\"CUDA is not 
available\")\n\n if not (0.0 <= gpu_memory_utilization\ + \ <= 1.0):\n raise ValueError(\"gpu_memory_utilization must be between\ + \ 0.0 and 1.0\")\n\n if batch_size <= 0:\n raise ValueError(\"\ + batch_size must be positive\")\n\n if max_model_len <= 0:\n raise\ + \ ValueError(\"max_model_len must be positive\")\n\n if limit is not\ + \ None and limit <= 0:\n raise ValueError(\"limit must be positive\ + \ or None\")\n\n if (\n not include_classification_tasks\n\ + \ and not include_summarization_tasks\n and not custom_translation_dataset\n\ + \ ):\n raise ValueError(\n \"At least one of include_classification_tasks,\ + \ include_summarization_tasks, or custom_translation_dataset must be provided\"\ + \n )\n\n logger.info(\"Parameter validation passed\")\n\n logger.info(\"\ + Creating tasks...\")\n start_time = time.time()\n\n eval_tasks = []\n\ + \ prompt_response_log = []\n\n if custom_translation_dataset:\n \ + \ logger.info(\"Adding custom translation task...\")\n translation_task\ + \ = TranslationTask(\n custom_translation_dataset.path,\n \ + \ \"custom_translation\",\n log_prompts=log_prompts,\n\ + \ prompts_log=prompt_response_log,\n )\n eval_tasks.append(translation_task)\n\ + \n if include_classification_tasks:\n logger.info(\"Adding classification\ + \ tasks...\")\n classification_configs = TASK_CONFIGS[\"classification\"\ + ]\n\n for config in classification_configs:\n task_obj\ + \ = task.Unitxt(config=config)\n # TODO: Remove after https://github.com/EleutherAI/lm-evaluation-harness/pull/3225\ + \ is merged.\n task_obj.config.task = config[\"task\"]\n \ + \ eval_tasks.append(task_obj)\n\n if include_summarization_tasks:\n\ + \ logger.info(\"Adding summarization tasks...\")\n summarization_config\ + \ = TASK_CONFIGS[\"summarization\"][0]\n\n task_obj = task.Unitxt(config=summarization_config)\n\ + \ # TODO: Remove after https://github.com/EleutherAI/lm-evaluation-harness/pull/3225\ + \ is merged.\n task_obj.config.task = summarization_config[\"task\"\ 
+ ]\n eval_tasks.append(task_obj)\n\n task_dict = get_task_dict(eval_tasks)\n\ + \ logger.info(f\"Created {len(eval_tasks)} tasks in {time.time() - start_time:.2f}s\"\ + )\n\n logger.info(\"Loading model...\")\n start_time = time.time()\n\ + \n try:\n model_args = {\n \"add_bos_token\": add_bos_token,\n\ + \ \"dtype\": dtype,\n \"max_model_len\": max_model_len,\n\ + \ \"gpu_memory_utilization\": gpu_memory_utilization,\n \ + \ \"pretrained\": model_path,\n \"trust_remote_code\":\ + \ True,\n }\n\n # Optionally provide LoRA adapter to lm-eval's\ + \ VLLM backend\n # The backend expects `lora_local_path` and internally\ + \ constructs the LoRARequest.\n if lora_adapter and lora_adapter.path:\n\ + \ logger.info(\"LoRA adapter provided; passing lora_local_path\ + \ to VLLM backend\")\n model_args[\"lora_local_path\"] = lora_adapter.path\n\ + \n model_class = get_model(\"vllm\")\n additional_config =\ + \ {\n \"batch_size\": batch_size,\n \"max_batch_size\"\ + : max_batch_size,\n \"device\": None,\n }\n\n loaded_model\ + \ = model_class.create_from_arg_obj(model_args, additional_config)\n \ + \ logger.info(f\"Model loaded successfully in {time.time() - start_time:.2f}s\"\ + )\n except Exception as e:\n logger.error(f\"Failed to load model:\ + \ {e}\")\n raise RuntimeError(f\"Model loading failed: {e}\")\n\n\ + \ logger.info(\"Starting evaluation...\")\n start_time = time.time()\n\ + \n results = evaluate(\n lm=loaded_model,\n task_dict=task_dict,\n\ + \ limit=limit,\n verbosity=verbosity,\n )\n\n logger.info(f\"\ + Evaluation completed in {time.time() - start_time:.2f}s\")\n\n logger.info(\"\ + Saving results...\")\n\n def clean_for_json(obj):\n \"\"\"Recursively\ + \ clean objects to make them JSON serializable.\"\"\"\n if isinstance(obj,\ + \ dict):\n return {k: clean_for_json(v) for k, v in obj.items()}\n\ + \ elif isinstance(obj, list):\n return [clean_for_json(item)\ + \ for item in obj]\n elif isinstance(obj, (int, float, str, bool,\ + \ type(None))):\n return 
obj\n else:\n # Convert\ + \ non-serializable objects to string representation\n return\ + \ str(obj)\n\n clean_results = clean_for_json(results)\n\n output_results.name\ + \ = \"results.json\"\n\n with open(output_results.path, \"w\") as f:\n\ + \ json.dump(clean_results, f, indent=2)\n logger.info(f\"Results\ + \ saved to {output_results.path}\")\n\n # Save prompt/response log for\ + \ custom TranslationTask only\n if log_prompts and custom_translation_dataset\ + \ and len(prompt_response_log) > 0:\n try:\n output_prompts.name\ + \ = \"prompts.json\"\n with open(output_prompts.path, \"w\")\ + \ as f:\n json.dump(prompt_response_log, f, indent=2)\n \ + \ logger.info(f\"Prompt/response log saved to {output_prompts.path}\"\ + )\n except Exception as e:\n logger.warning(f\"Failed\ + \ to save prompt/response log: {e}\")\n\n logger.info(\"Logging metrics...\"\ + )\n\n for task_name, task_results in clean_results[\"results\"].items():\n\ + \ for metric_name, metric_value in task_results.items():\n \ + \ if isinstance(metric_value, (int, float)):\n # Skip\ + \ metrics that are 0 due to a bug in the RHOAI UI.\n # TODO:\ + \ Fix RHOAI UI to handle 0 values.\n # TODO: Ignore store_session_info\ + \ from metrics in RHOAI UI.\n if metric_value == 0:\n \ + \ continue\n\n metric_key = f\"{task_name}_{metric_name}\"\ + \n output_metrics.log_metric(metric_key, metric_value)\n\ + \ logger.debug(f\"Logged metric: {metric_key} = {metric_value}\"\ + )\n\n logger.info(\"Metrics logged successfully\")\n\n logger.info(\"\ + Pipeline completed successfully\")\n\n" + image: registry.access.redhat.com/ubi9/python-311:latest +pipelineInfo: + name: evaluate-yoda-model +root: + dag: + outputs: + artifacts: + output_metrics: + artifactSelectors: + - outputArtifactKey: output_metrics + producerSubtask: evaluate-yoda-model + output_prompts: + artifactSelectors: + - outputArtifactKey: output_prompts + producerSubtask: evaluate-yoda-model + output_results: + artifactSelectors: + - 
outputArtifactKey: output_results + producerSubtask: evaluate-yoda-model + tasks: + evaluate-yoda-model: + cachingOptions: + enableCache: true + componentRef: + name: comp-evaluate-yoda-model + inputs: + artifacts: + custom_translation_dataset: + componentInputArtifact: custom_translation_dataset + lora_adapter: + componentInputArtifact: lora_adapter + parameters: + add_bos_token: + componentInputParameter: add_bos_token + batch_size: + componentInputParameter: batch_size + dtype: + componentInputParameter: dtype + gpu_memory_utilization: + componentInputParameter: gpu_memory_utilization + include_classification_tasks: + componentInputParameter: include_classification_tasks + include_summarization_tasks: + componentInputParameter: include_summarization_tasks + limit: + componentInputParameter: limit + log_prompts: + componentInputParameter: log_prompts + max_batch_size: + componentInputParameter: max_batch_size + max_model_len: + componentInputParameter: max_model_len + model_path: + componentInputParameter: model_path + verbosity: + componentInputParameter: verbosity + taskInfo: + name: evaluate-yoda-model + inputDefinitions: + artifacts: + custom_translation_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + isOptional: true + lora_adapter: + artifactType: + schemaTitle: system.Model + schemaVersion: 0.0.1 + isOptional: true + parameters: + add_bos_token: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + batch_size: + defaultValue: 1.0 + isOptional: true + parameterType: NUMBER_INTEGER + dtype: + defaultValue: auto + isOptional: true + parameterType: STRING + gpu_memory_utilization: + defaultValue: 0.8 + isOptional: true + parameterType: NUMBER_DOUBLE + include_classification_tasks: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + include_summarization_tasks: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + limit: + isOptional: true + parameterType: NUMBER_INTEGER + log_prompts: + 
@dsl.component(
    base_image="registry.access.redhat.com/ubi9/python-311:latest",
    kfp_package_path="git+https://github.com/kubeflow/pipelines@master#egg=kfp&subdirectory=sdk/python",
    packages_to_install=["kubernetes"],
    task_config_passthroughs=[
        dsl.TaskConfigField.RESOURCES,
        dsl.TaskConfigField.KUBERNETES_TOLERATIONS,
        dsl.TaskConfigField.KUBERNETES_NODE_SELECTOR,
        dsl.TaskConfigField.KUBERNETES_AFFINITY,
        dsl.TaskConfigPassthrough(field=dsl.TaskConfigField.ENV, apply_to_task=True),
        dsl.TaskConfigPassthrough(field=dsl.TaskConfigField.KUBERNETES_VOLUMES, apply_to_task=True),
    ],
)
def train_model(
    input_dataset: dsl.Input[dsl.Dataset],
    model_name: str,
    run_id: str,
    pvc_path: str,
    output_model: dsl.Output[dsl.Model],
    output_metrics: dsl.Output[dsl.Metrics],
    # Training configuration parameters
    epochs: int = 10,
    lora_rank: int = 8,
    learning_rate: float = 3e-4,
    batch_size: int = 16,
    max_length: int = 64,
    # Training control parameters
    max_steps: Optional[int] = None,
    logging_steps: int = 10,
    save_steps: Optional[int] = None,
    save_strategy: str = "epoch",
    # Optimizer parameters
    optimizer: str = "adamw_torch",
    adam_beta1: float = 0.9,
    adam_beta2: float = 0.999,
    adam_epsilon: float = 1e-8,
    weight_decay: float = 0.01,
    # Performance optimization
    use_flash_attention: bool = False,
    # Infrastructure parameters
    num_nodes: int = 2,
    trainer_runtime: str = "torch-distributed",
    kubernetes_config: dsl.TaskConfig = None,
):
    """Train a large language model using distributed training with LoRA fine-tuning.

    This function creates and manages a Kubernetes TrainJob for distributed training
    of a large language model using LoRA (Low-Rank Adaptation) fine-tuning. It copies
    the input dataset to the PVC, submits the TrainJob, polls it until it completes,
    fails or is cancelled, and then collects the metrics file and the LoRA adapter
    that the main worker wrote under ``pvc_path``.

    Args:
        input_dataset (dsl.Input[dsl.Dataset]): Training dataset artifact. It is copied to
            ``<pvc_path>/dataset/train`` and loaded from there by the training workers.
        model_name (str): HuggingFace model identifier (e.g., "meta-llama/Llama-3.2-3B-Instruct").
        run_id (str): Unique identifier for this training run. Use dsl.PIPELINE_JOB_ID_PLACEHOLDER.
        pvc_path (str): Base path within the PVC for storing outputs.
        output_model (dsl.Output[dsl.Model]): Kubeflow output artifact for the trained model.
        output_metrics (dsl.Output[dsl.Metrics]): Kubeflow output artifact for training metrics.
        epochs (int, optional): Number of training epochs. Defaults to 10.
        lora_rank (int, optional): LoRA adapter rank (lower = fewer parameters, faster training). Defaults to 8.
        learning_rate (float, optional): Learning rate for training optimization. Defaults to 3e-4.
        batch_size (int, optional): Per-device training batch size. Defaults to 16.
        max_length (int, optional): Maximum token sequence length for training. Defaults to 64.
        max_steps (int, optional): Maximum number of training steps. If specified, overrides epochs. Defaults to None.
        logging_steps (int, optional): Number of steps between logging outputs. Defaults to 10.
        save_steps (int, optional): Number of steps between model checkpoints. Defaults to None.
        save_strategy (str, optional): Checkpoint saving strategy ("epoch" or "steps"). Defaults to "epoch".
        optimizer (str, optional): Optimizer to use (e.g., "adamw_torch", "adamw_torch_fused"). Defaults to "adamw_torch".
        adam_beta1 (float, optional): Beta1 parameter for Adam optimizer. Defaults to 0.9.
        adam_beta2 (float, optional): Beta2 parameter for Adam optimizer. Defaults to 0.999.
        adam_epsilon (float, optional): Epsilon parameter for Adam optimizer. Defaults to 1e-8.
        weight_decay (float, optional): Weight decay for regularization. Defaults to 0.01.
        use_flash_attention (bool, optional): Whether to use Flash Attention 2 for improved performance. Defaults to False.
        num_nodes (int, optional): Number of nodes for distributed training. Defaults to 2.
        trainer_runtime (str, optional): Runtime to use for Kubeflow Trainer. Defaults to "torch-distributed".
        kubernetes_config (dsl.TaskConfig, optional): Task configuration whose env, resources, volumes,
            volume mounts, node selector and tolerations are forwarded to the TrainJob. Required at run time.

    Raises:
        ValueError: If ``kubernetes_config`` is None.
        RuntimeError: If the TrainJob fails, is cancelled, or disappears while being monitored.
        FileNotFoundError: If no adapter directory exists under ``pvc_path`` after the job completes.
    """
    import json
    import os
    import shutil
    import textwrap
    import time
    import inspect

    from kubernetes import client as k8s_client, config
    from kubernetes.client.rest import ApiException

    # Every field of the TrainJob spec below is read from kubernetes_config, so fail
    # with a clear message up front instead of an AttributeError on None later on.
    if kubernetes_config is None:
        raise ValueError(
            "kubernetes_config is required to build the TrainJob "
            "(env, resources, volumes, node selector and tolerations are read from it)"
        )

    def get_target_modules(model_name: str) -> list:
        """Get appropriate LoRA target modules based on model architecture.

        Selects optimal layers for LoRA adaptation based on research findings:
        - Attention layers (q_proj, k_proj, v_proj, o_proj) control attention patterns
        - MLP layers (gate_proj, up_proj, down_proj) store task-specific knowledge

        Model-specific targeting:
        - Granite: Attention layers only (q,k,v,o)
        - LLaMA/Mistral/Qwen: Full coverage (attention + MLP)
        - Phi: Uses 'dense' instead of 'o_proj'
        - Unknown: Conservative fallback (q,v)

        Based on LoRA (arXiv:2106.09685), QLoRA (arXiv:2305.14314), and model-specific research.
        """
        model_name_lower = model_name.lower()
        attention_modules = ["q_proj", "v_proj", "k_proj", "o_proj"]
        mlp_modules = ["gate_proj", "up_proj", "down_proj"]

        # Granite is matched first so that it keeps attention-only targeting.
        if "granite" in model_name_lower:
            return attention_modules
        if any(
            family in model_name_lower
            for family in ("llama", "mistral", "mixtral", "qwen")
        ):
            return attention_modules + mlp_modules
        if "phi" in model_name_lower:
            return ["q_proj", "v_proj", "k_proj", "dense"]

        print(
            f"Warning: Unknown model architecture for {model_name}, using conservative LoRA targets"
        )
        return ["q_proj", "v_proj"]

    # NOTE: the source of train_model_func is extracted with inspect.getsource() and run
    # as a standalone script on the training workers. It must stay self-contained: all
    # imports live inside it and its annotations may only use builtin names, because
    # nothing else is in scope when the `def` statement is evaluated there.
    def train_model_func(
        lora_rank: int,
        learning_rate: float,
        batch_size: int,
        max_length: int,
        model_name: str,
        dataset_path: str,
        epochs: int,
        pvc_path: str,
        target_modules: list,
        max_steps: int,
        logging_steps: int,
        save_steps: int,
        save_strategy: str,
        optimizer: str,
        adam_beta1: float,
        adam_beta2: float,
        adam_epsilon: float,
        weight_decay: float,
        use_flash_attention: bool,
    ):
        """Fine-tune the model with LoRA; rank 0 exports the adapter and metrics.json to pvc_path.

        max_steps and save_steps may be None (meaning "not set").
        """
        import os
        import json
        import torch
        from datasets import load_from_disk
        from peft import get_peft_model, LoraConfig
        from transformers import (
            AutoModelForCausalLM,
            AutoTokenizer,
            TrainerCallback,
        )
        from trl import SFTConfig, SFTTrainer

        local_rank = int(os.environ.get("LOCAL_RANK", 0))
        world_rank = int(os.environ.get("RANK", 0))
        world_size = int(os.environ.get("WORLD_SIZE", 1))

        print(
            f"Worker info - Local rank: {local_rank}, World rank: {world_rank}, World size: {world_size}"
        )

        is_main_worker = world_rank == 0

        class MetricsCallback(TrainerCallback):
            """Remember the first and the most recent logged training loss on the main worker."""

            def __init__(self, is_main_worker):
                self.is_main_worker = is_main_worker
                self.initial_loss = None
                self.final_loss = None

            def on_log(self, args, state, control, logs=None, **kwargs):
                if logs and self.is_main_worker and "loss" in logs:
                    if self.initial_loss is None:
                        self.initial_loss = logs["loss"]
                    self.final_loss = logs["loss"]

        metrics_callback = MetricsCallback(is_main_worker)

        print("Downloading and loading model")
        model_kwargs = {
            "device_map": "auto",
            "torch_dtype": torch.float16,
            "trust_remote_code": True,
        }
        if use_flash_attention:
            model_kwargs["attn_implementation"] = "flash_attention_2"

        model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)

        print(f"Using LoRA target modules for {model_name}: {target_modules}")

        config = LoraConfig(
            r=lora_rank,
            lora_alpha=lora_rank * 2,
            bias="none",
            lora_dropout=0.05,
            task_type="CAUSAL_LM",
            target_modules=target_modules,
        )
        model = get_peft_model(model, config)

        print("Loading dataset")
        dataset = load_from_disk(dataset_path)

        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "right"

        # Forward the step-based knobs only when they were actually set, so the
        # trainer's own defaults apply otherwise and no None reaches numeric fields
        # (e.g. save_strategy="steps" with save_steps=None).
        schedule_kwargs = {}
        if max_steps is None:
            schedule_kwargs["num_train_epochs"] = epochs
        else:
            # A positive max_steps takes precedence over the epoch count.
            schedule_kwargs["max_steps"] = max_steps
        if save_steps is not None:
            schedule_kwargs["save_steps"] = save_steps

        sft_config = SFTConfig(
            ## Memory optimization
            gradient_checkpointing=True,
            gradient_checkpointing_kwargs={"use_reentrant": False},
            gradient_accumulation_steps=1,
            per_device_train_batch_size=batch_size,
            auto_find_batch_size=True,
            ## Dataset configuration
            max_length=max_length,
            packing=use_flash_attention,  # Packing works best with Flash Attention
            ## Training parameters
            learning_rate=learning_rate,
            optim=optimizer,
            ## Optimizer parameters
            adam_beta1=adam_beta1,
            adam_beta2=adam_beta2,
            adam_epsilon=adam_epsilon,
            weight_decay=weight_decay,
            ## Logging and saving
            logging_steps=logging_steps,
            save_strategy=save_strategy,
            logging_dir="./logs",
            report_to="none",
            **schedule_kwargs,
        )
        trainer = SFTTrainer(
            model=model,
            processing_class=tokenizer,
            args=sft_config,
            train_dataset=dataset,
            callbacks=[metrics_callback],
        )

        train_result = trainer.train()

        if torch.distributed.is_initialized():
            torch.distributed.barrier()
            print(f"Worker {world_rank} - Training completed and synchronized")

        if not is_main_worker:
            print(
                f"Worker {world_rank} - Skipping model export and metrics (not main worker)"
            )
            # Clean up distributed process group for non-main workers
            if torch.distributed.is_initialized():
                print(f"Worker {world_rank} - Cleaning up distributed process group")
                torch.distributed.destroy_process_group()
                print(f"Worker {world_rank} - Distributed process group destroyed")
            return

        print("Main worker (rank 0) - Exporting model and metrics...")

        # Save LoRA adapter
        model_output_path = os.path.join(pvc_path, "adapter")
        model.save_pretrained(model_output_path)
        tokenizer.save_pretrained(model_output_path)
        print("LoRA adapter exported successfully!")

        # Clean up distributed process group for main worker AFTER model saving
        if torch.distributed.is_initialized():
            print(f"Worker {world_rank} - Cleaning up distributed process group")
            torch.distributed.destroy_process_group()
            print(f"Worker {world_rank} - Distributed process group destroyed")

        print("Collecting essential metrics")
        metrics_dict = {}

        if hasattr(train_result, "train_loss"):
            metrics_dict["final_train_loss"] = train_result.train_loss
        if hasattr(train_result, "train_runtime"):
            metrics_dict["train_runtime_seconds"] = train_result.train_runtime
        if hasattr(train_result, "train_samples_per_second"):
            metrics_dict["throughput_samples_per_sec"] = (
                train_result.train_samples_per_second
            )

        total_params = sum(p.numel() for p in model.parameters())
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        metrics_dict["total_parameters_millions"] = total_params / 1_000_000
        metrics_dict["trainable_parameters_millions"] = trainable_params / 1_000_000
        metrics_dict["lora_efficiency_percent"] = (
            trainable_params / total_params
        ) * 100

        metrics_dict["lora_rank"] = config.r
        metrics_dict["learning_rate"] = sft_config.learning_rate
        metrics_dict["effective_batch_size"] = (
            sft_config.per_device_train_batch_size * world_size
        )
        metrics_dict["dataset_size"] = len(dataset)

        metrics_dict["num_nodes"] = (
            world_size // torch.cuda.device_count()
            if torch.cuda.is_available() and torch.cuda.device_count() > 0
            else 1
        )
        if torch.cuda.is_available():
            metrics_dict["peak_gpu_memory_gb"] = torch.cuda.max_memory_allocated() / (
                1024**3
            )

        # Compare against None rather than truthiness: a logged loss of exactly 0.0
        # is a valid value and must not suppress the loss metrics.
        first_loss = metrics_callback.initial_loss
        last_loss = metrics_callback.final_loss
        if first_loss is not None and last_loss is not None:
            metrics_dict["initial_loss"] = first_loss
            metrics_dict["loss_reduction"] = first_loss - last_loss
            if first_loss != 0:
                metrics_dict["loss_reduction_percent"] = (
                    (first_loss - last_loss) / first_loss
                ) * 100

        with open(os.path.join(pvc_path, "metrics.json"), "w") as f:
            json.dump(metrics_dict, f, indent=2)

        print(
            f"Exported {len(metrics_dict)} metrics to {os.path.join(pvc_path, 'metrics.json')}"
        )
        print("Model and metrics exported successfully!")

    print("Copying dataset to PVC...")
    dataset_path = os.path.join(pvc_path, "dataset", "train")
    os.makedirs(dataset_path, exist_ok=True)
    shutil.copytree(
        input_dataset.path,
        dataset_path,
        dirs_exist_ok=True,
    )
    print(f"Dataset copied successfully from {input_dataset.path} to {dataset_path}")

    print("=== Starting TrainJob creation process ===")

    target_modules = get_target_modules(model_name)
    print(f"Selected LoRA target modules for {model_name}: {target_modules}")

    with open(
        "/var/run/secrets/kubernetes.io/serviceaccount/namespace", "r"
    ) as ns_file:
        # Strip so that a trailing newline never ends up in the TrainJob metadata.
        namespace = ns_file.read().strip()

    print("Generating command...")

    func_code = inspect.getsource(train_model_func)
    func_code = textwrap.dedent(func_code)

    func_call_code = f"""
import os
import json

# Parse function arguments from environment variable
config_json = os.environ.get("TRAINING_CONFIG", "{{}}")
func_args = json.loads(config_json)

# Call the training function with parsed arguments
{train_model_func.__name__}(**func_args)
"""

    func_code = f"{func_code}\n{func_call_code}"

    # Build package list based on configuration
    packages = ["transformers", "peft", "accelerate", "trl"]
    if use_flash_attention:
        packages.append("flash-attn")
    packages_str = " ".join(packages)

    # The heredoc delimiter is quoted ('EOF') so the shell does not expand anything
    # inside the embedded Python source.
    install_script = f"""set -e
set -o pipefail

echo "=== Starting container setup ==="
echo "Python version: $(python --version)"

if ! [ -x "$(command -v pip)" ]; then
    echo "Installing pip..."
    python -m ensurepip || python -m ensurepip --user
fi

echo "Installing Python packages..."
PIP_DISABLE_PIP_VERSION_CHECK=1 python -m pip install --user --quiet --no-warn-script-location {packages_str}

echo "Creating training script..."
cat > ephemeral_component.py << 'EOF'
{func_code}
EOF

echo "Starting distributed training..."
torchrun --nproc_per_node=1 ephemeral_component.py"""

    command = ["bash", "-c", install_script]

    print(f"Generated command: {command}")

    print("Loading Kubernetes configuration...")
    try:
        config.load_incluster_config()
        print("Loaded in-cluster Kubernetes configuration")
    except config.ConfigException:
        config.load_kube_config()
        print("Loaded kubeconfig Kubernetes configuration")

    print("Creating Kubernetes API client...")
    api_client = k8s_client.ApiClient()
    custom_objects_api = k8s_client.CustomObjectsApi(api_client)
    print("Successfully created Kubernetes API client")

    print("Defining TrainJob resource...")

    # The training arguments travel to the workers as one JSON document in the
    # TRAINING_CONFIG environment variable; its keys must match the parameter names
    # of train_model_func.
    training_config = {
        "lora_rank": lora_rank,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "max_length": max_length,
        "model_name": model_name,
        "dataset_path": dataset_path,
        "epochs": epochs,
        "pvc_path": pvc_path,
        "target_modules": target_modules,
        "max_steps": max_steps,
        "logging_steps": logging_steps,
        "save_steps": save_steps,
        "save_strategy": save_strategy,
        "optimizer": optimizer,
        "adam_beta1": adam_beta1,
        "adam_beta2": adam_beta2,
        "adam_epsilon": adam_epsilon,
        "weight_decay": weight_decay,
        "use_flash_attention": use_flash_attention,
    }
    env_vars = [
        {"name": "HOME", "value": "/tmp"},
        {"name": "TRAINING_CONFIG", "value": json.dumps(training_config)},
        *(kubernetes_config.env or []),
    ]

    # NOTE(review): KUBERNETES_AFFINITY is declared as a passthrough on the component
    # but is not forwarded to the TrainJob below - confirm whether that is intended.
    train_job = {
        "apiVersion": "trainer.kubeflow.org/v1alpha1",
        "kind": "TrainJob",
        "metadata": {"name": f"kfp-{run_id}", "namespace": namespace},
        "spec": {
            "runtimeRef": {"name": trainer_runtime},
            "trainer": {
                "numNodes": num_nodes,
                "resourcesPerNode": kubernetes_config.resources,
                "env": env_vars,
                "command": command,
            },
            "podSpecOverrides": [
                {
                    "targetJobs": [{"name": "node"}],
                    "volumes": kubernetes_config.volumes,
                    "containers": [
                        {
                            "name": "node",
                            "volumeMounts": kubernetes_config.volume_mounts,
                        }
                    ],
                    "nodeSelector": kubernetes_config.node_selector,
                    "tolerations": kubernetes_config.tolerations,
                }
            ],
        },
    }

    print("TrainJob definition created:")
    print(f"  - Name: kfp-{run_id}")
    print(f"  - Namespace: {namespace}")

    print(f"  - Runtime: {trainer_runtime}")
    print(f"  - Nodes: {num_nodes}")
    print(f"  - Model: {model_name}")
    print(f"  - Dataset: {dataset_path}")
    print(f"  - Epochs: {epochs}")

    print("Submitting TrainJob to Kubernetes...")
    try:
        response = custom_objects_api.create_namespaced_custom_object(
            group="trainer.kubeflow.org",
            version="v1alpha1",
            namespace=namespace,
            plural="trainjobs",
            body=train_job,
        )
        job_name = response["metadata"]["name"]
        print(f"TrainJob {job_name} created successfully")
        print(f"Response metadata: {response.get('metadata', {})}")
    except ApiException as e:
        print(f"Error creating TrainJob: {e}")
        print(f"Error details: {e.body}")
        print(f"Error status: {e.status}")
        raise

    print(f"Starting to monitor TrainJob {job_name} status...")
    check_count = 0
    while True:
        check_count += 1
        try:
            print(f"Checking job status (attempt {check_count})...")
            job_status = custom_objects_api.get_namespaced_custom_object(
                group="trainer.kubeflow.org",
                version="v1alpha1",
                namespace=namespace,
                plural="trainjobs",
                name=job_name,
            )

            status = job_status.get("status", {})
            conditions = status.get("conditions", [])
            print(f"Job status conditions: {conditions}")

            completed = False
            failed = False

            for condition in conditions:
                condition_type = condition.get("type", "")
                condition_status = condition.get("status", "")
                condition_reason = condition.get("reason", "")
                condition_message = condition.get("message", "")

                print(
                    f"Condition: type={condition_type}, status={condition_status}, reason={condition_reason}"
                )

                # Only conditions whose status is "True" are terminal signals.
                if condition_status != "True":
                    continue

                if condition_type == "Complete":
                    print(
                        f"Training job {job_name} completed successfully: {condition_message}"
                    )
                    completed = True
                    break
                if condition_type == "Failed":
                    print(f"Training job {job_name} failed: {condition_message}")
                    failed = True
                    break
                if condition_type == "Cancelled":
                    print(f"Training job {job_name} was cancelled: {condition_message}")
                    failed = True
                    break

            if completed:
                break
            if failed:
                raise RuntimeError(f"Training job {job_name} failed or was cancelled")
            print("Job is still running, continuing to wait...")

        except ApiException as e:
            print(f"Error checking job status: {e}")
            print(f"Error details: {e.body}")
            # A 404 means the TrainJob no longer exists; polling again can never
            # succeed, so stop instead of looping forever. Any other API error is
            # treated as transient and retried on the next iteration.
            if e.status == 404:
                raise RuntimeError(
                    f"Training job {job_name} no longer exists in namespace {namespace}"
                ) from e

        print("Waiting 10 seconds before next check...")
        time.sleep(10)

    print(f"Training job {job_name} completed. Logs would be retrieved here.")

    print("Processing training results...")

    metrics_file_path = os.path.join(pvc_path, "metrics.json")
    print(f"Looking for metrics file at: {metrics_file_path}")
    if os.path.exists(metrics_file_path):
        print(f"Found metrics file, reading from {metrics_file_path}")
        with open(metrics_file_path, "r") as f:
            metrics_dict = json.load(f)

        print(f"Loaded {len(metrics_dict)} metrics from file")

        exported_count = 0
        for metric_name, metric_value in metrics_dict.items():
            # Ignore metrics that are 0 to avoid a bug in the RHOAI UI.
            if isinstance(metric_value, (int, float)) and metric_value != 0:
                output_metrics.log_metric(metric_name, metric_value)
                print(f"Exported metric: {metric_name} = {metric_value}")
                exported_count += 1

        print(f"Successfully exported {exported_count} metrics to Kubeflow")
        # Remove the file once its metrics have been logged.
        os.remove(metrics_file_path)
    else:
        print(f"Warning: Metrics file {metrics_file_path} not found")

    print("Copying model from PVC to Kubeflow output path...")
    model_source = os.path.join(pvc_path, "adapter")
    print(f"Model source: {model_source}")
    print(f"Destination: {output_model.path}")

    if not os.path.exists(model_source):
        raise FileNotFoundError(
            f"Trained model not found at expected location: {model_source}"
        )

    output_model.name = f"{model_name}-adapter"
    shutil.copytree(model_source, output_model.path, dirs_exist_ok=True)
    print(f"Model copied successfully from {model_source} to {output_model.path}")

    print("=== TrainJob process completed successfully ===")
+# Inputs: +# adam_beta1: float [Default: 0.9] +# adam_beta2: float [Default: 0.999] +# adam_epsilon: float [Default: 1e-08] +# batch_size: int [Default: 16.0] +# epochs: int [Default: 10.0] +# input_dataset: system.Dataset +# kubernetes_config: TaskConfig +# learning_rate: float [Default: 0.0003] +# logging_steps: int [Default: 10.0] +# lora_rank: int [Default: 8.0] +# max_length: int [Default: 64.0] +# max_steps: int +# model_name: str +# num_nodes: int [Default: 2.0] +# optimizer: str [Default: 'adamw_torch'] +# pvc_path: str +# run_id: str +# save_steps: int +# save_strategy: str [Default: 'epoch'] +# trainer_runtime: str [Default: 'torch-distributed'] +# use_flash_attention: bool [Default: False] +# weight_decay: float [Default: 0.01] +# Outputs: +# output_metrics: system.Metrics +# output_model: system.Model +components: + comp-train-model: + executorLabel: exec-train-model + inputDefinitions: + artifacts: + input_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + parameters: + adam_beta1: + defaultValue: 0.9 + description: Beta1 parameter for Adam optimizer. Defaults to 0.9. + isOptional: true + parameterType: NUMBER_DOUBLE + adam_beta2: + defaultValue: 0.999 + description: Beta2 parameter for Adam optimizer. Defaults to 0.999. + isOptional: true + parameterType: NUMBER_DOUBLE + adam_epsilon: + defaultValue: 1.0e-08 + description: Epsilon parameter for Adam optimizer. Defaults to 1e-8. + isOptional: true + parameterType: NUMBER_DOUBLE + batch_size: + defaultValue: 16.0 + description: Per-device training batch size. Defaults to 16. + isOptional: true + parameterType: NUMBER_INTEGER + epochs: + defaultValue: 10.0 + description: Number of training epochs. Defaults to 10. + isOptional: true + parameterType: NUMBER_INTEGER + kubernetes_config: + isOptional: true + parameterType: TASK_CONFIG + learning_rate: + defaultValue: 0.0003 + description: Learning rate for training optimization. Defaults to 3e-4. 
+ isOptional: true + parameterType: NUMBER_DOUBLE + logging_steps: + defaultValue: 10.0 + description: Number of steps between logging outputs. Defaults to 10. + isOptional: true + parameterType: NUMBER_INTEGER + lora_rank: + defaultValue: 8.0 + description: LoRA adapter rank (lower = fewer parameters, faster training). + Defaults to 8. + isOptional: true + parameterType: NUMBER_INTEGER + max_length: + defaultValue: 64.0 + description: Maximum token sequence length for training. Defaults to 64. + isOptional: true + parameterType: NUMBER_INTEGER + max_steps: + description: Maximum number of training steps. If specified, overrides epochs. + Defaults to None. + isOptional: true + parameterType: NUMBER_INTEGER + model_name: + description: HuggingFace model identifier (e.g., "meta-llama/Llama-3.2-3B-Instruct"). + parameterType: STRING + num_nodes: + defaultValue: 2.0 + description: Number of nodes for distributed training. Defaults to 2. + isOptional: true + parameterType: NUMBER_INTEGER + optimizer: + defaultValue: adamw_torch + description: Optimizer to use (e.g., "adamw_torch", "adamw_torch_fused"). + Defaults to "adamw_torch". + isOptional: true + parameterType: STRING + pvc_path: + description: Base path within the PVC for storing outputs. + parameterType: STRING + run_id: + description: Unique identifier for this training run. Use dsl.PIPELINE_JOB_ID_PLACEHOLDER. + parameterType: STRING + save_steps: + description: Number of steps between model checkpoints. Defaults to None. + isOptional: true + parameterType: NUMBER_INTEGER + save_strategy: + defaultValue: epoch + description: Checkpoint saving strategy ("epoch" or "steps"). Defaults to + "epoch". + isOptional: true + parameterType: STRING + trainer_runtime: + defaultValue: torch-distributed + description: Runtime to use for Kubeflow Trainer. Defaults to "torch-distributed". 
+ isOptional: true + parameterType: STRING + use_flash_attention: + defaultValue: false + description: Whether to use Flash Attention 2 for improved performance. + Defaults to False. + isOptional: true + parameterType: BOOLEAN + weight_decay: + defaultValue: 0.01 + description: Weight decay for regularization. Defaults to 0.01. + isOptional: true + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + output_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + output_model: + artifactType: + schemaTitle: system.Model + schemaVersion: 0.0.1 + taskConfigPassthroughs: + - field: RESOURCES + - field: KUBERNETES_TOLERATIONS + - field: KUBERNETES_NODE_SELECTOR + - field: KUBERNETES_AFFINITY + - applyToTask: true + field: ENV + - applyToTask: true + field: KUBERNETES_VOLUMES +deploymentSpec: + executors: + exec-train-model: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - train_model + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kubernetes'\ + \ && python3 -m pip install --quiet --no-warn-script-location 'git+https://github.com/kubeflow/pipelines@master#egg=kfp&subdirectory=sdk/python'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef train_model(\n input_dataset: dsl.Input[dsl.Dataset],\n\ + \ model_name: str,\n run_id: str,\n pvc_path: str,\n\ + \ output_model: dsl.Output[dsl.Model],\n output_metrics: dsl.Output[dsl.Metrics],\n\ + \ # Training configuration parameters\n epochs: 
int = 10,\n\ + \ lora_rank: int = 8,\n learning_rate: float = 3e-4,\n \ + \ batch_size: int = 16,\n max_length: int = 64,\n # Training\ + \ control parameters\n max_steps: Optional[int] = None,\n \ + \ logging_steps: int = 10,\n save_steps: Optional[int] = None,\n\ + \ save_strategy: str = \"epoch\",\n # Optimizer parameters\n\ + \ optimizer: str = \"adamw_torch\",\n adam_beta1: float =\ + \ 0.9,\n adam_beta2: float = 0.999,\n adam_epsilon: float\ + \ = 1e-8,\n weight_decay: float = 0.01,\n # Performance optimization\n\ + \ use_flash_attention: bool = False,\n # Infrastructure parameters\n\ + \ num_nodes: int = 2,\n trainer_runtime: str = \"torch-distributed\"\ + ,\n kubernetes_config: dsl.TaskConfig = None,\n):\n \"\"\"Train\ + \ a large language model using distributed training with LoRA fine-tuning.\n\ + \n This function creates and manages a Kubernetes TrainJob for distributed\ + \ training\n of a large language model using LoRA (Low-Rank Adaptation)\ + \ fine-tuning. It handles\n the complete training workflow including\ + \ job creation, monitoring, and artifact\n collection.\n\n Args:\n\ + \ model_name (str): HuggingFace model identifier (e.g., \"meta-llama/Llama-3.2-3B-Instruct\"\ + ).\n run_id (str): Unique identifier for this training run. Use dsl.PIPELINE_JOB_ID_PLACEHOLDER.\n\ + \ dataset_path (str): Path to the training dataset within the PVC.\n\ + \ pvc_path (str): Base path within the PVC for storing outputs.\n\ + \ output_model (dsl.Output[dsl.Model]): Kubeflow output artifact\ + \ for the trained model.\n output_metrics (dsl.Output[dsl.Metrics]):\ + \ Kubeflow output artifact for training metrics.\n epochs (int, optional):\ + \ Number of training epochs. Defaults to 10.\n lora_rank (int, optional):\ + \ LoRA adapter rank (lower = fewer parameters, faster training). Defaults\ + \ to 8.\n learning_rate (float, optional): Learning rate for training\ + \ optimization. Defaults to 3e-4.\n batch_size (int, optional): Per-device\ + \ training batch size. 
Defaults to 16.\n max_length (int, optional):\ + \ Maximum token sequence length for training. Defaults to 64.\n max_steps\ + \ (int, optional): Maximum number of training steps. If specified, overrides\ + \ epochs. Defaults to None.\n logging_steps (int, optional): Number\ + \ of steps between logging outputs. Defaults to 10.\n save_steps\ + \ (int, optional): Number of steps between model checkpoints. Defaults to\ + \ None.\n save_strategy (str, optional): Checkpoint saving strategy\ + \ (\"epoch\" or \"steps\"). Defaults to \"epoch\".\n optimizer (str,\ + \ optional): Optimizer to use (e.g., \"adamw_torch\", \"adamw_torch_fused\"\ + ). Defaults to \"adamw_torch\".\n adam_beta1 (float, optional): Beta1\ + \ parameter for Adam optimizer. Defaults to 0.9.\n adam_beta2 (float,\ + \ optional): Beta2 parameter for Adam optimizer. Defaults to 0.999.\n \ + \ adam_epsilon (float, optional): Epsilon parameter for Adam optimizer.\ + \ Defaults to 1e-8.\n weight_decay (float, optional): Weight decay\ + \ for regularization. Defaults to 0.01.\n use_flash_attention (bool,\ + \ optional): Whether to use Flash Attention 2 for improved performance.\ + \ Defaults to False.\n num_nodes (int, optional): Number of nodes\ + \ for distributed training. Defaults to 2.\n trainer_runtime (str,\ + \ optional): Runtime to use for Kubeflow Trainer. 
Defaults to \"torch-distributed\"\ + .\n \"\"\"\n import json\n import os\n import shutil\n import\ + \ textwrap\n import time\n import inspect\n\n from kubernetes import\ + \ client as k8s_client, config\n from kubernetes.client.rest import ApiException\n\ + \n def get_target_modules(model_name: str) -> list:\n \"\"\"Get\ + \ appropriate LoRA target modules based on model architecture.\n\n \ + \ Selects optimal layers for LoRA adaptation based on research findings:\n\ + \ - Attention layers (q_proj, k_proj, v_proj, o_proj) control attention\ + \ patterns\n - MLP layers (gate_proj, up_proj, down_proj) store task-specific\ + \ knowledge\n\n Model-specific targeting:\n - Granite: Attention\ + \ layers only (q,k,v,o)\n - LLaMA/Mistral/Qwen: Full coverage (attention\ + \ + MLP)\n - Phi: Uses 'dense' instead of 'o_proj'\n - Unknown:\ + \ Conservative fallback (q,v)\n\n Based on LoRA (arXiv:2106.09685),\ + \ QLoRA (arXiv:2305.14314), and model-specific research.\n \"\"\"\ + \n model_name_lower = model_name.lower()\n\n if \"granite\"\ + \ in model_name_lower:\n return [\"q_proj\", \"v_proj\", \"k_proj\"\ + , \"o_proj\"]\n elif \"llama\" in model_name_lower:\n \ + \ return [\n \"q_proj\",\n \"v_proj\",\n \ + \ \"k_proj\",\n \"o_proj\",\n \ + \ \"gate_proj\",\n \"up_proj\",\n \"down_proj\"\ + ,\n ]\n elif \"mistral\" in model_name_lower or \"mixtral\"\ + \ in model_name_lower:\n return [\n \"q_proj\"\ + ,\n \"v_proj\",\n \"k_proj\",\n \ + \ \"o_proj\",\n \"gate_proj\",\n \"up_proj\"\ + ,\n \"down_proj\",\n ]\n elif \"qwen\"\ + \ in model_name_lower:\n return [\n \"q_proj\"\ + ,\n \"v_proj\",\n \"k_proj\",\n \ + \ \"o_proj\",\n \"gate_proj\",\n \"up_proj\"\ + ,\n \"down_proj\",\n ]\n elif \"phi\" in\ + \ model_name_lower:\n return [\"q_proj\", \"v_proj\", \"k_proj\"\ + , \"dense\"]\n else:\n print(\n f\"Warning:\ + \ Unknown model architecture for {model_name}, using conservative LoRA targets\"\ + \n )\n return [\"q_proj\", \"v_proj\"]\n\n def\ + \ train_model_func(\n 
lora_rank: int,\n learning_rate:\ + \ float,\n batch_size: int,\n max_length: int,\n \ + \ model_name: str,\n dataset_path: str,\n \ + \ epochs: int,\n pvc_path: str,\n target_modules:\ + \ list,\n max_steps: int,\n logging_steps: int,\n\ + \ save_steps: int,\n save_strategy: str,\n \ + \ optimizer: str,\n adam_beta1: float,\n adam_beta2:\ + \ float,\n adam_epsilon: float,\n weight_decay: float,\n\ + \ use_flash_attention: bool,\n ):\n import os\n \ + \ import json\n import torch\n from datasets import load_from_disk\n\ + \ from peft import get_peft_model, LoraConfig\n from transformers\ + \ import (\n AutoModelForCausalLM,\n AutoTokenizer,\n\ + \ TrainerCallback,\n )\n from trl import SFTConfig,\ + \ SFTTrainer\n\n local_rank = int(os.environ.get(\"LOCAL_RANK\",\ + \ 0))\n world_rank = int(os.environ.get(\"RANK\", 0))\n world_size\ + \ = int(os.environ.get(\"WORLD_SIZE\", 1))\n\n print(\n \ + \ f\"Worker info - Local rank: {local_rank}, World rank: {world_rank},\ + \ World size: {world_size}\"\n )\n\n is_main_worker = world_rank\ + \ == 0\n\n class MetricsCallback(TrainerCallback):\n def\ + \ __init__(self, is_main_worker):\n self.is_main_worker =\ + \ is_main_worker\n self.initial_loss = None\n \ + \ self.final_loss = None\n\n def on_log(self, args, state,\ + \ control, logs=None, **kwargs):\n if logs and self.is_main_worker\ + \ and \"loss\" in logs:\n if self.initial_loss is None:\n\ + \ self.initial_loss = logs[\"loss\"]\n \ + \ self.final_loss = logs[\"loss\"]\n\n metrics_callback\ + \ = MetricsCallback(is_main_worker)\n\n print(\"Downloading and loading\ + \ model\")\n model_kwargs = {\n \"device_map\": \"auto\"\ + ,\n \"torch_dtype\": torch.float16,\n \"trust_remote_code\"\ + : True,\n }\n if use_flash_attention:\n model_kwargs[\"\ + attn_implementation\"] = \"flash_attention_2\"\n\n model = AutoModelForCausalLM.from_pretrained(model_name,\ + \ **model_kwargs)\n\n print(f\"Using LoRA target modules for {model_name}:\ + \ {target_modules}\")\n\n config = 
LoraConfig(\n r=lora_rank,\n\ + \ lora_alpha=lora_rank * 2,\n bias=\"none\",\n \ + \ lora_dropout=0.05,\n task_type=\"CAUSAL_LM\",\n \ + \ target_modules=target_modules,\n )\n model = get_peft_model(model,\ + \ config)\n\n print(\"Loading dataset\")\n dataset = load_from_disk(dataset_path)\n\ + \n tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n\ + \n if tokenizer.pad_token is None:\n tokenizer.pad_token\ + \ = tokenizer.eos_token\n tokenizer.padding_side = \"right\"\n\n\ + \ sft_config = SFTConfig(\n ## Memory optimization\n \ + \ gradient_checkpointing=True,\n gradient_checkpointing_kwargs={\"\ + use_reentrant\": False},\n gradient_accumulation_steps=1,\n \ + \ per_device_train_batch_size=batch_size,\n auto_find_batch_size=True,\n\ + \ ## Dataset configuration\n max_length=max_length,\n\ + \ packing=use_flash_attention, # Packing works best with Flash\ + \ Attention\n ## Training parameters\n num_train_epochs=epochs\ + \ if max_steps is None else None,\n max_steps=-1 if max_steps\ + \ is None else max_steps,\n learning_rate=learning_rate,\n \ + \ optim=optimizer,\n ## Optimizer parameters\n \ + \ adam_beta1=adam_beta1,\n adam_beta2=adam_beta2,\n \ + \ adam_epsilon=adam_epsilon,\n weight_decay=weight_decay,\n\ + \ ## Logging and saving\n logging_steps=logging_steps,\n\ + \ save_steps=save_steps,\n save_strategy=save_strategy,\n\ + \ logging_dir=\"./logs\",\n report_to=\"none\",\n\ + \ )\n trainer = SFTTrainer(\n model=model,\n \ + \ processing_class=tokenizer,\n args=sft_config,\n \ + \ train_dataset=dataset,\n callbacks=[metrics_callback],\n\ + \ )\n\n train_result = trainer.train()\n\n if torch.distributed.is_initialized():\n\ + \ torch.distributed.barrier()\n print(f\"Worker {world_rank}\ + \ - Training completed and synchronized\")\n\n if not is_main_worker:\n\ + \ print(\n f\"Worker {world_rank} - Skipping model\ + \ export and metrics (not main worker)\"\n )\n # Clean\ + \ up distributed process group for non-main workers\n if 
torch.distributed.is_initialized():\n\ + \ print(f\"Worker {world_rank} - Cleaning up distributed\ + \ process group\")\n torch.distributed.destroy_process_group()\n\ + \ print(f\"Worker {world_rank} - Distributed process group\ + \ destroyed\")\n return\n\n print(\"Main worker (rank\ + \ 0) - Exporting model and metrics...\")\n\n # Save LoRA adapter\n\ + \ model_output_path = os.path.join(pvc_path, \"adapter\")\n \ + \ model.save_pretrained(model_output_path)\n tokenizer.save_pretrained(model_output_path)\n\ + \ print(\"LoRA adapter exported successfully!\")\n\n # Clean\ + \ up distributed process group for main worker AFTER model saving\n \ + \ if torch.distributed.is_initialized():\n print(f\"Worker\ + \ {world_rank} - Cleaning up distributed process group\")\n torch.distributed.destroy_process_group()\n\ + \ print(f\"Worker {world_rank} - Distributed process group destroyed\"\ + )\n\n print(f\"Collecting essential metrics\")\n metrics_dict\ + \ = {}\n\n if hasattr(train_result, \"train_loss\"):\n \ + \ metrics_dict[\"final_train_loss\"] = train_result.train_loss\n \ + \ if hasattr(train_result, \"train_runtime\"):\n metrics_dict[\"\ + train_runtime_seconds\"] = train_result.train_runtime\n if hasattr(train_result,\ + \ \"train_samples_per_second\"):\n metrics_dict[\"throughput_samples_per_sec\"\ + ] = (\n train_result.train_samples_per_second\n \ + \ )\n\n total_params = sum(p.numel() for p in model.parameters())\n\ + \ trainable_params = sum(p.numel() for p in model.parameters() if\ + \ p.requires_grad)\n metrics_dict[\"total_parameters_millions\"]\ + \ = total_params / 1_000_000\n metrics_dict[\"trainable_parameters_millions\"\ + ] = trainable_params / 1_000_000\n metrics_dict[\"lora_efficiency_percent\"\ + ] = (\n trainable_params\ + \ / total_params\n ) *\ + \ 100\n\n metrics_dict[\"lora_rank\"] = config.r\n metrics_dict[\"\ + learning_rate\"] = sft_config.learning_rate\n metrics_dict[\"effective_batch_size\"\ + ] = (\n 
sft_config.per_device_train_batch_size * world_size\n\ + \ )\n metrics_dict[\"dataset_size\"] = len(dataset)\n\n \ + \ metrics_dict[\"num_nodes\"] = (\n world_size // torch.cuda.device_count()\n\ + \ if torch.cuda.is_available() and torch.cuda.device_count()\ + \ > 0\n else 1\n )\n if torch.cuda.is_available():\n\ + \ metrics_dict[\"peak_gpu_memory_gb\"] = torch.cuda.max_memory_allocated()\ + \ / (\n 1024**3\n )\n\n if metrics_callback.initial_loss\ + \ and metrics_callback.final_loss:\n metrics_dict[\"initial_loss\"\ + ] = metrics_callback.initial_loss\n metrics_dict[\"loss_reduction\"\ + ] = (\n metrics_callback.initial_loss - metrics_callback.final_loss\n\ + \ )\n metrics_dict[\"loss_reduction_percent\"] = (\n\ + \ (metrics_callback.initial_loss\ + \ - metrics_callback.final_loss)\n \ + \ / metrics_callback.initial_loss\n \ + \ ) * 100\n\n with open(os.path.join(pvc_path,\ + \ \"metrics.json\"), \"w\") as f:\n json.dump(metrics_dict, f,\ + \ indent=2)\n\n print(\n f\"Exported {len(metrics_dict)}\ + \ metrics to {os.path.join(pvc_path, 'metrics.json')}\"\n )\n \ + \ print(\"Model and metrics exported successfully!\")\n\n print(\"\ + Copying dataset to PVC...\")\n dataset_path = os.path.join(pvc_path,\ + \ \"dataset\", \"train\")\n os.makedirs(dataset_path, exist_ok=True)\n\ + \ shutil.copytree(\n input_dataset.path,\n dataset_path,\n\ + \ dirs_exist_ok=True,\n )\n print(f\"Dataset copied successfully\ + \ from {input_dataset.path} to {dataset_path}\")\n\n print(\"=== Starting\ + \ TrainJob creation process ===\")\n\n target_modules = get_target_modules(model_name)\n\ + \ print(f\"Selected LoRA target modules for {model_name}: {target_modules}\"\ + )\n\n with open(\n \"/var/run/secrets/kubernetes.io/serviceaccount/namespace\"\ + , \"r\"\n ) as ns_file:\n namespace = ns_file.readline()\n\n \ + \ print(\"Generating command...\")\n\n func_code = inspect.getsource(train_model_func)\n\ + \ func_code = textwrap.dedent(func_code)\n\n func_call_code = f\"\"\ + \"\nimport 
os\nimport json\n\n# Parse function arguments from environment\ + \ variable\nconfig_json = os.environ.get(\"TRAINING_CONFIG\", \"{{}}\")\n\ + func_args = json.loads(config_json)\n\n# Call the training function with\ + \ parsed arguments\n{train_model_func.__name__}(**func_args)\n\"\"\"\n\n\ + \ func_code = f\"{func_code}\\n{func_call_code}\"\n\n # Build package\ + \ list based on configuration\n packages = [\"transformers\", \"peft\"\ + , \"accelerate\", \"trl\"]\n if use_flash_attention:\n packages.append(\"\ + flash-attn\")\n packages_str = \" \".join(packages)\n\n install_script\ + \ = f\"\"\"set -e\nset -o pipefail\n\necho \"=== Starting container setup\ + \ ===\"\necho \"Python version: $(python --version)\"\n\nif ! [ -x \"$(command\ + \ -v pip)\" ]; then\n echo \"Installing pip...\"\n python -m ensurepip\ + \ || python -m ensurepip --user\nfi\n\necho \"Installing Python packages...\"\ + \nPIP_DISABLE_PIP_VERSION_CHECK=1 python -m pip install --user --quiet --no-warn-script-location\ + \ {packages_str}\n\necho \"Creating training script...\"\ncat > ephemeral_component.py\ + \ << 'EOF'\n{func_code}\nEOF\n\necho \"Starting distributed training...\"\ + \ntorchrun --nproc_per_node=1 ephemeral_component.py\"\"\"\n\n command\ + \ = [\"bash\", \"-c\", install_script]\n\n print(f\"Generated command:\ + \ {command}\")\n print(f\"Command length: {len(command)}\")\n print(f\"\ + Command type: {type(command)}\")\n\n print(\"Loading Kubernetes configuration...\"\ + )\n try:\n config.load_incluster_config()\n print(\"Loaded\ + \ in-cluster Kubernetes configuration\")\n except config.ConfigException:\n\ + \ config.load_kube_config()\n print(\"Loaded kubeconfig Kubernetes\ + \ configuration\")\n\n print(\"Creating Kubernetes API client...\")\n\ + \ api_client = k8s_client.ApiClient()\n custom_objects_api = k8s_client.CustomObjectsApi(api_client)\n\ + \ print(\"Successfully created Kubernetes API client\")\n\n print(\"\ + Defining TrainJob resource...\")\n\n env_vars = [\n 
{\"name\"\ + : \"HOME\", \"value\": \"/tmp\"},\n {\n \"name\": \"TRAINING_CONFIG\"\ + ,\n \"value\": json.dumps(\n {\n \ + \ \"lora_rank\": lora_rank,\n \"learning_rate\"\ + : learning_rate,\n \"batch_size\": batch_size,\n \ + \ \"max_length\": max_length,\n \"model_name\"\ + : model_name,\n \"dataset_path\": dataset_path,\n \ + \ \"epochs\": epochs,\n \"pvc_path\"\ + : pvc_path,\n \"target_modules\": target_modules,\n \ + \ \"max_steps\": max_steps,\n \"logging_steps\"\ + : logging_steps,\n \"save_steps\": save_steps,\n \ + \ \"save_strategy\": save_strategy,\n \ + \ \"optimizer\": optimizer,\n \"adam_beta1\": adam_beta1,\n\ + \ \"adam_beta2\": adam_beta2,\n \"\ + adam_epsilon\": adam_epsilon,\n \"weight_decay\": weight_decay,\n\ + \ \"use_flash_attention\": use_flash_attention,\n \ + \ }\n ),\n },\n *(kubernetes_config.env\ + \ or []),\n ]\n\n train_job = {\n \"apiVersion\": \"trainer.kubeflow.org/v1alpha1\"\ + ,\n \"kind\": \"TrainJob\",\n \"metadata\": {\"name\": f\"\ + kfp-{run_id}\", \"namespace\": namespace},\n \"spec\": {\n \ + \ \"runtimeRef\": {\"name\": trainer_runtime},\n \"trainer\"\ + : {\n \"numNodes\": num_nodes,\n \"resourcesPerNode\"\ + : kubernetes_config.resources,\n \"env\": env_vars,\n \ + \ \"command\": command,\n },\n \"podSpecOverrides\"\ + : [\n {\n \"targetJobs\": [{\"name\":\ + \ \"node\"}],\n \"volumes\": kubernetes_config.volumes,\n\ + \ \"containers\": [\n {\n \ + \ \"name\": \"node\",\n \ + \ \"volumeMounts\": kubernetes_config.volume_mounts,\n \ + \ }\n ],\n \"nodeSelector\"\ + : kubernetes_config.node_selector,\n \"tolerations\"\ + : kubernetes_config.tolerations,\n }\n ],\n \ + \ },\n }\n\n print(f\"TrainJob definition created:\")\n print(f\"\ + \ - Name: kfp-{run_id}\")\n print(f\" - Namespace: {namespace}\")\n\ + \n print(f\" - Runtime: {trainer_runtime}\")\n print(f\" - Nodes:\ + \ {num_nodes}\")\n print(f\" - Model: {model_name}\")\n print(f\"\ + \ - Dataset: {dataset_path}\")\n print(f\" - Epochs: {epochs}\")\n\n\ + \ 
print(\"Submitting TrainJob to Kubernetes...\")\n try:\n \ + \ response = custom_objects_api.create_namespaced_custom_object(\n \ + \ group=\"trainer.kubeflow.org\",\n version=\"v1alpha1\"\ + ,\n namespace=namespace,\n plural=\"trainjobs\",\n\ + \ body=train_job,\n )\n job_name = response[\"\ + metadata\"][\"name\"]\n print(f\"TrainJob {job_name} created successfully\"\ + )\n print(f\"Response metadata: {response.get('metadata', {})}\"\ + )\n except ApiException as e:\n print(f\"Error creating TrainJob:\ + \ {e}\")\n print(f\"Error details: {e.body}\")\n print(f\"\ + Error status: {e.status}\")\n raise\n\n print(f\"Starting to monitor\ + \ TrainJob {job_name} status...\")\n check_count = 0\n while True:\n\ + \ check_count += 1\n try:\n print(f\"Checking job\ + \ status (attempt {check_count})...\")\n job_status = custom_objects_api.get_namespaced_custom_object(\n\ + \ group=\"trainer.kubeflow.org\",\n version=\"\ + v1alpha1\",\n namespace=namespace,\n plural=\"\ + trainjobs\",\n name=job_name,\n )\n\n \ + \ status = job_status.get(\"status\", {})\n conditions = status.get(\"\ + conditions\", [])\n print(f\"Job status conditions: {conditions}\"\ + )\n\n completed = False\n failed = False\n\n \ + \ for condition in conditions:\n condition_type = condition.get(\"\ + type\", \"\")\n condition_status = condition.get(\"status\"\ + , \"\")\n condition_reason = condition.get(\"reason\", \"\ + \")\n condition_message = condition.get(\"message\", \"\"\ + )\n\n print(\n f\"Condition: type={condition_type},\ + \ status={condition_status}, reason={condition_reason}\"\n \ + \ )\n\n if condition_type == \"Complete\" and condition_status\ + \ == \"True\":\n print(\n f\"\ + Training job {job_name} completed successfully: {condition_message}\"\n\ + \ )\n completed = True\n \ + \ break\n elif condition_type == \"Failed\" and\ + \ condition_status == \"True\":\n print(f\"Training job\ + \ {job_name} failed: {condition_message}\")\n failed\ + \ = True\n break\n elif condition_type\ + \ 
== \"Cancelled\" and condition_status == \"True\":\n \ + \ print(f\"Training job {job_name} was cancelled: {condition_message}\"\ + )\n failed = True\n break\n\n \ + \ if completed:\n break\n elif failed:\n\ + \ raise RuntimeError(f\"Training job {job_name} failed or\ + \ was cancelled\")\n else:\n print(f\"Job is still\ + \ running, continuing to wait...\")\n\n except ApiException as e:\n\ + \ print(f\"Error checking job status: {e}\")\n print(f\"\ + Error details: {e.body}\")\n\n print(f\"Waiting 10 seconds before\ + \ next check...\")\n time.sleep(10)\n\n print(f\"Training job\ + \ {job_name} completed. Logs would be retrieved here.\")\n\n print(\"\ + Processing training results...\")\n\n metrics_file_path = os.path.join(pvc_path,\ + \ \"metrics.json\")\n print(f\"Looking for metrics file at: {metrics_file_path}\"\ + )\n if os.path.exists(metrics_file_path):\n print(f\"Found metrics\ + \ file, reading from {metrics_file_path}\")\n with open(metrics_file_path,\ + \ \"r\") as f:\n metrics_dict = json.load(f)\n\n print(f\"\ + Loaded {len(metrics_dict)} metrics from file\")\n\n exported_count\ + \ = 0\n for metric_name, metric_value in metrics_dict.items():\n\ + \ # Ignore metrics that are 0 to avoid a bug in the RHOAI UI.\n\ + \ if isinstance(metric_value, (int, float)) and metric_value\ + \ != 0:\n output_metrics.log_metric(metric_name, metric_value)\n\ + \ print(f\"Exported metric: {metric_name} = {metric_value}\"\ + )\n exported_count += 1\n\n print(f\"Successfully\ + \ exported {exported_count} metrics to Kubeflow\")\n os.remove(metrics_file_path)\n\ + \ else:\n print(f\"Warning: Metrics file {metrics_file_path} not\ + \ found\")\n\n print(\"Copying model from PVC to Kubeflow output path...\"\ + )\n model_source = os.path.join(pvc_path, \"adapter\")\n print(f\"\ + Model source: {model_source}\")\n print(f\"Destination: {output_model.path}\"\ + )\n\n if not os.path.exists(model_source):\n raise FileNotFoundError(\n\ + \ f\"Trained model not found at expected 
location: {model_source}\"\ + \n )\n\n output_model.name = f\"{model_name}-adapter\"\n shutil.copytree(model_source,\ + \ output_model.path, dirs_exist_ok=True)\n print(f\"Model copied successfully\ + \ from {model_source} to {output_model.path}\")\n\n print(\"=== TrainJob\ + \ process completed successfully ===\")\n\n" + image: registry.access.redhat.com/ubi9/python-311:latest +pipelineInfo: + name: train-model +root: + dag: + outputs: + artifacts: + output_metrics: + artifactSelectors: + - outputArtifactKey: output_metrics + producerSubtask: train-model + output_model: + artifactSelectors: + - outputArtifactKey: output_model + producerSubtask: train-model + tasks: + train-model: + cachingOptions: + enableCache: true + componentRef: + name: comp-train-model + inputs: + artifacts: + input_dataset: + componentInputArtifact: input_dataset + parameters: + adam_beta1: + componentInputParameter: adam_beta1 + adam_beta2: + componentInputParameter: adam_beta2 + adam_epsilon: + componentInputParameter: adam_epsilon + batch_size: + componentInputParameter: batch_size + epochs: + componentInputParameter: epochs + kubernetes_config: + componentInputParameter: kubernetes_config + learning_rate: + componentInputParameter: learning_rate + logging_steps: + componentInputParameter: logging_steps + lora_rank: + componentInputParameter: lora_rank + max_length: + componentInputParameter: max_length + max_steps: + componentInputParameter: max_steps + model_name: + componentInputParameter: model_name + num_nodes: + componentInputParameter: num_nodes + optimizer: + componentInputParameter: optimizer + pvc_path: + componentInputParameter: pvc_path + run_id: + componentInputParameter: run_id + save_steps: + componentInputParameter: save_steps + save_strategy: + componentInputParameter: save_strategy + trainer_runtime: + componentInputParameter: trainer_runtime + use_flash_attention: + componentInputParameter: use_flash_attention + weight_decay: + componentInputParameter: weight_decay + 
taskInfo: + name: train-model + inputDefinitions: + artifacts: + input_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + parameters: + adam_beta1: + defaultValue: 0.9 + description: Beta1 parameter for Adam optimizer. Defaults to 0.9. + isOptional: true + parameterType: NUMBER_DOUBLE + adam_beta2: + defaultValue: 0.999 + description: Beta2 parameter for Adam optimizer. Defaults to 0.999. + isOptional: true + parameterType: NUMBER_DOUBLE + adam_epsilon: + defaultValue: 1.0e-08 + description: Epsilon parameter for Adam optimizer. Defaults to 1e-8. + isOptional: true + parameterType: NUMBER_DOUBLE + batch_size: + defaultValue: 16.0 + description: Per-device training batch size. Defaults to 16. + isOptional: true + parameterType: NUMBER_INTEGER + epochs: + defaultValue: 10.0 + description: Number of training epochs. Defaults to 10. + isOptional: true + parameterType: NUMBER_INTEGER + kubernetes_config: + isOptional: true + parameterType: TASK_CONFIG + learning_rate: + defaultValue: 0.0003 + description: Learning rate for training optimization. Defaults to 3e-4. + isOptional: true + parameterType: NUMBER_DOUBLE + logging_steps: + defaultValue: 10.0 + description: Number of steps between logging outputs. Defaults to 10. + isOptional: true + parameterType: NUMBER_INTEGER + lora_rank: + defaultValue: 8.0 + description: LoRA adapter rank (lower = fewer parameters, faster training). + Defaults to 8. + isOptional: true + parameterType: NUMBER_INTEGER + max_length: + defaultValue: 64.0 + description: Maximum token sequence length for training. Defaults to 64. + isOptional: true + parameterType: NUMBER_INTEGER + max_steps: + description: Maximum number of training steps. If specified, overrides epochs. + Defaults to None. + isOptional: true + parameterType: NUMBER_INTEGER + model_name: + description: HuggingFace model identifier (e.g., "meta-llama/Llama-3.2-3B-Instruct"). 
+ parameterType: STRING + num_nodes: + defaultValue: 2.0 + description: Number of nodes for distributed training. Defaults to 2. + isOptional: true + parameterType: NUMBER_INTEGER + optimizer: + defaultValue: adamw_torch + description: Optimizer to use (e.g., "adamw_torch", "adamw_torch_fused"). + Defaults to "adamw_torch". + isOptional: true + parameterType: STRING + pvc_path: + description: Base path within the PVC for storing outputs. + parameterType: STRING + run_id: + description: Unique identifier for this training run. Use dsl.PIPELINE_JOB_ID_PLACEHOLDER. + parameterType: STRING + save_steps: + description: Number of steps between model checkpoints. Defaults to None. + isOptional: true + parameterType: NUMBER_INTEGER + save_strategy: + defaultValue: epoch + description: Checkpoint saving strategy ("epoch" or "steps"). Defaults to + "epoch". + isOptional: true + parameterType: STRING + trainer_runtime: + defaultValue: torch-distributed + description: Runtime to use for Kubeflow Trainer. Defaults to "torch-distributed". + isOptional: true + parameterType: STRING + use_flash_attention: + defaultValue: false + description: Whether to use Flash Attention 2 for improved performance. Defaults + to False. + isOptional: true + parameterType: BOOLEAN + weight_decay: + defaultValue: 0.01 + description: Weight decay for regularization. Defaults to 0.01. 
+ isOptional: true + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + output_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + output_model: + artifactType: + schemaTitle: system.Model + schemaVersion: 0.0.1 + taskConfigPassthroughs: + - field: RESOURCES + - field: KUBERNETES_TOLERATIONS + - field: KUBERNETES_NODE_SELECTOR + - field: KUBERNETES_AFFINITY + - applyToTask: true + field: ENV + - applyToTask: true + field: KUBERNETES_VOLUMES +schemaVersion: 2.1.0 +sdkVersion: kfp-2.15.2 diff --git a/pipelines/yoda_model/train_and_evaluate_yoda_model.py b/pipelines/yoda_model/train_and_evaluate_yoda_model.py new file mode 100644 index 000000000..610662743 --- /dev/null +++ b/pipelines/yoda_model/train_and_evaluate_yoda_model.py @@ -0,0 +1,121 @@ +import kfp +from kfp import dsl, kubernetes + +from components.data_processing.yoda_data_preparation import prepare_yoda_dataset +from components.training.train_yoda_model import train_model +from components.evaluation.evaluate_yoda_model import evaluate_yoda_model + + +@dsl.pipeline( + name="Yoda finetune", + description="Prepare Yoda dataset, finetune a base model with LoRA, and evaluate baseline vs fine-tuned", + pipeline_config=dsl.PipelineConfig( + workspace=dsl.WorkspaceConfig( + size="20Gi", + kubernetes=dsl.KubernetesWorkspaceConfig( + pvcSpecPatch={ + "accessModes": ["ReadWriteMany"], + "storageClassName": "efs-sc", + } + ) + ), + ) +) +def yoda_finetune_and_evaluate( + model_name: str = "meta-llama/Llama-3.2-3B-Instruct", + eval_limit: int = None, +): + """Prepare, finetune, and evaluate a model using the Yoda dataset. + + Args: + model_name (str): HuggingFace model ID for both baseline and training. + Defaults to "meta-llama/Llama-3.2-3B-Instruct". + eval_limit (int): Maximum number of examples per task for evaluation. + Use None to evaluate all available examples. Defaults to None. 
+ """ + + # 1) Prepare dataset splits (rename sentence->prompt, translation_extra->completion; drop translation) + prepare_dataset_op = ( + prepare_yoda_dataset(yoda_input_dataset="dvgodoy/yoda_sentences", + operation_map={"rename_column": {"sentence": "prompt", + "translation_extra": "completion"}, + "remove_columns": "translation"}) + .set_caching_options(enable_caching=False) + .set_retry(3) + ) + + # 2) Train LoRA adapter on Yoda train split + train_model_op = ( + train_model( + input_dataset=prepare_dataset_op.outputs["yoda_train_dataset"], + model_name=model_name, + pvc_path=dsl.WORKSPACE_PATH_PLACEHOLDER, + run_id=dsl.PIPELINE_JOB_ID_PLACEHOLDER, + ) + .after(prepare_dataset_op) + .set_caching_options(enable_caching=False) + .set_cpu_request("2") + .set_cpu_limit("2") + .set_memory_request("30Gi") + .set_memory_limit("30Gi") + .set_accelerator_type("nvidia.com/gpu") + .set_accelerator_limit("1") + ) + + # Ensure HF token available for gated model access during training + kubernetes.use_secret_as_env( + task=train_model_op, + secret_name="hf-token", + secret_key_to_env={"HF_TOKEN": "HF_TOKEN"}, + ) + + + # 3) Baseline evaluation (no adapter) + baseline_eval_op = ( + evaluate_yoda_model( + model_path=model_name, + custom_translation_dataset=prepare_dataset_op.outputs["yoda_eval_dataset"], + limit=eval_limit, + ) + .set_caching_options(enable_caching=False) + .set_accelerator_type("nvidia.com/gpu") + .set_accelerator_limit("1") + .set_cpu_request("4000m") + .set_memory_request("100G") + ) + + # Ensure HF token available for gated model access during baseline eval + kubernetes.use_secret_as_env( + task=baseline_eval_op, + secret_name="hf-token", + secret_key_to_env={"HF_TOKEN": "HF_TOKEN"}, + ) + + # 4) Fine-tuned evaluation (with LoRA adapter) + finetuned_eval_op = ( + evaluate_yoda_model( + model_path=model_name, + custom_translation_dataset=prepare_dataset_op.outputs["yoda_eval_dataset"], + lora_adapter=train_model_op.outputs["output_model"], + limit=eval_limit, + ) + .set_caching_options(enable_caching=False) + 
.set_accelerator_type("nvidia.com/gpu") + .set_accelerator_limit("1") + .set_cpu_request("4000m") + .set_memory_request("100G") + ).after(train_model_op) + + # Ensure HF token available for gated model access during fine-tuned eval + kubernetes.use_secret_as_env( + task=finetuned_eval_op, + secret_name="hf-token", + secret_key_to_env={"HF_TOKEN": "HF_TOKEN"}, + ) + + +if __name__ == "__main__": + kfp.compiler.Compiler().compile( + pipeline_func=yoda_finetune_and_evaluate, + package_path=__file__.replace(".py", ".yaml"), + ) \ No newline at end of file diff --git a/pipelines/yoda_model/train_and_evaluate_yoda_model.yaml b/pipelines/yoda_model/train_and_evaluate_yoda_model.yaml new file mode 100644 index 000000000..828a7e249 --- /dev/null +++ b/pipelines/yoda_model/train_and_evaluate_yoda_model.yaml @@ -0,0 +1,1312 @@ +# PIPELINE DEFINITION +# Name: yoda-finetune +# Description: Prepare Yoda dataset, finetune a base model with LoRA, and evaluate baseline vs fine-tuned +# Inputs: +# eval_limit: int +# model_name: str [Default: 'meta-llama/Llama-3.2-3B-Instruct'] +components: + comp-evaluate-yoda-model: + executorLabel: exec-evaluate-yoda-model + inputDefinitions: + artifacts: + custom_translation_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + isOptional: true + lora_adapter: + artifactType: + schemaTitle: system.Model + schemaVersion: 0.0.1 + isOptional: true + parameters: + add_bos_token: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + batch_size: + defaultValue: 1.0 + isOptional: true + parameterType: NUMBER_INTEGER + dtype: + defaultValue: auto + isOptional: true + parameterType: STRING + gpu_memory_utilization: + defaultValue: 0.8 + isOptional: true + parameterType: NUMBER_DOUBLE + include_classification_tasks: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + include_summarization_tasks: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + limit: + isOptional: true + parameterType: 
NUMBER_INTEGER + log_prompts: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + max_batch_size: + isOptional: true + parameterType: NUMBER_INTEGER + max_model_len: + defaultValue: 4096.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_path: + parameterType: STRING + verbosity: + defaultValue: INFO + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + output_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + output_prompts: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + output_results: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-evaluate-yoda-model-2: + executorLabel: exec-evaluate-yoda-model-2 + inputDefinitions: + artifacts: + custom_translation_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + isOptional: true + lora_adapter: + artifactType: + schemaTitle: system.Model + schemaVersion: 0.0.1 + isOptional: true + parameters: + add_bos_token: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + batch_size: + defaultValue: 1.0 + isOptional: true + parameterType: NUMBER_INTEGER + dtype: + defaultValue: auto + isOptional: true + parameterType: STRING + gpu_memory_utilization: + defaultValue: 0.8 + isOptional: true + parameterType: NUMBER_DOUBLE + include_classification_tasks: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + include_summarization_tasks: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + limit: + isOptional: true + parameterType: NUMBER_INTEGER + log_prompts: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + max_batch_size: + isOptional: true + parameterType: NUMBER_INTEGER + max_model_len: + defaultValue: 4096.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_path: + parameterType: STRING + verbosity: + defaultValue: INFO + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + 
output_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + output_prompts: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + output_results: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-prepare-yoda-dataset: + executorLabel: exec-prepare-yoda-dataset + inputDefinitions: + parameters: + operation_map: + defaultValue: + rename_column: + sentence: prompt + description: 'Specify list of operations you want to perform on the data + set before splitting it e.g. {"rename_column": {"sentence":"prompt"}, + "remove_columns": "translation"}' + isOptional: true + parameterType: STRUCT + train_split_ratio: + defaultValue: 0.8 + description: 'Ratio of data to use for training (0.0-1.0). + + Defaults to 0.8 (80% train, 20% eval).' + isOptional: true + parameterType: NUMBER_DOUBLE + yoda_input_dataset: + description: Dataset to download from HuggingFace + parameterType: STRING + outputDefinitions: + artifacts: + yoda_eval_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + yoda_train_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-train-model: + executorLabel: exec-train-model + inputDefinitions: + artifacts: + input_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + parameters: + adam_beta1: + defaultValue: 0.9 + description: Beta1 parameter for Adam optimizer. Defaults to 0.9. + isOptional: true + parameterType: NUMBER_DOUBLE + adam_beta2: + defaultValue: 0.999 + description: Beta2 parameter for Adam optimizer. Defaults to 0.999. + isOptional: true + parameterType: NUMBER_DOUBLE + adam_epsilon: + defaultValue: 1.0e-08 + description: Epsilon parameter for Adam optimizer. Defaults to 1e-8. + isOptional: true + parameterType: NUMBER_DOUBLE + batch_size: + defaultValue: 16.0 + description: Per-device training batch size. Defaults to 16. 
+ isOptional: true + parameterType: NUMBER_INTEGER + epochs: + defaultValue: 10.0 + description: Number of training epochs. Defaults to 10. + isOptional: true + parameterType: NUMBER_INTEGER + kubernetes_config: + isOptional: true + parameterType: TASK_CONFIG + learning_rate: + defaultValue: 0.0003 + description: Learning rate for training optimization. Defaults to 3e-4. + isOptional: true + parameterType: NUMBER_DOUBLE + logging_steps: + defaultValue: 10.0 + description: Number of steps between logging outputs. Defaults to 10. + isOptional: true + parameterType: NUMBER_INTEGER + lora_rank: + defaultValue: 8.0 + description: LoRA adapter rank (lower = fewer parameters, faster training). + Defaults to 8. + isOptional: true + parameterType: NUMBER_INTEGER + max_length: + defaultValue: 64.0 + description: Maximum token sequence length for training. Defaults to 64. + isOptional: true + parameterType: NUMBER_INTEGER + max_steps: + description: Maximum number of training steps. If specified, overrides epochs. + Defaults to None. + isOptional: true + parameterType: NUMBER_INTEGER + model_name: + description: HuggingFace model identifier (e.g., "meta-llama/Llama-3.2-3B-Instruct"). + parameterType: STRING + num_nodes: + defaultValue: 2.0 + description: Number of nodes for distributed training. Defaults to 2. + isOptional: true + parameterType: NUMBER_INTEGER + optimizer: + defaultValue: adamw_torch + description: Optimizer to use (e.g., "adamw_torch", "adamw_torch_fused"). + Defaults to "adamw_torch". + isOptional: true + parameterType: STRING + pvc_path: + description: Base path within the PVC for storing outputs. + parameterType: STRING + run_id: + description: Unique identifier for this training run. Use dsl.PIPELINE_JOB_ID_PLACEHOLDER. + parameterType: STRING + save_steps: + description: Number of steps between model checkpoints. Defaults to None. 
+ isOptional: true + parameterType: NUMBER_INTEGER + save_strategy: + defaultValue: epoch + description: Checkpoint saving strategy ("epoch" or "steps"). Defaults to + "epoch". + isOptional: true + parameterType: STRING + trainer_runtime: + defaultValue: torch-distributed + description: Runtime to use for Kubeflow Trainer. Defaults to "torch-distributed". + isOptional: true + parameterType: STRING + use_flash_attention: + defaultValue: false + description: Whether to use Flash Attention 2 for improved performance. + Defaults to False. + isOptional: true + parameterType: BOOLEAN + weight_decay: + defaultValue: 0.01 + description: Weight decay for regularization. Defaults to 0.01. + isOptional: true + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + output_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + output_model: + artifactType: + schemaTitle: system.Model + schemaVersion: 0.0.1 + taskConfigPassthroughs: + - field: RESOURCES + - field: KUBERNETES_TOLERATIONS + - field: KUBERNETES_NODE_SELECTOR + - field: KUBERNETES_AFFINITY + - applyToTask: true + field: ENV + - applyToTask: true + field: KUBERNETES_VOLUMES +deploymentSpec: + executors: + exec-evaluate-yoda-model: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - evaluate_yoda_model + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'transformers'\ + \ 'torch' 'accelerate' 'lm-eval[vllm]' 'unitxt' 'sacrebleu' 'datasets' \ + \ && python3 -m pip install --quiet --no-warn-script-location 'kfp==2.15.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef evaluate_yoda_model(\n model_path: str,\n output_metrics:\ + \ dsl.Output[dsl.Metrics],\n output_results: dsl.Output[dsl.Artifact],\n\ + \ output_prompts: dsl.Output[dsl.Artifact],\n lora_adapter:\ + \ dsl.Input[dsl.Model] = None,\n batch_size: int = 1,\n limit:\ + \ int = None,\n max_model_len: int = 4096,\n gpu_memory_utilization:\ + \ float = 0.8,\n dtype: str = \"auto\",\n add_bos_token: bool\ + \ = True,\n include_classification_tasks: bool = True,\n include_summarization_tasks:\ + \ bool = True,\n custom_translation_dataset: dsl.Input[dsl.Dataset]\ + \ = None,\n log_prompts: bool = True,\n verbosity: str = \"\ + INFO\",\n max_batch_size: int = None,\n):\n import logging\n \ + \ import os\n import json\n import time\n import random\n \ + \ from typing import Dict, Any, Optional\n\n from lm_eval.tasks.unitxt\ + \ import task\n from lm_eval.api.registry import get_model\n from\ + \ lm_eval.api.model import LM\n from lm_eval.evaluator import evaluate\n\ + \ from lm_eval.tasks import get_task_dict\n from lm_eval.api.instance\ + \ import Instance\n from lm_eval import tasks\n from lm_eval.api.task\ + \ import TaskConfig\n from lm_eval.api.metrics import 
mean\n from\ + \ datasets import load_from_disk\n import torch\n import sacrebleu\n\ + \n class TranslationTask(tasks.Task):\n \"\"\"\n A custom\ + \ lm-eval task for translation, using the greedy_until method\n and\ + \ evaluating with the BLEU metric.\n \"\"\"\n\n VERSION =\ + \ 0\n\n def __init__(self, dataset_path, task_name: str, log_prompts=False,\ + \ prompts_log=None):\n self.dataset_path = dataset_path\n \ + \ self.task_name = task_name\n self.log_prompts = log_prompts\n\ + \ self.prompts_log = [] if prompts_log is None else prompts_log\n\ + \ config = TaskConfig(task=task_name, dataset_path=dataset_path)\n\ + \ super().__init__(config=config)\n self.config.task\ + \ = task_name\n self.fewshot_rnd = random.Random()\n\n \ + \ def download(\n self, data_dir=None, cache_dir=None, download_mode=None,\ + \ **kwargs\n ) -> None:\n self.dataset = {\"test\": load_from_disk(self.dataset_path)}\n\ + \n def has_test_docs(self):\n return \"test\" in self.dataset\n\ + \n def has_validation_docs(self):\n return False\n\n \ + \ def has_training_docs(self):\n return False\n\n \ + \ def test_docs(self):\n return self.dataset[\"test\"]\n\n\ + \ def doc_to_text(self, doc):\n return doc[\"prompt\"\ + ]\n\n def doc_to_target(self, doc):\n return doc[\"completion\"\ + ]\n\n def construct_requests(self, doc, ctx, **kwargs):\n \ + \ kwargs.pop(\"apply_chat_template\", False)\n kwargs.pop(\"\ + chat_template\", False)\n return Instance(\n request_type=\"\ + generate_until\",\n doc=doc,\n arguments=(ctx,\ + \ {}),\n idx=0,\n **kwargs,\n )\n\ + \n def process_results(self, doc, results):\n (generated_text,)\ + \ = results\n\n prediction = generated_text.strip()\n\n \ + \ if self.log_prompts:\n try:\n \ + \ self.prompts_log.append(\n {\"prompt\": self.doc_to_text(doc),\ + \ \"response\": prediction}\n )\n except\ + \ Exception:\n # Best-effort logging; avoid breaking\ + \ evaluation if logging fails\n pass\n\n predictions\ + \ = [prediction]\n references = 
[[self.doc_to_target(doc).strip()]]\n\ + \n bleu_score = sacrebleu.corpus_bleu(predictions, references).score\n\ + \n exact_match = 1.0 if prediction == references[0][0] else 0.0\n\ + \n return {\"bleu\": bleu_score, \"exact_match\": exact_match}\n\ + \n def aggregation(self):\n return {\"bleu\": mean, \"\ + exact_match\": mean}\n\n def should_decontaminate(self):\n \ + \ return False\n\n def doc_to_prefix(self, doc):\n \ + \ return \"\"\n\n def higher_is_better(self):\n return\ + \ {\"bleu\": True, \"exact_match\": True}\n\n TASK_CONFIGS = {\n \ + \ \"classification\": [\n {\n \"task\": \"\ + classification_rte_simple\",\n \"recipe\": \"card=cards.rte,template=templates.classification.multi_class.relation.simple\"\ + ,\n \"group\": \"classification\",\n \"output_type\"\ + : \"generate_until\",\n },\n {\n \"\ + task\": \"classification_rte_default\",\n \"recipe\": \"\ + card=cards.rte,template=templates.classification.multi_class.relation.default\"\ + ,\n \"group\": \"classification\",\n \"output_type\"\ + : \"generate_until\",\n },\n {\n \"\ + task\": \"classification_rte_wnli\",\n \"recipe\": \"card=cards.wnli,template=templates.classification.multi_class.relation.simple\"\ + ,\n \"group\": \"classification\",\n \"output_type\"\ + : \"generate_until\",\n },\n ],\n \"summarization\"\ + : [\n {\n \"task\": \"summarization_xsum_formal\"\ + ,\n \"recipe\": \"card=cards.xsum,template=templates.summarization.abstractive.formal,num_demos=0\"\ + ,\n \"group\": \"summarization\",\n \"output_type\"\ + : \"generate_until\",\n }\n ],\n }\n\n logging.basicConfig(\n\ + \ level=getattr(logging, verbosity.upper()),\n format=\"%(asctime)s\ + \ - %(name)s - %(levelname)s - %(message)s\",\n )\n logger = logging.getLogger(__name__)\n\ + \n os.environ[\"HF_HUB_DISABLE_TELEMETRY\"] = \"1\"\n os.environ[\"\ + HF_HUB_DISABLE_SYMLINKS_WARNING\"] = \"1\"\n\n logger.info(\"Validating\ + \ parameters...\")\n\n if not torch.cuda.is_available():\n raise\ + \ ValueError(\"CUDA is not 
available\")\n\n if not (0.0 <= gpu_memory_utilization\ + \ <= 1.0):\n raise ValueError(\"gpu_memory_utilization must be between\ + \ 0.0 and 1.0\")\n\n if batch_size <= 0:\n raise ValueError(\"\ + batch_size must be positive\")\n\n if max_model_len <= 0:\n raise\ + \ ValueError(\"max_model_len must be positive\")\n\n if limit is not\ + \ None and limit <= 0:\n raise ValueError(\"limit must be positive\ + \ or None\")\n\n if (\n not include_classification_tasks\n\ + \ and not include_summarization_tasks\n and not custom_translation_dataset\n\ + \ ):\n raise ValueError(\n \"At least one of include_classification_tasks,\ + \ include_summarization_tasks, or custom_translation_dataset must be provided\"\ + \n )\n\n logger.info(\"Parameter validation passed\")\n\n logger.info(\"\ + Creating tasks...\")\n start_time = time.time()\n\n eval_tasks = []\n\ + \ prompt_response_log = []\n\n if custom_translation_dataset:\n \ + \ logger.info(\"Adding custom translation task...\")\n translation_task\ + \ = TranslationTask(\n custom_translation_dataset.path,\n \ + \ \"custom_translation\",\n log_prompts=log_prompts,\n\ + \ prompts_log=prompt_response_log,\n )\n eval_tasks.append(translation_task)\n\ + \n if include_classification_tasks:\n logger.info(\"Adding classification\ + \ tasks...\")\n classification_configs = TASK_CONFIGS[\"classification\"\ + ]\n\n for config in classification_configs:\n task_obj\ + \ = task.Unitxt(config=config)\n # TODO: Remove after https://github.com/EleutherAI/lm-evaluation-harness/pull/3225\ + \ is merged.\n task_obj.config.task = config[\"task\"]\n \ + \ eval_tasks.append(task_obj)\n\n if include_summarization_tasks:\n\ + \ logger.info(\"Adding summarization tasks...\")\n summarization_config\ + \ = TASK_CONFIGS[\"summarization\"][0]\n\n task_obj = task.Unitxt(config=summarization_config)\n\ + \ # TODO: Remove after https://github.com/EleutherAI/lm-evaluation-harness/pull/3225\ + \ is merged.\n task_obj.config.task = summarization_config[\"task\"\ 
+ ]\n eval_tasks.append(task_obj)\n\n task_dict = get_task_dict(eval_tasks)\n\ + \ logger.info(f\"Created {len(eval_tasks)} tasks in {time.time() - start_time:.2f}s\"\ + )\n\n logger.info(\"Loading model...\")\n start_time = time.time()\n\ + \n try:\n model_args = {\n \"add_bos_token\": add_bos_token,\n\ + \ \"dtype\": dtype,\n \"max_model_len\": max_model_len,\n\ + \ \"gpu_memory_utilization\": gpu_memory_utilization,\n \ + \ \"pretrained\": model_path,\n \"trust_remote_code\":\ + \ True,\n }\n\n # Optionally provide LoRA adapter to lm-eval's\ + \ VLLM backend\n # The backend expects `lora_local_path` and internally\ + \ constructs the LoRARequest.\n if lora_adapter and lora_adapter.path:\n\ + \ logger.info(\"LoRA adapter provided; passing lora_local_path\ + \ to VLLM backend\")\n model_args[\"lora_local_path\"] = lora_adapter.path\n\ + \n model_class = get_model(\"vllm\")\n additional_config =\ + \ {\n \"batch_size\": batch_size,\n \"max_batch_size\"\ + : max_batch_size,\n \"device\": None,\n }\n\n loaded_model\ + \ = model_class.create_from_arg_obj(model_args, additional_config)\n \ + \ logger.info(f\"Model loaded successfully in {time.time() - start_time:.2f}s\"\ + )\n except Exception as e:\n logger.error(f\"Failed to load model:\ + \ {e}\")\n raise RuntimeError(f\"Model loading failed: {e}\")\n\n\ + \ logger.info(\"Starting evaluation...\")\n start_time = time.time()\n\ + \n results = evaluate(\n lm=loaded_model,\n task_dict=task_dict,\n\ + \ limit=limit,\n verbosity=verbosity,\n )\n\n logger.info(f\"\ + Evaluation completed in {time.time() - start_time:.2f}s\")\n\n logger.info(\"\ + Saving results...\")\n\n def clean_for_json(obj):\n \"\"\"Recursively\ + \ clean objects to make them JSON serializable.\"\"\"\n if isinstance(obj,\ + \ dict):\n return {k: clean_for_json(v) for k, v in obj.items()}\n\ + \ elif isinstance(obj, list):\n return [clean_for_json(item)\ + \ for item in obj]\n elif isinstance(obj, (int, float, str, bool,\ + \ type(None))):\n return 
obj\n else:\n # Convert\ + \ non-serializable objects to string representation\n return\ + \ str(obj)\n\n clean_results = clean_for_json(results)\n\n output_results.name\ + \ = \"results.json\"\n\n with open(output_results.path, \"w\") as f:\n\ + \ json.dump(clean_results, f, indent=2)\n logger.info(f\"Results\ + \ saved to {output_results.path}\")\n\n # Save prompt/response log for\ + \ custom TranslationTask only\n if log_prompts and custom_translation_dataset\ + \ and len(prompt_response_log) > 0:\n try:\n output_prompts.name\ + \ = \"prompts.json\"\n with open(output_prompts.path, \"w\")\ + \ as f:\n json.dump(prompt_response_log, f, indent=2)\n \ + \ logger.info(f\"Prompt/response log saved to {output_prompts.path}\"\ + )\n except Exception as e:\n logger.warning(f\"Failed\ + \ to save prompt/response log: {e}\")\n\n logger.info(\"Logging metrics...\"\ + )\n\n for task_name, task_results in clean_results[\"results\"].items():\n\ + \ for metric_name, metric_value in task_results.items():\n \ + \ if isinstance(metric_value, (int, float)):\n # Skip\ + \ metrics that are 0 due to a bug in the RHOAI UI.\n # TODO:\ + \ Fix RHOAI UI to handle 0 values.\n # TODO: Ignore store_session_info\ + \ from metrics in RHOAI UI.\n if metric_value == 0:\n \ + \ continue\n\n metric_key = f\"{task_name}_{metric_name}\"\ + \n output_metrics.log_metric(metric_key, metric_value)\n\ + \ logger.debug(f\"Logged metric: {metric_key} = {metric_value}\"\ + )\n\n logger.info(\"Metrics logged successfully\")\n\n logger.info(\"\ + Pipeline completed successfully\")\n\n" + image: registry.access.redhat.com/ubi9/python-311:latest + resources: + accelerator: + count: '1' + resourceCount: '1' + resourceType: nvidia.com/gpu + type: nvidia.com/gpu + cpuRequest: 4.0 + memoryRequest: 100.0 + resourceCpuRequest: 4000m + resourceMemoryRequest: 100G + exec-evaluate-yoda-model-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - evaluate_yoda_model + command: + - sh + - 
-c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'transformers'\ + \ 'torch' 'accelerate' 'lm-eval[vllm]' 'unitxt' 'sacrebleu' 'datasets' \ + \ && python3 -m pip install --quiet --no-warn-script-location 'kfp==2.15.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef evaluate_yoda_model(\n model_path: str,\n output_metrics:\ + \ dsl.Output[dsl.Metrics],\n output_results: dsl.Output[dsl.Artifact],\n\ + \ output_prompts: dsl.Output[dsl.Artifact],\n lora_adapter:\ + \ dsl.Input[dsl.Model] = None,\n batch_size: int = 1,\n limit:\ + \ int = None,\n max_model_len: int = 4096,\n gpu_memory_utilization:\ + \ float = 0.8,\n dtype: str = \"auto\",\n add_bos_token: bool\ + \ = True,\n include_classification_tasks: bool = True,\n include_summarization_tasks:\ + \ bool = True,\n custom_translation_dataset: dsl.Input[dsl.Dataset]\ + \ = None,\n log_prompts: bool = True,\n verbosity: str = \"\ + INFO\",\n max_batch_size: int = None,\n):\n import logging\n \ + \ import os\n import json\n import time\n import random\n \ + \ from typing import Dict, Any, Optional\n\n from lm_eval.tasks.unitxt\ + \ import task\n from lm_eval.api.registry import get_model\n from\ + \ lm_eval.api.model import LM\n from lm_eval.evaluator import evaluate\n\ + \ from lm_eval.tasks import get_task_dict\n from lm_eval.api.instance\ + \ import Instance\n from lm_eval import tasks\n from lm_eval.api.task\ + \ import TaskConfig\n from 
lm_eval.api.metrics import mean\n from\ + \ datasets import load_from_disk\n import torch\n import sacrebleu\n\ + \n class TranslationTask(tasks.Task):\n \"\"\"\n A custom\ + \ lm-eval task for translation, using the greedy_until method\n and\ + \ evaluating with the BLEU metric.\n \"\"\"\n\n VERSION =\ + \ 0\n\n def __init__(self, dataset_path, task_name: str, log_prompts=False,\ + \ prompts_log=None):\n self.dataset_path = dataset_path\n \ + \ self.task_name = task_name\n self.log_prompts = log_prompts\n\ + \ self.prompts_log = [] if prompts_log is None else prompts_log\n\ + \ config = TaskConfig(task=task_name, dataset_path=dataset_path)\n\ + \ super().__init__(config=config)\n self.config.task\ + \ = task_name\n self.fewshot_rnd = random.Random()\n\n \ + \ def download(\n self, data_dir=None, cache_dir=None, download_mode=None,\ + \ **kwargs\n ) -> None:\n self.dataset = {\"test\": load_from_disk(self.dataset_path)}\n\ + \n def has_test_docs(self):\n return \"test\" in self.dataset\n\ + \n def has_validation_docs(self):\n return False\n\n \ + \ def has_training_docs(self):\n return False\n\n \ + \ def test_docs(self):\n return self.dataset[\"test\"]\n\n\ + \ def doc_to_text(self, doc):\n return doc[\"prompt\"\ + ]\n\n def doc_to_target(self, doc):\n return doc[\"completion\"\ + ]\n\n def construct_requests(self, doc, ctx, **kwargs):\n \ + \ kwargs.pop(\"apply_chat_template\", False)\n kwargs.pop(\"\ + chat_template\", False)\n return Instance(\n request_type=\"\ + generate_until\",\n doc=doc,\n arguments=(ctx,\ + \ {}),\n idx=0,\n **kwargs,\n )\n\ + \n def process_results(self, doc, results):\n (generated_text,)\ + \ = results\n\n prediction = generated_text.strip()\n\n \ + \ if self.log_prompts:\n try:\n \ + \ self.prompts_log.append(\n {\"prompt\": self.doc_to_text(doc),\ + \ \"response\": prediction}\n )\n except\ + \ Exception:\n # Best-effort logging; avoid breaking\ + \ evaluation if logging fails\n pass\n\n predictions\ + \ = [prediction]\n references = 
[[self.doc_to_target(doc).strip()]]\n\ + \n bleu_score = sacrebleu.corpus_bleu(predictions, references).score\n\ + \n exact_match = 1.0 if prediction == references[0][0] else 0.0\n\ + \n return {\"bleu\": bleu_score, \"exact_match\": exact_match}\n\ + \n def aggregation(self):\n return {\"bleu\": mean, \"\ + exact_match\": mean}\n\n def should_decontaminate(self):\n \ + \ return False\n\n def doc_to_prefix(self, doc):\n \ + \ return \"\"\n\n def higher_is_better(self):\n return\ + \ {\"bleu\": True, \"exact_match\": True}\n\n TASK_CONFIGS = {\n \ + \ \"classification\": [\n {\n \"task\": \"\ + classification_rte_simple\",\n \"recipe\": \"card=cards.rte,template=templates.classification.multi_class.relation.simple\"\ + ,\n \"group\": \"classification\",\n \"output_type\"\ + : \"generate_until\",\n },\n {\n \"\ + task\": \"classification_rte_default\",\n \"recipe\": \"\ + card=cards.rte,template=templates.classification.multi_class.relation.default\"\ + ,\n \"group\": \"classification\",\n \"output_type\"\ + : \"generate_until\",\n },\n {\n \"\ + task\": \"classification_rte_wnli\",\n \"recipe\": \"card=cards.wnli,template=templates.classification.multi_class.relation.simple\"\ + ,\n \"group\": \"classification\",\n \"output_type\"\ + : \"generate_until\",\n },\n ],\n \"summarization\"\ + : [\n {\n \"task\": \"summarization_xsum_formal\"\ + ,\n \"recipe\": \"card=cards.xsum,template=templates.summarization.abstractive.formal,num_demos=0\"\ + ,\n \"group\": \"summarization\",\n \"output_type\"\ + : \"generate_until\",\n }\n ],\n }\n\n logging.basicConfig(\n\ + \ level=getattr(logging, verbosity.upper()),\n format=\"%(asctime)s\ + \ - %(name)s - %(levelname)s - %(message)s\",\n )\n logger = logging.getLogger(__name__)\n\ + \n os.environ[\"HF_HUB_DISABLE_TELEMETRY\"] = \"1\"\n os.environ[\"\ + HF_HUB_DISABLE_SYMLINKS_WARNING\"] = \"1\"\n\n logger.info(\"Validating\ + \ parameters...\")\n\n if not torch.cuda.is_available():\n raise\ + \ ValueError(\"CUDA is not 
available\")\n\n if not (0.0 <= gpu_memory_utilization\ + \ <= 1.0):\n raise ValueError(\"gpu_memory_utilization must be between\ + \ 0.0 and 1.0\")\n\n if batch_size <= 0:\n raise ValueError(\"\ + batch_size must be positive\")\n\n if max_model_len <= 0:\n raise\ + \ ValueError(\"max_model_len must be positive\")\n\n if limit is not\ + \ None and limit <= 0:\n raise ValueError(\"limit must be positive\ + \ or None\")\n\n if (\n not include_classification_tasks\n\ + \ and not include_summarization_tasks\n and not custom_translation_dataset\n\ + \ ):\n raise ValueError(\n \"At least one of include_classification_tasks,\ + \ include_summarization_tasks, or custom_translation_dataset must be provided\"\ + \n )\n\n logger.info(\"Parameter validation passed\")\n\n logger.info(\"\ + Creating tasks...\")\n start_time = time.time()\n\n eval_tasks = []\n\ + \ prompt_response_log = []\n\n if custom_translation_dataset:\n \ + \ logger.info(\"Adding custom translation task...\")\n translation_task\ + \ = TranslationTask(\n custom_translation_dataset.path,\n \ + \ \"custom_translation\",\n log_prompts=log_prompts,\n\ + \ prompts_log=prompt_response_log,\n )\n eval_tasks.append(translation_task)\n\ + \n if include_classification_tasks:\n logger.info(\"Adding classification\ + \ tasks...\")\n classification_configs = TASK_CONFIGS[\"classification\"\ + ]\n\n for config in classification_configs:\n task_obj\ + \ = task.Unitxt(config=config)\n # TODO: Remove after https://github.com/EleutherAI/lm-evaluation-harness/pull/3225\ + \ is merged.\n task_obj.config.task = config[\"task\"]\n \ + \ eval_tasks.append(task_obj)\n\n if include_summarization_tasks:\n\ + \ logger.info(\"Adding summarization tasks...\")\n summarization_config\ + \ = TASK_CONFIGS[\"summarization\"][0]\n\n task_obj = task.Unitxt(config=summarization_config)\n\ + \ # TODO: Remove after https://github.com/EleutherAI/lm-evaluation-harness/pull/3225\ + \ is merged.\n task_obj.config.task = summarization_config[\"task\"\ 
+ ]\n eval_tasks.append(task_obj)\n\n task_dict = get_task_dict(eval_tasks)\n\ + \ logger.info(f\"Created {len(eval_tasks)} tasks in {time.time() - start_time:.2f}s\"\ + )\n\n logger.info(\"Loading model...\")\n start_time = time.time()\n\ + \n try:\n model_args = {\n \"add_bos_token\": add_bos_token,\n\ + \ \"dtype\": dtype,\n \"max_model_len\": max_model_len,\n\ + \ \"gpu_memory_utilization\": gpu_memory_utilization,\n \ + \ \"pretrained\": model_path,\n \"trust_remote_code\":\ + \ True,\n }\n\n # Optionally provide LoRA adapter to lm-eval's\ + \ VLLM backend\n # The backend expects `lora_local_path` and internally\ + \ constructs the LoRARequest.\n if lora_adapter and lora_adapter.path:\n\ + \ logger.info(\"LoRA adapter provided; passing lora_local_path\ + \ to VLLM backend\")\n model_args[\"lora_local_path\"] = lora_adapter.path\n\ + \n model_class = get_model(\"vllm\")\n additional_config =\ + \ {\n \"batch_size\": batch_size,\n \"max_batch_size\"\ + : max_batch_size,\n \"device\": None,\n }\n\n loaded_model\ + \ = model_class.create_from_arg_obj(model_args, additional_config)\n \ + \ logger.info(f\"Model loaded successfully in {time.time() - start_time:.2f}s\"\ + )\n except Exception as e:\n logger.error(f\"Failed to load model:\ + \ {e}\")\n raise RuntimeError(f\"Model loading failed: {e}\")\n\n\ + \ logger.info(\"Starting evaluation...\")\n start_time = time.time()\n\ + \n results = evaluate(\n lm=loaded_model,\n task_dict=task_dict,\n\ + \ limit=limit,\n verbosity=verbosity,\n )\n\n logger.info(f\"\ + Evaluation completed in {time.time() - start_time:.2f}s\")\n\n logger.info(\"\ + Saving results...\")\n\n def clean_for_json(obj):\n \"\"\"Recursively\ + \ clean objects to make them JSON serializable.\"\"\"\n if isinstance(obj,\ + \ dict):\n return {k: clean_for_json(v) for k, v in obj.items()}\n\ + \ elif isinstance(obj, list):\n return [clean_for_json(item)\ + \ for item in obj]\n elif isinstance(obj, (int, float, str, bool,\ + \ type(None))):\n return 
obj\n else:\n # Convert\ + \ non-serializable objects to string representation\n return\ + \ str(obj)\n\n clean_results = clean_for_json(results)\n\n output_results.name\ + \ = \"results.json\"\n\n with open(output_results.path, \"w\") as f:\n\ + \ json.dump(clean_results, f, indent=2)\n logger.info(f\"Results\ + \ saved to {output_results.path}\")\n\n # Save prompt/response log for\ + \ custom TranslationTask only\n if log_prompts and custom_translation_dataset\ + \ and len(prompt_response_log) > 0:\n try:\n output_prompts.name\ + \ = \"prompts.json\"\n with open(output_prompts.path, \"w\")\ + \ as f:\n json.dump(prompt_response_log, f, indent=2)\n \ + \ logger.info(f\"Prompt/response log saved to {output_prompts.path}\"\ + )\n except Exception as e:\n logger.warning(f\"Failed\ + \ to save prompt/response log: {e}\")\n\n logger.info(\"Logging metrics...\"\ + )\n\n for task_name, task_results in clean_results[\"results\"].items():\n\ + \ for metric_name, metric_value in task_results.items():\n \ + \ if isinstance(metric_value, (int, float)):\n # Skip\ + \ metrics that are 0 due to a bug in the RHOAI UI.\n # TODO:\ + \ Fix RHOAI UI to handle 0 values.\n # TODO: Ignore store_session_info\ + \ from metrics in RHOAI UI.\n if metric_value == 0:\n \ + \ continue\n\n metric_key = f\"{task_name}_{metric_name}\"\ + \n output_metrics.log_metric(metric_key, metric_value)\n\ + \ logger.debug(f\"Logged metric: {metric_key} = {metric_value}\"\ + )\n\n logger.info(\"Metrics logged successfully\")\n\n logger.info(\"\ + Pipeline completed successfully\")\n\n" + image: registry.access.redhat.com/ubi9/python-311:latest + resources: + accelerator: + count: '1' + resourceCount: '1' + resourceType: nvidia.com/gpu + type: nvidia.com/gpu + cpuRequest: 4.0 + memoryRequest: 100.0 + resourceCpuRequest: 4000m + resourceMemoryRequest: 100G + exec-prepare-yoda-dataset: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - prepare_yoda_dataset + command: + - sh + - 
-c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'datasets' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.15.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef prepare_yoda_dataset(\n yoda_input_dataset: str,\n \ + \ yoda_train_dataset: dsl.Output[dsl.Dataset],\n yoda_eval_dataset:\ + \ dsl.Output[dsl.Dataset],\n operation_map: dict[str, Any] = {\"\ + rename_column\": {\"sentence\":\"prompt\"}},\n train_split_ratio:\ + \ float = 0.8,\n):\n \"\"\"Prepare the training and evaluation datasets\ + \ by downloading and preprocessing.\n\n Downloads the yoda_sentences\ + \ dataset from HuggingFace, renames columns to match\n the expected format\ + \ for training (prompt/completion), splits into train/eval sets,\n and\ + \ saves them as output artifacts.\n\n Args:\n yoda_input_dataset\ + \ (str): Dataset to download from HuggingFace\n yoda_train_dataset\ + \ (dsl.Output[dsl.Dataset]): Output dataset for training.\n yoda_eval_dataset\ + \ (dsl.Output[dsl.Dataset]): Output dataset for evaluation.\n operation_map\ + \ (dict): Specify list of operations you want to perform on the data set\ + \ before splitting it e.g. 
{\"rename_column\": {\"sentence\":\"prompt\"\ + }, \"remove_columns\": \"translation\"}\n train_split_ratio (float):\ + \ Ratio of data to use for training (0.0-1.0).\n \ + \ Defaults to 0.8 (80% train, 20% eval).\n \"\"\"\n from\ + \ datasets import load_dataset\n\n print(f\"Downloading and loading the\ + \ dataset from {yoda_input_dataset}\")\n dataset = load_dataset(yoda_input_dataset,\ + \ split=\"train\")\n if operation_map:\n for operation_name, operation_value\ + \ in operation_map.items():\n print(f'Performing operation: \"\ + {operation_name}\"')\n if operation_name == 'rename_column':\n\ + \ if type(operation_value) != dict:\n \ + \ raise RuntimeError(f'Dict value is required to perform operation \"{operation_name}\"\ + ')\n for key, value in operation_value.items():\n \ + \ dataset = dataset.rename_column(key, value)\n elif\ + \ operation_name == \"remove_columns\":\n if type(operation_value)\ + \ == str:\n dataset = dataset.remove_columns([\"translation\"\ + ])\n elif type(operation_value) == list:\n \ + \ dataset = dataset.remove_columns(\"translation\")\n \ + \ else:\n raise RuntimeError(f'Only list and str type\ + \ are allowed to perform \"{operation_name}\" operation')\n else:\n\ + \ raise InvalidValue(f'Unrecogonized operation value \"{operation_name}\"\ + ')\n\n # Add prefix to prompts\n print(\"Adding Yoda speak prefix\ + \ to prompts\")\n def add_yoda_prefix(example):\n example[\"prompt\"\ + ] = (\n \"Translate the following to Yoda speak: \" + example[\"\ + prompt\"]\n )\n return example\n\n dataset = dataset.map(add_yoda_prefix)\n\ + \n # Split the dataset into train and eval sets\n print(\n \ + \ f\"Splitting dataset with {len(dataset)} rows into train ({train_split_ratio:.1%})\ + \ and eval ({(1-train_split_ratio):.1%}) sets\"\n )\n split_dataset\ + \ = dataset.train_test_split(test_size=1 - train_split_ratio, seed=42)\n\ + \n train_dataset = split_dataset[\"train\"]\n eval_dataset = split_dataset[\"\ + test\"]\n\n print(f\"Train set: 
{len(train_dataset)} rows\")\n print(f\"\ + Eval set: {len(eval_dataset)} rows\")\n\n # Save both datasets\n print(f\"\ + Saving train dataset to {yoda_train_dataset.path}\")\n train_dataset.save_to_disk(yoda_train_dataset.path)\n\ + \n print(f\"Saving eval dataset to {yoda_eval_dataset.path}\")\n eval_dataset.save_to_disk(yoda_eval_dataset.path)\n\ + \n" + image: python:3.11 + exec-train-model: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - train_model + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kubernetes'\ + \ && python3 -m pip install --quiet --no-warn-script-location 'git+https://github.com/kubeflow/pipelines@master#egg=kfp&subdirectory=sdk/python'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef train_model(\n input_dataset: dsl.Input[dsl.Dataset],\n\ + \ model_name: str,\n run_id: str,\n pvc_path: str,\n\ + \ output_model: dsl.Output[dsl.Model],\n output_metrics: dsl.Output[dsl.Metrics],\n\ + \ # Training configuration parameters\n epochs: int = 10,\n\ + \ lora_rank: int = 8,\n learning_rate: float = 3e-4,\n \ + \ batch_size: int = 16,\n max_length: int = 64,\n # Training\ + \ control parameters\n max_steps: Optional[int] = None,\n \ + \ logging_steps: int = 10,\n save_steps: Optional[int] = None,\n\ + \ save_strategy: str = \"epoch\",\n # Optimizer parameters\n\ + \ optimizer: str = \"adamw_torch\",\n adam_beta1: float =\ + \ 0.9,\n adam_beta2: float = 0.999,\n adam_epsilon: float\ + \ = 1e-8,\n 
weight_decay: float = 0.01,\n # Performance optimization\n\ + \ use_flash_attention: bool = False,\n # Infrastructure parameters\n\ + \ num_nodes: int = 2,\n trainer_runtime: str = \"torch-distributed\"\ + ,\n kubernetes_config: dsl.TaskConfig = None,\n):\n \"\"\"Train\ + \ a large language model using distributed training with LoRA fine-tuning.\n\ + \n This function creates and manages a Kubernetes TrainJob for distributed\ + \ training\n of a large language model using LoRA (Low-Rank Adaptation)\ + \ fine-tuning. It handles\n the complete training workflow including\ + \ job creation, monitoring, and artifact\n collection.\n\n Args:\n\ + \ model_name (str): HuggingFace model identifier (e.g., \"meta-llama/Llama-3.2-3B-Instruct\"\ + ).\n run_id (str): Unique identifier for this training run. Use dsl.PIPELINE_JOB_ID_PLACEHOLDER.\n\ + \ dataset_path (str): Path to the training dataset within the PVC.\n\ + \ pvc_path (str): Base path within the PVC for storing outputs.\n\ + \ output_model (dsl.Output[dsl.Model]): Kubeflow output artifact\ + \ for the trained model.\n output_metrics (dsl.Output[dsl.Metrics]):\ + \ Kubeflow output artifact for training metrics.\n epochs (int, optional):\ + \ Number of training epochs. Defaults to 10.\n lora_rank (int, optional):\ + \ LoRA adapter rank (lower = fewer parameters, faster training). Defaults\ + \ to 8.\n learning_rate (float, optional): Learning rate for training\ + \ optimization. Defaults to 3e-4.\n batch_size (int, optional): Per-device\ + \ training batch size. Defaults to 16.\n max_length (int, optional):\ + \ Maximum token sequence length for training. Defaults to 64.\n max_steps\ + \ (int, optional): Maximum number of training steps. If specified, overrides\ + \ epochs. Defaults to None.\n logging_steps (int, optional): Number\ + \ of steps between logging outputs. Defaults to 10.\n save_steps\ + \ (int, optional): Number of steps between model checkpoints. 
Defaults to\ + \ None.\n save_strategy (str, optional): Checkpoint saving strategy\ + \ (\"epoch\" or \"steps\"). Defaults to \"epoch\".\n optimizer (str,\ + \ optional): Optimizer to use (e.g., \"adamw_torch\", \"adamw_torch_fused\"\ + ). Defaults to \"adamw_torch\".\n adam_beta1 (float, optional): Beta1\ + \ parameter for Adam optimizer. Defaults to 0.9.\n adam_beta2 (float,\ + \ optional): Beta2 parameter for Adam optimizer. Defaults to 0.999.\n \ + \ adam_epsilon (float, optional): Epsilon parameter for Adam optimizer.\ + \ Defaults to 1e-8.\n weight_decay (float, optional): Weight decay\ + \ for regularization. Defaults to 0.01.\n use_flash_attention (bool,\ + \ optional): Whether to use Flash Attention 2 for improved performance.\ + \ Defaults to False.\n num_nodes (int, optional): Number of nodes\ + \ for distributed training. Defaults to 2.\n trainer_runtime (str,\ + \ optional): Runtime to use for Kubeflow Trainer. Defaults to \"torch-distributed\"\ + .\n \"\"\"\n import json\n import os\n import shutil\n import\ + \ textwrap\n import time\n import inspect\n\n from kubernetes import\ + \ client as k8s_client, config\n from kubernetes.client.rest import ApiException\n\ + \n def get_target_modules(model_name: str) -> list:\n \"\"\"Get\ + \ appropriate LoRA target modules based on model architecture.\n\n \ + \ Selects optimal layers for LoRA adaptation based on research findings:\n\ + \ - Attention layers (q_proj, k_proj, v_proj, o_proj) control attention\ + \ patterns\n - MLP layers (gate_proj, up_proj, down_proj) store task-specific\ + \ knowledge\n\n Model-specific targeting:\n - Granite: Attention\ + \ layers only (q,k,v,o)\n - LLaMA/Mistral/Qwen: Full coverage (attention\ + \ + MLP)\n - Phi: Uses 'dense' instead of 'o_proj'\n - Unknown:\ + \ Conservative fallback (q,v)\n\n Based on LoRA (arXiv:2106.09685),\ + \ QLoRA (arXiv:2305.14314), and model-specific research.\n \"\"\"\ + \n model_name_lower = model_name.lower()\n\n if \"granite\"\ + \ in 
model_name_lower:\n return [\"q_proj\", \"v_proj\", \"k_proj\"\ + , \"o_proj\"]\n elif \"llama\" in model_name_lower:\n \ + \ return [\n \"q_proj\",\n \"v_proj\",\n \ + \ \"k_proj\",\n \"o_proj\",\n \ + \ \"gate_proj\",\n \"up_proj\",\n \"down_proj\"\ + ,\n ]\n elif \"mistral\" in model_name_lower or \"mixtral\"\ + \ in model_name_lower:\n return [\n \"q_proj\"\ + ,\n \"v_proj\",\n \"k_proj\",\n \ + \ \"o_proj\",\n \"gate_proj\",\n \"up_proj\"\ + ,\n \"down_proj\",\n ]\n elif \"qwen\"\ + \ in model_name_lower:\n return [\n \"q_proj\"\ + ,\n \"v_proj\",\n \"k_proj\",\n \ + \ \"o_proj\",\n \"gate_proj\",\n \"up_proj\"\ + ,\n \"down_proj\",\n ]\n elif \"phi\" in\ + \ model_name_lower:\n return [\"q_proj\", \"v_proj\", \"k_proj\"\ + , \"dense\"]\n else:\n print(\n f\"Warning:\ + \ Unknown model architecture for {model_name}, using conservative LoRA targets\"\ + \n )\n return [\"q_proj\", \"v_proj\"]\n\n def\ + \ train_model_func(\n lora_rank: int,\n learning_rate:\ + \ float,\n batch_size: int,\n max_length: int,\n \ + \ model_name: str,\n dataset_path: str,\n \ + \ epochs: int,\n pvc_path: str,\n target_modules:\ + \ list,\n max_steps: int,\n logging_steps: int,\n\ + \ save_steps: int,\n save_strategy: str,\n \ + \ optimizer: str,\n adam_beta1: float,\n adam_beta2:\ + \ float,\n adam_epsilon: float,\n weight_decay: float,\n\ + \ use_flash_attention: bool,\n ):\n import os\n \ + \ import json\n import torch\n from datasets import load_from_disk\n\ + \ from peft import get_peft_model, LoraConfig\n from transformers\ + \ import (\n AutoModelForCausalLM,\n AutoTokenizer,\n\ + \ TrainerCallback,\n )\n from trl import SFTConfig,\ + \ SFTTrainer\n\n local_rank = int(os.environ.get(\"LOCAL_RANK\",\ + \ 0))\n world_rank = int(os.environ.get(\"RANK\", 0))\n world_size\ + \ = int(os.environ.get(\"WORLD_SIZE\", 1))\n\n print(\n \ + \ f\"Worker info - Local rank: {local_rank}, World rank: {world_rank},\ + \ World size: {world_size}\"\n )\n\n is_main_worker = world_rank\ + \ == 
0\n\n class MetricsCallback(TrainerCallback):\n def\ + \ __init__(self, is_main_worker):\n self.is_main_worker =\ + \ is_main_worker\n self.initial_loss = None\n \ + \ self.final_loss = None\n\n def on_log(self, args, state,\ + \ control, logs=None, **kwargs):\n if logs and self.is_main_worker\ + \ and \"loss\" in logs:\n if self.initial_loss is None:\n\ + \ self.initial_loss = logs[\"loss\"]\n \ + \ self.final_loss = logs[\"loss\"]\n\n metrics_callback\ + \ = MetricsCallback(is_main_worker)\n\n print(\"Downloading and loading\ + \ model\")\n model_kwargs = {\n \"device_map\": \"auto\"\ + ,\n \"torch_dtype\": torch.float16,\n \"trust_remote_code\"\ + : True,\n }\n if use_flash_attention:\n model_kwargs[\"\ + attn_implementation\"] = \"flash_attention_2\"\n\n model = AutoModelForCausalLM.from_pretrained(model_name,\ + \ **model_kwargs)\n\n print(f\"Using LoRA target modules for {model_name}:\ + \ {target_modules}\")\n\n config = LoraConfig(\n r=lora_rank,\n\ + \ lora_alpha=lora_rank * 2,\n bias=\"none\",\n \ + \ lora_dropout=0.05,\n task_type=\"CAUSAL_LM\",\n \ + \ target_modules=target_modules,\n )\n model = get_peft_model(model,\ + \ config)\n\n print(\"Loading dataset\")\n dataset = load_from_disk(dataset_path)\n\ + \n tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n\ + \n if tokenizer.pad_token is None:\n tokenizer.pad_token\ + \ = tokenizer.eos_token\n tokenizer.padding_side = \"right\"\n\n\ + \ sft_config = SFTConfig(\n ## Memory optimization\n \ + \ gradient_checkpointing=True,\n gradient_checkpointing_kwargs={\"\ + use_reentrant\": False},\n gradient_accumulation_steps=1,\n \ + \ per_device_train_batch_size=batch_size,\n auto_find_batch_size=True,\n\ + \ ## Dataset configuration\n max_length=max_length,\n\ + \ packing=use_flash_attention, # Packing works best with Flash\ + \ Attention\n ## Training parameters\n num_train_epochs=epochs\ + \ if max_steps is None else None,\n max_steps=-1 if max_steps\ + \ is None else 
max_steps,\n learning_rate=learning_rate,\n \ + \ optim=optimizer,\n ## Optimizer parameters\n \ + \ adam_beta1=adam_beta1,\n adam_beta2=adam_beta2,\n \ + \ adam_epsilon=adam_epsilon,\n weight_decay=weight_decay,\n\ + \ ## Logging and saving\n logging_steps=logging_steps,\n\ + \ save_steps=save_steps,\n save_strategy=save_strategy,\n\ + \ logging_dir=\"./logs\",\n report_to=\"none\",\n\ + \ )\n trainer = SFTTrainer(\n model=model,\n \ + \ processing_class=tokenizer,\n args=sft_config,\n \ + \ train_dataset=dataset,\n callbacks=[metrics_callback],\n\ + \ )\n\n train_result = trainer.train()\n\n if torch.distributed.is_initialized():\n\ + \ torch.distributed.barrier()\n print(f\"Worker {world_rank}\ + \ - Training completed and synchronized\")\n\n if not is_main_worker:\n\ + \ print(\n f\"Worker {world_rank} - Skipping model\ + \ export and metrics (not main worker)\"\n )\n # Clean\ + \ up distributed process group for non-main workers\n if torch.distributed.is_initialized():\n\ + \ print(f\"Worker {world_rank} - Cleaning up distributed\ + \ process group\")\n torch.distributed.destroy_process_group()\n\ + \ print(f\"Worker {world_rank} - Distributed process group\ + \ destroyed\")\n return\n\n print(\"Main worker (rank\ + \ 0) - Exporting model and metrics...\")\n\n # Save LoRA adapter\n\ + \ model_output_path = os.path.join(pvc_path, \"adapter\")\n \ + \ model.save_pretrained(model_output_path)\n tokenizer.save_pretrained(model_output_path)\n\ + \ print(\"LoRA adapter exported successfully!\")\n\n # Clean\ + \ up distributed process group for main worker AFTER model saving\n \ + \ if torch.distributed.is_initialized():\n print(f\"Worker\ + \ {world_rank} - Cleaning up distributed process group\")\n torch.distributed.destroy_process_group()\n\ + \ print(f\"Worker {world_rank} - Distributed process group destroyed\"\ + )\n\n print(f\"Collecting essential metrics\")\n metrics_dict\ + \ = {}\n\n if hasattr(train_result, \"train_loss\"):\n \ + \ 
metrics_dict[\"final_train_loss\"] = train_result.train_loss\n \ + \ if hasattr(train_result, \"train_runtime\"):\n metrics_dict[\"\ + train_runtime_seconds\"] = train_result.train_runtime\n if hasattr(train_result,\ + \ \"train_samples_per_second\"):\n metrics_dict[\"throughput_samples_per_sec\"\ + ] = (\n train_result.train_samples_per_second\n \ + \ )\n\n total_params = sum(p.numel() for p in model.parameters())\n\ + \ trainable_params = sum(p.numel() for p in model.parameters() if\ + \ p.requires_grad)\n metrics_dict[\"total_parameters_millions\"]\ + \ = total_params / 1_000_000\n metrics_dict[\"trainable_parameters_millions\"\ + ] = trainable_params / 1_000_000\n metrics_dict[\"lora_efficiency_percent\"\ + ] = (\n trainable_params\ + \ / total_params\n ) *\ + \ 100\n\n metrics_dict[\"lora_rank\"] = config.r\n metrics_dict[\"\ + learning_rate\"] = sft_config.learning_rate\n metrics_dict[\"effective_batch_size\"\ + ] = (\n sft_config.per_device_train_batch_size * world_size\n\ + \ )\n metrics_dict[\"dataset_size\"] = len(dataset)\n\n \ + \ metrics_dict[\"num_nodes\"] = (\n world_size // torch.cuda.device_count()\n\ + \ if torch.cuda.is_available() and torch.cuda.device_count()\ + \ > 0\n else 1\n )\n if torch.cuda.is_available():\n\ + \ metrics_dict[\"peak_gpu_memory_gb\"] = torch.cuda.max_memory_allocated()\ + \ / (\n 1024**3\n )\n\n if metrics_callback.initial_loss\ + \ and metrics_callback.final_loss:\n metrics_dict[\"initial_loss\"\ + ] = metrics_callback.initial_loss\n metrics_dict[\"loss_reduction\"\ + ] = (\n metrics_callback.initial_loss - metrics_callback.final_loss\n\ + \ )\n metrics_dict[\"loss_reduction_percent\"] = (\n\ + \ (metrics_callback.initial_loss\ + \ - metrics_callback.final_loss)\n \ + \ / metrics_callback.initial_loss\n \ + \ ) * 100\n\n with open(os.path.join(pvc_path,\ + \ \"metrics.json\"), \"w\") as f:\n json.dump(metrics_dict, f,\ + \ indent=2)\n\n print(\n f\"Exported {len(metrics_dict)}\ + \ metrics to {os.path.join(pvc_path, 
'metrics.json')}\"\n )\n \ + \ print(\"Model and metrics exported successfully!\")\n\n print(\"\ + Copying dataset to PVC...\")\n dataset_path = os.path.join(pvc_path,\ + \ \"dataset\", \"train\")\n os.makedirs(dataset_path, exist_ok=True)\n\ + \ shutil.copytree(\n input_dataset.path,\n dataset_path,\n\ + \ dirs_exist_ok=True,\n )\n print(f\"Dataset copied successfully\ + \ from {input_dataset.path} to {dataset_path}\")\n\n print(\"=== Starting\ + \ TrainJob creation process ===\")\n\n target_modules = get_target_modules(model_name)\n\ + \ print(f\"Selected LoRA target modules for {model_name}: {target_modules}\"\ + )\n\n with open(\n \"/var/run/secrets/kubernetes.io/serviceaccount/namespace\"\ + , \"r\"\n ) as ns_file:\n namespace = ns_file.readline()\n\n \ + \ print(\"Generating command...\")\n\n func_code = inspect.getsource(train_model_func)\n\ + \ func_code = textwrap.dedent(func_code)\n\n func_call_code = f\"\"\ + \"\nimport os\nimport json\n\n# Parse function arguments from environment\ + \ variable\nconfig_json = os.environ.get(\"TRAINING_CONFIG\", \"{{}}\")\n\ + func_args = json.loads(config_json)\n\n# Call the training function with\ + \ parsed arguments\n{train_model_func.__name__}(**func_args)\n\"\"\"\n\n\ + \ func_code = f\"{func_code}\\n{func_call_code}\"\n\n # Build package\ + \ list based on configuration\n packages = [\"transformers\", \"peft\"\ + , \"accelerate\", \"trl\"]\n if use_flash_attention:\n packages.append(\"\ + flash-attn\")\n packages_str = \" \".join(packages)\n\n install_script\ + \ = f\"\"\"set -e\nset -o pipefail\n\necho \"=== Starting container setup\ + \ ===\"\necho \"Python version: $(python --version)\"\n\nif ! 
[ -x \"$(command\ + \ -v pip)\" ]; then\n echo \"Installing pip...\"\n python -m ensurepip\ + \ || python -m ensurepip --user\nfi\n\necho \"Installing Python packages...\"\ + \nPIP_DISABLE_PIP_VERSION_CHECK=1 python -m pip install --user --quiet --no-warn-script-location\ + \ {packages_str}\n\necho \"Creating training script...\"\ncat > ephemeral_component.py\ + \ << 'EOF'\n{func_code}\nEOF\n\necho \"Starting distributed training...\"\ + \ntorchrun --nproc_per_node=1 ephemeral_component.py\"\"\"\n\n command\ + \ = [\"bash\", \"-c\", install_script]\n\n print(f\"Generated command:\ + \ {command}\")\n print(f\"Command length: {len(command)}\")\n print(f\"\ + Command type: {type(command)}\")\n\n print(\"Loading Kubernetes configuration...\"\ + )\n try:\n config.load_incluster_config()\n print(\"Loaded\ + \ in-cluster Kubernetes configuration\")\n except config.ConfigException:\n\ + \ config.load_kube_config()\n print(\"Loaded kubeconfig Kubernetes\ + \ configuration\")\n\n print(\"Creating Kubernetes API client...\")\n\ + \ api_client = k8s_client.ApiClient()\n custom_objects_api = k8s_client.CustomObjectsApi(api_client)\n\ + \ print(\"Successfully created Kubernetes API client\")\n\n print(\"\ + Defining TrainJob resource...\")\n\n env_vars = [\n {\"name\"\ + : \"HOME\", \"value\": \"/tmp\"},\n {\n \"name\": \"TRAINING_CONFIG\"\ + ,\n \"value\": json.dumps(\n {\n \ + \ \"lora_rank\": lora_rank,\n \"learning_rate\"\ + : learning_rate,\n \"batch_size\": batch_size,\n \ + \ \"max_length\": max_length,\n \"model_name\"\ + : model_name,\n \"dataset_path\": dataset_path,\n \ + \ \"epochs\": epochs,\n \"pvc_path\"\ + : pvc_path,\n \"target_modules\": target_modules,\n \ + \ \"max_steps\": max_steps,\n \"logging_steps\"\ + : logging_steps,\n \"save_steps\": save_steps,\n \ + \ \"save_strategy\": save_strategy,\n \ + \ \"optimizer\": optimizer,\n \"adam_beta1\": adam_beta1,\n\ + \ \"adam_beta2\": adam_beta2,\n \"\ + adam_epsilon\": adam_epsilon,\n \"weight_decay\": 
weight_decay,\n\ + \ \"use_flash_attention\": use_flash_attention,\n \ + \ }\n ),\n },\n *(kubernetes_config.env\ + \ or []),\n ]\n\n train_job = {\n \"apiVersion\": \"trainer.kubeflow.org/v1alpha1\"\ + ,\n \"kind\": \"TrainJob\",\n \"metadata\": {\"name\": f\"\ + kfp-{run_id}\", \"namespace\": namespace},\n \"spec\": {\n \ + \ \"runtimeRef\": {\"name\": trainer_runtime},\n \"trainer\"\ + : {\n \"numNodes\": num_nodes,\n \"resourcesPerNode\"\ + : kubernetes_config.resources,\n \"env\": env_vars,\n \ + \ \"command\": command,\n },\n \"podSpecOverrides\"\ + : [\n {\n \"targetJobs\": [{\"name\":\ + \ \"node\"}],\n \"volumes\": kubernetes_config.volumes,\n\ + \ \"containers\": [\n {\n \ + \ \"name\": \"node\",\n \ + \ \"volumeMounts\": kubernetes_config.volume_mounts,\n \ + \ }\n ],\n \"nodeSelector\"\ + : kubernetes_config.node_selector,\n \"tolerations\"\ + : kubernetes_config.tolerations,\n }\n ],\n \ + \ },\n }\n\n print(f\"TrainJob definition created:\")\n print(f\"\ + \ - Name: kfp-{run_id}\")\n print(f\" - Namespace: {namespace}\")\n\ + \n print(f\" - Runtime: {trainer_runtime}\")\n print(f\" - Nodes:\ + \ {num_nodes}\")\n print(f\" - Model: {model_name}\")\n print(f\"\ + \ - Dataset: {dataset_path}\")\n print(f\" - Epochs: {epochs}\")\n\n\ + \ print(\"Submitting TrainJob to Kubernetes...\")\n try:\n \ + \ response = custom_objects_api.create_namespaced_custom_object(\n \ + \ group=\"trainer.kubeflow.org\",\n version=\"v1alpha1\"\ + ,\n namespace=namespace,\n plural=\"trainjobs\",\n\ + \ body=train_job,\n )\n job_name = response[\"\ + metadata\"][\"name\"]\n print(f\"TrainJob {job_name} created successfully\"\ + )\n print(f\"Response metadata: {response.get('metadata', {})}\"\ + )\n except ApiException as e:\n print(f\"Error creating TrainJob:\ + \ {e}\")\n print(f\"Error details: {e.body}\")\n print(f\"\ + Error status: {e.status}\")\n raise\n\n print(f\"Starting to monitor\ + \ TrainJob {job_name} status...\")\n check_count = 0\n while True:\n\ + \ check_count 
+= 1\n try:\n print(f\"Checking job\ + \ status (attempt {check_count})...\")\n job_status = custom_objects_api.get_namespaced_custom_object(\n\ + \ group=\"trainer.kubeflow.org\",\n version=\"\ + v1alpha1\",\n namespace=namespace,\n plural=\"\ + trainjobs\",\n name=job_name,\n )\n\n \ + \ status = job_status.get(\"status\", {})\n conditions = status.get(\"\ + conditions\", [])\n print(f\"Job status conditions: {conditions}\"\ + )\n\n completed = False\n failed = False\n\n \ + \ for condition in conditions:\n condition_type = condition.get(\"\ + type\", \"\")\n condition_status = condition.get(\"status\"\ + , \"\")\n condition_reason = condition.get(\"reason\", \"\ + \")\n condition_message = condition.get(\"message\", \"\"\ + )\n\n print(\n f\"Condition: type={condition_type},\ + \ status={condition_status}, reason={condition_reason}\"\n \ + \ )\n\n if condition_type == \"Complete\" and condition_status\ + \ == \"True\":\n print(\n f\"\ + Training job {job_name} completed successfully: {condition_message}\"\n\ + \ )\n completed = True\n \ + \ break\n elif condition_type == \"Failed\" and\ + \ condition_status == \"True\":\n print(f\"Training job\ + \ {job_name} failed: {condition_message}\")\n failed\ + \ = True\n break\n elif condition_type\ + \ == \"Cancelled\" and condition_status == \"True\":\n \ + \ print(f\"Training job {job_name} was cancelled: {condition_message}\"\ + )\n failed = True\n break\n\n \ + \ if completed:\n break\n elif failed:\n\ + \ raise RuntimeError(f\"Training job {job_name} failed or\ + \ was cancelled\")\n else:\n print(f\"Job is still\ + \ running, continuing to wait...\")\n\n except ApiException as e:\n\ + \ print(f\"Error checking job status: {e}\")\n print(f\"\ + Error details: {e.body}\")\n\n print(f\"Waiting 10 seconds before\ + \ next check...\")\n time.sleep(10)\n\n print(f\"Training job\ + \ {job_name} completed. 
Logs would be retrieved here.\")\n\n print(\"\ + Processing training results...\")\n\n metrics_file_path = os.path.join(pvc_path,\ + \ \"metrics.json\")\n print(f\"Looking for metrics file at: {metrics_file_path}\"\ + )\n if os.path.exists(metrics_file_path):\n print(f\"Found metrics\ + \ file, reading from {metrics_file_path}\")\n with open(metrics_file_path,\ + \ \"r\") as f:\n metrics_dict = json.load(f)\n\n print(f\"\ + Loaded {len(metrics_dict)} metrics from file\")\n\n exported_count\ + \ = 0\n for metric_name, metric_value in metrics_dict.items():\n\ + \ # Ignore metrics that are 0 to avoid a bug in the RHOAI UI.\n\ + \ if isinstance(metric_value, (int, float)) and metric_value\ + \ != 0:\n output_metrics.log_metric(metric_name, metric_value)\n\ + \ print(f\"Exported metric: {metric_name} = {metric_value}\"\ + )\n exported_count += 1\n\n print(f\"Successfully\ + \ exported {exported_count} metrics to Kubeflow\")\n os.remove(metrics_file_path)\n\ + \ else:\n print(f\"Warning: Metrics file {metrics_file_path} not\ + \ found\")\n\n print(\"Copying model from PVC to Kubeflow output path...\"\ + )\n model_source = os.path.join(pvc_path, \"adapter\")\n print(f\"\ + Model source: {model_source}\")\n print(f\"Destination: {output_model.path}\"\ + )\n\n if not os.path.exists(model_source):\n raise FileNotFoundError(\n\ + \ f\"Trained model not found at expected location: {model_source}\"\ + \n )\n\n output_model.name = f\"{model_name}-adapter\"\n shutil.copytree(model_source,\ + \ output_model.path, dirs_exist_ok=True)\n print(f\"Model copied successfully\ + \ from {model_source} to {output_model.path}\")\n\n print(\"=== TrainJob\ + \ process completed successfully ===\")\n\n" + image: registry.access.redhat.com/ubi9/python-311:latest + resources: + accelerator: + count: '1' + resourceCount: '1' + resourceType: nvidia.com/gpu + type: nvidia.com/gpu + cpuLimit: 2.0 + cpuRequest: 2.0 + memoryLimit: 32.21225472 + memoryRequest: 32.21225472 + resourceCpuLimit: '2' + 
resourceCpuRequest: '2' + resourceMemoryLimit: 30Gi + resourceMemoryRequest: 30Gi +pipelineInfo: + description: Prepare Yoda dataset, finetune a base model with LoRA, and evaluate + baseline vs fine-tuned + name: yoda-finetune +root: + dag: + tasks: + evaluate-yoda-model: + cachingOptions: {} + componentRef: + name: comp-evaluate-yoda-model + dependentTasks: + - prepare-yoda-dataset + inputs: + artifacts: + custom_translation_dataset: + taskOutputArtifact: + outputArtifactKey: yoda_eval_dataset + producerTask: prepare-yoda-dataset + parameters: + limit: + componentInputParameter: eval_limit + model_path: + componentInputParameter: model_name + taskInfo: + name: evaluate-yoda-model + evaluate-yoda-model-2: + cachingOptions: {} + componentRef: + name: comp-evaluate-yoda-model-2 + dependentTasks: + - prepare-yoda-dataset + - train-model + inputs: + artifacts: + custom_translation_dataset: + taskOutputArtifact: + outputArtifactKey: yoda_eval_dataset + producerTask: prepare-yoda-dataset + lora_adapter: + taskOutputArtifact: + outputArtifactKey: output_model + producerTask: train-model + parameters: + limit: + componentInputParameter: eval_limit + model_path: + componentInputParameter: model_name + taskInfo: + name: evaluate-yoda-model-2 + prepare-yoda-dataset: + cachingOptions: {} + componentRef: + name: comp-prepare-yoda-dataset + inputs: + parameters: + operation_map: + runtimeValue: + constant: + remove_columns: translation + rename_column: + sentence: prompt + translation: + translation_extra: completion + yoda_input_dataset: + runtimeValue: + constant: dvgodoy/yoda_sentences + retryPolicy: + backoffDuration: 0s + backoffFactor: 2.0 + backoffMaxDuration: 3600s + maxRetryCount: 3 + taskInfo: + name: prepare-yoda-dataset + train-model: + cachingOptions: {} + componentRef: + name: comp-train-model + dependentTasks: + - prepare-yoda-dataset + inputs: + artifacts: + input_dataset: + taskOutputArtifact: + outputArtifactKey: yoda_train_dataset + producerTask: 
prepare-yoda-dataset + parameters: + model_name: + componentInputParameter: model_name + pvc_path: + runtimeValue: + constant: '{{$.workspace_path}}' + run_id: + runtimeValue: + constant: '{{$.pipeline_job_uuid}}' + taskInfo: + name: train-model + inputDefinitions: + parameters: + eval_limit: + description: 'Maximum number of examples per task for evaluation. + + Use None to evaluate all available examples. Defaults to None.' + isOptional: true + parameterType: NUMBER_INTEGER + model_name: + defaultValue: meta-llama/Llama-3.2-3B-Instruct + description: 'HuggingFace model ID for both baseline and training. + + Defaults to "meta-llama/Llama-3.2-3B-Instruct".' + isOptional: true + parameterType: STRING +schemaVersion: 2.1.0 +sdkVersion: kfp-2.15.2 +--- +platforms: + kubernetes: + deploymentSpec: + executors: + exec-evaluate-yoda-model: + secretAsEnv: + - keyToEnv: + - envVar: HF_TOKEN + secretKey: HF_TOKEN + optional: false + secretName: hf-token + secretNameParameter: + runtimeValue: + constant: hf-token + exec-evaluate-yoda-model-2: + secretAsEnv: + - keyToEnv: + - envVar: HF_TOKEN + secretKey: HF_TOKEN + optional: false + secretName: hf-token + secretNameParameter: + runtimeValue: + constant: hf-token + exec-train-model: + secretAsEnv: + - keyToEnv: + - envVar: HF_TOKEN + secretKey: HF_TOKEN + optional: false + secretName: hf-token + secretNameParameter: + runtimeValue: + constant: hf-token + pipelineConfig: + workspace: + kubernetes: + pvcSpecPatch: + accessModes: + - ReadWriteMany + storageClassName: efs-sc + size: 20Gi diff --git a/pyproject.toml b/pyproject.toml index a79fce369..a8a6939a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ readme = "README.md" requires-python = ">=3.11" dependencies = [ "kfp>=2.15.0", + "kfp-kubernetes>=2.15.0" ] [project.optional-dependencies] diff --git a/uv.lock b/uv.lock deleted file mode 100644 index 5ab42e728..000000000 --- a/uv.lock +++ /dev/null @@ -1,713 +0,0 @@ -version = 1 -revision = 3 
-requires-python = ">=3.11" -resolution-markers = [ - "python_full_version >= '3.13'", - "python_full_version < '3.13'", -] - -[[package]] -name = "cachetools" -version = "6.2.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fb/44/ca1675be2a83aeee1886ab745b28cda92093066590233cc501890eb8417a/cachetools-6.2.2.tar.gz", hash = "sha256:8e6d266b25e539df852251cfd6f990b4bc3a141db73b939058d809ebd2590fc6", size = 31571, upload-time = "2025-11-13T17:42:51.465Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e6/46/eb6eca305c77a4489affe1c5d8f4cae82f285d9addd8de4ec084a7184221/cachetools-6.2.2-py3-none-any.whl", hash = "sha256:6c09c98183bf58560c97b2abfcedcbaf6a896a490f534b031b661d3723b45ace", size = 11503, upload-time = "2025-11-13T17:42:50.232Z" }, -] - -[[package]] -name = "certifi" -version = "2025.11.12" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/8c/58f469717fa48465e4a50c014a0400602d3c437d7c0c468e17ada824da3a/certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316", size = 160538, upload-time = "2025-11-12T02:54:51.517Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" }, -] - -[[package]] -name = "charset-normalizer" -version = "3.4.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/ed/27/c6491ff4954e58a10f69ad90aca8a1b6fe9c5d3c6f380907af3c37435b59/charset_normalizer-3.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8", size = 206988, upload-time = "2025-10-14T04:40:33.79Z" }, - { url = "https://files.pythonhosted.org/packages/94/59/2e87300fe67ab820b5428580a53cad894272dbb97f38a7a814a2a1ac1011/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0", size = 147324, upload-time = "2025-10-14T04:40:34.961Z" }, - { url = "https://files.pythonhosted.org/packages/07/fb/0cf61dc84b2b088391830f6274cb57c82e4da8bbc2efeac8c025edb88772/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3", size = 142742, upload-time = "2025-10-14T04:40:36.105Z" }, - { url = "https://files.pythonhosted.org/packages/62/8b/171935adf2312cd745d290ed93cf16cf0dfe320863ab7cbeeae1dcd6535f/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc", size = 160863, upload-time = "2025-10-14T04:40:37.188Z" }, - { url = "https://files.pythonhosted.org/packages/09/73/ad875b192bda14f2173bfc1bc9a55e009808484a4b256748d931b6948442/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897", size = 157837, upload-time = "2025-10-14T04:40:38.435Z" }, - { url = 
"https://files.pythonhosted.org/packages/6d/fc/de9cce525b2c5b94b47c70a4b4fb19f871b24995c728e957ee68ab1671ea/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381", size = 151550, upload-time = "2025-10-14T04:40:40.053Z" }, - { url = "https://files.pythonhosted.org/packages/55/c2/43edd615fdfba8c6f2dfbd459b25a6b3b551f24ea21981e23fb768503ce1/charset_normalizer-3.4.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815", size = 149162, upload-time = "2025-10-14T04:40:41.163Z" }, - { url = "https://files.pythonhosted.org/packages/03/86/bde4ad8b4d0e9429a4e82c1e8f5c659993a9a863ad62c7df05cf7b678d75/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0", size = 150019, upload-time = "2025-10-14T04:40:42.276Z" }, - { url = "https://files.pythonhosted.org/packages/1f/86/a151eb2af293a7e7bac3a739b81072585ce36ccfb4493039f49f1d3cae8c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161", size = 143310, upload-time = "2025-10-14T04:40:43.439Z" }, - { url = "https://files.pythonhosted.org/packages/b5/fe/43dae6144a7e07b87478fdfc4dbe9efd5defb0e7ec29f5f58a55aeef7bf7/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4", size = 162022, upload-time = "2025-10-14T04:40:44.547Z" }, - { url = "https://files.pythonhosted.org/packages/80/e6/7aab83774f5d2bca81f42ac58d04caf44f0cc2b65fc6db2b3b2e8a05f3b3/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89", size = 149383, upload-time = "2025-10-14T04:40:46.018Z" }, 
- { url = "https://files.pythonhosted.org/packages/4f/e8/b289173b4edae05c0dde07f69f8db476a0b511eac556dfe0d6bda3c43384/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569", size = 159098, upload-time = "2025-10-14T04:40:47.081Z" }, - { url = "https://files.pythonhosted.org/packages/d8/df/fe699727754cae3f8478493c7f45f777b17c3ef0600e28abfec8619eb49c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224", size = 152991, upload-time = "2025-10-14T04:40:48.246Z" }, - { url = "https://files.pythonhosted.org/packages/1a/86/584869fe4ddb6ffa3bd9f491b87a01568797fb9bd8933f557dba9771beaf/charset_normalizer-3.4.4-cp311-cp311-win32.whl", hash = "sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a", size = 99456, upload-time = "2025-10-14T04:40:49.376Z" }, - { url = "https://files.pythonhosted.org/packages/65/f6/62fdd5feb60530f50f7e38b4f6a1d5203f4d16ff4f9f0952962c044e919a/charset_normalizer-3.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016", size = 106978, upload-time = "2025-10-14T04:40:50.844Z" }, - { url = "https://files.pythonhosted.org/packages/7a/9d/0710916e6c82948b3be62d9d398cb4fcf4e97b56d6a6aeccd66c4b2f2bd5/charset_normalizer-3.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1", size = 99969, upload-time = "2025-10-14T04:40:52.272Z" }, - { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" }, - { url = 
"https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" }, - { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" }, - { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" }, - { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" }, - { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = "2025-10-14T04:40:59.468Z" }, - { url = 
"https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" }, - { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" }, - { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" }, - { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" }, - { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" }, - { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" }, - { url = 
"https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" }, - { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" }, - { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload-time = "2025-10-14T04:41:10.467Z" }, - { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" }, - { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091, upload-time = "2025-10-14T04:41:13.346Z" }, - { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936, upload-time = "2025-10-14T04:41:14.461Z" }, - { url = 
"https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180, upload-time = "2025-10-14T04:41:15.588Z" }, - { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346, upload-time = "2025-10-14T04:41:16.738Z" }, - { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874, upload-time = "2025-10-14T04:41:17.923Z" }, - { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076, upload-time = "2025-10-14T04:41:19.106Z" }, - { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601, upload-time = "2025-10-14T04:41:20.245Z" }, - { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376, upload-time = "2025-10-14T04:41:21.398Z" }, - { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825, upload-time = "2025-10-14T04:41:22.583Z" }, - { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583, upload-time = "2025-10-14T04:41:23.754Z" }, - { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366, upload-time = "2025-10-14T04:41:25.27Z" }, - { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300, upload-time = "2025-10-14T04:41:26.725Z" }, - { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465, upload-time = "2025-10-14T04:41:28.322Z" }, - { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", 
size = 99404, upload-time = "2025-10-14T04:41:29.95Z" }, - { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092, upload-time = "2025-10-14T04:41:31.188Z" }, - { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408, upload-time = "2025-10-14T04:41:32.624Z" }, - { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746, upload-time = "2025-10-14T04:41:33.773Z" }, - { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889, upload-time = "2025-10-14T04:41:34.897Z" }, - { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641, upload-time = "2025-10-14T04:41:36.116Z" }, - { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = 
"sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779, upload-time = "2025-10-14T04:41:37.229Z" }, - { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035, upload-time = "2025-10-14T04:41:38.368Z" }, - { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542, upload-time = "2025-10-14T04:41:39.862Z" }, - { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524, upload-time = "2025-10-14T04:41:41.319Z" }, - { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395, upload-time = "2025-10-14T04:41:42.539Z" }, - { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680, upload-time = "2025-10-14T04:41:43.661Z" }, - { url = 
"https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045, upload-time = "2025-10-14T04:41:44.821Z" }, - { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687, upload-time = "2025-10-14T04:41:46.442Z" }, - { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014, upload-time = "2025-10-14T04:41:47.631Z" }, - { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044, upload-time = "2025-10-14T04:41:48.81Z" }, - { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940, upload-time = "2025-10-14T04:41:49.946Z" }, - { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104, upload-time = "2025-10-14T04:41:51.051Z" }, - { url = 
"https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743, upload-time = "2025-10-14T04:41:52.122Z" }, - { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" }, -] - -[[package]] -name = "click" -version = "8.3.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" }, -] - -[[package]] -name = "click-option-group" -version = "0.5.7" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b9/9f/1f917934da4e07ae7715a982347e3c2179556d8a58d1108c5da3e8f09c76/click_option_group-0.5.7.tar.gz", hash = "sha256:8dc780be038712fc12c9fecb3db4fe49e0d0723f9c171d7cda85c20369be693c", size = 22110, upload-time = "2025-03-24T13:24:55.897Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/93/27/bf74dc1494625c3b14dbcdb93740defd7b8c58dae3736be8d264f2a643fb/click_option_group-0.5.7-py3-none-any.whl", hash = "sha256:96b9f52f397ef4d916f81929bd6c1f85e89046c7a401a64e72a61ae74ad35c24", size = 11483, upload-time = "2025-03-24T13:24:54.611Z" }, -] - -[[package]] -name = "colorama" -version = "0.4.6" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, -] - -[[package]] -name = "docstring-parser" -version = "0.17.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b2/9d/c3b43da9515bd270df0f80548d9944e389870713cc1fe2b8fb35fe2bcefd/docstring_parser-0.17.0.tar.gz", hash = "sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912", size = 27442, upload-time = "2025-07-21T07:35:01.868Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896, upload-time = "2025-07-21T07:35:00.684Z" }, -] - -[[package]] -name = "google-api-core" -version = "2.28.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "google-auth" }, - { name = "googleapis-common-protos" }, - { name = "proto-plus" }, - { name = "protobuf" }, - { name = "requests" }, 
-] -sdist = { url = "https://files.pythonhosted.org/packages/61/da/83d7043169ac2c8c7469f0e375610d78ae2160134bf1b80634c482fa079c/google_api_core-2.28.1.tar.gz", hash = "sha256:2b405df02d68e68ce0fbc138559e6036559e685159d148ae5861013dc201baf8", size = 176759, upload-time = "2025-10-28T21:34:51.529Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ed/d4/90197b416cb61cefd316964fd9e7bd8324bcbafabf40eef14a9f20b81974/google_api_core-2.28.1-py3-none-any.whl", hash = "sha256:4021b0f8ceb77a6fb4de6fde4502cecab45062e66ff4f2895169e0b35bc9466c", size = 173706, upload-time = "2025-10-28T21:34:50.151Z" }, -] - -[[package]] -name = "google-auth" -version = "2.43.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cachetools" }, - { name = "pyasn1-modules" }, - { name = "rsa" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/ff/ef/66d14cf0e01b08d2d51ffc3c20410c4e134a1548fc246a6081eae585a4fe/google_auth-2.43.0.tar.gz", hash = "sha256:88228eee5fc21b62a1b5fe773ca15e67778cb07dc8363adcb4a8827b52d81483", size = 296359, upload-time = "2025-11-06T00:13:36.587Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6f/d1/385110a9ae86d91cc14c5282c61fe9f4dc41c0b9f7d423c6ad77038c4448/google_auth-2.43.0-py2.py3-none-any.whl", hash = "sha256:af628ba6fa493f75c7e9dbe9373d148ca9f4399b5ea29976519e0a3848eddd16", size = 223114, upload-time = "2025-11-06T00:13:35.209Z" }, -] - -[[package]] -name = "google-cloud-core" -version = "2.5.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "google-api-core" }, - { name = "google-auth" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a6/03/ef0bc99d0e0faf4fdbe67ac445e18cdaa74824fd93cd069e7bb6548cb52d/google_cloud_core-2.5.0.tar.gz", hash = "sha256:7c1b7ef5c92311717bd05301aa1a91ffbc565673d3b0b4163a52d8413a186963", size = 36027, upload-time = "2025-10-29T23:17:39.513Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/89/20/bfa472e327c8edee00f04beecc80baeddd2ab33ee0e86fd7654da49d45e9/google_cloud_core-2.5.0-py3-none-any.whl", hash = "sha256:67d977b41ae6c7211ee830c7912e41003ea8194bff15ae7d72fd6f51e57acabc", size = 29469, upload-time = "2025-10-29T23:17:38.548Z" }, -] - -[[package]] -name = "google-cloud-storage" -version = "3.7.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "google-api-core" }, - { name = "google-auth" }, - { name = "google-cloud-core" }, - { name = "google-crc32c" }, - { name = "google-resumable-media" }, - { name = "requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/d2/8e/fab2de1a0ab7fdbd452eaae5a9a5c933d0911c26b04efa0c76ddfd921259/google_cloud_storage-3.7.0.tar.gz", hash = "sha256:9ce59c65f4d6e372effcecc0456680a8d73cef4f2dc9212a0704799cb3d69237", size = 17258914, upload-time = "2025-12-09T18:24:48.97Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2d/80/6e5c7c83cea15ed4dfc4843b9df9db0716bc551ac938f7b5dd18a72bd5e4/google_cloud_storage-3.7.0-py3-none-any.whl", hash = "sha256:469bc9540936e02f8a4bfd1619e9dca1e42dec48f95e4204d783b36476a15093", size = 303364, upload-time = "2025-12-09T18:24:47.343Z" }, -] - -[[package]] -name = "google-crc32c" -version = "1.7.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/19/ae/87802e6d9f9d69adfaedfcfd599266bf386a54d0be058b532d04c794f76d/google_crc32c-1.7.1.tar.gz", hash = "sha256:2bff2305f98846f3e825dbeec9ee406f89da7962accdb29356e4eadc251bd472", size = 14495, upload-time = "2025-03-26T14:29:13.32Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/94/220139ea87822b6fdfdab4fb9ba81b3fff7ea2c82e2af34adc726085bffc/google_crc32c-1.7.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:6fbab4b935989e2c3610371963ba1b86afb09537fd0c633049be82afe153ac06", size = 30468, upload-time = "2025-03-26T14:32:52.215Z" }, - { url = 
"https://files.pythonhosted.org/packages/94/97/789b23bdeeb9d15dc2904660463ad539d0318286d7633fe2760c10ed0c1c/google_crc32c-1.7.1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:ed66cbe1ed9cbaaad9392b5259b3eba4a9e565420d734e6238813c428c3336c9", size = 30313, upload-time = "2025-03-26T14:57:38.758Z" }, - { url = "https://files.pythonhosted.org/packages/81/b8/976a2b843610c211e7ccb3e248996a61e87dbb2c09b1499847e295080aec/google_crc32c-1.7.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee6547b657621b6cbed3562ea7826c3e11cab01cd33b74e1f677690652883e77", size = 33048, upload-time = "2025-03-26T14:41:30.679Z" }, - { url = "https://files.pythonhosted.org/packages/c9/16/a3842c2cf591093b111d4a5e2bfb478ac6692d02f1b386d2a33283a19dc9/google_crc32c-1.7.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d68e17bad8f7dd9a49181a1f5a8f4b251c6dbc8cc96fb79f1d321dfd57d66f53", size = 32669, upload-time = "2025-03-26T14:41:31.432Z" }, - { url = "https://files.pythonhosted.org/packages/04/17/ed9aba495916fcf5fe4ecb2267ceb851fc5f273c4e4625ae453350cfd564/google_crc32c-1.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:6335de12921f06e1f774d0dd1fbea6bf610abe0887a1638f64d694013138be5d", size = 33476, upload-time = "2025-03-26T14:29:10.211Z" }, - { url = "https://files.pythonhosted.org/packages/dd/b7/787e2453cf8639c94b3d06c9d61f512234a82e1d12d13d18584bd3049904/google_crc32c-1.7.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2d73a68a653c57281401871dd4aeebbb6af3191dcac751a76ce430df4d403194", size = 30470, upload-time = "2025-03-26T14:34:31.655Z" }, - { url = "https://files.pythonhosted.org/packages/ed/b4/6042c2b0cbac3ec3a69bb4c49b28d2f517b7a0f4a0232603c42c58e22b44/google_crc32c-1.7.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:22beacf83baaf59f9d3ab2bbb4db0fb018da8e5aebdce07ef9f09fce8220285e", size = 30315, upload-time = "2025-03-26T15:01:54.634Z" }, - { url = 
"https://files.pythonhosted.org/packages/29/ad/01e7a61a5d059bc57b702d9ff6a18b2585ad97f720bd0a0dbe215df1ab0e/google_crc32c-1.7.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19eafa0e4af11b0a4eb3974483d55d2d77ad1911e6cf6f832e1574f6781fd337", size = 33180, upload-time = "2025-03-26T14:41:32.168Z" }, - { url = "https://files.pythonhosted.org/packages/3b/a5/7279055cf004561894ed3a7bfdf5bf90a53f28fadd01af7cd166e88ddf16/google_crc32c-1.7.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6d86616faaea68101195c6bdc40c494e4d76f41e07a37ffdef270879c15fb65", size = 32794, upload-time = "2025-03-26T14:41:33.264Z" }, - { url = "https://files.pythonhosted.org/packages/0f/d6/77060dbd140c624e42ae3ece3df53b9d811000729a5c821b9fd671ceaac6/google_crc32c-1.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:b7491bdc0c7564fcf48c0179d2048ab2f7c7ba36b84ccd3a3e1c3f7a72d3bba6", size = 33477, upload-time = "2025-03-26T14:29:10.94Z" }, - { url = "https://files.pythonhosted.org/packages/8b/72/b8d785e9184ba6297a8620c8a37cf6e39b81a8ca01bb0796d7cbb28b3386/google_crc32c-1.7.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:df8b38bdaf1629d62d51be8bdd04888f37c451564c2042d36e5812da9eff3c35", size = 30467, upload-time = "2025-03-26T14:36:06.909Z" }, - { url = "https://files.pythonhosted.org/packages/34/25/5f18076968212067c4e8ea95bf3b69669f9fc698476e5f5eb97d5b37999f/google_crc32c-1.7.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:e42e20a83a29aa2709a0cf271c7f8aefaa23b7ab52e53b322585297bb94d4638", size = 30309, upload-time = "2025-03-26T15:06:15.318Z" }, - { url = "https://files.pythonhosted.org/packages/92/83/9228fe65bf70e93e419f38bdf6c5ca5083fc6d32886ee79b450ceefd1dbd/google_crc32c-1.7.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:905a385140bf492ac300026717af339790921f411c0dfd9aa5a9e69a08ed32eb", size = 33133, upload-time = "2025-03-26T14:41:34.388Z" }, - { url = 
"https://files.pythonhosted.org/packages/c3/ca/1ea2fd13ff9f8955b85e7956872fdb7050c4ace8a2306a6d177edb9cf7fe/google_crc32c-1.7.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b211ddaf20f7ebeec5c333448582c224a7c90a9d98826fbab82c0ddc11348e6", size = 32773, upload-time = "2025-03-26T14:41:35.19Z" }, - { url = "https://files.pythonhosted.org/packages/89/32/a22a281806e3ef21b72db16f948cad22ec68e4bdd384139291e00ff82fe2/google_crc32c-1.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:0f99eaa09a9a7e642a61e06742856eec8b19fc0037832e03f941fe7cf0c8e4db", size = 33475, upload-time = "2025-03-26T14:29:11.771Z" }, - { url = "https://files.pythonhosted.org/packages/b8/c5/002975aff514e57fc084ba155697a049b3f9b52225ec3bc0f542871dd524/google_crc32c-1.7.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32d1da0d74ec5634a05f53ef7df18fc646666a25efaaca9fc7dcfd4caf1d98c3", size = 33243, upload-time = "2025-03-26T14:41:35.975Z" }, - { url = "https://files.pythonhosted.org/packages/61/cb/c585282a03a0cea70fcaa1bf55d5d702d0f2351094d663ec3be1c6c67c52/google_crc32c-1.7.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e10554d4abc5238823112c2ad7e4560f96c7bf3820b202660373d769d9e6e4c9", size = 32870, upload-time = "2025-03-26T14:41:37.08Z" }, - { url = "https://files.pythonhosted.org/packages/16/1b/1693372bf423ada422f80fd88260dbfd140754adb15cbc4d7e9a68b1cb8e/google_crc32c-1.7.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85fef7fae11494e747c9fd1359a527e5970fc9603c90764843caabd3a16a0a48", size = 28241, upload-time = "2025-03-26T14:41:45.898Z" }, - { url = "https://files.pythonhosted.org/packages/fd/3c/2a19a60a473de48717b4efb19398c3f914795b64a96cf3fbe82588044f78/google_crc32c-1.7.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6efb97eb4369d52593ad6f75e7e10d053cf00c48983f7a973105bc70b0ac4d82", size = 28048, upload-time = 
"2025-03-26T14:41:46.696Z" }, -] - -[[package]] -name = "google-resumable-media" -version = "2.8.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "google-crc32c" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/64/d7/520b62a35b23038ff005e334dba3ffc75fcf583bee26723f1fd8fd4b6919/google_resumable_media-2.8.0.tar.gz", hash = "sha256:f1157ed8b46994d60a1bc432544db62352043113684d4e030ee02e77ebe9a1ae", size = 2163265, upload-time = "2025-11-17T15:38:06.659Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1f/0b/93afde9cfe012260e9fe1522f35c9b72d6ee222f316586b1f23ecf44d518/google_resumable_media-2.8.0-py3-none-any.whl", hash = "sha256:dd14a116af303845a8d932ddae161a26e86cc229645bc98b39f026f9b1717582", size = 81340, upload-time = "2025-11-17T15:38:05.594Z" }, -] - -[[package]] -name = "googleapis-common-protos" -version = "1.72.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "protobuf" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e5/7b/adfd75544c415c487b33061fe7ae526165241c1ea133f9a9125a56b39fd8/googleapis_common_protos-1.72.0.tar.gz", hash = "sha256:e55a601c1b32b52d7a3e65f43563e2aa61bcd737998ee672ac9b951cd49319f5", size = 147433, upload-time = "2025-11-06T18:29:24.087Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c4/ab/09169d5a4612a5f92490806649ac8d41e3ec9129c636754575b3553f4ea4/googleapis_common_protos-1.72.0-py3-none-any.whl", hash = "sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038", size = 297515, upload-time = "2025-11-06T18:29:13.14Z" }, -] - -[[package]] -name = "idna" -version = "3.11" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = 
"2025-10-12T14:55:20.501Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, -] - -[[package]] -name = "iniconfig" -version = "2.3.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, -] - -[[package]] -name = "jinja2" -version = "3.1.6" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "markupsafe" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, -] - -[[package]] -name = "kfp" -version = "2.15.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, - { name = "click-option-group" }, - { name = "docstring-parser" }, - { name = 
"google-api-core" }, - { name = "google-auth" }, - { name = "google-cloud-storage" }, - { name = "kfp-pipeline-spec" }, - { name = "kfp-server-api" }, - { name = "kubernetes" }, - { name = "protobuf" }, - { name = "pyyaml" }, - { name = "requests-toolbelt" }, - { name = "tabulate" }, - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/89/4b/2dd0352eb21aa8166ec0effe48c08e64f4eb05dc82cf7586e39dfd35f828/kfp-2.15.2.tar.gz", hash = "sha256:389933cbebdead61dd1eef538ff09d467048dc97edee2a70c80ca264506501cd", size = 297293, upload-time = "2025-12-03T21:35:30.168Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/50/db/5ee012dafdaaf5981e2dfe9c990d78e4218d62c128ea743f5c2340d73329/kfp-2.15.2-py3-none-any.whl", hash = "sha256:5d522b522707d10fe1c2b42884f003a778794bb8d858e7a4e30545ab9dd7be97", size = 397542, upload-time = "2025-12-03T21:35:28.728Z" }, -] - -[[package]] -name = "kfp-components" -version = "1.11.0" -source = { editable = "." } -dependencies = [ - { name = "kfp" }, -] - -[package.optional-dependencies] -ci = [ - { name = "docstring-parser" }, - { name = "jinja2" }, - { name = "pytest" }, - { name = "pyyaml" }, -] -dev = [ - { name = "docstring-parser" }, - { name = "jinja2" }, - { name = "pytest" }, - { name = "pyyaml" }, -] - -[package.metadata] -requires-dist = [ - { name = "docstring-parser", marker = "extra == 'ci'" }, - { name = "jinja2", marker = "extra == 'ci'" }, - { name = "kfp", specifier = ">=2.15.0" }, - { name = "kfp-components", extras = ["ci"], marker = "extra == 'dev'" }, - { name = "pytest", marker = "extra == 'ci'" }, - { name = "pyyaml", marker = "extra == 'ci'" }, -] -provides-extras = ["ci", "dev"] - -[[package]] -name = "kfp-pipeline-spec" -version = "2.15.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "protobuf" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/73/84/510f88de64ef4f0c2ff173f553fa4b038715617139a1cd41bed3ef677e09/kfp_pipeline_spec-2.15.2.tar.gz", hash = "sha256:a5979dd74d9bd02c7614bc372d58ab26929aa6677cd460b61f73a316cf7e04b4", size = 10527, upload-time = "2025-12-03T21:35:40.71Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d4/71/30886a62d9ffa7b5a4709e1d66e394eecbe0376877a313b31ab9cef46322/kfp_pipeline_spec-2.15.2-py3-none-any.whl", hash = "sha256:b906cca1400ca097967a6e66a10799dcf17703052264e4b0093bcb0e7f4ed380", size = 9848, upload-time = "2025-12-03T21:35:39.304Z" }, -] - -[[package]] -name = "kfp-server-api" -version = "2.15.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "certifi" }, - { name = "python-dateutil" }, - { name = "six" }, - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/3d/b6/130028135dc5a59fe7bca33d589f9ba55f86449edc32a3906d7cd8fd635d/kfp_server_api-2.15.2.tar.gz", hash = "sha256:e24493de35f56712bf0c9cdd3ce8df9b0c4f472a6f3b7111ee387b9f467c5c0e", size = 63454, upload-time = "2025-12-03T21:35:08.532Z" } - -[[package]] -name = "kubernetes" -version = "30.1.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "certifi" }, - { name = "google-auth" }, - { name = "oauthlib" }, - { name = "python-dateutil" }, - { name = "pyyaml" }, - { name = "requests" }, - { name = "requests-oauthlib" }, - { name = "six" }, - { name = "urllib3" }, - { name = "websocket-client" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/82/3c/9f29f6cab7f35df8e54f019e5719465fa97b877be2454e99f989270b4f34/kubernetes-30.1.0.tar.gz", hash = "sha256:41e4c77af9f28e7a6c314e3bd06a8c6229ddd787cad684e0ab9f69b498e98ebc", size = 887810, upload-time = "2024-06-06T15:58:30.031Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/62/a1/2027ddede72d33be2effc087580aeba07e733a7360780ae87226f1f91bd8/kubernetes-30.1.0-py2.py3-none-any.whl", hash = 
"sha256:e212e8b7579031dd2e512168b617373bc1e03888d41ac4e04039240a292d478d", size = 1706042, upload-time = "2024-06-06T15:58:27.13Z" }, -] - -[[package]] -name = "markupsafe" -version = "3.0.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/08/db/fefacb2136439fc8dd20e797950e749aa1f4997ed584c62cfb8ef7c2be0e/markupsafe-3.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1cc7ea17a6824959616c525620e387f6dd30fec8cb44f649e31712db02123dad", size = 11631, upload-time = "2025-09-27T18:36:18.185Z" }, - { url = "https://files.pythonhosted.org/packages/e1/2e/5898933336b61975ce9dc04decbc0a7f2fee78c30353c5efba7f2d6ff27a/markupsafe-3.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4bd4cd07944443f5a265608cc6aab442e4f74dff8088b0dfc8238647b8f6ae9a", size = 12058, upload-time = "2025-09-27T18:36:19.444Z" }, - { url = "https://files.pythonhosted.org/packages/1d/09/adf2df3699d87d1d8184038df46a9c80d78c0148492323f4693df54e17bb/markupsafe-3.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b5420a1d9450023228968e7e6a9ce57f65d148ab56d2313fcd589eee96a7a50", size = 24287, upload-time = "2025-09-27T18:36:20.768Z" }, - { url = "https://files.pythonhosted.org/packages/30/ac/0273f6fcb5f42e314c6d8cd99effae6a5354604d461b8d392b5ec9530a54/markupsafe-3.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0bf2a864d67e76e5c9a34dc26ec616a66b9888e25e7b9460e1c76d3293bd9dbf", size = 22940, upload-time = "2025-09-27T18:36:22.249Z" }, - { url = 
"https://files.pythonhosted.org/packages/19/ae/31c1be199ef767124c042c6c3e904da327a2f7f0cd63a0337e1eca2967a8/markupsafe-3.0.3-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc51efed119bc9cfdf792cdeaa4d67e8f6fcccab66ed4bfdd6bde3e59bfcbb2f", size = 21887, upload-time = "2025-09-27T18:36:23.535Z" }, - { url = "https://files.pythonhosted.org/packages/b2/76/7edcab99d5349a4532a459e1fe64f0b0467a3365056ae550d3bcf3f79e1e/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a", size = 23692, upload-time = "2025-09-27T18:36:24.823Z" }, - { url = "https://files.pythonhosted.org/packages/a4/28/6e74cdd26d7514849143d69f0bf2399f929c37dc2b31e6829fd2045b2765/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7be7b61bb172e1ed687f1754f8e7484f1c8019780f6f6b0786e76bb01c2ae115", size = 21471, upload-time = "2025-09-27T18:36:25.95Z" }, - { url = "https://files.pythonhosted.org/packages/62/7e/a145f36a5c2945673e590850a6f8014318d5577ed7e5920a4b3448e0865d/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9e130248f4462aaa8e2552d547f36ddadbeaa573879158d721bbd33dfe4743a", size = 22923, upload-time = "2025-09-27T18:36:27.109Z" }, - { url = "https://files.pythonhosted.org/packages/0f/62/d9c46a7f5c9adbeeeda52f5b8d802e1094e9717705a645efc71b0913a0a8/markupsafe-3.0.3-cp311-cp311-win32.whl", hash = "sha256:0db14f5dafddbb6d9208827849fad01f1a2609380add406671a26386cdf15a19", size = 14572, upload-time = "2025-09-27T18:36:28.045Z" }, - { url = "https://files.pythonhosted.org/packages/83/8a/4414c03d3f891739326e1783338e48fb49781cc915b2e0ee052aa490d586/markupsafe-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:de8a88e63464af587c950061a5e6a67d3632e36df62b986892331d4620a35c01", size = 15077, upload-time = "2025-09-27T18:36:29.025Z" }, - { url = 
"https://files.pythonhosted.org/packages/35/73/893072b42e6862f319b5207adc9ae06070f095b358655f077f69a35601f0/markupsafe-3.0.3-cp311-cp311-win_arm64.whl", hash = "sha256:3b562dd9e9ea93f13d53989d23a7e775fdfd1066c33494ff43f5418bc8c58a5c", size = 13876, upload-time = "2025-09-27T18:36:29.954Z" }, - { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615, upload-time = "2025-09-27T18:36:30.854Z" }, - { url = "https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020, upload-time = "2025-09-27T18:36:31.971Z" }, - { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332, upload-time = "2025-09-27T18:36:32.813Z" }, - { url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947, upload-time = "2025-09-27T18:36:33.86Z" }, - { url = "https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962, upload-time = "2025-09-27T18:36:35.099Z" }, - { url = 
"https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760, upload-time = "2025-09-27T18:36:36.001Z" }, - { url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529, upload-time = "2025-09-27T18:36:36.906Z" }, - { url = "https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015, upload-time = "2025-09-27T18:36:37.868Z" }, - { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540, upload-time = "2025-09-27T18:36:38.761Z" }, - { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105, upload-time = "2025-09-27T18:36:39.701Z" }, - { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906, upload-time = "2025-09-27T18:36:40.689Z" }, - { url = 
"https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622, upload-time = "2025-09-27T18:36:41.777Z" }, - { url = "https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029, upload-time = "2025-09-27T18:36:43.257Z" }, - { url = "https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374, upload-time = "2025-09-27T18:36:44.508Z" }, - { url = "https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980, upload-time = "2025-09-27T18:36:45.385Z" }, - { url = "https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990, upload-time = "2025-09-27T18:36:46.916Z" }, - { url = "https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784, upload-time = "2025-09-27T18:36:47.884Z" }, - { url = 
"https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588, upload-time = "2025-09-27T18:36:48.82Z" }, - { url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041, upload-time = "2025-09-27T18:36:49.797Z" }, - { url = "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543, upload-time = "2025-09-27T18:36:51.584Z" }, - { url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113, upload-time = "2025-09-27T18:36:52.537Z" }, - { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911, upload-time = "2025-09-27T18:36:53.513Z" }, - { url = "https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658, upload-time = "2025-09-27T18:36:54.819Z" }, - { url = 
"https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066, upload-time = "2025-09-27T18:36:55.714Z" }, - { url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639, upload-time = "2025-09-27T18:36:56.908Z" }, - { url = "https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569, upload-time = "2025-09-27T18:36:57.913Z" }, - { url = "https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284, upload-time = "2025-09-27T18:36:58.833Z" }, - { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801, upload-time = "2025-09-27T18:36:59.739Z" }, - { url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769, upload-time = "2025-09-27T18:37:00.719Z" }, - { 
url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642, upload-time = "2025-09-27T18:37:01.673Z" }, - { url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612, upload-time = "2025-09-27T18:37:02.639Z" }, - { url = "https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200, upload-time = "2025-09-27T18:37:03.582Z" }, - { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973, upload-time = "2025-09-27T18:37:04.929Z" }, - { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload-time = "2025-09-27T18:37:06.342Z" }, - { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload-time = "2025-09-27T18:37:07.213Z" }, - { url = 
"https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload-time = "2025-09-27T18:37:09.572Z" }, - { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload-time = "2025-09-27T18:37:10.58Z" }, - { url = "https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload-time = "2025-09-27T18:37:11.547Z" }, - { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload-time = "2025-09-27T18:37:12.48Z" }, - { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload-time = "2025-09-27T18:37:13.485Z" }, - { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload-time = "2025-09-27T18:37:14.408Z" }, - { url = 
"https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747, upload-time = "2025-09-27T18:37:15.36Z" }, - { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341, upload-time = "2025-09-27T18:37:16.496Z" }, - { url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073, upload-time = "2025-09-27T18:37:17.476Z" }, - { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload-time = "2025-09-27T18:37:18.453Z" }, - { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload-time = "2025-09-27T18:37:19.332Z" }, - { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload-time = "2025-09-27T18:37:20.245Z" }, - { url = 
"https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload-time = "2025-09-27T18:37:21.177Z" }, - { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload-time = "2025-09-27T18:37:22.167Z" }, - { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload-time = "2025-09-27T18:37:23.296Z" }, - { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload-time = "2025-09-27T18:37:24.237Z" }, - { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" }, - { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819, upload-time = "2025-09-27T18:37:26.285Z" }, - { url = 
"https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426, upload-time = "2025-09-27T18:37:27.316Z" }, - { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, -] - -[[package]] -name = "oauthlib" -version = "3.3.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0b/5f/19930f824ffeb0ad4372da4812c50edbd1434f678c90c2733e1188edfc63/oauthlib-3.3.1.tar.gz", hash = "sha256:0f0f8aa759826a193cf66c12ea1af1637f87b9b4622d46e866952bb022e538c9", size = 185918, upload-time = "2025-06-19T22:48:08.269Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065, upload-time = "2025-06-19T22:48:06.508Z" }, -] - -[[package]] -name = "packaging" -version = "25.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, 
-] - -[[package]] -name = "pluggy" -version = "1.6.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, -] - -[[package]] -name = "proto-plus" -version = "1.26.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "protobuf" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f4/ac/87285f15f7cce6d4a008f33f1757fb5a13611ea8914eb58c3d0d26243468/proto_plus-1.26.1.tar.gz", hash = "sha256:21a515a4c4c0088a773899e23c7bbade3d18f9c66c73edd4c7ee3816bc96a012", size = 56142, upload-time = "2025-03-10T15:54:38.843Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4e/6d/280c4c2ce28b1593a19ad5239c8b826871fc6ec275c21afc8e1820108039/proto_plus-1.26.1-py3-none-any.whl", hash = "sha256:13285478c2dcf2abb829db158e1047e2f1e8d63a077d94263c2b88b043c75a66", size = 50163, upload-time = "2025-03-10T15:54:37.335Z" }, -] - -[[package]] -name = "protobuf" -version = "6.33.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/34/44/e49ecff446afeec9d1a66d6bbf9adc21e3c7cea7803a920ca3773379d4f6/protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4", size = 444296, upload-time = "2025-12-06T00:17:53.311Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/bc/91/1e3a34881a88697a7354ffd177e8746e97a722e5e8db101544b47e84afb1/protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d", size = 425603, upload-time = "2025-12-06T00:17:41.114Z" }, - { url = "https://files.pythonhosted.org/packages/64/20/4d50191997e917ae13ad0a235c8b42d8c1ab9c3e6fd455ca16d416944355/protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4", size = 436930, upload-time = "2025-12-06T00:17:43.278Z" }, - { url = "https://files.pythonhosted.org/packages/b2/ca/7e485da88ba45c920fb3f50ae78de29ab925d9e54ef0de678306abfbb497/protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43", size = 427621, upload-time = "2025-12-06T00:17:44.445Z" }, - { url = "https://files.pythonhosted.org/packages/7d/4f/f743761e41d3b2b2566748eb76bbff2b43e14d5fcab694f494a16458b05f/protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e", size = 324460, upload-time = "2025-12-06T00:17:45.678Z" }, - { url = "https://files.pythonhosted.org/packages/b1/fa/26468d00a92824020f6f2090d827078c09c9c587e34cbfd2d0c7911221f8/protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872", size = 339168, upload-time = "2025-12-06T00:17:46.813Z" }, - { url = "https://files.pythonhosted.org/packages/56/13/333b8f421738f149d4fe5e49553bc2a2ab75235486259f689b4b91f96cec/protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f", size = 323270, upload-time = "2025-12-06T00:17:48.253Z" }, - { url = "https://files.pythonhosted.org/packages/0e/15/4f02896cc3df04fc465010a4c6a0cd89810f54617a32a70ef531ed75d61c/protobuf-6.33.2-py3-none-any.whl", hash = 
"sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c", size = 170501, upload-time = "2025-12-06T00:17:52.211Z" }, -] - -[[package]] -name = "pyasn1" -version = "0.6.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ba/e9/01f1a64245b89f039897cb0130016d79f77d52669aae6ee7b159a6c4c018/pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034", size = 145322, upload-time = "2024-09-10T22:41:42.55Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/f1/d6a797abb14f6283c0ddff96bbdd46937f64122b8c925cab503dd37f8214/pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629", size = 83135, upload-time = "2024-09-11T16:00:36.122Z" }, -] - -[[package]] -name = "pyasn1-modules" -version = "0.4.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyasn1" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" }, -] - -[[package]] -name = "pygments" -version = "2.19.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } -wheels = [ 
- { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, -] - -[[package]] -name = "pytest" -version = "9.0.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "iniconfig" }, - { name = "packaging" }, - { name = "pluggy" }, - { name = "pygments" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, -] - -[[package]] -name = "python-dateutil" -version = "2.9.0.post0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "six" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, -] - -[[package]] -name = "pyyaml" 
-version = "6.0.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6d/16/a95b6757765b7b031c9374925bb718d55e0a9ba8a1b6a12d25962ea44347/pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e", size = 185826, upload-time = "2025-09-25T21:31:58.655Z" }, - { url = "https://files.pythonhosted.org/packages/16/19/13de8e4377ed53079ee996e1ab0a9c33ec2faf808a4647b7b4c0d46dd239/pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824", size = 175577, upload-time = "2025-09-25T21:32:00.088Z" }, - { url = "https://files.pythonhosted.org/packages/0c/62/d2eb46264d4b157dae1275b573017abec435397aa59cbcdab6fc978a8af4/pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c", size = 775556, upload-time = "2025-09-25T21:32:01.31Z" }, - { url = "https://files.pythonhosted.org/packages/10/cb/16c3f2cf3266edd25aaa00d6c4350381c8b012ed6f5276675b9eba8d9ff4/pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00", size = 882114, upload-time = "2025-09-25T21:32:03.376Z" }, - { url = "https://files.pythonhosted.org/packages/71/60/917329f640924b18ff085ab889a11c763e0b573da888e8404ff486657602/pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d", size = 
806638, upload-time = "2025-09-25T21:32:04.553Z" }, - { url = "https://files.pythonhosted.org/packages/dd/6f/529b0f316a9fd167281a6c3826b5583e6192dba792dd55e3203d3f8e655a/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a", size = 767463, upload-time = "2025-09-25T21:32:06.152Z" }, - { url = "https://files.pythonhosted.org/packages/f2/6a/b627b4e0c1dd03718543519ffb2f1deea4a1e6d42fbab8021936a4d22589/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4", size = 794986, upload-time = "2025-09-25T21:32:07.367Z" }, - { url = "https://files.pythonhosted.org/packages/45/91/47a6e1c42d9ee337c4839208f30d9f09caa9f720ec7582917b264defc875/pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b", size = 142543, upload-time = "2025-09-25T21:32:08.95Z" }, - { url = "https://files.pythonhosted.org/packages/da/e3/ea007450a105ae919a72393cb06f122f288ef60bba2dc64b26e2646fa315/pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf", size = 158763, upload-time = "2025-09-25T21:32:09.96Z" }, - { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" }, - { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" }, - { url = 
"https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" }, - { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" }, - { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time = "2025-09-25T21:32:16.431Z" }, - { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" }, - { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" }, - { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" }, - { url = 
"https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" }, - { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" }, - { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" }, - { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" }, - { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" }, - { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" }, - { url = 
"https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" }, - { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" }, - { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" }, - { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" }, - { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" }, - { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" }, - { url = 
"https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" }, - { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" }, - { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" }, - { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" }, - { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" }, - { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" }, - { url = 
"https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" }, - { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" }, - { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" }, - { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" }, - { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" }, - { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" }, - { url = 
"https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" }, - { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" }, - { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" }, - { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" }, - { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" }, - { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, -] - -[[package]] -name = "requests" -version = "2.32.5" -source = { 
registry = "https://pypi.org/simple" } -dependencies = [ - { name = "certifi" }, - { name = "charset-normalizer" }, - { name = "idna" }, - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, -] - -[[package]] -name = "requests-oauthlib" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "oauthlib" }, - { name = "requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/42/f2/05f29bc3913aea15eb670be136045bf5c5bbf4b99ecb839da9b422bb2c85/requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9", size = 55650, upload-time = "2024-03-22T20:32:29.939Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179, upload-time = "2024-03-22T20:32:28.055Z" }, -] - -[[package]] -name = "requests-toolbelt" -version = "1.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f3/61/d7545dafb7ac2230c70d38d31cbfe4cc64f7144dc41f6e4e4b78ecd9f5bb/requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6", size = 206888, upload-time = 
"2023-05-01T04:11:33.229Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481, upload-time = "2023-05-01T04:11:28.427Z" }, -] - -[[package]] -name = "rsa" -version = "4.9.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyasn1" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/da/8a/22b7beea3ee0d44b1916c0c1cb0ee3af23b700b6da9f04991899d0c555d4/rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75", size = 29034, upload-time = "2025-04-16T09:51:18.218Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" }, -] - -[[package]] -name = "six" -version = "1.17.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, -] - -[[package]] -name = "tabulate" -version = "0.9.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/ec/fe/802052aecb21e3797b8f7902564ab6ea0d60ff8ca23952079064155d1ae1/tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c", size = 81090, upload-time = "2022-10-06T17:21:48.54Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" }, -] - -[[package]] -name = "urllib3" -version = "2.6.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5e/1d/0f3a93cca1ac5e8287842ed4eebbd0f7a991315089b1a0b01c7788aa7b63/urllib3-2.6.1.tar.gz", hash = "sha256:5379eb6e1aba4088bae84f8242960017ec8d8e3decf30480b3a1abdaa9671a3f", size = 432678, upload-time = "2025-12-08T15:25:26.773Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/56/190ceb8cb10511b730b564fb1e0293fa468363dbad26145c34928a60cb0c/urllib3-2.6.1-py3-none-any.whl", hash = "sha256:e67d06fe947c36a7ca39f4994b08d73922d40e6cca949907be05efa6fd75110b", size = 131138, upload-time = "2025-12-08T15:25:25.51Z" }, -] - -[[package]] -name = "websocket-client" -version = "1.9.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2c/41/aa4bf9664e4cda14c3b39865b12251e8e7d239f4cd0e3cc1b6c2ccde25c1/websocket_client-1.9.0.tar.gz", hash = "sha256:9e813624b6eb619999a97dc7958469217c3176312b3a16a4bd1bc7e08a46ec98", size = 70576, upload-time = "2025-10-07T21:16:36.495Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/34/db/b10e48aa8fff7407e67470363eac595018441cf32d5e1001567a7aeba5d2/websocket_client-1.9.0-py3-none-any.whl", hash = "sha256:af248a825037ef591efbf6ed20cc5faa03d3b47b9e5a2230a529eeee1c1fc3ef", size = 82616, upload-time = "2025-10-07T21:16:34.951Z" }, -]