From 1d8ec5bae484b0eb93b036a608e54f6908a5e207 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Tue, 9 Apr 2024 10:39:57 -0400 Subject: [PATCH 01/36] Starting to figure out data --- azureml/components/src/jsonl_gsm8k_fetch.py | 79 +++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 azureml/components/src/jsonl_gsm8k_fetch.py diff --git a/azureml/components/src/jsonl_gsm8k_fetch.py b/azureml/components/src/jsonl_gsm8k_fetch.py new file mode 100644 index 0000000..4e53639 --- /dev/null +++ b/azureml/components/src/jsonl_gsm8k_fetch.py @@ -0,0 +1,79 @@ +import argparse +import json +import pathlib +import re + +from typing import Any, Dict + +import requests + + +from aether_utils.jsonl_file_utils import JSONLWriter, JSONLReader +from aether_utils.logging_utils import get_standard_logger_for_file + +_logger = get_standard_logger_for_file(__file__) + +BASE_DATA_URL = "https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/" + +SPLITS = ["train", "test"] + + +def parse_args(): + parser = argparse.ArgumentParser(add_help=True) + + # Information about the ports + ports_group = parser.add_argument_group("Ports") + ports_group.add_argument("--output_dataset", type=pathlib.Path, required=True) + ports_group.add_argument("--output_encoding", type=str, required=True) + + args = parser.parse_args() + return args + + +def extract_thought_parts(thought: str) -> Dict[str,Any]: + result = dict() + + thought_re = r"(.*)<<(.*=\d+)>>(.*)" + match = re.match(thought_re, thought) + + result["step"] = match.group(1) + result["calculation"] = match.group(2) + result["result"] = match.group(3) + + return result + +def process_line(item: Dict[str, Any]) -> Dict[str,Any]: + result = dict() + + result["question"] = item["question"] + + split_answer = item["answer"].split("####") + result["answer"] = float(split_answer[1]) + + result["thoughts"] = [] + for thought in split_answer[0].splitlines(): + result["thoughts"].append(extract_thought_parts(thought)) + return result + + +def main(): + args = parse_args() + + for split in ["test"]: + target_url = f"{BASE_DATA_URL}{split}.jsonl" + + _logger.info(f"Fetching {target_url}") + response = requests.get(target_url) + assert response.status_code == 200, f"Got response {response}" + + for line in response.text.splitlines(): + nxt_item = json.loads(line) + output_item = process_line(nxt_item) + + print(json.dumps(output_item, indent=4)) + + _logger.info("Complete") + + +if __name__ == "__main__": + main() From cc7fcc18e823d8172b457e4fb58312e1bd433fde Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Tue, 9 Apr 2024 10:57:30 -0400 Subject: [PATCH 02/36] Make sure we can process data --- azureml/components/src/jsonl_gsm8k_fetch.py | 43 +++++++++++++-------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/azureml/components/src/jsonl_gsm8k_fetch.py b/azureml/components/src/jsonl_gsm8k_fetch.py index 4e53639..02541a5 100644 --- a/azureml/components/src/jsonl_gsm8k_fetch.py +++ b/azureml/components/src/jsonl_gsm8k_fetch.py @@ -1,5 +1,6 @@ import argparse import json +import locale import pathlib import re @@ -30,25 +31,28 @@ def parse_args(): return args -def extract_thought_parts(thought: str) -> Dict[str,Any]: - result = dict() - - thought_re = r"(.*)<<(.*=\d+)>>(.*)" +def extract_thought_parts(thought: str) -> Dict[str, Any]: + thought_re = r"(.*)<<(.*=.*)>>(.*)" match = re.match(thought_re, thought) - result["step"] = match.group(1) - result["calculation"] = match.group(2) - result["result"] = match.group(3) - + result = dict() + if match: + result["step"] = match.group(1) + result["calculation"] = match.group(2) + result["result"] = match.group(3) + else: + result["step"] = thought return result -def process_line(item: Dict[str, Any]) -> Dict[str,Any]: + +def process_line(item: Dict[str, Any]) -> Dict[str, Any]: result = dict() + _logger.debug(f"Processing {item}") result["question"] = item["question"] split_answer = item["answer"].split("####") - result["answer"] = float(split_answer[1]) + result["answer"] = locale.atof(split_answer[1]) result["thoughts"] = [] for thought in split_answer[0].splitlines(): @@ -59,18 +63,25 @@ def process_line(item: Dict[str, Any]) -> Dict[str,Any]: def main(): args = parse_args() - for split in ["test"]: + # For parsing numbers + locale.setlocale(locale.LC_ALL, "en_US.UTF-8") + + for split in SPLITS: + _logger.info(f"Starting split {split}") target_url = f"{BASE_DATA_URL}{split}.jsonl" _logger.info(f"Fetching {target_url}") response = requests.get(target_url) assert response.status_code == 200, f"Got response {response}" - for line in response.text.splitlines(): - nxt_item = json.loads(line) - output_item = process_line(nxt_item) - - print(json.dumps(output_item, indent=4)) + with JSONLWriter( + args.output_dataset / f"{split}.jsonl", args.output_encoding + ) as jlw: + for line in response.text.splitlines(): + nxt_item = json.loads(line) + output_item = process_line(nxt_item) + jlw.write_line(output_item) + _logger.info(f"Completed split {split}") _logger.info("Complete") From b6f44fb04caeeebd03afb82d73d92e1ddb841f47 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Tue, 9 Apr 2024 10:59:33 -0400 Subject: [PATCH 03/36] Add component definition YAML --- .../jsonl_gsm8k_fetch_component.yaml | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 azureml/components/jsonl_gsm8k_fetch_component.yaml diff --git a/azureml/components/jsonl_gsm8k_fetch_component.yaml b/azureml/components/jsonl_gsm8k_fetch_component.yaml new file mode 100644 index 0000000..dc647e9 --- /dev/null +++ b/azureml/components/jsonl_gsm8k_fetch_component.yaml @@ -0,0 +1,32 @@ +$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json + +name: jsonl_gsm8k_fetch +version: 0.0.1pre1 +display_name: JSONL GSM8K Fetcher +type: command +description: Fetches the GSM8K dataset, and formats into JSONL +is_deterministic: true + +inputs: + output_encoding: + type: string + optional: false + default: utf-8-sig + description: Encoding format of the output datasets + +outputs: + output_dataset: + type: uri_folder + description: | + Folder which will contain 'train.jsonl' and 'test.jsonl' + +code: ./src/ + +command: >- + python ./jsonl_gsm8k_fetch.py\ + --output_encoding ${{ inputs.output_encoding }} + --output_dataset ${{ outputs.output_dataset }} + +environment: + # Will be updated when component uploads + image: azureml:promptbase_aml@latest \ No newline at end of file From 5605b2fbf1150bd5dfffa349315a1eb7545f17bf Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Tue, 9 Apr 2024 11:23:57 -0400 Subject: [PATCH 04/36] Trying to get to first submission --- azureml/pipelines/azureml_utils.py | 1 + azureml/pipelines/configs.py | 5 + .../configs/gsm8k_zeroshot_config.yaml | 10 ++ azureml/pipelines/submit_gsm8k_zeroshot.py | 93 +++++++++++++++++++ 4 files changed, 109 insertions(+) create mode 100644 azureml/pipelines/configs/gsm8k_zeroshot_config.yaml create mode 100644 azureml/pipelines/submit_gsm8k_zeroshot.py diff --git a/azureml/pipelines/azureml_utils.py b/azureml/pipelines/azureml_utils.py index 4401418..25d641e 100644 --- a/azureml/pipelines/azureml_utils.py +++ b/azureml/pipelines/azureml_utils.py @@ -15,6 +15,7 @@ ALL_COMPONENTS = dict( jsonl_embeddings="jsonl_embeddings_aoai_component.yaml", jsonl_filter_correct_multiplechoice="jsonl_filter_correct_multiplechoice_component.yaml", + jsonl_gsm8k_fetch="jsonl_gsm8k_fetch_component.yaml", jsonl_guidance="jsonl_guidance_component.yaml", jsonl_key_filter="jsonl_key_filter_component.yaml", jsonl_key_rename="jsonl_key_rename_component.yaml", diff --git a/azureml/pipelines/configs.py b/azureml/pipelines/configs.py index 424ce6b..ddd0989 100644 --- a/azureml/pipelines/configs.py +++ b/azureml/pipelines/configs.py @@ -115,3 +115,8 @@ class BiosBiasJSONPipelineConfig: biosbias_dataset: str = str() json_guidance_program: str = str() aoai_config: AOAIConfig = field(default_factory=AOAIConfig) + + +@dataclass +class GSM8KZeroShotConfig: + pipeline: PipelineConfig = field(default_factory=PipelineConfig) \ No newline at end of file diff --git a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml new file mode 100644 index 0000000..462e9ce --- /dev/null +++ b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml @@ -0,0 +1,10 @@ +defaults: + - _self_ + - aml_config + - aoai_config + +zeroshot_config: + pipeline: + base_experiment_name: gsm8k_zeroshot + tags: + default_compute_target: isolatedcompute \ No newline at end of file diff --git a/azureml/pipelines/submit_gsm8k_zeroshot.py b/azureml/pipelines/submit_gsm8k_zeroshot.py new file mode 100644 index 0000000..3bcb2a2 --- /dev/null +++ b/azureml/pipelines/submit_gsm8k_zeroshot.py @@ -0,0 +1,93 @@ +# Submit a run using: +# python .\submit_mmlu_zeroshot.py -cn zeroshot_config + +import time + +from dataclasses import dataclass + +import hydra +from hydra.core.config_store import ConfigStore + +import omegaconf + +from azure.identity import DefaultAzureCredential +from azure.ai.ml import MLClient + +from azure.ai.ml import dsl, Input, MLClient +from azure.ai.ml.entities import Pipeline + +from azureml_pipelines import create_zeroshot_pipeline +from azureml_utils import get_component_collector +from configs import AMLConfig, GSM8KZeroShotConfig +from constants import GUIDANCE_PROGRAMS_DIR +from logging_utils import get_standard_logger_for_file + +_logger = get_standard_logger_for_file(__file__) + + +@dataclass +class PipelineConfig: + zeroshot_config: GSM8KZeroShotConfig = omegaconf.MISSING + azureml_config: AMLConfig = omegaconf.MISSING + + +cs = ConfigStore.instance() +cs.store(name="config", node=PipelineConfig) + + +def create_gsm8k_zeroshot_pipeline( + ml_client: MLClient, run_config: GSM8KZeroShotConfig, version_string: str +): + components = get_component_collector(ml_client, version_string) + + @dsl.pipeline() + def basic_pipeline() -> Pipeline: + mmlu_fetch_job = components.jsonl_gsm8k_fetch() + mmlu_fetch_job.name = f"fetch_gsm8k" + + get_split_job = components.uri_folder_to_file( + input_dataset=mmlu_fetch_job.outputs.output_dataset, + filename_pattern=f"test.jsonl", + ) + get_split_job.name = f"extract_split_test" + + pipeline = basic_pipeline() + pipeline.experiment_name = f"{run_config.pipeline.base_experiment_name}" + pipeline.display_name = None + pipeline.compute = run_config.pipeline.default_compute_target + if run_config.pipeline.tags: + pipeline.tags.update(run_config.tags) + _logger.info("Pipeline created") + + return pipeline + + +@hydra.main(config_path="configs", version_base="1.1") +def main(config: PipelineConfig): + version_string = str(int(time.time())) + _logger.info(f"AzureML object version for this run: {version_string}") + + _logger.info(f"Azure Subscription: {config.azureml_config.subscription_id}") + _logger.info(f"Resource Group: {config.azureml_config.resource_group}") + _logger.info(f"Workspace : {config.azureml_config.workspace_name}") + + credential = DefaultAzureCredential(exclude_shared_token_cache_credential=True) + + ws_client = MLClient( + credential=credential, + subscription_id=config.azureml_config.subscription_id, + resource_group_name=config.azureml_config.resource_group, + workspace_name=config.azureml_config.workspace_name, + logging_enable=False, + ) + + pipeline = create_gsm8k_zeroshot_pipeline( + ws_client, config.zeroshot_config, version_string + ) + _logger.info("Submitting pipeline") + submitted_job = ws_client.jobs.create_or_update(pipeline) + _logger.info(f"Submitted: {submitted_job.name}") + + +if __name__ == "__main__": + main() From 18bbb22e65c405c8a24929a65226289d5bc3cd28 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Tue, 9 Apr 2024 12:15:05 -0400 Subject: [PATCH 05/36] Give up on locale :-/ --- .../jsonl_gsm8k_fetch_component.yaml | 2 +- azureml/components/src/jsonl_gsm8k_fetch.py | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/azureml/components/jsonl_gsm8k_fetch_component.yaml b/azureml/components/jsonl_gsm8k_fetch_component.yaml index dc647e9..5d2fdeb 100644 --- a/azureml/components/jsonl_gsm8k_fetch_component.yaml +++ b/azureml/components/jsonl_gsm8k_fetch_component.yaml @@ -23,7 +23,7 @@ outputs: code: ./src/ command: >- - python ./jsonl_gsm8k_fetch.py\ + python ./jsonl_gsm8k_fetch.py --output_encoding ${{ inputs.output_encoding }} --output_dataset ${{ outputs.output_dataset }} diff --git a/azureml/components/src/jsonl_gsm8k_fetch.py b/azureml/components/src/jsonl_gsm8k_fetch.py index 02541a5..01cb15d 100644 --- a/azureml/components/src/jsonl_gsm8k_fetch.py +++ b/azureml/components/src/jsonl_gsm8k_fetch.py @@ -1,6 +1,5 @@ import argparse import json -import locale import pathlib import re @@ -51,23 +50,29 @@ def process_line(item: Dict[str, Any]) -> Dict[str, Any]: result["question"] = item["question"] + # The answer embeds a chain of thought and the + # numeric result split_answer = item["answer"].split("####") - result["answer"] = locale.atof(split_answer[1]) result["thoughts"] = [] for thought in split_answer[0].splitlines(): result["thoughts"].append(extract_thought_parts(thought)) + + # The following is not how you're supposed to handle + # numbers with thousand separators. + # This is a work around, pending three-way negotiations + # with locale.atof() and the AzureML compute nodes + result["answer"] = float(split_answer[1].replace(",", "")) + return result def main(): args = parse_args() - # For parsing numbers - locale.setlocale(locale.LC_ALL, "en_US.UTF-8") - for split in SPLITS: _logger.info(f"Starting split {split}") + line_count = 0 target_url = f"{BASE_DATA_URL}{split}.jsonl" _logger.info(f"Fetching {target_url}") @@ -81,7 +86,8 @@ def main(): nxt_item = json.loads(line) output_item = process_line(nxt_item) jlw.write_line(output_item) - _logger.info(f"Completed split {split}") + line_count += 1 + _logger.info(f"Completed split {split} ({line_count} lines)") _logger.info("Complete") From 6fa1f932ce327d75f7b2b53807902a495160bace Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Tue, 9 Apr 2024 13:52:34 -0400 Subject: [PATCH 06/36] Drafting a component --- .../src/jsonl_guidance_mistral7b.py | 120 ++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 azureml/components/src/jsonl_guidance_mistral7b.py diff --git a/azureml/components/src/jsonl_guidance_mistral7b.py b/azureml/components/src/jsonl_guidance_mistral7b.py new file mode 100644 index 0000000..2e53f65 --- /dev/null +++ b/azureml/components/src/jsonl_guidance_mistral7b.py @@ -0,0 +1,120 @@ +import argparse +import importlib.util +import json +import pathlib + +from typing import Any, Callable, Dict + +import guidance + +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer + +from aether_utils.jsonl_utils import line_map +from aether_utils.logging_utils import get_standard_logger_for_file + + +_logger = get_standard_logger_for_file(__file__) + +USER_MODULE = "user_module" +GUIDANCE_FUNCTION = "guidance_generation" + + +def parse_args(): + parser = argparse.ArgumentParser(add_help=True) + + # Information about the datasets + datasets_group = parser.add_argument_group("Datasets") + datasets_group.add_argument("--input_dataset", type=pathlib.Path, required=True) + datasets_group.add_argument("--input_encoding", type=str, required=True) + datasets_group.add_argument("--output_dataset", type=pathlib.Path, required=True) + datasets_group.add_argument("--output_encoding", type=str, required=True) + datasets_group.add_argument("--error_dataset", type=pathlib.Path, required=True) + datasets_group.add_argument("--error_encoding", type=str, required=True) + datasets_group.add_argument( + "--common_dataset", type=pathlib.Path, required=False, default=None + ) + datasets_group.add_argument("--common_encoding", type=str, required=False) + + # Information about the guidance program + parser.add_argument("--guidance_program", type=pathlib.Path, required=True) + + args = parser.parse_args() + return args + + +class LLMProcessor: + def __init__( + self, + program_path, + model: guidance.models.Model, + common_data: dict[str, any] | None, + ): + self._program_path = program_path + self._model = model + self._guidance_function = self._get_guidance_function() + self._common_data = common_data + + def __call__(self, item: Dict[str, Any]) -> dict[str, any]: + _logger.debug(f"__call__: {item}") + result = self._guidance_function(self._model, item, common=self._common_data) + _logger.debug(f"Checking keys") + for k in result.keys(): + assert k not in item, f"Duplicate key: {k}" + + _logger.debug(f"Updating item") + item.update(**result) + + return item + + def _get_guidance_function( + self, + ) -> Callable[[Dict[str, Any]], Dict[str, Any]]: + _logger.debug("Importing guidance file") + spec = importlib.util.spec_from_file_location(USER_MODULE, self._program_path) + module_definition = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module_definition) + + guidance_func = getattr(module_definition, GUIDANCE_FUNCTION) + + return guidance_func + + +def main(): + args = parse_args() + + # Load the common data (if required) + common_data = None + if args.common_dataset is not None: + _logger.info("Loading common dataset") + with open(args.common_dataset, "r", encoding=args.common_encoding) as jf: + common_data = json.load(jf) + else: + _logger.info("No common dataset present") + + guidance_model = guidance.models.Transformers( + "mistralai/Mistral-7B-v0.1", + device_map="cuda:0", + echo=False, + ) + _logger.info(f"guidance_model.device: {guidance_model.engine.device}") + + processor = LLMProcessor( + program_path=args.guidance_program, + model=guidance_model, + common_data=common_data, + ) + + s, f = line_map( + map_func=processor, + source_file=args.input_dataset, + dest_file=args.output_dataset, + source_encoding=args.input_encoding, + dest_encoding=args.output_encoding, + ) + + _logger.info(f"Complete with {s} successes and {f} failures") + + +if __name__ == "__main__": + main() From 98405aa6f287011e95c8026c3160b099858c3c01 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Tue, 9 Apr 2024 13:58:21 -0400 Subject: [PATCH 07/36] Roughing out the mistral7b component --- .../jsonl_guidance_mistral7b_component.yaml | 71 +++++++++++++++++++ guidance_programs/gsm8k_zero_or_few_shot.py | 51 +++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 azureml/components/jsonl_guidance_mistral7b_component.yaml create mode 100644 guidance_programs/gsm8k_zero_or_few_shot.py diff --git a/azureml/components/jsonl_guidance_mistral7b_component.yaml b/azureml/components/jsonl_guidance_mistral7b_component.yaml new file mode 100644 index 0000000..e0fd05b --- /dev/null +++ b/azureml/components/jsonl_guidance_mistral7b_component.yaml @@ -0,0 +1,71 @@ +$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json + +name: jsonl_guidance_mistral7b +version: 0.0.1pre1 +display_name: JSONL Guidance Mistral7B +type: command +description: Runs a supplied Guidance program on every line of a JSONL file via Mistral7B +is_deterministic: false + +inputs: + guidance_program: + type: uri_file + optional: false + description: Python file containing the guidance program + input_dataset: + type: uri_file + optional: false + description: Dataset containing JSONL input + input_encoding: + type: string + optional: false + default: utf-8-sig + description: Encoding format of the input dataset + common_dataset: + type: uri_file + optional: true + description: Dataset containing data to be shared with all rows in input + common_encoding: + type: string + optional: true + default: utf-8-sig + description: Encoding format of the common dataset + output_encoding: + type: string + optional: false + default: utf-8-sig + description: Encoding format of the output dataset + error_encoding: + type: string + optional: false + default: utf-8-sig + description: Encoding format of the error dataset + +outputs: + output_dataset: + type: uri_file + description: JSONL file + error_dataset: + type: uri_file + description: JSONL file containing failed lines + +code: ./src/ + +command: | + # Get guidance from GitHub + pip install --upgrade git+https://github.com/guidance-ai/guidance + # Run the script + python ./jsonl_guidance_mistral7b.py \ + --guidance_program ${{ inputs.guidance_program }} \ + --input_dataset ${{ inputs.input_dataset }} \ + --input_encoding ${{ inputs.input_encoding }} \ + $[[--common_dataset ${{ inputs.common_dataset }} ]] \ + $[[--common_encoding ${{ inputs.common_encoding }} ]] \ + --output_dataset ${{ outputs.output_dataset }} \ + --output_encoding ${{ inputs.output_encoding }} \ + --error_dataset ${{ outputs.error_dataset }} \ + --error_encoding ${{ inputs.error_encoding }} + +environment: + # Will be updated when component uploads + image: azureml:guidance_phi2_env@latest \ No newline at end of file diff --git a/guidance_programs/gsm8k_zero_or_few_shot.py b/guidance_programs/gsm8k_zero_or_few_shot.py new file mode 100644 index 0000000..9f68b55 --- /dev/null +++ b/guidance_programs/gsm8k_zero_or_few_shot.py @@ -0,0 +1,51 @@ +# This is a very naive guidance program for GSM8K + +import logging +import sys + +from typing import Any, Dict + +import guidance +from guidance import gen, select, system, user, assistant + + +_logger = logging.getLogger(__file__) +_logger.setLevel(logging.INFO) +_logger.addHandler(logging.StreamHandler(stream=sys.stdout)) + + +@guidance +def zero_shot_gsm8k( + lm: guidance.models.Instruct, + question: str, + choices: list[str], + common: list[dict[str, Any]] | None, +): + # Some general instruction to the model + lm += """Taking a maths test. Answer the following question and + show your working +""" + + if common: + _logger.debug("Adding few shot examples") + raise ValueError("common data not yet supported") + + + + return lm + + +def guidance_generation( + lm: guidance.models.Chat, + input: Dict[str, Any], + common: list[dict[str, Any]] | None = None, +) -> Dict[str, Any]: + _logger.debug("Starting guidance_generation") + result = lm + zero_shot_gsm8k( + question=input["question"], common=common + ) + + _logger.debug(f"Result: {result}") + + result = dict(zero_or_few_shot_choice=float(result["string_result"])) + return result From 1e932cde21c78386e06822db710e5b65f921a44e Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Tue, 9 Apr 2024 15:05:44 -0400 Subject: [PATCH 08/36] Roughing out more changes --- azureml/pipelines/azureml_utils.py | 6 ++++++ azureml/pipelines/configs.py | 7 +++++++ azureml/pipelines/configs/gsm8k_zeroshot_config.yaml | 6 +++++- guidance_programs/gsm8k_zero_or_few_shot.py | 12 +++++------- 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/azureml/pipelines/azureml_utils.py b/azureml/pipelines/azureml_utils.py index 993b2ae..96ce0a7 100644 --- a/azureml/pipelines/azureml_utils.py +++ b/azureml/pipelines/azureml_utils.py @@ -100,6 +100,12 @@ def prepare(self): environment=phi2_environment, version_string=self._version_string, ) + self.jsonl_guidance_mistral7b = create_component_from_yaml( + self._client, + self._base_dir / "jsonl_guidance_mistral7b_component.yaml", + environment=phi2_environment, + version_string=self._version_string, + ) _logger.info("Added all components") diff --git a/azureml/pipelines/configs.py b/azureml/pipelines/configs.py index e380f8b..39ea22b 100644 --- a/azureml/pipelines/configs.py +++ b/azureml/pipelines/configs.py @@ -30,6 +30,10 @@ class AOAIConfig: class Phi2Config: compute_target: str = str() +@dataclass +class TransformerConfig: + compute_target: str = str() + @dataclass class ZeroShotRunConfig: @@ -134,3 +138,6 @@ class Phi2BiosBiasJSONPipelineConfig: @dataclass class GSM8KZeroShotConfig: pipeline: PipelineConfig = field(default_factory=PipelineConfig) + json_guidance_programs: list[str] = field(default_factory=list) + transformer_config: TransformerConfig = field(default_factory=TransformerConfig) + diff --git a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml index 462e9ce..76708c2 100644 --- a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml +++ b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml @@ -7,4 +7,8 @@ zeroshot_config: pipeline: base_experiment_name: gsm8k_zeroshot tags: - default_compute_target: isolatedcompute \ No newline at end of file + default_compute_target: isolatedcompute + json_guidance_programs: + - gsm8k_zero_or_few_shot.py + transformer_config: + compute_target: gput4 \ No newline at end of file diff --git a/guidance_programs/gsm8k_zero_or_few_shot.py b/guidance_programs/gsm8k_zero_or_few_shot.py index 9f68b55..4257e51 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot.py +++ b/guidance_programs/gsm8k_zero_or_few_shot.py @@ -6,7 +6,6 @@ from typing import Any, Dict import guidance -from guidance import gen, select, system, user, assistant _logger = logging.getLogger(__file__) @@ -18,21 +17,22 @@ def zero_shot_gsm8k( lm: guidance.models.Instruct, question: str, - choices: list[str], common: list[dict[str, Any]] | None, ): # Some general instruction to the model lm += """Taking a maths test. Answer the following question and - show your working + show your working. """ if common: _logger.debug("Adding few shot examples") raise ValueError("common data not yet supported") + lm += question + schema_obj = dict(type="object", properties=dict(string_result="number")) - return lm + return lm + guidance.json(name="string_result", schema=schema_obj) def guidance_generation( @@ -41,9 +41,7 @@ def guidance_generation( common: list[dict[str, Any]] | None = None, ) -> Dict[str, Any]: _logger.debug("Starting guidance_generation") - result = lm + zero_shot_gsm8k( - question=input["question"], common=common - ) + result = lm + zero_shot_gsm8k(question=input["question"], common=common) _logger.debug(f"Result: {result}") From 29358352a9a77c4b8af7bddba70ab3f6aae9d9c9 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Tue, 9 Apr 2024 15:06:07 -0400 Subject: [PATCH 09/36] Better name --- azureml/pipelines/configs/gsm8k_zeroshot_config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml index 76708c2..db784e6 100644 --- a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml +++ b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml @@ -5,7 +5,7 @@ defaults: zeroshot_config: pipeline: - base_experiment_name: gsm8k_zeroshot + base_experiment_name: gsm8k_zeroshot_debugging tags: default_compute_target: isolatedcompute json_guidance_programs: From d16df86cc33ece14f13f1cffbc716aea9abf842c Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 10 Apr 2024 10:36:27 -0400 Subject: [PATCH 10/36] Hack things into working --- .../jsonl_guidance_mistral7b_component.yaml | 18 +++++++++++++++-- .../src/jsonl_guidance_mistral7b.py | 15 +++++++++----- azureml/pipelines/submit_gsm8k_zeroshot.py | 20 +++++++++++++++++++ guidance_programs/gsm8k_zero_or_few_shot.py | 12 +++++++---- 4 files changed, 54 insertions(+), 11 deletions(-) diff --git a/azureml/components/jsonl_guidance_mistral7b_component.yaml b/azureml/components/jsonl_guidance_mistral7b_component.yaml index e0fd05b..f6ba778 100644 --- a/azureml/components/jsonl_guidance_mistral7b_component.yaml +++ b/azureml/components/jsonl_guidance_mistral7b_component.yaml @@ -52,8 +52,22 @@ outputs: code: ./src/ command: | - # Get guidance from GitHub - pip install --upgrade git+https://github.com/guidance-ai/guidance + # Download the zip + wget https://github.com/guidance-ai/guidance/archive/refs/heads/main.zip + echo + ls + echo + # Unzip + unzip ./main.zip + echo + ls -p + echo + # Install from download + pip install --upgrade ./guidance-main/ + echo + # Install LlamaCpp + CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install "llama-cpp-python<0.2.58" + echo # Run the script python ./jsonl_guidance_mistral7b.py \ --guidance_program ${{ inputs.guidance_program }} \ diff --git a/azureml/components/src/jsonl_guidance_mistral7b.py b/azureml/components/src/jsonl_guidance_mistral7b.py index 2e53f65..d22a9bf 100644 --- a/azureml/components/src/jsonl_guidance_mistral7b.py +++ b/azureml/components/src/jsonl_guidance_mistral7b.py @@ -7,6 +7,8 @@ import guidance +from huggingface_hub import hf_hub_download + import torch from transformers import AutoModelForCausalLM, AutoTokenizer @@ -92,12 +94,14 @@ def main(): else: _logger.info("No common dataset present") - guidance_model = guidance.models.Transformers( - "mistralai/Mistral-7B-v0.1", - device_map="cuda:0", - echo=False, + repo_id = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF" + filename = "mistral-7b-instruct-v0.2.Q8_0.gguf" + downloaded_file = hf_hub_download(repo_id=repo_id, filename=filename) + + guidance_model = guidance.models.LlamaCpp( + downloaded_file, verbose=True, n_gpu_layers=-1 ) - _logger.info(f"guidance_model.device: {guidance_model.engine.device}") + # _logger.info(f"guidance_model.device: {guidance_model.engine.device}") processor = LLMProcessor( program_path=args.guidance_program, @@ -105,6 +109,7 @@ def main(): common_data=common_data, ) + _logger.info("Starting to process input") s, f = line_map( map_func=processor, source_file=args.input_dataset, diff --git a/azureml/pipelines/submit_gsm8k_zeroshot.py b/azureml/pipelines/submit_gsm8k_zeroshot.py index 3bcb2a2..ad31752 100644 --- a/azureml/pipelines/submit_gsm8k_zeroshot.py +++ b/azureml/pipelines/submit_gsm8k_zeroshot.py @@ -40,6 +40,17 @@ def create_gsm8k_zeroshot_pipeline( ): components = get_component_collector(ml_client, version_string) + guidance_inputs = dict() + for prog_filename in run_config.json_guidance_programs: + k = prog_filename[0:-3] + v = Input( + type="uri_file", + path=GUIDANCE_PROGRAMS_DIR / prog_filename, + model="download", + ) + guidance_inputs[k] = v + _logger.info(f"Found {len(guidance_inputs)} guidance programs") + @dsl.pipeline() def basic_pipeline() -> Pipeline: mmlu_fetch_job = components.jsonl_gsm8k_fetch() @@ -51,6 +62,15 @@ def basic_pipeline() -> Pipeline: ) get_split_job.name = f"extract_split_test" + for progname, prog_input in guidance_inputs.items(): + + guidance_job = components.jsonl_guidance_mistral7b( + guidance_program=prog_input, + input_dataset=get_split_job.outputs.output_dataset, + ) + guidance_job.compute = run_config.transformer_config.compute_target + guidance_job.name = f"guidance_mistral7b_{progname}" + pipeline = basic_pipeline() pipeline.experiment_name = f"{run_config.pipeline.base_experiment_name}" pipeline.display_name = None diff --git a/guidance_programs/gsm8k_zero_or_few_shot.py b/guidance_programs/gsm8k_zero_or_few_shot.py index 4257e51..b64d52a 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot.py +++ b/guidance_programs/gsm8k_zero_or_few_shot.py @@ -1,5 +1,6 @@ # This is a very naive guidance program for GSM8K +import json import logging import sys @@ -30,9 +31,9 @@ def zero_shot_gsm8k( lm += question - schema_obj = dict(type="object", properties=dict(string_result="number")) + schema_obj = dict(type="object", properties=dict(answer=dict(type="number"))) - return lm + guidance.json(name="string_result", schema=schema_obj) + return lm + guidance.json(name="json_result_object", schema=schema_obj) def guidance_generation( @@ -43,7 +44,10 @@ def guidance_generation( _logger.debug("Starting guidance_generation") result = lm + zero_shot_gsm8k(question=input["question"], common=common) - _logger.debug(f"Result: {result}") + _logger.info(f"Result: {result}") + _logger.info(f"JSON portion: {result['json_result_object']}") - result = dict(zero_or_few_shot_choice=float(result["string_result"])) + loaded_obj = json.loads(result["json_result_object"]) + + result = dict(zero_or_few_shot_answer=loaded_obj["answer"]) return result From d3b2a4e08ae77760eb55f176b3a95ac2c8db6f4f Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 10 Apr 2024 10:54:42 -0400 Subject: [PATCH 11/36] Rename things to match switch to LlamaCpp --- azureml/pipelines/configs.py | 4 ++-- azureml/pipelines/configs/gsm8k_zeroshot_config.yaml | 2 +- azureml/pipelines/submit_gsm8k_zeroshot.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/azureml/pipelines/configs.py b/azureml/pipelines/configs.py index 39ea22b..a45d9a8 100644 --- a/azureml/pipelines/configs.py +++ b/azureml/pipelines/configs.py @@ -31,7 +31,7 @@ class Phi2Config: compute_target: str = str() @dataclass -class TransformerConfig: +class LlamaCppConfig: compute_target: str = str() @@ -139,5 +139,5 @@ class Phi2BiosBiasJSONPipelineConfig: class GSM8KZeroShotConfig: pipeline: PipelineConfig = field(default_factory=PipelineConfig) json_guidance_programs: list[str] = field(default_factory=list) - transformer_config: TransformerConfig = field(default_factory=TransformerConfig) + llamacpp_config: LlamaCppConfig = field(default_factory=LlamaCppConfig) diff --git a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml index db784e6..3870e12 100644 --- a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml +++ b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml @@ -10,5 +10,5 @@ zeroshot_config: default_compute_target: isolatedcompute json_guidance_programs: - gsm8k_zero_or_few_shot.py - transformer_config: + llamacpp_config: compute_target: gput4 \ No newline at end of file diff --git a/azureml/pipelines/submit_gsm8k_zeroshot.py b/azureml/pipelines/submit_gsm8k_zeroshot.py index ad31752..ea3badd 100644 --- a/azureml/pipelines/submit_gsm8k_zeroshot.py +++ b/azureml/pipelines/submit_gsm8k_zeroshot.py @@ -68,7 +68,7 @@ def basic_pipeline() -> Pipeline: guidance_program=prog_input, input_dataset=get_split_job.outputs.output_dataset, ) - guidance_job.compute = run_config.transformer_config.compute_target + guidance_job.compute = run_config.llamacpp_config.compute_target guidance_job.name = f"guidance_mistral7b_{progname}" pipeline = basic_pipeline() From 033c407cc8a1c46488934c6700eaa0c12bbc3ca5 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 10 Apr 2024 11:58:37 -0400 Subject: [PATCH 12/36] Add a scoring component --- .../jsonl_score_numeric_component.yaml | 56 ++++++++++++ azureml/components/src/jsonl_score_numeric.py | 85 +++++++++++++++++++ azureml/pipelines/azureml_utils.py | 1 + azureml/pipelines/submit_gsm8k_zeroshot.py | 7 ++ 4 files changed, 149 insertions(+) create mode 100644 azureml/components/jsonl_score_numeric_component.yaml create mode 100644 azureml/components/src/jsonl_score_numeric.py diff --git a/azureml/components/jsonl_score_numeric_component.yaml b/azureml/components/jsonl_score_numeric_component.yaml new file mode 100644 index 0000000..6098006 --- /dev/null +++ b/azureml/components/jsonl_score_numeric_component.yaml @@ -0,0 +1,56 @@ +$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json + +name: jsonl_score_numeric +version: 0.0.1pre1 +display_name: JSONL Numeric Scorer +type: command +description: | + Takes a JSONL file of numeric questions and correct answers and responses + from a model, and produces the overall score. + Results are stored in JSON +is_deterministic: true + +inputs: + input_dataset: + type: uri_file + optional: false + description: Dataset containing JSONL input + input_encoding: + type: string + optional: false + default: utf-8-sig + description: Encoding format of the input dataset + correct_key: + type: string + optional: false + description: Which key contains the correct answer + response_key: + type: string + optional: false + description: Which key contains the answer produced by the model + output_encoding: + type: string + optional: false + default: utf-8-sig + description: Encoding format of the output dataset + +outputs: + output_dataset: + type: uri_file + description: JSON file containing score summary + + +code: ./src/ + +command: >- + python ./jsonl_score_numeric.py + --input_dataset ${{ inputs.input_dataset }} + --input_encoding ${{ inputs.input_encoding }} + --output_dataset ${{ outputs.output_dataset }} + --output_encoding ${{ inputs.output_encoding }} + --correct_key ${{ inputs.correct_key }} + --response_key ${{ inputs.response_key }} + +environment: + # Will be updated when component uploads + image: azureml:promptbase_aml@latest \ No newline at end of file diff --git a/azureml/components/src/jsonl_score_numeric.py b/azureml/components/src/jsonl_score_numeric.py new file mode 100644 index 0000000..35a260e --- /dev/null +++ b/azureml/components/src/jsonl_score_numeric.py @@ -0,0 +1,85 @@ +import argparse +import functools +import json +import pathlib + +from typing import Any + +import mlflow + +from aether_utils.jsonl_utils import line_reduce +from aether_utils.logging_utils import get_standard_logger_for_file + +_logger = get_standard_logger_for_file(__file__) + + +class Scorer: + def __init__(self, correct_key: str, response_key: str): + self.y_true = [] + self.y_pred = [] + self.dataset = [] + self.subject = [] + self.correct_key = correct_key + self.response_key = response_key + + def __call__(self, line: dict[str, Any]): + correct_answer = line[self.correct_key] + response_answer = line[self.response_key] + self.y_true.append(correct_answer) + self.y_pred.append(response_answer) + + def generate_summary(self) -> dict[str, Any]: + result = dict() + + result["n_answers"] = len(self.y_true) + n_correct = 0 + for y_t, y_p in zip(self.y_true, self.y_pred): + if y_t == y_p: + n_correct += 1 + result["n_correct"] = n_correct + result["accuracy"] = float(n_correct) / len(self.y_true) + + return result + + +def parse_args(): + parser = argparse.ArgumentParser(add_help=True) + + # Information about the ports + ports_group = parser.add_argument_group("Ports") + ports_group.add_argument("--input_dataset", type=pathlib.Path, required=True) + ports_group.add_argument("--input_encoding", type=str, required=True) + ports_group.add_argument("--output_dataset", type=pathlib.Path, required=True) + ports_group.add_argument("--output_encoding", type=str, required=True) + + # Information about the keys + keys_group = parser.add_argument_group("Keys") + keys_group.add_argument("--correct_key", type=str, required=True) + keys_group.add_argument("--response_key", type=str, required=True) + + args = parser.parse_args() + + return args + + +def main(): + args = parse_args() + + scorer = Scorer(correct_key=args.correct_key, response_key=args.response_key) + line_reduce( + reducer=scorer, + source_file=args.input_dataset, + source_encoding=args.input_encoding, + ) + summary = scorer.generate_summary() + + _logger.info("Logging with mlflow") + mlflow.log_metrics(summary) + + _logger.info("Writing output file") + with open(args.output_dataset, encoding=args.output_encoding, mode="w") as jf: + json.dump(summary, jf, indent=4) + + +if __name__ == "__main__": + main() diff --git a/azureml/pipelines/azureml_utils.py b/azureml/pipelines/azureml_utils.py index 96ce0a7..341b9b6 100644 --- a/azureml/pipelines/azureml_utils.py +++ b/azureml/pipelines/azureml_utils.py @@ -25,6 +25,7 @@ jsonl_schema_checker="jsonl_schema_checker_component.yaml", jsonl_score_biosbias_json="jsonl_score_biosbias_json_component.yaml", jsonl_score_multiplechoice="jsonl_score_multiplechoice_component.yaml", + jsonl_score_numeric="jsonl_score_numeric_component.yaml", jsonl_to_json="jsonl_to_json_component.yaml", uri_folder_to_file="uri_folder_to_file_component.yaml", ) diff --git a/azureml/pipelines/submit_gsm8k_zeroshot.py b/azureml/pipelines/submit_gsm8k_zeroshot.py index ea3badd..b04bb4b 100644 --- a/azureml/pipelines/submit_gsm8k_zeroshot.py +++ b/azureml/pipelines/submit_gsm8k_zeroshot.py @@ -71,6 +71,13 @@ def basic_pipeline() -> Pipeline: guidance_job.compute = run_config.llamacpp_config.compute_target guidance_job.name = f"guidance_mistral7b_{progname}" + score_job = components.jsonl_score_numeric( + input_dataset=guidance_job.outputs.output_dataset, + correct_key="answer", + response_key="zero_or_few_shot_answer", + ) + score_job.name = f"score_{progname}" + pipeline = basic_pipeline() pipeline.experiment_name = f"{run_config.pipeline.base_experiment_name}" pipeline.display_name = None From cb56759b160b47c8207e9b72a938b216d4e3d92b Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 10 Apr 2024 12:03:16 -0400 Subject: [PATCH 13/36] Start expanding on the prompts --- .../configs/gsm8k_zeroshot_config.yaml | 2 + .../gsm8k_zero_or_few_shot_plain.py | 49 +++++++++++++++++++ .../gsm8k_zero_or_few_shot_regex_number.py | 49 +++++++++++++++++++ 3 files changed, 100 insertions(+) create mode 100644 guidance_programs/gsm8k_zero_or_few_shot_plain.py create mode 100644 guidance_programs/gsm8k_zero_or_few_shot_regex_number.py diff --git a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml index 3870e12..ef66d27 100644 --- a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml +++ b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml @@ -9,6 +9,8 @@ zeroshot_config: tags: default_compute_target: isolatedcompute json_guidance_programs: + - gsm8k_zero_or_few_shot_plain.py + - gsm8k_zero_or_few_shot_regex_number.py - gsm8k_zero_or_few_shot.py llamacpp_config: compute_target: gput4 \ No newline at end of file diff --git a/guidance_programs/gsm8k_zero_or_few_shot_plain.py b/guidance_programs/gsm8k_zero_or_few_shot_plain.py new file mode 100644 index 0000000..530cfa3 --- /dev/null +++ b/guidance_programs/gsm8k_zero_or_few_shot_plain.py @@ -0,0 +1,49 @@ +# This is a very naive guidance program for GSM8K + +import json +import logging +import sys + +from typing import Any, Dict + +import guidance + + +_logger = logging.getLogger(__file__) +_logger.setLevel(logging.INFO) +_logger.addHandler(logging.StreamHandler(stream=sys.stdout)) + + +@guidance +def zero_shot_gsm8k( + lm: guidance.models.Instruct, + question: str, + common: list[dict[str, Any]] | None, +): + # Some general instruction to the model + lm += """Taking a maths test. Answer the following question: +""" + + if common: + _logger.debug("Adding few shot examples") + raise ValueError("common data not yet supported") + + lm += question + + return lm + guidance.gen(name="result_string") + + +def guidance_generation( + lm: guidance.models.Chat, + input: Dict[str, Any], + common: list[dict[str, Any]] | None = None, +) -> Dict[str, Any]: + _logger.debug("Starting guidance_generation") + result = lm + zero_shot_gsm8k(question=input["question"], common=common) + + _logger.info(f"JSON portion: {result['result_string']}") + + float_result = float(result['result_string']) + + result = dict(zero_or_few_shot_answer=float_result) + return result diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py new file mode 100644 index 0000000..6766c5c --- /dev/null +++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py @@ -0,0 +1,49 @@ +# This is a very naive guidance program for GSM8K + +import json +import logging +import sys + +from typing import Any, Dict + +import guidance + + +_logger = logging.getLogger(__file__) +_logger.setLevel(logging.INFO) +_logger.addHandler(logging.StreamHandler(stream=sys.stdout)) + + +@guidance +def zero_shot_gsm8k( + lm: guidance.models.Instruct, + question: str, + common: list[dict[str, Any]] | None, +): + # Some general instruction to the model + lm += """Taking a maths test. Answer the following question: +""" + + if common: + _logger.debug("Adding few shot examples") + raise ValueError("common data not yet supported") + + lm += question + + return lm + guidance.gen(name="result_string", regex=r"\d+\.?\d*") + + +def guidance_generation( + lm: guidance.models.Chat, + input: Dict[str, Any], + common: list[dict[str, Any]] | None = None, +) -> Dict[str, Any]: + _logger.debug("Starting guidance_generation") + result = lm + zero_shot_gsm8k(question=input["question"], common=common) + + _logger.info(f"JSON portion: {result['result_string']}") + + float_result = float(result["result_string"]) + + result = dict(zero_or_few_shot_answer=float_result) + return result From ff305b276a875ca56b32981aea2de19a9b67734f Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 10 Apr 2024 13:14:13 -0400 Subject: [PATCH 14/36] Unsuccessful tweak --- guidance_programs/gsm8k_zero_or_few_shot_plain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guidance_programs/gsm8k_zero_or_few_shot_plain.py b/guidance_programs/gsm8k_zero_or_few_shot_plain.py index 530cfa3..da02638 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_plain.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_plain.py @@ -21,7 +21,7 @@ def zero_shot_gsm8k( common: list[dict[str, Any]] | None, ): # Some general instruction to the model - lm += """Taking a maths test. Answer the following question: + lm += """Taking a maths test. Answer the following question. Respond with just the numerical answer: """ if common: From b8d25bfdfa1d4ed8555a96c12add0110a8277fda Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 10 Apr 2024 13:35:16 -0400 Subject: [PATCH 15/36] Add some random examples --- azureml/pipelines/configs.py | 2 ++ .../configs/gsm8k_zeroshot_config.yaml | 4 +++- azureml/pipelines/submit_gsm8k_zeroshot.py | 22 ++++++++++++++----- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/azureml/pipelines/configs.py b/azureml/pipelines/configs.py index a45d9a8..3befd93 100644 --- a/azureml/pipelines/configs.py +++ b/azureml/pipelines/configs.py @@ -140,4 +140,6 @@ class GSM8KZeroShotConfig: pipeline: PipelineConfig = field(default_factory=PipelineConfig) json_guidance_programs: list[str] = field(default_factory=list) llamacpp_config: LlamaCppConfig = field(default_factory=LlamaCppConfig) + random_seed: int = int() + n_samples: int = int() diff --git a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml index ef66d27..b6183b3 100644 --- a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml +++ b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml @@ -13,4 +13,6 @@ zeroshot_config: - gsm8k_zero_or_few_shot_regex_number.py - gsm8k_zero_or_few_shot.py llamacpp_config: - compute_target: gput4 \ No newline at end of file + compute_target: gput4 + random_seed: 4521 + n_samples: 5 \ No newline at end of file diff --git a/azureml/pipelines/submit_gsm8k_zeroshot.py b/azureml/pipelines/submit_gsm8k_zeroshot.py index b04bb4b..fb325a8 100644 --- a/azureml/pipelines/submit_gsm8k_zeroshot.py +++ b/azureml/pipelines/submit_gsm8k_zeroshot.py @@ -56,17 +56,29 @@ def basic_pipeline() -> Pipeline: mmlu_fetch_job = components.jsonl_gsm8k_fetch() mmlu_fetch_job.name = f"fetch_gsm8k" - get_split_job = components.uri_folder_to_file( - input_dataset=mmlu_fetch_job.outputs.output_dataset, - filename_pattern=f"test.jsonl", + split_outputs = dict() + for s in ["train", "test"]: + get_split_job = components.uri_folder_to_file( + input_dataset=mmlu_fetch_job.outputs.output_dataset, + filename_pattern=f"{s}.jsonl", + ) + get_split_job.name = f"extract_split_{s}" + split_outputs[s] = get_split_job.outputs.output_dataset + + random_examples_job = components.jsonl_random_examples( + input_dataset=split_outputs["train"], + example_dataset=split_outputs["test"], + output_key="examples", + num_examples=run_config.n_samples, + random_seed=run_config.random_seed ) - get_split_job.name = f"extract_split_test" + random_examples_job.name=f"add_random_examples" for progname, prog_input in guidance_inputs.items(): guidance_job = components.jsonl_guidance_mistral7b( guidance_program=prog_input, - input_dataset=get_split_job.outputs.output_dataset, + input_dataset=random_examples_job.outputs.output_dataset, ) guidance_job.compute = run_config.llamacpp_config.compute_target guidance_job.name = f"guidance_mistral7b_{progname}" From ac49c39b281ddce5c88753ec69c6ee5df80b4818 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 10 Apr 2024 13:59:33 -0400 Subject: [PATCH 16/36] Thinking about few shots a bit --- azureml/pipelines/configs.py | 2 +- .../configs/gsm8k_zeroshot_config.yaml | 4 +- azureml/pipelines/submit_gsm8k_zeroshot.py | 6 +- guidance_programs/gsm8k_zero_or_few_shot.py | 53 ---------------- .../gsm8k_zero_or_few_shot_basic_json.py | 62 +++++++++++++++++++ .../gsm8k_zero_or_few_shot_plain.py | 37 +++++++---- .../gsm8k_zero_or_few_shot_regex_number.py | 37 +++++++---- 7 files changed, 116 insertions(+), 85 deletions(-) delete mode 100644 guidance_programs/gsm8k_zero_or_few_shot.py create mode 100644 guidance_programs/gsm8k_zero_or_few_shot_basic_json.py diff --git a/azureml/pipelines/configs.py b/azureml/pipelines/configs.py index 3befd93..e51cbec 100644 --- a/azureml/pipelines/configs.py +++ b/azureml/pipelines/configs.py @@ -136,7 +136,7 @@ class Phi2BiosBiasJSONPipelineConfig: @dataclass -class GSM8KZeroShotConfig: +class GSM8KZeroOrFewShotConfig: pipeline: PipelineConfig = field(default_factory=PipelineConfig) json_guidance_programs: list[str] = field(default_factory=list) llamacpp_config: LlamaCppConfig = field(default_factory=LlamaCppConfig) diff --git a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml index b6183b3..6128613 100644 --- a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml +++ b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml @@ -5,13 +5,13 @@ defaults: zeroshot_config: pipeline: - base_experiment_name: gsm8k_zeroshot_debugging + base_experiment_name: gsm8k_zeroorfewshot_debugging tags: default_compute_target: isolatedcompute json_guidance_programs: - gsm8k_zero_or_few_shot_plain.py - gsm8k_zero_or_few_shot_regex_number.py - - gsm8k_zero_or_few_shot.py + - gsm8k_zero_or_few_shot_basic_json.py llamacpp_config: compute_target: gput4 random_seed: 4521 diff --git a/azureml/pipelines/submit_gsm8k_zeroshot.py b/azureml/pipelines/submit_gsm8k_zeroshot.py index fb325a8..0a459b1 100644 --- a/azureml/pipelines/submit_gsm8k_zeroshot.py +++ b/azureml/pipelines/submit_gsm8k_zeroshot.py @@ -18,7 +18,7 @@ from azureml_pipelines import create_zeroshot_pipeline from azureml_utils import get_component_collector -from configs import AMLConfig, GSM8KZeroShotConfig +from configs import AMLConfig, GSM8KZeroOrFewShotConfig from constants import GUIDANCE_PROGRAMS_DIR from logging_utils import get_standard_logger_for_file @@ -27,7 +27,7 @@ @dataclass class PipelineConfig: - zeroshot_config: GSM8KZeroShotConfig = omegaconf.MISSING + zeroshot_config: GSM8KZeroOrFewShotConfig = omegaconf.MISSING azureml_config: AMLConfig = omegaconf.MISSING @@ -36,7 +36,7 @@ class PipelineConfig: def create_gsm8k_zeroshot_pipeline( - ml_client: MLClient, run_config: GSM8KZeroShotConfig, version_string: str + ml_client: MLClient, run_config: GSM8KZeroOrFewShotConfig, version_string: str ): components = get_component_collector(ml_client, version_string) diff --git a/guidance_programs/gsm8k_zero_or_few_shot.py b/guidance_programs/gsm8k_zero_or_few_shot.py deleted file mode 100644 index b64d52a..0000000 --- a/guidance_programs/gsm8k_zero_or_few_shot.py +++ /dev/null @@ -1,53 +0,0 @@ -# This is a very naive guidance program for GSM8K - -import json -import logging -import sys - -from typing import Any, Dict - -import guidance - - -_logger = logging.getLogger(__file__) -_logger.setLevel(logging.INFO) -_logger.addHandler(logging.StreamHandler(stream=sys.stdout)) - - -@guidance -def zero_shot_gsm8k( - lm: guidance.models.Instruct, - question: str, - common: list[dict[str, Any]] | None, -): - # Some general instruction to the model - lm += """Taking a maths test. Answer the following question and - show your working. -""" - - if common: - _logger.debug("Adding few shot examples") - raise ValueError("common data not yet supported") - - lm += question - - schema_obj = dict(type="object", properties=dict(answer=dict(type="number"))) - - return lm + guidance.json(name="json_result_object", schema=schema_obj) - - -def guidance_generation( - lm: guidance.models.Chat, - input: Dict[str, Any], - common: list[dict[str, Any]] | None = None, -) -> Dict[str, Any]: - _logger.debug("Starting guidance_generation") - result = lm + zero_shot_gsm8k(question=input["question"], common=common) - - _logger.info(f"Result: {result}") - _logger.info(f"JSON portion: {result['json_result_object']}") - - loaded_obj = json.loads(result["json_result_object"]) - - result = dict(zero_or_few_shot_answer=loaded_obj["answer"]) - return result diff --git a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py new file mode 100644 index 0000000..07e2eac --- /dev/null +++ b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py @@ -0,0 +1,62 @@ +# This is a very naive guidance program for GSM8K + +import json +import logging +import sys + +from typing import Any, Dict + +import guidance + + +_logger = logging.getLogger(__file__) +_logger.setLevel(logging.INFO) +_logger.addHandler(logging.StreamHandler(stream=sys.stdout)) + + +@guidance +def zero_shot_gsm8k( + lm: guidance.models.Instruct, + question: str, + examples: list[dict[str, Any]] | None, +): + # Some general instruction to the model + lm += """You are taking a maths test\n\n""" + + # Show the few shots + for e in examples: + lm += f"Question: {e['question']}\n" + lm += f"Reasoning:" + for t in e["thoughts"]: + lm += t["step"] + t["result"] + lm += f"Answer: {e['answer']}" + lm += "\n" + + # Now ask the question + lm += f"Question: {question}\n" + lm += f"Reasoning:" + lm += guidance.gen("reasons") + lm += f"Answer: " + guidance.json(name="result_string", schema=dict(type="number")) + + return lm + + +def guidance_generation( + lm: guidance.models.Chat, + input: Dict[str, Any], + common: list[dict[str, Any]] | None = None, +) -> Dict[str, Any]: + _logger.debug("Starting guidance_generation") + if common: + raise ValueError("Common Data not supported!") + + result = lm + zero_shot_gsm8k( + question=input["question"], examples=input["examples"] + ) + + _logger.info(f"result_string: {result['result_string']}") + + float_result = float(result["result_string"]) + + result = dict(zero_or_few_shot_answer=float_result) + return result diff --git a/guidance_programs/gsm8k_zero_or_few_shot_plain.py b/guidance_programs/gsm8k_zero_or_few_shot_plain.py index da02638..6b420f3 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_plain.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_plain.py @@ -18,19 +18,27 @@ def zero_shot_gsm8k( lm: guidance.models.Instruct, question: str, - common: list[dict[str, Any]] | None, + examples: list[dict[str, Any]] | None, ): # Some general instruction to the model - lm += """Taking a maths test. Answer the following question. Respond with just the numerical answer: -""" - - if common: - _logger.debug("Adding few shot examples") - raise ValueError("common data not yet supported") - - lm += question - - return lm + guidance.gen(name="result_string") + lm += """You are taking a maths test\n\n""" + + # Show the few shots + for e in examples: + lm += f"Question: {e['question']}\n" + lm += f"Reasoning:" + for t in e["thoughts"]: + lm += t["step"] + t["result"] + lm += f"Answer: {e['answer']}" + lm += "\n" + + # Now ask the question + lm += f"Question: {question}\n" + lm += f"Reasoning:" + lm += guidance.gen("reasons") + lm += f"Answer: " + guidance.gen(name="result_string") + + return lm def guidance_generation( @@ -39,9 +47,12 @@ def guidance_generation( common: list[dict[str, Any]] | None = None, ) -> Dict[str, Any]: _logger.debug("Starting guidance_generation") - result = lm + zero_shot_gsm8k(question=input["question"], common=common) + if common: + raise ValueError("Common Data not supported!") + + result = lm + zero_shot_gsm8k(question=input["question"], examples=input["examples"]) - _logger.info(f"JSON portion: {result['result_string']}") + _logger.info(f"result_string: {result['result_string']}") float_result = float(result['result_string']) diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py index 6766c5c..1ab6e85 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py @@ -18,19 +18,27 @@ def zero_shot_gsm8k( lm: guidance.models.Instruct, question: str, - common: list[dict[str, Any]] | None, + examples: list[dict[str, Any]] | None, ): # Some general instruction to the model - lm += """Taking a maths test. Answer the following question: -""" - - if common: - _logger.debug("Adding few shot examples") - raise ValueError("common data not yet supported") - - lm += question - - return lm + guidance.gen(name="result_string", regex=r"\d+\.?\d*") + lm += """You are taking a maths test\n\n""" + + # Show the few shots + for e in examples: + lm += f"Question: {e['question']}\n" + lm += f"Reasoning:" + for t in e["thoughts"]: + lm += t["step"] + t["result"] + lm += f"Answer: {e['answer']}" + lm += "\n" + + # Now ask the question + lm += f"Question: {question}\n" + lm += f"Reasoning:" + lm += guidance.gen("reasons") + lm += f"Answer: " + guidance.gen(name="result_string", regex=r"\d+\.?\d*") + + return lm def guidance_generation( @@ -39,9 +47,12 @@ def guidance_generation( common: list[dict[str, Any]] | None = None, ) -> Dict[str, Any]: _logger.debug("Starting guidance_generation") - result = lm + zero_shot_gsm8k(question=input["question"], common=common) + if common: + raise ValueError("Common Data not supported!") + + result = lm + zero_shot_gsm8k(question=input["question"], examples=input["examples"]) - _logger.info(f"JSON portion: {result['result_string']}") + _logger.info(f"result_string: {result['result_string']}") float_result = float(result["result_string"]) From 6f8ef9d2b6622c218bf025e5ac431068de93a657 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 10 Apr 2024 14:08:38 -0400 Subject: [PATCH 17/36] Doing some renaming --- azureml/pipelines/configs/gsm8k_zeroshot_config.yaml | 2 +- ...submit_gsm8k_zeroshot.py => submit_gsm8k_zeroorfewshot.py} | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename azureml/pipelines/{submit_gsm8k_zeroshot.py => submit_gsm8k_zeroorfewshot.py} (97%) diff --git a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml index 6128613..e548f92 100644 --- a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml +++ b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml @@ -3,7 +3,7 @@ defaults: - aml_config - aoai_config -zeroshot_config: +zeroorfewshot_config: pipeline: base_experiment_name: gsm8k_zeroorfewshot_debugging tags: diff --git a/azureml/pipelines/submit_gsm8k_zeroshot.py b/azureml/pipelines/submit_gsm8k_zeroorfewshot.py similarity index 97% rename from azureml/pipelines/submit_gsm8k_zeroshot.py rename to azureml/pipelines/submit_gsm8k_zeroorfewshot.py index 0a459b1..b93f738 100644 --- a/azureml/pipelines/submit_gsm8k_zeroshot.py +++ b/azureml/pipelines/submit_gsm8k_zeroorfewshot.py @@ -27,7 +27,7 @@ @dataclass class PipelineConfig: - zeroshot_config: GSM8KZeroOrFewShotConfig = omegaconf.MISSING + zeroorfewshot_config: GSM8KZeroOrFewShotConfig = omegaconf.MISSING azureml_config: AMLConfig = omegaconf.MISSING @@ -121,7 +121,7 @@ def main(config: PipelineConfig): ) pipeline = create_gsm8k_zeroshot_pipeline( - ws_client, config.zeroshot_config, version_string + ws_client, config.zeroorfewshot_config, version_string ) _logger.info("Submitting pipeline") submitted_job = ws_client.jobs.create_or_update(pipeline) From 2076b53ffa35cf87436fa5ce23aff908ba1061c9 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 10 Apr 2024 14:28:33 -0400 Subject: [PATCH 18/36] Tweaking --- ..._zeroshot_config.yaml => gsm8k_zeroorfewshot_config.yaml} | 2 +- guidance_programs/gsm8k_zero_or_few_shot_basic_json.py | 5 ++++- guidance_programs/gsm8k_zero_or_few_shot_plain.py | 5 ++++- guidance_programs/gsm8k_zero_or_few_shot_regex_number.py | 5 ++++- 4 files changed, 13 insertions(+), 4 deletions(-) rename azureml/pipelines/configs/{gsm8k_zeroshot_config.yaml => gsm8k_zeroorfewshot_config.yaml} (96%) diff --git a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml similarity index 96% rename from azureml/pipelines/configs/gsm8k_zeroshot_config.yaml rename to azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml index e548f92..2509917 100644 --- a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml +++ b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml @@ -15,4 +15,4 @@ zeroorfewshot_config: llamacpp_config: compute_target: gput4 random_seed: 4521 - n_samples: 5 \ No newline at end of file + n_samples: 2 \ No newline at end of file diff --git a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py index 07e2eac..3f6dddc 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py @@ -28,7 +28,10 @@ def zero_shot_gsm8k( lm += f"Question: {e['question']}\n" lm += f"Reasoning:" for t in e["thoughts"]: - lm += t["step"] + t["result"] + lm += t["step"] + if "result" in t: + lm += t["result"] + lm += "\n" lm += f"Answer: {e['answer']}" lm += "\n" diff --git a/guidance_programs/gsm8k_zero_or_few_shot_plain.py b/guidance_programs/gsm8k_zero_or_few_shot_plain.py index 6b420f3..4ea11ad 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_plain.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_plain.py @@ -28,7 +28,10 @@ def zero_shot_gsm8k( lm += f"Question: {e['question']}\n" lm += f"Reasoning:" for t in e["thoughts"]: - lm += t["step"] + t["result"] + lm += t["step"] + if "result" in t: + lm += t["result"] + lm += "\n" lm += f"Answer: {e['answer']}" lm += "\n" diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py index 1ab6e85..42a5773 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py @@ -28,7 +28,10 @@ def zero_shot_gsm8k( lm += f"Question: {e['question']}\n" lm += f"Reasoning:" for t in e["thoughts"]: - lm += t["step"] + t["result"] + lm += t["step"] + if "result" in t: + lm += t["result"] + lm += "\n" lm += f"Answer: {e['answer']}" lm += "\n" From d60345c46b469f59c67ba81d4b8a8e03afad5613 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 10 Apr 2024 14:43:44 -0400 Subject: [PATCH 19/36] Some token limits --- .../configs/gsm8k_zeroorfewshot_config.yaml | 2 +- .../gsm8k_zero_or_few_shot_basic_json.py | 2 +- guidance_programs/gsm8k_zero_or_few_shot_plain.py | 14 ++++++++------ .../gsm8k_zero_or_few_shot_regex_number.py | 10 ++++++---- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml index 2509917..40248f9 100644 --- a/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml +++ b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml @@ -15,4 +15,4 @@ zeroorfewshot_config: llamacpp_config: compute_target: gput4 random_seed: 4521 - n_samples: 2 \ No newline at end of file + n_samples: 1 \ No newline at end of file diff --git a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py index 3f6dddc..7a619f4 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py @@ -38,7 +38,7 @@ def zero_shot_gsm8k( # Now ask the question lm += f"Question: {question}\n" lm += f"Reasoning:" - lm += guidance.gen("reasons") + lm += guidance.gen("reasons", max_tokens=50) lm += f"Answer: " + guidance.json(name="result_string", schema=dict(type="number")) return lm diff --git a/guidance_programs/gsm8k_zero_or_few_shot_plain.py b/guidance_programs/gsm8k_zero_or_few_shot_plain.py index 4ea11ad..5cc21da 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_plain.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_plain.py @@ -34,12 +34,12 @@ def zero_shot_gsm8k( lm += "\n" lm += f"Answer: {e['answer']}" lm += "\n" - + # Now ask the question lm += f"Question: {question}\n" lm += f"Reasoning:" - lm += guidance.gen("reasons") - lm += f"Answer: " + guidance.gen(name="result_string") + lm += guidance.gen("reasons", max_tokens=50) + lm += f"Answer: " + guidance.gen(name="result_string", max_tokens=10) return lm @@ -52,12 +52,14 @@ def guidance_generation( _logger.debug("Starting guidance_generation") if common: raise ValueError("Common Data not supported!") - - result = lm + zero_shot_gsm8k(question=input["question"], examples=input["examples"]) + + result = lm + zero_shot_gsm8k( + question=input["question"], examples=input["examples"] + ) _logger.info(f"result_string: {result['result_string']}") - float_result = float(result['result_string']) + float_result = float(result["result_string"]) result = dict(zero_or_few_shot_answer=float_result) return result diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py index 42a5773..f88e099 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py @@ -34,11 +34,11 @@ def zero_shot_gsm8k( lm += "\n" lm += f"Answer: {e['answer']}" lm += "\n" - + # Now ask the question lm += f"Question: {question}\n" lm += f"Reasoning:" - lm += guidance.gen("reasons") + lm += guidance.gen("reasons", max_tokens=50) lm += f"Answer: " + guidance.gen(name="result_string", regex=r"\d+\.?\d*") return lm @@ -52,8 +52,10 @@ def guidance_generation( _logger.debug("Starting guidance_generation") if common: raise ValueError("Common Data not supported!") - - result = lm + zero_shot_gsm8k(question=input["question"], examples=input["examples"]) + + result = lm + zero_shot_gsm8k( + question=input["question"], examples=input["examples"] + ) _logger.info(f"result_string: {result['result_string']}") From 1cfefad068aa2116549a0ac382412d32cdf32d6c Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Thu, 11 Apr 2024 10:43:44 -0400 Subject: [PATCH 20/36] More involved JSON --- .../configs/gsm8k_zeroorfewshot_config.yaml | 1 + .../gsm8k_zero_or_few_shot_json_response.py | 76 +++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 guidance_programs/gsm8k_zero_or_few_shot_json_response.py diff --git a/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml index 40248f9..59efb39 100644 --- a/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml +++ b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml @@ -12,6 +12,7 @@ zeroorfewshot_config: - gsm8k_zero_or_few_shot_plain.py - gsm8k_zero_or_few_shot_regex_number.py - gsm8k_zero_or_few_shot_basic_json.py + - gsm8k_zero_or_few_shot_json_response.py llamacpp_config: compute_target: gput4 random_seed: 4521 diff --git a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py new file mode 100644 index 0000000..13b67ba --- /dev/null +++ b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py @@ -0,0 +1,76 @@ +# This is a very naive guidance program for GSM8K + +import json +import logging +import sys + +from typing import Any, Dict + +import guidance + + +_logger = logging.getLogger(__file__) +_logger.setLevel(logging.INFO) +_logger.addHandler(logging.StreamHandler(stream=sys.stdout)) + + +@guidance +def zero_shot_gsm8k( + lm: guidance.models.Instruct, + question: str, + examples: list[dict[str, Any]] | None, +): + # Some general instruction to the model + lm += """You are taking a maths test\n\n""" + + response_schema = dict( + type="object", + properties=dict( + thoughts=dict(type="array", items=dict(type="string")), + result=dict(type="number"), + ), + ) + + # Show the few shots + for e in examples: + lm += f"Question: {e['question']}\n" + + nxt_obj = dict(result=e["answer"], thoughts=[]) + for t in e["thoughts"]: + nxt_thought = t["step"] + if "result" in t: + nxt_thought += t["result"] + nxt_obj["thoughts"].append(nxt_thought) + + lm += guidance.library._json._to_compact_json(nxt_obj) + lm += "\n" + + # Now ask the question + lm += f"Question: {question}\n" + lm += guidance.json(name="response_json", schema=response_schema) + + return lm + + +def guidance_generation( + lm: guidance.models.Chat, + input: Dict[str, Any], + common: list[dict[str, Any]] | None = None, +) -> Dict[str, Any]: + _logger.debug("Starting guidance_generation") + if common: + raise ValueError("Common Data not supported!") + + result = lm + zero_shot_gsm8k( + question=input["question"], examples=input["examples"] + ) + + _logger.info(f"result_string: {result['response_json']}") + + loaded_obj = json.loads(result["response_json"]) + + result = dict( + zero_or_few_shot_answer=loaded_obj["result"], + zero_or_few_show_thoughts=loaded_obj["thoughts"], + ) + return result From 0714d8877aa3015686133bd18fc5421fe53e5d02 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Fri, 12 Apr 2024 20:49:40 -0400 Subject: [PATCH 21/36] Make sure pydantic and jsonschema are available --- azureml/environments/phi2transformer-env.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/azureml/environments/phi2transformer-env.yaml b/azureml/environments/phi2transformer-env.yaml index f31b249..8045498 100644 --- a/azureml/environments/phi2transformer-env.yaml +++ b/azureml/environments/phi2transformer-env.yaml @@ -21,4 +21,6 @@ conda_file: - accelerate - aether-utils==0.0.1.dev1 - guidance>=0.1.13 + - jsonschema + - pydantic - transformers \ No newline at end of file From 6f54a517979eac1eba17d32e749acf6a27645869 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Mon, 15 Apr 2024 13:01:37 -0400 Subject: [PATCH 22/36] Fix regex for a number --- guidance_programs/gsm8k_zero_or_few_shot_regex_number.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py index f88e099..2b219ad 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py @@ -39,7 +39,7 @@ def zero_shot_gsm8k( lm += f"Question: {question}\n" lm += f"Reasoning:" lm += guidance.gen("reasons", max_tokens=50) - lm += f"Answer: " + guidance.gen(name="result_string", regex=r"\d+\.?\d*") + lm += f"Answer: " + guidance.gen(name="result_string", regex=r"-?\d+\.?\d*") return lm From 1bbc53d0babe33b53df3880e9b9ffcdfe210b356 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Mon, 15 Apr 2024 13:37:24 -0400 Subject: [PATCH 23/36] Increase context window for model --- azureml/components/src/jsonl_guidance_mistral7b.py | 2 +- azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/azureml/components/src/jsonl_guidance_mistral7b.py b/azureml/components/src/jsonl_guidance_mistral7b.py index d22a9bf..c1cc446 100644 --- a/azureml/components/src/jsonl_guidance_mistral7b.py +++ b/azureml/components/src/jsonl_guidance_mistral7b.py @@ -99,7 +99,7 @@ def main(): downloaded_file = hf_hub_download(repo_id=repo_id, filename=filename) guidance_model = guidance.models.LlamaCpp( - downloaded_file, verbose=True, n_gpu_layers=-1 + downloaded_file, verbose=True, n_gpu_layers=-1, n_ctx=4096 ) # _logger.info(f"guidance_model.device: {guidance_model.engine.device}") diff --git a/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml index 59efb39..3279e3e 100644 --- a/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml +++ b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml @@ -16,4 +16,4 @@ zeroorfewshot_config: llamacpp_config: compute_target: gput4 random_seed: 4521 - n_samples: 1 \ No newline at end of file + n_samples: 5 \ No newline at end of file From 88ef9efc3082a12419345a814fbd03080c9ebf06 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 17 Apr 2024 08:44:13 -0400 Subject: [PATCH 24/36] Create a JSONL sampling component --- .../jsonl_sample_lines_component.yaml | 52 +++++++++++++++++++ azureml/components/src/jsonl_sample_lines.py | 52 +++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 azureml/components/jsonl_sample_lines_component.yaml create mode 100644 azureml/components/src/jsonl_sample_lines.py diff --git a/azureml/components/jsonl_sample_lines_component.yaml b/azureml/components/jsonl_sample_lines_component.yaml new file mode 100644 index 0000000..5c23a68 --- /dev/null +++ b/azureml/components/jsonl_sample_lines_component.yaml @@ -0,0 +1,52 @@ +$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json + +name: jsonl_sample_lines +display_name: 'JSONL Sample Lines' +type: command +description: | + Samples lines (without replacement) from a JSONL file +is_deterministic: true + +inputs: + input_dataset: + type: uri_file + optional: false + description: Dataset containing JSONL input + input_encoding: + type: string + optional: false + default: utf-8-sig + description: Encoding format of the input dataset + n_samples: + type: integer + optional: false + description: Number of samples required + random_seed: + type: integer + optional: false + description: Seed for Pythons PRNG + output_encoding: + type: string + optional: false + default: utf-8-sig + description: Encoding format of the output dataset + +outputs: + output_dataset: + type: uri_file + description: Dataset containing sampled JSONL + +code: ./src + +command: >- + python ./jsonl_sample_lines.py + --input_dataset ${{ inputs.input_dataset }} + --input_encoding ${{ inputs.input_encoding }} + --n_samples ${{ inputs.n_samples }} + --random_seed ${{ inputs.random_seed }} + --output_dataset ${{ outputs.output_dataset }} + --output_encoding ${{ inputs.output_encoding }} + +environment: + # Will be updated when component uploads + image: azureml:promptbase_aml@latest \ No newline at end of file diff --git a/azureml/components/src/jsonl_sample_lines.py b/azureml/components/src/jsonl_sample_lines.py new file mode 100644 index 0000000..224c877 --- /dev/null +++ b/azureml/components/src/jsonl_sample_lines.py @@ -0,0 +1,52 @@ +import argparse +import pathlib +import random + +from typing import Any, Dict, List + +from aether_utils.jsonl_file_utils import load_jsonl, save_jsonl +from aether_utils.logging_utils import get_standard_logger_for_file + +_logger = get_standard_logger_for_file(__file__) + + +def parse_args(): + parser = argparse.ArgumentParser(add_help=True) + + # Information about the datasets + datasets_group = parser.add_argument_group("Datasets") + datasets_group.add_argument("--input_dataset", type=pathlib.Path, required=True) + datasets_group.add_argument("--input_encoding", type=str, required=True) + datasets_group.add_argument("--output_dataset", type=pathlib.Path, required=True) + datasets_group.add_argument("--output_encoding", type=str, required=True) + + # Information about the sampling + sampling_group = parser.add_argument_group("Sampling") + sampling_group.add_argument("--n_samples", type=int, required=True) + sampling_group.add_argument("--random_seed", type=int, required=True) + + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + + _logger.info("Loading input") + all_data = load_jsonl(args.input_dataset, args.input_encoding) + _logger.info(f"Loaded {len(all_data)} items") + + random.seed(args.random_seed) + sampled_data = random.sample(all_data, k=args.n_samples) + + _logger.info("Saving output") + save_jsonl( + file_path=args.output_dataset, + data=sampled_data, + destination_encoding=args.output_encoding, + ) + _logger.info("Done") + + +if __name__ == "__main__": + main() From 85657f38015630f1904d309f300190a4bc78c2a9 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 17 Apr 2024 09:01:48 -0400 Subject: [PATCH 25/36] Hook new component into pipeline --- azureml/pipelines/azureml_utils.py | 1 + azureml/pipelines/configs.py | 4 +++- .../configs/gsm8k_zeroorfewshot_config.yaml | 6 ++++-- azureml/pipelines/submit_gsm8k_zeroorfewshot.py | 13 ++++++++++--- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/azureml/pipelines/azureml_utils.py b/azureml/pipelines/azureml_utils.py index 341b9b6..9dfce0d 100644 --- a/azureml/pipelines/azureml_utils.py +++ b/azureml/pipelines/azureml_utils.py @@ -22,6 +22,7 @@ jsonl_knn_cosine_similarity="jsonl_knn_cosine_similarity_component.yaml", jsonl_mmlu_fetch="jsonl_mmlu_fetch_component.yaml", jsonl_random_examples="jsonl_random_examples_component.yaml", + jsonl_sample_lines="jsonl_sample_lines_component.yaml", jsonl_schema_checker="jsonl_schema_checker_component.yaml", jsonl_score_biosbias_json="jsonl_score_biosbias_json_component.yaml", jsonl_score_multiplechoice="jsonl_score_multiplechoice_component.yaml", diff --git a/azureml/pipelines/configs.py b/azureml/pipelines/configs.py index e51cbec..8b6675a 100644 --- a/azureml/pipelines/configs.py +++ b/azureml/pipelines/configs.py @@ -140,6 +140,8 @@ class GSM8KZeroOrFewShotConfig: pipeline: PipelineConfig = field(default_factory=PipelineConfig) json_guidance_programs: list[str] = field(default_factory=list) llamacpp_config: LlamaCppConfig = field(default_factory=LlamaCppConfig) - random_seed: int = int() + fewshot_random_seed: int = int() + n_fewshot: int = int() + sample_random_seed: int = int() n_samples: int = int() diff --git a/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml index 3279e3e..20c1e59 100644 --- a/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml +++ b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml @@ -15,5 +15,7 @@ zeroorfewshot_config: - gsm8k_zero_or_few_shot_json_response.py llamacpp_config: compute_target: gput4 - random_seed: 4521 - n_samples: 5 \ No newline at end of file + fewshot_random_seed: 4521 + n_fewshot: 5 + sample_random_seed: 234891 + n_samples: 400 \ No newline at end of file diff --git a/azureml/pipelines/submit_gsm8k_zeroorfewshot.py b/azureml/pipelines/submit_gsm8k_zeroorfewshot.py index b93f738..4890548 100644 --- a/azureml/pipelines/submit_gsm8k_zeroorfewshot.py +++ b/azureml/pipelines/submit_gsm8k_zeroorfewshot.py @@ -65,12 +65,19 @@ def basic_pipeline() -> Pipeline: get_split_job.name = f"extract_split_{s}" split_outputs[s] = get_split_job.outputs.output_dataset - random_examples_job = components.jsonl_random_examples( + sample_lines_job = components.jsonl_sample_lines( input_dataset=split_outputs["train"], + n_samples=run_config.n_samples, + random_seed=run_config.sample_random_seed + ) + sample_lines_job.name= f"sample_{run_config.n_samples}_lines" + + random_examples_job = components.jsonl_random_examples( + input_dataset=sample_lines_job.outputs.output_dataset, example_dataset=split_outputs["test"], output_key="examples", - num_examples=run_config.n_samples, - random_seed=run_config.random_seed + num_examples=run_config.n_fewshot, + random_seed=run_config.fewshot_random_seed ) random_examples_job.name=f"add_random_examples" From 53d4fe598c33c8deba2ccddeae95065aed2f225f Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 17 Apr 2024 14:53:58 -0400 Subject: [PATCH 26/36] Try tweaking the prompts (and saving) --- .../gsm8k_zero_or_few_shot_basic_json.py | 7 ++++--- .../gsm8k_zero_or_few_shot_json_response.py | 1 + guidance_programs/gsm8k_zero_or_few_shot_plain.py | 11 ++++++----- .../gsm8k_zero_or_few_shot_regex_number.py | 7 ++++--- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py index 7a619f4..f73316e 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py @@ -26,7 +26,7 @@ def zero_shot_gsm8k( # Show the few shots for e in examples: lm += f"Question: {e['question']}\n" - lm += f"Reasoning:" + lm += f"Reasoning:\n" for t in e["thoughts"]: lm += t["step"] if "result" in t: @@ -38,7 +38,8 @@ def zero_shot_gsm8k( # Now ask the question lm += f"Question: {question}\n" lm += f"Reasoning:" - lm += guidance.gen("reasons", max_tokens=50) + lm += guidance.gen("reasons", max_tokens=100) + lm += "\n" lm += f"Answer: " + guidance.json(name="result_string", schema=dict(type="number")) return lm @@ -61,5 +62,5 @@ def guidance_generation( float_result = float(result["result_string"]) - result = dict(zero_or_few_shot_answer=float_result) + result = dict(zero_or_few_shot_answer=float_result, final_lm=str(result)) return result diff --git a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py index 13b67ba..06e017c 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py @@ -72,5 +72,6 @@ def guidance_generation( result = dict( zero_or_few_shot_answer=loaded_obj["result"], zero_or_few_show_thoughts=loaded_obj["thoughts"], + final_lm=str(result), ) return result diff --git a/guidance_programs/gsm8k_zero_or_few_shot_plain.py b/guidance_programs/gsm8k_zero_or_few_shot_plain.py index 5cc21da..7b47e71 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_plain.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_plain.py @@ -26,19 +26,20 @@ def zero_shot_gsm8k( # Show the few shots for e in examples: lm += f"Question: {e['question']}\n" - lm += f"Reasoning:" + lm += f"Reasoning:\n" for t in e["thoughts"]: lm += t["step"] if "result" in t: lm += t["result"] lm += "\n" - lm += f"Answer: {e['answer']}" + lm += f"Answer: {e['answer']}\n" lm += "\n" # Now ask the question lm += f"Question: {question}\n" - lm += f"Reasoning:" - lm += guidance.gen("reasons", max_tokens=50) + lm += f"Reasoning:\n" + lm += guidance.gen("reasons", max_tokens=100) + lm += "\n" lm += f"Answer: " + guidance.gen(name="result_string", max_tokens=10) return lm @@ -61,5 +62,5 @@ def guidance_generation( float_result = float(result["result_string"]) - result = dict(zero_or_few_shot_answer=float_result) + result = dict(zero_or_few_shot_answer=float_result, final_lm=str(result)) return result diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py index 2b219ad..9fff285 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py @@ -26,7 +26,7 @@ def zero_shot_gsm8k( # Show the few shots for e in examples: lm += f"Question: {e['question']}\n" - lm += f"Reasoning:" + lm += f"Reasoning:\n" for t in e["thoughts"]: lm += t["step"] if "result" in t: @@ -38,7 +38,8 @@ def zero_shot_gsm8k( # Now ask the question lm += f"Question: {question}\n" lm += f"Reasoning:" - lm += guidance.gen("reasons", max_tokens=50) + lm += guidance.gen("reasons", max_tokens=100) + lm += "\n" lm += f"Answer: " + guidance.gen(name="result_string", regex=r"-?\d+\.?\d*") return lm @@ -61,5 +62,5 @@ def guidance_generation( float_result = float(result["result_string"]) - result = dict(zero_or_few_shot_answer=float_result) + result = dict(zero_or_few_shot_answer=float_result, final_lm=str(result)) return result From 4ec2b820660175a9093d57e7195e042e4a49c451 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 17 Apr 2024 15:29:44 -0400 Subject: [PATCH 27/36] Fix division by zero --- azureml/components/src/jsonl_score_numeric.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/azureml/components/src/jsonl_score_numeric.py b/azureml/components/src/jsonl_score_numeric.py index 35a260e..c5dd211 100644 --- a/azureml/components/src/jsonl_score_numeric.py +++ b/azureml/components/src/jsonl_score_numeric.py @@ -37,7 +37,9 @@ def generate_summary(self) -> dict[str, Any]: if y_t == y_p: n_correct += 1 result["n_correct"] = n_correct - result["accuracy"] = float(n_correct) / len(self.y_true) + result["accuracy"] = 0 + if len(self.y_true) > 0: + result["accuracy"] = float(n_correct) / len(self.y_true) return result From bf85df89e3aa0ae5ddc7980f00029d01adb2085a Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 17 Apr 2024 15:29:53 -0400 Subject: [PATCH 28/36] Want mlflow available --- azureml/environments/phi2transformer-env.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/azureml/environments/phi2transformer-env.yaml b/azureml/environments/phi2transformer-env.yaml index 8045498..8924347 100644 --- a/azureml/environments/phi2transformer-env.yaml +++ b/azureml/environments/phi2transformer-env.yaml @@ -19,6 +19,7 @@ conda_file: # ... so we have to add PyPI back in as an alternative index - --extra-index-url https://pypi.org/simple - accelerate + - azureml-mlflow - aether-utils==0.0.1.dev1 - guidance>=0.1.13 - jsonschema From ade55793dcbd662249f7b4e7526669a9efbf481b Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 17 Apr 2024 15:30:00 -0400 Subject: [PATCH 29/36] Try some more logging --- azureml/components/src/jsonl_guidance_mistral7b.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/azureml/components/src/jsonl_guidance_mistral7b.py b/azureml/components/src/jsonl_guidance_mistral7b.py index c1cc446..16399fa 100644 --- a/azureml/components/src/jsonl_guidance_mistral7b.py +++ b/azureml/components/src/jsonl_guidance_mistral7b.py @@ -2,6 +2,7 @@ import importlib.util import json import pathlib +import time from typing import Any, Callable, Dict @@ -9,7 +10,8 @@ from huggingface_hub import hf_hub_download -import torch +import mlflow + from transformers import AutoModelForCausalLM, AutoTokenizer from aether_utils.jsonl_utils import line_map @@ -56,16 +58,21 @@ def __init__( self._model = model self._guidance_function = self._get_guidance_function() self._common_data = common_data + self._step = 0 def __call__(self, item: Dict[str, Any]) -> dict[str, any]: _logger.debug(f"__call__: {item}") + start = time.time() result = self._guidance_function(self._model, item, common=self._common_data) + stop = time.time() + mlflow.log_metric("time_taken", value=stop-start, step=self._step) _logger.debug(f"Checking keys") for k in result.keys(): assert k not in item, f"Duplicate key: {k}" _logger.debug(f"Updating item") item.update(**result) + self._step += 1 return item From 542b7c88b3559c58c8802fa18ba4b9266d0e1585 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 17 Apr 2024 15:30:25 -0400 Subject: [PATCH 30/36] Tweaking guidance programs --- .../gsm8k_zero_or_few_shot_basic_json.py | 6 ++++-- .../gsm8k_zero_or_few_shot_json_response.py | 20 ++++++++++++++++--- .../gsm8k_zero_or_few_shot_plain.py | 6 ++++-- .../gsm8k_zero_or_few_shot_regex_number.py | 6 ++++-- 4 files changed, 29 insertions(+), 9 deletions(-) diff --git a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py index f73316e..c3a2edf 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py @@ -27,9 +27,11 @@ def zero_shot_gsm8k( for e in examples: lm += f"Question: {e['question']}\n" lm += f"Reasoning:\n" - for t in e["thoughts"]: - lm += t["step"] + for i, t in enumerate(e["thoughts"]): + lm += f"{i+1}. {t['step']}" if "result" in t: + lm += " " + lm += t["calculation"] lm += t["result"] lm += "\n" lm += f"Answer: {e['answer']}" diff --git a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py index 06e017c..81a08e7 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py @@ -6,6 +6,8 @@ from typing import Any, Dict +from jsonschema import validate + import guidance @@ -26,7 +28,17 @@ def zero_shot_gsm8k( response_schema = dict( type="object", properties=dict( - thoughts=dict(type="array", items=dict(type="string")), + thoughts=dict( + type="array", + items=dict( + type="object", + properties=dict( + step=dict(type="string"), + calculation=dict(type="string"), + result=dict(type="string"), + ), + ), + ), result=dict(type="number"), ), ) @@ -37,11 +49,13 @@ def zero_shot_gsm8k( nxt_obj = dict(result=e["answer"], thoughts=[]) for t in e["thoughts"]: - nxt_thought = t["step"] + nxt_thought = dict(step=t["step"]) if "result" in t: - nxt_thought += t["result"] + nxt_thought["calculation"] = t["calculation"] + nxt_thought["result"] += t["result"] nxt_obj["thoughts"].append(nxt_thought) + validate(nxt_obj, schema=response_schema) lm += guidance.library._json._to_compact_json(nxt_obj) lm += "\n" diff --git a/guidance_programs/gsm8k_zero_or_few_shot_plain.py b/guidance_programs/gsm8k_zero_or_few_shot_plain.py index 7b47e71..ae5eca8 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_plain.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_plain.py @@ -27,9 +27,11 @@ def zero_shot_gsm8k( for e in examples: lm += f"Question: {e['question']}\n" lm += f"Reasoning:\n" - for t in e["thoughts"]: - lm += t["step"] + for i, t in enumerate(e["thoughts"]): + lm += f"{i+1}. {t['step']}" if "result" in t: + lm += " " + lm += t["calculation"] lm += t["result"] lm += "\n" lm += f"Answer: {e['answer']}\n" diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py index 9fff285..3402595 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py @@ -27,9 +27,11 @@ def zero_shot_gsm8k( for e in examples: lm += f"Question: {e['question']}\n" lm += f"Reasoning:\n" - for t in e["thoughts"]: - lm += t["step"] + for i, t in enumerate(e["thoughts"]): + lm += f"{i+1}. {t['step']}" if "result" in t: + lm += " " + lm += t["calculation"] lm += t["result"] lm += "\n" lm += f"Answer: {e['answer']}" From 8a2afa5ef9092b9392fd82bfe07b0a41eca4b5ce Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Thu, 18 Apr 2024 08:27:25 -0400 Subject: [PATCH 31/36] Try some extra new lines --- guidance_programs/gsm8k_zero_or_few_shot_basic_json.py | 4 ++-- guidance_programs/gsm8k_zero_or_few_shot_json_response.py | 1 + guidance_programs/gsm8k_zero_or_few_shot_plain.py | 6 +++--- guidance_programs/gsm8k_zero_or_few_shot_regex_number.py | 8 +++++--- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py index c3a2edf..8eb6c08 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py @@ -35,12 +35,12 @@ def zero_shot_gsm8k( lm += t["result"] lm += "\n" lm += f"Answer: {e['answer']}" - lm += "\n" + lm += "\n\n" # Now ask the question lm += f"Question: {question}\n" lm += f"Reasoning:" - lm += guidance.gen("reasons", max_tokens=100) + lm += guidance.gen("reasons", max_tokens=100, stop="\n") lm += "\n" lm += f"Answer: " + guidance.json(name="result_string", schema=dict(type="number")) diff --git a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py index 81a08e7..88bcc37 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py @@ -54,6 +54,7 @@ def zero_shot_gsm8k( nxt_thought["calculation"] = t["calculation"] nxt_thought["result"] += t["result"] nxt_obj["thoughts"].append(nxt_thought) + lm += "\n" validate(nxt_obj, schema=response_schema) lm += guidance.library._json._to_compact_json(nxt_obj) diff --git a/guidance_programs/gsm8k_zero_or_few_shot_plain.py b/guidance_programs/gsm8k_zero_or_few_shot_plain.py index ae5eca8..9b7c76b 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_plain.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_plain.py @@ -35,14 +35,14 @@ def zero_shot_gsm8k( lm += t["result"] lm += "\n" lm += f"Answer: {e['answer']}\n" - lm += "\n" + lm += "\n\n" # Now ask the question lm += f"Question: {question}\n" lm += f"Reasoning:\n" - lm += guidance.gen("reasons", max_tokens=100) + lm += guidance.gen("reasons", max_tokens=100, stop="\n") lm += "\n" - lm += f"Answer: " + guidance.gen(name="result_string", max_tokens=10) + lm += f"Answer: " + guidance.gen(name="result_string", max_tokens=10, stop="\n") return lm diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py index 3402595..1a51aab 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py @@ -35,14 +35,16 @@ def zero_shot_gsm8k( lm += t["result"] lm += "\n" lm += f"Answer: {e['answer']}" - lm += "\n" + lm += "\n\n" # Now ask the question lm += f"Question: {question}\n" lm += f"Reasoning:" - lm += guidance.gen("reasons", max_tokens=100) + lm += guidance.gen("reasons", max_tokens=100, stop="\n") lm += "\n" - lm += f"Answer: " + guidance.gen(name="result_string", regex=r"-?\d+\.?\d*") + lm += f"Answer: " + guidance.gen( + name="result_string", regex=r"-?\d+\.?\d*", stop="\n" + ) return lm From e51a0258df4deb54302aec2c7551ff495f1ad81c Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Thu, 18 Apr 2024 13:06:50 -0400 Subject: [PATCH 32/36] Small tweaks and linting --- azureml/components/src/jsonl_guidance_mistral7b.py | 2 +- azureml/pipelines/configs.py | 2 +- azureml/pipelines/submit_gsm8k_zeroorfewshot.py | 9 ++++----- guidance_programs/gsm8k_zero_or_few_shot_basic_json.py | 3 ++- .../gsm8k_zero_or_few_shot_json_response.py | 3 +-- guidance_programs/gsm8k_zero_or_few_shot_plain.py | 3 ++- guidance_programs/gsm8k_zero_or_few_shot_regex_number.py | 3 ++- 7 files changed, 13 insertions(+), 12 deletions(-) diff --git a/azureml/components/src/jsonl_guidance_mistral7b.py b/azureml/components/src/jsonl_guidance_mistral7b.py index 16399fa..7fa5063 100644 --- a/azureml/components/src/jsonl_guidance_mistral7b.py +++ b/azureml/components/src/jsonl_guidance_mistral7b.py @@ -65,7 +65,7 @@ def __call__(self, item: Dict[str, Any]) -> dict[str, any]: start = time.time() result = self._guidance_function(self._model, item, common=self._common_data) stop = time.time() - mlflow.log_metric("time_taken", value=stop-start, step=self._step) + mlflow.log_metric("time_taken", value=stop - start, step=self._step) _logger.debug(f"Checking keys") for k in result.keys(): assert k not in item, f"Duplicate key: {k}" diff --git a/azureml/pipelines/configs.py b/azureml/pipelines/configs.py index 8b6675a..7fadb45 100644 --- a/azureml/pipelines/configs.py +++ b/azureml/pipelines/configs.py @@ -30,6 +30,7 @@ class AOAIConfig: class Phi2Config: compute_target: str = str() + @dataclass class LlamaCppConfig: compute_target: str = str() @@ -144,4 +145,3 @@ class GSM8KZeroOrFewShotConfig: n_fewshot: int = int() sample_random_seed: int = int() n_samples: int = int() - diff --git a/azureml/pipelines/submit_gsm8k_zeroorfewshot.py b/azureml/pipelines/submit_gsm8k_zeroorfewshot.py index 4890548..99a4569 100644 --- a/azureml/pipelines/submit_gsm8k_zeroorfewshot.py +++ b/azureml/pipelines/submit_gsm8k_zeroorfewshot.py @@ -68,21 +68,20 @@ def basic_pipeline() -> Pipeline: sample_lines_job = components.jsonl_sample_lines( input_dataset=split_outputs["train"], n_samples=run_config.n_samples, - random_seed=run_config.sample_random_seed + random_seed=run_config.sample_random_seed, ) - sample_lines_job.name= f"sample_{run_config.n_samples}_lines" + sample_lines_job.name = f"sample_{run_config.n_samples}_lines" random_examples_job = components.jsonl_random_examples( input_dataset=sample_lines_job.outputs.output_dataset, example_dataset=split_outputs["test"], output_key="examples", num_examples=run_config.n_fewshot, - random_seed=run_config.fewshot_random_seed + random_seed=run_config.fewshot_random_seed, ) - random_examples_job.name=f"add_random_examples" + random_examples_job.name = f"add_random_examples" for progname, prog_input in guidance_inputs.items(): - guidance_job = components.jsonl_guidance_mistral7b( guidance_program=prog_input, input_dataset=random_examples_job.outputs.output_dataset, diff --git a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py index 8eb6c08..f201fca 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py @@ -35,7 +35,8 @@ def zero_shot_gsm8k( lm += t["result"] lm += "\n" lm += f"Answer: {e['answer']}" - lm += "\n\n" + lm += "\n" + lm += "\n" # Now ask the question lm += f"Question: {question}\n" diff --git a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py index 88bcc37..0357631 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py @@ -54,11 +54,10 @@ def zero_shot_gsm8k( nxt_thought["calculation"] = t["calculation"] nxt_thought["result"] += t["result"] nxt_obj["thoughts"].append(nxt_thought) - lm += "\n" validate(nxt_obj, schema=response_schema) lm += guidance.library._json._to_compact_json(nxt_obj) - lm += "\n" + lm += "\n\n" # Now ask the question lm += f"Question: {question}\n" diff --git a/guidance_programs/gsm8k_zero_or_few_shot_plain.py b/guidance_programs/gsm8k_zero_or_few_shot_plain.py index 9b7c76b..aa0bd90 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_plain.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_plain.py @@ -35,7 +35,8 @@ def zero_shot_gsm8k( lm += t["result"] lm += "\n" lm += f"Answer: {e['answer']}\n" - lm += "\n\n" + lm += "\n" + lm += "\n" # Now ask the question lm += f"Question: {question}\n" diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py index 1a51aab..1df94e3 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py @@ -35,7 +35,8 @@ def zero_shot_gsm8k( lm += t["result"] lm += "\n" lm += f"Answer: {e['answer']}" - lm += "\n\n" + lm += "\n" + lm += "\n" # Now ask the question lm += f"Question: {question}\n" From 622be2895500567965ba915d19055d013a3a000d Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Thu, 18 Apr 2024 13:38:25 -0400 Subject: [PATCH 33/36] Bad stop for reasons --- guidance_programs/gsm8k_zero_or_few_shot_basic_json.py | 2 +- guidance_programs/gsm8k_zero_or_few_shot_regex_number.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py index f201fca..499a15e 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py @@ -41,7 +41,7 @@ def zero_shot_gsm8k( # Now ask the question lm += f"Question: {question}\n" lm += f"Reasoning:" - lm += guidance.gen("reasons", max_tokens=100, stop="\n") + lm += guidance.gen("reasons", max_tokens=100) lm += "\n" lm += f"Answer: " + guidance.json(name="result_string", schema=dict(type="number")) diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py index 1df94e3..ad07bca 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py @@ -41,7 +41,7 @@ def zero_shot_gsm8k( # Now ask the question lm += f"Question: {question}\n" lm += f"Reasoning:" - lm += guidance.gen("reasons", max_tokens=100, stop="\n") + lm += guidance.gen("reasons", max_tokens=100) lm += "\n" lm += f"Answer: " + guidance.gen( name="result_string", regex=r"-?\d+\.?\d*", stop="\n" From a90380e8b108d3b8615975736612c574e05c1c82 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Thu, 18 Apr 2024 14:44:26 -0400 Subject: [PATCH 34/36] Dumb mistakes in json formatted response --- .../gsm8k_zero_or_few_shot_json_response.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py index 0357631..34505df 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py @@ -47,13 +47,14 @@ def zero_shot_gsm8k( for e in examples: lm += f"Question: {e['question']}\n" - nxt_obj = dict(result=e["answer"], thoughts=[]) + nxt_obj = dict(thoughts=[]) for t in e["thoughts"]: - nxt_thought = dict(step=t["step"]) + nxt_thought = dict(step=t["step"], calculation="", result="") if "result" in t: nxt_thought["calculation"] = t["calculation"] - nxt_thought["result"] += t["result"] + nxt_thought["result"] = t["result"] nxt_obj["thoughts"].append(nxt_thought) + nxt_obj["result"] = e["answer"] validate(nxt_obj, schema=response_schema) lm += guidance.library._json._to_compact_json(nxt_obj) @@ -75,13 +76,13 @@ def guidance_generation( if common: raise ValueError("Common Data not supported!") - result = lm + zero_shot_gsm8k( + llm_result = lm + zero_shot_gsm8k( question=input["question"], examples=input["examples"] ) - _logger.info(f"result_string: {result['response_json']}") + _logger.info(f"result_string: {llm_result['response_json']}") - loaded_obj = json.loads(result["response_json"]) + loaded_obj = json.loads(llm_result["response_json"]) result = dict( zero_or_few_shot_answer=loaded_obj["result"], From db91f0183208e9f5a72042db014d8978e13266b9 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Fri, 19 Apr 2024 08:07:10 -0400 Subject: [PATCH 35/36] Another silly mistake --- guidance_programs/gsm8k_zero_or_few_shot_json_response.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py index 34505df..2d58c30 100644 --- a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py +++ b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py @@ -87,6 +87,6 @@ def guidance_generation( result = dict( zero_or_few_shot_answer=loaded_obj["result"], zero_or_few_show_thoughts=loaded_obj["thoughts"], - final_lm=str(result), + final_lm=str(llm_result), ) return result From 94c465cfb9be8c075e20aa498e338765440686ba Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Sun, 21 Apr 2024 16:53:38 -0400 Subject: [PATCH 36/36] Need Rust --- azureml/components/jsonl_guidance_mistral7b_component.yaml | 5 +++++ azureml/environments/phi2transformer-env.yaml | 3 +++ 2 files changed, 8 insertions(+) diff --git a/azureml/components/jsonl_guidance_mistral7b_component.yaml b/azureml/components/jsonl_guidance_mistral7b_component.yaml index f6ba778..c451731 100644 --- a/azureml/components/jsonl_guidance_mistral7b_component.yaml +++ b/azureml/components/jsonl_guidance_mistral7b_component.yaml @@ -52,6 +52,11 @@ outputs: code: ./src/ command: | + # Install Rust toolchain + #apt update + #apt upgrade -y + #apt install -y rustc build-essential + #pip install setup-rust # Download the zip wget https://github.com/guidance-ai/guidance/archive/refs/heads/main.zip echo diff --git a/azureml/environments/phi2transformer-env.yaml b/azureml/environments/phi2transformer-env.yaml index 8924347..fe96e88 100644 --- a/azureml/environments/phi2transformer-env.yaml +++ b/azureml/environments/phi2transformer-env.yaml @@ -8,8 +8,11 @@ image: mcr.microsoft.com/azureml/minimal-ubuntu22.04-py39-cuda11.8-gpu-inference conda_file: channels: - defaults + - conda-forge dependencies: - python=3.11 + # Rust is now part of building the guidance wheel + - rust - pip - pip: # Note that we have to force torch to install from this index