From 1d8ec5bae484b0eb93b036a608e54f6908a5e207 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Tue, 9 Apr 2024 10:39:57 -0400
Subject: [PATCH 01/36] Starting to figure out data

---
 azureml/components/src/jsonl_gsm8k_fetch.py | 79 +++++++++++++++++++++
 1 file changed, 79 insertions(+)
 create mode 100644 azureml/components/src/jsonl_gsm8k_fetch.py

diff --git a/azureml/components/src/jsonl_gsm8k_fetch.py b/azureml/components/src/jsonl_gsm8k_fetch.py
new file mode 100644
index 0000000..4e53639
--- /dev/null
+++ b/azureml/components/src/jsonl_gsm8k_fetch.py
@@ -0,0 +1,79 @@
+import argparse
+import json
+import pathlib
+import re
+
+from typing import Any, Dict
+
+import requests
+
+
+from aether_utils.jsonl_file_utils import JSONLWriter, JSONLReader
+from aether_utils.logging_utils import get_standard_logger_for_file
+
+_logger = get_standard_logger_for_file(__file__)
+
+BASE_DATA_URL = "https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/"
+
+SPLITS = ["train", "test"]
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(add_help=True)
+
+    # Information about the ports
+    ports_group = parser.add_argument_group("Ports")
+    ports_group.add_argument("--output_dataset", type=pathlib.Path, required=True)
+    ports_group.add_argument("--output_encoding", type=str, required=True)
+
+    args = parser.parse_args()
+    return args
+
+
+def extract_thought_parts(thought: str) -> Dict[str,Any]:
+    result = dict()
+
+    thought_re = r"(.*)<<(.*=\d+)>>(.*)"
+    match = re.match(thought_re, thought)
+
+    result["step"] = match.group(1)
+    result["calculation"] = match.group(2)
+    result["result"] = match.group(3)
+
+    return result
+
+def process_line(item: Dict[str, Any]) -> Dict[str,Any]:
+    result = dict()
+
+    result["question"] = item["question"]
+
+    split_answer = item["answer"].split("####")
+    result["answer"] = float(split_answer[1])
+
+    result["thoughts"] = []
+    for thought in split_answer[0].splitlines():
+        result["thoughts"].append(extract_thought_parts(thought))
+    return result
+
+
+def main():
+    args = parse_args()
+
+    for split in ["test"]:
+        target_url = f"{BASE_DATA_URL}{split}.jsonl"
+
+        _logger.info(f"Fetching {target_url}")
+        response = requests.get(target_url)
+        assert response.status_code == 200, f"Got response {response}"
+
+        for line in response.text.splitlines():
+            nxt_item = json.loads(line)
+            output_item = process_line(nxt_item)
+
+            print(json.dumps(output_item, indent=4))
+
+    _logger.info("Complete")
+
+
+if __name__ == "__main__":
+    main()

From cc7fcc18e823d8172b457e4fb58312e1bd433fde Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Tue, 9 Apr 2024 10:57:30 -0400
Subject: [PATCH 02/36] Make sure we can process data

---
 azureml/components/src/jsonl_gsm8k_fetch.py | 43 +++++++++++++--------
 1 file changed, 27 insertions(+), 16 deletions(-)

diff --git a/azureml/components/src/jsonl_gsm8k_fetch.py b/azureml/components/src/jsonl_gsm8k_fetch.py
index 4e53639..02541a5 100644
--- a/azureml/components/src/jsonl_gsm8k_fetch.py
+++ b/azureml/components/src/jsonl_gsm8k_fetch.py
@@ -1,5 +1,6 @@
 import argparse
 import json
+import locale
 import pathlib
 import re
 
@@ -30,25 +31,28 @@ def parse_args():
     return args
 
 
-def extract_thought_parts(thought: str) -> Dict[str,Any]:
-    result = dict()
-
-    thought_re = r"(.*)<<(.*=\d+)>>(.*)"
+def extract_thought_parts(thought: str) -> Dict[str, Any]:
+    thought_re = r"(.*)<<(.*=.*)>>(.*)"
     match = re.match(thought_re, thought)
 
-    result["step"] = match.group(1)
-    result["calculation"] = match.group(2)
-    result["result"] = match.group(3)
-
+    result = dict()
+    if match:
+        result["step"] = match.group(1)
+        result["calculation"] = match.group(2)
+        result["result"] = match.group(3)
+    else:
+        result["step"] = thought
     return result
 
-def process_line(item: Dict[str, Any]) -> Dict[str,Any]:
+
+def process_line(item: Dict[str, Any]) -> Dict[str, Any]:
     result = dict()
+    _logger.debug(f"Processing {item}")
 
     result["question"] = item["question"]
 
     split_answer = item["answer"].split("####")
-    result["answer"] = float(split_answer[1])
+    result["answer"] = locale.atof(split_answer[1])
 
     result["thoughts"] = []
     for thought in split_answer[0].splitlines():
@@ -59,18 +63,25 @@ def process_line(item: Dict[str, Any]) -> Dict[str,Any]:
 def main():
     args = parse_args()
 
-    for split in ["test"]:
+    # For parsing numbers
+    locale.setlocale(locale.LC_ALL, "en_US.UTF-8")
+
+    for split in SPLITS:
+        _logger.info(f"Starting split {split}")
         target_url = f"{BASE_DATA_URL}{split}.jsonl"
 
         _logger.info(f"Fetching {target_url}")
         response = requests.get(target_url)
         assert response.status_code == 200, f"Got response {response}"
 
-        for line in response.text.splitlines():
-            nxt_item = json.loads(line)
-            output_item = process_line(nxt_item)
-
-            print(json.dumps(output_item, indent=4))
+        with JSONLWriter(
+            args.output_dataset / f"{split}.jsonl", args.output_encoding
+        ) as jlw:
+            for line in response.text.splitlines():
+                nxt_item = json.loads(line)
+                output_item = process_line(nxt_item)
+                jlw.write_line(output_item)
+        _logger.info(f"Completed split {split}")
 
     _logger.info("Complete")
 

From b6f44fb04caeeebd03afb82d73d92e1ddb841f47 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Tue, 9 Apr 2024 10:59:33 -0400
Subject: [PATCH 03/36] Add component definition YAML

---
 .../jsonl_gsm8k_fetch_component.yaml          | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 azureml/components/jsonl_gsm8k_fetch_component.yaml

diff --git a/azureml/components/jsonl_gsm8k_fetch_component.yaml b/azureml/components/jsonl_gsm8k_fetch_component.yaml
new file mode 100644
index 0000000..dc647e9
--- /dev/null
+++ b/azureml/components/jsonl_gsm8k_fetch_component.yaml
@@ -0,0 +1,32 @@
+$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
+
+name: jsonl_gsm8k_fetch
+version: 0.0.1pre1
+display_name: JSONL GSM8K Fetcher
+type: command
+description: Fetches the GSM8K dataset, and formats into JSONL
+is_deterministic: true
+
+inputs:
+  output_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the output datasets
+
+outputs:
+  output_dataset:
+    type: uri_folder
+    description: |
+      Folder which will contain 'train.jsonl' and 'test.jsonl'
+
+code: ./src/
+
+command: >-
+  python ./jsonl_gsm8k_fetch.py\
+  --output_encoding ${{ inputs.output_encoding }}
+  --output_dataset ${{ outputs.output_dataset }}
+
+environment:
+  # Will be updated when component uploads
+  image: azureml:promptbase_aml@latest
\ No newline at end of file

From 5605b2fbf1150bd5dfffa349315a1eb7545f17bf Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Tue, 9 Apr 2024 11:23:57 -0400
Subject: [PATCH 04/36] Trying to get to first submission

---
 azureml/pipelines/azureml_utils.py            |  1 +
 azureml/pipelines/configs.py                  |  5 +
 .../configs/gsm8k_zeroshot_config.yaml        | 10 ++
 azureml/pipelines/submit_gsm8k_zeroshot.py    | 93 +++++++++++++++++++
 4 files changed, 109 insertions(+)
 create mode 100644 azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
 create mode 100644 azureml/pipelines/submit_gsm8k_zeroshot.py

diff --git a/azureml/pipelines/azureml_utils.py b/azureml/pipelines/azureml_utils.py
index 4401418..25d641e 100644
--- a/azureml/pipelines/azureml_utils.py
+++ b/azureml/pipelines/azureml_utils.py
@@ -15,6 +15,7 @@
 ALL_COMPONENTS = dict(
     jsonl_embeddings="jsonl_embeddings_aoai_component.yaml",
     jsonl_filter_correct_multiplechoice="jsonl_filter_correct_multiplechoice_component.yaml",
+    jsonl_gsm8k_fetch="jsonl_gsm8k_fetch_component.yaml",
     jsonl_guidance="jsonl_guidance_component.yaml",
     jsonl_key_filter="jsonl_key_filter_component.yaml",
     jsonl_key_rename="jsonl_key_rename_component.yaml",
diff --git a/azureml/pipelines/configs.py b/azureml/pipelines/configs.py
index 424ce6b..ddd0989 100644
--- a/azureml/pipelines/configs.py
+++ b/azureml/pipelines/configs.py
@@ -115,3 +115,8 @@ class BiosBiasJSONPipelineConfig:
     biosbias_dataset: str = str()
     json_guidance_program: str = str()
     aoai_config: AOAIConfig = field(default_factory=AOAIConfig)
+
+
+@dataclass
+class GSM8KZeroShotConfig:
+    pipeline: PipelineConfig = field(default_factory=PipelineConfig)
\ No newline at end of file
diff --git a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
new file mode 100644
index 0000000..462e9ce
--- /dev/null
+++ b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
@@ -0,0 +1,10 @@
+defaults:
+  - _self_
+  - aml_config
+  - aoai_config
+
+zeroshot_config:
+  pipeline:
+    base_experiment_name: gsm8k_zeroshot
+    tags:
+    default_compute_target: isolatedcompute
\ No newline at end of file
diff --git a/azureml/pipelines/submit_gsm8k_zeroshot.py b/azureml/pipelines/submit_gsm8k_zeroshot.py
new file mode 100644
index 0000000..3bcb2a2
--- /dev/null
+++ b/azureml/pipelines/submit_gsm8k_zeroshot.py
@@ -0,0 +1,93 @@
+# Submit a run using:
+# python .\submit_gsm8k_zeroshot.py -cn gsm8k_zeroshot_config
+
+import time
+
+from dataclasses import dataclass
+
+import hydra
+from hydra.core.config_store import ConfigStore
+
+import omegaconf
+
+from azure.identity import DefaultAzureCredential
+from azure.ai.ml import MLClient
+
+from azure.ai.ml import dsl, Input, MLClient
+from azure.ai.ml.entities import Pipeline
+
+from azureml_pipelines import create_zeroshot_pipeline
+from azureml_utils import get_component_collector
+from configs import AMLConfig, GSM8KZeroShotConfig
+from constants import GUIDANCE_PROGRAMS_DIR
+from logging_utils import get_standard_logger_for_file
+
+_logger = get_standard_logger_for_file(__file__)
+
+
+@dataclass
+class PipelineConfig:
+    zeroshot_config: GSM8KZeroShotConfig = omegaconf.MISSING
+    azureml_config: AMLConfig = omegaconf.MISSING
+
+
+cs = ConfigStore.instance()
+cs.store(name="config", node=PipelineConfig)
+
+
+def create_gsm8k_zeroshot_pipeline(
+    ml_client: MLClient, run_config: GSM8KZeroShotConfig, version_string: str
+):
+    components = get_component_collector(ml_client, version_string)
+
+    @dsl.pipeline()
+    def basic_pipeline() -> Pipeline:
+        mmlu_fetch_job = components.jsonl_gsm8k_fetch()
+        mmlu_fetch_job.name = f"fetch_gsm8k"
+
+        get_split_job = components.uri_folder_to_file(
+            input_dataset=mmlu_fetch_job.outputs.output_dataset,
+            filename_pattern=f"test.jsonl",
+        )
+        get_split_job.name = f"extract_split_test"
+
+    pipeline = basic_pipeline()
+    pipeline.experiment_name = f"{run_config.pipeline.base_experiment_name}"
+    pipeline.display_name = None
+    pipeline.compute = run_config.pipeline.default_compute_target
+    if run_config.pipeline.tags:
+        pipeline.tags.update(run_config.pipeline.tags)
+    _logger.info("Pipeline created")
+
+    return pipeline
+
+
+@hydra.main(config_path="configs", version_base="1.1")
+def main(config: PipelineConfig):
+    version_string = str(int(time.time()))
+    _logger.info(f"AzureML object version for this run: {version_string}")
+
+    _logger.info(f"Azure Subscription: {config.azureml_config.subscription_id}")
+    _logger.info(f"Resource Group: {config.azureml_config.resource_group}")
+    _logger.info(f"Workspace : {config.azureml_config.workspace_name}")
+
+    credential = DefaultAzureCredential(exclude_shared_token_cache_credential=True)
+
+    ws_client = MLClient(
+        credential=credential,
+        subscription_id=config.azureml_config.subscription_id,
+        resource_group_name=config.azureml_config.resource_group,
+        workspace_name=config.azureml_config.workspace_name,
+        logging_enable=False,
+    )
+
+    pipeline = create_gsm8k_zeroshot_pipeline(
+        ws_client, config.zeroshot_config, version_string
+    )
+    _logger.info("Submitting pipeline")
+    submitted_job = ws_client.jobs.create_or_update(pipeline)
+    _logger.info(f"Submitted: {submitted_job.name}")
+
+
+if __name__ == "__main__":
+    main()

From 18bbb22e65c405c8a24929a65226289d5bc3cd28 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Tue, 9 Apr 2024 12:15:05 -0400
Subject: [PATCH 05/36] Give up on locale :-/

---
 .../jsonl_gsm8k_fetch_component.yaml           |  2 +-
 azureml/components/src/jsonl_gsm8k_fetch.py    | 18 ++++++++++++------
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/azureml/components/jsonl_gsm8k_fetch_component.yaml b/azureml/components/jsonl_gsm8k_fetch_component.yaml
index dc647e9..5d2fdeb 100644
--- a/azureml/components/jsonl_gsm8k_fetch_component.yaml
+++ b/azureml/components/jsonl_gsm8k_fetch_component.yaml
@@ -23,7 +23,7 @@ outputs:
 code: ./src/
 
 command: >-
-  python ./jsonl_gsm8k_fetch.py\
+  python ./jsonl_gsm8k_fetch.py
   --output_encoding ${{ inputs.output_encoding }}
   --output_dataset ${{ outputs.output_dataset }}
 
diff --git a/azureml/components/src/jsonl_gsm8k_fetch.py b/azureml/components/src/jsonl_gsm8k_fetch.py
index 02541a5..01cb15d 100644
--- a/azureml/components/src/jsonl_gsm8k_fetch.py
+++ b/azureml/components/src/jsonl_gsm8k_fetch.py
@@ -1,6 +1,5 @@
 import argparse
 import json
-import locale
 import pathlib
 import re
 
@@ -51,23 +50,29 @@ def process_line(item: Dict[str, Any]) -> Dict[str, Any]:
 
     result["question"] = item["question"]
 
+    # The answer embeds a chain of thought and the
+    # numeric result
     split_answer = item["answer"].split("####")
-    result["answer"] = locale.atof(split_answer[1])
 
     result["thoughts"] = []
     for thought in split_answer[0].splitlines():
         result["thoughts"].append(extract_thought_parts(thought))
+
+    # The following is not how you're supposed to handle
+    # numbers with thousand separators.
+    # This is a work around, pending three-way negotiations
+    # with locale.atof() and the AzureML compute nodes
+    result["answer"] = float(split_answer[1].replace(",", ""))
+
     return result
 
 
 def main():
     args = parse_args()
 
-    # For parsing numbers
-    locale.setlocale(locale.LC_ALL, "en_US.UTF-8")
-
     for split in SPLITS:
         _logger.info(f"Starting split {split}")
+        line_count = 0
         target_url = f"{BASE_DATA_URL}{split}.jsonl"
 
         _logger.info(f"Fetching {target_url}")
@@ -81,7 +86,8 @@ def main():
                 nxt_item = json.loads(line)
                 output_item = process_line(nxt_item)
                 jlw.write_line(output_item)
-        _logger.info(f"Completed split {split}")
+                line_count += 1
+        _logger.info(f"Completed split {split} ({line_count} lines)")
 
     _logger.info("Complete")
 

From 6fa1f932ce327d75f7b2b53807902a495160bace Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Tue, 9 Apr 2024 13:52:34 -0400
Subject: [PATCH 06/36] Drafting a component

---
 .../src/jsonl_guidance_mistral7b.py           | 120 ++++++++++++++++++
 1 file changed, 120 insertions(+)
 create mode 100644 azureml/components/src/jsonl_guidance_mistral7b.py

diff --git a/azureml/components/src/jsonl_guidance_mistral7b.py b/azureml/components/src/jsonl_guidance_mistral7b.py
new file mode 100644
index 0000000..2e53f65
--- /dev/null
+++ b/azureml/components/src/jsonl_guidance_mistral7b.py
@@ -0,0 +1,120 @@
+import argparse
+import importlib.util
+import json
+import pathlib
+
+from typing import Any, Callable, Dict
+
+import guidance
+
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+from aether_utils.jsonl_utils import line_map
+from aether_utils.logging_utils import get_standard_logger_for_file
+
+
+_logger = get_standard_logger_for_file(__file__)
+
+USER_MODULE = "user_module"
+GUIDANCE_FUNCTION = "guidance_generation"
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(add_help=True)
+
+    # Information about the datasets
+    datasets_group = parser.add_argument_group("Datasets")
+    datasets_group.add_argument("--input_dataset", type=pathlib.Path, required=True)
+    datasets_group.add_argument("--input_encoding", type=str, required=True)
+    datasets_group.add_argument("--output_dataset", type=pathlib.Path, required=True)
+    datasets_group.add_argument("--output_encoding", type=str, required=True)
+    datasets_group.add_argument("--error_dataset", type=pathlib.Path, required=True)
+    datasets_group.add_argument("--error_encoding", type=str, required=True)
+    datasets_group.add_argument(
+        "--common_dataset", type=pathlib.Path, required=False, default=None
+    )
+    datasets_group.add_argument("--common_encoding", type=str, required=False)
+
+    # Information about the guidance program
+    parser.add_argument("--guidance_program", type=pathlib.Path, required=True)
+
+    args = parser.parse_args()
+    return args
+
+
+class LLMProcessor:
+    def __init__(
+        self,
+        program_path,
+        model: guidance.models.Model,
+        common_data: dict[str, Any] | None,
+    ):
+        self._program_path = program_path
+        self._model = model
+        self._guidance_function = self._get_guidance_function()
+        self._common_data = common_data
+
+    def __call__(self, item: Dict[str, Any]) -> dict[str, Any]:
+        _logger.debug(f"__call__: {item}")
+        result = self._guidance_function(self._model, item, common=self._common_data)
+        _logger.debug(f"Checking keys")
+        for k in result.keys():
+            assert k not in item, f"Duplicate key: {k}"
+
+        _logger.debug(f"Updating item")
+        item.update(**result)  # merges into the caller's dict in place
+
+        return item
+
+    def _get_guidance_function(
+        self,
+    ) -> Callable[..., Dict[str, Any]]:
+        _logger.debug("Importing guidance file")
+        spec = importlib.util.spec_from_file_location(USER_MODULE, self._program_path)
+        module_definition = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(module_definition)
+
+        guidance_func = getattr(module_definition, GUIDANCE_FUNCTION)
+
+        return guidance_func
+
+
+def main():
+    args = parse_args()
+
+    # Load the common data (if required)
+    common_data = None
+    if args.common_dataset is not None:
+        _logger.info("Loading common dataset")
+        with open(args.common_dataset, "r", encoding=args.common_encoding) as jf:
+            common_data = json.load(jf)
+    else:
+        _logger.info("No common dataset present")
+
+    guidance_model = guidance.models.Transformers(
+        "mistralai/Mistral-7B-v0.1",
+        device_map="cuda:0",
+        echo=False,
+    )
+    _logger.info(f"guidance_model.device: {guidance_model.engine.device}")
+
+    processor = LLMProcessor(
+        program_path=args.guidance_program,
+        model=guidance_model,
+        common_data=common_data,
+    )
+
+    s, f = line_map(
+        map_func=processor,
+        source_file=args.input_dataset,
+        dest_file=args.output_dataset,
+        source_encoding=args.input_encoding,
+        dest_encoding=args.output_encoding,
+    )
+
+    _logger.info(f"Complete with {s} successes and {f} failures")
+
+
+if __name__ == "__main__":
+    main()

From 98405aa6f287011e95c8026c3160b099858c3c01 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Tue, 9 Apr 2024 13:58:21 -0400
Subject: [PATCH 07/36] Roughing out the mistral7b component

---
 .../jsonl_guidance_mistral7b_component.yaml   | 71 +++++++++++++++++++
 guidance_programs/gsm8k_zero_or_few_shot.py   | 51 +++++++++++++
 2 files changed, 122 insertions(+)
 create mode 100644 azureml/components/jsonl_guidance_mistral7b_component.yaml
 create mode 100644 guidance_programs/gsm8k_zero_or_few_shot.py

diff --git a/azureml/components/jsonl_guidance_mistral7b_component.yaml b/azureml/components/jsonl_guidance_mistral7b_component.yaml
new file mode 100644
index 0000000..e0fd05b
--- /dev/null
+++ b/azureml/components/jsonl_guidance_mistral7b_component.yaml
@@ -0,0 +1,71 @@
+$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
+
+name: jsonl_guidance_mistral7b
+version: 0.0.1pre1
+display_name: JSONL Guidance Mistral7B
+type: command
+description: Runs a supplied Guidance program on every line of a JSONL file via Mistral7B
+is_deterministic: false
+
+inputs:
+  guidance_program:
+    type: uri_file
+    optional: false
+    description: Python file containing the guidance program
+  input_dataset:
+    type: uri_file
+    optional: false
+    description: Dataset containing JSONL input
+  input_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the input dataset
+  common_dataset:
+    type: uri_file
+    optional: true
+    description: Dataset containing data to be shared with all rows in input
+  common_encoding:
+    type: string
+    optional: true
+    default: utf-8-sig
+    description: Encoding format of the common dataset
+  output_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the output dataset
+  error_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the error dataset
+
+outputs:
+  output_dataset:
+    type: uri_file
+    description: JSONL file
+  error_dataset:
+    type: uri_file
+    description: JSONL file containing failed lines
+
+code: ./src/
+
+command: |
+  # Get guidance from GitHub
+  pip install --upgrade git+https://github.com/guidance-ai/guidance
+  # Run the script
+  python ./jsonl_guidance_mistral7b.py \
+    --guidance_program ${{ inputs.guidance_program }} \
+    --input_dataset ${{ inputs.input_dataset }} \
+    --input_encoding ${{ inputs.input_encoding }} \
+    $[[--common_dataset ${{ inputs.common_dataset }} ]] \
+    $[[--common_encoding ${{ inputs.common_encoding }} ]] \
+    --output_dataset ${{ outputs.output_dataset }} \
+    --output_encoding ${{ inputs.output_encoding }} \
+    --error_dataset ${{ outputs.error_dataset }} \
+    --error_encoding ${{ inputs.error_encoding }}
+
+environment:
+  # Will be updated when component uploads
+  image: azureml:guidance_phi2_env@latest
\ No newline at end of file
diff --git a/guidance_programs/gsm8k_zero_or_few_shot.py b/guidance_programs/gsm8k_zero_or_few_shot.py
new file mode 100644
index 0000000..9f68b55
--- /dev/null
+++ b/guidance_programs/gsm8k_zero_or_few_shot.py
@@ -0,0 +1,51 @@
+# This is a very naive guidance program for GSM8K
+
+import logging
+import sys
+
+from typing import Any, Dict
+
+import guidance
+from guidance import gen, select, system, user, assistant
+
+
+_logger = logging.getLogger(__file__)
+_logger.setLevel(logging.INFO)
+_logger.addHandler(logging.StreamHandler(stream=sys.stdout))
+
+
+@guidance
+def zero_shot_gsm8k(
+    lm: guidance.models.Instruct,
+    question: str,
+    choices: list[str],
+    common: list[dict[str, Any]] | None,
+):
+    # Some general instruction to the model
+    lm += """Taking a maths test. Answer the following question and
+    show your working
+"""
+
+    if common:
+        _logger.debug("Adding few shot examples")
+        raise ValueError("common data not yet supported")
+
+
+
+    return lm
+
+
+def guidance_generation(
+    lm: guidance.models.Chat,
+    input: Dict[str, Any],
+    common: list[dict[str, Any]] | None = None,
+) -> Dict[str, Any]:
+    _logger.debug("Starting guidance_generation")
+    result = lm + zero_shot_gsm8k(
+        question=input["question"], common=common
+    )
+
+    _logger.debug(f"Result: {result}")
+
+    result = dict(zero_or_few_shot_choice=float(result["string_result"]))
+    return result

From 1e932cde21c78386e06822db710e5b65f921a44e Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Tue, 9 Apr 2024 15:05:44 -0400
Subject: [PATCH 08/36] Roughing out more changes

---
 azureml/pipelines/azureml_utils.py                   |  6 ++++++
 azureml/pipelines/configs.py                         |  7 +++++++
 azureml/pipelines/configs/gsm8k_zeroshot_config.yaml |  6 +++++-
 guidance_programs/gsm8k_zero_or_few_shot.py          | 12 +++++-------
 4 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/azureml/pipelines/azureml_utils.py b/azureml/pipelines/azureml_utils.py
index 993b2ae..96ce0a7 100644
--- a/azureml/pipelines/azureml_utils.py
+++ b/azureml/pipelines/azureml_utils.py
@@ -100,6 +100,12 @@ def prepare(self):
             environment=phi2_environment,
             version_string=self._version_string,
         )
+        self.jsonl_guidance_mistral7b = create_component_from_yaml(
+            self._client,
+            self._base_dir / "jsonl_guidance_mistral7b_component.yaml",
+            environment=phi2_environment,
+            version_string=self._version_string,
+        )
 
         _logger.info("Added all components")
 
diff --git a/azureml/pipelines/configs.py b/azureml/pipelines/configs.py
index e380f8b..39ea22b 100644
--- a/azureml/pipelines/configs.py
+++ b/azureml/pipelines/configs.py
@@ -30,6 +30,10 @@ class AOAIConfig:
 class Phi2Config:
     compute_target: str = str()
 
+@dataclass
+class TransformerConfig:
+    compute_target: str = str()
+
 
 @dataclass
 class ZeroShotRunConfig:
@@ -134,3 +138,6 @@ class Phi2BiosBiasJSONPipelineConfig:
 @dataclass
 class GSM8KZeroShotConfig:
     pipeline: PipelineConfig = field(default_factory=PipelineConfig)
+    json_guidance_programs: list[str] = field(default_factory=list)
+    transformer_config: TransformerConfig = field(default_factory=TransformerConfig)
+
diff --git a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
index 462e9ce..76708c2 100644
--- a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
+++ b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
@@ -7,4 +7,8 @@ zeroshot_config:
   pipeline:
     base_experiment_name: gsm8k_zeroshot
     tags:
-    default_compute_target: isolatedcompute
\ No newline at end of file
+    default_compute_target: isolatedcompute
+  json_guidance_programs:
+    - gsm8k_zero_or_few_shot.py
+  transformer_config:
+    compute_target: gput4
\ No newline at end of file
diff --git a/guidance_programs/gsm8k_zero_or_few_shot.py b/guidance_programs/gsm8k_zero_or_few_shot.py
index 9f68b55..4257e51 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot.py
@@ -6,7 +6,6 @@
 from typing import Any, Dict
 
 import guidance
-from guidance import gen, select, system, user, assistant
 
 
 _logger = logging.getLogger(__file__)
@@ -18,21 +17,22 @@
 def zero_shot_gsm8k(
     lm: guidance.models.Instruct,
     question: str,
-    choices: list[str],
     common: list[dict[str, Any]] | None,
 ):
     # Some general instruction to the model
     lm += """Taking a maths test. Answer the following question and
-    show your working
+    show your working.
 """
 
     if common:
         _logger.debug("Adding few shot examples")
         raise ValueError("common data not yet supported")
 
+    lm += question
 
+    schema_obj = dict(type="object", properties=dict(string_result="number"))
 
-    return lm
+    return lm + guidance.json(name="string_result", schema=schema_obj)
 
 
 def guidance_generation(
@@ -41,9 +41,7 @@ def guidance_generation(
     common: list[dict[str, Any]] | None = None,
 ) -> Dict[str, Any]:
     _logger.debug("Starting guidance_generation")
-    result = lm + zero_shot_gsm8k(
-        question=input["question"], common=common
-    )
+    result = lm + zero_shot_gsm8k(question=input["question"], common=common)
 
     _logger.debug(f"Result: {result}")
 

From 29358352a9a77c4b8af7bddba70ab3f6aae9d9c9 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Tue, 9 Apr 2024 15:06:07 -0400
Subject: [PATCH 09/36] Better name

---
 azureml/pipelines/configs/gsm8k_zeroshot_config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
index 76708c2..db784e6 100644
--- a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
+++ b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
@@ -5,7 +5,7 @@ defaults:
 
 zeroshot_config:
   pipeline:
-    base_experiment_name: gsm8k_zeroshot
+    base_experiment_name: gsm8k_zeroshot_debugging
     tags:
     default_compute_target: isolatedcompute
   json_guidance_programs:

From d16df86cc33ece14f13f1cffbc716aea9abf842c Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Wed, 10 Apr 2024 10:36:27 -0400
Subject: [PATCH 10/36] Hack things into working

---
 .../jsonl_guidance_mistral7b_component.yaml   | 18 +++++++++++++++--
 .../src/jsonl_guidance_mistral7b.py           | 15 +++++++++-----
 azureml/pipelines/submit_gsm8k_zeroshot.py    | 20 +++++++++++++++++++
 guidance_programs/gsm8k_zero_or_few_shot.py   | 12 +++++++----
 4 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/azureml/components/jsonl_guidance_mistral7b_component.yaml b/azureml/components/jsonl_guidance_mistral7b_component.yaml
index e0fd05b..f6ba778 100644
--- a/azureml/components/jsonl_guidance_mistral7b_component.yaml
+++ b/azureml/components/jsonl_guidance_mistral7b_component.yaml
@@ -52,8 +52,22 @@ outputs:
 code: ./src/
 
 command: |
-  # Get guidance from GitHub
-  pip install --upgrade git+https://github.com/guidance-ai/guidance
+  # Download the zip
+  wget https://github.com/guidance-ai/guidance/archive/refs/heads/main.zip
+  echo
+  ls
+  echo
+  # Unzip
+  unzip ./main.zip
+  echo
+  ls -p
+  echo
+  # Install from download
+  pip install --upgrade ./guidance-main/
+  echo
+  # Install LlamaCpp
+  CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install "llama-cpp-python<0.2.58"
+  echo
   # Run the script
   python ./jsonl_guidance_mistral7b.py \
     --guidance_program ${{ inputs.guidance_program }} \
diff --git a/azureml/components/src/jsonl_guidance_mistral7b.py b/azureml/components/src/jsonl_guidance_mistral7b.py
index 2e53f65..d22a9bf 100644
--- a/azureml/components/src/jsonl_guidance_mistral7b.py
+++ b/azureml/components/src/jsonl_guidance_mistral7b.py
@@ -7,6 +7,8 @@
 
 import guidance
 
+from huggingface_hub import hf_hub_download
+
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
@@ -92,12 +94,14 @@ def main():
     else:
         _logger.info("No common dataset present")
 
-    guidance_model = guidance.models.Transformers(
-        "mistralai/Mistral-7B-v0.1",
-        device_map="cuda:0",
-        echo=False,
+    repo_id = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
+    filename = "mistral-7b-instruct-v0.2.Q8_0.gguf"
+    downloaded_file = hf_hub_download(repo_id=repo_id, filename=filename)
+
+    guidance_model = guidance.models.LlamaCpp(
+        downloaded_file, verbose=True, n_gpu_layers=-1
     )
-    _logger.info(f"guidance_model.device: {guidance_model.engine.device}")
+    # _logger.info(f"guidance_model.device: {guidance_model.engine.device}")
 
     processor = LLMProcessor(
         program_path=args.guidance_program,
@@ -105,6 +109,7 @@ def main():
         common_data=common_data,
     )
 
+    _logger.info("Starting to process input")
     s, f = line_map(
         map_func=processor,
         source_file=args.input_dataset,
diff --git a/azureml/pipelines/submit_gsm8k_zeroshot.py b/azureml/pipelines/submit_gsm8k_zeroshot.py
index 3bcb2a2..ad31752 100644
--- a/azureml/pipelines/submit_gsm8k_zeroshot.py
+++ b/azureml/pipelines/submit_gsm8k_zeroshot.py
@@ -40,6 +40,17 @@ def create_gsm8k_zeroshot_pipeline(
 ):
     components = get_component_collector(ml_client, version_string)
 
+    # Map each guidance program (keyed by filename minus ".py") to a file Input
+    guidance_inputs = dict()
+    for prog_filename in run_config.json_guidance_programs:
+        guidance_inputs[prog_filename[0:-3]] = Input(
+            type="uri_file",
+            path=GUIDANCE_PROGRAMS_DIR / prog_filename,
+            # Input's keyword is 'mode'; it was misspelt 'model' and so never applied
+            mode="download",
+        )
+    _logger.info(f"Found {len(guidance_inputs)} guidance programs")
+
     @dsl.pipeline()
     def basic_pipeline() -> Pipeline:
         mmlu_fetch_job = components.jsonl_gsm8k_fetch()
@@ -51,6 +62,15 @@ def basic_pipeline() -> Pipeline:
         )
         get_split_job.name = f"extract_split_test"
 
+        for progname, prog_input in guidance_inputs.items():
+
+            guidance_job = components.jsonl_guidance_mistral7b(
+                guidance_program=prog_input,
+                input_dataset=get_split_job.outputs.output_dataset,
+            )
+            guidance_job.compute = run_config.transformer_config.compute_target
+            guidance_job.name = f"guidance_mistral7b_{progname}"
+
     pipeline = basic_pipeline()
     pipeline.experiment_name = f"{run_config.pipeline.base_experiment_name}"
     pipeline.display_name = None
diff --git a/guidance_programs/gsm8k_zero_or_few_shot.py b/guidance_programs/gsm8k_zero_or_few_shot.py
index 4257e51..b64d52a 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot.py
@@ -1,5 +1,6 @@
 # This is a very naive guidance program for GSM8K
 
+import json
 import logging
 import sys
 
@@ -30,9 +31,9 @@ def zero_shot_gsm8k(
 
     lm += question
 
-    schema_obj = dict(type="object", properties=dict(string_result="number"))
+    schema_obj = dict(type="object", properties=dict(answer=dict(type="number")))
 
-    return lm + guidance.json(name="string_result", schema=schema_obj)
+    return lm + guidance.json(name="json_result_object", schema=schema_obj)
 
 
 def guidance_generation(
@@ -43,7 +44,10 @@ def guidance_generation(
     _logger.debug("Starting guidance_generation")
     result = lm + zero_shot_gsm8k(question=input["question"], common=common)
 
-    _logger.debug(f"Result: {result}")
+    _logger.info(f"Result: {result}")
+    _logger.info(f"JSON portion: {result['json_result_object']}")
 
-    result = dict(zero_or_few_shot_choice=float(result["string_result"]))
+    loaded_obj = json.loads(result["json_result_object"])
+
+    result = dict(zero_or_few_shot_answer=loaded_obj["answer"])
     return result

From d3b2a4e08ae77760eb55f176b3a95ac2c8db6f4f Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Wed, 10 Apr 2024 10:54:42 -0400
Subject: [PATCH 11/36] Rename things to match switch to LlamaCpp

---
 azureml/pipelines/configs.py                         | 4 ++--
 azureml/pipelines/configs/gsm8k_zeroshot_config.yaml | 2 +-
 azureml/pipelines/submit_gsm8k_zeroshot.py           | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/azureml/pipelines/configs.py b/azureml/pipelines/configs.py
index 39ea22b..a45d9a8 100644
--- a/azureml/pipelines/configs.py
+++ b/azureml/pipelines/configs.py
@@ -31,7 +31,7 @@ class Phi2Config:
     compute_target: str = str()
 
 @dataclass
-class TransformerConfig:
+class LlamaCppConfig:
     compute_target: str = str()
 
 
@@ -139,5 +139,5 @@ class Phi2BiosBiasJSONPipelineConfig:
 class GSM8KZeroShotConfig:
     pipeline: PipelineConfig = field(default_factory=PipelineConfig)
     json_guidance_programs: list[str] = field(default_factory=list)
-    transformer_config: TransformerConfig = field(default_factory=TransformerConfig)
+    llamacpp_config: LlamaCppConfig = field(default_factory=LlamaCppConfig)
 
diff --git a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
index db784e6..3870e12 100644
--- a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
+++ b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
@@ -10,5 +10,5 @@ zeroshot_config:
     default_compute_target: isolatedcompute
   json_guidance_programs:
     - gsm8k_zero_or_few_shot.py
-  transformer_config:
+  llamacpp_config:
     compute_target: gput4
\ No newline at end of file
diff --git a/azureml/pipelines/submit_gsm8k_zeroshot.py b/azureml/pipelines/submit_gsm8k_zeroshot.py
index ad31752..ea3badd 100644
--- a/azureml/pipelines/submit_gsm8k_zeroshot.py
+++ b/azureml/pipelines/submit_gsm8k_zeroshot.py
@@ -68,7 +68,7 @@ def basic_pipeline() -> Pipeline:
                 guidance_program=prog_input,
                 input_dataset=get_split_job.outputs.output_dataset,
             )
-            guidance_job.compute = run_config.transformer_config.compute_target
+            guidance_job.compute = run_config.llamacpp_config.compute_target
             guidance_job.name = f"guidance_mistral7b_{progname}"
 
     pipeline = basic_pipeline()

From 033c407cc8a1c46488934c6700eaa0c12bbc3ca5 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Wed, 10 Apr 2024 11:58:37 -0400
Subject: [PATCH 12/36] Add a scoring component

---
 .../jsonl_score_numeric_component.yaml        | 56 ++++++++++++
 azureml/components/src/jsonl_score_numeric.py | 85 +++++++++++++++++++
 azureml/pipelines/azureml_utils.py            |  1 +
 azureml/pipelines/submit_gsm8k_zeroshot.py    |  7 ++
 4 files changed, 149 insertions(+)
 create mode 100644 azureml/components/jsonl_score_numeric_component.yaml
 create mode 100644 azureml/components/src/jsonl_score_numeric.py

diff --git a/azureml/components/jsonl_score_numeric_component.yaml b/azureml/components/jsonl_score_numeric_component.yaml
new file mode 100644
index 0000000..6098006
--- /dev/null
+++ b/azureml/components/jsonl_score_numeric_component.yaml
@@ -0,0 +1,56 @@
+$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
+
+name: jsonl_score_numeric
+version: 0.0.1pre1
+display_name: JSONL Numeric Scorer
+type: command
+description: |
+  Takes a JSONL file of numeric questions and correct answers and responses
+  from a model, and produces the overall score.
+  Results are stored in JSON
+is_deterministic: true
+
+inputs:
+  input_dataset:
+    type: uri_file
+    optional: false
+    description: Dataset containing JSONL input
+  input_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the input dataset
+  correct_key:
+    type: string
+    optional: false
+    description: Which key contains the correct answer
+  response_key:
+    type: string
+    optional: false
+    description: Which key contains the answer produced by the model
+  output_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the output dataset
+
+outputs:
+  output_dataset:
+    type: uri_file
+    description: JSON file containing score summary
+
+
+code: ./src/
+
+command: >-
+  python ./jsonl_score_numeric.py
+  --input_dataset ${{ inputs.input_dataset }}
+  --input_encoding ${{ inputs.input_encoding }}
+  --output_dataset ${{ outputs.output_dataset }}
+  --output_encoding ${{ inputs.output_encoding }}
+  --correct_key ${{ inputs.correct_key }}
+  --response_key ${{ inputs.response_key }}
+
+environment:
+  # Will be updated when component uploads
+  image: azureml:promptbase_aml@latest
\ No newline at end of file
diff --git a/azureml/components/src/jsonl_score_numeric.py b/azureml/components/src/jsonl_score_numeric.py
new file mode 100644
index 0000000..35a260e
--- /dev/null
+++ b/azureml/components/src/jsonl_score_numeric.py
@@ -0,0 +1,85 @@
+import argparse
+import functools
+import json
+import pathlib
+
+from typing import Any
+
+import mlflow
+
+from aether_utils.jsonl_utils import line_reduce
+from aether_utils.logging_utils import get_standard_logger_for_file
+
+_logger = get_standard_logger_for_file(__file__)
+
+
+class Scorer:
+    """Collect (correct, response) pairs per line; report exact-match accuracy."""
+
+    def __init__(self, correct_key: str, response_key: str):
+        self.y_true = []
+        self.y_pred = []
+        self.dataset = []
+        self.subject = []
+        self.correct_key = correct_key
+        self.response_key = response_key
+
+    def __call__(self, line: dict[str, Any]):
+        correct_answer = line[self.correct_key]
+        response_answer = line[self.response_key]
+        self.y_true.append(correct_answer)
+        self.y_pred.append(response_answer)
+
+    def generate_summary(self) -> dict[str, Any]:
+        """Return n_answers, n_correct and accuracy (0.0 when nothing was scored)."""
+        n_answers = len(self.y_true)
+        n_correct = sum(y_t == y_p for y_t, y_p in zip(self.y_true, self.y_pred))
+        result = dict()
+        result["n_answers"] = n_answers
+        result["n_correct"] = n_correct
+        # Guard the division: an empty input previously raised ZeroDivisionError
+        result["accuracy"] = float(n_correct) / n_answers if n_answers else 0.0
+        return result
+
+
+def parse_args():
+    """Parse the scorer's command line (dataset ports plus answer key names)."""
+    cli = argparse.ArgumentParser(add_help=True)
+
+    # Input/output files, each paired with its text encoding
+    ports = cli.add_argument_group("Ports")
+    ports.add_argument("--input_dataset", type=pathlib.Path, required=True)
+    ports.add_argument("--input_encoding", type=str, required=True)
+    ports.add_argument("--output_dataset", type=pathlib.Path, required=True)
+    ports.add_argument("--output_encoding", type=str, required=True)
+
+    # Names of the fields holding the reference and the model's answer
+    keys = cli.add_argument_group("Keys")
+    keys.add_argument("--correct_key", type=str, required=True)
+    keys.add_argument("--response_key", type=str, required=True)
+
+    # Hand back the argparse.Namespace built from sys.argv
+    return cli.parse_args()
+
+
+def main():
+    """Score the input JSONL, log the metrics to mlflow, write the summary JSON."""
+    args = parse_args()
+
+    scorer = Scorer(correct_key=args.correct_key, response_key=args.response_key)
+    line_reduce(
+        reducer=scorer,
+        source_file=args.input_dataset,
+        source_encoding=args.input_encoding,
+    )
+    metrics = scorer.generate_summary()
+
+    _logger.info("Logging with mlflow")
+    mlflow.log_metrics(metrics)
+    _logger.info("Writing output file")
+    with open(args.output_dataset, encoding=args.output_encoding, mode="w") as jf:
+        json.dump(metrics, jf, indent=4)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/azureml/pipelines/azureml_utils.py b/azureml/pipelines/azureml_utils.py
index 96ce0a7..341b9b6 100644
--- a/azureml/pipelines/azureml_utils.py
+++ b/azureml/pipelines/azureml_utils.py
@@ -25,6 +25,7 @@
     jsonl_schema_checker="jsonl_schema_checker_component.yaml",
     jsonl_score_biosbias_json="jsonl_score_biosbias_json_component.yaml",
     jsonl_score_multiplechoice="jsonl_score_multiplechoice_component.yaml",
+    jsonl_score_numeric="jsonl_score_numeric_component.yaml",
     jsonl_to_json="jsonl_to_json_component.yaml",
     uri_folder_to_file="uri_folder_to_file_component.yaml",
 )
diff --git a/azureml/pipelines/submit_gsm8k_zeroshot.py b/azureml/pipelines/submit_gsm8k_zeroshot.py
index ea3badd..b04bb4b 100644
--- a/azureml/pipelines/submit_gsm8k_zeroshot.py
+++ b/azureml/pipelines/submit_gsm8k_zeroshot.py
@@ -71,6 +71,13 @@ def basic_pipeline() -> Pipeline:
             guidance_job.compute = run_config.llamacpp_config.compute_target
             guidance_job.name = f"guidance_mistral7b_{progname}"
 
+            score_job = components.jsonl_score_numeric(
+                input_dataset=guidance_job.outputs.output_dataset,
+                correct_key="answer",
+                response_key="zero_or_few_shot_answer",
+            )
+            score_job.name = f"score_{progname}"
+
     pipeline = basic_pipeline()
     pipeline.experiment_name = f"{run_config.pipeline.base_experiment_name}"
     pipeline.display_name = None

From cb56759b160b47c8207e9b72a938b216d4e3d92b Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Wed, 10 Apr 2024 12:03:16 -0400
Subject: [PATCH 13/36] Start expanding on the prompts

---
 .../configs/gsm8k_zeroshot_config.yaml        |  2 +
 .../gsm8k_zero_or_few_shot_plain.py           | 49 +++++++++++++++++++
 .../gsm8k_zero_or_few_shot_regex_number.py    | 49 +++++++++++++++++++
 3 files changed, 100 insertions(+)
 create mode 100644 guidance_programs/gsm8k_zero_or_few_shot_plain.py
 create mode 100644 guidance_programs/gsm8k_zero_or_few_shot_regex_number.py

diff --git a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
index 3870e12..ef66d27 100644
--- a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
+++ b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
@@ -9,6 +9,8 @@ zeroshot_config:
     tags:
     default_compute_target: isolatedcompute
   json_guidance_programs:
+    - gsm8k_zero_or_few_shot_plain.py
+    - gsm8k_zero_or_few_shot_regex_number.py
     - gsm8k_zero_or_few_shot.py
   llamacpp_config:
     compute_target: gput4
\ No newline at end of file
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_plain.py b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
new file mode 100644
index 0000000..530cfa3
--- /dev/null
+++ b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
@@ -0,0 +1,49 @@
+# This is a very naive guidance program for GSM8K
+
+import json
+import logging
+import sys
+
+from typing import Any, Dict
+
+import guidance
+
+
+_logger = logging.getLogger(__file__)
+_logger.setLevel(logging.INFO)
+_logger.addHandler(logging.StreamHandler(stream=sys.stdout))
+
+
+@guidance
+def zero_shot_gsm8k(
+    lm: guidance.models.Instruct,
+    question: str,
+    common: list[dict[str, Any]] | None,
+):
+    # Some general instruction to the model
+    lm += """Taking a maths test. Answer the following question:
+"""
+
+    if common:
+        _logger.debug("Adding few shot examples")
+        raise ValueError("common data not yet supported")
+
+    lm += question
+
+    return lm + guidance.gen(name="result_string")
+
+
+def guidance_generation(
+    lm: guidance.models.Chat,
+    input: Dict[str, Any],
+    common: list[dict[str, Any]] | None = None,
+) -> Dict[str, Any]:
+    _logger.debug("Starting guidance_generation")
+    result = lm + zero_shot_gsm8k(question=input["question"], common=common)
+
+    _logger.info(f"JSON portion: {result['result_string']}")
+
+    float_result = float(result['result_string'])
+
+    result = dict(zero_or_few_shot_answer=float_result)
+    return result
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
new file mode 100644
index 0000000..6766c5c
--- /dev/null
+++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
@@ -0,0 +1,49 @@
+# This is a very naive guidance program for GSM8K
+
+import json
+import logging
+import sys
+
+from typing import Any, Dict
+
+import guidance
+
+
+_logger = logging.getLogger(__file__)
+_logger.setLevel(logging.INFO)
+_logger.addHandler(logging.StreamHandler(stream=sys.stdout))
+
+
+@guidance
+def zero_shot_gsm8k(
+    lm: guidance.models.Instruct,
+    question: str,
+    common: list[dict[str, Any]] | None,
+):
+    # Some general instruction to the model
+    lm += """Taking a maths test. Answer the following question:
+"""
+
+    if common:
+        _logger.debug("Adding few shot examples")
+        raise ValueError("common data not yet supported")
+
+    lm += question
+
+    return lm + guidance.gen(name="result_string", regex=r"\d+\.?\d*")
+
+
+def guidance_generation(
+    lm: guidance.models.Chat,
+    input: Dict[str, Any],
+    common: list[dict[str, Any]] | None = None,
+) -> Dict[str, Any]:
+    _logger.debug("Starting guidance_generation")
+    result = lm + zero_shot_gsm8k(question=input["question"], common=common)
+
+    _logger.info(f"JSON portion: {result['result_string']}")
+
+    float_result = float(result["result_string"])
+
+    result = dict(zero_or_few_shot_answer=float_result)
+    return result

From ff305b276a875ca56b32981aea2de19a9b67734f Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Wed, 10 Apr 2024 13:14:13 -0400
Subject: [PATCH 14/36] Unsuccessful tweak

---
 guidance_programs/gsm8k_zero_or_few_shot_plain.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/guidance_programs/gsm8k_zero_or_few_shot_plain.py b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
index 530cfa3..da02638 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_plain.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
@@ -21,7 +21,7 @@ def zero_shot_gsm8k(
     common: list[dict[str, Any]] | None,
 ):
     # Some general instruction to the model
-    lm += """Taking a maths test. Answer the following question:
+    lm += """Taking a maths test. Answer the following question. Respond with just the numerical answer:
 """
 
     if common:

From b8d25bfdfa1d4ed8555a96c12add0110a8277fda Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Wed, 10 Apr 2024 13:35:16 -0400
Subject: [PATCH 15/36] Add some random examples

---
 azureml/pipelines/configs.py                  |  2 ++
 .../configs/gsm8k_zeroshot_config.yaml        |  4 +++-
 azureml/pipelines/submit_gsm8k_zeroshot.py    | 22 ++++++++++++++-----
 3 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/azureml/pipelines/configs.py b/azureml/pipelines/configs.py
index a45d9a8..3befd93 100644
--- a/azureml/pipelines/configs.py
+++ b/azureml/pipelines/configs.py
@@ -140,4 +140,6 @@ class GSM8KZeroShotConfig:
     pipeline: PipelineConfig = field(default_factory=PipelineConfig)
     json_guidance_programs: list[str] = field(default_factory=list)
     llamacpp_config: LlamaCppConfig = field(default_factory=LlamaCppConfig)
+    random_seed: int = int()
+    n_samples: int = int()
 
diff --git a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
index ef66d27..b6183b3 100644
--- a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
+++ b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
@@ -13,4 +13,6 @@ zeroshot_config:
     - gsm8k_zero_or_few_shot_regex_number.py
     - gsm8k_zero_or_few_shot.py
   llamacpp_config:
-    compute_target: gput4
\ No newline at end of file
+    compute_target: gput4
+  random_seed: 4521
+  n_samples: 5
\ No newline at end of file
diff --git a/azureml/pipelines/submit_gsm8k_zeroshot.py b/azureml/pipelines/submit_gsm8k_zeroshot.py
index b04bb4b..fb325a8 100644
--- a/azureml/pipelines/submit_gsm8k_zeroshot.py
+++ b/azureml/pipelines/submit_gsm8k_zeroshot.py
@@ -56,17 +56,29 @@ def basic_pipeline() -> Pipeline:
         mmlu_fetch_job = components.jsonl_gsm8k_fetch()
         mmlu_fetch_job.name = f"fetch_gsm8k"
 
-        get_split_job = components.uri_folder_to_file(
-            input_dataset=mmlu_fetch_job.outputs.output_dataset,
-            filename_pattern=f"test.jsonl",
+        split_outputs = dict()
+        for s in ["train", "test"]:
+            get_split_job = components.uri_folder_to_file(
+                input_dataset=mmlu_fetch_job.outputs.output_dataset,
+                filename_pattern=f"{s}.jsonl",
+            )
+            get_split_job.name = f"extract_split_{s}"
+            split_outputs[s] = get_split_job.outputs.output_dataset
+
+        random_examples_job = components.jsonl_random_examples(
+            input_dataset=split_outputs["train"],
+            example_dataset=split_outputs["test"],
+            output_key="examples",
+            num_examples=run_config.n_samples,
+            random_seed=run_config.random_seed
         )
-        get_split_job.name = f"extract_split_test"
+        random_examples_job.name=f"add_random_examples"
 
         for progname, prog_input in guidance_inputs.items():
 
             guidance_job = components.jsonl_guidance_mistral7b(
                 guidance_program=prog_input,
-                input_dataset=get_split_job.outputs.output_dataset,
+                input_dataset=random_examples_job.outputs.output_dataset,
             )
             guidance_job.compute = run_config.llamacpp_config.compute_target
             guidance_job.name = f"guidance_mistral7b_{progname}"

From ac49c39b281ddce5c88753ec69c6ee5df80b4818 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Wed, 10 Apr 2024 13:59:33 -0400
Subject: [PATCH 16/36] Thinking about few shots a bit

---
 azureml/pipelines/configs.py                  |  2 +-
 .../configs/gsm8k_zeroshot_config.yaml        |  4 +-
 azureml/pipelines/submit_gsm8k_zeroshot.py    |  6 +-
 guidance_programs/gsm8k_zero_or_few_shot.py   | 53 ----------------
 .../gsm8k_zero_or_few_shot_basic_json.py      | 62 +++++++++++++++++++
 .../gsm8k_zero_or_few_shot_plain.py           | 37 +++++++----
 .../gsm8k_zero_or_few_shot_regex_number.py    | 37 +++++++----
 7 files changed, 116 insertions(+), 85 deletions(-)
 delete mode 100644 guidance_programs/gsm8k_zero_or_few_shot.py
 create mode 100644 guidance_programs/gsm8k_zero_or_few_shot_basic_json.py

diff --git a/azureml/pipelines/configs.py b/azureml/pipelines/configs.py
index 3befd93..e51cbec 100644
--- a/azureml/pipelines/configs.py
+++ b/azureml/pipelines/configs.py
@@ -136,7 +136,7 @@ class Phi2BiosBiasJSONPipelineConfig:
 
 
 @dataclass
-class GSM8KZeroShotConfig:
+class GSM8KZeroOrFewShotConfig:
     pipeline: PipelineConfig = field(default_factory=PipelineConfig)
     json_guidance_programs: list[str] = field(default_factory=list)
     llamacpp_config: LlamaCppConfig = field(default_factory=LlamaCppConfig)
diff --git a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
index b6183b3..6128613 100644
--- a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
+++ b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
@@ -5,13 +5,13 @@ defaults:
 
 zeroshot_config:
   pipeline:
-    base_experiment_name: gsm8k_zeroshot_debugging
+    base_experiment_name: gsm8k_zeroorfewshot_debugging
     tags:
     default_compute_target: isolatedcompute
   json_guidance_programs:
     - gsm8k_zero_or_few_shot_plain.py
     - gsm8k_zero_or_few_shot_regex_number.py
-    - gsm8k_zero_or_few_shot.py
+    - gsm8k_zero_or_few_shot_basic_json.py
   llamacpp_config:
     compute_target: gput4
   random_seed: 4521
diff --git a/azureml/pipelines/submit_gsm8k_zeroshot.py b/azureml/pipelines/submit_gsm8k_zeroshot.py
index fb325a8..0a459b1 100644
--- a/azureml/pipelines/submit_gsm8k_zeroshot.py
+++ b/azureml/pipelines/submit_gsm8k_zeroshot.py
@@ -18,7 +18,7 @@
 
 from azureml_pipelines import create_zeroshot_pipeline
 from azureml_utils import get_component_collector
-from configs import AMLConfig, GSM8KZeroShotConfig
+from configs import AMLConfig, GSM8KZeroOrFewShotConfig
 from constants import GUIDANCE_PROGRAMS_DIR
 from logging_utils import get_standard_logger_for_file
 
@@ -27,7 +27,7 @@
 
 @dataclass
 class PipelineConfig:
-    zeroshot_config: GSM8KZeroShotConfig = omegaconf.MISSING
+    zeroshot_config: GSM8KZeroOrFewShotConfig = omegaconf.MISSING
     azureml_config: AMLConfig = omegaconf.MISSING
 
 
@@ -36,7 +36,7 @@ class PipelineConfig:
 
 
 def create_gsm8k_zeroshot_pipeline(
-    ml_client: MLClient, run_config: GSM8KZeroShotConfig, version_string: str
+    ml_client: MLClient, run_config: GSM8KZeroOrFewShotConfig, version_string: str
 ):
     components = get_component_collector(ml_client, version_string)
 
diff --git a/guidance_programs/gsm8k_zero_or_few_shot.py b/guidance_programs/gsm8k_zero_or_few_shot.py
deleted file mode 100644
index b64d52a..0000000
--- a/guidance_programs/gsm8k_zero_or_few_shot.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# This is a very naive guidance program for GSM8K
-
-import json
-import logging
-import sys
-
-from typing import Any, Dict
-
-import guidance
-
-
-_logger = logging.getLogger(__file__)
-_logger.setLevel(logging.INFO)
-_logger.addHandler(logging.StreamHandler(stream=sys.stdout))
-
-
-@guidance
-def zero_shot_gsm8k(
-    lm: guidance.models.Instruct,
-    question: str,
-    common: list[dict[str, Any]] | None,
-):
-    # Some general instruction to the model
-    lm += """Taking a maths test. Answer the following question and
-    show your working.
-"""
-
-    if common:
-        _logger.debug("Adding few shot examples")
-        raise ValueError("common data not yet supported")
-
-    lm += question
-
-    schema_obj = dict(type="object", properties=dict(answer=dict(type="number")))
-
-    return lm + guidance.json(name="json_result_object", schema=schema_obj)
-
-
-def guidance_generation(
-    lm: guidance.models.Chat,
-    input: Dict[str, Any],
-    common: list[dict[str, Any]] | None = None,
-) -> Dict[str, Any]:
-    _logger.debug("Starting guidance_generation")
-    result = lm + zero_shot_gsm8k(question=input["question"], common=common)
-
-    _logger.info(f"Result: {result}")
-    _logger.info(f"JSON portion: {result['json_result_object']}")
-
-    loaded_obj = json.loads(result["json_result_object"])
-
-    result = dict(zero_or_few_shot_answer=loaded_obj["answer"])
-    return result
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
new file mode 100644
index 0000000..07e2eac
--- /dev/null
+++ b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
@@ -0,0 +1,62 @@
+# This is a very naive guidance program for GSM8K
+
+import json
+import logging
+import sys
+
+from typing import Any, Dict
+
+import guidance
+
+
+_logger = logging.getLogger(__file__)
+_logger.setLevel(logging.INFO)
+_logger.addHandler(logging.StreamHandler(stream=sys.stdout))
+
+
+@guidance
+def zero_shot_gsm8k(
+    lm: guidance.models.Instruct,
+    question: str,
+    examples: list[dict[str, Any]] | None,
+):
+    # Some general instruction to the model
+    lm += """You are taking a maths test\n\n"""
+
+    # Show the few shots
+    for e in examples:
+        lm += f"Question: {e['question']}\n"
+        lm += f"Reasoning:"
+        for t in e["thoughts"]:
+            lm += t["step"] + t["result"]
+        lm += f"Answer: {e['answer']}"
+        lm += "\n"
+
+    # Now ask the question
+    lm += f"Question: {question}\n"
+    lm += f"Reasoning:"
+    lm += guidance.gen("reasons")
+    lm += f"Answer: " + guidance.json(name="result_string", schema=dict(type="number"))
+
+    return lm
+
+
+def guidance_generation(
+    lm: guidance.models.Chat,
+    input: Dict[str, Any],
+    common: list[dict[str, Any]] | None = None,
+) -> Dict[str, Any]:
+    _logger.debug("Starting guidance_generation")
+    if common:
+        raise ValueError("Common Data not supported!")
+
+    result = lm + zero_shot_gsm8k(
+        question=input["question"], examples=input["examples"]
+    )
+
+    _logger.info(f"result_string: {result['result_string']}")
+
+    float_result = float(result["result_string"])
+
+    result = dict(zero_or_few_shot_answer=float_result)
+    return result
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_plain.py b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
index da02638..6b420f3 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_plain.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
@@ -18,19 +18,27 @@
 def zero_shot_gsm8k(
     lm: guidance.models.Instruct,
     question: str,
-    common: list[dict[str, Any]] | None,
+    examples: list[dict[str, Any]] | None,
 ):
     # Some general instruction to the model
-    lm += """Taking a maths test. Answer the following question. Respond with just the numerical answer:
-"""
-
-    if common:
-        _logger.debug("Adding few shot examples")
-        raise ValueError("common data not yet supported")
-
-    lm += question
-
-    return lm + guidance.gen(name="result_string")
+    lm += """You are taking a maths test\n\n"""
+
+    # Show the few shots
+    for e in examples:
+        lm += f"Question: {e['question']}\n"
+        lm += f"Reasoning:"
+        for t in e["thoughts"]:
+            lm += t["step"] + t["result"]
+        lm += f"Answer: {e['answer']}"
+        lm += "\n"
+    
+    # Now ask the question
+    lm += f"Question: {question}\n"
+    lm += f"Reasoning:"
+    lm += guidance.gen("reasons")
+    lm += f"Answer: " + guidance.gen(name="result_string")
+
+    return lm
 
 
 def guidance_generation(
@@ -39,9 +47,12 @@ def guidance_generation(
     common: list[dict[str, Any]] | None = None,
 ) -> Dict[str, Any]:
     _logger.debug("Starting guidance_generation")
-    result = lm + zero_shot_gsm8k(question=input["question"], common=common)
+    if common:
+        raise ValueError("Common Data not supported!")
+    
+    result = lm + zero_shot_gsm8k(question=input["question"], examples=input["examples"])
 
-    _logger.info(f"JSON portion: {result['result_string']}")
+    _logger.info(f"result_string: {result['result_string']}")
 
     float_result = float(result['result_string'])
 
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
index 6766c5c..1ab6e85 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
@@ -18,19 +18,27 @@
 def zero_shot_gsm8k(
     lm: guidance.models.Instruct,
     question: str,
-    common: list[dict[str, Any]] | None,
+    examples: list[dict[str, Any]] | None,
 ):
     # Some general instruction to the model
-    lm += """Taking a maths test. Answer the following question:
-"""
-
-    if common:
-        _logger.debug("Adding few shot examples")
-        raise ValueError("common data not yet supported")
-
-    lm += question
-
-    return lm + guidance.gen(name="result_string", regex=r"\d+\.?\d*")
+    lm += """You are taking a maths test\n\n"""
+
+    # Show the few shots
+    for e in examples:
+        lm += f"Question: {e['question']}\n"
+        lm += f"Reasoning:"
+        for t in e["thoughts"]:
+            lm += t["step"] + t["result"]
+        lm += f"Answer: {e['answer']}"
+        lm += "\n"
+    
+    # Now ask the question
+    lm += f"Question: {question}\n"
+    lm += f"Reasoning:"
+    lm += guidance.gen("reasons")
+    lm += f"Answer: " + guidance.gen(name="result_string", regex=r"\d+\.?\d*")
+
+    return lm
 
 
 def guidance_generation(
@@ -39,9 +47,12 @@ def guidance_generation(
     common: list[dict[str, Any]] | None = None,
 ) -> Dict[str, Any]:
     _logger.debug("Starting guidance_generation")
-    result = lm + zero_shot_gsm8k(question=input["question"], common=common)
+    if common:
+        raise ValueError("Common Data not supported!")
+    
+    result = lm + zero_shot_gsm8k(question=input["question"], examples=input["examples"])
 
-    _logger.info(f"JSON portion: {result['result_string']}")
+    _logger.info(f"result_string: {result['result_string']}")
 
     float_result = float(result["result_string"])
 

From 6f8ef9d2b6622c218bf025e5ac431068de93a657 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Wed, 10 Apr 2024 14:08:38 -0400
Subject: [PATCH 17/36] Doing some renaming

---
 azureml/pipelines/configs/gsm8k_zeroshot_config.yaml          | 2 +-
 ...submit_gsm8k_zeroshot.py => submit_gsm8k_zeroorfewshot.py} | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
 rename azureml/pipelines/{submit_gsm8k_zeroshot.py => submit_gsm8k_zeroorfewshot.py} (97%)

diff --git a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
index 6128613..e548f92 100644
--- a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
+++ b/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
@@ -3,7 +3,7 @@ defaults:
   - aml_config
   - aoai_config
 
-zeroshot_config:
+zeroorfewshot_config:
   pipeline:
     base_experiment_name: gsm8k_zeroorfewshot_debugging
     tags:
diff --git a/azureml/pipelines/submit_gsm8k_zeroshot.py b/azureml/pipelines/submit_gsm8k_zeroorfewshot.py
similarity index 97%
rename from azureml/pipelines/submit_gsm8k_zeroshot.py
rename to azureml/pipelines/submit_gsm8k_zeroorfewshot.py
index 0a459b1..b93f738 100644
--- a/azureml/pipelines/submit_gsm8k_zeroshot.py
+++ b/azureml/pipelines/submit_gsm8k_zeroorfewshot.py
@@ -27,7 +27,7 @@
 
 @dataclass
 class PipelineConfig:
-    zeroshot_config: GSM8KZeroOrFewShotConfig = omegaconf.MISSING
+    zeroorfewshot_config: GSM8KZeroOrFewShotConfig = omegaconf.MISSING
     azureml_config: AMLConfig = omegaconf.MISSING
 
 
@@ -121,7 +121,7 @@ def main(config: PipelineConfig):
     )
 
     pipeline = create_gsm8k_zeroshot_pipeline(
-        ws_client, config.zeroshot_config, version_string
+        ws_client, config.zeroorfewshot_config, version_string
     )
     _logger.info("Submitting pipeline")
     submitted_job = ws_client.jobs.create_or_update(pipeline)

From 2076b53ffa35cf87436fa5ce23aff908ba1061c9 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Wed, 10 Apr 2024 14:28:33 -0400
Subject: [PATCH 18/36] Tweaking

---
 ..._zeroshot_config.yaml => gsm8k_zeroorfewshot_config.yaml} | 2 +-
 guidance_programs/gsm8k_zero_or_few_shot_basic_json.py       | 5 ++++-
 guidance_programs/gsm8k_zero_or_few_shot_plain.py            | 5 ++++-
 guidance_programs/gsm8k_zero_or_few_shot_regex_number.py     | 5 ++++-
 4 files changed, 13 insertions(+), 4 deletions(-)
 rename azureml/pipelines/configs/{gsm8k_zeroshot_config.yaml => gsm8k_zeroorfewshot_config.yaml} (96%)

diff --git a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml
similarity index 96%
rename from azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
rename to azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml
index e548f92..2509917 100644
--- a/azureml/pipelines/configs/gsm8k_zeroshot_config.yaml
+++ b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml
@@ -15,4 +15,4 @@ zeroorfewshot_config:
   llamacpp_config:
     compute_target: gput4
   random_seed: 4521
-  n_samples: 5
\ No newline at end of file
+  n_samples: 2
\ No newline at end of file
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
index 07e2eac..3f6dddc 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
@@ -28,7 +28,10 @@ def zero_shot_gsm8k(
         lm += f"Question: {e['question']}\n"
         lm += f"Reasoning:"
         for t in e["thoughts"]:
-            lm += t["step"] + t["result"]
+            lm += t["step"]
+            if "result" in t:
+                lm += t["result"]
+            lm += "\n"
         lm += f"Answer: {e['answer']}"
         lm += "\n"
 
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_plain.py b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
index 6b420f3..4ea11ad 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_plain.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
@@ -28,7 +28,10 @@ def zero_shot_gsm8k(
         lm += f"Question: {e['question']}\n"
         lm += f"Reasoning:"
         for t in e["thoughts"]:
-            lm += t["step"] + t["result"]
+            lm += t["step"]
+            if "result" in t:
+                lm += t["result"]
+            lm += "\n"
         lm += f"Answer: {e['answer']}"
         lm += "\n"
     
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
index 1ab6e85..42a5773 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
@@ -28,7 +28,10 @@ def zero_shot_gsm8k(
         lm += f"Question: {e['question']}\n"
         lm += f"Reasoning:"
         for t in e["thoughts"]:
-            lm += t["step"] + t["result"]
+            lm += t["step"]
+            if "result" in t:
+                lm += t["result"]
+            lm += "\n"
         lm += f"Answer: {e['answer']}"
         lm += "\n"
     

From d60345c46b469f59c67ba81d4b8a8e03afad5613 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Wed, 10 Apr 2024 14:43:44 -0400
Subject: [PATCH 19/36] Some token limits

---
 .../configs/gsm8k_zeroorfewshot_config.yaml        |  2 +-
 .../gsm8k_zero_or_few_shot_basic_json.py           |  2 +-
 guidance_programs/gsm8k_zero_or_few_shot_plain.py  | 14 ++++++++------
 .../gsm8k_zero_or_few_shot_regex_number.py         | 10 ++++++----
 4 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml
index 2509917..40248f9 100644
--- a/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml
+++ b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml
@@ -15,4 +15,4 @@ zeroorfewshot_config:
   llamacpp_config:
     compute_target: gput4
   random_seed: 4521
-  n_samples: 2
\ No newline at end of file
+  n_samples: 1
\ No newline at end of file
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
index 3f6dddc..7a619f4 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
@@ -38,7 +38,7 @@ def zero_shot_gsm8k(
     # Now ask the question
     lm += f"Question: {question}\n"
     lm += f"Reasoning:"
-    lm += guidance.gen("reasons")
+    lm += guidance.gen("reasons", max_tokens=50)
     lm += f"Answer: " + guidance.json(name="result_string", schema=dict(type="number"))
 
     return lm
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_plain.py b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
index 4ea11ad..5cc21da 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_plain.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
@@ -34,12 +34,12 @@ def zero_shot_gsm8k(
             lm += "\n"
         lm += f"Answer: {e['answer']}"
         lm += "\n"
-    
+
     # Now ask the question
     lm += f"Question: {question}\n"
     lm += f"Reasoning:"
-    lm += guidance.gen("reasons")
-    lm += f"Answer: " + guidance.gen(name="result_string")
+    lm += guidance.gen("reasons", max_tokens=50)
+    lm += f"Answer: " + guidance.gen(name="result_string", max_tokens=10)
 
     return lm
 
@@ -52,12 +52,14 @@ def guidance_generation(
     _logger.debug("Starting guidance_generation")
     if common:
         raise ValueError("Common Data not supported!")
-    
-    result = lm + zero_shot_gsm8k(question=input["question"], examples=input["examples"])
+
+    result = lm + zero_shot_gsm8k(
+        question=input["question"], examples=input["examples"]
+    )
 
     _logger.info(f"result_string: {result['result_string']}")
 
-    float_result = float(result['result_string'])
+    float_result = float(result["result_string"])
 
     result = dict(zero_or_few_shot_answer=float_result)
     return result
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
index 42a5773..f88e099 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
@@ -34,11 +34,11 @@ def zero_shot_gsm8k(
             lm += "\n"
         lm += f"Answer: {e['answer']}"
         lm += "\n"
-    
+
     # Now ask the question
     lm += f"Question: {question}\n"
     lm += f"Reasoning:"
-    lm += guidance.gen("reasons")
+    lm += guidance.gen("reasons", max_tokens=50)
     lm += f"Answer: " + guidance.gen(name="result_string", regex=r"\d+\.?\d*")
 
     return lm
@@ -52,8 +52,10 @@ def guidance_generation(
     _logger.debug("Starting guidance_generation")
     if common:
         raise ValueError("Common Data not supported!")
-    
-    result = lm + zero_shot_gsm8k(question=input["question"], examples=input["examples"])
+
+    result = lm + zero_shot_gsm8k(
+        question=input["question"], examples=input["examples"]
+    )
 
     _logger.info(f"result_string: {result['result_string']}")
 

From 1cfefad068aa2116549a0ac382412d32cdf32d6c Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Thu, 11 Apr 2024 10:43:44 -0400
Subject: [PATCH 20/36] More involved JSON

---
 .../configs/gsm8k_zeroorfewshot_config.yaml   |  1 +
 .../gsm8k_zero_or_few_shot_json_response.py   | 76 +++++++++++++++++++
 2 files changed, 77 insertions(+)
 create mode 100644 guidance_programs/gsm8k_zero_or_few_shot_json_response.py

diff --git a/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml
index 40248f9..59efb39 100644
--- a/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml
+++ b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml
@@ -12,6 +12,7 @@ zeroorfewshot_config:
     - gsm8k_zero_or_few_shot_plain.py
     - gsm8k_zero_or_few_shot_regex_number.py
     - gsm8k_zero_or_few_shot_basic_json.py
+    - gsm8k_zero_or_few_shot_json_response.py
   llamacpp_config:
     compute_target: gput4
   random_seed: 4521
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
new file mode 100644
index 0000000..13b67ba
--- /dev/null
+++ b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
@@ -0,0 +1,76 @@
+# GSM8K guidance program which requests thoughts and result as one JSON object
+
+import json
+import logging
+import sys
+
+from typing import Any, Dict
+
+import guidance
+
+
+_logger = logging.getLogger(__file__)
+_logger.setLevel(logging.INFO)
+_logger.addHandler(logging.StreamHandler(stream=sys.stdout))
+
+
+@guidance
+def zero_shot_gsm8k(
+    lm: guidance.models.Instruct,
+    question: str,
+    examples: list[dict[str, Any]] | None,
+):
+    """Append the few-shot examples and `question`, then generate a JSON reply."""
+    lm += """You are taking a maths test\n\n"""  # general instruction to the model
+    # Schema for the reply: a list of free-text thoughts and a numeric result
+    response_schema = dict(
+        type="object",
+        properties=dict(
+            thoughts=dict(type="array", items=dict(type="string")),
+            result=dict(type="number"),
+        ),
+    )
+
+    # Show the few shots; None is treated as "no examples" (plain zero-shot)
+    for e in examples or []:
+        lm += f"Question: {e['question']}\n"
+
+        nxt_obj = dict(result=e["answer"], thoughts=[])
+        for t in e["thoughts"]:
+            nxt_thought = t["step"]
+            if "result" in t:
+                nxt_thought += t["result"]
+            nxt_obj["thoughts"].append(nxt_thought)
+
+        lm += guidance.library._json._to_compact_json(nxt_obj)
+        lm += "\n"
+
+    # Now ask the question; the generated JSON is captured as "response_json"
+    lm += f"Question: {question}\n"
+    lm += guidance.json(name="response_json", schema=response_schema)
+
+    return lm
+
+
+def guidance_generation(
+    lm: guidance.models.Chat,
+    input: Dict[str, Any],
+    common: list[dict[str, Any]] | None = None,
+) -> Dict[str, Any]:
+    _logger.debug("Starting guidance_generation")
+    if common:  # this program takes its examples from `input`, not `common`
+        raise ValueError("Common Data not supported!")
+    # Run the prompt for this item; reads input["question"] and input["examples"]
+    result = lm + zero_shot_gsm8k(
+        question=input["question"], examples=input["examples"]
+    )
+    # NOTE: the label below says "result_string" but the value is response_json
+    _logger.info(f"result_string: {result['response_json']}")
+
+    loaded_obj = json.loads(result["response_json"])
+    # NOTE(review): "zero_or_few_show_thoughts" looks like a typo for "shot"
+    result = dict(
+        zero_or_few_shot_answer=loaded_obj["result"],
+        zero_or_few_show_thoughts=loaded_obj["thoughts"],
+    )
+    return result

From 0714d8877aa3015686133bd18fc5421fe53e5d02 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Fri, 12 Apr 2024 20:49:40 -0400
Subject: [PATCH 21/36] Make sure pydantic and jsonschema are available

---
 azureml/environments/phi2transformer-env.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/azureml/environments/phi2transformer-env.yaml b/azureml/environments/phi2transformer-env.yaml
index f31b249..8045498 100644
--- a/azureml/environments/phi2transformer-env.yaml
+++ b/azureml/environments/phi2transformer-env.yaml
@@ -21,4 +21,6 @@ conda_file:
       - accelerate
       - aether-utils==0.0.1.dev1
       - guidance>=0.1.13
+      - jsonschema
+      - pydantic
       - transformers
\ No newline at end of file

From 6f54a517979eac1eba17d32e749acf6a27645869 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Mon, 15 Apr 2024 13:01:37 -0400
Subject: [PATCH 22/36] Fix regex for a number

---
 guidance_programs/gsm8k_zero_or_few_shot_regex_number.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
index f88e099..2b219ad 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
@@ -39,7 +39,7 @@ def zero_shot_gsm8k(
     lm += f"Question: {question}\n"
     lm += f"Reasoning:"
     lm += guidance.gen("reasons", max_tokens=50)
-    lm += f"Answer: " + guidance.gen(name="result_string", regex=r"\d+\.?\d*")
+    lm += f"Answer: " + guidance.gen(name="result_string", regex=r"-?\d+\.?\d*")
 
     return lm
 

From 1bbc53d0babe33b53df3880e9b9ffcdfe210b356 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Mon, 15 Apr 2024 13:37:24 -0400
Subject: [PATCH 23/36] Increase context window for model

---
 azureml/components/src/jsonl_guidance_mistral7b.py        | 2 +-
 azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/azureml/components/src/jsonl_guidance_mistral7b.py b/azureml/components/src/jsonl_guidance_mistral7b.py
index d22a9bf..c1cc446 100644
--- a/azureml/components/src/jsonl_guidance_mistral7b.py
+++ b/azureml/components/src/jsonl_guidance_mistral7b.py
@@ -99,7 +99,7 @@ def main():
     downloaded_file = hf_hub_download(repo_id=repo_id, filename=filename)
 
     guidance_model = guidance.models.LlamaCpp(
-        downloaded_file, verbose=True, n_gpu_layers=-1
+        downloaded_file, verbose=True, n_gpu_layers=-1, n_ctx=4096
     )
     # _logger.info(f"guidance_model.device: {guidance_model.engine.device}")
 
diff --git a/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml
index 59efb39..3279e3e 100644
--- a/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml
+++ b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml
@@ -16,4 +16,4 @@ zeroorfewshot_config:
   llamacpp_config:
     compute_target: gput4
   random_seed: 4521
-  n_samples: 1
\ No newline at end of file
+  n_samples: 5
\ No newline at end of file

From 88ef9efc3082a12419345a814fbd03080c9ebf06 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Wed, 17 Apr 2024 08:44:13 -0400
Subject: [PATCH 24/36] Create a JSONL sampling component

---
 .../jsonl_sample_lines_component.yaml         | 52 +++++++++++++++++++
 azureml/components/src/jsonl_sample_lines.py  | 52 +++++++++++++++++++
 2 files changed, 104 insertions(+)
 create mode 100644 azureml/components/jsonl_sample_lines_component.yaml
 create mode 100644 azureml/components/src/jsonl_sample_lines.py

diff --git a/azureml/components/jsonl_sample_lines_component.yaml b/azureml/components/jsonl_sample_lines_component.yaml
new file mode 100644
index 0000000..5c23a68
--- /dev/null
+++ b/azureml/components/jsonl_sample_lines_component.yaml
@@ -0,0 +1,52 @@
+$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
+
+name: jsonl_sample_lines
+display_name: 'JSONL Sample Lines'
+type: command
+description: |
+  Samples lines (without replacement) from a JSONL file
+is_deterministic: true
+
+inputs:
+  input_dataset:
+    type: uri_file
+    optional: false
+    description: Dataset containing JSONL input
+  input_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the input dataset
+  n_samples:
+    type: integer
+    optional: false
+    description: Number of samples required
+  random_seed:
+    type: integer
+    optional: false
+    description: Seed for Python's PRNG
+  output_encoding:
+    type: string
+    optional: false
+    default: utf-8-sig
+    description: Encoding format of the output dataset
+
+outputs:
+  output_dataset:
+    type: uri_file
+    description: Dataset containing sampled JSONL
+
+code: ./src
+
+command: >-
+  python ./jsonl_sample_lines.py
+  --input_dataset ${{ inputs.input_dataset }}
+  --input_encoding ${{ inputs.input_encoding }}
+  --n_samples ${{ inputs.n_samples }}
+  --random_seed ${{ inputs.random_seed }}
+  --output_dataset ${{ outputs.output_dataset }}
+  --output_encoding ${{ inputs.output_encoding }}
+
+environment:
+  # Will be updated when component uploads
+  image: azureml:promptbase_aml@latest
\ No newline at end of file
diff --git a/azureml/components/src/jsonl_sample_lines.py b/azureml/components/src/jsonl_sample_lines.py
new file mode 100644
index 0000000..224c877
--- /dev/null
+++ b/azureml/components/src/jsonl_sample_lines.py
@@ -0,0 +1,52 @@
+import argparse
+import pathlib
+import random
+
+from typing import Any, Dict, List
+
+from aether_utils.jsonl_file_utils import load_jsonl, save_jsonl
+from aether_utils.logging_utils import get_standard_logger_for_file
+
+_logger = get_standard_logger_for_file(__file__)
+
+
+def parse_args():
+    """Parse the CLI: dataset paths and encodings, plus the sampling settings."""
+    cli = argparse.ArgumentParser(add_help=True)
+
+    io_opts = cli.add_argument_group("Datasets")  # where to read and write
+    for flag, kind in (
+        ("--input_dataset", pathlib.Path),
+        ("--input_encoding", str),
+        ("--output_dataset", pathlib.Path),
+        ("--output_encoding", str),
+    ):
+        io_opts.add_argument(flag, type=kind, required=True)
+
+    draw_opts = cli.add_argument_group("Sampling")  # sample size and PRNG seed
+    for flag in ("--n_samples", "--random_seed"):
+        draw_opts.add_argument(flag, type=int, required=True)
+    return cli.parse_args()
+
+
+def main():
+    """Draw a seeded sample of lines from the input JSONL and save the result."""
+    cli = parse_args()
+    _logger.info("Loading input")
+    records = load_jsonl(cli.input_dataset, cli.input_encoding)
+    _logger.info(f"Loaded {len(records)} items")
+
+    # Seed the module-level PRNG so a given seed and input repeat the selection
+    random.seed(cli.random_seed)
+    chosen = random.sample(records, k=cli.n_samples)
+    _logger.info("Saving output")
+    save_jsonl(
+        file_path=cli.output_dataset,
+        data=chosen,
+        destination_encoding=cli.output_encoding,
+    )
+    _logger.info("Done")
+
+
+if __name__ == "__main__":
+    main()

From 85657f38015630f1904d309f300190a4bc78c2a9 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Wed, 17 Apr 2024 09:01:48 -0400
Subject: [PATCH 25/36] Hook new component into pipeline

---
 azureml/pipelines/azureml_utils.py                  |  1 +
 azureml/pipelines/configs.py                        |  4 +++-
 .../configs/gsm8k_zeroorfewshot_config.yaml         |  6 ++++--
 azureml/pipelines/submit_gsm8k_zeroorfewshot.py     | 13 ++++++++++---
 4 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/azureml/pipelines/azureml_utils.py b/azureml/pipelines/azureml_utils.py
index 341b9b6..9dfce0d 100644
--- a/azureml/pipelines/azureml_utils.py
+++ b/azureml/pipelines/azureml_utils.py
@@ -22,6 +22,7 @@
     jsonl_knn_cosine_similarity="jsonl_knn_cosine_similarity_component.yaml",
     jsonl_mmlu_fetch="jsonl_mmlu_fetch_component.yaml",
     jsonl_random_examples="jsonl_random_examples_component.yaml",
+    jsonl_sample_lines="jsonl_sample_lines_component.yaml",
     jsonl_schema_checker="jsonl_schema_checker_component.yaml",
     jsonl_score_biosbias_json="jsonl_score_biosbias_json_component.yaml",
     jsonl_score_multiplechoice="jsonl_score_multiplechoice_component.yaml",
diff --git a/azureml/pipelines/configs.py b/azureml/pipelines/configs.py
index e51cbec..8b6675a 100644
--- a/azureml/pipelines/configs.py
+++ b/azureml/pipelines/configs.py
@@ -140,6 +140,8 @@ class GSM8KZeroOrFewShotConfig:
     pipeline: PipelineConfig = field(default_factory=PipelineConfig)
     json_guidance_programs: list[str] = field(default_factory=list)
     llamacpp_config: LlamaCppConfig = field(default_factory=LlamaCppConfig)
-    random_seed: int = int()
+    fewshot_random_seed: int = int()
+    n_fewshot: int = int()
+    sample_random_seed: int = int()
     n_samples: int = int()
 
diff --git a/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml
index 3279e3e..20c1e59 100644
--- a/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml
+++ b/azureml/pipelines/configs/gsm8k_zeroorfewshot_config.yaml
@@ -15,5 +15,7 @@ zeroorfewshot_config:
     - gsm8k_zero_or_few_shot_json_response.py
   llamacpp_config:
     compute_target: gput4
-  random_seed: 4521
-  n_samples: 5
\ No newline at end of file
+  fewshot_random_seed: 4521
+  n_fewshot: 5
+  sample_random_seed: 234891
+  n_samples: 400
\ No newline at end of file
diff --git a/azureml/pipelines/submit_gsm8k_zeroorfewshot.py b/azureml/pipelines/submit_gsm8k_zeroorfewshot.py
index b93f738..4890548 100644
--- a/azureml/pipelines/submit_gsm8k_zeroorfewshot.py
+++ b/azureml/pipelines/submit_gsm8k_zeroorfewshot.py
@@ -65,12 +65,19 @@ def basic_pipeline() -> Pipeline:
             get_split_job.name = f"extract_split_{s}"
             split_outputs[s] = get_split_job.outputs.output_dataset
 
-        random_examples_job = components.jsonl_random_examples(
+        sample_lines_job = components.jsonl_sample_lines(
             input_dataset=split_outputs["train"],
+            n_samples=run_config.n_samples,
+            random_seed=run_config.sample_random_seed
+        )
+        sample_lines_job.name= f"sample_{run_config.n_samples}_lines"
+
+        random_examples_job = components.jsonl_random_examples(
+            input_dataset=sample_lines_job.outputs.output_dataset,
             example_dataset=split_outputs["test"],
             output_key="examples",
-            num_examples=run_config.n_samples,
-            random_seed=run_config.random_seed
+            num_examples=run_config.n_fewshot,
+            random_seed=run_config.fewshot_random_seed
         )
         random_examples_job.name=f"add_random_examples"
 

From 53d4fe598c33c8deba2ccddeae95065aed2f225f Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Wed, 17 Apr 2024 14:53:58 -0400
Subject: [PATCH 26/36] Try tweaking the prompts (and saving)

---
 .../gsm8k_zero_or_few_shot_basic_json.py              |  7 ++++---
 .../gsm8k_zero_or_few_shot_json_response.py           |  1 +
 guidance_programs/gsm8k_zero_or_few_shot_plain.py     | 11 ++++++-----
 .../gsm8k_zero_or_few_shot_regex_number.py            |  7 ++++---
 4 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
index 7a619f4..f73316e 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
@@ -26,7 +26,7 @@ def zero_shot_gsm8k(
     # Show the few shots
     for e in examples:
         lm += f"Question: {e['question']}\n"
-        lm += f"Reasoning:"
+        lm += f"Reasoning:\n"
         for t in e["thoughts"]:
             lm += t["step"]
             if "result" in t:
@@ -38,7 +38,8 @@ def zero_shot_gsm8k(
     # Now ask the question
     lm += f"Question: {question}\n"
     lm += f"Reasoning:"
-    lm += guidance.gen("reasons", max_tokens=50)
+    lm += guidance.gen("reasons", max_tokens=100)
+    lm += "\n"
     lm += f"Answer: " + guidance.json(name="result_string", schema=dict(type="number"))
 
     return lm
@@ -61,5 +62,5 @@ def guidance_generation(
 
     float_result = float(result["result_string"])
 
-    result = dict(zero_or_few_shot_answer=float_result)
+    result = dict(zero_or_few_shot_answer=float_result, final_lm=str(result))
     return result
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
index 13b67ba..06e017c 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
@@ -72,5 +72,6 @@ def guidance_generation(
     result = dict(
         zero_or_few_shot_answer=loaded_obj["result"],
         zero_or_few_show_thoughts=loaded_obj["thoughts"],
+        final_lm=str(result),
     )
     return result
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_plain.py b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
index 5cc21da..7b47e71 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_plain.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
@@ -26,19 +26,20 @@ def zero_shot_gsm8k(
     # Show the few shots
     for e in examples:
         lm += f"Question: {e['question']}\n"
-        lm += f"Reasoning:"
+        lm += f"Reasoning:\n"
         for t in e["thoughts"]:
             lm += t["step"]
             if "result" in t:
                 lm += t["result"]
             lm += "\n"
-        lm += f"Answer: {e['answer']}"
+        lm += f"Answer: {e['answer']}\n"
         lm += "\n"
 
     # Now ask the question
     lm += f"Question: {question}\n"
-    lm += f"Reasoning:"
-    lm += guidance.gen("reasons", max_tokens=50)
+    lm += f"Reasoning:\n"
+    lm += guidance.gen("reasons", max_tokens=100)
+    lm += "\n"
     lm += f"Answer: " + guidance.gen(name="result_string", max_tokens=10)
 
     return lm
@@ -61,5 +62,5 @@ def guidance_generation(
 
     float_result = float(result["result_string"])
 
-    result = dict(zero_or_few_shot_answer=float_result)
+    result = dict(zero_or_few_shot_answer=float_result, final_lm=str(result))
     return result
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
index 2b219ad..9fff285 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
@@ -26,7 +26,7 @@ def zero_shot_gsm8k(
     # Show the few shots
     for e in examples:
         lm += f"Question: {e['question']}\n"
-        lm += f"Reasoning:"
+        lm += f"Reasoning:\n"
         for t in e["thoughts"]:
             lm += t["step"]
             if "result" in t:
@@ -38,7 +38,8 @@ def zero_shot_gsm8k(
     # Now ask the question
     lm += f"Question: {question}\n"
     lm += f"Reasoning:"
-    lm += guidance.gen("reasons", max_tokens=50)
+    lm += guidance.gen("reasons", max_tokens=100)
+    lm += "\n"
     lm += f"Answer: " + guidance.gen(name="result_string", regex=r"-?\d+\.?\d*")
 
     return lm
@@ -61,5 +62,5 @@ def guidance_generation(
 
     float_result = float(result["result_string"])
 
-    result = dict(zero_or_few_shot_answer=float_result)
+    result = dict(zero_or_few_shot_answer=float_result, final_lm=str(result))
     return result

From 4ec2b820660175a9093d57e7195e042e4a49c451 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Wed, 17 Apr 2024 15:29:44 -0400
Subject: [PATCH 27/36] Fix division by zero

---
 azureml/components/src/jsonl_score_numeric.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/azureml/components/src/jsonl_score_numeric.py b/azureml/components/src/jsonl_score_numeric.py
index 35a260e..c5dd211 100644
--- a/azureml/components/src/jsonl_score_numeric.py
+++ b/azureml/components/src/jsonl_score_numeric.py
@@ -37,7 +37,9 @@ def generate_summary(self) -> dict[str, Any]:
             if y_t == y_p:
                 n_correct += 1
         result["n_correct"] = n_correct
-        result["accuracy"] = float(n_correct) / len(self.y_true)
+        result["accuracy"] = 0
+        if len(self.y_true) > 0:
+            result["accuracy"] = float(n_correct) / len(self.y_true)
 
         return result
 

From bf85df89e3aa0ae5ddc7980f00029d01adb2085a Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Wed, 17 Apr 2024 15:29:53 -0400
Subject: [PATCH 28/36] Want mlflow available

---
 azureml/environments/phi2transformer-env.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/azureml/environments/phi2transformer-env.yaml b/azureml/environments/phi2transformer-env.yaml
index 8045498..8924347 100644
--- a/azureml/environments/phi2transformer-env.yaml
+++ b/azureml/environments/phi2transformer-env.yaml
@@ -19,6 +19,7 @@ conda_file:
       # ... so we have to add PyPI back in as an alternative index
       - --extra-index-url https://pypi.org/simple
       - accelerate
+      - azureml-mlflow
       - aether-utils==0.0.1.dev1
       - guidance>=0.1.13
       - jsonschema

From ade55793dcbd662249f7b4e7526669a9efbf481b Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Wed, 17 Apr 2024 15:30:00 -0400
Subject: [PATCH 29/36] Try some more logging

---
 azureml/components/src/jsonl_guidance_mistral7b.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/azureml/components/src/jsonl_guidance_mistral7b.py b/azureml/components/src/jsonl_guidance_mistral7b.py
index c1cc446..16399fa 100644
--- a/azureml/components/src/jsonl_guidance_mistral7b.py
+++ b/azureml/components/src/jsonl_guidance_mistral7b.py
@@ -2,6 +2,7 @@
 import importlib.util
 import json
 import pathlib
+import time
 
 from typing import Any, Callable, Dict
 
@@ -9,7 +10,8 @@
 
 from huggingface_hub import hf_hub_download
 
-import torch
+import mlflow
+
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 from aether_utils.jsonl_utils import line_map
@@ -56,16 +58,21 @@ def __init__(
         self._model = model
         self._guidance_function = self._get_guidance_function()
         self._common_data = common_data
+        self._step = 0
 
     def __call__(self, item: Dict[str, Any]) -> dict[str, any]:
         _logger.debug(f"__call__: {item}")
+        start = time.time()
         result = self._guidance_function(self._model, item, common=self._common_data)
+        stop = time.time()
+        mlflow.log_metric("time_taken", value=stop-start, step=self._step)
         _logger.debug(f"Checking keys")
         for k in result.keys():
             assert k not in item, f"Duplicate key: {k}"
 
         _logger.debug(f"Updating item")
         item.update(**result)
+        self._step += 1
 
         return item
 

From 542b7c88b3559c58c8802fa18ba4b9266d0e1585 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Wed, 17 Apr 2024 15:30:25 -0400
Subject: [PATCH 30/36] Tweaking guidance programs

---
 .../gsm8k_zero_or_few_shot_basic_json.py      |  6 ++++--
 .../gsm8k_zero_or_few_shot_json_response.py   | 20 ++++++++++++++++---
 .../gsm8k_zero_or_few_shot_plain.py           |  6 ++++--
 .../gsm8k_zero_or_few_shot_regex_number.py    |  6 ++++--
 4 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
index f73316e..c3a2edf 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
@@ -27,9 +27,11 @@ def zero_shot_gsm8k(
     for e in examples:
         lm += f"Question: {e['question']}\n"
         lm += f"Reasoning:\n"
-        for t in e["thoughts"]:
-            lm += t["step"]
+        for i, t in enumerate(e["thoughts"]):
+            lm += f"{i+1}.  {t['step']}"
             if "result" in t:
+                lm += " "
+                lm += t["calculation"]
                 lm += t["result"]
             lm += "\n"
         lm += f"Answer: {e['answer']}"
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
index 06e017c..81a08e7 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
@@ -6,6 +6,8 @@
 
 from typing import Any, Dict
 
+from jsonschema import validate
+
 import guidance
 
 
@@ -26,7 +28,17 @@ def zero_shot_gsm8k(
     response_schema = dict(
         type="object",
         properties=dict(
-            thoughts=dict(type="array", items=dict(type="string")),
+            thoughts=dict(
+                type="array",
+                items=dict(
+                    type="object",
+                    properties=dict(
+                        step=dict(type="string"),
+                        calculation=dict(type="string"),
+                        result=dict(type="string"),
+                    ),
+                ),
+            ),
             result=dict(type="number"),
         ),
     )
@@ -37,11 +49,13 @@ def zero_shot_gsm8k(
 
         nxt_obj = dict(result=e["answer"], thoughts=[])
         for t in e["thoughts"]:
-            nxt_thought = t["step"]
+            nxt_thought = dict(step=t["step"])
             if "result" in t:
-                nxt_thought += t["result"]
+                nxt_thought["calculation"] = t["calculation"]
+                nxt_thought["result"] += t["result"]
             nxt_obj["thoughts"].append(nxt_thought)
 
+        validate(nxt_obj, schema=response_schema)
         lm += guidance.library._json._to_compact_json(nxt_obj)
         lm += "\n"
 
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_plain.py b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
index 7b47e71..ae5eca8 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_plain.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
@@ -27,9 +27,11 @@ def zero_shot_gsm8k(
     for e in examples:
         lm += f"Question: {e['question']}\n"
         lm += f"Reasoning:\n"
-        for t in e["thoughts"]:
-            lm += t["step"]
+        for i, t in enumerate(e["thoughts"]):
+            lm += f"{i+1}.  {t['step']}"
             if "result" in t:
+                lm += " "
+                lm += t["calculation"]
                 lm += t["result"]
             lm += "\n"
         lm += f"Answer: {e['answer']}\n"
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
index 9fff285..3402595 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
@@ -27,9 +27,11 @@ def zero_shot_gsm8k(
     for e in examples:
         lm += f"Question: {e['question']}\n"
         lm += f"Reasoning:\n"
-        for t in e["thoughts"]:
-            lm += t["step"]
+        for i, t in enumerate(e["thoughts"]):
+            lm += f"{i+1}.  {t['step']}"
             if "result" in t:
+                lm += " "
+                lm += t["calculation"]
                 lm += t["result"]
             lm += "\n"
         lm += f"Answer: {e['answer']}"

From 8a2afa5ef9092b9392fd82bfe07b0a41eca4b5ce Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Thu, 18 Apr 2024 08:27:25 -0400
Subject: [PATCH 31/36] Try some extra new lines

---
 guidance_programs/gsm8k_zero_or_few_shot_basic_json.py    | 4 ++--
 guidance_programs/gsm8k_zero_or_few_shot_json_response.py | 1 +
 guidance_programs/gsm8k_zero_or_few_shot_plain.py         | 6 +++---
 guidance_programs/gsm8k_zero_or_few_shot_regex_number.py  | 8 +++++---
 4 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
index c3a2edf..8eb6c08 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
@@ -35,12 +35,12 @@ def zero_shot_gsm8k(
                 lm += t["result"]
             lm += "\n"
         lm += f"Answer: {e['answer']}"
-        lm += "\n"
+        lm += "\n\n"
 
     # Now ask the question
     lm += f"Question: {question}\n"
     lm += f"Reasoning:"
-    lm += guidance.gen("reasons", max_tokens=100)
+    lm += guidance.gen("reasons", max_tokens=100, stop="\n")
     lm += "\n"
     lm += f"Answer: " + guidance.json(name="result_string", schema=dict(type="number"))
 
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
index 81a08e7..88bcc37 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
@@ -54,6 +54,7 @@ def zero_shot_gsm8k(
                 nxt_thought["calculation"] = t["calculation"]
                 nxt_thought["result"] += t["result"]
             nxt_obj["thoughts"].append(nxt_thought)
+        lm += "\n"
 
         validate(nxt_obj, schema=response_schema)
         lm += guidance.library._json._to_compact_json(nxt_obj)
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_plain.py b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
index ae5eca8..9b7c76b 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_plain.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
@@ -35,14 +35,14 @@ def zero_shot_gsm8k(
                 lm += t["result"]
             lm += "\n"
         lm += f"Answer: {e['answer']}\n"
-        lm += "\n"
+        lm += "\n\n"
 
     # Now ask the question
     lm += f"Question: {question}\n"
     lm += f"Reasoning:\n"
-    lm += guidance.gen("reasons", max_tokens=100)
+    lm += guidance.gen("reasons", max_tokens=100, stop="\n")
     lm += "\n"
-    lm += f"Answer: " + guidance.gen(name="result_string", max_tokens=10)
+    lm += f"Answer: " + guidance.gen(name="result_string", max_tokens=10, stop="\n")
 
     return lm
 
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
index 3402595..1a51aab 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
@@ -35,14 +35,16 @@ def zero_shot_gsm8k(
                 lm += t["result"]
             lm += "\n"
         lm += f"Answer: {e['answer']}"
-        lm += "\n"
+        lm += "\n\n"
 
     # Now ask the question
     lm += f"Question: {question}\n"
     lm += f"Reasoning:"
-    lm += guidance.gen("reasons", max_tokens=100)
+    lm += guidance.gen("reasons", max_tokens=100, stop="\n")
     lm += "\n"
-    lm += f"Answer: " + guidance.gen(name="result_string", regex=r"-?\d+\.?\d*")
+    lm += f"Answer: " + guidance.gen(
+        name="result_string", regex=r"-?\d+\.?\d*", stop="\n"
+    )
 
     return lm
 

From e51a0258df4deb54302aec2c7551ff495f1ad81c Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Thu, 18 Apr 2024 13:06:50 -0400
Subject: [PATCH 32/36] Small tweaks and linting

---
 azureml/components/src/jsonl_guidance_mistral7b.py       | 2 +-
 azureml/pipelines/configs.py                             | 2 +-
 azureml/pipelines/submit_gsm8k_zeroorfewshot.py          | 9 ++++-----
 guidance_programs/gsm8k_zero_or_few_shot_basic_json.py   | 3 ++-
 .../gsm8k_zero_or_few_shot_json_response.py              | 3 +--
 guidance_programs/gsm8k_zero_or_few_shot_plain.py        | 3 ++-
 guidance_programs/gsm8k_zero_or_few_shot_regex_number.py | 3 ++-
 7 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/azureml/components/src/jsonl_guidance_mistral7b.py b/azureml/components/src/jsonl_guidance_mistral7b.py
index 16399fa..7fa5063 100644
--- a/azureml/components/src/jsonl_guidance_mistral7b.py
+++ b/azureml/components/src/jsonl_guidance_mistral7b.py
@@ -65,7 +65,7 @@ def __call__(self, item: Dict[str, Any]) -> dict[str, any]:
         start = time.time()
         result = self._guidance_function(self._model, item, common=self._common_data)
         stop = time.time()
-        mlflow.log_metric("time_taken", value=stop-start, step=self._step)
+        mlflow.log_metric("time_taken", value=stop - start, step=self._step)
         _logger.debug(f"Checking keys")
         for k in result.keys():
             assert k not in item, f"Duplicate key: {k}"
diff --git a/azureml/pipelines/configs.py b/azureml/pipelines/configs.py
index 8b6675a..7fadb45 100644
--- a/azureml/pipelines/configs.py
+++ b/azureml/pipelines/configs.py
@@ -30,6 +30,7 @@ class AOAIConfig:
 class Phi2Config:
     compute_target: str = str()
 
+
 @dataclass
 class LlamaCppConfig:
     compute_target: str = str()
@@ -144,4 +145,3 @@ class GSM8KZeroOrFewShotConfig:
     n_fewshot: int = int()
     sample_random_seed: int = int()
     n_samples: int = int()
-
diff --git a/azureml/pipelines/submit_gsm8k_zeroorfewshot.py b/azureml/pipelines/submit_gsm8k_zeroorfewshot.py
index 4890548..99a4569 100644
--- a/azureml/pipelines/submit_gsm8k_zeroorfewshot.py
+++ b/azureml/pipelines/submit_gsm8k_zeroorfewshot.py
@@ -68,21 +68,20 @@ def basic_pipeline() -> Pipeline:
         sample_lines_job = components.jsonl_sample_lines(
             input_dataset=split_outputs["train"],
             n_samples=run_config.n_samples,
-            random_seed=run_config.sample_random_seed
+            random_seed=run_config.sample_random_seed,
         )
-        sample_lines_job.name= f"sample_{run_config.n_samples}_lines"
+        sample_lines_job.name = f"sample_{run_config.n_samples}_lines"
 
         random_examples_job = components.jsonl_random_examples(
             input_dataset=sample_lines_job.outputs.output_dataset,
             example_dataset=split_outputs["test"],
             output_key="examples",
             num_examples=run_config.n_fewshot,
-            random_seed=run_config.fewshot_random_seed
+            random_seed=run_config.fewshot_random_seed,
         )
-        random_examples_job.name=f"add_random_examples"
+        random_examples_job.name = f"add_random_examples"
 
         for progname, prog_input in guidance_inputs.items():
-
             guidance_job = components.jsonl_guidance_mistral7b(
                 guidance_program=prog_input,
                 input_dataset=random_examples_job.outputs.output_dataset,
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
index 8eb6c08..f201fca 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
@@ -35,7 +35,8 @@ def zero_shot_gsm8k(
                 lm += t["result"]
             lm += "\n"
         lm += f"Answer: {e['answer']}"
-        lm += "\n\n"
+        lm += "\n"
+    lm += "\n"
 
     # Now ask the question
     lm += f"Question: {question}\n"
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
index 88bcc37..0357631 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
@@ -54,11 +54,10 @@ def zero_shot_gsm8k(
                 nxt_thought["calculation"] = t["calculation"]
                 nxt_thought["result"] += t["result"]
             nxt_obj["thoughts"].append(nxt_thought)
-        lm += "\n"
 
         validate(nxt_obj, schema=response_schema)
         lm += guidance.library._json._to_compact_json(nxt_obj)
-        lm += "\n"
+        lm += "\n\n"
 
     # Now ask the question
     lm += f"Question: {question}\n"
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_plain.py b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
index 9b7c76b..aa0bd90 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_plain.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_plain.py
@@ -35,7 +35,8 @@ def zero_shot_gsm8k(
                 lm += t["result"]
             lm += "\n"
         lm += f"Answer: {e['answer']}\n"
-        lm += "\n\n"
+        lm += "\n"
+    lm += "\n"
 
     # Now ask the question
     lm += f"Question: {question}\n"
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
index 1a51aab..1df94e3 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
@@ -35,7 +35,8 @@ def zero_shot_gsm8k(
                 lm += t["result"]
             lm += "\n"
         lm += f"Answer: {e['answer']}"
-        lm += "\n\n"
+        lm += "\n"
+    lm += "\n"
 
     # Now ask the question
     lm += f"Question: {question}\n"

From 622be2895500567965ba915d19055d013a3a000d Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Thu, 18 Apr 2024 13:38:25 -0400
Subject: [PATCH 33/36] Bad stop for reasons

---
 guidance_programs/gsm8k_zero_or_few_shot_basic_json.py   | 2 +-
 guidance_programs/gsm8k_zero_or_few_shot_regex_number.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
index f201fca..499a15e 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_basic_json.py
@@ -41,7 +41,7 @@ def zero_shot_gsm8k(
     # Now ask the question
     lm += f"Question: {question}\n"
     lm += f"Reasoning:"
-    lm += guidance.gen("reasons", max_tokens=100, stop="\n")
+    lm += guidance.gen("reasons", max_tokens=100)
     lm += "\n"
     lm += f"Answer: " + guidance.json(name="result_string", schema=dict(type="number"))
 
diff --git a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
index 1df94e3..ad07bca 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_regex_number.py
@@ -41,7 +41,7 @@ def zero_shot_gsm8k(
     # Now ask the question
     lm += f"Question: {question}\n"
     lm += f"Reasoning:"
-    lm += guidance.gen("reasons", max_tokens=100, stop="\n")
+    lm += guidance.gen("reasons", max_tokens=100)
     lm += "\n"
     lm += f"Answer: " + guidance.gen(
         name="result_string", regex=r"-?\d+\.?\d*", stop="\n"

From a90380e8b108d3b8615975736612c574e05c1c82 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Thu, 18 Apr 2024 14:44:26 -0400
Subject: [PATCH 34/36] Dumb mistakes in json formatted response

---
 .../gsm8k_zero_or_few_shot_json_response.py         | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
index 0357631..34505df 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
@@ -47,13 +47,14 @@ def zero_shot_gsm8k(
     for e in examples:
         lm += f"Question: {e['question']}\n"
 
-        nxt_obj = dict(result=e["answer"], thoughts=[])
+        nxt_obj = dict(thoughts=[])
         for t in e["thoughts"]:
-            nxt_thought = dict(step=t["step"])
+            nxt_thought = dict(step=t["step"], calculation="", result="")
             if "result" in t:
                 nxt_thought["calculation"] = t["calculation"]
-                nxt_thought["result"] += t["result"]
+                nxt_thought["result"] = t["result"]
             nxt_obj["thoughts"].append(nxt_thought)
+        nxt_obj["result"] = e["answer"]
 
         validate(nxt_obj, schema=response_schema)
         lm += guidance.library._json._to_compact_json(nxt_obj)
@@ -75,13 +76,13 @@ def guidance_generation(
     if common:
         raise ValueError("Common Data not supported!")
 
-    result = lm + zero_shot_gsm8k(
+    llm_result = lm + zero_shot_gsm8k(
         question=input["question"], examples=input["examples"]
     )
 
-    _logger.info(f"result_string: {result['response_json']}")
+    _logger.info(f"result_string: {llm_result['response_json']}")
 
-    loaded_obj = json.loads(result["response_json"])
+    loaded_obj = json.loads(llm_result["response_json"])
 
     result = dict(
         zero_or_few_shot_answer=loaded_obj["result"],

From db91f0183208e9f5a72042db014d8978e13266b9 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Fri, 19 Apr 2024 08:07:10 -0400
Subject: [PATCH 35/36] Another silly mistake

---
 guidance_programs/gsm8k_zero_or_few_shot_json_response.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
index 34505df..2d58c30 100644
--- a/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
+++ b/guidance_programs/gsm8k_zero_or_few_shot_json_response.py
@@ -87,6 +87,6 @@ def guidance_generation(
     result = dict(
         zero_or_few_shot_answer=loaded_obj["result"],
         zero_or_few_show_thoughts=loaded_obj["thoughts"],
-        final_lm=str(result),
+        final_lm=str(llm_result),
     )
     return result

From 94c465cfb9be8c075e20aa498e338765440686ba Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)" <riedgar@microsoft.com>
Date: Sun, 21 Apr 2024 16:53:38 -0400
Subject: [PATCH 36/36] Need Rust

---
 azureml/components/jsonl_guidance_mistral7b_component.yaml | 5 +++++
 azureml/environments/phi2transformer-env.yaml              | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/azureml/components/jsonl_guidance_mistral7b_component.yaml b/azureml/components/jsonl_guidance_mistral7b_component.yaml
index f6ba778..c451731 100644
--- a/azureml/components/jsonl_guidance_mistral7b_component.yaml
+++ b/azureml/components/jsonl_guidance_mistral7b_component.yaml
@@ -52,6 +52,11 @@ outputs:
 code: ./src/
 
 command: |
+  # Install Rust toolchain
+  #apt update
+  #apt upgrade -y
+  #apt install -y rustc build-essential
+  #pip install setup-rust
   # Download the zip
   wget https://github.com/guidance-ai/guidance/archive/refs/heads/main.zip
   echo
diff --git a/azureml/environments/phi2transformer-env.yaml b/azureml/environments/phi2transformer-env.yaml
index 8924347..fe96e88 100644
--- a/azureml/environments/phi2transformer-env.yaml
+++ b/azureml/environments/phi2transformer-env.yaml
@@ -8,8 +8,11 @@ image: mcr.microsoft.com/azureml/minimal-ubuntu22.04-py39-cuda11.8-gpu-inference
 conda_file:
   channels:
     - defaults
+    - conda-forge
   dependencies:
     - python=3.11
+    # Rust is now part of building the guidance wheel
+    - rust
     - pip
     - pip:
       # Note that we have to force torch to install from this index