Skip to content
180 changes: 176 additions & 4 deletions .github/tests/lm_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import lotus
from lotus.models import LM, SentenceTransformersRM
from lotus.types import CascadeArgs
from lotus.types import CascadeArgs, ReasoningStrategy
from lotus.vector_store import FaissVS

################################################################################
Expand Down Expand Up @@ -269,7 +269,7 @@ def test_filter_operation_cot(setup_models, model):
}
df = pd.DataFrame(data)
user_instruction = "{Text} I have at least one apple"
filtered_df = df.sem_filter(user_instruction, strategy="cot")
filtered_df = df.sem_filter(user_instruction, strategy=ReasoningStrategy.ZS_COT)
expected_df = pd.DataFrame({"Text": ["I had two apples, then I gave away one", "My friend gave me an apple"]})
assert filtered_df.equals(expected_df)

Expand Down Expand Up @@ -302,7 +302,7 @@ def test_filter_operation_cot_fewshot(setup_models, model):
user_instruction = "{Sequence} is increasing"
filtered_df = df.sem_filter(
user_instruction,
strategy="cot",
strategy=ReasoningStrategy.COT,
examples=examples_df,
additional_cot_instructions="Assume the most typical or logical case.",
)
Expand Down Expand Up @@ -339,7 +339,7 @@ def test_filter_operation_cot_fewshot_no_reasoning(setup_models, model):
examples_df = pd.DataFrame(examples)

user_instruction = "{Sequence} is increasing"
filtered_df = df.sem_filter(user_instruction, strategy="cot", examples=examples_df)
filtered_df = df.sem_filter(user_instruction, strategy=ReasoningStrategy.ZS_COT, examples=examples_df)
expected_df = pd.DataFrame(
{
"Sequence": [
Expand All @@ -352,6 +352,178 @@ def test_filter_operation_cot_fewshot_no_reasoning(setup_models, model):
assert filtered_df.equals(expected_df)


@pytest.mark.parametrize("model", get_enabled("gpt-4o-mini", "ollama/llama3.1"))
def test_filter_operation_cot(setup_models, model):
    """Zero-shot chain-of-thought filtering keeps only the rows where the claim holds."""
    lotus.settings.configure(lm=setup_models[model])

    # Simple rows where apple ownership can be tracked step by step.
    texts = [
        "I had two apples, then I gave away one",
        "My friend gave me an apple",
        "I gave away both of my apples",
        "I gave away my apple, then a friend gave me his apple, then I threw my apple away",
    ]
    frame = pd.DataFrame({"Text": texts})

    result = frame.sem_filter("{Text} I have at least one apple", strategy=ReasoningStrategy.ZS_COT)

    # Only the first two stories end with at least one apple in hand.
    expected = pd.DataFrame({"Text": texts[:2]})
    assert result.equals(expected)


@pytest.mark.parametrize("model", get_enabled("gpt-4o-mini", "ollama/llama3.1"))
def test_filter_operation_cot_fewshot(setup_models, model):
    """Few-shot CoT filtering: worked examples with reasoning traces guide the model."""
    lotus.settings.configure(lm=setup_models[model])

    frame = pd.DataFrame(
        {
            "Sequence": [
                "Five, Four, Three",
                "A, B, C",
                "Pond, Lake, Ocean",
            ]
        }
    )
    # Demonstrations paired with explicit reasoning (fixture strings kept verbatim,
    # including the "villiage" spelling).
    demos = pd.DataFrame(
        {
            "Sequence": ["1, 2, 3", "penny, nickel, dime, quarter", "villiage, town, city"],
            "Answer": [True, True, True],
            "Reasoning": [
                "1, 2, 3 is an increasing sequence of numbers",
                "penny, nickel, dime, quarter is an increasing sequence of coins",
                "villiage, town, city is an increasing sequence of settlements",
            ],
        }
    )

    result = frame.sem_filter(
        "{Sequence} is increasing",
        strategy=ReasoningStrategy.COT,
        examples=demos,
        additional_cot_instructions="Assume the most typical or logical case.",
    )

    # The countdown row (index 0) is rejected; the two increasing rows survive.
    expected = pd.DataFrame({"Sequence": ["A, B, C", "Pond, Lake, Ocean"]}, index=[1, 2])
    assert result.equals(expected)


@pytest.mark.parametrize("model", get_enabled("gpt-4o-mini", "ollama/llama3.1"))
def test_filter_operation_cot_fewshot_no_reasoning(setup_models, model):
    """Few-shot examples without a Reasoning column still work under zero-shot CoT."""
    lotus.settings.configure(lm=setup_models[model])

    frame = pd.DataFrame(
        {
            "Sequence": [
                "Five, Four, Three",
                "A, B, C",
                "Pond, Lake, Ocean",
            ]
        }
    )
    # Answer-only demonstrations (no reasoning traces; "villiage" kept verbatim).
    demos = pd.DataFrame(
        {
            "Sequence": ["1, 2, 3", "penny, nickel, dime, quarter", "villiage, town, city"],
            "Answer": [True, True, True],
        }
    )

    result = frame.sem_filter(
        "{Sequence} is increasing",
        strategy=ReasoningStrategy.ZS_COT,
        examples=demos,
    )

    # Only the increasing sequences (original indices 1 and 2) remain.
    expected = pd.DataFrame({"Sequence": ["A, B, C", "Pond, Lake, Ocean"]}, index=[1, 2])
    assert result.equals(expected)

@pytest.mark.parametrize("model", get_enabled("gpt-4o-mini", "ollama/llama3.1"))
def test_map_operation_cot(setup_models, model):
    """Zero-shot CoT mapping predicts the next element of each sequence."""
    lotus.settings.configure(lm=setup_models[model])

    frame = pd.DataFrame(
        {
            "Sequence": [
                "Alpha, Bravo, Charlie",
                "One, Two, Three",
                "Triangle, Square, Pentagon",
            ]
        }
    )

    mapped = frame.sem_map(
        "What should be the next item in the sequence: {Sequence}",
        strategy=ReasoningStrategy.ZS_COT,
    )

    # Compare only the generated "_map" column against the expected continuations.
    expected = pd.DataFrame({"_map": ["Delta", "Four", "Hexagon"]})
    assert mapped["_map"].equals(expected["_map"])


@pytest.mark.parametrize("model", get_enabled("gpt-4o-mini", "ollama/llama3.1"))
def test_map_operation_cot_fewshot(setup_models, model):
    """Few-shot CoT mapping: reasoning-annotated examples shape the continuation."""
    lotus.settings.configure(lm=setup_models[model])

    frame = pd.DataFrame(
        {
            "Sequence": [
                "Alpha, Bravo, Charlie",
                "One, Two, Three",
                "Triangle, Square, Pentagon",
            ]
        }
    )
    # Demonstrations with both the expected answer and the reasoning behind it.
    demos = pd.DataFrame(
        {
            "Sequence": ["A, B, C", "Kindergarten, First Grade, Second Grade"],
            "Answer": ["D", "Third Grade"],
            "Reasoning": [
                "D is the next letter in the alphabet after C",
                "Third Grade is the next grade after Second Grade",
            ],
        }
    )

    mapped = frame.sem_map(
        "What should be the next item in the sequence: {Sequence}",
        strategy=ReasoningStrategy.COT,
        examples=demos,
    )

    # Compare only the generated "_map" column against the expected continuations.
    expected = pd.DataFrame({"_map": ["Delta", "Four", "Hexagon"]})
    assert mapped["_map"].equals(expected["_map"])


@pytest.mark.parametrize("model", get_enabled("gpt-4o-mini", "ollama/llama3.1"))
def test_map_operation_cot_fewshot_no_reasoning(setup_models, model):
    """Answer-only few-shot examples combined with zero-shot CoT mapping."""
    lotus.settings.configure(lm=setup_models[model])

    frame = pd.DataFrame(
        {
            "Sequence": [
                "Alpha, Bravo, Charlie",
                "One, Two, Three",
                "Triangle, Square, Pentagon",
            ]
        }
    )
    # Demonstrations carry answers only — no Reasoning column.
    demos = pd.DataFrame(
        {
            "Sequence": ["A, B, C", "Kindergarten, First Grade, Second Grade"],
            "Answer": ["D", "Third Grade"],
        }
    )

    mapped = frame.sem_map(
        "What should be the next item in the sequence: {Sequence}",
        strategy=ReasoningStrategy.ZS_COT,
        examples=demos,
    )

    # Compare only the generated "_map" column against the expected continuations.
    expected = pd.DataFrame({"_map": ["Delta", "Four", "Hexagon"]})
    assert mapped["_map"].equals(expected["_map"])


################################################################################
# Cascade tests
################################################################################
Expand Down
62 changes: 16 additions & 46 deletions lotus/sem_ops/postprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,28 @@


def cot_postprocessor(llm_answers: list[str]):
    """Split chain-of-thought completions into (outputs, explanations).

    Each raw answer is expected to look like::

        Reasoning:
        <free-form reasoning>
        Answer: <final answer>

    Either marker may be missing: without a "Reasoning:" header the reasoning
    span starts at the beginning of the text, and without an "Answer: " marker
    the whole text is treated as the answer (with an empty explanation).

    Args:
        llm_answers (list[str]): Raw model completions.

    Returns:
        tuple[list[str], list[str]]: Parallel lists of final answers and
        reasoning strings, each stripped of leading/trailing newlines.
    """
    outputs: list[str] = []
    explanations: list[str] = []
    reasoning_marker = "Reasoning:\n"
    answer_marker = "Answer: "

    for llm_answer in llm_answers:
        reasoning_start = llm_answer.find(reasoning_marker)
        reasoning_start = 0 if reasoning_start == -1 else reasoning_start + len(reasoning_marker)

        marker_idx = llm_answer.find(answer_marker)
        if marker_idx == -1:
            # No explicit marker: the whole text is the answer, no explanation.
            reasoning_end = 0
            answer_start = 0
        else:
            # End the reasoning slice BEFORE the marker so the literal
            # "Answer: " text never leaks into the explanation; only the
            # answer slice starts after the marker.
            reasoning_end = marker_idx
            answer_start = marker_idx + len(answer_marker)

        explanations.append(llm_answer[reasoning_start:reasoning_end].strip("\n"))
        outputs.append(llm_answer[answer_start:].strip("\n"))

    return outputs, explanations


Expand Down Expand Up @@ -106,57 +112,21 @@ def get_cot_postprocessor(model: lotus.models.LM, for_extract: bool = False) ->
return cot_postprocessor


def map_postprocess_cot(llm_answers: list[str]) -> SemanticMapPostprocessOutput:
    """Postprocess the output of the map operator with CoT reasoning.

    Args:
        llm_answers (list[str]): The list of llm answers.

    Returns:
        SemanticMapPostprocessOutput: The raw completions together with the
        parsed answers and the extracted reasoning strings.
    """
    outputs: list[str] = []
    explanations: list[str | None] = []

    for llm_answer in llm_answers:
        reasoning_idx = llm_answer.find("Reasoning:\n")
        reasoning_idx = 0 if reasoning_idx == -1 else reasoning_idx + len("Reasoning:\n")

        answer_idx = llm_answer.find("Answer:")
        if answer_idx == -1:
            # Fix: previously a missing "Answer:" marker left answer_idx == -1,
            # so llm_answer[-1 + len("Answer:"):] silently produced garbage and
            # the reasoning slice dropped the final character. Fall back to
            # treating the whole completion as the answer instead.
            reasoning = ""
            answer = llm_answer
        else:
            reasoning = llm_answer[reasoning_idx:answer_idx].strip("\n")
            answer = llm_answer[answer_idx + len("Answer:") :]
        outputs.append(answer)
        explanations.append(reasoning)

    return SemanticMapPostprocessOutput(raw_outputs=llm_answers, outputs=outputs, explanations=explanations)


def map_postprocess(
    llm_answers: list[str],
    model: lotus.models.LM,
    cot_reasoning: bool = False,
    default: str = "",
) -> SemanticMapPostprocessOutput:
    """Postprocess the output of the map operator.

    Args:
        llm_answers (list[str]): The list of llm answers.
        model (lotus.models.LM): The model whose output format the
            postprocessor is selected for.
        cot_reasoning (bool): Kept for backward compatibility; the CoT-aware
            postprocessor is now applied unconditionally, so this flag is
            ignored.
        default (str): The default value to use if we fail to parse the answer.

    Returns:
        SemanticMapPostprocessOutput
    """
    postprocessor = get_cot_postprocessor(model)
    outputs, explanations = postprocessor(llm_answers)
    # Substitute the default for any answer the postprocessor failed to parse.
    outputs = [output if output is not None else default for output in outputs]

    return SemanticMapPostprocessOutput(raw_outputs=llm_answers, outputs=outputs, explanations=explanations)

Expand Down
Loading
Loading