From e77209a4232ac6d29524ddbf9accc563726edcef Mon Sep 17 00:00:00 2001 From: nischalj10 Date: Sat, 4 May 2024 14:42:33 +0530 Subject: [PATCH 1/2] add support for litellm --- app/llm.py | 18 +++++++----------- app/llm_with_func_calling.py | 10 +++------- requirements.txt | 1 + 3 files changed, 11 insertions(+), 18 deletions(-) diff --git a/app/llm.py b/app/llm.py index cecc3d9..ac8b163 100644 --- a/app/llm.py +++ b/app/llm.py @@ -1,5 +1,4 @@ -from openai import AsyncOpenAI -import os +from litellm import acompletion, litellm from typing import List from .custom_types import ( ResponseRequiredRequest, @@ -13,10 +12,7 @@ class LlmClient: def __init__(self): - self.client = AsyncOpenAI( - organization=os.environ["OPENAI_ORGANIZATION_ID"], - api_key=os.environ["OPENAI_API_KEY"], - ) + litellm.modify_params = True def draft_begin_message(self): response = ResponseResponse( @@ -61,11 +57,11 @@ def prepare_prompt(self, request: ResponseRequiredRequest): async def draft_response(self, request: ResponseRequiredRequest): prompt = self.prepare_prompt(request) - stream = await self.client.chat.completions.create( - model="gpt-4-turbo-preview", # Or use a 3.5 model for speed - messages=prompt, - stream=True, - ) + stream = await acompletion( + model="claude-3-haiku-20240307", # Or use a 3.5 model for speed + messages=prompt, + stream=True, + ) async for chunk in stream: if chunk.choices[0].delta.content is not None: response = ResponseResponse( diff --git a/app/llm_with_func_calling.py b/app/llm_with_func_calling.py index 8e0f425..e6fab9d 100644 --- a/app/llm_with_func_calling.py +++ b/app/llm_with_func_calling.py @@ -1,5 +1,4 @@ -from openai import AsyncOpenAI -import os +from litellm import acompletion, litellm import json from .custom_types import ( ResponseRequiredRequest, @@ -14,10 +13,7 @@ class LlmClient: def __init__(self): - self.client = AsyncOpenAI( - organization=os.environ["OPENAI_ORGANIZATION_ID"], - api_key=os.environ["OPENAI_API_KEY"], - ) + litellm.modify_params 
= True def draft_begin_message(self): response = ResponseResponse( @@ -87,7 +83,7 @@ async def draft_response(self, request: ResponseRequiredRequest): prompt = self.prepare_prompt(request) func_call = {} func_arguments = "" - stream = await self.client.chat.completions.create( + stream = await acompletion( model="gpt-4-turbo-preview", # Or use a 3.5 model for speed messages=prompt, stream=True, diff --git a/requirements.txt b/requirements.txt index 536b641..0101740 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,3 +29,4 @@ retell-sdk==3.11.0 fastapi==0.100.1 uvicorn==0.21.1 python-multipart==0.0.9 +litellm==1.35.38 \ No newline at end of file From d5eb45bd8b38dbba305017c36e218fbe12b47cc6 Mon Sep 17 00:00:00 2001 From: nischalj10 Date: Sat, 4 May 2024 14:50:31 +0530 Subject: [PATCH 2/2] added comments --- .env | 1 + app/llm.py | 2 +- app/llm_with_func_calling.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.env b/.env index 5c02abe..ca926b6 100644 --- a/.env +++ b/.env @@ -1,5 +1,6 @@ OPENAI_API_KEY="" OPENAI_ORGANIZATION_ID="" +ANTHROPIC_API_KEY = "" #or any other provider api key as per https://litellm.vercel.app/docs/providers TWILIO_ACCOUNT_ID="" TWILIO_AUTH_TOKEN="" diff --git a/app/llm.py b/app/llm.py index ac8b163..3f0f84a 100644 --- a/app/llm.py +++ b/app/llm.py @@ -58,7 +58,7 @@ def prepare_prompt(self, request: ResponseRequiredRequest): async def draft_response(self, request: ResponseRequiredRequest): prompt = self.prepare_prompt(request) stream = await acompletion( - model="claude-3-haiku-20240307", # Or use a 3.5 model for speed + model="claude-3-haiku-20240307", # Or use an OpenAI / any other model messages=prompt, stream=True, ) diff --git a/app/llm_with_func_calling.py b/app/llm_with_func_calling.py index e6fab9d..2150db9 100644 --- a/app/llm_with_func_calling.py +++ b/app/llm_with_func_calling.py @@ -84,7 +84,7 @@ async def draft_response(self, request: ResponseRequiredRequest): func_call = {} func_arguments = "" 
stream = await acompletion( - model="gpt-4-turbo-preview", # Or use a 3.5 model for speed + model="gpt-4-turbo-preview", # Or use a 3.5 / Claude Haiku / any other model for speed messages=prompt, stream=True, # Step 2: Add the function into your request