finic-ai · maishathasin · Apr 30, 2024 · Apr 30, 2024 · Apr 30, 2024 · Apr 30, 2024
diff --git a/doctran/doctran.py b/doctran/doctran.py
@@ -75,6 +75,7 @@ class Document(BaseModel):
     content_type: ContentType
     raw_content: str
     transformed_content: str
+    system: Optional[str] = None  # optional system prompt; explicit default keeps the field non-required
     config: DoctranConfig
     extracted_properties: Optional[Dict] = {}
     metadata: Optional[Dict[str, Any]] = None
@@ -193,7 +194,7 @@ def __init__(self, openai_api_key: str = None, openai_model: str = "gpt-4", open
         if os.environ.get('OPENAI_API_VERSION'):
             self.config.openai.api_version = os.environ['OPENAI_API_VERSION']
 
-    def parse(self, *, content: str, content_type: ContentType = "text", uri: str = None, metadata: dict = None) -> Document:
+    def parse(self, *, content: str, system: Optional[str] = None, content_type: ContentType = "text", uri: str = None, metadata: dict = None) -> Document:
         '''
         Parse raw text and apply different chunking schemes based on the content type.
 
@@ -204,5 +205,5 @@ def parse(self, *, content: str, content_type: ContentType = "text", uri: str =
             uri = str(uuid.uuid4())
         if content_type == ContentType.text.value:
             # TODO: Optional chunking for documents that are too large
-            document = Document(id=str(uuid.uuid4()), content_type=content_type, raw_content=content, transformed_content=content, config=self.config, uri=uri, metadata=metadata)
+            document = Document(id=str(uuid.uuid4()), content_type=content_type, raw_content=content, transformed_content=content, system=system, config=self.config, uri=uri, metadata=metadata)
             return document
diff --git a/doctran/transformers/transformers.py b/doctran/transformers/transformers.py
@@ -7,6 +7,7 @@
 import tiktoken
 from doctran import Document, DoctranConfig, ExtractProperty, RecognizerEntity
 
+
 class TooManyTokensException(Exception):
     def __init__(self, content_token_size: int, token_limit: int):
         super().__init__(f"OpenAI document transformation failed. The document is {content_token_size} tokens long, which exceeds the token limit of {token_limit}.")
@@ -59,7 +60,9 @@ def executeOpenAICall(self, document: Document) -> Document:
             function_call = OpenAIFunctionCall(
                 seed=self.config.openai_deployment_id,
                 model=self.config.openai_model,
-                messages=[{"role": "user", "content": document.transformed_content}],
+                messages=[  # a system message needs string content, so include it only when a prompt is set
+                    *([{"role": "system", "content": document.system}] if document.system else []),
+                    {"role": "user", "content": document.transformed_content}],
                 tools=[{
                     "type": "function",
                     "function": {