-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathchatbot.py
More file actions
152 lines (124 loc) · 6.39 KB
/
chatbot.py
File metadata and controls
152 lines (124 loc) · 6.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import logging
import re
from typing import Dict, List, Optional

import openai

from config import Config
from vector_store import VectorStore
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class WUEBChatbot:
    """RAG chatbot for WUEB (Wroclaw University of Economics and Business).

    Retrieves relevant chunks of the university's governing documents from a
    vector store and asks an OpenAI chat model to answer strictly from that
    retrieved context.
    """

    # Flagged terms for the basic appropriateness filter in validate_query().
    # Matched as whole words so benign queries ("hackathon") are not rejected.
    _INAPPROPRIATE_KEYWORDS = ('hack', 'cheat', 'illegal', 'unauthorized')

    # Accepted query length bounds (characters) for validate_query().
    _MIN_QUERY_LEN = 3
    _MAX_QUERY_LEN = 500

    def __init__(self):
        """Load configuration, the vector store, and the OpenAI client."""
        self.config = Config()
        self.vector_store = VectorStore()
        self.openai_client = openai.OpenAI(api_key=self.config.OPENAI_API_KEY)

    def generate_response(self, query: str, context_docs: List[Dict]) -> str:
        """Generate a response using OpenAI based on query and context documents.

        Args:
            query: The user's question.
            context_docs: Retrieved documents; each dict must provide a
                ``'text'`` key and ``'metadata'['source_file']``.

        Returns:
            The model's answer, or a generic apology string if any step fails
            (the error is logged, never raised to the caller).
        """
        try:
            # Prepare context from relevant documents.
            context_text = self._prepare_context(context_docs)
            # Create the prompt.
            prompt = self._create_prompt(query, context_text)
            # Generate response.
            response = self.openai_client.chat.completions.create(
                model=self.config.OPENAI_MODEL,
                messages=[
                    {"role": "system", "content": self.config.SYSTEM_PROMPT},
                    {"role": "user", "content": prompt},
                ],
                max_tokens=self.config.MAX_TOKENS,
                temperature=self.config.TEMPERATURE,
            )
            content = response.choices[0].message.content
            # content can legitimately be None (e.g. tool-call or filtered
            # completion) — fail into the apology path instead of raising an
            # incidental AttributeError on .strip().
            if not content:
                raise ValueError("OpenAI returned an empty completion")
            return content.strip()
        except Exception:
            # logger.exception records the traceback, unlike logger.error(f"...").
            logger.exception("Error generating response")
            return "I apologize, but I encountered an error while processing your request. Please try again or contact the university directly for assistance."

    def _prepare_context(self, context_docs: List[Dict]) -> str:
        """Format retrieved documents into a numbered, source-labelled context string."""
        if not context_docs:
            return "No relevant documents found."
        context_parts = []
        for i, doc in enumerate(context_docs, 1):
            source_info = f"Document {i}: {doc['metadata']['source_file']}"
            context_parts.append(f"{source_info}\n{doc['text']}\n")
        return "\n".join(context_parts)

    def _create_prompt(self, query: str, context: str) -> str:
        """Build the user-turn prompt embedding the retrieved context and the query."""
        prompt = f"""Based on the following context from WUEB governing documents, please answer the user's question.
Context from WUEB documents:
{context}
User Question: {query}
Please provide a comprehensive and accurate answer based only on the information provided in the context above. If the information is not available in the provided context, please state that you don't have enough information and suggest contacting the official WUEB website or university administration.
Remember to:
1. Only use information from the provided context
2. Be specific and cite the source document when possible
3. If unsure, recommend contacting official channels
4. Be professional and helpful in your response"""
        return prompt

    def process_query(self, query: str) -> Dict:
        """Run the full RAG pipeline for one query: search, filter, answer.

        Returns:
            Dict with keys ``'query'``, ``'response'``, ``'confidence'``
            (``'high'``/``'medium'``/``'low'``, or ``'error'`` on failure),
            ``'relevant_documents'``, ``'total_documents_found'`` and
            ``'filtered_documents'``; on failure an ``'error'`` key is added.
        """
        try:
            # Lazy %-style args avoid f-string formatting when INFO is disabled.
            logger.info("Processing query: %s", query)
            # Search for relevant documents, then drop low-similarity hits.
            search_results = self.vector_store.search_similar(query)
            filtered_results = self.vector_store.filter_by_similarity(search_results)
            if filtered_results:
                response = self.generate_response(query, filtered_results)
                # Heuristic: a top hit above 0.8 similarity counts as high confidence.
                confidence = "high" if filtered_results[0]['similarity_score'] > 0.8 else "medium"
            else:
                response = "I don't have enough information to answer this question accurately. Please refer to the official WUEB website (https://www.ue.wroc.pl) or contact the university directly for the most up-to-date information."
                confidence = "low"
            return {
                'query': query,
                'response': response,
                'confidence': confidence,
                'relevant_documents': filtered_results,
                'total_documents_found': len(search_results),
                'filtered_documents': len(filtered_results),
            }
        except Exception as e:
            logger.exception("Error processing query")
            return {
                'query': query,
                'response': "I apologize, but I encountered an error while processing your request. Please try again or contact the university directly for assistance.",
                'confidence': 'error',
                'relevant_documents': [],
                'total_documents_found': 0,
                'filtered_documents': 0,
                'error': str(e),
            }

    def get_system_info(self) -> Dict:
        """Get information about the chatbot system (models, thresholds, store stats)."""
        collection_info = self.vector_store.get_collection_info()
        return {
            'system_name': 'WUEB Document Assistant',
            'description': 'AI assistant for Wroclaw University of Economics and Business governing documents',
            'vector_store_info': collection_info,
            'model': self.config.OPENAI_MODEL,
            'embedding_model': self.config.EMBEDDING_MODEL,
            'similarity_threshold': self.config.SIMILARITY_THRESHOLD,
        }

    def validate_query(self, query: str) -> Dict:
        """Validate if a query is appropriate for the system.

        Returns:
            ``{'valid': bool, 'reason': str}``.
        """
        # Check if query is too short.
        if len(query.strip()) < self._MIN_QUERY_LEN:
            return {
                'valid': False,
                'reason': 'Query is too short. Please provide a more detailed question.'
            }
        # Check if query is too long.
        if len(query) > self._MAX_QUERY_LEN:
            return {
                'valid': False,
                'reason': 'Query is too long. Please provide a more concise question.'
            }
        # Basic inappropriate-content check. Whole-word match (\b boundaries)
        # so e.g. "hackathon" or "cheatsheet" is not a false positive.
        query_lower = query.lower()
        for keyword in self._INAPPROPRIATE_KEYWORDS:
            if re.search(rf"\b{keyword}\b", query_lower):
                return {
                    'valid': False,
                    'reason': 'This query appears to be inappropriate. Please ask about WUEB policies and procedures.'
                }
        return {'valid': True, 'reason': 'Query is valid'}