LLM Google T5 integration

#3
by AryanJh - opened
Files changed (1)
  1. app.py +107 -65
app.py CHANGED
@@ -16,17 +16,26 @@ class BrockEventsRAG:
     def __init__(self):
         """Initialize the RAG system with improved caching"""
         self.model = SentenceTransformer('all-MiniLM-L6-v2')
-        self.chroma_client = chromadb.Client()
+        self.embeddings = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')
+
+        # ChromaDB client setup
+        self.chroma_client = chromadb.Client(Settings(persist_directory="chroma_db", chroma_db_impl="duckdb+parquet"))
+
+        # LLM model setup
+        self.tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
+        self.llm = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
+

         # Get current date range
         self.eastern = pytz.timezone('America/New_York')
         self.today = datetime.now(self.eastern).replace(hour=0, minute=0, second=0, microsecond=0)
         self.date_range_end = self.today + timedelta(days=14)
-
+
         # Cache directory setup
         os.makedirs("cache", exist_ok=True)
         self.cache_file = "cache/events_cache.json"
-
+
+
         # Initialize or reset collection
         try:
             self.collection = self.chroma_client.create_collection(
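
Note for reviewers: HuggingFaceEmbeddings and Settings are assumed to be imported near the top of app.py (the import hunk is not shown here), and the Settings(chroma_db_impl="duckdb+parquet") form is the pre-0.4 chromadb API. A minimal sketch of the persistence pattern this hunk relies on, with a placeholder collection name and record:

import chromadb
from chromadb.config import Settings

# Pre-0.4 persistent client; on chromadb >= 0.4 the equivalent is
# chromadb.PersistentClient(path="chroma_db").
client = chromadb.Client(Settings(persist_directory="chroma_db",
                                  chroma_db_impl="duckdb+parquet"))

# "events" and the sample record are placeholders for a smoke test.
collection = client.get_or_create_collection("events")
collection.add(ids=["e1"], documents=["Sample event for a smoke test"])
print(collection.count())  # expect 1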
@@ -42,69 +51,18 @@ class BrockEventsRAG:

         # Load initial events
         self.update_database()
-
-    def save_cache(self, data: dict):
-        """Save events data to cache file"""
-        try:
-            # Convert datetime objects to strings for JSON serialization
-            serializable_data = {
-                'last_update': data['last_update'],
-                'events': []
-            }
-
-            for event in data['events']:
-                event_copy = event.copy()
-                # Convert datetime objects to strings
-                if event_copy.get('start_time'):
-                    event_copy['start_time'] = event_copy['start_time'].isoformat()
-                if event_copy.get('end_time'):
-                    event_copy['end_time'] = event_copy['end_time'].isoformat()
-                serializable_data['events'].append(event_copy)
-
-            with open(self.cache_file, 'w', encoding='utf-8') as f:
-                json.dump(serializable_data, f, ensure_ascii=False, indent=2)
-            print(f"Cache saved successfully to {self.cache_file}")
-
-        except Exception as e:
-            print(f"Error saving cache: {e}")
-
-    def load_cache(self) -> dict:
-        """Load and parse cached events data"""
+
+    def fetch_rss_feed(self, url: str) -> List[Dict]:
+        """Fetch and parse RSS feed from the given URL"""
         try:
-            if os.path.exists(self.cache_file):
-                with open(self.cache_file, 'r', encoding='utf-8') as f:
-                    data = json.load(f)
-
-                # Convert string timestamps back to datetime objects
-                for event in data['events']:
-                    if event.get('start_time'):
-                        event['start_time'] = datetime.fromisoformat(event['start_time'])
-                    if event.get('end_time'):
-                        event['end_time'] = datetime.fromisoformat(event['end_time'])
-
-                return data
-            return {'last_update': None, 'events': []}
-
+            feed = feedparser.parse(url)
+            entries = feed.entries
+            print(f"Fetched {len(entries)} entries from the feed.")
+            return entries
         except Exception as e:
-            print(f"Error loading cache: {e}")
-            return {'last_update': None, 'events': []}
-
-    def should_update_cache(self) -> bool:
-        """Check if cache needs updating (older than 24 hours)"""
-        try:
-            cached_data = self.load_cache()
-            if not cached_data['last_update']:
-                return True
-
-            last_update = datetime.fromisoformat(cached_data['last_update'])
-            time_since_update = datetime.now() - last_update
-
-            return time_since_update.total_seconds() > 86400  # 24 hours
+            print(f"Error fetching RSS feed: {e}")
+            return []

-        except Exception as e:
-            print(f"Error checking cache: {e}")
-            return True
-
     def parse_event_datetime(self, entry) -> tuple:
         """Parse start and end times from both RSS and HTML"""
         try:
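
fetch_rss_feed assumes feedparser and typing's List/Dict are imported elsewhere in app.py; that hunk is not part of this diff. A quick sketch of what feedparser.parse returns, using a placeholder feed URL:

import feedparser

feed = feedparser.parse("https://example.edu/events/rss")  # placeholder URL
print(f"Fetched {len(feed.entries)} entries from the feed.")
for entry in feed.entries[:3]:
    # Common RSS fields; any given feed may omit some of them.
    print(entry.get("title"), entry.get("published"), entry.get("link"))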
@@ -294,6 +252,28 @@ class BrockEventsRAG:
         except Exception as e:
             print(f"Error during query: {e}")
             return None
+
+    def generate_response_with_llm(self, events: List[Dict]) -> str:
+        """Use the LLM to generate a natural language response for the given events."""
+        try:
+            if not events:
+                input_text = "There are no events matching the query. How should I respond?"
+            else:
+                event_summaries = "\n".join([
+                    f"Event: {event['title']}. Start: {event['start_time']}, Location: {event['location']}."
+                    for event in events
+                ])
+                input_text = f"Format this information into a friendly response: {event_summaries}"
+
+            inputs = self.tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
+            outputs = self.llm.generate(**inputs)
+            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            return response
+        except Exception as e:
+            print(f"Error generating response: {e}")
+            return "Sorry, I couldn't generate a response."
+
+
     def generate_response(self, question: str, history: list) -> str:
         """Generate a response based on the query and chat history"""
         try:
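
A standalone round trip through google/flan-t5-small, mirroring the tokenize/generate/decode flow added above. One thing worth checking in review: generate() defaults to a short output budget, so long event summaries can come back truncated; the max_new_tokens argument below is an addition for illustration, not part of this PR:

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")

# Prompt shaped like the one built in generate_response_with_llm;
# the event itself is made up for the example.
prompt = ("Format this information into a friendly response: "
          "Event: Trivia Night. Start: 7:00 pm, Location: Union Hall.")
inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
outputs = model.generate(**inputs, max_new_tokens=128)  # explicit output budget
print(tokenizer.decode(outputs[0], skip_special_tokens=True))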
@@ -308,7 +288,7 @@ class BrockEventsRAG:
             is_location_query = any(word in question_lower for word in ['where', 'location', 'place', 'building', 'room'])

             # Format the response
-            response = "Here are some relevant events I found:\n\n"
+            response = self.generate_response_with_llm(matched_events)

             # Add top 3 matching events
             for i, (doc, metadata) in enumerate(zip(results['documents'][0][:3], results['metadatas'][0][:3]), 1):
@@ -326,7 +306,69 @@ class BrockEventsRAG:
         except Exception as e:
             print(f"Error generating response: {e}")
             return "I encountered an error while searching for events. Please try asking in a different way."
-
+    def save_cache(self, data: dict):
+        """Save events data to cache file"""
+        try:
+            # Convert datetime objects to strings for JSON serialization
+            serializable_data = {
+                'last_update': data['last_update'],
+                'events': []
+            }
+
+            for event in data['events']:
+                event_copy = event.copy()
+                # Convert datetime objects to strings
+                if event_copy.get('start_time'):
+                    event_copy['start_time'] = event_copy['start_time'].isoformat()
+                if event_copy.get('end_time'):
+                    event_copy['end_time'] = event_copy['end_time'].isoformat()
+                serializable_data['events'].append(event_copy)
+
+            with open(self.cache_file, 'w', encoding='utf-8') as f:
+                json.dump(serializable_data, f, ensure_ascii=False, indent=2)
+            print(f"Cache saved successfully to {self.cache_file}")
+
+        except Exception as e:
+            print(f"Error saving cache: {e}")
+    """
+    def load_cache(self) -> dict:
+        # Load and parse cached events data
+        try:
+            if os.path.exists(self.cache_file):
+                with open(self.cache_file, 'r', encoding='utf-8') as f:
+                    data = json.load(f)
+
+                # Convert string timestamps back to datetime objects
+                for event in data['events']:
+                    if event.get('start_time'):
+                        event['start_time'] = datetime.fromisoformat(event['start_time'])
+                    if event.get('end_time'):
+                        event['end_time'] = datetime.fromisoformat(event['end_time'])
+
+                return data
+            return {'last_update': None, 'events': []}
+
+        except Exception as e:
+            print(f"Error loading cache: {e}")
+            return {'last_update': None, 'events': []}
+
+    def should_update_cache(self) -> bool:
+        # Check if cache needs updating (older than 24 hours)
+        try:
+            cached_data = self.load_cache()
+            if not cached_data['last_update']:
+                return True
+
+            last_update = datetime.fromisoformat(cached_data['last_update'])
+            time_since_update = datetime.now() - last_update
+
+            return time_since_update.total_seconds() > 86400  # 24 hours
+
+        except Exception as e:
+            print(f"Error checking cache: {e}")
+            return True
+    """
+
 def create_demo():
     # Initialize the RAG system
     rag_system = BrockEventsRAG()
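
A rough local smoke test for the pieces this PR touches, assuming the surrounding app.py runs as-is; the feed URL is a stand-in and the empty list is just a blank chat history:

rag = BrockEventsRAG()
entries = rag.fetch_rss_feed("https://example.edu/events/rss")  # stand-in URL
print(f"{len(entries)} entries fetched")
print(rag.generate_response("What events are happening this week?", []))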