change to math tutor

NoCapCbas · NoCapCbas · commit 90d2b3226dee · 2025-01-22T18:26:34.000-05:00
diff --git a/.github/workflows/dog-chatbot.yml b/.github/workflows/dog-chatbot.yml
@@ -1,4 +1,4 @@
-name: Dog Breed Assistant Chatbot
+name: Math Assistant Chatbot
 on:
   push:
     branches:
@@ -47,4 +47,4 @@ jobs:
           context: .
           file: dockerfile.prod
           push: true
-          tags: ghcr.io/nocapcbas/dog-breed-assistant-chatbot:prod
+          tags: ghcr.io/nocapcbas/math-assistant-chatbot:prod
diff --git a/Makefile b/Makefile
@@ -1,8 +1,8 @@
 build:
-	docker build -t dog-breed-assistant-chatbot .
+	docker build -f dockerfile.prod -t math-assistant-chatbot .
 
 run:
-	docker run -p 8501:8501 dog-breed-assistant-chatbot
+	docker run -p 8080:8080 math-assistant-chatbot
 
 local:
-	streamlit run app.py --server.port=8080
+	streamlit run app.py --server.port=8080
diff --git a/README.md b/README.md
@@ -1,10 +1,10 @@
-# Dog Breed Assistant Chatbot
+# Math Assistant Chatbot
 
-This is a chatbot using Llama 2, Sentence Transformers, CTransformers, Langchain, and Streamlit.
+This is a chatbot using Phi 3, Sentence Transformers, CTransformers, Langchain, and Streamlit.
 
-A Streamlit-based chatbot that helps users learn about different dog breeds. Built with:
+A Streamlit-based chatbot that helps users work through math problems step by step. Built with:
 - Streamlit for the web interface
-- Mistral 7B (via Ollama) for the language model
+- Phi 3 (via Ollama) for the language model
 - FAISS for vector storage
 - LangChain for the conversation chain
 
diff --git a/app.py b/app.py
@@ -1,19 +1,14 @@
 import streamlit as st 
-import pandas as pd
 from streamlit_chat import message
-from langchain_community.document_loaders.csv_loader import CSVLoader
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_community.vectorstores import FAISS
 from langchain_community.llms import Ollama
-from langchain.chains import ConversationalRetrievalChain
 import time
+import re
 
-DB_FAISS_PATH = 'vectorstore/db_faiss'
-CSV_FILE_PATH = r'dogs_cleaned.csv'  # Replace with your CSV file path
 st.set_page_config(
-    page_title="Dog Breed Assistant",
-    page_icon="🐕",
+    page_title="Math Assistant",
+    page_icon="🔢",
 )
+
 # Hide Streamlit's default menu and footer
 hide_streamlit_style = """
             <style>
@@ -24,60 +19,48 @@
             """
 st.markdown(hide_streamlit_style, unsafe_allow_html=True)
 
-
-#Loading the model
 def load_llm():
-    # Load Mistral through Ollama
+    # Load Phi-3 through Ollama
     llm = Ollama(
-        model="mistral",  # Using Mistral 7B - a powerful open source model
-        # model="phi",
-        temperature=0.5,
+        model="phi3",  # Using Phi-3 - good balance of size and math capability
+        temperature=0.1,  # Lower temperature for more precise math
     )
     return llm
 
-@st.cache_resource  # This ensures the data is loaded only once
-def load_and_process_data():
-    loader = CSVLoader(file_path=CSV_FILE_PATH, encoding="utf-8", csv_args={
-                'delimiter': ','})
-    data = loader.load()
-    
-    embeddings = HuggingFaceEmbeddings(
-        model_name='sentence-transformers/all-MiniLM-L6-v2',
-        model_kwargs={'device': 'cpu'},
-        encode_kwargs={'normalize_embeddings': True}
-    )
+def is_math_related(query):
+    # Math-related keywords, matched as substrings of the lowercased query
+    math_keywords = [
+        'math', 'calculate', 'solve', 'equation', 'problem', 'plus', 'minus',
+        'multiply', 'divide', 'sum', 'difference', 'product', 'quotient',
+        'algebra', 'geometry', 'trigonometry', 'calculus', 'number',
+        'fraction', 'decimal', 'percentage', 'square root', 'power',
+        'logarithm', 'factorial', 'series', 'sequence', 'probability',
+        'statistics', 'mean', 'median', 'mode', 'variance', 'derivative',
+        'integral', 'function', 'graph', 'plot', 'coordinate',
+        'add', 'subtract',
+    ]
     
-    db = FAISS.from_documents(data, embeddings)
-    llm = load_llm()
-    chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=db.as_retriever())
-    return chain
-
-st.markdown("<h1 style='text-align: center; color: white;'>🐕 Dog Breed Assistant 🦮</h1>", unsafe_allow_html=True)
-st.markdown("<h3 style='text-align: center; color: white;'>Your Friendly Guide to Dog Breeds</h3>", unsafe_allow_html=True)
-st.markdown("<h3 style='text-align: center; color: red;'> This chatbot uses open source model Phi due to its smaller size and faster response time</h3>", unsafe_allow_html=True)
-
-# Load the chain once at startup
-chain = load_and_process_data()
-
-def is_dog_related(query):
-    # List of dog-related keywords
-    dog_keywords = [
-        'dog', 'breed', 'puppy', 'canine', 'hound', 'terrier', 'shepherd', 
-        'retriever', 'poodle', 'bulldog', 'labrador', 'german', 'golden',
-        'bark', 'pet', 'training', 'grooming', 'walk', 'leash', 'collar',
-        'kennel', 'veterinary', 'vet', 'pup', 'pooch', 'dog breed', 'dog breeds',
-        'chew', 'bite', 'paw', 'paws', 'paw print', 'paw print', 'paw print', 'paw print',
+    # Mathematical symbols and patterns
+    math_patterns = [
+        r'[\d+\-*/^√∫∑π=]',  # Basic math operators and symbols
+        r'\d+',              # Numbers
+        r'[xyz]\s*=',        # Variables
+        r'\b\d+\s*[\+\-\*/]\s*\d+\b',  # Basic arithmetic
+        r'\b\d+\s*%',        # Percentages
+        r'sqrt|sin|cos|tan|log|ln',  # Math functions
     ]
-    # read first column of csv file
-    df = pd.read_csv(CSV_FILE_PATH)
-    df = df.iloc[:, 0]
-    # add all the values in the first column to the dog_keywords list as lowercase
-    dog_keywords.extend(df.str.lower().tolist())
-    # Convert query to lowercase for case-insensitive matching
+    
     query_lower = query.lower()
     
-    # Check if any dog-related keyword is in the query
-    return any(keyword in query_lower for keyword in dog_keywords)
+    # Check for math keywords
+    if any(keyword in query_lower for keyword in math_keywords):
+        return True
+        
+    # Check for math patterns
+    if any(re.search(pattern, query) for pattern in math_patterns):
+        return True
+        
+    return False
 
 def conversational_chat(query):
     current_time = time.time()
@@ -87,89 +70,94 @@ def conversational_chat(query):
         time_since_last_query = current_time - st.session_state.last_query_time
         if time_since_last_query < 10:
             remaining_time = int(10 - time_since_last_query)
-            st.error(f"Rate limit exceeded! ⏳ Please wait {remaining_time} seconds before sending another message! This is to prevent abuse and overload my server. This rate limit is applied to all users. Resend you query to continue and Thanks for your patience!")
+            st.error(f"Rate limit exceeded! ⏳ Please wait {remaining_time} seconds before sending another message!")
             return None
 
-    # Check if query is dog-related
-    if not is_dog_related(query):
-        return "I am a dog breed expert assistant. I can only answer questions about dogs and dog breeds. Please ask me about dogs! 🐕"
+    # Check if query is math-related
+    # if not is_math_related(query):
+    #     return "I am a math assistant. I can only help with mathematical questions and calculations. Please ask me about math! 🔢"
 
     st.session_state.last_query_time = current_time
 
-    with st.spinner('🐾 Fetching response... Thank you for your patience! 🐕'):
-        context = """You are a dog breed expert assistant. You must ONLY answer questions about dogs and dog breeds.
-        If the question is not about dogs, respond with "I am a dog breed expert assistant. I can only answer questions about dogs and dog breeds."
-
-        These are the columns in the data:
-        Breed Name,Detailed Description Link,Dog Size,Dog Breed Group,Height,"Avg. Height, cm",Weight,"Avg. Weight, kg",Life Span,"Avg. Life Span, years",Adaptability,Adapts Well To Apartment Living,Good For Novice Owners,Sensitivity Level,Tolerates Being Alone,Tolerates Cold Weather,Tolerates Hot Weather,All Around Friendliness,Affectionate With Family,Kid-Friendly,Dog Friendly,Friendly Toward Strangers,Health And Grooming Needs,Amount Of Shedding,Drooling Potential,Easy To Groom,General Health,Potential For Weight Gain,Size,Trainability,Easy To Train,Intelligence,Potential For Mouthiness,Prey Drive,Tendency To Bark Or Howl,Wanderlust Potential,Physical Needs,Energy Level,Intensity,Exercise Needs,Potential For Playfulness
+    with st.spinner('🔢 Computing... Thank you for your patience!'):
+        context = """You are a math assistant. You must ONLY answer questions about mathematics.
+        If the question is not about math, or about how to solve a math problem, respond with "I am a math assistant. I can only help with mathematical questions."
 
-        The data contains ratings on a scale of 1-5 for columns (Adaptability, Adapts Well To Apartment Living, Good For Novice Owners, Sensitivity Level, Tolerates Being Alone, Tolerates Cold Weather, Tolerates Hot Weather, All Around Friendliness, Affectionate With Family, Kid-Friendly, Dog Friendly, Friendly Toward Strangers, Health And Grooming Needs, Amount Of Shedding, Drooling Potential, Easy To Groom, General Health, Potential For Weight Gain, Size, Trainability, Easy To Train, Intelligence, Potential For Mouthiness, Prey Drive, Tendency To Bark Or Howl, Wanderlust Potential, Physical Needs, Energy Level, Intensity, Exercise Needs, Potential For Playfulness) where:
-        - 5 is the BEST/HIGHEST score (excellent)
-        - 4 is ABOVE AVERAGE
-        - 3 is AVERAGE
-        - 2 is BELOW AVERAGE
-        - 1 is the WORST/LOWEST score (poor)
+        Example questions:
+        - What is the square root of 16?
+        - How do I solve the equation 2x + 3 = 7?
+        - What is the sum of 10 and 5?
+        - What is the product of 3 and 4?
+        - What is the difference between 10 and 5?
         
         Important rules:
-        1. NEVER answer questions that are not about dogs
-        2. Do not mention the data or ratings in your response
-        3. If unsure, say "Sorry, I don't know the answer to that question"
-        4. Keep responses focused only on dogs and dog breeds
-        5. Be friendly and helpful, but stay strictly within dog-related topics
+        1. NEVER answer questions that are not about math
+        2. Show your work step by step
+        3. If unsure, say "Sorry, I don't know how to solve this problem"
+        4. Use proper mathematical notation
+        5. Be precise and accurate
+        6. Explain concepts clearly
+        7. If the question involves complex calculations, break them down
+        8. Use LaTeX notation for complex mathematical expressions
         
         User Question: """
         
         enhanced_query = context + query
-        
-        result = chain({"question": enhanced_query, "chat_history": st.session_state['history']})
-        st.session_state['history'].append((query, result["answer"]))
-        return result["answer"]
+        llm = load_llm()
+        result = llm.invoke(enhanced_query)
+        return result
 
+# Initialize session state
 if 'history' not in st.session_state:
     st.session_state['history'] = []
 
 if 'generated' not in st.session_state:
-    st.session_state['generated'] = ["Woof! I'm your friendly dog breed expert! Ask me anything about dogs! 🐕"]
+    st.session_state['generated'] = ["Hello! I'm your math assistant. Ask me any math question! 🔢"]
 
 if 'past' not in st.session_state:
     st.session_state['past'] = ["Hey! 👋"]
 
+# Title and description
+st.markdown("<h1 style='text-align: center; color: white;'>🔢 Math Assistant</h1>", unsafe_allow_html=True)
+st.markdown("<h3 style='text-align: center; color: white;'>Your Step-by-Step Math Problem Solver</h3>", unsafe_allow_html=True)
+
 # Add suggested questions
-st.sidebar.markdown("### 📝 Suggested Questions")
+st.sidebar.markdown("### 📝 Example Math Problems")
 suggested_questions = [
-    "What dog breeds are best for beginners?",
-    "What are some rare or unusual dog breeds?",
-    "What are the largest dog breeds?",
-    "Which breeds are best with children?",
-    "What are the most low-maintenance dog breeds?",
-    "Which breeds are best for apartment living?"
+    "What is 15% of 80?",
+    "Solve the equation: 2x + 5 = 13",
+    "Find the area of a circle with radius 5",
+    "What is the square root of 144?",
+    "Calculate 3^4",
+    "Find the mean of 12, 15, 18, 21, 24"
 ]
 
 if st.sidebar.button("Clear Chat History"):
     st.session_state['history'] = []
     st.session_state['past'] = ["Hey! 👋"]
-    st.session_state['generated'] = ["Woof! I'm your friendly dog breed expert! Ask me anything about dogs! 🐕"]
+    st.session_state['generated'] = ["Hello! I'm your math assistant. Ask me any math question! 🔢"]
 
 for question in suggested_questions:
     if st.sidebar.button(question):
-        st.session_state['past'].append(question)
         output = conversational_chat(question)
-        st.session_state['generated'].append(output)
+        if output:  # None means the query was rate-limited; keep past/generated in step
+            st.session_state['past'].append(question)
+            st.session_state['generated'].append(output)
 
-#container for the chat history
+# Chat interface
 response_container = st.container()
-#container for the user's text input
 container = st.container()
 
 with container:
     with st.form(key='my_form', clear_on_submit=True):
-        user_input = st.text_input("Query:", placeholder="Ask me anything about dogs! 🐕", key='input')
-        submit_button = st.form_submit_button(label='Send')
+        user_input = st.text_input("Query:", placeholder="Enter your math question here! 🔢", key='input')
+        submit_button = st.form_submit_button(label='Calculate')
         
     if submit_button and user_input:
         output = conversational_chat(user_input)
-        st.session_state['past'].append(user_input)
-        st.session_state['generated'].append(output)
+        if output:
+            st.session_state['past'].append(user_input)
+            st.session_state['generated'].append(output)
 
 if st.session_state['generated']:
     with response_container:
@@ -180,8 +168,8 @@ def conversational_chat(query):
                    avatar_style="big-smile")
             message(st.session_state["generated"][i], 
                    key=str(i), 
-                   avatar_style="bottts-neutral",  # This gives a more dog-like cartoon avatar
-                   seed="Buddy")  # This helps maintain consistent avatar appearance
+                   avatar_style="bottts",
+                   seed="Math")
 
 
 
diff --git a/dockerfile.prod b/dockerfile.prod
@@ -1,47 +1,37 @@
-# Build stage
-FROM python:3.12-slim-bullseye as builder
+# Use an official Python runtime as a parent image
+FROM python:3.12-slim
 
+# Set working directory in the container
 WORKDIR /app
 
-# Install build dependencies
+# Install system dependencies (curl is needed to fetch the Ollama installer)
 RUN apt-get update && apt-get install -y \
     curl \
-    build-essential \
     && rm -rf /var/lib/apt/lists/*
 
-# Copy and install requirements with CPU-only PyTorch
-COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt \
-    && pip uninstall -y torch \
-    && pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
-
-# Final stage
-FROM python:3.12-slim-bullseye
-
-WORKDIR /app
+# Install Ollama
+RUN curl https://ollama.ai/install.sh | sh
 
-# Install only essential runtime dependencies
-RUN apt-get update && apt-get install -y \
-    curl \
-    && rm -rf /var/lib/apt/lists/* \
-    && curl https://ollama.ai/install.sh | sh
+# Copy the requirements
+COPY requirements.txt .
 
-# Copy only necessary Python packages
-COPY --from=builder /usr/local/lib/python3.12/site-packages/ /usr/local/lib/python3.12/site-packages/
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
 
-# Copy application files
-COPY app.py .
-COPY dogs_cleaned.csv .
+# Copy the application
+COPY . .
 
-# Start Ollama, pull model, and clean up unnecessary files
+# Start Ollama service and pull the model
 RUN ollama serve & \
     sleep 10 && \
-    ollama pull mistral && \
-    rm -rf /root/.cache
+    ollama pull phi3
 
+# Expose the port Streamlit runs on
 EXPOSE 8080
 
-RUN echo '#!/bin/bash\nollama serve & sleep 5 && streamlit run app.py --server.address=0.0.0.0 --server.port=8080' > start.sh \
-    && chmod +x start.sh
+# Create a shell script to start both Ollama and Streamlit
+RUN echo '#!/bin/bash\nollama serve & sleep 5 && streamlit run app.py --server.address=0.0.0.0 --server.port=8080' > start.sh
+RUN chmod +x start.sh
 
+# Command to run the script
 CMD ["./start.sh"]
diff --git a/dogs_cleaned.csv b/dogs_cleaned.csv