Joash2024 committed on
Commit 0e7ff76 · Parent: 081a250

Add initial demo files

Files changed (6)
  1. DESCRIPTION.md +1 -0
  2. README.md +59 -8
  3. Spacefile +24 -0
  4. app.py +144 -0
  5. monitoring.py +97 -0
  6. requirements.txt +11 -0
DESCRIPTION.md ADDED
@@ -0,0 +1 @@
+ Interactive demo comparing base (1B) and fine-tuned (1.7B) LLMs
README.md CHANGED
@@ -1,14 +1,65 @@
  ---
- title: Math Llm Demo
+ title: Math Problem Solver Demo
- emoji: 📈
+ emoji: 🧮
- colorFrom: purple
+ colorFrom: blue
- colorTo: blue
+ colorTo: purple
  sdk: gradio
- sdk_version: 5.7.1
+ sdk_version: 4.0.0
  app_file: app.py
  pinned: false
- license: mit
- short_description: Interactive demo comparing base (1B) and fine-tuned (1.7B) LLMs
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Mathematics Problem Solver Demo
+
+ This demo showcases a comparison between base and fine-tuned language models in solving mathematical problems. It features real-time performance monitoring and supports multiple types of math problems.
+
+ ## Models Used
+
+ - Base Model: [Llama 3.2 1B](https://huggingface.co/Alexis-Az/Math-Problem-LlaMA-3.2-1B-GGUF)
+ - Fine-tuned Model: [SmolLM2 1.7B](https://huggingface.co/Alexis-Az/Math-Problem-LlaMA-3.2-1.7B-GGUF)
+
+ ## Features
+
+ - 🔢 Multiple problem types:
+   - Addition operations
+   - Root finding
+   - Derivatives
+   - Custom problems
+ - 📊 Real-time performance metrics:
+   - Response times
+   - Success rates
+   - Problem type distribution
+ - 🔄 Side-by-side model comparison
+ - ⚡ Example problems included
+
+ ## How to Use
+
+ 1. Select a problem type from the dropdown menu
+ 2. Enter your math problem in the input field
+ 3. Click "Solve" to see solutions from both models
+ 4. Compare the results and view performance metrics
+
+ ## Example Problems
+
+ Try these sample problems:
+
+ - Derivative: "Find the derivative of x^2 + 3x"
+ - Root Finding: "What is the square root of 144?"
+ - Addition: "Calculate 235 + 567"
+
+ ## Performance Monitoring
+
+ The interface includes a live dashboard showing:
+
+ - Average response times for each model
+ - Success rate comparison
+ - Distribution of problem types solved
+ - Real-time performance metrics
+
+ ## Project Details
+
+ This demo is part of a larger project comparing LLM performance on mathematical problems. The models have been fine-tuned on a custom dataset of mathematical problems to improve their problem-solving capabilities.
+
+ ## Credits
+
+ Models provided by [Alexis-Az](https://huggingface.co/Alexis-Az)
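Note: the "How to Use" steps in the README can also be driven programmatically once the Space is live. A minimal sketch with `gradio_client`, assuming the Space id `Joash2024/math-llm-demo` from the Spacefile below and a default endpoint name — both are assumptions not confirmed by this commit; `client.view_api()` lists the real endpoints:

```python
from gradio_client import Client

# Assumed Space id (taken from the Spacefile); not confirmed by this commit.
client = Client("Joash2024/math-llm-demo")

# Endpoint name is an assumption -- inspect client.view_api() for the real one.
base, finetuned, metrics = client.predict(
    "Find the derivative of x^2 + 3x",  # problem_input
    "Derivative",                       # problem_type
    api_name="/solve_problem",
)
print(base)
print(finetuned)
```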
Spacefile ADDED
@@ -0,0 +1,24 @@
+ # Spacefile for math-llm-demo
+ configuration:
+   name: math-llm-demo
+   organization: Joash2024
+   hardware:
+     cpu: 2
+     memory: 16
+   system:
+     python_version: "3.10"
+
+ sdk: gradio
+ sdk_version: 4.0.0
+ python_packages:
+   - "torch>=2.0.0"
+   - "transformers>=4.30.0"
+   - "accelerate>=0.20.0"
+   - "numpy>=1.21.0"
+
+ app_file: app.py
+ app_port: 7860
+
+ models:
+   - "Alexis-Az/Math-Problem-LlaMA-3.2-1B-GGUF"
+   - "Alexis-Az/Math-Problem-LlaMA-3.2-1.7B-GGUF"
app.py ADDED
@@ -0,0 +1,144 @@
+ import gradio as gr
+ from transformers import AutoTokenizer, pipeline
+ import torch
+ import numpy as np
+ from monitoring import PerformanceMonitor, measure_time
+
+ # Model IDs
+ BASE_MODEL_ID = "Alexis-Az/Math-Problem-LlaMA-3.2-1B-GGUF"
+ FINETUNED_MODEL_ID = "Alexis-Az/Math-Problem-LlaMA-3.2-1.7B-GGUF"
+
+ # Initialize performance monitor
+ monitor = PerformanceMonitor()
+
+ def format_prompt(problem):
+     """Format the input problem according to the model's expected format"""
+     return f"<|im_start|>user\nCan you help me solve this math problem? {problem}<|im_end|>\n"
+
+ @measure_time
+ def get_model_response(problem, model_id):
+     """Get response from a specific model"""
+     try:
+         # Initialize pipeline
+         pipe = pipeline(
+             "text-generation",
+             model=model_id,
+             torch_dtype=torch.float16,
+             device_map="auto",
+         )
+
+         # Format prompt and generate response
+         prompt = format_prompt(problem)
+         response = pipe(
+             prompt,
+             max_new_tokens=100,
+             temperature=0.1,
+             top_p=0.95,
+             repetition_penalty=1.15
+         )[0]["generated_text"]
+
+         # Extract assistant's response
+         assistant_response = response.split("<|im_start|>assistant\n")[-1].split("<|im_end|>")[0]
+         return assistant_response.strip()
+     except Exception as e:
+         return f"Error: {str(e)}"
+
+ def solve_problem(problem, problem_type):
+     """Solve a math problem using both models"""
+     if not problem:
+         return "Please enter a problem", "Please enter a problem", None
+
+     # Record problem type
+     monitor.record_problem_type(problem_type)
+
+     # Add problem type context if provided
+     if problem_type != "Custom":
+         problem = f"{problem_type}: {problem}"
+
+     # Get responses from both models with timing
+     base_response, base_time = get_model_response(problem, BASE_MODEL_ID)
+     finetuned_response, finetuned_time = get_model_response(problem, FINETUNED_MODEL_ID)
+
+     # Record response times
+     monitor.record_response_time("base", base_time)
+     monitor.record_response_time("finetuned", finetuned_time)
+
+     # Record success (basic check - no error message)
+     monitor.record_success("base", not base_response.startswith("Error"))
+     monitor.record_success("finetuned", not finetuned_response.startswith("Error"))
+
+     # Get updated statistics
+     stats = monitor.get_statistics()
+
+     # Format statistics for display
+     stats_display = f"""
+ ### Performance Metrics
+
+ #### Response Times (seconds)
+ - Base Model: {stats.get('base_avg_response_time', 0):.2f} avg
+ - Fine-tuned Model: {stats.get('finetuned_avg_response_time', 0):.2f} avg
+
+ #### Success Rates
+ - Base Model: {stats.get('base_success_rate', 0):.1f}%
+ - Fine-tuned Model: {stats.get('finetuned_success_rate', 0):.1f}%
+
+ #### Problem Type Distribution
+ """
+     for ptype, percentage in stats.get('problem_type_distribution', {}).items():
+         stats_display += f"- {ptype}: {percentage:.1f}%\n"
+
+     return base_response, finetuned_response, stats_display
+
+ # Create Gradio interface
+ with gr.Blocks(title="Mathematics Problem Solver") as demo:
+     gr.Markdown("# Mathematics Problem Solver")
+     gr.Markdown("Compare solutions between base (1B) and fine-tuned (1.7B) models")
+
+     with gr.Row():
+         with gr.Column():
+             problem_type = gr.Dropdown(
+                 choices=["Addition", "Root Finding", "Derivative", "Custom"],
+                 value="Custom",
+                 label="Problem Type"
+             )
+             problem_input = gr.Textbox(
+                 label="Enter your math problem",
+                 placeholder="Example: Find the derivative of x^2 + 3x"
+             )
+             solve_btn = gr.Button("Solve", variant="primary")
+
+     with gr.Row():
+         with gr.Column():
+             gr.Markdown("### Base Model (1B)")
+             base_output = gr.Textbox(label="Base Model Solution", lines=5)
+
+         with gr.Column():
+             gr.Markdown("### Fine-tuned Model (1.7B)")
+             finetuned_output = gr.Textbox(label="Fine-tuned Model Solution", lines=5)
+
+     # Performance metrics display
+     with gr.Row():
+         metrics_display = gr.Markdown("### Performance Metrics\n*Solve a problem to see metrics*")
+
+     # Example problems
+     gr.Examples(
+         examples=[
+             ["Find the derivative of x^2 + 3x", "Derivative"],
+             ["What is the square root of 144?", "Root Finding"],
+             ["Calculate 235 + 567", "Addition"],
+         ],
+         inputs=[problem_input, problem_type],
+         outputs=[base_output, finetuned_output, metrics_display],
+         fn=solve_problem,
+         cache_examples=True,
+     )
+
+     # Connect the interface
+     solve_btn.click(
+         fn=solve_problem,
+         inputs=[problem_input, problem_type],
+         outputs=[base_output, finetuned_output, metrics_display]
+     )
+
+ if __name__ == "__main__":
+     demo.launch()
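One caveat on app.py: `pipeline(...)` is handed a GGUF repo id directly, which the pinned `transformers>=4.30.0` cannot load. Newer `transformers` releases (>= 4.41) can dequantize a GGUF checkpoint when given an explicit `gguf_file`. A hedged sketch of that path, with a hypothetical quantization filename (check the repo's file listing for the real one):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_id = "Alexis-Az/Math-Problem-LlaMA-3.2-1B-GGUF"
gguf_file = "model-Q4_K_M.gguf"  # hypothetical filename, not confirmed

# transformers >= 4.41 can dequantize a GGUF checkpoint given gguf_file
tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=gguf_file)
model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=gguf_file)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
```

Building the pipeline once at module scope, rather than inside `get_model_response`, would also avoid reloading the weights on every request.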
monitoring.py ADDED
@@ -0,0 +1,97 @@
+ import time
+ from datetime import datetime
+ import json
+ import os
+ from collections import defaultdict
+ import threading
+ import numpy as np
+
+ class PerformanceMonitor:
+     def __init__(self, metrics_file="metrics.json"):
+         self.metrics_file = metrics_file
+         self.metrics = defaultdict(list)
+         self.lock = threading.RLock()  # re-entrant: _save_metrics acquires it while a record_* method already holds it
+         self._load_metrics()
+
+     def _load_metrics(self):
+         """Load existing metrics from file"""
+         if os.path.exists(self.metrics_file):
+             try:
+                 with open(self.metrics_file, 'r') as f:
+                     self.metrics.update(json.load(f))
+             except json.JSONDecodeError:
+                 pass
+
+     def _save_metrics(self):
+         """Save metrics to file"""
+         with self.lock:
+             with open(self.metrics_file, 'w') as f:
+                 json.dump(dict(self.metrics), f)
+
+     def record_response_time(self, model_id, duration):
+         """Record response time for a model"""
+         with self.lock:
+             self.metrics[f"{model_id}_response_times"].append({
+                 'timestamp': datetime.now().isoformat(),
+                 'duration': duration
+             })
+             self._save_metrics()
+
+     def record_success(self, model_id, success):
+         """Record success/failure for a model"""
+         with self.lock:
+             self.metrics[f"{model_id}_success_rate"].append({
+                 'timestamp': datetime.now().isoformat(),
+                 'success': success
+             })
+             self._save_metrics()
+
+     def record_problem_type(self, problem_type):
+         """Record usage of different problem types"""
+         with self.lock:
+             self.metrics['problem_types'].append({
+                 'timestamp': datetime.now().isoformat(),
+                 'type': problem_type
+             })
+             self._save_metrics()
+
+     def get_statistics(self):
+         """Calculate and return performance statistics"""
+         stats = {}
+
+         # Response time statistics
+         for model in ['base', 'finetuned']:
+             times = [x['duration'] for x in self.metrics.get(f"{model}_response_times", [])]
+             if times:
+                 stats[f"{model}_avg_response_time"] = np.mean(times)
+                 stats[f"{model}_max_response_time"] = np.max(times)
+                 stats[f"{model}_min_response_time"] = np.min(times)
+
+         # Success rate statistics
+         for model in ['base', 'finetuned']:
+             successes = [x['success'] for x in self.metrics.get(f"{model}_success_rate", [])]
+             if successes:
+                 stats[f"{model}_success_rate"] = sum(successes) / len(successes) * 100
+
+         # Problem type distribution
+         problem_types = [x['type'] for x in self.metrics.get('problem_types', [])]
+         if problem_types:
+             type_counts = defaultdict(int)
+             for ptype in problem_types:
+                 type_counts[ptype] += 1
+             total = len(problem_types)
+             stats['problem_type_distribution'] = {
+                 ptype: (count / total) * 100
+                 for ptype, count in type_counts.items()
+             }
+
+         return stats
+
+ def measure_time(func):
+     """Decorator to measure function execution time"""
+     def wrapper(*args, **kwargs):
+         start_time = time.time()
+         result = func(*args, **kwargs)
+         duration = time.time() - start_time
+         return result, duration
+     return wrapper
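For reference, `measure_time` changes a decorated function's return value to a `(result, duration_seconds)` tuple, which is why `solve_problem` in app.py unpacks two values per call. A small standalone sketch of the monitor and decorator working together (the solver function here is illustrative, not part of the demo):

```python
from monitoring import PerformanceMonitor, measure_time

monitor = PerformanceMonitor(metrics_file="metrics.json")

@measure_time
def fake_solver(problem):  # illustrative stand-in for a model call
    return f"answer to {problem}"

result, duration = fake_solver("2 + 2")  # (result, seconds elapsed)
monitor.record_response_time("base", duration)
monitor.record_success("base", not result.startswith("Error"))
print(monitor.get_statistics())
```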
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ # Core dependencies
+ gradio>=4.0.0
+ torch>=2.0.0
+ transformers>=4.30.0
+ accelerate>=0.20.0
+ numpy>=1.21.0
+
+ # Testing dependencies
+ pytest>=7.0.0
+ pytest-cov>=4.0.0
+ pytest-mock>=3.10.0