SamhitaG committed
Commit
0db656c
·
verified ·
1 Parent(s): 11e7fc7

Uploaded app files

Files changed (4)
  1. app.py +15 -0
  2. echoanimal.py +541 -0
  3. requirements.txt +15 -3
  4. wiki_taxonomy.py +152 -0
app.py ADDED
@@ -0,0 +1,15 @@
+ import streamlit as st
+ from echoanimal import main as echoanimal_main
+ from wiki_taxonomy import main as wiki_taxonomy_main
+
+ # Sidebar Navigation
+ st.sidebar.title("Choose an Application")
+ app_choice = st.sidebar.radio("Select App:", ["Animal Sound Translator", "Animal Taxonomy Classifier"])
+
+ # App Navigation Logic
+ if app_choice == "Animal Sound Translator":
+     st.title("Animal Sound Translator")
+     echoanimal_main()
+ elif app_choice == "Animal Taxonomy Classifier":
+     st.title("Animal Taxonomy Classifier")
+     wiki_taxonomy_main()
echoanimal.py ADDED
@@ -0,0 +1,541 @@
+ import streamlit as st
+ import numpy as np
+ import librosa
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ from scipy import signal
+ import io
+ import base64
+ from transformers import pipeline, AutoFeatureExtractor, AutoModelForAudioClassification
+ import torch
+ import warnings
+ warnings.filterwarnings('ignore')
+
+ # Set page config
+ st.set_page_config(
+     page_title="🐾 Animal Sound Translator",
+     page_icon="🐾",
+     layout="wide",
+     initial_sidebar_state="expanded"
+ )
+
+ # Custom CSS for better styling
+ st.markdown("""
+ <style>
+ .main-header {
+     font-size: 3rem;
+     font-weight: bold;
+     text-align: center;
+     background: linear-gradient(90deg, #FF6B6B, #4ECDC4, #45B7D1);
+     -webkit-background-clip: text;
+     -webkit-text-fill-color: transparent;
+     margin-bottom: 2rem;
+ }
+
+ .metric-card {
+     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+     padding: 1rem;
+     border-radius: 10px;
+     color: white;
+     text-align: center;
+     margin: 0.5rem 0;
+ }
+
+ .animal-card {
+     background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
+     padding: 1.5rem;
+     border-radius: 15px;
+     color: white;
+     text-align: center;
+     margin: 1rem 0;
+     box-shadow: 0 4px 15px rgba(0,0,0,0.2);
+ }
+
+ .translation-card {
+     background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
+     padding: 1.5rem;
+     border-radius: 15px;
+     color: white;
+     margin: 1rem 0;
+     box-shadow: 0 4px 15px rgba(0,0,0,0.2);
+ }
+
+ .confidence-bar {
+     background: linear-gradient(90deg, #ff9a9e 0%, #fecfef 50%, #fecfef 100%);
+     height: 20px;
+     border-radius: 10px;
+     margin: 0.5rem 0;
+ }
+ </style>
+ """, unsafe_allow_html=True)
+
+ # Initialize session state
+ if 'audio_data' not in st.session_state:
+     st.session_state.audio_data = None
+ if 'sample_rate' not in st.session_state:
+     st.session_state.sample_rate = None
+
+ @st.cache_resource
+ def load_models():
+     """Load pre-trained models for audio classification."""
+     try:
+         # Load a general audio classification model
+         feature_extractor = AutoFeatureExtractor.from_pretrained("MIT/ast-finetuned-audioset-10-10-0.4593")
+         model = AutoModelForAudioClassification.from_pretrained("MIT/ast-finetuned-audioset-10-10-0.4593")
+         classifier = pipeline("audio-classification",
+                               model=model,
+                               feature_extractor=feature_extractor)
+         return classifier
+     except Exception as e:
+         st.error(f"Error loading models: {e}")
+         return None
+
+ def analyze_audio_features(audio_data, sr):
+     """Extract comprehensive audio features."""
+     features = {}
+
+     # Basic features
+     features['duration'] = len(audio_data) / sr
+     features['sample_rate'] = sr
+     features['rms_energy'] = np.sqrt(np.mean(audio_data**2))
+
+     # Spectral features
+     features['spectral_centroid'] = np.mean(librosa.feature.spectral_centroid(y=audio_data, sr=sr))
+     features['spectral_bandwidth'] = np.mean(librosa.feature.spectral_bandwidth(y=audio_data, sr=sr))
+     features['spectral_rolloff'] = np.mean(librosa.feature.spectral_rolloff(y=audio_data, sr=sr))
+     features['zero_crossing_rate'] = np.mean(librosa.feature.zero_crossing_rate(audio_data))
+
+     # MFCC features
+     mfccs = librosa.feature.mfcc(y=audio_data, sr=sr, n_mfcc=13)
+     for i in range(13):
+         features[f'mfcc_{i+1}'] = np.mean(mfccs[i])
+
+     # Pitch and tempo
+     try:
+         pitches, magnitudes = librosa.piptrack(y=audio_data, sr=sr)
+         pitch_values = []
+         for t in range(pitches.shape[1]):
+             index = magnitudes[:, t].argmax()
+             pitch = pitches[index, t]
+             if pitch > 0:
+                 pitch_values.append(pitch)
+         features['avg_pitch'] = np.mean(pitch_values) if pitch_values else 0
+         features['pitch_std'] = np.std(pitch_values) if pitch_values else 0
+     except Exception:
+         features['avg_pitch'] = 0
+         features['pitch_std'] = 0
+
+     return features
+
+ def classify_animal_sound(audio_data, sr, classifier):
+     """Classify the animal sound using the pre-trained model."""
+     try:
+         # Resample to 16kHz if needed (common requirement for audio models)
+         if sr != 16000:
+             audio_data = librosa.resample(audio_data, orig_sr=sr, target_sr=16000)
+             sr = 16000
+
+         # Ensure audio is not too long (limit to 30 seconds)
+         max_length = 30 * sr
+         if len(audio_data) > max_length:
+             audio_data = audio_data[:max_length]
+
+         # Get predictions
+         predictions = classifier(audio_data, sampling_rate=sr)
+
+         # Filter for animal-related sounds
+         animal_keywords = ['dog', 'cat', 'bird', 'cow', 'horse', 'pig', 'sheep', 'goat',
+                            'chicken', 'duck', 'rooster', 'bark', 'meow', 'chirp', 'moo',
+                            'neigh', 'oink', 'baa', 'cluck', 'quack', 'crow', 'howl', 'purr']
+
+         animal_predictions = []
+         for pred in predictions:
+             label_lower = pred['label'].lower()
+             if any(keyword in label_lower for keyword in animal_keywords):
+                 animal_predictions.append(pred)
+
+         # If no animal sounds were found, return the top predictions anyway
+         if not animal_predictions:
+             animal_predictions = predictions[:3]
+
+         return animal_predictions[:5]  # Return top 5
+
+     except Exception as e:
+         st.error(f"Error in classification: {e}")
+         return []
+
+ def generate_translation(animal_type, confidence, audio_features):
+     """Generate a human-readable translation based on animal type and audio features."""
+
+     # Animal behavior patterns and translations
+     translations = {
+         'dog': {
+             'high_pitch': "I'm excited! Let's play!",
+             'low_pitch': "I'm being protective or warning you.",
+             'rapid': "I'm very excited or anxious!",
+             'slow': "I'm calm but want your attention.",
+             'loud': "I need something urgently!",
+             'soft': "I'm content and happy.",
+             'default': "Woof! I'm trying to communicate with you!"
+         },
+         'cat': {
+             'high_pitch': "I want something! Feed me or pet me!",
+             'low_pitch': "I'm content and relaxed.",
+             'rapid': "I'm frustrated or demanding attention!",
+             'slow': "I'm greeting you or feeling social.",
+             'loud': "I'm upset or in distress!",
+             'soft': "I'm happy and comfortable.",
+             'default': "Meow! I'm talking to you, human!"
+         },
+         'bird': {
+             'high_pitch': "I'm alerting others or expressing joy!",
+             'low_pitch': "I'm establishing territory or calling for a mate.",
+             'rapid': "I'm excited or warning of danger!",
+             'slow': "I'm content and peaceful.",
+             'loud': "I'm calling to my flock or defending my space!",
+             'soft': "I'm content and comfortable.",
+             'default': "Tweet! I'm singing my song!"
+         },
+         'cow': {
+             'high_pitch': "I'm looking for my calf or feeling distressed!",
+             'low_pitch': "I'm calm and content.",
+             'loud': "I need attention or I'm calling to the herd!",
+             'soft': "I'm peaceful and relaxed.",
+             'default': "Moo! I'm communicating with my herd!"
+         },
+         'default': {
+             'high_pitch': "I'm expressing excitement or alertness!",
+             'low_pitch': "I'm calm or showing dominance.",
+             'rapid': "I'm excited, anxious, or trying to get attention!",
+             'slow': "I'm relaxed and content.",
+             'loud': "I need attention or I'm expressing strong emotion!",
+             'soft': "I'm comfortable and peaceful.",
+             'default': "I'm trying to communicate something important!"
+         }
+     }
+
+     # Determine animal category
+     animal_key = 'default'
+     for key in translations.keys():
+         if key in animal_type.lower():
+             animal_key = key
+             break
+
+     # Analyze audio characteristics
+     pitch = audio_features.get('avg_pitch', 0)
+     energy = audio_features.get('rms_energy', 0)
+     zcr = audio_features.get('zero_crossing_rate', 0)
+
+     # Determine characteristics
+     characteristics = []
+     if pitch > 300:
+         characteristics.append('high_pitch')
+     elif 0 < pitch < 200:
+         characteristics.append('low_pitch')
+
+     if energy > 0.1:
+         characteristics.append('loud')
+     elif energy < 0.05:
+         characteristics.append('soft')
+
+     if zcr > 0.1:
+         characteristics.append('rapid')
+     elif zcr < 0.05:
+         characteristics.append('slow')
+
+     # Get translation
+     translation_dict = translations[animal_key]
+     translation = translation_dict.get('default', "I'm trying to communicate!")
+
+     # Use the most specific characteristic available
+     for char in characteristics:
+         if char in translation_dict:
+             translation = translation_dict[char]
+             break
+
+     # Add confidence-based modifier
+     if confidence < 0.3:
+         translation = f"[Uncertain] {translation}"
+     elif confidence > 0.8:
+         translation = f"[Very Confident] {translation}"
+
+     return translation
+
+ def create_spectrogram(audio_data, sr):
+     """Create and return a waveform and spectrogram plot."""
+     fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
+
+     # Waveform
+     time = np.linspace(0, len(audio_data)/sr, len(audio_data))
+     ax1.plot(time, audio_data, color='#4ECDC4', linewidth=1)
+     ax1.set_title('Audio Waveform', fontsize=14, fontweight='bold')
+     ax1.set_xlabel('Time (seconds)')
+     ax1.set_ylabel('Amplitude')
+     ax1.grid(True, alpha=0.3)
+
+     # Spectrogram
+     D = librosa.amplitude_to_db(np.abs(librosa.stft(audio_data)), ref=np.max)
+     img = librosa.display.specshow(D, y_axis='hz', x_axis='time', sr=sr, ax=ax2, cmap='viridis')
+     ax2.set_title('Spectrogram', fontsize=14, fontweight='bold')
+     plt.colorbar(img, ax=ax2, format='%+2.0f dB')
+
+     plt.tight_layout()
+     return fig
+
+ def main():
+     # Header
+     st.markdown('<h1 class="main-header">🐾 Animal Sound Translator</h1>', unsafe_allow_html=True)
+     st.markdown("### Upload an animal sound to identify the animal and translate its call")
+
+     # Load models
+     with st.spinner("Loading AI models..."):
+         classifier = load_models()
+
+     if classifier is None:
+         st.error("Failed to load models. Please refresh the page.")
+         return
+
+     # Sidebar
+     st.sidebar.header("🎵 Audio Input")
+
+     # Audio input options
+     input_method = st.sidebar.radio(
+         "Choose input method:",
+         ["Upload Audio File", "Record Audio (if supported)"]
+     )
+
+     audio_file = None
+     if input_method == "Upload Audio File":
+         audio_file = st.sidebar.file_uploader(
+             "Upload an audio file",
+             type=['wav', 'mp3', 'ogg', 'flac', 'm4a'],
+             help="Upload an audio file containing animal sounds"
+         )
+
+     # Process audio
+     if audio_file is not None:
+         try:
+             # Load audio
+             audio_data, sample_rate = librosa.load(audio_file, sr=None)
+             st.session_state.audio_data = audio_data
+             st.session_state.sample_rate = sample_rate
+
+             # Display audio player
+             st.sidebar.audio(audio_file, format='audio/wav')
+
+             # Main analysis
+             col1, col2 = st.columns([2, 1])
+
+             with col1:
+                 st.subheader("🔊 Audio Analysis")
+
+                 # Create spectrogram
+                 with st.spinner("Generating spectrogram..."):
+                     fig = create_spectrogram(audio_data, sample_rate)
+                     st.pyplot(fig)
+                     plt.close()
+
+             with col2:
+                 st.subheader("📊 Audio Properties")
+
+                 # Basic audio info
+                 duration = len(audio_data) / sample_rate
+                 st.metric("Duration", f"{duration:.2f} seconds")
+                 st.metric("Sample Rate", f"{sample_rate} Hz")
+                 st.metric("Channels", "Mono")
+
+                 # Audio features
+                 features = analyze_audio_features(audio_data, sample_rate)
+
+                 st.metric("RMS Energy", f"{features['rms_energy']:.4f}")
+                 st.metric("Avg Pitch", f"{features['avg_pitch']:.1f} Hz")
+                 st.metric("Spectral Centroid", f"{features['spectral_centroid']:.1f} Hz")
+
+             # Animal Classification
+             st.subheader("🐾 Animal Identification")
+
+             with st.spinner("Analyzing animal sounds..."):
+                 predictions = classify_animal_sound(audio_data, sample_rate, classifier)
+
+             if predictions:
+                 # Display top prediction
+                 top_prediction = predictions[0]
+                 confidence = top_prediction['score']
+                 animal_type = top_prediction['label']
+
+                 # Animal identification card
+                 st.markdown(f"""
+ <div class="animal-card">
+ <h2>🎯 Identified Animal</h2>
+ <h3>{animal_type.title()}</h3>
+ <p>Confidence: {confidence:.1%}</p>
+ </div>
+ """, unsafe_allow_html=True)
+
+                 # Confidence visualization
+                 st.subheader("📈 Confidence Levels")
+                 conf_col1, conf_col2 = st.columns(2)
+
+                 with conf_col1:
+                     # Create confidence chart
+                     labels = [pred['label'][:20] + '...' if len(pred['label']) > 20 else pred['label']
+                               for pred in predictions[:5]]
+                     scores = [pred['score'] for pred in predictions[:5]]
+
+                     fig, ax = plt.subplots(figsize=(8, 6))
+                     bars = ax.barh(labels, scores, color=['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7'])
+                     ax.set_xlabel('Confidence Score')
+                     ax.set_title('Top 5 Predictions')
+                     ax.set_xlim(0, 1)
+
+                     # Add value labels on bars
+                     for i, (bar, score) in enumerate(zip(bars, scores)):
+                         ax.text(score + 0.01, bar.get_y() + bar.get_height()/2,
+                                 f'{score:.1%}', va='center', fontweight='bold')
+
+                     plt.tight_layout()
+                     st.pyplot(fig)
+                     plt.close()
+
+                 with conf_col2:
+                     # Detailed predictions
+                     st.write("**Detailed Predictions:**")
+                     for i, pred in enumerate(predictions[:5], 1):
+                         st.write(f"{i}. **{pred['label']}** - {pred['score']:.1%}")
+
+                 # Translation
+                 st.subheader("🗣️ Translation")
+
+                 translation = generate_translation(animal_type, confidence, features)
+
+                 st.markdown(f"""
+ <div class="translation-card">
+ <h3>💬 What the animal is saying:</h3>
+ <p style="font-size: 1.2em; font-style: italic;">"{translation}"</p>
+ </div>
+ """, unsafe_allow_html=True)
+
+                 # Additional insights
+                 st.subheader("🔍 Audio Insights")
+
+                 insight_col1, insight_col2, insight_col3 = st.columns(3)
+
+                 with insight_col1:
+                     st.metric("Pitch Variation", f"{features['pitch_std']:.1f} Hz")
+                     if features['pitch_std'] > 50:
+                         st.write("🎵 High pitch variation - expressive vocalization")
+                     else:
+                         st.write("🎵 Low pitch variation - steady vocalization")
+
+                 with insight_col2:
+                     st.metric("Zero Crossing Rate", f"{features['zero_crossing_rate']:.3f}")
+                     if features['zero_crossing_rate'] > 0.1:
+                         st.write("⚡ High activity - excited or agitated")
+                     else:
+                         st.write("😌 Low activity - calm or relaxed")
+
+                 with insight_col3:
+                     st.metric("Spectral Bandwidth", f"{features['spectral_bandwidth']:.1f} Hz")
+                     if features['spectral_bandwidth'] > 2000:
+                         st.write("🌈 Rich harmonic content")
+                     else:
+                         st.write("🎯 Focused frequency content")
+
+             else:
+                 st.warning("No animal sounds detected. Please try uploading a different audio file.")
+
+         except Exception as e:
+             st.error(f"Error processing audio: {e}")
+
+     else:
+         # Welcome message
+         st.info("👆 Please upload an audio file containing animal sounds to begin analysis!")
+
+         # Sample information
+         st.subheader("ℹ️ How it works")
+         st.write("""
+ 1. **Upload** an audio file containing animal sounds
+ 2. **AI Analysis** identifies the animal and analyzes audio features
+ 3. **Translation** converts the sound into human-readable meaning
+ 4. **Visualization** shows spectrograms and confidence levels
+
+ **Supported animals:** Dogs, cats, birds, cows, horses, pigs, sheep, and more!
+ """)
+
+         # Technical details
+         with st.expander("🔬 Technical Details"):
+             st.write("""
+ - **Audio Processing:** librosa for feature extraction
+ - **AI Model:** Pre-trained audio classification from Hugging Face
+ - **Features Analyzed:** MFCC, spectral features, pitch, energy
+ - **Translation Logic:** Based on animal behavior patterns and audio characteristics
+ - **Confidence Scoring:** Model prediction confidence with uncertainty handling
+ """)
+
+ if __name__ == "__main__":
+     main()
+
+ # import os
+ # import torch
+ # import torchaudio
+ # from flask import Flask, request, render_template_string
+ # from transformers import AutoModelForCTC, AutoProcessor
+
+ # app = Flask(__name__)
+
+ # # Load model and processor
+ # def load_model():
+ #     processor = AutoProcessor.from_pretrained("FahriHuseynli/DolphinGemma")
+ #     model = AutoModelForCTC.from_pretrained("FahriHuseynli/DolphinGemma")
+ #     return processor, model
+
+ # def convert_audio(file_path):
+ #     """Convert uploaded audio to WAV format at 16kHz mono"""
+ #     new_path = file_path.rsplit(".", 1)[0] + "_converted.wav"
+ #     waveform, sample_rate = torchaudio.load(file_path)
+ #     waveform = waveform.mean(dim=0, keepdim=True)  # convert to mono
+ #     resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
+ #     waveform = resampler(waveform)
+ #     torchaudio.save(new_path, waveform, 16000)
+ #     return new_path
+
+ # def predict_text(model, processor, input_file):
+ #     speech, sr = torchaudio.load(input_file)
+ #     input_values = processor(speech, sampling_rate=sr, return_tensors="pt").input_values
+ #     with torch.no_grad():
+ #         logits = model(input_values).logits
+ #     predicted_ids = torch.argmax(logits, dim=-1)
+ #     transcription = processor.batch_decode(predicted_ids)[0]
+ #     return transcription
+
+ # @app.route("/", methods=["GET"])
+ # def index():
+ #     return render_template_string('''
+ #         <h1>Dolphin Audio to Text</h1>
+ #         <form action="/predict" method="post" enctype="multipart/form-data">
+ #             <input type="file" name="audiofile">
+ #             <input type="submit">
+ #         </form>
+ #     ''')
+
+ # @app.route("/predict", methods=["POST"])
+ # def predict():
+ #     file = request.files["audiofile"]
+ #     if file:
+ #         path = os.path.join("temp_audio.wav")
+ #         file.save(path)
+ #         processor, model = load_model()
+ #         processed_path = convert_audio(path)
+ #         text = predict_text(model, processor, processed_path)
+ #         return f"<h3>Predicted Text:</h3><p>{text}</p>"
+ #     return "No file uploaded."
+
+ # # ✅ Programmatic interface for reuse in other apps
+ # def analyze_dolphin_audio(audio_file_path: str) -> str:
+ #     processor, model = load_model()
+ #     processed_path = convert_audio(audio_file_path)
+ #     text = predict_text(model, processor, processed_path)
+ #     return text
+
+ # if __name__ == "__main__":
+ #     app.run(debug=True)
+
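Note: the helpers above (feature extraction, AST-based classification, rule-based translation) are plain functions, so they can also be exercised outside Streamlit. A minimal sketch, assuming a hypothetical local clip named dog_bark.wav and accepting the warnings Streamlit prints when its commands run without `streamlit run`:

import librosa
from echoanimal import load_models, analyze_audio_features, classify_animal_sound, generate_translation

audio, sr = librosa.load("dog_bark.wav", sr=None)            # hypothetical sample clip
features = analyze_audio_features(audio, sr)                  # duration, pitch, MFCCs, ...
classifier = load_models()                                    # cached AST AudioSet classifier
predictions = classify_animal_sound(audio, sr, classifier)    # top animal-filtered labels
if predictions:
    top = predictions[0]
    print(top["label"], f"{top['score']:.1%}")
    print(generate_translation(top["label"], top["score"], features))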
requirements.txt CHANGED
@@ -1,3 +1,15 @@
- altair
- pandas
- streamlit
+ # Streamlit version for both apps
+ streamlit==1.35.0
+
+ # For echoanimal.py (audio analysis)
+ librosa==0.10.1
+ matplotlib==3.8.4
+ seaborn==0.13.2
+ scipy==1.13.1
+
+ # For wiki_taxonomy.py (transformers and torch are also used by echoanimal.py)
+ transformers==4.41.1
+ torch==2.3.0
+ Pillow==10.3.0
+ beautifulsoup4==4.12.3
+ requests==2.32.3
wiki_taxonomy.py ADDED
@@ -0,0 +1,152 @@
+ import streamlit as st
+ import requests
+ from PIL import Image
+ from transformers import pipeline
+ from bs4 import BeautifulSoup
+ from functools import lru_cache
+
+ @st.cache_resource
+ def load_classifier():
+     return pipeline("image-classification", model="microsoft/resnet-50")
+
+ def predict_species(image):
+     classifier = load_classifier()
+     if image.mode != 'RGB':
+         image = image.convert('RGB')
+     predictions = classifier(image)
+     predicted_species = predictions[0]['label']
+     confidence = predictions[0]['score']
+     return predicted_species, confidence
+
+ @st.cache_data
+ def get_wikipedia_title(name):
+     try:
+         url = f"https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={name}&format=json"
+         res = requests.get(url, timeout=10)
+         data = res.json()
+         if data["query"]["search"]:
+             return data["query"]["search"][0]["title"]
+         return name
+     except Exception as e:
+         st.warning(f"Wikipedia search error: {e}")
+         return name
+
+
+ @st.cache_data
+ def get_taxonomy_from_wikidata(label):
+     try:
+         # Step 1: Search for the label on Wikidata
+         search_url = f"https://www.wikidata.org/w/api.php?action=wbsearchentities&search={label}&language=en&format=json"
+         search_response = requests.get(search_url, timeout=5).json()
+         if not search_response["search"]:
+             return {"error": f"No Wikidata entity found for label '{label}'."}
+
+         entity_id = search_response["search"][0]["id"]
+
+         # Step 2: Try to trace to a valid taxon
+         @lru_cache(maxsize=128)
+         def fetch_entity_data(eid):
+             url = f"https://www.wikidata.org/wiki/Special:EntityData/{eid}.json"
+             return requests.get(url, timeout=5).json()["entities"][eid]
+
+         def find_taxon_entity(eid, depth=0):
+             if depth > 5:
+                 return None
+             ent = fetch_entity_data(eid)
+             claims = ent.get("claims", {})
+             if "P225" in claims and "P171" in claims:
+                 return eid
+             for prop in ["P31", "P279"]:
+                 for val in claims.get(prop, []):
+                     try:
+                         next_id = val["mainsnak"]["datavalue"]["value"]["id"]
+                         found = find_taxon_entity(next_id, depth+1)
+                         if found:
+                             return found
+                     except Exception:
+                         continue
+             return None
+
+         taxon_entity = find_taxon_entity(entity_id)
+         if not taxon_entity:
+             return {"error": f"Could not find taxonomic root from entity '{entity_id}'."}
+
+         taxonomy = {}
+
+         def fetch_taxonomy(eid, level=0):
+             if level > 20:
+                 return
+             ent = fetch_entity_data(eid)
+             claims = ent.get("claims", {})
+
+             sci_name = claims.get("P225", [{}])[0].get("mainsnak", {}).get("datavalue", {}).get("value", "")
+             rank_id = claims.get("P105", [{}])[0].get("mainsnak", {}).get("datavalue", {}).get("value", {}).get("id", "")
+
+             rank = ""
+             if rank_id:
+                 try:
+                     rank_ent = fetch_entity_data(rank_id)
+                     rank = rank_ent["labels"].get("en", {}).get("value", "")
+                 except Exception:
+                     pass
+
+             if rank and sci_name:
+                 taxonomy[rank.capitalize()] = sci_name
+             elif sci_name:
+                 taxonomy[f"Unranked {level}"] = sci_name
+
+             parent_id = claims.get("P171", [{}])[0].get("mainsnak", {}).get("datavalue", {}).get("value", {}).get("id", "")
+             if parent_id:
+                 fetch_taxonomy(parent_id, level+1)
+
+         fetch_taxonomy(taxon_entity)
+
+         ordered = {}
+         for key in ["Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species", "Subspecies"]:
+             if key in taxonomy:
+                 ordered[key] = taxonomy[key]
+
+         return ordered if ordered else {"error": f"No taxonomy could be extracted from entity '{taxon_entity}'."}
+     except Exception as e:
+         return {"error": f"Wikidata taxonomy error: {e}"}
+
+
+ def display_taxonomy(taxonomy):
+     st.subheader("🧬 Taxonomic Classification (from Wikidata)")
+     if "error" in taxonomy:
+         st.warning(taxonomy["error"])
+         return
+
+     for rank in ["Kingdom", "Phylum", "Class", "Order", "Family", "Subfamily", "Genus", "Species", "Subspecies"]:
+         if rank in taxonomy:
+             st.markdown(f"**{rank}:** `{taxonomy[rank]}`")
+
+ def main():
+     # When this module runs inside app.py, echoanimal.py has already called
+     # st.set_page_config; Streamlit allows only one call per run, so ignore the duplicate.
+     try:
+         st.set_page_config(page_title="Animal Taxonomy Classifier (Wikipedia)", layout="wide")
+     except Exception:
+         pass
+     st.title("🧬 Animal Taxonomy Classifier")
+
+     uploaded_image = st.file_uploader("📤 Upload an animal image", type=["jpg", "jpeg", "png"])
+
+     if uploaded_image:
+         image = Image.open(uploaded_image)
+         st.image(image, caption="Uploaded Image", use_column_width=True)
+
+         if st.button("🔍 Analyze"):
+             with st.spinner("Analyzing and fetching taxonomy..."):
+                 try:
+                     predicted_species, confidence = predict_species(image)
+                     st.success(f"🎯 Predicted: **{predicted_species}** ({confidence:.2%} confidence)")
+
+                     corrected_title = get_wikipedia_title(predicted_species)
+                     st.info(f"🔍 Wikipedia Title: **{corrected_title}**")
+
+                     taxonomy = get_taxonomy_from_wikidata(corrected_title)
+                     display_taxonomy(taxonomy)
+
+                 except Exception as e:
+                     st.error(f"Error: {e}")
+
+ if __name__ == "__main__":
+     main()
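Note: the Wikidata lookup is self-contained: it searches the label with wbsearchentities, follows P31/P279 (instance of / subclass of) links until it reaches an entity carrying both P225 (taxon name) and P171 (parent taxon), then climbs the P171 chain collecting ranks. A minimal sketch of calling it directly, assuming network access; "Panthera leo" is just an illustrative label:

from wiki_taxonomy import get_taxonomy_from_wikidata

taxonomy = get_taxonomy_from_wikidata("Panthera leo")   # illustrative label
for rank, name in taxonomy.items():                     # e.g. Kingdom, Phylum, ..., Species
    print(f"{rank}: {name}")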