Uploaded app files
- app.py +15 -0
- echoanimal.py +541 -0
- requirements.txt +15 -3
- wiki_taxonomy.py +152 -0
app.py
ADDED
@@ -0,0 +1,15 @@
import streamlit as st
from echoanimal import main as echoanimal_main
from wiki_taxonomy import main as wiki_taxonomy_main

# Sidebar navigation
st.sidebar.title("Choose an Application")
app_choice = st.sidebar.radio("Select App:", ["Animal Sound Translator", "Animal Taxonomy Classifier"])

# App navigation logic
if app_choice == "Animal Sound Translator":
    st.title("Animal Sound Translator")
    echoanimal_main()
elif app_choice == "Animal Taxonomy Classifier":
    st.title("Animal Taxonomy Classifier")
    wiki_taxonomy_main()
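
One ordering subtlety worth noting: echoanimal.py (below) calls st.set_page_config at import time, and Streamlit's documented contract is that this call comes before any other Streamlit command in a script run. The router above works because the imports execute before the first st.sidebar call. A minimal sketch of that constraint; the try/except guard is an illustration, not part of the uploaded code:

import streamlit as st
from streamlit.errors import StreamlitAPIException

try:
    st.set_page_config(page_title="Demo")  # fine here: no other st.* call has run yet
except StreamlitAPIException:
    pass  # page was already configured by an imported module; continue

st.title("Hello")  # any st.* call placed before set_page_config would break the rule
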
echoanimal.py
ADDED
@@ -0,0 +1,541 @@
import streamlit as st
import numpy as np
import librosa
import librosa.display  # explicit import: specshow lives in librosa.display
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import signal
import io
import base64
from transformers import pipeline, AutoFeatureExtractor, AutoModelForAudioClassification
import torch
import warnings
warnings.filterwarnings('ignore')

# Set page config
st.set_page_config(
    page_title="🐾 Animal Sound Translator",
    page_icon="🐾",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS for better styling
st.markdown("""
<style>
    .main-header {
        font-size: 3rem;
        font-weight: bold;
        text-align: center;
        background: linear-gradient(90deg, #FF6B6B, #4ECDC4, #45B7D1);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        margin-bottom: 2rem;
    }

    .metric-card {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        padding: 1rem;
        border-radius: 10px;
        color: white;
        text-align: center;
        margin: 0.5rem 0;
    }

    .animal-card {
        background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
        padding: 1.5rem;
        border-radius: 15px;
        color: white;
        text-align: center;
        margin: 1rem 0;
        box-shadow: 0 4px 15px rgba(0,0,0,0.2);
    }

    .translation-card {
        background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
        padding: 1.5rem;
        border-radius: 15px;
        color: white;
        margin: 1rem 0;
        box-shadow: 0 4px 15px rgba(0,0,0,0.2);
    }

    .confidence-bar {
        background: linear-gradient(90deg, #ff9a9e 0%, #fecfef 50%, #fecfef 100%);
        height: 20px;
        border-radius: 10px;
        margin: 0.5rem 0;
    }
</style>
""", unsafe_allow_html=True)

# Initialize session state
if 'audio_data' not in st.session_state:
    st.session_state.audio_data = None
if 'sample_rate' not in st.session_state:
    st.session_state.sample_rate = None

@st.cache_resource
def load_models():
    """Load the pre-trained model for audio classification"""
    try:
        # Load a general audio classification model
        feature_extractor = AutoFeatureExtractor.from_pretrained("MIT/ast-finetuned-audioset-10-10-0.4593")
        model = AutoModelForAudioClassification.from_pretrained("MIT/ast-finetuned-audioset-10-10-0.4593")
        classifier = pipeline("audio-classification",
                              model=model,
                              feature_extractor=feature_extractor)
        return classifier
    except Exception as e:
        st.error(f"Error loading models: {e}")
        return None
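
For reference, the same checkpoint can be sanity-checked outside Streamlit. This is a sketch: the one-second noise buffer is placeholder input, and the dict form is the pipeline's documented way to pass a raw waveform together with its sampling rate:

import numpy as np
from transformers import pipeline

clf = pipeline("audio-classification", model="MIT/ast-finetuned-audioset-10-10-0.4593")
wave = np.random.randn(16000).astype(np.float32)  # 1 s of noise at 16 kHz
for pred in clf({"raw": wave, "sampling_rate": 16000}, top_k=3):
    print(f"{pred['label']}: {pred['score']:.3f}")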

def analyze_audio_features(audio_data, sr):
    """Extract comprehensive audio features"""
    features = {}

    # Basic features
    features['duration'] = len(audio_data) / sr
    features['sample_rate'] = sr
    features['rms_energy'] = np.sqrt(np.mean(audio_data**2))

    # Spectral features
    features['spectral_centroid'] = np.mean(librosa.feature.spectral_centroid(y=audio_data, sr=sr))
    features['spectral_bandwidth'] = np.mean(librosa.feature.spectral_bandwidth(y=audio_data, sr=sr))
    features['spectral_rolloff'] = np.mean(librosa.feature.spectral_rolloff(y=audio_data, sr=sr))
    features['zero_crossing_rate'] = np.mean(librosa.feature.zero_crossing_rate(audio_data))

    # MFCC features
    mfccs = librosa.feature.mfcc(y=audio_data, sr=sr, n_mfcc=13)
    for i in range(13):
        features[f'mfcc_{i+1}'] = np.mean(mfccs[i])

    # Pitch: keep the strongest pitch track per frame
    try:
        pitches, magnitudes = librosa.piptrack(y=audio_data, sr=sr)
        pitch_values = []
        for t in range(pitches.shape[1]):
            index = magnitudes[:, t].argmax()
            pitch = pitches[index, t]
            if pitch > 0:
                pitch_values.append(pitch)
        features['avg_pitch'] = np.mean(pitch_values) if pitch_values else 0
        features['pitch_std'] = np.std(pitch_values) if pitch_values else 0
    except Exception:
        features['avg_pitch'] = 0
        features['pitch_std'] = 0

    return features
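
As a rough check of the extractor, a pure 440 Hz tone should report an average pitch and spectral centroid near 440 Hz, and an RMS of about 0.35 for a half-amplitude sine. A sketch using the function above:

import numpy as np

sr = 22050
t = np.arange(sr) / sr                      # one second of samples
tone = 0.5 * np.sin(2 * np.pi * 440.0 * t)  # half-amplitude 440 Hz sine
feats = analyze_audio_features(tone, sr)
print(feats['avg_pitch'], feats['spectral_centroid'], feats['rms_energy'])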

def classify_animal_sound(audio_data, sr, classifier):
    """Classify the animal sound using the pre-trained model"""
    try:
        # Resample to 16kHz if needed (common requirement for audio models)
        if sr != 16000:
            audio_data = librosa.resample(audio_data, orig_sr=sr, target_sr=16000)
            sr = 16000

        # Ensure audio is not too long (limit to 30 seconds)
        max_length = 30 * sr
        if len(audio_data) > max_length:
            audio_data = audio_data[:max_length]

        # Get predictions; the dict form is the pipeline's supported way to
        # pass a raw waveform along with its sampling rate
        predictions = classifier({"raw": audio_data, "sampling_rate": sr})

        # Filter for animal-related sounds
        animal_keywords = ['dog', 'cat', 'bird', 'cow', 'horse', 'pig', 'sheep', 'goat',
                           'chicken', 'duck', 'rooster', 'bark', 'meow', 'chirp', 'moo',
                           'neigh', 'oink', 'baa', 'cluck', 'quack', 'crow', 'howl', 'purr']

        animal_predictions = []
        for pred in predictions:
            label_lower = pred['label'].lower()
            if any(keyword in label_lower for keyword in animal_keywords):
                animal_predictions.append(pred)

        # If no animal sounds were found, fall back to the top predictions
        if not animal_predictions:
            animal_predictions = predictions[:3]

        return animal_predictions[:5]  # Return top 5

    except Exception as e:
        st.error(f"Error in classification: {e}")
        return []
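
The keyword filter is easy to reason about in isolation: with hand-written predictions, generic labels such as "Speech" are dropped while animal-flavored ones survive. A sketch of the same logic:

preds = [
    {"label": "Dog", "score": 0.62},
    {"label": "Speech", "score": 0.21},
    {"label": "Bark", "score": 0.11},
]
keywords = ["dog", "bark", "meow"]
animal = [p for p in preds if any(k in p["label"].lower() for k in keywords)]
print(animal)  # keeps Dog and Bark, drops Speech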

def generate_translation(animal_type, confidence, audio_features):
    """Generate a human-readable translation based on animal type and audio features"""

    # Animal behavior patterns and translations
    translations = {
        'dog': {
            'high_pitch': "I'm excited! Let's play!",
            'low_pitch': "I'm being protective or warning you.",
            'rapid': "I'm very excited or anxious!",
            'slow': "I'm calm but want your attention.",
            'loud': "I need something urgently!",
            'soft': "I'm content and happy.",
            'default': "Woof! I'm trying to communicate with you!"
        },
        'cat': {
            'high_pitch': "I want something! Feed me or pet me!",
            'low_pitch': "I'm content and relaxed.",
            'rapid': "I'm frustrated or demanding attention!",
            'slow': "I'm greeting you or feeling social.",
            'loud': "I'm upset or in distress!",
            'soft': "I'm happy and comfortable.",
            'default': "Meow! I'm talking to you, human!"
        },
        'bird': {
            'high_pitch': "I'm alerting others or expressing joy!",
            'low_pitch': "I'm establishing territory or calling for a mate.",
            'rapid': "I'm excited or warning of danger!",
            'slow': "I'm content and peaceful.",
            'loud': "I'm calling to my flock or defending my space!",
            'soft': "I'm content and comfortable.",
            'default': "Tweet! I'm singing my song!"
        },
        'cow': {
            'high_pitch': "I'm looking for my calf or feeling distressed!",
            'low_pitch': "I'm calm and content.",
            'loud': "I need attention or I'm calling to the herd!",
            'soft': "I'm peaceful and relaxed.",
            'default': "Moo! I'm communicating with my herd!"
        },
        'default': {
            'high_pitch': "I'm expressing excitement or alertness!",
            'low_pitch': "I'm calm or showing dominance.",
            'rapid': "I'm excited, anxious, or trying to get attention!",
            'slow': "I'm relaxed and content.",
            'loud': "I need attention or I'm expressing strong emotion!",
            'soft': "I'm comfortable and peaceful.",
            'default': "I'm trying to communicate something important!"
        }
    }

    # Determine animal category
    animal_key = 'default'
    for key in translations.keys():
        if key in animal_type.lower():
            animal_key = key
            break

    # Analyze audio characteristics
    pitch = audio_features.get('avg_pitch', 0)
    energy = audio_features.get('rms_energy', 0)
    zcr = audio_features.get('zero_crossing_rate', 0)

    # Determine characteristics (pitch first, then loudness, then rate)
    characteristics = []
    if pitch > 300:
        characteristics.append('high_pitch')
    elif 0 < pitch < 200:
        characteristics.append('low_pitch')

    if energy > 0.1:
        characteristics.append('loud')
    elif energy < 0.05:
        characteristics.append('soft')

    if zcr > 0.1:
        characteristics.append('rapid')
    elif zcr < 0.05:
        characteristics.append('slow')

    # Get translation
    translation_dict = translations[animal_key]
    translation = translation_dict.get('default', "I'm trying to communicate!")

    # Use the first matching characteristic (the scan order gives pitch precedence)
    for char in characteristics:
        if char in translation_dict:
            translation = translation_dict[char]
            break

    # Add confidence-based modifier
    if confidence < 0.3:
        translation = f"[Uncertain] {translation}"
    elif confidence > 0.8:
        translation = f"[Very Confident] {translation}"

    return translation
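
A worked example of the rule table, with hand-picked feature values: a pitch above 300 Hz puts 'high_pitch' first in the characteristic scan, and a confidence above 0.8 adds the "[Very Confident]" prefix.

fake_features = {"avg_pitch": 450.0, "rms_energy": 0.12, "zero_crossing_rate": 0.02}
print(generate_translation("dog barking", 0.9, fake_features))
# -> [Very Confident] I'm excited! Let's play!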

def create_spectrogram(audio_data, sr):
    """Create and return spectrogram plot"""
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))

    # Waveform
    time = np.linspace(0, len(audio_data) / sr, len(audio_data))
    ax1.plot(time, audio_data, color='#4ECDC4', linewidth=1)
    ax1.set_title('Audio Waveform', fontsize=14, fontweight='bold')
    ax1.set_xlabel('Time (seconds)')
    ax1.set_ylabel('Amplitude')
    ax1.grid(True, alpha=0.3)

    # Spectrogram
    D = librosa.amplitude_to_db(np.abs(librosa.stft(audio_data)), ref=np.max)
    img = librosa.display.specshow(D, y_axis='hz', x_axis='time', sr=sr, ax=ax2, cmap='viridis')
    ax2.set_title('Spectrogram', fontsize=14, fontweight='bold')
    plt.colorbar(img, ax=ax2, format='%+2.0f dB')

    plt.tight_layout()
    return fig
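
The return value is a plain Matplotlib figure, so the same helper renders fine outside Streamlit, for example to a PNG. A sketch with synthetic input:

import numpy as np

sr = 22050
noise = np.random.randn(sr).astype(np.float32)  # one second of white noise
fig = create_spectrogram(noise, sr)
fig.savefig("spectrogram.png", dpi=150)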

def main():
    # Header
    st.markdown('<h1 class="main-header">🐾 Animal Sound Translator</h1>', unsafe_allow_html=True)
    st.markdown("### 🐾 Animal Sound Translator")

    # Load models
    with st.spinner("Loading AI models..."):
        classifier = load_models()

    if classifier is None:
        st.error("Failed to load models. Please refresh the page.")
        return

    # Sidebar
    st.sidebar.header("🎵 Audio Input")

    # Audio input options
    input_method = st.sidebar.radio(
        "Choose input method:",
        ["Upload Audio File", "Record Audio (if supported)"]
    )

    audio_file = None
    if input_method == "Upload Audio File":
        audio_file = st.sidebar.file_uploader(
            "Upload an audio file",
            type=['wav', 'mp3', 'ogg', 'flac', 'm4a'],
            help="Upload an audio file containing animal sounds"
        )

    # Process audio
    if audio_file is not None:
        try:
            # Load audio
            audio_data, sample_rate = librosa.load(audio_file, sr=None)
            st.session_state.audio_data = audio_data
            st.session_state.sample_rate = sample_rate

            # Display audio player
            st.sidebar.audio(audio_file, format='audio/wav')

            # Main analysis
            col1, col2 = st.columns([2, 1])

            with col1:
                st.subheader("📊 Audio Analysis")

                # Create spectrogram
                with st.spinner("Generating spectrogram..."):
                    fig = create_spectrogram(audio_data, sample_rate)
                    st.pyplot(fig)
                    plt.close()

            with col2:
                st.subheader("📋 Audio Properties")

                # Basic audio info
                duration = len(audio_data) / sample_rate
                st.metric("Duration", f"{duration:.2f} seconds")
                st.metric("Sample Rate", f"{sample_rate} Hz")
                st.metric("Channels", "Mono")

                # Audio features
                features = analyze_audio_features(audio_data, sample_rate)

                st.metric("RMS Energy", f"{features['rms_energy']:.4f}")
                st.metric("Avg Pitch", f"{features['avg_pitch']:.1f} Hz")
                st.metric("Spectral Centroid", f"{features['spectral_centroid']:.1f} Hz")

            # Animal classification
            st.subheader("🐾 Animal Identification")

            with st.spinner("Analyzing animal sounds..."):
                predictions = classify_animal_sound(audio_data, sample_rate, classifier)

            if predictions:
                # Display top prediction
                top_prediction = predictions[0]
                confidence = top_prediction['score']
                animal_type = top_prediction['label']

                # Animal identification card
                st.markdown(f"""
                <div class="animal-card">
                    <h2>🎯 Identified Animal</h2>
                    <h3>{animal_type.title()}</h3>
                    <p>Confidence: {confidence:.1%}</p>
                </div>
                """, unsafe_allow_html=True)

                # Confidence visualization
                st.subheader("📊 Confidence Levels")
                conf_col1, conf_col2 = st.columns(2)

                with conf_col1:
                    # Create confidence chart
                    labels = [pred['label'][:20] + '...' if len(pred['label']) > 20 else pred['label']
                              for pred in predictions[:5]]
                    scores = [pred['score'] for pred in predictions[:5]]

                    fig, ax = plt.subplots(figsize=(8, 6))
                    bars = ax.barh(labels, scores, color=['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7'])
                    ax.set_xlabel('Confidence Score')
                    ax.set_title('Top 5 Predictions')
                    ax.set_xlim(0, 1)

                    # Add value labels on bars
                    for bar, score in zip(bars, scores):
                        ax.text(score + 0.01, bar.get_y() + bar.get_height()/2,
                                f'{score:.1%}', va='center', fontweight='bold')

                    plt.tight_layout()
                    st.pyplot(fig)
                    plt.close()

                with conf_col2:
                    # Detailed predictions
                    st.write("**Detailed Predictions:**")
                    for i, pred in enumerate(predictions[:5], 1):
                        st.write(f"{i}. **{pred['label']}** - {pred['score']:.1%}")

                # Translation
                st.subheader("🗣️ Translation")

                translation = generate_translation(animal_type, confidence, features)

                st.markdown(f"""
                <div class="translation-card">
                    <h3>💬 What the animal is saying:</h3>
                    <p style="font-size: 1.2em; font-style: italic;">"{translation}"</p>
                </div>
                """, unsafe_allow_html=True)

                # Additional insights
                st.subheader("📈 Audio Insights")

                insight_col1, insight_col2, insight_col3 = st.columns(3)

                with insight_col1:
                    st.metric("Pitch Variation", f"{features['pitch_std']:.1f} Hz")
                    if features['pitch_std'] > 50:
                        st.write("🎵 High pitch variation - expressive vocalization")
                    else:
                        st.write("🎵 Low pitch variation - steady vocalization")

                with insight_col2:
                    st.metric("Zero Crossing Rate", f"{features['zero_crossing_rate']:.3f}")
                    if features['zero_crossing_rate'] > 0.1:
                        st.write("⚡ High activity - excited or agitated")
                    else:
                        st.write("😌 Low activity - calm or relaxed")

                with insight_col3:
                    st.metric("Spectral Bandwidth", f"{features['spectral_bandwidth']:.1f} Hz")
                    if features['spectral_bandwidth'] > 2000:
                        st.write("🌊 Rich harmonic content")
                    else:
                        st.write("🎯 Focused frequency content")

            else:
                st.warning("No animal sounds detected. Please try uploading a different audio file.")

        except Exception as e:
            st.error(f"Error processing audio: {e}")

    else:
        # Welcome message
        st.info("👆 Please upload an audio file containing animal sounds to begin analysis!")

        # Sample information
        st.subheader("ℹ️ How it works")
        st.write("""
        1. **Upload** an audio file containing animal sounds
        2. **AI analysis** identifies the animal and extracts audio features
        3. **Translation** converts the sound into a human-readable meaning
        4. **Visualization** shows spectrograms and confidence levels

        **Supported animals:** dogs, cats, birds, cows, horses, pigs, sheep, and more!
        """)

        # Technical details
        with st.expander("🔬 Technical Details"):
            st.write("""
            - **Audio processing:** librosa for feature extraction
            - **AI model:** pre-trained audio classification from Hugging Face
            - **Features analyzed:** MFCCs, spectral features, pitch, energy
            - **Translation logic:** based on animal behavior patterns and audio characteristics
            - **Confidence scoring:** model prediction confidence with uncertainty handling
            """)

if __name__ == "__main__":
    main()

# import os
# import torch
# import torchaudio
# from flask import Flask, request, render_template_string
# from transformers import AutoModelForCTC, AutoProcessor

# app = Flask(__name__)

# # Load model and processor
# def load_model():
#     processor = AutoProcessor.from_pretrained("FahriHuseynli/DolphinGemma")
#     model = AutoModelForCTC.from_pretrained("FahriHuseynli/DolphinGemma")
#     return processor, model

# def convert_audio(file_path):
#     """Convert uploaded audio to WAV format at 16kHz mono"""
#     new_path = file_path.rsplit(".", 1)[0] + "_converted.wav"
#     waveform, sample_rate = torchaudio.load(file_path)
#     waveform = waveform.mean(dim=0, keepdim=True)  # convert to mono
#     resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
#     waveform = resampler(waveform)
#     torchaudio.save(new_path, waveform, 16000)
#     return new_path

# def predict_text(model, processor, input_file):
#     speech, sr = torchaudio.load(input_file)
#     input_values = processor(speech, sampling_rate=sr, return_tensors="pt").input_values
#     with torch.no_grad():
#         logits = model(input_values).logits
#     predicted_ids = torch.argmax(logits, dim=-1)
#     transcription = processor.batch_decode(predicted_ids)[0]
#     return transcription

# @app.route("/", methods=["GET"])
# def index():
#     return render_template_string('''
#         <h1>Dolphin Audio to Text</h1>
#         <form action="/predict" method="post" enctype="multipart/form-data">
#             <input type="file" name="audiofile">
#             <input type="submit">
#         </form>
#     ''')

# @app.route("/predict", methods=["POST"])
# def predict():
#     file = request.files["audiofile"]
#     if file:
#         path = os.path.join("temp_audio.wav")
#         file.save(path)
#         processor, model = load_model()
#         processed_path = convert_audio(path)
#         text = predict_text(model, processor, processed_path)
#         return f"<h3>Predicted Text:</h3><p>{text}</p>"
#     return "No file uploaded."

# # ✅ Programmatic interface for reuse in other apps
# def analyze_dolphin_audio(audio_file_path: str) -> str:
#     processor, model = load_model()
#     processed_path = convert_audio(audio_file_path)
#     text = predict_text(model, processor, processed_path)
#     return text

# if __name__ == "__main__":
#     app.run(debug=True)
requirements.txt
CHANGED
@@ -1,3 +1,15 @@
# Streamlit (shared by both apps)
streamlit==1.35.0

# For echoanimal.py
librosa==0.10.1
matplotlib==3.8.4
seaborn==0.13.2
scipy==1.13.1

# For wiki_taxonomy.py (transformers and torch are used by echoanimal.py as well)
transformers==4.41.1
torch==2.3.0
Pillow==10.3.0
beautifulsoup4==4.12.3
requests==2.32.3
wiki_taxonomy.py
ADDED
@@ -0,0 +1,152 @@
import streamlit as st
import requests
from functools import lru_cache
from PIL import Image
from transformers import pipeline
from bs4 import BeautifulSoup

@st.cache_resource
def load_classifier():
    return pipeline("image-classification", model="microsoft/resnet-50")

def predict_species(image):
    classifier = load_classifier()
    if image.mode != 'RGB':
        image = image.convert('RGB')
    predictions = classifier(image)
    predicted_species = predictions[0]['label']
    confidence = predictions[0]['score']
    return predicted_species, confidence
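
Used directly, the helper takes any PIL image; the label and score come from ResNet-50's ImageNet head. A sketch, where cat.jpg is a hypothetical local file:

from PIL import Image

img = Image.open("cat.jpg")  # hypothetical local file
label, score = predict_species(img)
print(f"{label} ({score:.1%})")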

@st.cache_data
def get_wikipedia_title(name):
    """Resolve a free-text name to the closest Wikipedia article title."""
    try:
        # Use params= so labels with spaces or punctuation are URL-encoded
        url = "https://en.wikipedia.org/w/api.php"
        params = {"action": "query", "list": "search", "srsearch": name, "format": "json"}
        res = requests.get(url, params=params, timeout=10)
        data = res.json()
        if data["query"]["search"]:
            return data["query"]["search"][0]["title"]
        return name
    except Exception as e:
        st.warning(f"Wikipedia search error: {e}")
        return name
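
For context, the MediaWiki search endpoint returns JSON shaped like {"query": {"search": [{"title": ...}, ...]}}, and the function simply takes the first hit's title. A sketch; the printed title depends on Wikipedia's live search index:

print(get_wikipedia_title("Egyptian cat"))  # e.g. the closest matching article title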

@st.cache_data
def get_taxonomy_from_wikidata(label):
    try:
        # Step 1: Search for the label on Wikidata
        search_url = "https://www.wikidata.org/w/api.php"
        search_params = {"action": "wbsearchentities", "search": label, "language": "en", "format": "json"}
        search_response = requests.get(search_url, params=search_params, timeout=5).json()
        if not search_response["search"]:
            return {"error": f"No Wikidata entity found for label '{label}'."}

        entity_id = search_response["search"][0]["id"]

        # Step 2: Walk instance-of/subclass-of links until we reach a valid taxon
        @lru_cache(maxsize=128)
        def fetch_entity_data(eid):
            url = f"https://www.wikidata.org/wiki/Special:EntityData/{eid}.json"
            return requests.get(url, timeout=5).json()["entities"][eid]

        def find_taxon_entity(eid, depth=0):
            if depth > 5:
                return None
            ent = fetch_entity_data(eid)
            claims = ent.get("claims", {})
            # A taxon has both a taxon name (P225) and a parent taxon (P171)
            if "P225" in claims and "P171" in claims:
                return eid
            for prop in ["P31", "P279"]:  # instance of, subclass of
                for val in claims.get(prop, []):
                    try:
                        next_id = val["mainsnak"]["datavalue"]["value"]["id"]
                        found = find_taxon_entity(next_id, depth + 1)
                        if found:
                            return found
                    except Exception:
                        continue
            return None

        taxon_entity = find_taxon_entity(entity_id)
        if not taxon_entity:
            return {"error": f"Could not find taxonomic root from entity '{entity_id}'."}

        taxonomy = {}

        def fetch_taxonomy(eid, level=0):
            if level > 20:
                return
            ent = fetch_entity_data(eid)
            claims = ent.get("claims", {})

            sci_name = claims.get("P225", [{}])[0].get("mainsnak", {}).get("datavalue", {}).get("value", "")
            rank_id = claims.get("P105", [{}])[0].get("mainsnak", {}).get("datavalue", {}).get("value", {}).get("id", "")

            rank = ""
            if rank_id:
                try:
                    rank_ent = fetch_entity_data(rank_id)
                    rank = rank_ent["labels"].get("en", {}).get("value", "")
                except Exception:
                    pass

            if rank and sci_name:
                taxonomy[rank.capitalize()] = sci_name
            elif sci_name:
                taxonomy[f"Unranked {level}"] = sci_name

            # Climb to the parent taxon (P171) and repeat
            parent_id = claims.get("P171", [{}])[0].get("mainsnak", {}).get("datavalue", {}).get("value", {}).get("id", "")
            if parent_id:
                fetch_taxonomy(parent_id, level + 1)

        fetch_taxonomy(taxon_entity)

        # Keep the ranks in canonical order (in sync with display_taxonomy's list)
        ordered = {}
        for key in ["Kingdom", "Phylum", "Class", "Order", "Family", "Subfamily", "Genus", "Species", "Subspecies"]:
            if key in taxonomy:
                ordered[key] = taxonomy[key]

        return ordered if ordered else {"error": f"No taxonomy could be extracted from entity '{taxon_entity}'."}
    except Exception as e:
        return {"error": f"Wikidata taxonomy error: {e}"}
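
Calling the walker directly with a binomial name illustrates the output shape. A sketch; results depend on live Wikidata, but a well-formed taxon yields the ordered ranks:

ranks = get_taxonomy_from_wikidata("Felis catus")
print(ranks)
# e.g. {'Kingdom': 'Animalia', 'Phylum': 'Chordata', ..., 'Species': 'Felis catus'}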

def display_taxonomy(taxonomy):
    st.subheader("🧬 Taxonomic Classification (from Wikidata)")
    if "error" in taxonomy:
        st.warning(taxonomy["error"])
        return

    for rank in ["Kingdom", "Phylum", "Class", "Order", "Family", "Subfamily", "Genus", "Species", "Subspecies"]:
        if rank in taxonomy:
            st.markdown(f"**{rank}:** `{taxonomy[rank]}`")

def main():
    # Page config may already have been set by app.py / echoanimal.py when this
    # module runs inside the combined app, so guard the call for that case
    try:
        st.set_page_config(page_title="Animal Taxonomy Classifier (Wikipedia)", layout="wide")
    except Exception:
        pass
    st.title("🧬 Animal Taxonomy Classifier")

    uploaded_image = st.file_uploader("📤 Upload an animal image", type=["jpg", "jpeg", "png"])

    if uploaded_image:
        image = Image.open(uploaded_image)
        st.image(image, caption="Uploaded Image", use_column_width=True)

        if st.button("🔍 Analyze"):
            with st.spinner("Analyzing and fetching taxonomy..."):
                try:
                    predicted_species, confidence = predict_species(image)
                    st.success(f"🎯 Predicted: **{predicted_species}** ({confidence:.2%} confidence)")

                    corrected_title = get_wikipedia_title(predicted_species)
                    st.info(f"📖 Wikipedia Title: **{corrected_title}**")

                    taxonomy = get_taxonomy_from_wikidata(corrected_title)
                    display_taxonomy(taxonomy)

                except Exception as e:
                    st.error(f"Error: {e}")

if __name__ == "__main__":
    main()