import base64
import os
import time

import cv2
import face_recognition
import gradio as gr
import moviepy.editor as mp
import torchaudio
from fastai.vision.all import load_learner
from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline

# Text pipelines: emotion labels for the transcript and binary sentiment
emotion_pipeline = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-emotion")
sentiment_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

# fastai learner that labels a face crop as 'on_camera' or 'off_camera'
model = load_learner("gaze-recognizer-v3.pkl")


def extract_audio(video_path):
    # Extract the audio track from the video and save it as audio.wav
    clip = mp.VideoFileClip(video_path)
    clip.audio.write_audiofile("audio.wav")


def analyze_emotion(text):
    return emotion_pipeline(text)


def analyze_sentiment(text):
    return sentiment_pipeline(text)


def get_transcription(path):
    extract_audio(path)

    # Whisper expects 16 kHz mono audio, so resample before feature extraction
    waveform, sample_rate = torchaudio.load("audio.wav")
    resampler = torchaudio.transforms.Resample(sample_rate, 16000)
    waveform = resampler(waveform)[0]

    # Named whisper_model so it does not shadow the global fastai learner
    processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
    whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
    whisper_model.config.forced_decoder_ids = None

    input_features = processor(waveform, sampling_rate=16000, return_tensors="pt").input_features
    predicted_ids = whisper_model.generate(input_features)
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
    return transcription[0]


def process_frame(frame):
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    face_locations = face_recognition.face_locations(gray)

    # Classify the first detected face and return its gaze label
    for top, right, bottom, left in face_locations:
        face_image = gray[top:bottom, left:right]
        resized_face_image = cv2.resize(face_image, (128, 128))
        result = model.predict(resized_face_image)
        return result[0]
    return None


def video_processing(video_file, encoded_video):
    # Accept either a file path or a base64-encoded video payload
    if encoded_video != "":
        decoded_file_data = base64.b64decode(encoded_video)
        with open("temp_video.mp4", "wb") as f:
            f.write(decoded_file_data)
        video_file = "temp_video.mp4"

    transcription = get_transcription(video_file)
    print(transcription)

    video_capture = cv2.VideoCapture(video_file)
    on_camera = 0
    off_camera = 0
    total = 0
    emotions = []

    while True:
        # Read ~3 seconds of frames (assuming 24 fps) and classify only the last one
        for _ in range(24 * 3):
            ret, frame = video_capture.read()
            if not ret:
                break
        if not ret:
            break

        result = process_frame(frame)
        if result:
            if result == 'on_camera':
                on_camera += 1
            elif result == 'off_camera':
                off_camera += 1
            total += 1

    # The transcription covers the whole video, so run emotion analysis once
    emotion_results = analyze_emotion(transcription)
    emotions.append(emotion_results)

    video_capture.release()
    cv2.destroyAllWindows()

    # Remove the temporary file created for base64 uploads
    if os.path.exists("temp_video.mp4"):
        os.remove("temp_video.mp4")

    gaze_percentage = on_camera / total * 100 if total > 0 else 0
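

# Illustrative only: the file is truncated above, before video_processing
# returns a value and before the Gradio app (imported as gr) is wired up.
# A minimal sketch of how the function might be exposed, assuming a single
# text output; the component choices here are assumptions, not the Space's
# actual interface code.
demo = gr.Interface(
    fn=video_processing,
    inputs=[gr.Video(), gr.Textbox(label="Base64-encoded video (optional)")],
    outputs="text",
)
demo.launch()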