audeering
/

wav2small

Audio Classification

speech-emotion-recognition

Model card Files Files and versions

Dionyssos committed on Aug 7

Commit

6ea3fe5

·

1 Parent(s): b93e7b7

ckpt download speed

Files changed (1) hide show

README.md +4 -5

README.md CHANGED Viewed

@@ -12,7 +12,7 @@ tags:
 # Wav2Small2.0 - Arousal / Dominance / Valence
-Please note that this model is for research purposes only. A commercial [license](https://www.audeering.com/products/devaice/) can be acquired from audEERING. The model expects a raw 16 kHz audio signal as input and outputs arousal, dominance, and valence in the range [0, 1], as well as Anger/Happiness/Neutral/Sad probabilities. The model was created following the [Wav2Small paper](https://arxiv.org/abs/2408.13920) and has a total of 17K parameters.
 # How To
@@ -20,11 +20,10 @@ Please note that this model is for research purpose only. A commercial [license]
 ```python
 import torch
 import numpy as np
-import torch.nn.functional as F
 import librosa
-from transformers.models.wav2vec2.modeling_wav2vec2 import Wav2Vec2PreTrainedModel
 from torch import nn
-from transformers import PretrainedConfig
@@ -97,7 +96,7 @@ class Spectrogram(nn.Module):
         real = self.conv_real(x)
         imag = self.conv_imag(x)
-        return real ** 2 + imag ** 2  # bs, mel, time-frames
 class LogmelFilterBank(nn.Module):

 # Wav2Small2.0 - Arousal / Dominance / Valence
+Please note that this model is for research purposes only. A commercial [license](https://www.audeering.com/products/devaice/) can be acquired from audEERING. The model expects a raw 16 kHz audio signal as input and outputs arousal, dominance, and valence in the range [0, 1]. The model was created following the [Wav2Small paper](https://arxiv.org/abs/2408.13920) and has a total of 17K parameters.
 # How To
 ```python
 import torch
 import numpy as np
 import librosa
+from transformers import Wav2Vec2PreTrainedModel, PretrainedConfig
 from torch import nn
         real = self.conv_real(x)
         imag = self.conv_imag(x)
+        return real ** 2 + imag ** 2  # bs, freq, time-frames
 class LogmelFilterBank(nn.Module):