feat: update with better checkpoint

Files changed (7) hide show

README.md +19 -19
config.yaml +0 -93
hparams.yaml +0 -15
overrides.yaml +0 -22
pytorch_model.bin +2 -2
tfevents.bin +0 -3
train.log +0 -18

README.md CHANGED Viewed

@@ -90,25 +90,25 @@ segmentation = inference("audio.wav")
 ## Reproducible research
 In order to reproduce the results of the paper ["End-to-end speaker segmentation for overlap-aware resegmentation
-"](https://arxiv.org/abs/2104.04045), use the following hyper-parameters:
-Voice activity detection  | `onset` | `offset` | `min_duration_on` | `min_duration_off`
-----------------|---------|----------|-------------------|-------------------
-AMI Mix-Headset | 0.684   | 0.577    | 0.181             | 0.037
-DIHARD3         | 0.767   | 0.377    | 0.136             | 0.067
-VoxConverse     | 0.767   | 0.713    | 0.182             | 0.501
-Overlapped speech detection | `onset` | `offset` | `min_duration_on` | `min_duration_off`
-----------------|---------|----------|-------------------|-------------------
-AMI Mix-Headset | 0.448   | 0.362    | 0.116             | 0.187
-DIHARD3         | 0.430   | 0.320    | 0.091             | 0.144
-VoxConverse     | 0.587   | 0.426    | 0.337             | 0.112
-Resegmentation of VBx | `onset` | `offset` | `min_duration_on` | `min_duration_off`
-----------------|---------|----------|-------------------|-------------------
-AMI Mix-Headset | 0.542   | 0.527    | 0.044             | 0.705
-DIHARD3         | 0.592   | 0.489    | 0.163             | 0.182
-VoxConverse     | 0.537   | 0.724    | 0.410             | 0.563
 Expected outputs (and VBx baseline) are also provided in the `/reproducible_research` sub-directories.

 ## Reproducible research
 In order to reproduce the results of the paper ["End-to-end speaker segmentation for overlap-aware resegmentation
+"](https://arxiv.org/abs/2104.04045), use `pyannote/segmentation@Interspeech2021` with the following hyper-parameters:
+| Voice activity detection | `onset` | `offset` | `min_duration_on` | `min_duration_off` |
+| ------------------------ | ------- | -------- | ----------------- | ------------------ |
+| AMI Mix-Headset          | 0.684   | 0.577    | 0.181             | 0.037              |
+| DIHARD3                  | 0.767   | 0.377    | 0.136             | 0.067              |
+| VoxConverse              | 0.767   | 0.713    | 0.182             | 0.501              |

+| Overlapped speech detection | `onset` | `offset` | `min_duration_on` | `min_duration_off` |
+| --------------------------- | ------- | -------- | ----------------- | ------------------ |
+| AMI Mix-Headset             | 0.448   | 0.362    | 0.116             | 0.187              |
+| DIHARD3                     | 0.430   | 0.320    | 0.091             | 0.144              |
+| VoxConverse                 | 0.587   | 0.426    | 0.337             | 0.112              |

+| Resegmentation of VBx | `onset` | `offset` | `min_duration_on` | `min_duration_off` |
+| --------------------- | ------- | -------- | ----------------- | ------------------ |
+| AMI Mix-Headset       | 0.542   | 0.527    | 0.044             | 0.705              |
+| DIHARD3               | 0.592   | 0.489    | 0.163             | 0.182              |
+| VoxConverse           | 0.537   | 0.724    | 0.410             | 0.563              |

 Expected outputs (and VBx baseline) are also provided in the `/reproducible_research` sub-directories.

config.yaml DELETED Viewed

@@ -1,93 +0,0 @@
-protocol: X.SpeakerDiarization.Custom
-patience: 20
-task:
-  _target_: pyannote.audio.tasks.Segmentation
-  duration: 5.0
-  warm_up: 0.0
-  balance: null
-  overlap:
-    probability: 0.5
-    snr_min: 0.0
-    snr_max: 10.0
-  weight: null
-  batch_size: 32
-  num_workers: 10
-  pin_memory: false
-  loss: bce
-  vad_loss: bce
-model:
-  _target_: pyannote.audio.models.segmentation.PyanNet
-  sincnet:
-    stride: 10
-  lstm:
-    num_layers: 4
-    monolithic: true
-    dropout: 0.5
-  linear:
-    num_layers: 2
-optimizer:
-  _target_: torch.optim.Adam
-  lr: 0.001
-  betas:
-  - 0.9
-  - 0.999
-  eps: 1.0e-08
-  weight_decay: 0
-  amsgrad: false
-trainer:
-  _target_: pytorch_lightning.Trainer
-  accelerator: ddp
-  accumulate_grad_batches: 1
-  amp_backend: native
-  amp_level: O2
-  auto_lr_find: false
-  auto_scale_batch_size: false
-  auto_select_gpus: true
-  benchmark: true
-  check_val_every_n_epoch: 1
-  checkpoint_callback: true
-  deterministic: false
-  fast_dev_run: false
-  flush_logs_every_n_steps: 100
-  gpus: -1
-  gradient_clip_val: 0.5
-  limit_test_batches: 1.0
-  limit_train_batches: 1.0
-  limit_val_batches: 1.0
-  log_every_n_steps: 50
-  log_gpu_memory: null
-  max_epochs: 1000
-  max_steps: null
-  min_epochs: 1
-  min_steps: null
-  num_nodes: 1
-  num_processes: 1
-  num_sanity_val_steps: 2
-  overfit_batches: 0.0
-  precision: 32
-  prepare_data_per_node: true
-  process_position: 0
-  profiler: null
-  progress_bar_refresh_rate: 1
-  reload_dataloaders_every_epoch: false
-  replace_sampler_ddp: true
-  sync_batchnorm: false
-  terminate_on_nan: false
-  tpu_cores: null
-  track_grad_norm: -1
-  truncated_bptt_steps: null
-  val_check_interval: 1.0
-  weights_save_path: null
-  weights_summary: top
-augmentation:
-  transform: Compose
-  params:
-    shuffle: false
-    transforms:
-    - transform: AddBackgroundNoise
-      params:
-        background_paths: /gpfswork/rech/eie/commun/data/background/musan
-        min_snr_in_db: 5.0
-        max_snr_in_db: 15.0
-        mode: per_example
-        p: 0.9

hparams.yaml DELETED Viewed

@@ -1,15 +0,0 @@
-linear:
-  hidden_size: 128
-  num_layers: 2
-lstm:
-  batch_first: true
-  bidirectional: true
-  dropout: 0.5
-  hidden_size: 128
-  monolithic: true
-  num_layers: 4
-num_channels: 1
-sample_rate: 16000
-sincnet:
-  sample_rate: 16000
-  stride: 10

overrides.yaml DELETED Viewed

@@ -1,22 +0,0 @@
-- protocol=X.SpeakerDiarization.Custom
-- task=Segmentation
-- task.batch_size=32
-- task.num_workers=10
-- task.duration=5.
-- task.warm_up=0.
-- task.loss=bce
-- task.vad_loss=bce
-- patience=20
-- model=PyanNet
-- +model.sincnet.stride=10
-- +model.lstm.num_layers=4
-- +model.lstm.monolithic=True
-- +model.lstm.dropout=0.5
-- +model.linear.num_layers=2
-- optimizer=Adam
-- optimizer.lr=0.001
-- trainer.benchmark=True
-- trainer.gradient_clip_val=0.5
-- trainer.gpus=-1
-- trainer.accelerator=ddp
-- +augmentation=background

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c7d2e72ce20167e5eb05ce163b7af9762e92ef5fec7313435b676b74b8498afe
-size 17739960

 version https://git-lfs.github.com/spec/v1
+oid sha256:0b5b3216d60a2d32fc086b47ea8c67589aaeb26b7e07fcbe620d6d0b83e209ea
+size 17719103

tfevents.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c2b33b3855ecc446b1913916d8369ede8597b66491541a6c67e5ceafc15bcdb3
-size 13357699

train.log DELETED Viewed

@@ -1,18 +0,0 @@
-[2021-03-19 18:29:57,529][lightning][INFO] - GPU available: True, used: True
-[2021-03-19 18:29:57,531][lightning][INFO] - TPU available: None, using: 0 TPU cores
-[2021-03-19 18:29:57,531][lightning][INFO] - LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
-[2021-03-19 18:30:08,622][lightning][INFO] - initializing ddp: GLOBAL_RANK: 0, MEMBER: 1/4
-[2021-03-19 18:32:58,993][lightning][INFO] - Set SLURM handle signals.
-[2021-03-19 18:32:59,068][lightning][INFO] -
-  | Name       | Type       | Params | In sizes       | Out sizes
-------------------------------------------------------------------------------------------------------------
-0 | sincnet    | SincNet    | 42.6 K | [32, 1, 80000] | [32, 60, 293]
-1 | lstm       | LSTM       | 1.4 M  | [32, 293, 60]  | [[32, 293, 256], [[8, 32, 128], [8, 32, 128]]]
-2 | linear     | ModuleList | 49.4 K | ?              | ?
-3 | classifier | Linear     | 516    | [32, 293, 128] | [32, 293, 4]
-4 | activation | Sigmoid    | 0      | [32, 293, 4]   | [32, 293, 4]
-------------------------------------------------------------------------------------------------------------
-1.5 M     Trainable params
-0         Non-trainable params
-1.5 M     Total params
-[2021-03-23 02:26:47,615][lightning][INFO] - bypassing sigterm