pj-mathematician's picture
Add files using upload-large-folder tool
70af7bb verified
metadata
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - generated_from_trainer
  - dataset_size:124788
  - loss:GISTEmbedLoss
base_model: BAAI/bge-small-en-v1.5
widget:
  - source_sentence: 其他机械、设备和有形货物租赁服务代表
    sentences:
      - 其他机械和设备租赁服务工作人员
      - 电子和电信设备及零部件物流经理
      - 工业主厨
  - source_sentence: 公交车司机
    sentences:
      - 表演灯光设计师
      - 乙烯基地板安装工
      - 国际巴士司机
  - source_sentence: online communication manager
    sentences:
      - trades union official
      - social media manager
      - budget manager
  - source_sentence: Projektmanagerin
    sentences:
      - Projektmanager/Projektmanagerin
      - Category-Manager
      - Infanterist
  - source_sentence: Volksvertreter
    sentences:
      - Parlamentarier
      - Oberbürgermeister
      - Konsul
pipeline_tag: sentence-similarity
library_name: sentence-transformers
metrics:
  - cosine_accuracy@1
  - cosine_accuracy@20
  - cosine_accuracy@50
  - cosine_accuracy@100
  - cosine_accuracy@150
  - cosine_accuracy@200
  - cosine_precision@1
  - cosine_precision@20
  - cosine_precision@50
  - cosine_precision@100
  - cosine_precision@150
  - cosine_precision@200
  - cosine_recall@1
  - cosine_recall@20
  - cosine_recall@50
  - cosine_recall@100
  - cosine_recall@150
  - cosine_recall@200
  - cosine_ndcg@1
  - cosine_ndcg@20
  - cosine_ndcg@50
  - cosine_ndcg@100
  - cosine_ndcg@150
  - cosine_ndcg@200
  - cosine_mrr@1
  - cosine_mrr@20
  - cosine_mrr@50
  - cosine_mrr@100
  - cosine_mrr@150
  - cosine_mrr@200
  - cosine_map@1
  - cosine_map@20
  - cosine_map@50
  - cosine_map@100
  - cosine_map@150
  - cosine_map@200
  - cosine_map@500
model-index:
  - name: SentenceTransformer based on BAAI/bge-small-en-v1.5
    results:
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: full en
          type: full_en
        metrics:
          - type: cosine_accuracy@1
            value: 0.6571428571428571
            name: Cosine Accuracy@1
          - type: cosine_accuracy@20
            value: 0.9904761904761905
            name: Cosine Accuracy@20
          - type: cosine_accuracy@50
            value: 0.9904761904761905
            name: Cosine Accuracy@50
          - type: cosine_accuracy@100
            value: 0.9904761904761905
            name: Cosine Accuracy@100
          - type: cosine_accuracy@150
            value: 0.9904761904761905
            name: Cosine Accuracy@150
          - type: cosine_accuracy@200
            value: 0.9904761904761905
            name: Cosine Accuracy@200
          - type: cosine_precision@1
            value: 0.6571428571428571
            name: Cosine Precision@1
          - type: cosine_precision@20
            value: 0.5076190476190475
            name: Cosine Precision@20
          - type: cosine_precision@50
            value: 0.3089523809523809
            name: Cosine Precision@50
          - type: cosine_precision@100
            value: 0.1872380952380952
            name: Cosine Precision@100
          - type: cosine_precision@150
            value: 0.1321904761904762
            name: Cosine Precision@150
          - type: cosine_precision@200
            value: 0.1027142857142857
            name: Cosine Precision@200
          - type: cosine_recall@1
            value: 0.0680237860830842
            name: Cosine Recall@1
          - type: cosine_recall@20
            value: 0.5459242543214992
            name: Cosine Recall@20
          - type: cosine_recall@50
            value: 0.728483344815942
            name: Cosine Recall@50
          - type: cosine_recall@100
            value: 0.8382149119179341
            name: Cosine Recall@100
          - type: cosine_recall@150
            value: 0.8762032488748317
            name: Cosine Recall@150
          - type: cosine_recall@200
            value: 0.9059964336434017
            name: Cosine Recall@200
          - type: cosine_ndcg@1
            value: 0.6571428571428571
            name: Cosine Ndcg@1
          - type: cosine_ndcg@20
            value: 0.6895375515490911
            name: Cosine Ndcg@20
          - type: cosine_ndcg@50
            value: 0.7060633068166344
            name: Cosine Ndcg@50
          - type: cosine_ndcg@100
            value: 0.7619501692018719
            name: Cosine Ndcg@100
          - type: cosine_ndcg@150
            value: 0.778798440383198
            name: Cosine Ndcg@150
          - type: cosine_ndcg@200
            value: 0.7899830993214225
            name: Cosine Ndcg@200
          - type: cosine_mrr@1
            value: 0.6571428571428571
            name: Cosine Mrr@1
          - type: cosine_mrr@20
            value: 0.8098412698412698
            name: Cosine Mrr@20
          - type: cosine_mrr@50
            value: 0.8098412698412698
            name: Cosine Mrr@50
          - type: cosine_mrr@100
            value: 0.8098412698412698
            name: Cosine Mrr@100
          - type: cosine_mrr@150
            value: 0.8098412698412698
            name: Cosine Mrr@150
          - type: cosine_mrr@200
            value: 0.8098412698412698
            name: Cosine Mrr@200
          - type: cosine_map@1
            value: 0.6571428571428571
            name: Cosine Map@1
          - type: cosine_map@20
            value: 0.5464916843297755
            name: Cosine Map@20
          - type: cosine_map@50
            value: 0.5351890636433139
            name: Cosine Map@50
          - type: cosine_map@100
            value: 0.5685440196941911
            name: Cosine Map@100
          - type: cosine_map@150
            value: 0.5756567539581475
            name: Cosine Map@150
          - type: cosine_map@200
            value: 0.5791635361565666
            name: Cosine Map@200
          - type: cosine_map@500
            value: 0.5835322146366259
            name: Cosine Map@500
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: full es
          type: full_es
        metrics:
          - type: cosine_accuracy@1
            value: 0.12432432432432433
            name: Cosine Accuracy@1
          - type: cosine_accuracy@20
            value: 1
            name: Cosine Accuracy@20
          - type: cosine_accuracy@50
            value: 1
            name: Cosine Accuracy@50
          - type: cosine_accuracy@100
            value: 1
            name: Cosine Accuracy@100
          - type: cosine_accuracy@150
            value: 1
            name: Cosine Accuracy@150
          - type: cosine_accuracy@200
            value: 1
            name: Cosine Accuracy@200
          - type: cosine_precision@1
            value: 0.12432432432432433
            name: Cosine Precision@1
          - type: cosine_precision@20
            value: 0.4924324324324324
            name: Cosine Precision@20
          - type: cosine_precision@50
            value: 0.31686486486486487
            name: Cosine Precision@50
          - type: cosine_precision@100
            value: 0.19843243243243244
            name: Cosine Precision@100
          - type: cosine_precision@150
            value: 0.14702702702702705
            name: Cosine Precision@150
          - type: cosine_precision@200
            value: 0.11762162162162161
            name: Cosine Precision@200
          - type: cosine_recall@1
            value: 0.003111544931768446
            name: Cosine Recall@1
          - type: cosine_recall@20
            value: 0.3235933309332048
            name: Cosine Recall@20
          - type: cosine_recall@50
            value: 0.4622883553307717
            name: Cosine Recall@50
          - type: cosine_recall@100
            value: 0.5424114301447981
            name: Cosine Recall@100
          - type: cosine_recall@150
            value: 0.5822792579944903
            name: Cosine Recall@150
          - type: cosine_recall@200
            value: 0.612586126212026
            name: Cosine Recall@200
          - type: cosine_ndcg@1
            value: 0.12432432432432433
            name: Cosine Ndcg@1
          - type: cosine_ndcg@20
            value: 0.5406828319866788
            name: Cosine Ndcg@20
          - type: cosine_ndcg@50
            value: 0.500776817925352
            name: Cosine Ndcg@50
          - type: cosine_ndcg@100
            value: 0.5143442473922782
            name: Cosine Ndcg@100
          - type: cosine_ndcg@150
            value: 0.5349751306205418
            name: Cosine Ndcg@150
          - type: cosine_ndcg@200
            value: 0.5498255219419508
            name: Cosine Ndcg@200
          - type: cosine_mrr@1
            value: 0.12432432432432433
            name: Cosine Mrr@1
          - type: cosine_mrr@20
            value: 0.5516816816816817
            name: Cosine Mrr@20
          - type: cosine_mrr@50
            value: 0.5516816816816817
            name: Cosine Mrr@50
          - type: cosine_mrr@100
            value: 0.5516816816816817
            name: Cosine Mrr@100
          - type: cosine_mrr@150
            value: 0.5516816816816817
            name: Cosine Mrr@150
          - type: cosine_mrr@200
            value: 0.5516816816816817
            name: Cosine Mrr@200
          - type: cosine_map@1
            value: 0.12432432432432433
            name: Cosine Map@1
          - type: cosine_map@20
            value: 0.4061591888137979
            name: Cosine Map@20
          - type: cosine_map@50
            value: 0.3426196432849601
            name: Cosine Map@50
          - type: cosine_map@100
            value: 0.3398108870028267
            name: Cosine Map@100
          - type: cosine_map@150
            value: 0.3482007813358776
            name: Cosine Map@150
          - type: cosine_map@200
            value: 0.3534583367060008
            name: Cosine Map@200
          - type: cosine_map@500
            value: 0.36353547903357536
            name: Cosine Map@500
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: full de
          type: full_de
        metrics:
          - type: cosine_accuracy@1
            value: 0.2955665024630542
            name: Cosine Accuracy@1
          - type: cosine_accuracy@20
            value: 0.9211822660098522
            name: Cosine Accuracy@20
          - type: cosine_accuracy@50
            value: 0.9605911330049262
            name: Cosine Accuracy@50
          - type: cosine_accuracy@100
            value: 0.9753694581280788
            name: Cosine Accuracy@100
          - type: cosine_accuracy@150
            value: 0.9852216748768473
            name: Cosine Accuracy@150
          - type: cosine_accuracy@200
            value: 0.9852216748768473
            name: Cosine Accuracy@200
          - type: cosine_precision@1
            value: 0.2955665024630542
            name: Cosine Precision@1
          - type: cosine_precision@20
            value: 0.4226600985221674
            name: Cosine Precision@20
          - type: cosine_precision@50
            value: 0.2775369458128079
            name: Cosine Precision@50
          - type: cosine_precision@100
            value: 0.1787192118226601
            name: Cosine Precision@100
          - type: cosine_precision@150
            value: 0.1349753694581281
            name: Cosine Precision@150
          - type: cosine_precision@200
            value: 0.10960591133004927
            name: Cosine Precision@200
          - type: cosine_recall@1
            value: 0.01108543831680986
            name: Cosine Recall@1
          - type: cosine_recall@20
            value: 0.25787568646307335
            name: Cosine Recall@20
          - type: cosine_recall@50
            value: 0.378544115518205
            name: Cosine Recall@50
          - type: cosine_recall@100
            value: 0.4646991741198787
            name: Cosine Recall@100
          - type: cosine_recall@150
            value: 0.514077820298434
            name: Cosine Recall@150
          - type: cosine_recall@200
            value: 0.5479242719935129
            name: Cosine Recall@200
          - type: cosine_ndcg@1
            value: 0.2955665024630542
            name: Cosine Ndcg@1
          - type: cosine_ndcg@20
            value: 0.4571806408684656
            name: Cosine Ndcg@20
          - type: cosine_ndcg@50
            value: 0.4186161244795668
            name: Cosine Ndcg@50
          - type: cosine_ndcg@100
            value: 0.43413691996468995
            name: Cosine Ndcg@100
          - type: cosine_ndcg@150
            value: 0.45936827865079527
            name: Cosine Ndcg@150
          - type: cosine_ndcg@200
            value: 0.4762742892652946
            name: Cosine Ndcg@200
          - type: cosine_mrr@1
            value: 0.2955665024630542
            name: Cosine Mrr@1
          - type: cosine_mrr@20
            value: 0.488501497777794
            name: Cosine Mrr@20
          - type: cosine_mrr@50
            value: 0.48978270334574775
            name: Cosine Mrr@50
          - type: cosine_mrr@100
            value: 0.4900376562912742
            name: Cosine Mrr@100
          - type: cosine_mrr@150
            value: 0.4901135143922775
            name: Cosine Mrr@150
          - type: cosine_mrr@200
            value: 0.4901135143922775
            name: Cosine Mrr@200
          - type: cosine_map@1
            value: 0.2955665024630542
            name: Cosine Map@1
          - type: cosine_map@20
            value: 0.3211048669539684
            name: Cosine Map@20
          - type: cosine_map@50
            value: 0.261888445835493
            name: Cosine Map@50
          - type: cosine_map@100
            value: 0.2558901722323677
            name: Cosine Map@100
          - type: cosine_map@150
            value: 0.2649913870834412
            name: Cosine Map@150
          - type: cosine_map@200
            value: 0.27010541031599244
            name: Cosine Map@200
          - type: cosine_map@500
            value: 0.28106938786931224
            name: Cosine Map@500
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: full zh
          type: full_zh
        metrics:
          - type: cosine_accuracy@1
            value: 0.30097087378640774
            name: Cosine Accuracy@1
          - type: cosine_accuracy@20
            value: 0.7087378640776699
            name: Cosine Accuracy@20
          - type: cosine_accuracy@50
            value: 0.8252427184466019
            name: Cosine Accuracy@50
          - type: cosine_accuracy@100
            value: 0.8543689320388349
            name: Cosine Accuracy@100
          - type: cosine_accuracy@150
            value: 0.912621359223301
            name: Cosine Accuracy@150
          - type: cosine_accuracy@200
            value: 0.941747572815534
            name: Cosine Accuracy@200
          - type: cosine_precision@1
            value: 0.30097087378640774
            name: Cosine Precision@1
          - type: cosine_precision@20
            value: 0.16844660194174763
            name: Cosine Precision@20
          - type: cosine_precision@50
            value: 0.09436893203883494
            name: Cosine Precision@50
          - type: cosine_precision@100
            value: 0.05844660194174757
            name: Cosine Precision@100
          - type: cosine_precision@150
            value: 0.04601941747572815
            name: Cosine Precision@150
          - type: cosine_precision@200
            value: 0.038203883495145634
            name: Cosine Precision@200
          - type: cosine_recall@1
            value: 0.024446152054452382
            name: Cosine Recall@1
          - type: cosine_recall@20
            value: 0.17513112391433697
            name: Cosine Recall@20
          - type: cosine_recall@50
            value: 0.23948897590045773
            name: Cosine Recall@50
          - type: cosine_recall@100
            value: 0.2859906000645493
            name: Cosine Recall@100
          - type: cosine_recall@150
            value: 0.32910264724851107
            name: Cosine Recall@150
          - type: cosine_recall@200
            value: 0.36304017348331746
            name: Cosine Recall@200
          - type: cosine_ndcg@1
            value: 0.30097087378640774
            name: Cosine Ndcg@1
          - type: cosine_ndcg@20
            value: 0.23848199480652515
            name: Cosine Ndcg@20
          - type: cosine_ndcg@50
            value: 0.23417872356945213
            name: Cosine Ndcg@50
          - type: cosine_ndcg@100
            value: 0.2558557487315817
            name: Cosine Ndcg@100
          - type: cosine_ndcg@150
            value: 0.27344459654855646
            name: Cosine Ndcg@150
          - type: cosine_ndcg@200
            value: 0.28574499658549296
            name: Cosine Ndcg@200
          - type: cosine_mrr@1
            value: 0.30097087378640774
            name: Cosine Mrr@1
          - type: cosine_mrr@20
            value: 0.4211010881954553
            name: Cosine Mrr@20
          - type: cosine_mrr@50
            value: 0.4249525777196882
            name: Cosine Mrr@50
          - type: cosine_mrr@100
            value: 0.42539460155740233
            name: Cosine Mrr@100
          - type: cosine_mrr@150
            value: 0.42587488715939736
            name: Cosine Mrr@150
          - type: cosine_mrr@200
            value: 0.4260266850474542
            name: Cosine Mrr@200
          - type: cosine_map@1
            value: 0.30097087378640774
            name: Cosine Map@1
          - type: cosine_map@20
            value: 0.14164601531439067
            name: Cosine Map@20
          - type: cosine_map@50
            value: 0.12333195286802508
            name: Cosine Map@50
          - type: cosine_map@100
            value: 0.12884550949445583
            name: Cosine Map@100
          - type: cosine_map@150
            value: 0.13149151347084506
            name: Cosine Map@150
          - type: cosine_map@200
            value: 0.1329204280861929
            name: Cosine Map@200
          - type: cosine_map@500
            value: 0.13684460640814028
            name: Cosine Map@500
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: mix es
          type: mix_es
        metrics:
          - type: cosine_accuracy@1
            value: 0.40717628705148207
            name: Cosine Accuracy@1
          - type: cosine_accuracy@20
            value: 0.7581903276131046
            name: Cosine Accuracy@20
          - type: cosine_accuracy@50
            value: 0.8455538221528861
            name: Cosine Accuracy@50
          - type: cosine_accuracy@100
            value: 0.9048361934477379
            name: Cosine Accuracy@100
          - type: cosine_accuracy@150
            value: 0.9370774830993239
            name: Cosine Accuracy@150
          - type: cosine_accuracy@200
            value: 0.9547581903276131
            name: Cosine Accuracy@200
          - type: cosine_precision@1
            value: 0.40717628705148207
            name: Cosine Precision@1
          - type: cosine_precision@20
            value: 0.08819552782111284
            name: Cosine Precision@20
          - type: cosine_precision@50
            value: 0.04139365574622985
            name: Cosine Precision@50
          - type: cosine_precision@100
            value: 0.022704108164326574
            name: Cosine Precision@100
          - type: cosine_precision@150
            value: 0.015822499566649332
            name: Cosine Precision@150
          - type: cosine_precision@200
            value: 0.012189287571502862
            name: Cosine Precision@200
          - type: cosine_recall@1
            value: 0.1547198078399326
            name: Cosine Recall@1
          - type: cosine_recall@20
            value: 0.652070178045217
            name: Cosine Recall@20
          - type: cosine_recall@50
            value: 0.7639576059232845
            name: Cosine Recall@50
          - type: cosine_recall@100
            value: 0.8390052744966942
            name: Cosine Recall@100
          - type: cosine_recall@150
            value: 0.877764158185375
            name: Cosine Recall@150
          - type: cosine_recall@200
            value: 0.9016677809969541
            name: Cosine Recall@200
          - type: cosine_ndcg@1
            value: 0.40717628705148207
            name: Cosine Ndcg@1
          - type: cosine_ndcg@20
            value: 0.5052212563002627
            name: Cosine Ndcg@20
          - type: cosine_ndcg@50
            value: 0.5356805472279078
            name: Cosine Ndcg@50
          - type: cosine_ndcg@100
            value: 0.5521009256440798
            name: Cosine Ndcg@100
          - type: cosine_ndcg@150
            value: 0.5595924746165437
            name: Cosine Ndcg@150
          - type: cosine_ndcg@200
            value: 0.5639157869620031
            name: Cosine Ndcg@200
          - type: cosine_mrr@1
            value: 0.40717628705148207
            name: Cosine Mrr@1
          - type: cosine_mrr@20
            value: 0.49319826751048873
            name: Cosine Mrr@20
          - type: cosine_mrr@50
            value: 0.49612329710657893
            name: Cosine Mrr@50
          - type: cosine_mrr@100
            value: 0.4969826860284758
            name: Cosine Mrr@100
          - type: cosine_mrr@150
            value: 0.49724671253026886
            name: Cosine Mrr@150
          - type: cosine_mrr@200
            value: 0.49734552981578295
            name: Cosine Mrr@200
          - type: cosine_map@1
            value: 0.40717628705148207
            name: Cosine Map@1
          - type: cosine_map@20
            value: 0.419614969499609
            name: Cosine Map@20
          - type: cosine_map@50
            value: 0.427088485280225
            name: Cosine Map@50
          - type: cosine_map@100
            value: 0.4292014145714
            name: Cosine Map@100
          - type: cosine_map@150
            value: 0.4298202694277485
            name: Cosine Map@150
          - type: cosine_map@200
            value: 0.4300980044671579
            name: Cosine Map@200
          - type: cosine_map@500
            value: 0.43055010151569684
            name: Cosine Map@500
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: mix de
          type: mix_de
        metrics:
          - type: cosine_accuracy@1
            value: 0.29017160686427457
            name: Cosine Accuracy@1
          - type: cosine_accuracy@20
            value: 0.6484659386375455
            name: Cosine Accuracy@20
          - type: cosine_accuracy@50
            value: 0.7540301612064483
            name: Cosine Accuracy@50
          - type: cosine_accuracy@100
            value: 0.8419136765470618
            name: Cosine Accuracy@100
          - type: cosine_accuracy@150
            value: 0.8788351534061363
            name: Cosine Accuracy@150
          - type: cosine_accuracy@200
            value: 0.9089963598543942
            name: Cosine Accuracy@200
          - type: cosine_precision@1
            value: 0.29017160686427457
            name: Cosine Precision@1
          - type: cosine_precision@20
            value: 0.07251690067602704
            name: Cosine Precision@20
          - type: cosine_precision@50
            value: 0.0355486219448778
            name: Cosine Precision@50
          - type: cosine_precision@100
            value: 0.02041601664066563
            name: Cosine Precision@100
          - type: cosine_precision@150
            value: 0.014505113537874847
            name: Cosine Precision@150
          - type: cosine_precision@200
            value: 0.01137545501820073
            name: Cosine Precision@200
          - type: cosine_recall@1
            value: 0.10890968972092217
            name: Cosine Recall@1
          - type: cosine_recall@20
            value: 0.5299011960478419
            name: Cosine Recall@20
          - type: cosine_recall@50
            value: 0.647191887675507
            name: Cosine Recall@50
          - type: cosine_recall@100
            value: 0.7442624371641533
            name: Cosine Recall@100
          - type: cosine_recall@150
            value: 0.7928583810019068
            name: Cosine Recall@150
          - type: cosine_recall@200
            value: 0.8289478245796498
            name: Cosine Recall@200
          - type: cosine_ndcg@1
            value: 0.29017160686427457
            name: Cosine Ndcg@1
          - type: cosine_ndcg@20
            value: 0.38598145754556046
            name: Cosine Ndcg@20
          - type: cosine_ndcg@50
            value: 0.41773491829410075
            name: Cosine Ndcg@50
          - type: cosine_ndcg@100
            value: 0.43906545567486005
            name: Cosine Ndcg@100
          - type: cosine_ndcg@150
            value: 0.4485955578737219
            name: Cosine Ndcg@150
          - type: cosine_ndcg@200
            value: 0.45520732213321263
            name: Cosine Ndcg@200
          - type: cosine_mrr@1
            value: 0.29017160686427457
            name: Cosine Mrr@1
          - type: cosine_mrr@20
            value: 0.37232551415227233
            name: Cosine Mrr@20
          - type: cosine_mrr@50
            value: 0.375685507642469
            name: Cosine Mrr@50
          - type: cosine_mrr@100
            value: 0.3769348294784883
            name: Cosine Mrr@100
          - type: cosine_mrr@150
            value: 0.377239930826995
            name: Cosine Mrr@150
          - type: cosine_mrr@200
            value: 0.3774183771765249
            name: Cosine Mrr@200
          - type: cosine_map@1
            value: 0.29017160686427457
            name: Cosine Map@1
          - type: cosine_map@20
            value: 0.30311022602590254
            name: Cosine Map@20
          - type: cosine_map@50
            value: 0.31036427264538485
            name: Cosine Map@50
          - type: cosine_map@100
            value: 0.31304585670015317
            name: Cosine Map@100
          - type: cosine_map@150
            value: 0.3138396622777036
            name: Cosine Map@150
          - type: cosine_map@200
            value: 0.31426372512191
            name: Cosine Map@200
          - type: cosine_map@500
            value: 0.3150399864057635
            name: Cosine Map@500
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: mix zh
          type: mix_zh
        metrics:
          - type: cosine_accuracy@1
            value: 0.09394572025052192
            name: Cosine Accuracy@1
          - type: cosine_accuracy@20
            value: 0.35281837160751567
            name: Cosine Accuracy@20
          - type: cosine_accuracy@50
            value: 0.48329853862212946
            name: Cosine Accuracy@50
          - type: cosine_accuracy@100
            value: 0.5918580375782881
            name: Cosine Accuracy@100
          - type: cosine_accuracy@150
            value: 0.6649269311064718
            name: Cosine Accuracy@150
          - type: cosine_accuracy@200
            value: 0.7004175365344467
            name: Cosine Accuracy@200
          - type: cosine_precision@1
            value: 0.09394572025052192
            name: Cosine Precision@1
          - type: cosine_precision@20
            value: 0.030897703549060546
            name: Cosine Precision@20
          - type: cosine_precision@50
            value: 0.018204592901878917
            name: Cosine Precision@50
          - type: cosine_precision@100
            value: 0.011362212943632568
            name: Cosine Precision@100
          - type: cosine_precision@150
            value: 0.008639526791927627
            name: Cosine Precision@150
          - type: cosine_precision@200
            value: 0.007019832985386221
            name: Cosine Precision@200
          - type: cosine_recall@1
            value: 0.03185455810716771
            name: Cosine Recall@1
          - type: cosine_recall@20
            value: 0.20592877025549258
            name: Cosine Recall@20
          - type: cosine_recall@50
            value: 0.30069837956059253
            name: Cosine Recall@50
          - type: cosine_recall@100
            value: 0.3754792557245584
            name: Cosine Recall@100
          - type: cosine_recall@150
            value: 0.4282591046160983
            name: Cosine Recall@150
          - type: cosine_recall@200
            value: 0.46372361401067036
            name: Cosine Recall@200
          - type: cosine_ndcg@1
            value: 0.09394572025052192
            name: Cosine Ndcg@1
          - type: cosine_ndcg@20
            value: 0.13433471892252347
            name: Cosine Ndcg@20
          - type: cosine_ndcg@50
            value: 0.16091824243484512
            name: Cosine Ndcg@50
          - type: cosine_ndcg@100
            value: 0.1780017996510726
            name: Cosine Ndcg@100
          - type: cosine_ndcg@150
            value: 0.1886875211403746
            name: Cosine Ndcg@150
          - type: cosine_ndcg@200
            value: 0.19541417908856412
            name: Cosine Ndcg@200
          - type: cosine_mrr@1
            value: 0.09394572025052192
            name: Cosine Mrr@1
          - type: cosine_mrr@20
            value: 0.14710513443845905
            name: Cosine Mrr@20
          - type: cosine_mrr@50
            value: 0.15122849766144658
            name: Cosine Mrr@50
          - type: cosine_mrr@100
            value: 0.15275090014884107
            name: Cosine Mrr@100
          - type: cosine_mrr@150
            value: 0.1533445728241347
            name: Cosine Mrr@150
          - type: cosine_mrr@200
            value: 0.1535456563541225
            name: Cosine Mrr@200
          - type: cosine_map@1
            value: 0.09394572025052192
            name: Cosine Map@1
          - type: cosine_map@20
            value: 0.083759101073897
            name: Cosine Map@20
          - type: cosine_map@50
            value: 0.08908800548950695
            name: Cosine Map@50
          - type: cosine_map@100
            value: 0.09092612397080438
            name: Cosine Map@100
          - type: cosine_map@150
            value: 0.09168814149038751
            name: Cosine Map@150
          - type: cosine_map@200
            value: 0.09208168156532727
            name: Cosine Map@200
          - type: cosine_map@500
            value: 0.09301554391402207
            name: Cosine Map@500

SentenceTransformer based on BAAI/bge-small-en-v1.5

This is a sentence-transformers model fine-tuned from BAAI/bge-small-en-v1.5 on the full_en, full_de, full_es, full_zh, and mix datasets. It maps sentences and paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

  • Model Type: Sentence Transformer
  • Base model: BAAI/bge-small-en-v1.5
  • Maximum Sequence Length: 512 tokens
  • Output Dimensionality: 384 dimensions
  • Similarity Function: Cosine Similarity
  • Training Datasets:
    • full_en
    • full_de
    • full_es
    • full_zh
    • mix

Model Sources

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

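Then you can load this model and run inference. Replace the placeholder "sentence_transformers_model_id" in the snippet below with this model's repository ID on the Hugging Face Hub.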

# Minimal inference example: embed three example sentences and compare them pairwise.
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
# NOTE(review): "sentence_transformers_model_id" looks like a template placeholder;
# replace it with this model's actual Hub repository ID before running.
model = SentenceTransformer("sentence_transformers_model_id")
# Run inference
sentences = [
    'Volksvertreter',
    'Parlamentarier',
    'Oberbürgermeister',
]
# One embedding per input sentence; 384 is the output dimensionality listed
# in the model description.
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 384]

# Get the similarity scores for the embeddings
# (each of the 3 sentences is scored against all 3, hence the 3 x 3 result;
# the model card lists cosine similarity as the similarity function).
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]

Evaluation

Metrics

Information Retrieval

Metric full_en full_es full_de full_zh mix_es mix_de mix_zh
cosine_accuracy@1 0.6571 0.1243 0.2956 0.301 0.4072 0.2902 0.0939
cosine_accuracy@20 0.9905 1.0 0.9212 0.7087 0.7582 0.6485 0.3528
cosine_accuracy@50 0.9905 1.0 0.9606 0.8252 0.8456 0.754 0.4833
cosine_accuracy@100 0.9905 1.0 0.9754 0.8544 0.9048 0.8419 0.5919
cosine_accuracy@150 0.9905 1.0 0.9852 0.9126 0.9371 0.8788 0.6649
cosine_accuracy@200 0.9905 1.0 0.9852 0.9417 0.9548 0.909 0.7004
cosine_precision@1 0.6571 0.1243 0.2956 0.301 0.4072 0.2902 0.0939
cosine_precision@20 0.5076 0.4924 0.4227 0.1684 0.0882 0.0725 0.0309
cosine_precision@50 0.309 0.3169 0.2775 0.0944 0.0414 0.0355 0.0182
cosine_precision@100 0.1872 0.1984 0.1787 0.0584 0.0227 0.0204 0.0114
cosine_precision@150 0.1322 0.147 0.135 0.046 0.0158 0.0145 0.0086
cosine_precision@200 0.1027 0.1176 0.1096 0.0382 0.0122 0.0114 0.007
cosine_recall@1 0.068 0.0031 0.0111 0.0244 0.1547 0.1089 0.0319
cosine_recall@20 0.5459 0.3236 0.2579 0.1751 0.6521 0.5299 0.2059
cosine_recall@50 0.7285 0.4623 0.3785 0.2395 0.764 0.6472 0.3007
cosine_recall@100 0.8382 0.5424 0.4647 0.286 0.839 0.7443 0.3755
cosine_recall@150 0.8762 0.5823 0.5141 0.3291 0.8778 0.7929 0.4283
cosine_recall@200 0.906 0.6126 0.5479 0.363 0.9017 0.8289 0.4637
cosine_ndcg@1 0.6571 0.1243 0.2956 0.301 0.4072 0.2902 0.0939
cosine_ndcg@20 0.6895 0.5407 0.4572 0.2385 0.5052 0.386 0.1343
cosine_ndcg@50 0.7061 0.5008 0.4186 0.2342 0.5357 0.4177 0.1609
cosine_ndcg@100 0.762 0.5143 0.4341 0.2559 0.5521 0.4391 0.178
cosine_ndcg@150 0.7788 0.535 0.4594 0.2734 0.5596 0.4486 0.1887
cosine_ndcg@200 0.79 0.5498 0.4763 0.2857 0.5639 0.4552 0.1954
cosine_mrr@1 0.6571 0.1243 0.2956 0.301 0.4072 0.2902 0.0939
cosine_mrr@20 0.8098 0.5517 0.4885 0.4211 0.4932 0.3723 0.1471
cosine_mrr@50 0.8098 0.5517 0.4898 0.425 0.4961 0.3757 0.1512
cosine_mrr@100 0.8098 0.5517 0.49 0.4254 0.497 0.3769 0.1528
cosine_mrr@150 0.8098 0.5517 0.4901 0.4259 0.4972 0.3772 0.1533
cosine_mrr@200 0.8098 0.5517 0.4901 0.426 0.4973 0.3774 0.1535
cosine_map@1 0.6571 0.1243 0.2956 0.301 0.4072 0.2902 0.0939
cosine_map@20 0.5465 0.4062 0.3211 0.1416 0.4196 0.3031 0.0838
cosine_map@50 0.5352 0.3426 0.2619 0.1233 0.4271 0.3104 0.0891
cosine_map@100 0.5685 0.3398 0.2559 0.1288 0.4292 0.313 0.0909
cosine_map@150 0.5757 0.3482 0.265 0.1315 0.4298 0.3138 0.0917
cosine_map@200 0.5792 0.3535 0.2701 0.1329 0.4301 0.3143 0.0921
cosine_map@500 0.5835 0.3635 0.2811 0.1368 0.4306 0.315 0.093

Training Details

Training Datasets

full_en

full_en

  • Dataset: full_en
  • Size: 28,880 training samples
  • Columns: anchor and positive
  • Approximate statistics based on the first 1000 samples:
    |         | anchor | positive |
    |:--------|:-------|:---------|
    | type    | string | string   |
    | details | min: 3 tokens; mean: 5.0 tokens; max: 10 tokens | min: 3 tokens; mean: 5.01 tokens; max: 13 tokens |
  • Samples:
    | anchor | positive |
    |:-------|:---------|
    | air commodore | flight lieutenant |
    | command and control officer | flight officer |
    | air commodore | command and control officer |
  • Loss: GISTEmbedLoss with these parameters:
    {'guide': SentenceTransformer(
      (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel 
      (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
      (2): Normalize()
    ), 'temperature': 0.01, 'margin_strategy': 'absolute', 'margin': 0.0}
    
full_de

full_de

  • Dataset: full_de
  • Size: 23,023 training samples
  • Columns: anchor and positive
  • Approximate statistics based on the first 1000 samples:
    |         | anchor | positive |
    |:--------|:-------|:---------|
    | type    | string | string   |
    | details | min: 3 tokens; mean: 11.05 tokens; max: 45 tokens | min: 3 tokens; mean: 11.43 tokens; max: 45 tokens |
  • Samples:
    | anchor | positive |
    |:-------|:---------|
    | Staffelkommandantin | Kommodore |
    | Luftwaffenoffizierin | Luftwaffenoffizier/Luftwaffenoffizierin |
    | Staffelkommandantin | Luftwaffenoffizierin |
  • Loss: GISTEmbedLoss with these parameters:
    {'guide': SentenceTransformer(
      (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel 
      (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
      (2): Normalize()
    ), 'temperature': 0.01, 'margin_strategy': 'absolute', 'margin': 0.0}
    
full_es

full_es

  • Dataset: full_es
  • Size: 20,724 training samples
  • Columns: anchor and positive
  • Approximate statistics based on the first 1000 samples:
    |         | anchor | positive |
    |:--------|:-------|:---------|
    | type    | string | string   |
    | details | min: 3 tokens; mean: 12.95 tokens; max: 50 tokens | min: 3 tokens; mean: 12.57 tokens; max: 50 tokens |
  • Samples:
    | anchor | positive |
    |:-------|:---------|
    | jefe de escuadrón | instructor |
    | comandante de aeronave | instructor de simulador |
    | instructor | oficial del Ejército del Aire |
  • Loss: GISTEmbedLoss with these parameters:
    {'guide': SentenceTransformer(
      (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel 
      (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
      (2): Normalize()
    ), 'temperature': 0.01, 'margin_strategy': 'absolute', 'margin': 0.0}
    
full_zh

full_zh

  • Dataset: full_zh
  • Size: 30,401 training samples
  • Columns: anchor and positive
  • Approximate statistics based on the first 1000 samples:
    |         | anchor | positive |
    |:--------|:-------|:---------|
    | type    | string | string   |
    | details | min: 4 tokens; mean: 8.36 tokens; max: 20 tokens | min: 4 tokens; mean: 8.95 tokens; max: 27 tokens |
  • Samples:
    anchor positive
    技术总监 技术和运营总监
    技术总监 技术主管
    技术总监 技术艺术总监
  • Loss: GISTEmbedLoss with these parameters:
    {'guide': SentenceTransformer(
      (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel 
      (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
      (2): Normalize()
    ), 'temperature': 0.01, 'margin_strategy': 'absolute', 'margin': 0.0}
    
mix

mix

  • Dataset: mix
  • Size: 21,760 training samples
  • Columns: anchor and positive
  • Approximate statistics based on the first 1000 samples:
    |         | anchor | positive |
    |:--------|:-------|:---------|
    | type    | string | string   |
    | details | min: 2 tokens; mean: 5.65 tokens; max: 14 tokens | min: 2 tokens; mean: 10.08 tokens; max: 30 tokens |
  • Samples:
    | anchor | positive |
    |:-------|:---------|
    | technical manager | Technischer Direktor für Bühne, Film und Fernsehen |
    | head of technical | directora técnica |
    | head of technical department | 技术艺术总监 |
  • Loss: GISTEmbedLoss with these parameters:
    {'guide': SentenceTransformer(
      (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel 
      (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
      (2): Normalize()
    ), 'temperature': 0.01, 'margin_strategy': 'absolute', 'margin': 0.0}
    

Training Hyperparameters

Non-Default Hyperparameters

  • eval_strategy: steps
  • per_device_train_batch_size: 128
  • per_device_eval_batch_size: 128
  • gradient_accumulation_steps: 2
  • num_train_epochs: 5
  • warmup_ratio: 0.05
  • log_on_each_node: False
  • fp16: True
  • dataloader_num_workers: 4
  • ddp_find_unused_parameters: True
  • batch_sampler: no_duplicates

All Hyperparameters

Click to expand
  • overwrite_output_dir: False
  • do_predict: False
  • eval_strategy: steps
  • prediction_loss_only: True
  • per_device_train_batch_size: 128
  • per_device_eval_batch_size: 128
  • per_gpu_train_batch_size: None
  • per_gpu_eval_batch_size: None
  • gradient_accumulation_steps: 2
  • eval_accumulation_steps: None
  • torch_empty_cache_steps: None
  • learning_rate: 5e-05
  • weight_decay: 0.0
  • adam_beta1: 0.9
  • adam_beta2: 0.999
  • adam_epsilon: 1e-08
  • max_grad_norm: 1.0
  • num_train_epochs: 5
  • max_steps: -1
  • lr_scheduler_type: linear
  • lr_scheduler_kwargs: {}
  • warmup_ratio: 0.05
  • warmup_steps: 0
  • log_level: passive
  • log_level_replica: warning
  • log_on_each_node: False
  • logging_nan_inf_filter: True
  • save_safetensors: True
  • save_on_each_node: False
  • save_only_model: False
  • restore_callback_states_from_checkpoint: False
  • no_cuda: False
  • use_cpu: False
  • use_mps_device: False
  • seed: 42
  • data_seed: None
  • jit_mode_eval: False
  • use_ipex: False
  • bf16: False
  • fp16: True
  • fp16_opt_level: O1
  • half_precision_backend: auto
  • bf16_full_eval: False
  • fp16_full_eval: False
  • tf32: None
  • local_rank: 0
  • ddp_backend: None
  • tpu_num_cores: None
  • tpu_metrics_debug: False
  • debug: []
  • dataloader_drop_last: True
  • dataloader_num_workers: 4
  • dataloader_prefetch_factor: None
  • past_index: -1
  • disable_tqdm: False
  • remove_unused_columns: True
  • label_names: None
  • load_best_model_at_end: False
  • ignore_data_skip: False
  • fsdp: []
  • fsdp_min_num_params: 0
  • fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
  • tp_size: 0
  • fsdp_transformer_layer_cls_to_wrap: None
  • accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
  • deepspeed: None
  • label_smoothing_factor: 0.0
  • optim: adamw_torch
  • optim_args: None
  • adafactor: False
  • group_by_length: False
  • length_column_name: length
  • ddp_find_unused_parameters: True
  • ddp_bucket_cap_mb: None
  • ddp_broadcast_buffers: False
  • dataloader_pin_memory: True
  • dataloader_persistent_workers: False
  • skip_memory_metrics: True
  • use_legacy_prediction_loop: False
  • push_to_hub: False
  • resume_from_checkpoint: None
  • hub_model_id: None
  • hub_strategy: every_save
  • hub_private_repo: None
  • hub_always_push: False
  • gradient_checkpointing: False
  • gradient_checkpointing_kwargs: None
  • include_inputs_for_metrics: False
  • include_for_metrics: []
  • eval_do_concat_batches: True
  • fp16_backend: auto
  • push_to_hub_model_id: None
  • push_to_hub_organization: None
  • mp_parameters:
  • auto_find_batch_size: False
  • full_determinism: False
  • torchdynamo: None
  • ray_scope: last
  • ddp_timeout: 1800
  • torch_compile: False
  • torch_compile_backend: None
  • torch_compile_mode: None
  • include_tokens_per_second: False
  • include_num_input_tokens_seen: False
  • neftune_noise_alpha: None
  • optim_target_modules: None
  • batch_eval_metrics: False
  • eval_on_start: False
  • use_liger_kernel: False
  • eval_use_gather_object: False
  • average_tokens_across_devices: False
  • prompts: None
  • batch_sampler: no_duplicates
  • multi_dataset_batch_sampler: proportional

Training Logs

Epoch Step Training Loss full_en_cosine_ndcg@200 full_es_cosine_ndcg@200 full_de_cosine_ndcg@200 full_zh_cosine_ndcg@200 mix_es_cosine_ndcg@200 mix_de_cosine_ndcg@200 mix_zh_cosine_ndcg@200
-1 -1 - 0.7322 0.4690 0.3853 0.2723 0.3209 0.2244 0.0919
0.0021 1 23.8878 - - - - - - -
0.2058 100 7.2098 - - - - - - -
0.4115 200 4.2635 0.7800 0.5132 0.4268 0.2798 0.4372 0.2996 0.1447
0.6173 300 4.1931 - - - - - - -
0.8230 400 3.73 0.7863 0.5274 0.4451 0.2805 0.4762 0.3455 0.1648
1.0309 500 3.3569 - - - - - - -
1.2366 600 3.6464 0.7868 0.5372 0.4540 0.2813 0.5063 0.3794 0.1755
1.4424 700 3.0772 - - - - - - -
1.6481 800 3.114 0.7906 0.5391 0.4576 0.2832 0.5221 0.4047 0.1779
1.8539 900 2.9246 - - - - - - -
2.0617 1000 2.7479 0.7873 0.5423 0.4631 0.2871 0.5323 0.4143 0.1843
2.2675 1100 3.049 - - - - - - -
2.4733 1200 2.6137 0.7878 0.5418 0.4685 0.2870 0.5470 0.4339 0.1932
2.6790 1300 2.8607 - - - - - - -
2.8848 1400 2.7071 0.7889 0.5465 0.4714 0.2891 0.5504 0.4362 0.1944
3.0926 1500 2.7012 - - - - - - -
3.2984 1600 2.7423 0.7882 0.5471 0.4748 0.2868 0.5542 0.4454 0.1976
3.5041 1700 2.5316 - - - - - - -
3.7099 1800 2.6344 0.7900 0.5498 0.4763 0.2857 0.5639 0.4552 0.1954

Framework Versions

  • Python: 3.11.11
  • Sentence Transformers: 4.1.0
  • Transformers: 4.51.3
  • PyTorch: 2.6.0+cu124
  • Accelerate: 1.6.0
  • Datasets: 3.5.0
  • Tokenizers: 0.21.1

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

GISTEmbedLoss

@misc{solatorio2024gistembed,
    title={GISTEmbed: Guided In-sample Selection of Training Negatives for Text Embedding Fine-tuning},
    author={Aivin V. Solatorio},
    year={2024},
    eprint={2402.16829},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}