Update README.md
Browse files
README.md
CHANGED
|
@@ -75,20 +75,10 @@ We used 8xA100 GPUs for model training.
|
|
| 75 |
|
| 76 |
|
| 77 |
|
| 78 |
-
##
|
| 79 |
|
| 80 |
TPO paper:
|
| 81 |
```
|
| 82 |
-
@article{meng2024simpo,
|
| 83 |
-
title={{SimPO}: Simple preference optimization with a reference-free reward},
|
| 84 |
-
author={Meng, Yu and Xia, Mengzhou and Chen, Danqi},
|
| 85 |
-
journal={arXiv preprint arXiv:2405.14734},
|
| 86 |
-
year={2024}
|
| 87 |
-
}
|
| 88 |
-
```
|
| 89 |
-
|
| 90 |
-
UltraFeedback paper:
|
| 91 |
-
```
|
| 92 |
@misc{saeidi2025triplepreferenceoptimizationachieving,
|
| 93 |
title={Triple Preference Optimization: Achieving Better Alignment using a Single Step Optimization},
|
| 94 |
author={Amir Saeidi and Shivanshu Verma and Aswin RRV and Kashif Rasul and Chitta Baral},
|
|
|
|
| 75 |
|
| 76 |
|
| 77 |
|
| 78 |
+
## Citation
|
| 79 |
|
| 80 |
TPO paper:
|
| 81 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
@misc{saeidi2025triplepreferenceoptimizationachieving,
|
| 83 |
title={Triple Preference Optimization: Achieving Better Alignment using a Single Step Optimization},
|
| 84 |
author={Amir Saeidi and Shivanshu Verma and Aswin RRV and Kashif Rasul and Chitta Baral},
|