Spaces:
Sleeping
Sleeping
slaren
committed on
cuda : fix data race in soft max (llama/5853)
Browse files - ggml-cuda.cu +1 -0
ggml-cuda.cu
CHANGED
|
@@ -6947,6 +6947,7 @@ static __global__ void soft_max_f32(const float * x, const float * mask, const f
|
|
| 6947 |
// find the sum of exps in the block
|
| 6948 |
tmp = warp_reduce_sum(tmp);
|
| 6949 |
if (block_size > WARP_SIZE) {
|
|
|
|
| 6950 |
if (warp_id == 0) {
|
| 6951 |
buf_iw[lane_id] = 0.0f;
|
| 6952 |
}
|
|
|
|
| 6947 |
// find the sum of exps in the block
|
| 6948 |
tmp = warp_reduce_sum(tmp);
|
| 6949 |
if (block_size > WARP_SIZE) {
|
| 6950 |
+
__syncthreads();
|
| 6951 |
if (warp_id == 0) {
|
| 6952 |
buf_iw[lane_id] = 0.0f;
|
| 6953 |
}
|