# Hugging Face Spaces page status: Sleeping
# Build a BM25 index over the captions of an image dataset and upload the
# resulting retriever to the Hugging Face Hub.
import json

import bm25s
import datasets
from bm25s.hf import BM25HF

# Fetch the dataset; only the "caption" column of the "train" split is used.
dataset = datasets.load_dataset("anhdt-dsai-02/test_image_dataset_1_2_3_4")

# Materialise the caption column as a plain list so the same concrete sequence
# is handed to both the retriever and the tokenizer below.
corpus = list(dataset["train"]["caption"])

# The retriever is constructed with the raw captions, then indexed on their
# tokenized form.
retriever = BM25HF(corpus=corpus)
retriever.index(bm25s.tokenize(corpus))

# Hub namespace (user or organisation) that will own the uploaded index.
# NOTE(review): no token is passed to save_to_hub here, so the upload
# presumably relies on ambient Hugging Face credentials — confirm before
# running in a fresh environment.
user = "anhdt-dsai-02"
retriever.save_to_hub(f"{user}/caption_1_2_3_4")