"""Build a BM25 index over a dataset's captions and upload it to the Hugging Face Hub."""

import json

import bm25s
import datasets
from bm25s.hf import BM25HF

# Load the source dataset and use the "caption" column of its "train" split
# as the retrieval corpus.
dataset = datasets.load_dataset("anhdt-dsai-02/test_image_dataset_1_2_3_4")
corpus = dataset["train"]["caption"]

# The retriever is given the raw captions as its corpus; the index itself is
# built from their tokenized form.
retriever = BM25HF(corpus=corpus)
retriever.index(bm25s.tokenize(corpus))

# Set your username and token
# NOTE(review): no token is passed to save_to_hub here; presumably the Hub
# credentials come from a prior login or the environment — confirm.
user = "anhdt-dsai-02"
retriever.save_to_hub(f"{user}/caption_1_2_3_4")