arxiv-classifier / push_dataset.py
mmkuznecov's picture
added indexes for search
f44d262
raw
history blame contribute delete
379 Bytes
from datasets import load_from_disk
from huggingface_hub import HfApi
# Where the snapshot is read from locally, and the Hub dataset repo it goes to.
DATASET_PATH = "data/arxiv_parsed/snapshot_latest"
HF_DATASET_REPO = "mmkuznecov/arxiv-recent"

# Load the saved snapshot and report its length and column names before upload.
ds = load_from_disk(DATASET_PATH)
row_count = len(ds)
columns = ds.column_names
print(f"Loaded {row_count} rows, columns: {columns}")

# Upload to the Hub, explicitly requesting a non-private repository.
ds.push_to_hub(HF_DATASET_REPO, private=False)
hub_url = f"https://huggingface.co/datasets/{HF_DATASET_REPO}"
print(f"Pushed to {hub_url}")