Spaces:
Paused
Paused
update
Browse files
app.py
CHANGED
@@ -5,11 +5,50 @@ import os
|
|
5 |
import shutil
|
6 |
import uuid
|
7 |
import glob
|
8 |
-
|
9 |
-
from
|
10 |
|
11 |
api = HfApi(token=os.environ["HF_TOKEN"])
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
scheduler = CommitScheduler(
|
14 |
repo_id="taesiri/zb_dataset_storage",
|
15 |
repo_type="dataset",
|
@@ -884,4 +923,8 @@ with gr.Blocks() as demo:
|
|
884 |
],
|
885 |
)
|
886 |
|
887 |
-
|
|
|
|
|
|
|
|
|
|
5 |
import shutil
|
6 |
import uuid
|
7 |
import glob
|
8 |
+
from huggingface_hub import CommitScheduler, HfApi, snapshot_download
|
9 |
+
from pathlib import Path
|
10 |
|
11 |
api = HfApi(token=os.environ["HF_TOKEN"])
|
12 |
|
13 |
+
|
14 |
+
# Download existing data from hub
|
15 |
+
def sync_with_hub():
    """Merge the dataset stored on the hub into the local ``./data`` folder.

    Steps:

    1. If ``./data`` exists, snapshot it to ``./data_backup`` (replacing any
       previous backup).
    2. Download the ``taesiri/zb_dataset_storage`` dataset repo into a
       temporary ``hub_data`` folder.
    3. Copy every sub-directory of the downloaded ``data`` folder that does
       not already exist locally into ``./data``. Existing local folders are
       never overwritten, and top-level files are not copied.
    4. Remove the temporary ``hub_data`` folder, even if the download or the
       merge raised.

    Any exception from the download or the copy is propagated to the caller
    after cleanup.
    """
    print("Starting sync with hub...")

    data_dir = Path("./data")
    download_dir = Path("hub_data")

    if data_dir.exists():
        # Keep a copy of the current local state so a bad merge can be undone.
        backup_dir = Path("./data_backup")
        if backup_dir.exists():
            shutil.rmtree(backup_dir)
        shutil.copytree(data_dir, backup_dir)

    try:
        # Download latest data from hub
        repo_path = snapshot_download(
            repo_id="taesiri/zb_dataset_storage",
            repo_type="dataset",
            local_dir=str(download_dir),
        )

        hub_data_dir = Path(repo_path) / "data"
        if hub_data_dir.exists():
            data_dir.mkdir(exist_ok=True)

            for item in hub_data_dir.glob("*"):
                if not item.is_dir():
                    continue
                dest = data_dir / item.name
                if dest.exists():
                    # Local copy wins: only folders missing locally are added.
                    continue
                shutil.copytree(item, dest)
    finally:
        # Always drop the temporary download so a failed sync does not leave
        # a stale ``hub_data`` folder next to the app.
        if download_dir.exists():
            shutil.rmtree(download_dir)

    print("Finished syncing with hub!")
|
50 |
+
|
51 |
+
|
52 |
scheduler = CommitScheduler(
|
53 |
repo_id="taesiri/zb_dataset_storage",
|
54 |
repo_type="dataset",
|
|
|
923 |
],
|
924 |
)
|
925 |
|
926 |
+
if __name__ == "__main__":
    # Startup sequence: first merge the hub data into the local data folder,
    # then launch the interface.
    print("Initializing app...")
    sync_with_hub()

    print("Starting Gradio interface...")
    demo.launch()
|