taesiri committed on
Commit
fea4982
·
1 Parent(s): 5943a36
Files changed (1) hide show
  1. app.py +46 -3
app.py CHANGED
@@ -5,11 +5,50 @@ import os
5
  import shutil
6
  import uuid
7
  import glob
8
-
9
- from huggingface_hub import CommitScheduler, HfApi
10
 
11
  api = HfApi(token=os.environ["HF_TOKEN"])
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  scheduler = CommitScheduler(
14
  repo_id="taesiri/zb_dataset_storage",
15
  repo_type="dataset",
@@ -884,4 +923,8 @@ with gr.Blocks() as demo:
884
  ],
885
  )
886
 
887
- demo.launch()
 
 
 
 
 
5
  import shutil
6
  import uuid
7
  import glob
8
+ from huggingface_hub import CommitScheduler, HfApi, snapshot_download
9
+ from pathlib import Path
10
 
11
  api = HfApi(token=os.environ["HF_TOKEN"])
12
 
13
+
14
+ # Download existing data from hub
15
def sync_with_hub():
    """Copy dataset folders that exist on the hub but not locally into ./data.

    Steps, in order:

    1. If ``./data`` exists, mirror it to ``./data_backup``, replacing any
       previous backup.
    2. Download a snapshot of the ``taesiri/zb_dataset_storage`` dataset
       repo into ``hub_data``.
    3. For each sub-directory of the snapshot's ``data`` folder, copy it to
       ``./data`` only when nothing with that name exists locally. Local
       entries are never overwritten, and plain files at the top level of
       the hub ``data`` folder are ignored.
    4. Remove ``hub_data`` again, even when the download or merge raised.

    Errors from ``snapshot_download`` or ``shutil`` are not caught here;
    they propagate to the caller.
    """
    print("Starting sync with hub...")
    data_dir = Path("./data")
    download_dir = Path("hub_data")

    if data_dir.exists():
        # Backup existing data before touching it
        backup_dir = Path("./data_backup")
        if backup_dir.exists():
            shutil.rmtree(backup_dir)
        shutil.copytree(data_dir, backup_dir)

    try:
        # Download latest data from hub
        repo_path = snapshot_download(
            repo_id="taesiri/zb_dataset_storage",
            repo_type="dataset",
            local_dir=str(download_dir),
        )

        # Merge hub data with local data
        hub_data_dir = Path(repo_path) / "data"
        if hub_data_dir.exists():
            # Create data dir if it doesn't exist
            data_dir.mkdir(exist_ok=True)

            for item in hub_data_dir.iterdir():
                if not item.is_dir():
                    continue
                dest = data_dir / item.name
                if not dest.exists():  # Only copy if doesn't exist locally
                    shutil.copytree(item, dest)
    finally:
        # Clean up the downloaded repo on failure too, so a broken sync
        # does not leave a stale snapshot on disk.
        if download_dir.exists():
            shutil.rmtree(download_dir)

    print("Finished syncing with hub!")
50
+
51
+
52
  scheduler = CommitScheduler(
53
  repo_id="taesiri/zb_dataset_storage",
54
  repo_type="dataset",
 
923
  ],
924
  )
925
 
926
if __name__ == "__main__":
    # Run the hub sync and start the UI only when executed as a script,
    # not when this module is imported.
    print("Initializing app...")
    sync_with_hub()  # Sync before launching the app
    print("Starting Gradio interface...")
    demo.launch()