Spaces:

jordyvl
/

viz_bdpc

Runtime error

jordyvl committed on Nov 24, 2023

Commit

13cbc38

1 Parent(s): 872bd1f

Issue with NamedTemporaryFile resolved

Files changed (1) hide show

app.py CHANGED Viewed

@@ -98,7 +98,10 @@ def main(dataset, label):
     timestamp = time.time()
     seed = int(timestamp * 1000) % 1000000
-    shuffled_dataset = DATASETS[dataset].shuffle(buffer_size=10, seed=seed)
     # first get PDF file
     for sample in shuffled_dataset:
@@ -110,9 +113,11 @@ def main(dataset, label):
         grid = pdf_to_grid(BytesIO(pdf_path))
         if grid is None:
             continue
-        PDF = tempfile.NamedTemporaryFile(suffix=".pdf")
-        PDF.write(pdf_path)
-        return filelabel, grid, pdf_path
 _CLASSES = [
@@ -139,7 +144,7 @@ _CLASSES = [
 DATASETS = OrderedDict(
     {
         # "rvl_cdip": load_dataset("bdpc/rvl_cdip_mp", split="test", streaming=True),
-        "rvl_cdip_N": load_dataset("bdpc/rvl_cdip_n_mp", split="test", streaming=True),
     }
 )
@@ -166,6 +171,7 @@ The first time that the app is launched, it will download the datasets, which ca
 For fastest response, choose the rvl_cdip_N dataset, which is considerably smaller to iterate over.
 """
 iface = gr.Interface(
     fn=main,
     inputs=sliders,

     timestamp = time.time()
     seed = int(timestamp * 1000) % 1000000
+    try:
+        shuffled_dataset = DATASETS[dataset].shuffle(buffer_size=10, seed=seed)
+    except:  # lazy
+        shuffled_dataset = DATASETS[dataset].shuffle(seed=seed)
     # first get PDF file
     for sample in shuffled_dataset:
         grid = pdf_to_grid(BytesIO(pdf_path))
         if grid is None:
             continue
+        PDF = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
+        with PDF as tmp_file:
+            # pdf_path.to_file(tmp_file.name)
+            tmp_file.write(pdf_path)
+            return filelabel, grid, tmp_file.name
 _CLASSES = [
 DATASETS = OrderedDict(
     {
         # "rvl_cdip": load_dataset("bdpc/rvl_cdip_mp", split="test", streaming=True),
+        "rvl_cdip_N": load_dataset("bdpc/rvl_cdip_n_mp", split="test"),
     }
 )
 For fastest response, choose the rvl_cdip_N dataset, which is considerably smaller to iterate over.
 """
+# main("rvl_cdip_N", "letter")
 iface = gr.Interface(
     fn=main,
     inputs=sliders,