Issue with tempfile.NamedTemporaryFile resolved
Browse files
app.py
CHANGED
|
@@ -98,7 +98,10 @@ def main(dataset, label):
|
|
| 98 |
timestamp = time.time()
|
| 99 |
seed = int(timestamp * 1000) % 1000000
|
| 100 |
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
# first get PDF file
|
| 104 |
for sample in shuffled_dataset:
|
|
@@ -110,9 +113,11 @@ def main(dataset, label):
|
|
| 110 |
grid = pdf_to_grid(BytesIO(pdf_path))
|
| 111 |
if grid is None:
|
| 112 |
continue
|
| 113 |
-
PDF = tempfile.NamedTemporaryFile(suffix=".pdf")
|
| 114 |
-
PDF
|
| 115 |
-
|
|
|
|
|
|
|
| 116 |
|
| 117 |
|
| 118 |
_CLASSES = [
|
|
@@ -139,7 +144,7 @@ _CLASSES = [
|
|
| 139 |
DATASETS = OrderedDict(
|
| 140 |
{
|
| 141 |
# "rvl_cdip": load_dataset("bdpc/rvl_cdip_mp", split="test", streaming=True),
|
| 142 |
-
"rvl_cdip_N": load_dataset("bdpc/rvl_cdip_n_mp", split="test"
|
| 143 |
}
|
| 144 |
)
|
| 145 |
|
|
@@ -166,6 +171,7 @@ The first time that the app is launched, it will download the datasets, which ca
|
|
| 166 |
For fastest response, choose the rvl_cdip_N dataset, which is considerably smaller to iterate over.
|
| 167 |
"""
|
| 168 |
|
|
|
|
| 169 |
iface = gr.Interface(
|
| 170 |
fn=main,
|
| 171 |
inputs=sliders,
|
|
|
|
| 98 |
timestamp = time.time()
|
| 99 |
seed = int(timestamp * 1000) % 1000000
|
| 100 |
|
| 101 |
+
try:
|
| 102 |
+
shuffled_dataset = DATASETS[dataset].shuffle(buffer_size=10, seed=seed)
|
| 103 |
+
except: # lazy
|
| 104 |
+
shuffled_dataset = DATASETS[dataset].shuffle(seed=seed)
|
| 105 |
|
| 106 |
# first get PDF file
|
| 107 |
for sample in shuffled_dataset:
|
|
|
|
| 113 |
grid = pdf_to_grid(BytesIO(pdf_path))
|
| 114 |
if grid is None:
|
| 115 |
continue
|
| 116 |
+
PDF = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
|
| 117 |
+
with PDF as tmp_file:
|
| 118 |
+
# pdf_path.to_file(tmp_file.name)
|
| 119 |
+
tmp_file.write(pdf_path)
|
| 120 |
+
return filelabel, grid, tmp_file.name
|
| 121 |
|
| 122 |
|
| 123 |
_CLASSES = [
|
|
|
|
| 144 |
DATASETS = OrderedDict(
|
| 145 |
{
|
| 146 |
# "rvl_cdip": load_dataset("bdpc/rvl_cdip_mp", split="test", streaming=True),
|
| 147 |
+
"rvl_cdip_N": load_dataset("bdpc/rvl_cdip_n_mp", split="test"),
|
| 148 |
}
|
| 149 |
)
|
| 150 |
|
|
|
|
| 171 |
For fastest response, choose the rvl_cdip_N dataset, which is considerably smaller to iterate over.
|
| 172 |
"""
|
| 173 |
|
| 174 |
+
# main("rvl_cdip_N", "letter")
|
| 175 |
iface = gr.Interface(
|
| 176 |
fn=main,
|
| 177 |
inputs=sliders,
|