Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,11 +1,16 @@
|
|
1 |
import gradio as gr
|
2 |
from docling.document_converter import DocumentConverter
|
|
|
3 |
import spaces
|
4 |
|
5 |
@spaces.GPU
|
6 |
def convert_document(file, output_format):
|
|
|
|
|
|
|
|
|
7 |
# Load document and convert it using Docling
|
8 |
-
converter = DocumentConverter()
|
9 |
result = converter.convert(file.name)
|
10 |
|
11 |
# Check available attributes in DoclingDocument
|
|
|
1 |
import gradio as gr
|
2 |
from docling.document_converter import DocumentConverter
|
3 |
+
from docling.datamodel.pipeline_options import PdfPipelineOptions, TesseractCliOcrOptions
|
4 |
import spaces
|
5 |
|
6 |
@spaces.GPU
|
7 |
def convert_document(file, output_format):
|
8 |
+
pdf_opts = PdfPipelineOptions(
|
9 |
+
do_ocr=True,
|
10 |
+
ocr_options=TesseractCliOcrOptions(lang=["eng"]), # or ["eng", "ara"]
|
11 |
+
)
|
12 |
# Load document and convert it using Docling
|
13 |
+
converter = DocumentConverter(pipeline_options=pdf_opts)
|
14 |
result = converter.convert(file.name)
|
15 |
|
16 |
# Check available attributes in DoclingDocument
|