yasserrmd committed on
Commit
2803b23
·
verified ·
1 Parent(s): fc0ef94

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -1
app.py CHANGED
@@ -1,11 +1,16 @@
1
  import gradio as gr
2
  from docling.document_converter import DocumentConverter
 
3
  import spaces
4
 
5
  @spaces.GPU
6
  def convert_document(file, output_format):
 
 
 
 
7
  # Load document and convert it using Docling
8
- converter = DocumentConverter()
9
  result = converter.convert(file.name)
10
 
11
  # Check available attributes in DoclingDocument
 
1
  import gradio as gr
2
  from docling.document_converter import DocumentConverter
3
+ from docling.datamodel.pipeline_options import PdfPipelineOptions, TesseractCliOcrOptions
4
  import spaces
5
 
6
  @spaces.GPU
7
  def convert_document(file, output_format):
8
+ pdf_opts = PdfPipelineOptions(
9
+ do_ocr=True,
10
+ ocr_options=TesseractCliOcrOptions(lang=["eng"]), # or ["eng", "ara"]
11
+ )
12
  # Load document and convert it using Docling
13
+ converter = DocumentConverter(pipeline_options=pdf_opts)
14
  result = converter.convert(file.name)
15
 
16
  # Check available attributes in DoclingDocument