import cv2 as cv
import numpy as np
import gradio as gr
from huggingface_hub import hf_hub_download
from ppocr_det import PPOCRDet
from crnn import CRNN
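# PPOCRDet and CRNN are local wrapper modules (as in the OpenCV Zoo demos,
# ppocr_det.py and crnn.py), expected to sit alongside this script.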

# Download model files from Hugging Face
det_model_path = hf_hub_download(
    repo_id="opencv/text_detection_ppocr",
    filename="text_detection_en_ppocrv3_2023may.onnx"
)
rec_model_path = hf_hub_download(
    repo_id="opencv/text_recognition_crnn",
    filename="text_recognition_CRNN_EN_2021sep.onnx"
)

# DNN backend and target
backend_id = cv.dnn.DNN_BACKEND_OPENCV
target_id = cv.dnn.DNN_TARGET_CPU
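# A hypothetical GPU setup (requires an OpenCV build with CUDA support)
# would swap in these constants instead:
#   backend_id = cv.dnn.DNN_BACKEND_CUDA
#   target_id = cv.dnn.DNN_TARGET_CUDA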

# Detector and recognizer setup
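# DB postprocessing parameters: binaryThreshold thresholds the probability
# map, polygonThreshold filters candidate polygons by score, and unclipRatio
# controls how far each detected region is expanded.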
detector = PPOCRDet(
    modelPath=det_model_path,
    inputSize=[736, 736],
    binaryThreshold=0.3,
    polygonThreshold=0.5,
    maxCandidates=200,
    unclipRatio=2.0,
    backendId=backend_id,
    targetId=target_id
)

recognizer = CRNN(
    modelPath=rec_model_path,
    backendId=backend_id,
    targetId=target_id
)

def detect_and_recognize(input_image):
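    """Run PPOCR detection + CRNN recognition on an RGB image from Gradio.

    Returns the original image with detected boxes (left) concatenated with
    a white board carrying the recognized text (right).
    """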
    # Guard against "Submit" with no image loaded
    if input_image is None:
        return None

    bgr = cv.cvtColor(input_image, cv.COLOR_RGB2BGR)
    h_orig, w_orig = input_image.shape[:2]
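    # The detector runs at its fixed 736x736 input size; keep the scale
    # factors so detected boxes can be mapped back to original coordinates.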
    resized = cv.resize(bgr, (736, 736))
    scale_w = w_orig / 736
    scale_h = h_orig / 736

    # Detect & recognize
    det_results, _ = detector.infer(resized)
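    # Each detected quad is flattened to (x1, y1, ..., x4, y4); the CRNN
    # wrapper crops and rectifies that region before recognition.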
    texts = [recognizer.infer(resized, box.reshape(8)) for box in det_results]

    # Prepare side-by-side canvases: original image on the left,
    # white board for the recognized text on the right
    left = input_image.copy()
    right = np.full_like(input_image, 255)

    for box_raw, text in zip(det_results, texts):
        # Rescale box to original image coords
        box = np.int32([[pt[0] * scale_w, pt[1] * scale_h] for pt in box_raw])

        # Compute box dimensions
        xs = box[:, 0]
        box_w = xs.max() - xs.min()
        # Box height: average the two vertical edges of the quad
        h1 = np.linalg.norm(box[1] - box[0])
        h2 = np.linalg.norm(box[2] - box[3])
        box_h = (h1 + h2) / 2.0

        # Initial font scale so text height ≈ 80% of box height
        (_, th0), _ = cv.getTextSize(text, cv.FONT_HERSHEY_SIMPLEX, 1.0, 1)
        font_scale = (box_h * 0.8) / th0 if th0 > 0 else 1.0
        font_thickness = max(1, int(font_scale))

        # Re-measure text size with this scale
        (tw, th), _ = cv.getTextSize(text, cv.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness)

        # If the text is wider or taller than the box, scale down to fit
        scale_x = box_w / tw if tw > 0 else 1.0
        scale_y = (box_h * 0.8) / th if th > 0 else 1.0
        font_scale *= min(1.0, scale_x, scale_y)
        font_thickness = max(1, int(np.floor(font_scale)))

        # Draw boxes on both panels; the canvases are RGB (Gradio convention),
        # so red is (255, 0, 0) rather than OpenCV's BGR (0, 0, 255)
        cv.polylines(left, [box], isClosed=True, color=(255, 0, 0), thickness=2)
        cv.polylines(right, [box], isClosed=True, color=(255, 0, 0), thickness=2)

        # Draw the recognized text on the whiteboard, anchored just above
        # the box's first corner
        x0, y0 = box[0]
        y_text = max(0, int(y0 - 5))
        cv.putText(
            right, text, (int(x0), y_text),
            cv.FONT_HERSHEY_SIMPLEX,
            font_scale, (0, 0, 0), font_thickness
        )

    combined = cv.hconcat([left, right])
    return combined

with gr.Blocks(title="Scene Text Detection + Recognition (PPOCR + CRNN)") as demo:
    gr.Markdown("## Scene Text Detection and Recognition (PPOCR + CRNN)")
    gr.Markdown("Upload an image with scene text to detect text regions and recognize text using OpenCV DNN with PPOCR + CRNN models.")

    input_img = gr.Image(type="numpy", label="Upload Image")
    output_img = gr.Image(type="numpy", label="Detected Text Image")

    with gr.Row():
        submit_btn = gr.Button("Submit", variant="primary")
        clear_btn = gr.Button("Clear")

    submit_btn.click(
        fn=detect_and_recognize,
        inputs=input_img,
        outputs=output_img
    )

    clear_btn.click(
        fn=lambda: (None, None),
        inputs=[],
        outputs=[input_img, output_img]
    )

    gr.Markdown("**Note**: Left side of output shows detected regions, right side shows recognized text.")

if __name__ == "__main__":
    demo.launch()