File size: 4,975 Bytes
19dc2e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import streamlit as st
import matplotlib.pyplot as plt
import cv2
import easyocr
import numpy as np
from PIL import Image as PILImage
from io import BytesIO
import pdf2image

# Function to handle file upload and OCR processing
# EasyOCR readers already built during this script run, keyed by language code,
# so a multi-page PDF does not construct a new reader for every page.
_OCR_READERS = {}


def process_image(image, language):
    """Apply the scan effect to *image* and run EasyOCR on the result.

    Args:
        image: A PIL image (any mode) or an RGB array-like of shape
            (height, width, 3).
        language: Language code passed to ``easyocr.Reader``
            (the app offers 'ne' and 'en').

    Returns:
        A ``(processed_image, output)`` tuple: the array returned by
        ``scan_effect`` and the list returned by ``Reader.readtext``.
    """
    reader = _OCR_READERS.get(language)
    if reader is None:
        reader = easyocr.Reader([language])
        _OCR_READERS[language] = reader

    # Grayscale, palette and RGBA images do not yield an (H, W, 3) RGB array,
    # which the channel reversal below requires; normalise PIL inputs first.
    if hasattr(image, "convert"):
        image = image.convert("RGB")

    # Convert the image to OpenCV format (RGB -> BGR channel order).
    open_cv_image = np.array(image)
    open_cv_image = open_cv_image[:, :, ::-1].copy()

    # Apply the scan effect, then OCR the cleaned-up image.
    processed_image = scan_effect(open_cv_image)
    output = reader.readtext(processed_image)

    return processed_image, output

# Function for image scan effect
def scan_effect(img, blackPoint=66, whitePoint=130):
    """Return a black-and-white "scanned" rendition of a BGR image.

    The image is passed through ``blackPointSelect``, then
    ``whitePointSelect``, then ``blackAndWhite``.

    Args:
        img: BGR image array (``blackAndWhite`` converts it with
            ``cv2.COLOR_BGR2LAB``).
        blackPoint: Level handed to ``blackPointSelect``. Defaults to 66.
        whitePoint: Level handed to ``whitePointSelect``. Defaults to 130.

    Returns:
        The single-channel array produced by ``blackAndWhite``.
    """
    # A high-pass branch (highPassFilter -> whitePointSelect ->
    # blackPointSelect) is deliberately not run here: its result never fed
    # the returned image, so computing it was pure overhead.
    darkened = blackPointSelect(img, blackPoint)
    levelled = whitePointSelect(darkened, whitePoint)
    return blackAndWhite(levelled)

# Helper functions for scan effect
def map(x, in_min, in_max, out_min, out_max):
    """Linearly rescale *x* from [in_min, in_max] to [out_min, out_max].

    Works on scalars and on NumPy arrays (element-wise). No clamping is
    applied, so inputs outside the source range map outside the target range.

    NOTE: this intentionally keeps the name ``map`` (shadowing the builtin)
    because other functions in this file call it by that name.
    """
    offset = x - in_min
    out_span = out_max - out_min
    in_span = in_max - in_min
    # Multiply before dividing, matching the original evaluation order.
    return offset * out_span / in_span + out_min

def highPassFilter(img, kSize):
    """Subtract a box-blurred copy from *img* and re-centre the result on 127.

    Args:
        img: Image array accepted by ``cv2.filter2D``.
        kSize: Side length of the square averaging kernel; an even value is
            bumped to the next odd number.

    Returns:
        A ``uint8`` array with the same shape as *img*.
    """
    if not kSize % 2:
        kSize += 1
    kernel = np.ones((kSize, kSize), np.float32) / (kSize * kSize)
    blurred = cv2.filter2D(img, -1, kernel)
    detail = img.astype('float32') - blurred.astype('float32') + 127
    # The difference can fall outside 0..255; clip so the uint8 cast
    # saturates instead of producing wrapped/undefined values.
    return np.clip(detail, 0, 255).astype('uint8')

def blackPointSelect(img, blackPoint):
    """Stretch levels so that *blackPoint* maps to 0 and 255 stays 255.

    Values at or below *blackPoint* end up as 0.

    Args:
        img: Image array.
        blackPoint: Input level that should become black.

    Returns:
        The remapped image as a ``uint8`` array.
    """
    # Widen before remapping so values below the black point can go negative.
    widened = img.astype('int32')
    stretched = map(widened, blackPoint, 255, 0, 255)
    # THRESH_TOZERO with threshold 0 zeroes every non-positive value.
    floored = cv2.threshold(stretched, 0, 255, cv2.THRESH_TOZERO)[1]
    return floored.astype('uint8')

def whitePointSelect(img, whitePoint):
    """Stretch levels so that *whitePoint* and everything above become 255.

    Args:
        img: Image array accepted by ``cv2.threshold``.
        whitePoint: Input level that should become white.

    Returns:
        The remapped image as a ``uint8`` array.
    """
    # THRESH_TRUNC caps every value above whitePoint at whitePoint.
    capped = cv2.threshold(img, whitePoint, 255, cv2.THRESH_TRUNC)[1]
    rescaled = map(capped.astype('int32'), 0, whitePoint, 0, 255)
    return rescaled.astype('uint8')

def blackAndWhite(img):
    """Collapse a BGR image to one channel using its LAB components.

    The image is converted to LAB and the result is
    ``(L - B) + (L - A)``, computed with ``cv2.subtract`` / ``cv2.add``.

    Args:
        img: BGR image array.

    Returns:
        A single-channel array.
    """
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2LAB))
    minus_b = cv2.subtract(lightness, chan_b)
    minus_a = cv2.subtract(lightness, chan_a)
    return cv2.add(minus_b, minus_a)

# Streamlit app layout
def _render_result(processed_image, ocr_output, file_name, widget_key):
    """Show one processed image, its OCR text and a PNG download button.

    Args:
        processed_image: Array returned by ``process_image``.
        ocr_output: OCR results; the text of each item is read from index 1.
        file_name: Name offered to the browser for the downloaded PNG.
        widget_key: Unique Streamlit key for the download button, so several
            buttons can coexist on one page.
    """
    st.image(processed_image, caption="Processed Image", use_column_width=True)

    # Display OCR output
    st.write("Extracted Text:")
    for item in ocr_output:
        st.write(item[1])

    # Allow download of processed image
    buf = BytesIO()
    PILImage.fromarray(processed_image).save(buf, format="PNG")
    st.download_button(
        label="Download Processed Image",
        data=buf.getvalue(),
        file_name=file_name,
        mime="image/png",
        key=widget_key,
    )


st.title("OCR and Image Processing App")

# Sidebar for file upload and language selection
st.sidebar.title("Upload and Settings")
uploaded_file = st.sidebar.file_uploader("Upload PDF or Image", type=["pdf", "png", "jpg", "jpeg"])
language_option = st.sidebar.radio("Select OCR Language", ('ne', 'en'))  # 'ne' for Nepali, 'en' for English

if uploaded_file:
    if uploaded_file.type == "application/pdf":
        # Convert PDF to images (one PIL image per page)
        images = pdf2image.convert_from_bytes(uploaded_file.read())

        # st.image needs one caption per image when given a list of images.
        page_captions = [
            f"Uploaded PDF as Images (page {number})"
            for number in range(1, len(images) + 1)
        ]
        st.image(images, caption=page_captions, use_column_width=True)

        if st.button("Process PDF"):
            # Process every page of the PDF
            st.write("Processing PDF...")
            for page_number, page_image in enumerate(images, start=1):
                with st.spinner('Processing...'):
                    processed_image, ocr_output = process_image(page_image, language_option)
                    # Per-page key and file name keep the download buttons
                    # distinct from one another.
                    _render_result(
                        processed_image,
                        ocr_output,
                        file_name=f"processed_page_{page_number}.png",
                        widget_key=f"download_page_{page_number}",
                    )

    else:
        # Handle image files
        image = PILImage.open(uploaded_file)
        st.image(image, caption="Uploaded Image", use_column_width=True)

        if st.button("Process Image"):
            # Process the uploaded image
            with st.spinner('Processing...'):
                processed_image, ocr_output = process_image(image, language_option)
                _render_result(
                    processed_image,
                    ocr_output,
                    file_name="processed_image.png",
                    widget_key="download_image",
                )