import json
import os
import shutil
from pathlib import Path

import cv2 as cv
import numpy as np
from PIL import Image, ImageDraw, ImageFont


def get_data_and_annots():
    """
    Loads the COCO-style PubLayNet annotations and matches them against the images
    present on disk, keeping at most 10000 images.

    Returns:
        tuple: (images dict keyed by image id, full annotation data dict).
    """
    images = {}
    with open('data/raw/label/publaynet/train.json') as t:
        data = json.load(t)
    # Only the file names in the top-level train directory are relevant
    train_imgs = set(next(os.walk('data/raw/train/publaynet/train'), (None, None, []))[2])
    for image in data['images']:
        if image['file_name'] in train_imgs:
            images[image['id']] = {
                'file_name': 'data/raw/train/publaynet/train/' + image['file_name'],
                'annotations': []
            }
        if len(images) == 10000:
            break
    for ann in data['annotations']:
        if ann['image_id'] in images:
            images[ann['image_id']]['annotations'].append(ann)
    return images, data


def markup(samples, image, annotations):
    """
    Draws the segmentation, bounding box, and label of each annotation.
    Note: relies on the module-level `colors` mapping defined in the __main__ block.
    """
    draw = ImageDraw.Draw(image, 'RGBA')
    font = ImageFont.load_default()  # A different font can be specified if needed
    for annotation in annotations:
        category_name = samples['categories'][annotation['category_id'] - 1]['name']
        # Draw segmentation
        draw.polygon(annotation['segmentation'][0], fill=colors[category_name] + (64,))
        # Draw bbox
        draw.rectangle(
            (annotation['bbox'][0],
             annotation['bbox'][1],
             annotation['bbox'][0] + annotation['bbox'][2],
             annotation['bbox'][1] + annotation['bbox'][3]),
            outline=colors[category_name] + (255,),
            width=2
        )
        # Draw label
        text = category_name
        bbox = draw.textbbox((0, 0), text, font=font)
        w = bbox[2] - bbox[0]
        h = bbox[3] - bbox[1]
        if annotation['bbox'][3] < h:
            # The box is shorter than the label, so place the label to the right of the box
            draw.rectangle(
                (annotation['bbox'][0] + annotation['bbox'][2],
                 annotation['bbox'][1],
                 annotation['bbox'][0] + annotation['bbox'][2] + w,
                 annotation['bbox'][1] + h),
                fill=(64, 64, 64, 255)
            )
            draw.text(
                (annotation['bbox'][0] + annotation['bbox'][2], annotation['bbox'][1]),
                text=text,
                fill=(255, 255, 255, 255)
            )
        else:
            draw.rectangle(
                (annotation['bbox'][0],
                 annotation['bbox'][1],
                 annotation['bbox'][0] + w,
                 annotation['bbox'][1] + h),
                fill=(64, 64, 64, 255)
            )
            draw.text(
                (annotation['bbox'][0], annotation['bbox'][1]),
                text=text,
                fill=(255, 255, 255, 255)
            )
    return np.array(image)


def write_file(image_id, inside, filename, content, check_set):
    """
    Writes content to a file. If 'inside' is True, appends the content,
    otherwise overwrites the file.

    Args:
        image_id (str): The ID of the image.
        inside (bool): Flag to determine if content should be appended or overwritten.
        filename (str): The path to the file.
        content (str): The content to write to the file.
        check_set (set): A set to keep track of image IDs.
    """
    if inside:
        with open(filename, "a") as file:
            file.write("\n")
            file.write(content)
    else:
        check_set.add(image_id)
        with open(filename, "w") as file:
            file.write(content)


def get_bb_shape(bboxe, img):
    """
    Calculates the shape of the bounding box in the image.

    Args:
        bboxe (list): Bounding box coordinates [x, y, width, height].
        img (numpy.ndarray): The image array.

    Returns:
        tuple: The shape (height, width) of the bounding box.
    """
    tleft = (bboxe[0], bboxe[1])
    tright = (bboxe[0] + bboxe[2], bboxe[1])
    bleft = (bboxe[0], bboxe[1] + bboxe[3])
    bright = (bboxe[0] + bboxe[2], bboxe[1] + bboxe[3])
    top_left_x = min([tleft[0], tright[0], bleft[0], bright[0]])
    top_left_y = min([tleft[1], tright[1], bleft[1], bright[1]])
    bot_right_x = max([tleft[0], tright[0], bleft[0], bright[0]])
    bot_right_y = max([tleft[1], tright[1], bleft[1], bright[1]])
    image = img[int(top_left_y):int(bot_right_y) + 1, int(top_left_x):int(bot_right_x) + 1]
    return image.shape[:2]


def coco_to_yolo(x1, y1, w, h, image_w, image_h):
    """
    Converts a COCO format bounding box to YOLO format.

    Args:
        x1 (float): Top-left x coordinate.
        y1 (float): Top-left y coordinate.
        w (float): Width of the bounding box.
        h (float): Height of the bounding box.
        image_w (int): Width of the image.
        image_h (int): Height of the image.

    Returns:
        list: YOLO format bounding box [x_center, y_center, width, height], normalized to [0, 1].
    """
    return [((2 * x1 + w) / (2 * image_w)),
            ((2 * y1 + h) / (2 * image_h)),
            w / image_w,
            h / image_h]


def create_directory(path):
    """
    Creates a directory, deleting it first if it already exists.

    Args:
        path (str): The path to the directory.
    """
    dirpath = Path(path)
    if dirpath.exists() and dirpath.is_dir():
        shutil.rmtree(dirpath)
    os.mkdir(dirpath)


def generate_yolo_labels(images):
    """
    Generates YOLO format labels from the given images and annotations.

    Args:
        images (dict): Dictionary containing image data and annotations.
    """
    check_set = set()
    create_directory(os.getcwd() + '/data/processed/yolo')
    for key in images:
        annotations = images[key]['annotations']
        image_id = ','.join(str(ann['image_id']) for ann in annotations)
        category_ids = [ann['category_id'] - 1 for ann in annotations]
        bboxes = [ann['bbox'] for ann in annotations]
        image_path = images[key]['file_name']
        filename = os.getcwd() + '/data/processed/yolo/' + image_path.split('/')[-1].split('.')[0] + '.txt'
        img = cv.imread(image_path)  # read each image once instead of once per bounding box
        for index, bbox in enumerate(bboxes):
            shape = get_bb_shape(bbox, img)
            yolo_bbox = coco_to_yolo(bbox[0], bbox[1], shape[1], shape[0], img.shape[1], img.shape[0])
            content = f"{category_ids[index]} {yolo_bbox[0]} {yolo_bbox[1]} {yolo_bbox[2]} {yolo_bbox[3]}"
            if image_id in check_set:
                write_file(image_id, True, filename, content, check_set)
            else:
                write_file(image_id, False, filename, content, check_set)


def delete_additional_images(old_train_path, temp_images_path, yolo_path):
    """Moves only the images that received a YOLO label file into a temporary directory."""
    train = next(os.walk(old_train_path), (None, None, []))[2]
    label = set(next(os.walk(yolo_path), (None, None, []))[2])
    create_directory(temp_images_path)
    for img in train:
        stem = img.split(".")[0]
        txt = f"{stem}.txt"
        if txt in label:
            shutil.move(f"{old_train_path}/{img}", f"{temp_images_path}/{img}")
    return


def split_data(temp_images_path):
    """Splits the images into training (80%) and validation (20%) sets."""
    image = next(os.walk(temp_images_path), (None, None, []))[2]
    # Keep the middle 80% of the files for training; the remaining 20% become validation
    train = image[int(len(image) * .1):int(len(image) * .90)]
    validation = list(set(image) - set(train))
    create_directory(os.getcwd() + '/data/processed/training')
    create_directory(os.getcwd() + '/data/processed/validation')
    create_directory(os.getcwd() + '/data/processed/training/images/')
    create_directory(os.getcwd() + '/data/processed/validation/images/')
    for train_img in train:
        shutil.move(f'{temp_images_path}/{train_img}', os.getcwd() + '/data/processed/training/images/')
    for valid_img in validation:
        shutil.move(f'{temp_images_path}/{valid_img}', os.getcwd() + '/data/processed/validation/images/')
    validation_without_ext = [i.split('.')[0] for i in validation]
    return validation_without_ext


def get_labels(yolo_path, valid_without_extension):
    """Distributes the YOLO label files into the training and validation label directories."""
    create_directory(os.getcwd() + '/data/processed/training/labels')
    create_directory(os.getcwd() + '/data/processed/validation/labels')
    label = next(os.walk(yolo_path), (None, None, []))[2]
    for lab in label:
        split = lab.split(".")[0]
        if split in valid_without_extension:
            shutil.move(f"{yolo_path}/{lab}", os.getcwd() + f'/data/processed/validation/labels/{lab}')
        else:
            shutil.move(f"{yolo_path}/{lab}", os.getcwd() + f'/data/processed/training/labels/{lab}')
    return


def final_preparation(old_train_path, temp_images_path, yolo_path, images):
    """Runs the image/label split and removes the temporary image directory afterwards."""
    delete_additional_images(old_train_path, temp_images_path, yolo_path)
    valid_without_extension = split_data(temp_images_path)
    dirpath = Path(temp_images_path)
    if dirpath.exists() and dirpath.is_dir():
        shutil.rmtree(dirpath)
    return get_labels(yolo_path, valid_without_extension)


def annotate_tables(directory):
    """
    Crops the table regions (class id 3) referenced by the YOLO label files for the
    hand-labeled images in `directory` and saves them to data/processed/tables.
    """
    create_directory(os.getcwd() + '/data/processed/tables')
    # Iterate through the directory
    for filename in os.listdir(directory):
        # Get the full path of the file
        file_path = os.path.join(directory, filename)
        # Skip anything that is not a file (e.g. subdirectories)
        if not os.path.isfile(file_path):
            continue
        img_name = filename.split('.')[0]
        # The image may live in either the training or the validation split
        if os.path.isfile(os.getcwd() + f'/data/processed/training/images/{img_name}.jpg'):
            with open(os.getcwd() + f'/data/processed/training/labels/{img_name}.txt', 'r') as f:
                results = f.readlines()
            original_image = Image.open(os.getcwd() + f'/data/processed/training/images/{img_name}.jpg')
        elif os.path.isfile(os.getcwd() + f'/data/processed/validation/images/{img_name}.jpg'):
            with open(os.getcwd() + f'/data/processed/validation/labels/{img_name}.txt', 'r') as f:
                results = f.readlines()
            original_image = Image.open(os.getcwd() + f'/data/processed/validation/images/{img_name}.jpg')
        else:
            continue
        img_w, img_h = original_image.size
        # Iterate through the label lines: "class x_center y_center width height" (normalized)
        for line in results:
            parts = line.split()
            if not parts:
                continue
            # Check if the labeled object is a table
            if int(parts[0]) == 3:
                x_c, y_c, w, h = map(float, parts[1:5])
                # Convert the normalized YOLO box back to pixel (left, top, right, bottom) coordinates
                x1 = int((x_c - w / 2) * img_w)
                y1 = int((y_c - h / 2) * img_h)
                x2 = int((x_c + w / 2) * img_w)
                y2 = int((y_c + h / 2) * img_h)
                # Crop the original image to the table region
                table_image = original_image.crop((x1, y1, x2, y2))
                # Show the cropped table image
                table_image.show()
                # Save the cropped table image
                table_image.save(os.getcwd() + f'/data/processed/tables/{img_name}.jpg')
                # Break after finding the first table (remove this to crop multiple tables)
                break


if __name__ == '__main__':
    colors = {'title': (255, 0, 0), 'text': (0, 255, 0), 'figure': (0, 0, 255),
              'table': (255, 255, 0), 'list': (0, 255, 255)}
    images, data = get_data_and_annots()
    generate_yolo_labels(images)
    final_preparation(os.path.join(os.getcwd(), 'data', 'raw', 'train', 'publaynet', 'train'),
                      os.path.join(os.getcwd(), 'data', 'processed', 'images'),
                      os.getcwd() + '/data/processed/yolo',
                      images)
    annotate_tables(os.getcwd() + '/data/processed/hand_labeled_tables/hand_labeled_tables')
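

# Illustrative only: the inverse of coco_to_yolo, i.e. the denormalization that
# annotate_tables applies to each label line. It is never called by the pipeline,
# and the 1000x800 page size below is an assumption chosen purely for this sketch.
def _demo_yolo_to_pixels():
    img_w, img_h = 1000, 800  # hypothetical page size (width x height)
    _, x_c, y_c, w, h = map(float, "3 0.5 0.5 0.25 0.10".split())
    # Expected pixel box: (375, 360, 625, 440)
    return (int((x_c - w / 2) * img_w), int((y_c - h / 2) * img_h),
            int((x_c + w / 2) * img_w), int((y_c + h / 2) * img_h))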