# sample_inference.py
import argparse
import torch
from ultralytics import YOLO
import cv2
import numpy as np
import json
from PIL import Image

def main():
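    """Run YOLO person segmentation on a single image, saving a visualization and a JSON report."""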
    parser = argparse.ArgumentParser(description='Run person segmentation with YOLO12l-seg model')
    parser.add_argument('--model', type=str, default='yolo12l-person-seg.pt', help='Model path')
    parser.add_argument('--image', type=str, required=True, help='Image path for inference')
    parser.add_argument('--output', type=str, default='output.jpg', help='Output visualization image path')
    parser.add_argument('--json', type=str, default='detections.json', help='JSON output file for detection data')
    parser.add_argument('--conf', type=float, default=0.5, help='Confidence threshold')
    args = parser.parse_args()

    # Load the model
    model = YOLO(args.model)
    
    # Move to appropriate device if available
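    # Half precision (FP16) is generally only supported on CUDA,
    # so the MPS and CPU branches below run in FP32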
    if torch.cuda.is_available():
        print(f"Using CUDA device: {torch.cuda.get_device_name(0)}")
        model.to('cuda')
        device = 'cuda'
        use_half = True
    elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
        print("Using Apple Silicon MPS")
        model.to('mps')
        device = 'mps'
        use_half = False
    else:
        print("Using CPU")
        device = None
        use_half = False

    # Load the input image once to validate it and record its dimensions
    try:
        with Image.open(args.image) as img:
            img_width, img_height = img.size
        print(f"Image dimensions: {img_width}x{img_height}")
    except Exception as e:
        print(f"Error opening image: {e}")
        return

    # Run inference, keeping only class 0 ("person" in COCO)
    if device == 'cuda':
        results = model(args.image, classes=0, conf=args.conf, device=device, half=use_half)
    elif device == 'mps':
        results = model(args.image, classes=0, conf=args.conf, device=device)
    else:
        results = model(args.image, classes=0, conf=args.conf)

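    # Each element of `results` is an ultralytics Result object; `result.masks.xy`
    # yields one (num_points, 2) polygon per detected instance, in pixel coordinates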
    detections = []
    person_count = 0
    visualization_img = cv2.imread(args.image)
    if visualization_img is None:
        print(f"Error: OpenCV could not read {args.image}")
        return

    for result in results:
        masks = result.masks
        boxes = result.boxes
        
        if boxes is None or len(boxes) == 0:
            print("No people detected in the image")
            return
            
        person_count = len(boxes)
        print(f"Detected {person_count} people")

        # Visualize and extract data
        if masks is not None:
            for i, (mask, box) in enumerate(zip(masks.xy, boxes)):
                confidence = float(box.conf[0])
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                
                # Extract the mask polygon (pixel coordinates)
                polygon_points = mask.tolist()
                if len(polygon_points) < 3:
                    continue  # skip degenerate masks that cannot form a polygon
                
                # Mask extent as fractions (0-1) of the image dimensions
                x_coords = [point[0] for point in polygon_points]
                y_coords = [point[1] for point in polygon_points]
                min_x, max_x = min(x_coords), max(x_coords)
                min_y, max_y = min(y_coords), max(y_coords)
                width_pct = (max_x - min_x) / img_width
                height_pct = (max_y - min_y) / img_height
                
                # Create detection record
                detection = {
                    "id": i,
                    "confidence": confidence,
                    "box": [x1, y1, x2, y2],
                    "points": polygon_points,
                    "width_pct": width_pct,
                    "height_pct": height_pct,
                }
                detections.append(detection)
                
                # Draw bounding box
                cv2.rectangle(visualization_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(visualization_img, f'Person: {confidence:.2f}', (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                
                # Draw segmentation mask
                color_mask = np.zeros_like(visualization_img, dtype=np.uint8)
                mask_points = np.array(polygon_points, dtype=np.int32)
                cv2.fillPoly(color_mask, [mask_points], (0, 0, 255))
                
                # Blend the mask with the original image
                visualization_img = cv2.addWeighted(visualization_img, 1.0, color_mask, 0.5, 0)

    # Save visualization
    cv2.imwrite(args.output, visualization_img)
    print(f"Visualization saved to {args.output}")
    
    # Save detection data to JSON
    with open(args.json, 'w') as f:
        json.dump({
            "person_count": person_count,
            "detections": detections
        }, f, indent=4)
    print(f"Detection data saved to {args.json}")

if __name__ == "__main__":
    main()
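

# Example usage (file names are placeholders; adjust them to your setup):
#   python sample_inference.py --image crowd.jpg --conf 0.4 \
#       --output crowd_vis.jpg --json crowd_detections.json
#
# Reading the exported JSON back, a minimal sketch (keys match the dicts
# written above):
#   with open("crowd_detections.json") as f:
#       data = json.load(f)
#   for det in data["detections"]:
#       print(det["id"], det["confidence"], det["box"])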