Spaces:

Ryukijano
/

Image-processor

Runtime error

App Files Files Community

Image-processor / app.py

Ryukijano

Update app.py

a2c060f verified 11 months ago

raw

history blame

2.62 kB

	# app.py for Hugging Face Space: Connecting Meta Llama 3.2 Vision, Segment Anything 2, and Diffusion Model
	import os

	import gradio as gr
	import spaces # Import the spaces module to use GPU-specific decorators
	from transformers import pipeline
	from diffusers import StableDiffusionImg2ImgPipeline, StableDiffusionPipeline
	import torch

	# Hugging Face access token, read from the environment and forwarded to
	# every model download below.
	hf_token = os.getenv("HF_TOKEN")

	# Vision model used to caption the uploaded image.
	# NOTE(review): confirm this repo id resolves on the Hub and that the
	# checkpoint is compatible with the "image-to-text" pipeline.
	llama_vision_model_id = "meta-llama/Llama-3.2-1B-Vision"
	vision_pipe = pipeline(
	    "image-to-text",  # captioning task; "image-captioning" is not a registered task name
	    model=llama_vision_model_id,
	    torch_dtype=torch.bfloat16,
	    device=0,  # Force usage of GPU
	    token=hf_token,  # `token` replaces the deprecated `use_auth_token`
	)

	# Segmentation model used to split the image into regions.
	# NOTE(review): confirm this repo id resolves on the Hub and that the
	# checkpoint is supported by the "image-segmentation" pipeline.
	segment_model_id = "meta/segment-anything-2"
	segment_pipe = pipeline(
	    "image-segmentation",
	    model=segment_model_id,
	    device=0,  # Force usage of GPU
	    token=hf_token,
	)

	# Stable Diffusion loaded as an image-to-image pipeline: process_image hands
	# the uploaded picture over via `image=`, which the text-to-image
	# StableDiffusionPipeline does not accept.
	# NOTE(review): verify this repo id is still available on the Hub.
	stable_diffusion_model_id = "runwayml/stable-diffusion-v1-5"
	diffusion_pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
	    stable_diffusion_model_id, torch_dtype=torch.float16, token=hf_token
	)
	diffusion_pipe = diffusion_pipe.to("cuda")  # Force usage of GPU

	# Use the GPU decorator for the function that needs GPU access
	@spaces.GPU(duration=120)  # Allocates GPU for a maximum of 120 seconds
	def process_image(image):
	    """Caption, segment and re-render an uploaded image.

	    Args:
	        image: PIL image supplied by the Gradio input component, or None
	            when the input has been cleared.

	    Returns:
	        The first image produced by the diffusion pipeline, or None when no
	        input image was provided.
	    """
	    # The interface runs in live mode, so this callback can fire with an
	    # empty input; there is nothing to process in that case.
	    if image is None:
	        return None

	    # Step 1: caption the image. The image is passed positionally because
	    # the transformers pipelines do not define an `image=` keyword.
	    caption_result = vision_pipe(image)
	    caption = caption_result[0]['generated_text']

	    # Step 2: segment the image. The segmentation pipeline returns a list of
	    # per-segment dicts, so it must not be indexed with a string key.
	    # TODO(review): the segments are not yet used by the diffusion step.
	    segments = segment_pipe(image)

	    # Step 3: re-render the image, prompting the diffusion pipeline with the caption.
	    diffusion_result = diffusion_pipe(prompt=f"Modify the {caption}", image=image)
	    return diffusion_result.images[0]

	# Collect the Gradio UI settings in one mapping, then build the interface from it
	_interface_options = {
	    "fn": process_image,
	    "inputs": gr.Image(type="pil"),
	    "outputs": "image",
	    "live": True,  # Allow for dynamic updates if necessary
	    "allow_flagging": "never",  # Disallow flagging to keep interactions light
	    "title": "Image Processor: Vision, Segmentation, and Modification",
	    "description": "Upload an image to generate a caption, segment important parts, and modify the image using Stable Diffusion.",
	}
	interface = gr.Interface(**_interface_options)

	# Start serving the app
	interface.launch()