from fastapi import FastAPI, UploadFile, File
from fastapi.responses import HTMLResponse
from PIL import Image
import io
from transformers import BlipProcessor, BlipForConditionalGeneration

app = FastAPI()

# Load the BLIP captioning model and its processor once at startup,
# so every request reuses the same weights instead of reloading them.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")


@app.get("/", response_class=HTMLResponse)
async def main():
    # Upload page. The form markup was stripped from the original source; this is
    # a minimal reconstruction. The field name "file" must match the parameter
    # name of the /caption endpoint below.
    return """
    <html>
        <head><title>Smart Image Captioning</title></head>
        <body>
            <h1>🧠 Smart Image Captioning</h1>
            <form action="/caption" method="post" enctype="multipart/form-data">
                <input type="file" name="file" accept="image/*" required>
                <button type="submit">Generate Caption</button>
            </form>
        </body>
    </html>
    """
""" @app.post("/caption", response_class=HTMLResponse) async def caption(file: UploadFile = File(...)): contents = await file.read() image = Image.open(io.BytesIO(contents)).convert('RGB') # Generate caption inputs = processor(images=image, return_tensors="pt") out = model.generate(**inputs) caption = processor.decode(out[0], skip_special_tokens=True) return f""" Caption Result

🖼️ Image Caption Result

File: {file.filename}

Caption: {caption}

🔁 Try another image
"""