Upload 5 files
- app.py +42 -0
- caption_app.py +46 -0
- caption_app_styled.py +126 -0
- classifier.py +50 -0
- fashion_mnist_model.h5 +3 -0
app.py
ADDED
@@ -0,0 +1,42 @@
+from fastapi import FastAPI, UploadFile, File, Request
+from fastapi.responses import HTMLResponse
+from fastapi.templating import Jinja2Templates
+import shutil
+import io
+import numpy as np
+from PIL import Image
+import tensorflow as tf
+from tensorflow.keras.applications.mobilenet_v2 import (
+    MobileNetV2, preprocess_input, decode_predictions
+)
+
+app = FastAPI()
+templates = Jinja2Templates(directory="templates")
+
+# Load the model once
+model = MobileNetV2(weights="imagenet")
+
+@app.get("/", response_class=HTMLResponse)
+async def home(request: Request):
+    return templates.TemplateResponse("index.html", {"request": request, "result": ""})
+
+@app.post("/", response_class=HTMLResponse)
+async def upload(request: Request, file: UploadFile = File(...)):
+    contents = await file.read()
+    img = Image.open(io.BytesIO(contents)).resize((224, 224)).convert("RGB")
+    img_array = np.array(img)
+    img_array = np.expand_dims(img_array, axis=0)
+    img_array = preprocess_input(img_array)
+
+    preds = model.predict(img_array)
+    decoded_preds = decode_predictions(preds, top=3)[0]
+
+    # Combine top 3 results
+    result_text = "\n".join(
+        f"{label} - {confidence * 100:.2f}%" for (_, label, confidence) in decoded_preds
+    )
+
+    return templates.TemplateResponse("index.html", {
+        "request": request,
+        "result": result_text
+    })
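
Review note: app.py renders templates/index.html, which is not among the five uploaded files, so the Space will raise TemplateNotFound until one is added. Below is a minimal sketch of a compatible template, written from Python for convenience; the "file" field name and the "result" variable come from app.py above, while the markup itself is an assumption. Wrapping result in <pre> preserves the newline-joined top-3 list that the handler builds.

# Hypothetical helper: write a minimal templates/index.html for app.py.
# Only the "file" field and "result" variable are taken from app.py;
# the rest of the markup is an assumed sketch, not part of this upload.
from pathlib import Path

Path("templates").mkdir(exist_ok=True)
Path("templates/index.html").write_text("""\
<html>
  <body>
    <h2>Image Classifier</h2>
    <form action="/" enctype="multipart/form-data" method="post">
      <input name="file" type="file" accept="image/*" required>
      <input type="submit" value="Classify">
    </form>
    <pre>{{ result }}</pre>
  </body>
</html>
""")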
caption_app.py
ADDED
@@ -0,0 +1,46 @@
+from fastapi import FastAPI, UploadFile, File, Request
+from fastapi.responses import HTMLResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+
+from PIL import Image
+import io
+import base64
+import torch
+from transformers import BlipProcessor, BlipForConditionalGeneration
+
+# Initialize FastAPI app
+app = FastAPI()
+
+# Setup static and templates directories
+app.mount("/static", StaticFiles(directory="static"), name="static")
+templates = Jinja2Templates(directory="templates")
+
+# Load BLIP model and processor
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+
+@app.get("/", response_class=HTMLResponse)
+async def main(request: Request):
+    return templates.TemplateResponse("index.html", {"request": request})
+
+@app.post("/", response_class=HTMLResponse)
+async def caption(request: Request, file: UploadFile = File(...)):
+    contents = await file.read()
+    image = Image.open(io.BytesIO(contents)).convert("RGB")
+
+    # Generate caption using BLIP
+    inputs = processor(images=image, return_tensors="pt")
+    out = model.generate(**inputs)
+    caption = processor.decode(out[0], skip_special_tokens=True)
+
+    # Convert image to base64 for preview
+    buffered = io.BytesIO()
+    image.save(buffered, format="PNG")
+    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+
+    return templates.TemplateResponse("index.html", {
+        "request": request,
+        "caption": caption,
+        "image_data": img_str
+    })
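
Review note: caption_app.py renders the same templates/index.html as app.py but passes caption and image_data instead of result, so the two apps cannot share one template as written; it also mounts a static/ directory that must exist on disk. On CPU the model.generate call dominates request latency. A small optional sketch for moving inference to a GPU when one is available, reusing the processor and model objects loaded above (assumes a CUDA-enabled torch build; max_new_tokens=30 is an assumed bound, the handler above uses the library default):

# Optional sketch: GPU inference for the BLIP objects loaded above.
# Assumes a CUDA-enabled torch build; falls back to CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

def generate_caption(image):
    # processor(...) returns a BatchFeature, which also supports .to(device)
    inputs = processor(images=image, return_tensors="pt").to(device)
    out = model.generate(**inputs, max_new_tokens=30)
    return processor.decode(out[0], skip_special_tokens=True)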
caption_app_styled.py
ADDED
@@ -0,0 +1,126 @@
+from fastapi import FastAPI, UploadFile, File
+from fastapi.responses import HTMLResponse
+from PIL import Image
+import io
+import torch
+from transformers import BlipProcessor, BlipForConditionalGeneration
+
+app = FastAPI()
+
+# Load BLIP model & processor
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+
+@app.get("/", response_class=HTMLResponse)
+async def main():
+    return """
+    <html>
+    <head>
+        <title>Smart Image Captioning</title>
+        <style>
+            body {
+                font-family: 'Segoe UI', sans-serif;
+                display: flex;
+                flex-direction: column;
+                align-items: center;
+                justify-content: center;
+                min-height: 100vh;
+                background-color: #f3f4f6;
+                margin: 0;
+            }
+            h2 {
+                color: #111827;
+                margin-bottom: 1rem;
+            }
+            form {
+                background: white;
+                padding: 2rem;
+                border-radius: 12px;
+                box-shadow: 0 8px 24px rgba(0,0,0,0.1);
+            }
+            input[type="file"] {
+                margin-bottom: 1rem;
+            }
+            input[type="submit"] {
+                padding: 0.5rem 1.2rem;
+                font-size: 1rem;
+                background-color: #2563eb;
+                color: white;
+                border: none;
+                border-radius: 6px;
+                cursor: pointer;
+            }
+            input[type="submit"]:hover {
+                background-color: #1d4ed8;
+            }
+        </style>
+    </head>
+    <body>
+        <h2>🧠 Smart Image Captioning</h2>
+        <form action="/caption" enctype="multipart/form-data" method="post">
+            <input name="file" type="file" accept="image/*" required><br>
+            <input type="submit" value="Generate Caption">
+        </form>
+    </body>
+    </html>
+    """
+
+@app.post("/caption", response_class=HTMLResponse)
+async def caption(file: UploadFile = File(...)):
+    contents = await file.read()
+    image = Image.open(io.BytesIO(contents)).convert('RGB')
+
+    # Generate caption
+    inputs = processor(images=image, return_tensors="pt")
+    out = model.generate(**inputs)
+    caption = processor.decode(out[0], skip_special_tokens=True)
+
+    return f"""
+    <html>
+    <head>
+        <title>Caption Result</title>
+        <style>
+            body {{
+                font-family: 'Segoe UI', sans-serif;
+                background-color: #f9fafb;
+                display: flex;
+                flex-direction: column;
+                align-items: center;
+                justify-content: center;
+                min-height: 100vh;
+                padding: 2rem;
+            }}
+            .box {{
+                background: white;
+                padding: 2rem;
+                border-radius: 12px;
+                box-shadow: 0 8px 20px rgba(0,0,0,0.1);
+                text-align: center;
+            }}
+            .caption {{
+                font-size: 1.25rem;
+                color: #1f2937;
+                margin-top: 1rem;
+            }}
+            a {{
+                display: inline-block;
+                margin-top: 1.5rem;
+                color: #2563eb;
+                text-decoration: none;
+                font-weight: bold;
+            }}
+            a:hover {{
+                text-decoration: underline;
+            }}
+        </style>
+    </head>
+    <body>
+        <div class="box">
+            <h2>🖼️ Image Caption Result</h2>
+            <p><b>File:</b> {file.filename}</p>
+            <p class="caption"><b>Caption:</b> {caption}</p>
+            <a href="/">🔁 Try another image</a>
+        </div>
+    </body>
+    </html>
+    """
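
Review note: caption_app_styled.py is self-contained, since both pages are returned as inline HTML (no templates/ or static/ needed); the doubled braces in the result page escape the CSS blocks inside the f-string. A minimal local-run sketch, assuming uvicorn is installed (7860 is the port Hugging Face Spaces conventionally expects; any free port works locally):

# Local-run sketch for the self-contained app; assumes uvicorn is installed.
# Equivalent to `uvicorn caption_app_styled:app` on the command line.
import uvicorn

if __name__ == "__main__":
    uvicorn.run("caption_app_styled:app", host="0.0.0.0", port=7860)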
classifier.py
ADDED
@@ -0,0 +1,50 @@
+import tensorflow as tf
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Load Fashion MNIST dataset (built-in)
+fashion_mnist = tf.keras.datasets.fashion_mnist
+(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
+
+# Class names in the dataset
+class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
+               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
+
+# Normalize pixel values to [0,1]
+train_images = train_images / 255.0
+test_images = test_images / 255.0
+
+# Build a simple neural network model
+model = tf.keras.Sequential([
+    tf.keras.layers.Flatten(input_shape=(28, 28)),
+    tf.keras.layers.Dense(128, activation='relu'),
+    tf.keras.layers.Dense(10)
+])
+
+# Compile the model
+model.compile(optimizer='adam',
+              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+              metrics=['accuracy'])
+
+# Train the model
+model.fit(train_images, train_labels, epochs=10)
+
+# Evaluate on test data
+test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
+print('\nTest accuracy:', test_acc)
+
+# Save the model
+model.save('fashion_mnist_model.h5')
+
+# Optional: Test prediction and plot one image
+probability_model = tf.keras.Sequential([model,
+                                         tf.keras.layers.Softmax()])
+
+predictions = probability_model.predict(test_images)
+print("Predicted label for first test image:", class_names[np.argmax(predictions[0])])
+
+# Show the first test image with its true label
+plt.figure()
+plt.imshow(test_images[0], cmap=plt.cm.binary)
+plt.title(class_names[test_labels[0]])
+plt.show()
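
Review note: classifier.py trains, evaluates, and saves the artifact that ships below as fashion_mnist_model.h5 (the LFS-tracked file, ~1.2 MB). A minimal sketch for reloading that saved model for inference, applying the same normalization as training:

# Sketch: reload the saved .h5 artifact and classify one test image.
# File name and normalization match classifier.py above.
import numpy as np
import tensorflow as tf

model = tf.keras.models.load_model('fashion_mnist_model.h5')
probability_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])

(_, _), (test_images, test_labels) = tf.keras.datasets.fashion_mnist.load_data()
test_images = test_images / 255.0  # same scaling as training

probs = probability_model.predict(test_images[:1])
print("Predicted:", int(np.argmax(probs[0])), "True:", int(test_labels[0]))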
fashion_mnist_model.h5
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b31760c1d5e5c2b12720fd70d6474fc78b2fda3cb2e1d876cdd3d9447e4987d3
+size 1244464