Upload 5 files
- app.py +42 -0
- caption_app.py +46 -0
- caption_app_styled.py +126 -0
- classifier.py +50 -0
- fashion_mnist_model.h5 +3 -0
app.py
ADDED
@@ -0,0 +1,42 @@
+from fastapi import FastAPI, UploadFile, File, Request
+from fastapi.responses import HTMLResponse
+from fastapi.templating import Jinja2Templates
+import shutil
+import io
+import numpy as np
+from PIL import Image
+import tensorflow as tf
+from tensorflow.keras.applications.mobilenet_v2 import (
+    MobileNetV2, preprocess_input, decode_predictions
+)
+
+app = FastAPI()
+templates = Jinja2Templates(directory="templates")
+
+# Load the model once
+model = MobileNetV2(weights="imagenet")
+
+@app.get("/", response_class=HTMLResponse)
+async def home(request: Request):
+    return templates.TemplateResponse("index.html", {"request": request, "result": ""})
+
+@app.post("/", response_class=HTMLResponse)
+async def upload(request: Request, file: UploadFile = File(...)):
+    contents = await file.read()
+    img = Image.open(io.BytesIO(contents)).resize((224, 224)).convert("RGB")
+    img_array = np.array(img)
+    img_array = np.expand_dims(img_array, axis=0)
+    img_array = preprocess_input(img_array)
+
+    preds = model.predict(img_array)
+    decoded_preds = decode_predictions(preds, top=3)[0]
+
+    # Combine top 3 results
+    result_text = "\n".join(
+        f"{label} - {confidence * 100:.2f}%" for (_, label, confidence) in decoded_preds
+    )
+
+    return templates.TemplateResponse("index.html", {
+        "request": request,
+        "result": result_text
+    })
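
Review note: app.py renders templates/index.html, which is not among the five uploaded files, so the Space will raise TemplateNotFound until one is added. Below is a minimal sketch of a compatible template, written from Python for convenience; the "file" field name and the "result" variable come from app.py above, while the markup itself is an assumption. Wrapping result in <pre> preserves the newline-joined top-3 list that the handler builds.

# Hypothetical helper: write a minimal templates/index.html for app.py.
# Only the "file" field and "result" variable are taken from app.py;
# the rest of the markup is an assumed sketch, not part of this upload.
from pathlib import Path

Path("templates").mkdir(exist_ok=True)
Path("templates/index.html").write_text("""\
<html>
  <body>
    <h2>Image Classifier</h2>
    <form action="/" enctype="multipart/form-data" method="post">
      <input name="file" type="file" accept="image/*" required>
      <input type="submit" value="Classify">
    </form>
    <pre>{{ result }}</pre>
  </body>
</html>
""")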
caption_app.py
ADDED
@@ -0,0 +1,46 @@
+from fastapi import FastAPI, UploadFile, File, Request
+from fastapi.responses import HTMLResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+
+from PIL import Image
+import io
+import base64
+import torch
+from transformers import BlipProcessor, BlipForConditionalGeneration
+
+# Initialize FastAPI app
+app = FastAPI()
+
+# Setup static and templates directories
+app.mount("/static", StaticFiles(directory="static"), name="static")
+templates = Jinja2Templates(directory="templates")
+
+# Load BLIP model and processor
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+
+@app.get("/", response_class=HTMLResponse)
+async def main(request: Request):
+    return templates.TemplateResponse("index.html", {"request": request})
+
+@app.post("/", response_class=HTMLResponse)
+async def caption(request: Request, file: UploadFile = File(...)):
+    contents = await file.read()
+    image = Image.open(io.BytesIO(contents)).convert("RGB")
+
+    # Generate caption using BLIP
+    inputs = processor(images=image, return_tensors="pt")
+    out = model.generate(**inputs)
+    caption = processor.decode(out[0], skip_special_tokens=True)
+
+    # Convert image to base64 for preview
+    buffered = io.BytesIO()
+    image.save(buffered, format="PNG")
+    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+
+    return templates.TemplateResponse("index.html", {
+        "request": request,
+        "caption": caption,
+        "image_data": img_str
+    })
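
Review note: caption_app.py renders the same templates/index.html as app.py but passes caption and image_data instead of result, so the two apps cannot share one template as written; it also mounts a static/ directory that must exist on disk. On CPU the model.generate call dominates request latency. A small optional sketch for moving inference to a GPU when one is available, reusing the processor and model objects loaded above (assumes a CUDA-enabled torch build; max_new_tokens=30 is an assumed bound, the handler above uses the library default):

# Optional sketch: GPU inference for the BLIP objects loaded above.
# Assumes a CUDA-enabled torch build; falls back to CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

def generate_caption(image):
    # processor(...) returns a BatchFeature, which also supports .to(device)
    inputs = processor(images=image, return_tensors="pt").to(device)
    out = model.generate(**inputs, max_new_tokens=30)
    return processor.decode(out[0], skip_special_tokens=True)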
caption_app_styled.py
ADDED
@@ -0,0 +1,126 @@
+from fastapi import FastAPI, UploadFile, File
+from fastapi.responses import HTMLResponse
+from PIL import Image
+import io
+import torch
+from transformers import BlipProcessor, BlipForConditionalGeneration
+
+app = FastAPI()
+
+# Load BLIP model & processor
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+
+@app.get("/", response_class=HTMLResponse)
+async def main():
+    return """
+    <html>
+    <head>
+        <title>Smart Image Captioning</title>
+        <style>
+            body {
+                font-family: 'Segoe UI', sans-serif;
+                display: flex;
+                flex-direction: column;
+                align-items: center;
+                justify-content: center;
+                min-height: 100vh;
+                background-color: #f3f4f6;
+                margin: 0;
+            }
+            h2 {
+                color: #111827;
+                margin-bottom: 1rem;
+            }
+            form {
+                background: white;
+                padding: 2rem;
+                border-radius: 12px;
+                box-shadow: 0 8px 24px rgba(0,0,0,0.1);
+            }
+            input[type="file"] {
+                margin-bottom: 1rem;
+            }
+            input[type="submit"] {
+                padding: 0.5rem 1.2rem;
+                font-size: 1rem;
+                background-color: #2563eb;
+                color: white;
+                border: none;
+                border-radius: 6px;
+                cursor: pointer;
+            }
+            input[type="submit"]:hover {
+                background-color: #1d4ed8;
+            }
+        </style>
+    </head>
+    <body>
+        <h2>🧠 Smart Image Captioning</h2>
+        <form action="/caption" enctype="multipart/form-data" method="post">
+            <input name="file" type="file" accept="image/*" required><br>
+            <input type="submit" value="Generate Caption">
+        </form>
+    </body>
+    </html>
+    """
+
+@app.post("/caption", response_class=HTMLResponse)
+async def caption(file: UploadFile = File(...)):
+    contents = await file.read()
+    image = Image.open(io.BytesIO(contents)).convert('RGB')
+
+    # Generate caption
+    inputs = processor(images=image, return_tensors="pt")
+    out = model.generate(**inputs)
+    caption = processor.decode(out[0], skip_special_tokens=True)
+
+    return f"""
+    <html>
+    <head>
+        <title>Caption Result</title>
+        <style>
+            body {{
+                font-family: 'Segoe UI', sans-serif;
+                background-color: #f9fafb;
+                display: flex;
+                flex-direction: column;
+                align-items: center;
+                justify-content: center;
+                min-height: 100vh;
+                padding: 2rem;
+            }}
+            .box {{
+                background: white;
+                padding: 2rem;
+                border-radius: 12px;
+                box-shadow: 0 8px 20px rgba(0,0,0,0.1);
+                text-align: center;
+            }}
+            .caption {{
+                font-size: 1.25rem;
+                color: #1f2937;
+                margin-top: 1rem;
+            }}
+            a {{
+                display: inline-block;
+                margin-top: 1.5rem;
+                color: #2563eb;
+                text-decoration: none;
+                font-weight: bold;
+            }}
+            a:hover {{
+                text-decoration: underline;
+            }}
+        </style>
+    </head>
+    <body>
+        <div class="box">
+            <h2>🖼️ Image Caption Result</h2>
+            <p><b>File:</b> {file.filename}</p>
+            <p class="caption"><b>Caption:</b> {caption}</p>
+            <a href="/">🔁 Try another image</a>
+        </div>
+    </body>
+    </html>
+    """
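
Review note: caption_app_styled.py is self-contained, since both pages are returned as inline HTML (no templates/ or static/ needed); the doubled braces in the result page escape the CSS blocks inside the f-string. A minimal local-run sketch, assuming uvicorn is installed (7860 is the port Hugging Face Spaces conventionally expects; any free port works locally):

# Local-run sketch for the self-contained app; assumes uvicorn is installed.
# Equivalent to `uvicorn caption_app_styled:app` on the command line.
import uvicorn

if __name__ == "__main__":
    uvicorn.run("caption_app_styled:app", host="0.0.0.0", port=7860)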
classifier.py
ADDED
@@ -0,0 +1,50 @@
+import tensorflow as tf
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Load Fashion MNIST dataset (built-in)
+fashion_mnist = tf.keras.datasets.fashion_mnist
+(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
+
+# Class names in the dataset
+class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
+               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
+
+# Normalize pixel values to [0,1]
+train_images = train_images / 255.0
+test_images = test_images / 255.0
+
+# Build a simple neural network model
+model = tf.keras.Sequential([
+    tf.keras.layers.Flatten(input_shape=(28, 28)),
+    tf.keras.layers.Dense(128, activation='relu'),
+    tf.keras.layers.Dense(10)
+])
+
+# Compile the model
+model.compile(optimizer='adam',
+              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+              metrics=['accuracy'])
+
+# Train the model
+model.fit(train_images, train_labels, epochs=10)
+
+# Evaluate on test data
+test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
+print('\nTest accuracy:', test_acc)
+
+# Save the model
+model.save('fashion_mnist_model.h5')
+
+# Optional: Test prediction and plot one image
+probability_model = tf.keras.Sequential([model,
+                                         tf.keras.layers.Softmax()])
+
+predictions = probability_model.predict(test_images)
+print("Predicted label for first test image:", class_names[np.argmax(predictions[0])])
+
+# Show the first test image with its true label
+plt.figure()
+plt.imshow(test_images[0], cmap=plt.cm.binary)
+plt.title(class_names[test_labels[0]])
+plt.show()
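
Review note: classifier.py trains, evaluates, and saves the artifact that ships below as fashion_mnist_model.h5 (the LFS-tracked file, ~1.2 MB). A minimal sketch for reloading that saved model for inference, applying the same normalization as training:

# Sketch: reload the saved .h5 artifact and classify one test image.
# File name and normalization match classifier.py above.
import numpy as np
import tensorflow as tf

model = tf.keras.models.load_model('fashion_mnist_model.h5')
probability_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])

(_, _), (test_images, test_labels) = tf.keras.datasets.fashion_mnist.load_data()
test_images = test_images / 255.0  # same scaling as training

probs = probability_model.predict(test_images[:1])
print("Predicted:", int(np.argmax(probs[0])), "True:", int(test_labels[0]))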
fashion_mnist_model.h5
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b31760c1d5e5c2b12720fd70d6474fc78b2fda3cb2e1d876cdd3d9447e4987d3
+size 1244464