Chand11 committed on
Commit c37e02d · verified · 1 Parent(s): e0667ac

Upload 5 files

Files changed (5):
  1. app.py +41 -0
  2. caption_app.py +46 -0
  3. caption_app_styled.py +126 -0
  4. classifier.py +49 -0
  5. fashion_mnist_model.h5 +3 -0
app.py ADDED
@@ -0,0 +1,41 @@
+ from fastapi import FastAPI, UploadFile, File, Request
+ from fastapi.responses import HTMLResponse
+ from fastapi.templating import Jinja2Templates
+ import io
+ import numpy as np
+ from PIL import Image
+ import tensorflow as tf
+ from tensorflow.keras.applications.mobilenet_v2 import (
+     MobileNetV2, preprocess_input, decode_predictions
+ )
+
+ app = FastAPI()
+ templates = Jinja2Templates(directory="templates")
+
+ # Load the model once at startup
+ model = MobileNetV2(weights="imagenet")
+
+ @app.get("/", response_class=HTMLResponse)
+ async def home(request: Request):
+     return templates.TemplateResponse("index.html", {"request": request, "result": ""})
+
+ @app.post("/", response_class=HTMLResponse)
+ async def upload(request: Request, file: UploadFile = File(...)):
+     contents = await file.read()
+     img = Image.open(io.BytesIO(contents)).resize((224, 224)).convert("RGB")
+     img_array = np.array(img)
+     img_array = np.expand_dims(img_array, axis=0)
+     img_array = preprocess_input(img_array)
+
+     preds = model.predict(img_array)
+     decoded_preds = decode_predictions(preds, top=3)[0]
+
+     # Combine the top-3 labels and confidences into one string
+     result_text = "\n".join(
+         f"{label} - {confidence * 100:.2f}%" for (_, label, confidence) in decoded_preds
+     )
+
+     return templates.TemplateResponse("index.html", {
+         "request": request,
+         "result": result_text
+     })
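Note: app.py (and caption_app.py below) render templates/index.html, which is not included in this commit. A minimal sketch of what that template could look like for app.py follows; the markup is an assumption for illustration, not part of the upload:

    <!-- templates/index.html (hypothetical sketch, not in this commit) -->
    <html>
      <body>
        <h2>MobileNetV2 Image Classifier</h2>
        <form method="post" enctype="multipart/form-data">
          <input name="file" type="file" accept="image/*" required>
          <input type="submit" value="Classify">
        </form>
        <!-- "result" is the context variable the handlers pass in; <pre> preserves its line breaks -->
        <pre>{{ result }}</pre>
      </body>
    </html>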
caption_app.py ADDED
@@ -0,0 +1,46 @@
+ from fastapi import FastAPI, UploadFile, File, Request
+ from fastapi.responses import HTMLResponse
+ from fastapi.staticfiles import StaticFiles
+ from fastapi.templating import Jinja2Templates
+
+ from PIL import Image
+ import io
+ import base64
+ import torch
+ from transformers import BlipProcessor, BlipForConditionalGeneration
+
+ # Initialize FastAPI app
+ app = FastAPI()
+
+ # Set up static and templates directories
+ app.mount("/static", StaticFiles(directory="static"), name="static")
+ templates = Jinja2Templates(directory="templates")
+
+ # Load BLIP model and processor once at startup
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+
+ @app.get("/", response_class=HTMLResponse)
+ async def main(request: Request):
+     return templates.TemplateResponse("index.html", {"request": request})
+
+ @app.post("/", response_class=HTMLResponse)
+ async def caption(request: Request, file: UploadFile = File(...)):
+     contents = await file.read()
+     image = Image.open(io.BytesIO(contents)).convert("RGB")
+
+     # Generate a caption with BLIP
+     inputs = processor(images=image, return_tensors="pt")
+     out = model.generate(**inputs)
+     caption = processor.decode(out[0], skip_special_tokens=True)
+
+     # Convert the image to base64 for an inline preview in the template
+     buffered = io.BytesIO()
+     image.save(buffered, format="PNG")
+     img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+
+     return templates.TemplateResponse("index.html", {
+         "request": request,
+         "caption": caption,
+         "image_data": img_str
+     })
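To try caption_app.py locally, one plausible workflow is to start the server with uvicorn and post an image from a small client script. Everything below is a sketch: photo.jpg is a placeholder path, and it assumes uvicorn and requests are installed. Note that the mounted static/ directory and templates/index.html must exist on disk for the app to start.

    # Start the server first:  uvicorn caption_app:app --reload
    import requests

    with open("photo.jpg", "rb") as f:  # placeholder image path
        resp = requests.post("http://127.0.0.1:8000/", files={"file": f})
    print(resp.status_code)  # the body is the rendered HTML page with the caption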
caption_app_styled.py ADDED
@@ -0,0 +1,126 @@
+ from fastapi import FastAPI, UploadFile, File
+ from fastapi.responses import HTMLResponse
+ from PIL import Image
+ import io
+ import torch
+ from transformers import BlipProcessor, BlipForConditionalGeneration
+
+ app = FastAPI()
+
+ # Load BLIP model & processor
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+
+ @app.get("/", response_class=HTMLResponse)
+ async def main():
+     return """
+     <html>
+     <head>
+         <title>Smart Image Captioning</title>
+         <style>
+             body {
+                 font-family: 'Segoe UI', sans-serif;
+                 display: flex;
+                 flex-direction: column;
+                 align-items: center;
+                 justify-content: center;
+                 min-height: 100vh;
+                 background-color: #f3f4f6;
+                 margin: 0;
+             }
+             h2 {
+                 color: #111827;
+                 margin-bottom: 1rem;
+             }
+             form {
+                 background: white;
+                 padding: 2rem;
+                 border-radius: 12px;
+                 box-shadow: 0 8px 24px rgba(0,0,0,0.1);
+             }
+             input[type="file"] {
+                 margin-bottom: 1rem;
+             }
+             input[type="submit"] {
+                 padding: 0.5rem 1.2rem;
+                 font-size: 1rem;
+                 background-color: #2563eb;
+                 color: white;
+                 border: none;
+                 border-radius: 6px;
+                 cursor: pointer;
+             }
+             input[type="submit"]:hover {
+                 background-color: #1d4ed8;
+             }
+         </style>
+     </head>
+     <body>
+         <h2>🧠 Smart Image Captioning</h2>
+         <form action="/caption" enctype="multipart/form-data" method="post">
+             <input name="file" type="file" accept="image/*" required><br>
+             <input type="submit" value="Generate Caption">
+         </form>
+     </body>
+     </html>
+     """
+
+ @app.post("/caption", response_class=HTMLResponse)
+ async def caption(file: UploadFile = File(...)):
+     contents = await file.read()
+     image = Image.open(io.BytesIO(contents)).convert('RGB')
+
+     # Generate caption
+     inputs = processor(images=image, return_tensors="pt")
+     out = model.generate(**inputs)
+     caption = processor.decode(out[0], skip_special_tokens=True)
+
+     return f"""
+     <html>
+     <head>
+         <title>Caption Result</title>
+         <style>
+             body {{
+                 font-family: 'Segoe UI', sans-serif;
+                 background-color: #f9fafb;
+                 display: flex;
+                 flex-direction: column;
+                 align-items: center;
+                 justify-content: center;
+                 min-height: 100vh;
+                 padding: 2rem;
+             }}
+             .box {{
+                 background: white;
+                 padding: 2rem;
+                 border-radius: 12px;
+                 box-shadow: 0 8px 20px rgba(0,0,0,0.1);
+                 text-align: center;
+             }}
+             .caption {{
+                 font-size: 1.25rem;
+                 color: #1f2937;
+                 margin-top: 1rem;
+             }}
+             a {{
+                 display: inline-block;
+                 margin-top: 1.5rem;
+                 color: #2563eb;
+                 text-decoration: none;
+                 font-weight: bold;
+             }}
+             a:hover {{
+                 text-decoration: underline;
+             }}
+         </style>
+     </head>
+     <body>
+         <div class="box">
+             <h2>🖼️ Image Caption Result</h2>
+             <p><b>File:</b> {file.filename}</p>
+             <p class="caption"><b>Caption:</b> {caption}</p>
+             <a href="/">🔁 Try another image</a>
+         </div>
+     </body>
+     </html>
+     """
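Both caption apps run BLIP on the CPU by default. A small optional tweak, shown here as a sketch rather than as part of the commit, moves inference to a GPU when one is available:

    # Optional GPU inference (sketch, not in the commit)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    # ...and inside the handler, move the processor output to the same device:
    inputs = processor(images=image, return_tensors="pt").to(device)
    out = model.generate(**inputs)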
classifier.py ADDED
@@ -0,0 +1,49 @@
+ import tensorflow as tf
+ import numpy as np
+ import matplotlib.pyplot as plt
+
+ # Load the Fashion MNIST dataset (built into Keras)
+ fashion_mnist = tf.keras.datasets.fashion_mnist
+ (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
+
+ # Class names in the dataset
+ class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
+                'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
+
+ # Normalize pixel values to [0, 1]
+ train_images = train_images / 255.0
+ test_images = test_images / 255.0
+
+ # Build a simple fully connected network
+ model = tf.keras.Sequential([
+     tf.keras.layers.Flatten(input_shape=(28, 28)),
+     tf.keras.layers.Dense(128, activation='relu'),
+     tf.keras.layers.Dense(10)  # raw logits, one per class
+ ])
+
+ # Compile the model
+ model.compile(optimizer='adam',
+               loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+               metrics=['accuracy'])
+
+ # Train the model
+ model.fit(train_images, train_labels, epochs=10)
+
+ # Evaluate on test data
+ test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
+ print('\nTest accuracy:', test_acc)
+
+ # Save the model
+ model.save('fashion_mnist_model.h5')
+
+ # Optional: wrap the model with a softmax layer, predict, and plot one image
+ probability_model = tf.keras.Sequential([model,
+                                          tf.keras.layers.Softmax()])
+
+ predictions = probability_model.predict(test_images)
+ print("Predicted label for first test image:", class_names[np.argmax(predictions[0])])
+
+ plt.figure()
+ plt.imshow(test_images[0], cmap=plt.cm.binary)
+ plt.title(class_names[test_labels[0]])
+ plt.show()
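Once classifier.py has run, the saved fashion_mnist_model.h5 (uploaded below) can be reloaded without retraining. A minimal sketch; the random input is a placeholder for a real 28x28 grayscale image:

    import numpy as np
    import tensorflow as tf

    model = tf.keras.models.load_model("fashion_mnist_model.h5")
    probability_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])

    img = np.random.rand(1, 28, 28).astype("float32")  # placeholder input in [0, 1]
    pred = probability_model.predict(img)
    print("Predicted class index:", np.argmax(pred[0]))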
fashion_mnist_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b31760c1d5e5c2b12720fd70d6474fc78b2fda3cb2e1d876cdd3d9447e4987d3
+ size 1244464
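(The .h5 file itself is stored via Git LFS, so the repository holds only this pointer: the oid is the SHA-256 of the binary and size is its length in bytes.)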