File size: 1,574 Bytes
175924e
eb0dc26
175924e
 
 
3a6bc73
175924e
3a6bc73
eb0dc26
 
 
 
175924e
 
eb0dc26
 
 
 
175924e
3a6bc73
eb0dc26
3a6bc73
eb0dc26
 
 
 
 
 
 
175924e
 
 
 
 
3a6bc73
175924e
3a6bc73
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import tokenizer_from_json
import json
import os

# Hugging Face expects a class named Pipeline with __call__(self, inputs)
class Pipeline:
    """Classify a text (plus optional image description) with a Keras model.

    On construction the tokenizer, the model and, when present, a label map
    are loaded from disk. Calling the instance returns a dict of the form
    ``{"label": ..., "score": ...}`` for the highest-scoring class.
    """

    def __init__(self, path="."):
        """Load the tokenizer, the model and the optional label map.

        Args:
            path: Directory containing ``tokenizer.json``, the saved model
                and, optionally, ``label_map.json``. Defaults to the current
                working directory, which is where these files were always
                looked up before this parameter existed. An empty value is
                treated as the current working directory.
        """
        path = path or "."

        # Load tokenizer
        tokenizer_path = os.path.join(path, "tokenizer.json")
        with open(tokenizer_path, "r", encoding="utf-8") as f:
            tokenizer_json = f.read()
        self.tokenizer = tokenizer_from_json(tokenizer_json)

        # Every input sequence is padded/truncated to this length in __call__.
        # NOTE(review): presumably this has to match the length used when the
        # model was trained -- confirm against the training code.
        self.max_len = 150

        # Load model (SavedModel format)
        self.model = tf.keras.models.load_model(path)

        # Load label map if available
        self.label_map = None
        label_map_path = os.path.join(path, "label_map.json")
        if os.path.exists(label_map_path):
            with open(label_map_path, "r", encoding="utf-8") as f:
                self.label_map = json.load(f)

    @staticmethod
    def _compose_text(inputs):
        """Build the single string that is fed to the tokenizer.

        Args:
            inputs: Either a mapping with optional keys ``"text"`` and
                ``"image_desc"``, or a plain string, which is treated as the
                text with an empty image description.

        Returns:
            ``text + " " + image_desc``; missing or ``None`` fields count as
            empty strings and other non-string values are converted with
            ``str``.
        """
        if isinstance(inputs, str):
            text, image_desc = inputs, ""
        else:
            text = inputs.get("text", "")
            image_desc = inputs.get("image_desc", "")

        # A key that is present but None would otherwise break the
        # concatenation below with a TypeError.
        text = "" if text is None else str(text)
        image_desc = "" if image_desc is None else str(image_desc)
        return text + " " + image_desc

    def _decode_prediction(self, pred_probs):
        """Turn the model output for one sample into the result dict.

        Args:
            pred_probs: Array-like whose first row holds the per-class scores
                of the single sample that was predicted.

        Returns:
            ``{"label": label, "score": score}`` where ``score`` is the
            highest value in the first row and ``label`` is the matching
            entry of ``self.label_map`` (looked up by the stringified class
            index), or the integer class index when there is no label map or
            no entry for that index.
        """
        probs = np.asarray(pred_probs)[0]
        pred_label = int(np.argmax(probs))
        # Read the score at the winning index so label and score always
        # refer to the same class.
        score = float(probs[pred_label])
        if self.label_map:
            label = self.label_map.get(str(pred_label), pred_label)
        else:
            label = pred_label
        return {"label": label, "score": score}

    def __call__(self, inputs):
        """Classify one input.

        Args:
            inputs: A dict with keys ``"text"`` and ``"image_desc"`` (both
                optional), or a plain string used as the text.

        Returns:
            A dict ``{"label": ..., "score": ...}``; see
            ``_decode_prediction`` for the exact meaning of both fields.
        """
        input_text = self._compose_text(inputs)
        seq = self.tokenizer.texts_to_sequences([input_text])
        padded = pad_sequences(
            seq, maxlen=self.max_len, padding='post', truncating='post'
        )
        # verbose=0: no progress bar for a single-sample prediction.
        pred_probs = self.model.predict(padded, verbose=0)
        return self._decode_prediction(pred_probs)