import torch
import sys
import pandas as pd
from typing import TypedDict, Optional, Tuple, Any
import datetime
import math
import importlib.util
from huggingface_hub import hf_hub_download
import pickle


class ProcessedSynapse(TypedDict):
    """
    Data container class representing the data shape of the synapse coming into `run_inference`
    """
    id: Optional[str]
    nextplace_id: Optional[str]
    property_id: Optional[str]
    listing_id: Optional[str]
    address: Optional[str]
    city: Optional[str]
    state: Optional[str]
    zip_code: Optional[str]
    price: Optional[float]
    beds: Optional[int]
    baths: Optional[float]
    sqft: Optional[int]
    lot_size: Optional[int]
    year_built: Optional[int]
    days_on_market: Optional[int]
    latitude: Optional[float]
    longitude: Optional[float]
    property_type: Optional[str]
    last_sale_date: Optional[str]
    hoa_dues: Optional[float]
    query_date: Optional[str]


class MLBaseModelDriver:
    """
    This class must do two things:
        1) The constructor must load the model
        2) This class must implement a method called `run_inference` that takes the input data
           and returns a tuple of (float, str) representing the predicted sale price and the
           predicted sale date.
    """

    def __init__(self):
        self.model, self.label_encoder, self.scaler = self.load_model()

    def load_model(self) -> Tuple[Any, Any, Any]:
        """
        Load the model and model parameters
        :return: model, label encoder, and scaler
        """
        print("Loading model...")
        model_file, scaler_file, label_encoders_file, model_class_file = self._download_model_files()
        model_class = self._import_model_class(model_class_file)
        model = model_class(input_dim=4)
        state_dict = torch.load(model_file, weights_only=False)
        model.load_state_dict(state_dict)
        model.eval()

        # Load additional artifacts
        with open(scaler_file, 'rb') as f:
            scaler = pickle.load(f)
        with open(label_encoders_file, 'rb') as f:
            label_encoders = pickle.load(f)

        print("Model loaded.")
        return model, label_encoders, scaler

    def _download_model_files(self) -> Tuple[str, str, str, str]:
        """
        Download files from the Hugging Face Hub
        :return: paths to the downloaded files
        """
        model_path = "ckoozzzu/NextPlace"

        # Download the model files from the Hugging Face Hub
        model_file = hf_hub_download(repo_id=model_path, filename="model_files/real_estate_model.pth")
        scaler_file = hf_hub_download(repo_id=model_path, filename="model_files/scaler.pkl")
        label_encoders_file = hf_hub_download(repo_id=model_path, filename="model_files/label_encoder.pkl")
        model_class_file = hf_hub_download(repo_id=model_path, filename="MLBaseModel.py")

        # Return the downloaded file paths
        return model_file, scaler_file, label_encoders_file, model_class_file

    def _import_model_class(self, model_class_file):
        """
        Import the model class from the downloaded source file
        :param model_class_file: file path to the model class
        :return: the MLBaseModel class object
        """
        # Reference docs here: https://docs.python.org/3/library/importlib.html#importlib.util.spec_from_loader
        module_name = "MLBaseModel"
        spec = importlib.util.spec_from_file_location(module_name, model_class_file)
        model_module = importlib.util.module_from_spec(spec)
        sys.modules[module_name] = model_module
        spec.loader.exec_module(model_module)
        if hasattr(model_module, "MLBaseModel"):
            return model_module.MLBaseModel
        else:
            raise AttributeError("The module does not contain a class named 'MLBaseModel'")

    def run_inference(self, input_data: ProcessedSynapse) -> Tuple[float, str]:
        """
        Run inference using the MLBaseModel
        :param input_data: synapse from the validator
        :return: the predicted sale price and date
        """
        input_tensor = self._preprocess_input(input_data)
        with torch.no_grad():
            prediction = self.model(input_tensor)
        predicted_sale_price, predicted_days_on_market = prediction[0].numpy()
        predicted_days_on_market = math.floor(predicted_days_on_market)
        predicted_sale_date = self._sale_date_predictor(input_data['days_on_market'], predicted_days_on_market)
        return float(predicted_sale_price), predicted_sale_date.strftime("%Y-%m-%d")

    def _sale_date_predictor(self, days_on_market: int, predicted_days_on_market: int) -> datetime.date:
        """
        Convert predicted days on market to a sale date
        :param days_on_market: number of days this home has been on the market
        :param predicted_days_on_market: the predicted number of days for this home on the market
        :return: the predicted sale date
        """
        if days_on_market < predicted_days_on_market:
            # Home is predicted to sell once the remaining predicted days have elapsed
            days_until_sale = predicted_days_on_market - days_on_market
            sale_date = datetime.date.today() + datetime.timedelta(days=days_until_sale)
            return sale_date
        else:
            # Home has already exceeded the predicted days on market; predict a sale tomorrow
            return datetime.date.today() + datetime.timedelta(days=1)

    def _preprocess_input(self, data: ProcessedSynapse) -> torch.Tensor:
        """
        Preprocess the input for inference
        :param data: synapse from the validator
        :return: tensor representing the synapse
        """
        df = pd.DataFrame([data])

        # Fill missing values with sensible defaults
        default_beds = 3
        default_sqft = 1500.0
        default_property_type = '6'
        df['beds'] = df['beds'].fillna(default_beds)
        df['sqft'] = pd.to_numeric(df['sqft'], errors='coerce').fillna(default_sqft)
        df['property_type'] = df['property_type'].fillna(default_property_type)
        df['property_type'] = df['property_type'].astype(int)

        # Scale numeric features with the scaler fit at training time
        df[['sqft', 'price']] = self.scaler.transform(df[['sqft', 'price']])

        X = df[['beds', 'sqft', 'property_type', 'price']]
        input_tensor = torch.tensor(X.values, dtype=torch.float32)
        return input_tensor
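
# --- Illustrative usage sketch (not part of the upstream driver) ---
# A minimal example of driving the class above: build a ProcessedSynapse-shaped dict and
# call `run_inference`. All field values below are hypothetical; only the keys consumed by
# `_preprocess_input` and `run_inference` (beds, sqft, property_type, price, days_on_market)
# actually influence the prediction. Instantiating the driver downloads the model artifacts
# from the Hugging Face Hub, so this requires network access.
if __name__ == "__main__":
    sample_synapse: ProcessedSynapse = {
        "id": "1",
        "nextplace_id": "np-0001",
        "property_id": "prop-0001",
        "listing_id": "list-0001",
        "address": "123 Example St",
        "city": "Seattle",
        "state": "WA",
        "zip_code": "98101",
        "price": 500000.0,
        "beds": 3,
        "baths": 2.0,
        "sqft": 1800,
        "lot_size": 4000,
        "year_built": 1990,
        "days_on_market": 10,
        "latitude": 47.6062,
        "longitude": -122.3321,
        "property_type": "6",
        "last_sale_date": "2015-06-01",
        "hoa_dues": None,
        "query_date": "2024-01-01",
    }

    driver = MLBaseModelDriver()
    predicted_price, predicted_date = driver.run_inference(sample_synapse)
    print(f"Predicted sale price: {predicted_price:.2f}, predicted sale date: {predicted_date}")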