import streamlit as st import pandas as pd import numpy as np import joblib import plotly.graph_objects as go import plotly.express as px import os from pathlib import Path import warnings warnings.filterwarnings('ignore') # Set page configuration st.set_page_config( page_title="đ HDB Price Predictor", page_icon="đ ", layout="wide", initial_sidebar_state="expanded" ) # Add custom CSS st.markdown(""" """, unsafe_allow_html=True) @st.cache_resource def create_dummy_model(model_type): """Create a realistic dummy model that has all required methods""" class RealisticDummyModel: def __init__(self, model_type): self.model_type = model_type self.n_features_in_ = 9 self.feature_names_in_ = [ 'floor_area_sqm', 'storey_level', 'flat_age', 'remaining_lease', 'transaction_year', 'flat_type_encoded', 'town_encoded', 'flat_model_encoded', 'dummy_feature' ] self.get_params = lambda deep=True: {} self.set_params = lambda **params: self def predict(self, X): if isinstance(X, np.ndarray) and len(X.shape) == 2: X = X[0] floor_area = X[0] storey_level = X[1] flat_age = X[2] town_encoded = X[6] base_price = floor_area * (4800 + town_encoded * 200) storey_bonus = storey_level * 2500 age_discount = flat_age * 1800 if self.model_type == "xgboost": price = base_price + storey_bonus - age_discount + 35000 if storey_level > 20: price += 15000 if flat_age < 10: price += 20000 else: price = base_price + storey_bonus - age_discount - 25000 return np.array([max(300000, price)]) return RealisticDummyModel(model_type) @st.cache_resource def load_model_from_file(filename="best_model_xgboost1.joblib"): """Load model from local file with error handling. Download from GitHub if missing.""" url = "https://github.com/lesterchia1/HDB-Price-Predictor-Streamlit/raw/main/best_model_xgboost1.joblib" try: # Download if file doesn't exist if not os.path.exists(filename): st.info(f"â ī¸ Model file {filename} not found locally. 
Downloading from GitHub...") r = requests.get(url, allow_redirects=True) if r.status_code == 200: with open(filename, 'wb') as f: f.write(r.content) st.success(f"â Downloaded {filename} successfully") else: st.error(f"â Failed to download model: HTTP {r.status_code}") return create_dummy_model("xgboost") # Try loading the model if os.path.exists(filename): model = joblib.load(filename) st.success(f"â Successfully loaded model from {filename}") # Add missing methods if needed if not hasattr(model, 'predict'): st.error("â Loaded object doesn't have predict method") return create_dummy_model("xgboost") # Add missing methods if needed if not hasattr(model, 'get_params'): model.get_params = lambda deep=True: {} if not hasattr(model, 'set_params'): model.set_params = lambda **params: model return model else: st.warning(f"â ī¸ Model file {filename} not found, using dummy model") return create_dummy_model("xgboost") except Exception as e: st.error(f"â Error loading model from {filename}: {e}") return create_dummy_model("xgboost") @st.cache_data def create_sample_data(): """Create sample data for visualization""" np.random.seed(42) towns = ['ANG MO KIO', 'BEDOK', 'TAMPINES', 'WOODLANDS', 'JURONG WEST', 'SENGKANG', 'PUNGGOL', 'YISHUN', 'HOUGANG', 'CHOA CHU KANG'] flat_types = ['3 ROOM', '4 ROOM', '5 ROOM', 'EXECUTIVE', '2 ROOM'] flat_models = ['Improved', 'Model A', 'New Generation', 'Standard', 'Premium'] data = [] for _ in range(200): town = np.random.choice(towns) flat_type = np.random.choice(flat_types) flat_model = np.random.choice(flat_models) floor_area = np.random.randint(60, 150) storey = np.random.randint(1, 25) age = np.random.randint(0, 40) # Realistic price calculation base_price = floor_area * 5000 town_bonus = towns.index(town) * 15000 storey_bonus = storey * 2000 age_discount = age * 1200 flat_type_bonus = flat_types.index(flat_type) * 25000 resale_price = base_price + town_bonus + storey_bonus - age_discount + flat_type_bonus resale_price = max(250000, 
resale_price + np.random.randint(-15000, 15000)) data.append({ 'town': town, 'flat_type': flat_type, 'flat_model': flat_model, 'floor_area_sqm': floor_area, 'storey_level': storey, 'flat_age': age, 'resale_price': resale_price }) return pd.DataFrame(data) def preprocess_input(user_input): """Preprocess user input for prediction with correct feature mapping""" # Flat type mapping flat_type_mapping = { '1 ROOM': 1, '2 ROOM': 2, '3 ROOM': 3, '4 ROOM': 4, '5 ROOM': 5, 'EXECUTIVE': 6, 'MULTI-GENERATION': 7 } # Town mapping town_mapping = { 'SENGKANG': 0, 'WOODLANDS': 1, 'TAMPINES': 2, 'PUNGGOL': 3, 'JURONG WEST': 4, 'YISHUN': 5, 'BEDOK': 6, 'HOUGANG': 7, 'CHOA CHU KANG': 8, 'ANG MO KIO': 9 } # Flat model mapping flat_model_mapping = { 'Model A': 0, 'Improved': 1, 'New Generation': 2, 'Standard': 3, 'Premium': 4 } # Create input array with features input_features = [ user_input['floor_area_sqm'], # Feature 1 user_input['storey_level'], # Feature 2 user_input['flat_age'], # Feature 3 99 - user_input['flat_age'], # Feature 4: remaining_lease 2024, # Feature 5: transaction_year (current year) flat_type_mapping.get(user_input['flat_type'], 4), # Feature 6 town_mapping.get(user_input['town'], 0), # Feature 7 flat_model_mapping.get(user_input['flat_model'], 0), # Feature 8 1 # Feature 9: (placeholder) ] return np.array([input_features]) def create_market_insights_chart(data, user_input, predicted_price): """Create market insights visualization""" if data is None or len(data) == 0: return None # Filter similar properties similar_properties = data[ (data['flat_type'] == user_input['flat_type']) & (data['town'] == user_input['town']) ] if len(similar_properties) < 5: similar_properties = data[data['flat_type'] == user_input['flat_type']] if len(similar_properties) > 0: fig = px.scatter( similar_properties, x='floor_area_sqm', y='resale_price', color='flat_model', title=f"Market Comparison: {user_input['flat_type']} in {user_input['town']}", labels={ 'floor_area_sqm': 'Floor Area 
(sqm)', 'resale_price': 'Resale Price (SGD)', 'flat_model': 'Flat Model' } ) # Add prediction marker fig.add_trace(go.Scatter( x=[user_input['floor_area_sqm']], y=[predicted_price], mode='markers', marker=dict(symbol='star', size=20, color='red', line=dict(width=2, color='darkred')), name='Your Prediction' )) fig.update_layout( template="plotly_white", height=500, showlegend=True, font=dict(size=12) ) return fig return None def predict_hdb_price(user_input): """Main prediction function""" try: processed_input = preprocess_input(user_input) # Get prediction from model predicted_price = max(0, float(model.predict(processed_input)[0])) # Create insights remaining_lease = 99 - user_input['flat_age'] price_per_sqm = predicted_price / user_input['floor_area_sqm'] insights = f""" **Property Summary:** - **Location:** {user_input['town']} - **Type:** {user_input['flat_type']} - **Model:** {user_input['flat_model']} - **Area:** {user_input['floor_area_sqm']} sqm - **Floor:** Level {user_input['storey_level']} - **Age:** {user_input['flat_age']} years - **Remaining Lease:** {remaining_lease} years - **Price per sqm:** ${price_per_sqm:,.0f} **Financing Eligibility:** """ if remaining_lease >= 60: insights += "â **Bank loan eligible** (âĨ60 years remaining)" elif remaining_lease >= 20: insights += "â ī¸ **HDB loan eligible only** (20-59 years remaining)" else: insights += "â **Limited financing options** (<20 years remaining)" # Create chart chart = create_market_insights_chart(data, user_input, predicted_price) return predicted_price, insights, chart except Exception as e: error_msg = f"Prediction failed. Error: {str(e)}" st.error(error_msg) return None, error_msg, None # Main app st.markdown('
Built with ❤️ using Streamlit | HDB Resale Price Predictor
Predictions are estimates only | Done by Lester Chia | © 2025