|
|
import os
import urllib.error
import urllib.request
import warnings
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
|
|
# Suppress every Python warning for the whole process.
warnings.filterwarnings('ignore')

# Page-level Streamlit settings.
# NOTE(review): the title/icon literals were mis-decoded UTF-8 in the source;
# the house emoji is the most likely intended character - confirm.
st.set_page_config(
    page_title="🏠 HDB Price Predictor",
    page_icon="🏠",
    layout="wide",
    initial_sidebar_state="expanded",
)
|
|
|
|
|
|
|
|
# Style rules for the page header, the prediction result box, the metric
# cards and the form buttons, injected as raw HTML.
_CUSTOM_CSS = """
<style>
    .main-header {
        font-size: 3rem;
        color: #1f77b4;
        text-align: center;
        margin-bottom: 2rem;
    }
    .prediction-box {
        background-color: #f0f2f6;
        padding: 20px;
        border-radius: 10px;
        margin: 10px 0;
        text-align: center;
    }
    .metric-card {
        background-color: #f0f2f6;
        padding: 15px;
        border-radius: 10px;
        margin: 10px;
        text-align: center;
    }
    .stButton>button {
        width: 100%;
        background-color: #1f77b4;
        color: white;
    }
</style>
"""

st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
|
|
|
|
|
class _RealisticDummyModel:
    """Rule-based stand-in used when no trained model can be loaded.

    Exposes ``predict``, ``get_params`` and ``set_params`` together with the
    ``n_features_in_`` / ``feature_names_in_`` attributes, and prices a flat
    from floor area, storey, age and town code using fixed coefficients.
    """

    def __init__(self, model_type):
        # "xgboost" selects the variant with extra high-floor / young-flat
        # bonuses; any other value uses the plain formula with a discount.
        self.model_type = model_type
        self.n_features_in_ = 9
        self.feature_names_in_ = [
            'floor_area_sqm', 'storey_level', 'flat_age', 'remaining_lease',
            'transaction_year', 'flat_type_encoded', 'town_encoded',
            'flat_model_encoded', 'dummy_feature',
        ]

    def get_params(self, deep=True):
        """Return an empty parameter dict (the dummy has nothing to tune)."""
        return {}

    def set_params(self, **params):
        """Ignore *params* and return ``self``."""
        return self

    def predict(self, X):
        """Return one heuristic price per input row.

        Args:
            X: A single feature vector or a 2-D array-like of rows whose
                columns follow ``feature_names_in_``. Only columns 0
                (floor area), 1 (storey), 2 (age) and 6 (town code) are read.

        Returns:
            A 1-D NumPy array with one price per row, never below 300000.
        """
        # Promote a lone feature vector to a one-row matrix so that every row
        # of a batch is priced instead of only the first one.
        rows = np.atleast_2d(np.asarray(X))

        floor_area = rows[:, 0]
        storey_level = rows[:, 1]
        flat_age = rows[:, 2]
        town_encoded = rows[:, 6]

        base_price = floor_area * (4800 + town_encoded * 200)
        storey_bonus = storey_level * 2500
        age_discount = flat_age * 1800
        price = base_price + storey_bonus - age_discount

        if self.model_type == "xgboost":
            price = price + 35000
            price = price + np.where(storey_level > 20, 15000, 0)
            price = price + np.where(flat_age < 10, 20000, 0)
        else:
            price = price - 25000

        return np.maximum(300000, price)


@st.cache_resource
def create_dummy_model(model_type):
    """Create a realistic dummy model that has all required methods.

    Args:
        model_type: Variant name; ``"xgboost"`` enables the extra bonuses in
            the pricing heuristic.

    Returns:
        A ``_RealisticDummyModel`` instance (cached by Streamlit per
        ``model_type``).
    """
    return _RealisticDummyModel(model_type)
|
|
|
|
|
def _download_model(url, destination, timeout=30):
    """Download *url* to *destination*, replacing the file atomically.

    The payload is written to ``<destination>.part`` first so an interrupted
    transfer never leaves a truncated model file behind.

    Raises:
        urllib.error.HTTPError: The server answered with an error status.
        OSError: Any other network or filesystem failure.
    """
    partial_path = f"{destination}.part"
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            payload = response.read()
        with open(partial_path, 'wb') as out_file:
            out_file.write(payload)
        os.replace(partial_path, destination)
    finally:
        # After a successful os.replace the partial file no longer exists.
        if os.path.exists(partial_path):
            os.remove(partial_path)


@st.cache_resource
def load_model_from_file(filename="best_model_xgboost1.joblib"):
    """Load model from local file with error handling. Download from GitHub if missing.

    Args:
        filename: Path of the joblib file to load. If it does not exist, the
            model is downloaded from the fixed GitHub URL and saved there.

    Returns:
        The loaded object (given no-op ``get_params`` / ``set_params`` if it
        lacks them), or the dummy model from ``create_dummy_model("xgboost")``
        when downloading or loading fails or the object has no ``predict``.
    """
    url = "https://github.com/lesterchia1/HDB-Price-Predictor-Streamlit/raw/main/best_model_xgboost1.joblib"

    try:
        if not os.path.exists(filename):
            st.info(f"⚠️ Model file {filename} not found locally. Downloading from GitHub...")
            try:
                _download_model(url, filename)
            except urllib.error.HTTPError as http_err:
                st.error(f"❌ Failed to download model: HTTP {http_err.code}")
                return create_dummy_model("xgboost")
            st.success(f"✅ Downloaded {filename} successfully")

        if not os.path.exists(filename):
            st.warning(f"⚠️ Model file {filename} not found, using dummy model")
            return create_dummy_model("xgboost")

        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. The file may have just been fetched from `url`, so
        # it is only as trustworthy as that repository.
        model = joblib.load(filename)
        st.success(f"✅ Successfully loaded model from {filename}")

        if not hasattr(model, 'predict'):
            st.error("❌ Loaded object doesn't have predict method")
            return create_dummy_model("xgboost")

        # Give the object the estimator-style accessors if it lacks them.
        if not hasattr(model, 'get_params'):
            model.get_params = lambda deep=True: {}
        if not hasattr(model, 'set_params'):
            model.set_params = lambda **params: model

        return model

    except Exception as e:
        # Last-resort boundary: any failure (network, disk, unpickling) is
        # reported in the UI and the app continues with the dummy model.
        st.error(f"❌ Error loading model from {filename}: {e}")
        return create_dummy_model("xgboost")
|
|
|
|
|
@st.cache_data
def create_sample_data():
    """Create sample data for visualization.

    Builds 200 synthetic resale records from a fixed seed.

    Returns:
        A DataFrame with columns ``town``, ``flat_type``, ``flat_model``,
        ``floor_area_sqm``, ``storey_level``, ``flat_age`` and
        ``resale_price`` (never below 250000).
    """
    # A private legacy generator yields the same stream as seeding the global
    # NumPy RNG with 42, without reseeding that global state for other code.
    rng = np.random.RandomState(42)

    towns = ['ANG MO KIO', 'BEDOK', 'TAMPINES', 'WOODLANDS', 'JURONG WEST',
             'SENGKANG', 'PUNGGOL', 'YISHUN', 'HOUGANG', 'CHOA CHU KANG']
    flat_types = ['3 ROOM', '4 ROOM', '5 ROOM', 'EXECUTIVE', '2 ROOM']
    flat_models = ['Improved', 'Model A', 'New Generation', 'Standard', 'Premium']

    records = []
    for _ in range(200):
        # The order of the draws is fixed so the generated values stay stable.
        town = rng.choice(towns)
        flat_type = rng.choice(flat_types)
        flat_model = rng.choice(flat_models)
        floor_area = rng.randint(60, 150)
        storey = rng.randint(1, 25)
        age = rng.randint(0, 40)

        # Price = area term + bonuses keyed on list position - age discount.
        base_price = floor_area * 5000
        town_bonus = towns.index(town) * 15000
        storey_bonus = storey * 2000
        age_discount = age * 1200
        flat_type_bonus = flat_types.index(flat_type) * 25000

        resale_price = base_price + town_bonus + storey_bonus - age_discount + flat_type_bonus
        resale_price = max(250000, resale_price + rng.randint(-15000, 15000))

        records.append({
            'town': town, 'flat_type': flat_type, 'flat_model': flat_model,
            'floor_area_sqm': floor_area, 'storey_level': storey,
            'flat_age': age, 'resale_price': resale_price,
        })

    return pd.DataFrame(records)
|
|
|
|
|
# Integer codes for the categorical inputs.
# NOTE(review): presumably these mirror the encoding used when the model was
# trained - confirm against the training pipeline.
_FLAT_TYPE_CODES = {
    '1 ROOM': 1, '2 ROOM': 2, '3 ROOM': 3, '4 ROOM': 4,
    '5 ROOM': 5, 'EXECUTIVE': 6, 'MULTI-GENERATION': 7,
}
_TOWN_CODES = {
    'SENGKANG': 0, 'WOODLANDS': 1, 'TAMPINES': 2, 'PUNGGOL': 3,
    'JURONG WEST': 4, 'YISHUN': 5, 'BEDOK': 6, 'HOUGANG': 7,
    'CHOA CHU KANG': 8, 'ANG MO KIO': 9,
}
_FLAT_MODEL_CODES = {
    'Model A': 0, 'Improved': 1, 'New Generation': 2,
    'Standard': 3, 'Premium': 4,
}
_LEASE_TERM_YEARS = 99


def preprocess_input(user_input, transaction_year=2024):
    """Preprocess user input for prediction with correct feature mapping.

    Args:
        user_input: Mapping with keys ``floor_area_sqm``, ``storey_level``,
            ``flat_age``, ``flat_type``, ``town`` and ``flat_model``.
        transaction_year: Value placed in the transaction-year feature.
            Defaults to 2024, the value previously hard-coded.

    Returns:
        A NumPy array of shape ``(1, 9)`` holding, in order: floor area,
        storey level, flat age, remaining lease (99 - age), transaction year,
        flat type code, town code, flat model code and a constant 1.

    Raises:
        KeyError: If a required key is missing from *user_input*.
    """
    flat_age = user_input['flat_age']

    # NOTE(review): values missing from the code tables fall back silently
    # (flat type -> 4, town -> 0 i.e. SENGKANG, flat model -> 0 i.e. Model A),
    # so a town that is not listed is scored as SENGKANG.
    input_features = [
        user_input['floor_area_sqm'],
        user_input['storey_level'],
        flat_age,
        _LEASE_TERM_YEARS - flat_age,
        transaction_year,
        _FLAT_TYPE_CODES.get(user_input['flat_type'], 4),
        _TOWN_CODES.get(user_input['town'], 0),
        _FLAT_MODEL_CODES.get(user_input['flat_model'], 0),
        1,  # constant filler for the ninth feature slot
    ]

    return np.array([input_features])
|
|
|
|
|
def create_market_insights_chart(data, user_input, predicted_price):
    """Create market insights visualization.

    Plots floor area against resale price for comparable sample records and
    marks the predicted price with a red star.

    Args:
        data: DataFrame with ``flat_type``, ``town``, ``flat_model``,
            ``floor_area_sqm`` and ``resale_price`` columns, or ``None``.
        user_input: Mapping providing ``flat_type``, ``town`` and
            ``floor_area_sqm``.
        predicted_price: Price plotted for the user's flat.

    Returns:
        A Plotly figure, or ``None`` when *data* is missing/empty or holds no
        record of the requested flat type.
    """
    if data is None or len(data) == 0:
        return None

    flat_type = user_input['flat_type']
    town = user_input['town']

    same_type = data[data['flat_type'] == flat_type]
    comparables = same_type[same_type['town'] == town]
    scope = f"in {town}"

    # With fewer than five matches in the town, compare against every town
    # and say so in the title instead of still claiming the single town.
    if len(comparables) < 5:
        comparables = same_type
        scope = "across all towns"

    if len(comparables) == 0:
        return None

    fig = px.scatter(
        comparables,
        x='floor_area_sqm',
        y='resale_price',
        color='flat_model',
        title=f"Market Comparison: {flat_type} {scope}",
        labels={
            'floor_area_sqm': 'Floor Area (sqm)',
            'resale_price': 'Resale Price (SGD)',
            'flat_model': 'Flat Model',
        },
    )

    # Overlay the user's own flat as a single highlighted marker.
    fig.add_trace(go.Scatter(
        x=[user_input['floor_area_sqm']],
        y=[predicted_price],
        mode='markers',
        marker=dict(symbol='star', size=20, color='red', line=dict(width=2, color='darkred')),
        name='Your Prediction',
    ))

    fig.update_layout(
        template="plotly_white",
        height=500,
        showlegend=True,
        font=dict(size=12),
    )
    return fig
|
|
|
|
|
def _financing_eligibility(remaining_lease):
    """Return the financing note shown for *remaining_lease* years of lease."""
    if remaining_lease >= 60:
        return "✅ **Bank loan eligible** (≥60 years remaining)"
    if remaining_lease >= 20:
        return "⚠️ **HDB loan eligible only** (20-59 years remaining)"
    return "❌ **Limited financing options** (<20 years remaining)"


def predict_hdb_price(user_input):
    """Main prediction function.

    Uses the module-level ``model`` for the prediction and the module-level
    ``data`` for the comparison chart.

    Args:
        user_input: Mapping with ``town``, ``flat_type``, ``flat_model``,
            ``floor_area_sqm``, ``storey_level`` and ``flat_age``.

    Returns:
        ``(predicted_price, insights_markdown, chart)`` on success. On any
        error the message is shown with ``st.error`` and
        ``(None, error_message, None)`` is returned.
    """
    try:
        processed_input = preprocess_input(user_input)

        # Negative model outputs are clamped to zero.
        predicted_price = max(0, float(model.predict(processed_input)[0]))

        remaining_lease = 99 - user_input['flat_age']
        price_per_sqm = predicted_price / user_input['floor_area_sqm']

        insights = "\n".join([
            "",
            "**Property Summary:**",
            f"- **Location:** {user_input['town']}",
            f"- **Type:** {user_input['flat_type']}",
            f"- **Model:** {user_input['flat_model']}",
            f"- **Area:** {user_input['floor_area_sqm']} sqm",
            f"- **Floor:** Level {user_input['storey_level']}",
            f"- **Age:** {user_input['flat_age']} years",
            f"- **Remaining Lease:** {remaining_lease} years",
            f"- **Price per sqm:** ${price_per_sqm:,.0f}",
            "",
            "**Financing Eligibility:**",
            "",
        ])
        insights += _financing_eligibility(remaining_lease)

        chart = create_market_insights_chart(data, user_input, predicted_price)

        return predicted_price, insights, chart

    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        error_msg = f"Prediction failed. Error: {str(e)}"
        st.error(error_msg)
        return None, error_msg, None
|
|
|
|
|
|
|
|
# Page heading.
# NOTE(review): the emoji was mis-decoded UTF-8 in the source; the house
# emoji is the most likely intended character - confirm.
st.markdown('<h1 class="main-header">🏠 HDB Price Predictor</h1>', unsafe_allow_html=True)
st.markdown("### Estimate HDB resale prices using machine learning")

# Load the model and the synthetic comparison data once per run; both are
# read as module-level names by predict_hdb_price.
with st.spinner("Loading prediction model..."):
    model = load_model_from_file("best_model_xgboost1.joblib")
    data = create_sample_data()
|
|
|
|
|
|
|
|
# Choices offered in the "Town" select box.
towns_list = [
    'ANG MO KIO',
    'BEDOK',
    'BISHAN',
    'BUKIT BATOK',
    'BUKIT MERAH',
    'BUKIT PANJANG',
    'BUKIT TIMAH',
    'CENTRAL AREA',
    'CHOA CHU KANG',
    'CLEMENTI',
    'GEYLANG',
    'HOUGANG',
    'JURONG EAST',
    'JURONG WEST',
    'KALLANG/WHAMPOA',
    'MARINE PARADE',
    'PASIR RIS',
    'PUNGGOL',
    'QUEENSTOWN',
    'SENGKANG',
    'SERANGOON',
    'TAMPINES',
    'TOA PAYOH',
    'WOODLANDS',
    'YISHUN',
]

# Choices offered in the "Flat Type" select box.
flat_types = [
    '2 ROOM',
    '3 ROOM',
    '4 ROOM',
    '5 ROOM',
    'EXECUTIVE',
]

# Choices offered in the "Flat Model" select box.
flat_models = [
    'Improved',
    'New Generation',
    'Model A',
    'Standard',
    'Premium',
    'Simplified',
    'Premium Apartment',
]
|
|
|
|
|
|
|
|
# Input form: widget values are only submitted when the button is pressed.
with st.form("prediction_form"):
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Property Details")
        town = st.selectbox("Town", sorted(towns_list), index=0)
        flat_type = st.selectbox("Flat Type", flat_types, index=2)
        flat_model = st.selectbox("Flat Model", flat_models, index=0)

    with col2:
        st.subheader("Specifications")
        # Slider arguments: label, min, max, default, step.
        floor_area_sqm = st.slider(
            "Floor Area (sqm)", 30, 200, 100, 5,
            help="Typical HDB sizes: 3-room (60-75 sqm), 4-room (85-105 sqm), 5-room (110-125 sqm)",
        )
        storey_level = st.slider(
            "Storey Level", 1, 50, 8, 1,
            help="Higher floors typically command premium prices",
        )
        flat_age = st.slider(
            "Flat Age (years)", 0, 50, 10, 1,
            help="Newer flats generally have higher prices",
        )

    # Button emoji restored from a mis-decoded UTF-8 sequence.
    predict_btn = st.form_submit_button("🔮 Predict Resale Price", use_container_width=True)
|
|
|
|
|
|
|
|
# Run the prediction and render the results once the form is submitted.
if predict_btn:
    user_input = {
        'town': town,
        'flat_type': flat_type,
        'flat_model': flat_model,
        'floor_area_sqm': floor_area_sqm,
        'storey_level': storey_level,
        'flat_age': flat_age,
    }

    with st.spinner("Analyzing property details..."):
        predicted_price, insights, chart = predict_hdb_price(user_input)

    # predict_hdb_price returns None as the price on failure; comparing with
    # None (rather than truthiness) keeps a legitimate 0 prediction visible.
    if predicted_price is not None:
        st.markdown("---")
        st.markdown(
            f'<div class="prediction-box"><h2>💰 Predicted Resale Price: ${predicted_price:,.0f}</h2></div>',
            unsafe_allow_html=True,
        )

        col1, col2, col3 = st.columns(3)
        with col1:
            price_per_sqm = predicted_price / floor_area_sqm
            st.metric("Price per sqm", f"${price_per_sqm:,.0f}")
        with col2:
            remaining_lease = 99 - flat_age
            st.metric("Remaining Lease", f"{remaining_lease} years")
        with col3:
            st.metric("Property Age", f"{flat_age} years")

        # NOTE(review): the two heading emoji below were unrecoverable
        # mojibake in the source; these are best guesses - confirm.
        st.markdown("### 📊 Property Analysis")
        st.markdown(insights)

        if chart is not None:
            st.markdown("### 📈 Market Comparison")
            st.plotly_chart(chart, use_container_width=True)
|
|
|
|
|
|
|
|
# Collapsible explanation and disclaimer; the label emoji is restored from a
# mis-decoded UTF-8 sequence.
with st.expander("ℹ️ About This Predictor"):
    st.markdown("\n".join([
        "**How it works:**",
        "- This tool uses a machine learning model (XGBoost) trained on historical HDB resale data",
        "- The model considers factors like location, flat type, size, age, and floor level",
        "- Predictions are estimates based on historical patterns and market trends",
        "",
        "**Important Notes:**",
        "- Predictions are for reference only and not financial advice",
        "- Actual prices may vary based on market conditions and property condition",
        "- Always consult with real estate professionals for accurate valuations",
    ]))
|
|
|
|
|
|
|
|
# Page footer; the heart and copyright characters are restored from
# mis-decoded UTF-8 sequences.
st.markdown("---")
st.markdown(
    "<div style='text-align: center; color: #666; font-size: 0.9rem;'>\n"
    "<p>Built with ❤️ using Streamlit | HDB Resale Price Predictor</p>\n"
    "<p>Predictions are estimates only | Done by Lester Chia | © 2025</p>\n"
    "</div>",
    unsafe_allow_html=True,
)
|
|
|