|
|
|
|
|
""" |
|
|
Demand Data Validation Visualization Module |
|
|
|
|
|
Provides Streamlit visualization for demand data validation. |
|
|
Shows which products are included/excluded from optimization and why. |
|
|
""" |
|
|
|
|
|
import pandas as pd |
|
|
import streamlit as st |
|
|
from typing import Dict |
|
|
from src.config.constants import LineType |
|
|
from src.demand_filtering import DemandFilter |
|
|
|
|
|
|
|
|
|
|
|
# Display label for each product type. Masters carry two labels, selected by
# whether the master is standalone or sits inside a kit hierarchy.
LEVEL_NAMES = dict(
    prepack='prepack',
    subkit='subkit',
    master=dict(
        standalone='standalone_master',
        with_hierarchy='master_with_hierarchy',
    ),
    unclassified='no_hierarchy_data',
)
|
|
|
|
|
|
|
|
class DemandValidationViz:
    """
    Simple visualization wrapper for demand filtering results.

    All filtering logic is in DemandFilter - this class only converts its
    analysis into a display-ready DataFrame and summary figures.
    """

    # Markers written into the "Has ..." columns by validate_all_products and
    # matched again by get_summary_statistics; defined once so both stay in sync.
    _MARK_YES = "✅"
    _MARK_NO = "❌"
    _MARK_NO_SPEED = "❌ (will use default)"

    # Column order of the validation DataFrame. Passed explicitly to the
    # DataFrame constructor so the frame has these columns even with zero rows.
    _RESULT_COLUMNS = [
        'Product ID',
        'Demand',
        'Product Type',
        'Level',
        'Is Standalone Master',
        'Line Type ID',
        'Line Type',
        'UNICEF Staff',
        'Humanizer Staff',
        'Total Staff',
        'Production Speed (units/hour)',
        'Production Hours Needed',
        'Has Line Assignment',
        'Has Staffing Data',
        'Has Speed Data',
        'Has Hierarchy Data',
        'Excluded from Optimization',
        'Exclusion Reasons',
        'Data Quality Issues',
        'Has Too High Demand',
        'Validation Status',
    ]

    def __init__(self):
        self.filter_instance = DemandFilter()
        # Populated by load_data(); looked up per product id via .get().
        self.speed_data = None

    def load_data(self):
        """Load all data needed for visualization.

        Reads the package speed data, then delegates to the filter's own
        ``load_data()``.

        Returns:
            The result of ``self.filter_instance.load_data()`` on success, or
            ``False`` if any exception was raised (the error is printed and
            shown via ``st.error``).
        """
        try:
            # NOTE(review): optimization_config is not referenced in this
            # method; the import is kept in case it has import-time side
            # effects - confirm and remove if not.
            from src.config import optimization_config  # noqa: F401
            from src.preprocess import extract

            self.speed_data = extract.read_package_speed_data()
            return self.filter_instance.load_data()
        except Exception as e:
            # UI boundary: report the failure instead of crashing the page.
            error_msg = f"Error loading data: {str(e)}"
            print(error_msg)
            st.error(error_msg)
            return False

    @staticmethod
    def _level_name(details) -> str:
        """Map a product's type (and standalone flag for masters) to its level label."""
        ptype = details['product_type']
        if ptype == 'unclassified':
            return LEVEL_NAMES['unclassified']
        if ptype == 'master':
            master_key = 'standalone' if details['is_standalone_master'] else 'with_hierarchy'
            return LEVEL_NAMES['master'][master_key]
        return LEVEL_NAMES.get(ptype, f"level_{ptype}")

    def _build_row(self, product_id, details) -> Dict:
        """Build one display row (a dict keyed by column name) for a single product."""
        speed = self.speed_data.get(product_id) if self.speed_data is not None else None
        has_usable_speed = speed is not None and speed > 0
        production_hours = (details['demand'] / speed) if has_usable_speed else None

        line_type_id = details['line_assignment']
        has_line_type = line_type_id is not None
        line_name = LineType.get_name(line_type_id) if has_line_type else "no_assignment"

        included = details['is_included_in_optimization']
        reasons_text = ', '.join(details['exclusion_reasons']) if details['exclusion_reasons'] else ''

        # Data-quality issues are only collected for included products.
        # Starting from a fresh list per product keeps one product's issues
        # from leaking into the next row.
        issues = []
        if included:
            if speed is None:
                issues.append("missing_speed_data (will use default)")
            if not details['has_hierarchy']:
                issues.append("no_hierarchy_data")
            # NOTE(review): the source's indentation was lost; this check is
            # treated as part of the included branch, since applying it to
            # excluded products would overwrite their "Excluded" status.
            if details['has_too_high_demand']:
                issues.append("too_high_demand")

        if not included:
            validation_status = f"🚫 Excluded: {', '.join(details['exclusion_reasons'])}"
        elif issues:
            validation_status = f"⚠️ Data Issues: {', '.join(issues)}"
        else:
            validation_status = "✅ Ready for optimization"

        yes, no = self._MARK_YES, self._MARK_NO
        return {
            'Product ID': product_id,
            'Demand': details['demand'],
            'Product Type': details['product_type'].title(),
            'Level': self._level_name(details),
            'Is Standalone Master': "Yes" if details['is_standalone_master'] else "No",
            # "is not None" (not truthiness) so an id of 0 is shown, matching
            # the check used for the line name above.
            'Line Type ID': line_type_id if has_line_type else "N/A",
            'Line Type': line_name,
            'UNICEF Staff': details['unicef_staff'],
            'Humanizer Staff': details['humanizer_staff'],
            'Total Staff': details['total_staff'],
            'Production Speed (units/hour)': f"{speed:.1f}" if speed is not None else "N/A",
            'Production Hours Needed': (
                f"{production_hours:.1f}" if production_hours is not None else "N/A"
            ),
            'Has Line Assignment': yes if details['has_line_assignment'] else no,
            'Has Staffing Data': yes if details['has_staffing'] else no,
            'Has Speed Data': yes if speed is not None else self._MARK_NO_SPEED,
            'Has Hierarchy Data': yes if details['has_hierarchy'] else no,
            'Excluded from Optimization': not included,
            'Exclusion Reasons': reasons_text,
            'Data Quality Issues': ', '.join(issues),
            'Has Too High Demand': yes if details['has_too_high_demand'] else no,
            'Validation Status': validation_status,
        }

    def validate_all_products(self) -> pd.DataFrame:
        """
        Create DataFrame with validation results for all products.

        Main visualization method - converts filtering results to displayable format.

        Returns:
            One row per entry in the filter analysis' ``product_details``,
            sorted with excluded products first and then by descending demand.
            When there are no products the frame is empty but still carries
            every column.
        """
        analysis = self.filter_instance.get_complete_product_analysis()
        rows = [
            self._build_row(product_id, details)
            for product_id, details in analysis['product_details'].items()
        ]

        # Explicit columns: without them an empty result has no columns and
        # the sort below raises KeyError.
        df = pd.DataFrame(rows, columns=self._RESULT_COLUMNS)
        return df.sort_values(
            ['Excluded from Optimization', 'Demand'], ascending=[False, False]
        )

    def get_summary_statistics(self, df: pd.DataFrame) -> Dict:
        """Calculate summary statistics from validation results.

        Args:
            df: Frame produced by ``validate_all_products``.

        Returns:
            Dict of totals taken from the filter analysis plus counts of
            included products missing line assignment, staffing, speed or
            hierarchy data.
        """
        analysis = self.filter_instance.get_complete_product_analysis()
        product_details = analysis['product_details'].values()

        excluded_mask = df['Excluded from Optimization'].astype(bool)
        included_df = df[~excluded_mask]

        def count_missing(column: str) -> int:
            return int((included_df[column] == self._MARK_NO).sum())

        # The speed column's "missing" marker carries a suffix, so it is
        # matched by substring rather than equality.
        missing_speed = included_df['Has Speed Data'].astype(str).str.contains(
            self._MARK_NO, regex=False
        )

        return {
            'total_products': analysis['total_products'],
            'total_demand': analysis['total_demand'],
            'included_products': analysis['included_count'],
            'excluded_products': analysis['excluded_count'],
            'included_demand': analysis['included_demand'],
            'excluded_demand': analysis['excluded_demand'],
            'type_counts': df['Product Type'].value_counts().to_dict(),
            'no_line_assignment': count_missing('Has Line Assignment'),
            'no_staffing': count_missing('Has Staffing Data'),
            'no_speed': int(missing_speed.sum()),
            'no_hierarchy': count_missing('Has Hierarchy Data'),
            'standalone_masters': analysis['standalone_masters_count'],
            'total_unicef_needed': sum(p['unicef_staff'] for p in product_details),
            'total_humanizer_needed': sum(p['humanizer_staff'] for p in product_details),
            'excluded_with_too_high_demand': analysis['excluded_with_too_high_demand_count'],
        }
|
|
|
|
|
|
|
|
def display_demand_validation():
    """
    Display demand validation analysis in Streamlit.

    Main entry point for the validation page. Loads data through
    DemandValidationViz, then renders, in order: summary metrics, product type
    distribution, data quality issue counts, the included and excluded product
    tables, a CSV export control and recommendations. Returns early (after
    showing an error) if the data cannot be loaded.
    """
    st.header("📋 Demand Data Validation")
    st.markdown("Analysis showing which products are included/excluded from optimization and data quality status.")

    # --- Load and analyze ---
    validator = DemandValidationViz()
    with st.spinner("Loading and analyzing data..."):
        if not validator.load_data():
            st.error("Failed to load data for validation.")
            return
        validation_df = validator.validate_all_products()
        stats = validator.get_summary_statistics(validation_df)

    # --- Summary statistics ---
    st.subheader("📊 Summary Statistics")
    col1, col2, col3, col4 = st.columns(4)
    col1.metric("Total Products", stats['total_products'])
    col1.metric("Included in Optimization", stats['included_products'], delta="Ready")
    col2.metric("Total Demand", f"{stats['total_demand']:,}")
    col2.metric("Excluded from Optimization", stats['excluded_products'], delta="Omitted")
    col3.metric("Included Demand", f"{stats['included_demand']:,}", delta="Will be optimized")
    col3.metric("UNICEF Staff Needed", stats['total_unicef_needed'])
    col4.metric("Excluded Demand", f"{stats['excluded_demand']:,}", delta="Omitted")
    col4.metric("Humanizer Staff Needed", stats['total_humanizer_needed'])

    # --- Product type distribution ---
    st.subheader("📈 Product Type Distribution")
    if stats['type_counts']:
        col1, col2 = st.columns(2)
        with col1:
            type_df = pd.DataFrame(list(stats['type_counts'].items()), columns=['Product Type', 'Count'])
            st.bar_chart(type_df.set_index('Product Type'))
        with col2:
            total_products = stats['total_products']
            for ptype, count in stats['type_counts'].items():
                # Guard the division: the total comes from the filter analysis,
                # not from the counts being iterated here.
                percentage = (count / total_products) * 100 if total_products else 0.0
                st.write(f"**{ptype}:** {count} products ({percentage:.1f}%)")

    # --- Data quality issues ---
    st.subheader("⚠️ Data Quality Issues (Included Products)")
    st.write("Issues affecting products that **will be** included in optimization:")
    # Five metrics are rendered below, so five columns are needed.
    col1, col2, col3, col4, col5 = st.columns(5)
    col1.metric("No Line Assignment", stats['no_line_assignment'],
                delta=None if stats['no_line_assignment'] == 0 else "Issue")
    col2.metric("No Staffing Data", stats['no_staffing'],
                delta=None if stats['no_staffing'] == 0 else "Issue")
    col3.metric("No Speed Data", stats['no_speed'],
                delta=None if stats['no_speed'] == 0 else "Will use default")
    col4.metric("No Hierarchy Data", stats['no_hierarchy'],
                delta=None if stats['no_hierarchy'] == 0 else "Issue")
    col5.metric("Excluded: Too High Demand", stats['excluded_with_too_high_demand'],
                delta=None if stats['excluded_with_too_high_demand'] == 0 else "Excluded")

    excluded_mask = validation_df['Excluded from Optimization'].astype(bool)
    included_df = validation_df[~excluded_mask].copy()
    excluded_df = validation_df[excluded_mask].copy()

    # --- Included products ---
    st.subheader("✅ Products Included in Optimization")
    st.write(f"**{len(included_df)} products** with total demand of **{included_df['Demand'].sum():,} units**")

    if len(included_df) > 0:
        col1, col2 = st.columns(2)
        type_filter = col1.selectbox("Filter by type", ["All"] + list(included_df['Product Type'].unique()), key="inc_filter")
        min_demand = col2.number_input("Minimum demand", min_value=0, value=0, key="inc_demand")

        filtered = included_df.copy()
        if type_filter != "All":
            filtered = filtered[filtered['Product Type'] == type_filter]
        if min_demand > 0:
            filtered = filtered[filtered['Demand'] >= min_demand]

        display_cols = ['Product ID', 'Demand', 'Product Type', 'Line Type', 'UNICEF Staff',
                        'Humanizer Staff', 'Production Speed (units/hour)', 'Data Quality Issues', 'Validation Status']
        st.dataframe(filtered[display_cols], use_container_width=True, height=300)
    else:
        st.warning("No products are included in optimization!")

    # --- Excluded products ---
    st.subheader("🚫 Products Excluded from Optimization")
    st.write(f"**{len(excluded_df)} products** with total demand of **{excluded_df['Demand'].sum():,} units**")
    st.info("Excluded due to: missing line assignments, zero staffing, or non-standalone masters")

    if len(excluded_df) > 0:
        st.write("**Exclusion reasons:**")
        for reason, count in excluded_df['Exclusion Reasons'].value_counts().items():
            st.write(f"• {reason}: {count} products")

        display_cols = ['Product ID', 'Demand', 'Product Type', 'Exclusion Reasons',
                        'UNICEF Staff', 'Humanizer Staff', 'Line Type']
        st.dataframe(excluded_df[display_cols], use_container_width=True, height=200)

    # --- Export ---
    # The download control is only rendered on the run where the export
    # button reports a click.
    if st.button("📥 Export Validation Results to CSV"):
        st.download_button("Download CSV", validation_df.to_csv(index=False),
                           file_name="demand_validation_results.csv", mime="text/csv")

    # --- Recommendations ---
    st.subheader("💡 Recommendations")

    if stats['excluded_products'] > 0:
        st.warning(f"**{stats['excluded_products']} products** ({stats['excluded_demand']:,} units) excluded from optimization")

    if stats['no_line_assignment'] > 0:
        st.info(f"**Line Assignment**: {stats['no_line_assignment']} included products missing line assignments")
    if stats['no_staffing'] > 0:
        st.info(f"**Staffing Data**: {stats['no_staffing']} included products missing staffing requirements")
    if stats['no_speed'] > 0:
        st.info(f"**Speed Data**: {stats['no_speed']} included products missing speed data (will use default 106.7 units/hour)")
    if stats['no_hierarchy'] > 0:
        st.info(f"**Hierarchy Data**: {stats['no_hierarchy']} included products not in kit hierarchy")

    if stats['included_products'] > 0:
        st.success(f"✅ **{stats['included_products']} products** with {stats['included_demand']:,} units demand ready for optimization!")
        if stats['no_speed'] == 0 and stats['no_hierarchy'] == 0:
            st.info("🎉 All included products have complete data!")
    else:
        st.error("❌ No products passed filtering. Review exclusion reasons and check data configuration.")
|
|
|
|
|
|
|
|
# Script entry point: render the validation page when this file is executed
# directly; importing the module does not render anything.
if __name__ == "__main__":
    display_demand_validation()
|
|
|