Spaces:

helvia
/

sessions-observatory

Sleeping

App Files Community

eloukas committed 10 days ago

Commit

62c9d32

verified ·

1 Parent(s): 8cbcb64

Optimize user interaction latency

Browse files

Files changed (1) hide show

app.py +1038 -252

app.py CHANGED Viewed

@@ -2,6 +2,8 @@ import base64
 import io
 import json
 import random
 import dash
 import numpy as np
@@ -13,6 +15,62 @@ from dash import Input, Output, State, callback, dcc, html
 # Initialize the Dash app
 app = dash.Dash(__name__, suppress_callback_exceptions=True)
 # Define app layout
 app.layout = html.Div(
     [
@@ -83,12 +141,14 @@ app.layout = html.Div(
                                     children="Sessions Observatory",
                                     className="section-header",
                                 ),
                                 dcc.Graph(
                                     id="bubble-chart",
                                     style={"height": "calc(100% - 154px)"},
-                                ),
                                 html.Div(
                                     [
                                         html.Div(
                                             [
                                                 html.Div(
@@ -101,6 +161,7 @@ app.layout = html.Div(
                                             ],
                                             className="control-labels-row",
                                         ),
                                         html.Div(
                                             [
                                                 html.Div(
@@ -185,9 +246,10 @@ app.layout = html.Div(
                                                                 html.I(
                                                                     className="fas fa-info-circle",
                                                                     title="Root cause detection is experimental and may require manual review since it is generated by AI models. Root causes are only shown in clusters with identifiable root causes.",
                                                                     style={
                                                                         "marginLeft": "0.2rem",
-                                                                        "color": "#6c757d",
                                                                         "fontSize": "0.9rem",
                                                                         "cursor": "pointer",
                                                                         "verticalAlign": "middle",
@@ -202,7 +264,9 @@ app.layout = html.Div(
                                                         ),
                                                     ],
                                                     id="root-causes-section",
-                                                    style={"display": "none"},
                                                 ),
                                                 # Added Tags section
                                                 html.Div(
@@ -217,7 +281,9 @@ app.layout = html.Div(
                                                         ),
                                                     ],
                                                     id="tags-section",
-                                                    style={"display": "none"},
                                                 ),
                                             ],
                                             className="details-section",
@@ -268,7 +334,7 @@ app.layout = html.Div(
                                                 ),
                                                 html.H3("No topic selected"),
                                                 html.P(
-                                                    "Click a bubble to view topic details."
                                                 ),
                                             ],
                                             className="no-selection-message",
@@ -387,8 +453,8 @@ app.layout = html.Div(
         ),
         # Store the processed data
         dcc.Store(id="stored-data"),
-        # NEW: Store for the minimal raw dataframe
-        dcc.Store(id="raw-data"),
         # Store the current selected topic for dialogs modal
         dcc.Store(id="selected-topic-store"),
         # Store the current selected root cause for root cause modal
@@ -397,7 +463,7 @@ app.layout = html.Div(
     className="app-container",
 )
-# Define CSS for the app (no changes needed here, so it's omitted for brevity)
 app.index_string = """
 <!DOCTYPE html>
 <html>
@@ -1221,10 +1287,10 @@ app.index_string = """
 )
 def update_topic_distribution_header(data):
     if not data:
-        return "Sessions Observatory"
     df = pd.DataFrame(data)
-    total_dialogs = df["count"].sum()
     return f"Sessions Observatory ({total_dialogs} dialogs)"
@@ -1232,91 +1298,107 @@ def update_topic_distribution_header(data):
 @callback(
     [
         Output("stored-data", "data"),
-        Output("raw-data", "data"),
         Output("upload-status", "children"),
-        Output("upload-status", "style"),
         Output("main-content", "style"),
     ],
     [Input("upload-data", "contents")],
     [State("upload-data", "filename")],
 )
 def process_upload(contents, filename):
     if contents is None:
-        return None, None, "", {"display": "none"}, {"display": "none"}
     try:
         content_type, content_string = contents.split(",")
         decoded = base64.b64decode(content_string)
         if "csv" in filename.lower():
-            df = pd.read_csv(io.StringIO(decoded.decode("utf-8")), dtype={"Root_Cause": str})
         elif "xls" in filename.lower():
             df = pd.read_excel(io.BytesIO(decoded), dtype={"Root_Cause": str})
-        else:
-            return (
-                None,
-                None,
-                html.Div(
-                    ["Unsupported file. Please upload a CSV or Excel file."],
-                    style={"color": "var(--destructive)"},
-                ),
-                {"display": "block"},
-                {"display": "none"},
             )
         EXCLUDE_UNCLUSTERED = True
         if EXCLUDE_UNCLUSTERED and "deduplicated_topic_name" in df.columns:
             df = df[df["deduplicated_topic_name"] != "Unclustered"].copy()
         else:
             return (
-                None,
                 None,
                 html.Div(
-                    ["Please upload a CSV or Excel file with a 'deduplicated_topic_name' column."],
                     style={"color": "var(--destructive)"},
                 ),
-                {"display": "block"},
                 {"display": "none"},
             )
-        # Compute aggregated topic stats once
         topic_stats = analyze_topics(df)
-        # Store only the columns you use elsewhere to keep payload smaller
-        needed_cols = [
-            "id",
-            "conversation",
-            "deduplicated_topic_name",
-            "consolidated_tags",
-            "Root_Cause",
-            "root_cause_subcluster",
-            "Sentiment",
-            "Resolution",
-            "Urgency",
-            "Summary",
-        ]
-        df_min = df[[c for c in needed_cols if c in df.columns]].copy()
         return (
             topic_stats.to_dict("records"),
-            df_min.to_dict("records"),
             html.Div(
                 [
                     html.I(
                         className="fas fa-check-circle",
-                        style={"color": "hsl(142.1, 76.2%, 36.3%)", "marginRight": "8px"},
                     ),
                     f'Successfully uploaded "{filename}"',
                 ],
                 style={"color": "hsl(142.1, 76.2%, 36.3%)"},
             ),
-            {"display": "block"},
-            {"display": "block", "height": "calc(100vh - 40px)"},
         )
     except Exception as e:
         return (
-            None,
             None,
             html.Div(
                 [
@@ -1324,18 +1406,23 @@ def process_upload(contents, filename):
                         className="fas fa-exclamation-triangle",
                         style={"color": "var(--destructive)", "marginRight": "8px"},
                     ),
-                    f"Error: {e}",
                 ],
                 style={"color": "var(--destructive)"},
             ),
-            {"display": "block"},
             {"display": "none"},
         )
 # Function to analyze the topics and create statistics
 def analyze_topics(df):
     topic_stats = (
         df.groupby("deduplicated_topic_name")
         .agg(
             count=("id", "count"),
@@ -1345,94 +1432,204 @@ def analyze_topics(df):
         )
         .reset_index()
     )
-    topic_stats["negative_rate"] = (topic_stats["negative_count"] / topic_stats["count"] * 100).round(1)
-    topic_stats["unresolved_rate"] = (topic_stats["unresolved_count"] / topic_stats["count"] * 100).round(1)
-    topic_stats["urgent_rate"] = (topic_stats["urgent_count"] / topic_stats["count"] * 100).round(1)
     topic_stats = apply_binned_layout(topic_stats)
     return topic_stats
-# New binned layout function (no changes needed)
 def apply_binned_layout(df, padding=0, bin_config=None, max_items_per_row=6):
     df_sorted = df.copy()
     if bin_config is None:
         bin_config = [
-            (100, None, "100+ dialogs"), (50, 99, "50-99 dialogs"),
-            (25, 49, "25-49 dialogs"), (9, 24, "9-24 dialogs"),
-            (7, 8, "7-8 dialogs"), (5, 6, "5-6 dialogs"),
-            (4, 4, "4 dialogs"), (0, 3, "0-3 dialogs"),
         ]
     bin_descriptions = {}
     conditions = []
     bin_values = []
     for i, (lower, upper, description) in enumerate(bin_config):
         bin_name = f"Bin {i + 1}"
         bin_descriptions[bin_name] = description
         bin_values.append(bin_name)
-        if upper is None:
             conditions.append(df_sorted["count"] >= lower)
         else:
-            conditions.append((df_sorted["count"] >= lower) & (df_sorted["count"] <= upper))
-    df_sorted["bin"] = np.select(conditions, bin_values, default=f"Bin {len(bin_config)}")
     df_sorted["bin_description"] = df_sorted["bin"].map(bin_descriptions)
     df_sorted = df_sorted.sort_values(by=["bin", "count"], ascending=[True, False])
     original_bins = df_sorted["bin"].unique()
     new_rows = []
     new_bin_descriptions = bin_descriptions.copy()
     for bin_name in original_bins:
         bin_mask = df_sorted["bin"] == bin_name
         bin_group = df_sorted[bin_mask]
         bin_size = len(bin_group)
         if bin_size > max_items_per_row:
             num_sub_bins = (bin_size + max_items_per_row - 1) // max_items_per_row
             items_per_sub_bin = [bin_size // num_sub_bins] * num_sub_bins
             remainder = bin_size % num_sub_bins
             for i in range(remainder):
                 items_per_sub_bin[i] += 1
             original_description = bin_descriptions[bin_name]
             start_idx = 0
             for i in range(num_sub_bins):
                 new_bin_name = f"{bin_name}_{i + 1}"
                 new_description = f"{original_description} ({i + 1}/{num_sub_bins})"
                 new_bin_descriptions[new_bin_name] = new_description
                 end_idx = start_idx + items_per_sub_bin[i]
                 sub_bin_rows = bin_group.iloc[start_idx:end_idx].copy()
                 sub_bin_rows["bin"] = new_bin_name
                 sub_bin_rows["bin_description"] = new_description
                 new_rows.append(sub_bin_rows)
                 start_idx = end_idx
             df_sorted = df_sorted[~bin_mask]
     if new_rows:
         df_sorted = pd.concat([df_sorted] + new_rows)
     df_sorted = df_sorted.sort_values(by=["bin", "count"], ascending=[True, False])
     bins_with_topics = sorted(df_sorted["bin"].unique())
     num_rows = len(bins_with_topics)
     available_height = 100 - (2 * padding)
     row_height = available_height / num_rows
-    row_positions = {bin_name: padding + i * row_height + (row_height / 2) for i, bin_name in enumerate(bins_with_topics)}
     df_sorted["y"] = df_sorted["bin"].map(row_positions)
-    center_point = 50
     for bin_name in bins_with_topics:
         bin_mask = df_sorted["bin"] == bin_name
         num_topics_in_bin = bin_mask.sum()
         if num_topics_in_bin == 1:
             df_sorted.loc[bin_mask, "x"] = center_point
         else:
-            spacing = 17.5 if num_topics_in_bin < max_items_per_row else 15
-            total_width = (num_topics_in_bin - 1) * spacing
-            start_pos = center_point - (total_width / 2)
-            positions = [start_pos + (i * spacing) for i in range(num_topics_in_bin)]
-            df_sorted.loc[bin_mask, "x"] = positions
     df_sorted["size_rank"] = range(1, len(df_sorted) + 1)
     return df_sorted
-# function to update positions based on selected size metric (no changes needed)
 def update_bubble_positions(df: pd.DataFrame) -> pd.DataFrame:
     return apply_binned_layout(df)
-# Callback to update the bubble chart (no changes needed)
 @callback(
     Output("bubble-chart", "figure"),
     [
@@ -1446,109 +1643,258 @@ def update_bubble_chart(data, color_metric):
     df = pd.DataFrame(data)
-    # Note: `update_bubble_positions` is now called inside `analyze_topics` once
-    # and the results are stored. We don't call it here anymore.
-    # The 'x' and 'y' values are already in the `data`.
-    # df = update_bubble_positions(df) # This line can be removed if positions are pre-calculated
     size_values = df["count"]
     raw_sizes = df["count"]
     size_title = "Dialog Count"
-    min_size = 1
     if size_values.max() > size_values.min():
         log_sizes = np.log1p(size_values)
-        size_values = (min_size + (log_sizes - log_sizes.min()) / (log_sizes.max() - log_sizes.min()) * 50)
     else:
         size_values = np.ones(len(df)) * 12.5
     if color_metric == "negative_rate":
         color_values = df["negative_rate"]
         color_title = "Negativity (%)"
         color_scale = "Teal"
     elif color_metric == "unresolved_rate":
         color_values = df["unresolved_rate"]
         color_title = "Unresolved (%)"
         color_scale = "Teal"
-    else: # urgent_rate
         color_values = df["urgent_rate"]
         color_title = "Urgency (%)"
         color_scale = "Teal"
     hover_text = [
         f"Topic: {topic}<br>{size_title}: {raw:.1f}<br>{color_title}: {color:.1f}<br>Group: {bin_desc}"
-        for topic, raw, color, bin_desc in zip(df["deduplicated_topic_name"], raw_sizes, color_values, df["bin_description"])
     ]
     fig = px.scatter(
         df,
-        x="x", y="y",
         size=size_values,
         color=color_values,
         hover_name="deduplicated_topic_name",
-        hover_data={"x": False, "y": False, "bin_description": True},
-        size_max=42.5,
         color_continuous_scale=color_scale,
-        custom_data=["deduplicated_topic_name", "count", "negative_rate", "unresolved_rate", "urgent_rate", "bin_description"],
     )
     fig.update_traces(
-        mode="markers",
         marker=dict(sizemode="area", opacity=0.8, line=dict(width=1, color="white")),
         hovertemplate="%{hovertext}<extra></extra>",
         hovertext=hover_text,
     )
     annotations = []
     for i, row in df.iterrows():
         words = row["deduplicated_topic_name"].split()
-        wrapped_text = "<br>".join([" ".join(words[i : i + 4]) for i in range(0, len(words), 4)])
-        # Use df.index.get_loc(i) to safely get the index position for size_values
-        marker_size = (size_values[df.index.get_loc(i)] / 20)
         annotations.append(
             dict(
-                x=row["x"], y=row["y"] + 0.125 + marker_size,
-                text=wrapped_text, showarrow=False, textangle=0,
-                font=dict(size=9, color="var(--foreground)", family="Arial, sans-serif", weight="bold"),
-                xanchor="center", yanchor="top",
-                bgcolor="rgba(255,255,255,0.7)", bordercolor="rgba(0,0,0,0.1)",
-                borderwidth=1, borderpad=1,
             )
         )
     unique_bins = sorted(df["bin"].unique())
-    bin_y_positions = [df[df["bin"] == bin_name]["y"].mean() for bin_name in unique_bins]
     bin_descriptions = df.set_index("bin")["bin_description"].to_dict()
     for bin_name, bin_y in zip(unique_bins, bin_y_positions):
-        fig.add_shape(type="line", x0=0, y0=bin_y, x1=100, y1=bin_y, line=dict(color="rgba(0,0,0,0.1)", width=1, dash="dot"), layer="below")
         annotations.append(
             dict(
-                x=0, y=bin_y, xref="x", yref="y",
-                text=bin_descriptions[bin_name], showarrow=False,
                 font=dict(size=8.25, color="var(--muted-foreground)"),
-                align="left", xanchor="left", yanchor="middle",
-                bgcolor="rgba(255,255,255,0.7)", borderpad=1,
             )
         )
     fig.update_layout(
         title=None,
-        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, title=None, range=[0, 100]),
-        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, title=None, range=[0, 100], autorange="reversed"),
         hovermode="closest",
         margin=dict(l=0, r=0, t=10, b=10),
-        coloraxis_colorbar=dict(title=color_title, title_font=dict(size=9), tickfont=dict(size=8), thickness=10, len=0.6, yanchor="middle", y=0.5, xpad=0),
         legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
         paper_bgcolor="rgba(0,0,0,0)",
         plot_bgcolor="rgba(0,0,0,0)",
         hoverlabel=dict(bgcolor="white", font_size=12, font_family="Inter"),
-        annotations=annotations,
     )
     return fig
-# NEW: Update the topic details callback to be CLICK-ONLY and use the raw-data store
 @callback(
     [
         Output("topic-title", "children"),
@@ -1563,39 +1909,71 @@ def update_bubble_chart(data, color_metric):
         Output("selected-topic-store", "data"),
     ],
     [
-        Input("bubble-chart", "clickData"), # Changed from hoverData
         Input("refresh-dialogs-btn", "n_clicks"),
     ],
-    [State("stored-data", "data"), State("raw-data", "data")],
 )
-def update_topic_details(click_data, refresh_clicks, stored_data, raw_data):
-    # This callback now only fires on click or refresh
     ctx = dash.callback_context
-    triggered_id = ctx.triggered[0]["prop_id"].split(".")[0]
-    # If nothing triggered this, or data is missing, show the initial message
-    if not triggered_id or not stored_data or not raw_data:
-        return "", [], [], "", {"display": "none"}, "", {"display": "none"}, [], {"display": "flex"}, None
-    # We need to know which topic is currently selected if we are refreshing
-    if triggered_id == "refresh-dialogs-btn":
-        # To refresh, we would need to know the current topic. This requires
-        # getting it from a store. For simplicity, we can just use the last clickData.
-        # A more robust solution would use another dcc.Store for the *active* topic.
-        # For now, if there is no click_data, a refresh does nothing.
-        if not click_data:
-             return dash.no_update
-    topic_name = click_data["points"][0]["customdata"][0]
     df_stored = pd.DataFrame(stored_data)
     topic_data = df_stored[df_stored["deduplicated_topic_name"] == topic_name].iloc[0]
-    # Use the pre-processed data from the store - this is the fast part!
-    df_full = pd.DataFrame(raw_data)
     topic_conversations = df_full[df_full["deduplicated_topic_name"] == topic_name]
-    # --- From here, all the UI building code is the same ---
     title = html.Div([html.Span(topic_name)])
     metadata_items = [
         html.Div(
             [
@@ -1603,8 +1981,10 @@ def update_topic_details(click_data, refresh_clicks, stored_data, raw_data):
                 html.Span(f"{int(topic_data['count'])} dialogs"),
                 html.Button(
                     [
-                        html.I(className="fas fa-table", style={"marginRight": "0.25rem"}),
-                        "Show all dialogs",
                     ],
                     id="show-all-dialogs-btn",
                     className="show-dialogs-btn",
@@ -1615,6 +1995,8 @@ def update_topic_details(click_data, refresh_clicks, stored_data, raw_data):
             style={"display": "flex", "alignItems": "center", "width": "100%"},
         ),
     ]
     metrics_boxes = [
         html.Div(
             [
@@ -1639,25 +2021,54 @@ def update_topic_details(click_data, refresh_clicks, stored_data, raw_data):
         ),
     ]
     root_causes_output = ""
     root_causes_section_style = {"display": "none"}
     if "root_cause_subcluster" in topic_conversations.columns:
         filtered_root_causes = [
-            rc for rc in topic_conversations["root_cause_subcluster"].dropna().unique()
-            if rc not in ["Sub-clustering disabled", "Not eligible for sub-clustering", "No valid root causes", "No Subcluster", "Unclustered", ""]
         ]
         if filtered_root_causes:
             root_causes_output = html.Div(
                 [
                     html.Div(
                         [
-                            html.I(className="fas fa-exclamation-triangle root-cause-tag-icon"),
                             html.Span(root_cause, style={"marginRight": "6px"}),
                             html.I(
                                 className="fas fa-external-link-alt root-cause-click-icon",
                                 id={"type": "root-cause-icon", "index": root_cause},
                                 title="Click to see specific chats assigned with this root cause.",
-                                style={"cursor": "pointer", "fontSize": "0.55rem", "opacity": "0.8"},
                             ),
                         ],
                         className="root-cause-tag",
@@ -1669,19 +2080,30 @@ def update_topic_details(click_data, refresh_clicks, stored_data, raw_data):
             )
             root_causes_section_style = {"display": "block"}
     tags_list = []
-    if "consolidated_tags" in topic_conversations.columns:
-        for tags_str in topic_conversations["consolidated_tags"].dropna():
-            tags_list.extend([tag.strip() for tag in tags_str.split(",") if tag.strip()])
     tag_counts = {}
     for tag in tags_list:
         tag_counts[tag] = tag_counts.get(tag, 0) + 1
-    sorted_tags = sorted(tag_counts.items(), key=lambda x: (-x[1], x[0]))[:15]
     tags_section_style = {"display": "none"}
     if sorted_tags:
         tags_output = html.Div(
             [
                 html.Div(
@@ -1698,37 +2120,87 @@ def update_topic_details(click_data, refresh_clicks, stored_data, raw_data):
         tags_section_style = {"display": "block"}
     else:
         tags_output = html.Div(
-            [html.I(className="fas fa-info-circle", style={"marginRight": "5px"}), "No tags found for this topic"],
             className="no-tags-message",
         )
     sample_size = min(5, len(topic_conversations))
     if sample_size > 0:
-        samples = topic_conversations.sample(n=sample_size)
         dialog_items = []
         for _, row in samples.iterrows():
-            tags = [
-                html.Span(row["Sentiment"], className="dialog-tag tag-sentiment"),
-                html.Span(row["Resolution"], className="dialog-tag tag-resolution"),
-                html.Span(row["Urgency"], className="dialog-tag tag-urgency"),
-            ]
             if "id" in row:
-                tags.append(html.Span(
-                    [f"Chat ID: {row['id']} ", html.I(className="fas fa-arrow-up-right-from-square conversation-icon", id={"type": "conversation-icon", "index": row["id"]}, title="View full conversation", style={"marginLeft": "0.25rem"})],
-                    className="dialog-tag tag-chat-id", style={"display": "inline-flex", "alignItems": "center"}
-                ))
-            if "Root_Cause" in row and pd.notna(row["Root_Cause"]) and row["Root_Cause"] != "na":
-                tags.append(html.Span(f"Root Cause: {row['Root_Cause']}", className="dialog-tag tag-root-cause"))
             dialog_items.append(
                 html.Div(
-                    [html.Div(row["Summary"], className="dialog-summary"), html.Div(tags, className="dialog-metadata")],
                     className="dialog-item",
                 )
             )
         sample_dialogs = dialog_items
     else:
-        sample_dialogs = [html.Div("No sample dialogs available for this topic.", style={"color": "var(--muted-foreground)"})]
     return (
         title,
@@ -1740,11 +2212,11 @@ def update_topic_details(click_data, refresh_clicks, stored_data, raw_data):
         tags_section_style,
         sample_dialogs,
         {"display": "none"},
-        {"topic_name": topic_name},  # Pass only the topic name
     )
-# NEW: Updated to use raw-data store
 @callback(
     [
         Output("conversation-modal", "style"),
@@ -1752,22 +2224,31 @@ def update_topic_details(click_data, refresh_clicks, stored_data, raw_data):
         Output("conversation-subheader", "children"),
     ],
     [Input({"type": "conversation-icon", "index": dash.dependencies.ALL}, "n_clicks")],
-    [State("raw-data", "data")],
     prevent_initial_call=True,
 )
-def open_conversation_modal(n_clicks_list, raw_data):
-    if not any(n_clicks_list) or not raw_data:
         return {"display": "none"}, "", ""
     ctx = dash.callback_context
     if not ctx.triggered:
-        return {"display": "none"}, "", ""
     triggered_id = ctx.triggered[0]["prop_id"]
     chat_id = json.loads(triggered_id.split(".")[0])["index"]
-    df_full = pd.DataFrame(raw_data)
     conversation_row = df_full[df_full["id"] == chat_id]
     if len(conversation_row) == 0:
         conversation_text = "Conversation not found."
@@ -1775,17 +2256,28 @@ def open_conversation_modal(n_clicks_list, raw_data):
     else:
         row = conversation_row.iloc[0]
         conversation_text = row.get("conversation", "No conversation data available.")
         cluster_name = row.get("deduplicated_topic_name", "Unknown cluster")
         subheader_content = html.Div(
             [
-                html.Span(f"Chat ID: {chat_id}", style={"fontWeight": "600", "marginRight": "1rem"}),
-                html.Span(f"Cluster: {cluster_name}", style={"color": "hsl(215.4, 16.3%, 46.9%)"}),
             ]
         )
     return {"display": "flex"}, conversation_text, subheader_content
-# Callback to close modal (no changes needed)
 @callback(
     Output("conversation-modal", "style", allow_duplicate=True),
     [Input("close-modal-btn", "n_clicks")],
@@ -1794,10 +2286,10 @@ def open_conversation_modal(n_clicks_list, raw_data):
 def close_conversation_modal(n_clicks):
     if n_clicks:
         return {"display": "none"}
-    return dash.no_update
-# NEW: Updated to use raw-data store
 @callback(
     [
         Output("dialogs-table-modal", "style"),
@@ -1805,51 +2297,169 @@ def close_conversation_modal(n_clicks):
         Output("dialogs-table-content", "children"),
     ],
     [Input("show-all-dialogs-btn", "n_clicks")],
-    [State("selected-topic-store", "data"), State("raw-data", "data")],
     prevent_initial_call=True,
 )
-def open_dialogs_table_modal(n_clicks, selected_topic_data, raw_data):
-    if not n_clicks or not selected_topic_data or not raw_data:
         return {"display": "none"}, "", ""
     topic_name = selected_topic_data["topic_name"]
-    df_full = pd.DataFrame(raw_data)
     topic_conversations = df_full[df_full["deduplicated_topic_name"] == topic_name]
-    table_rows = [
-        html.Tr([
-            html.Th("Chat ID"), html.Th("Summary"), html.Th("Root Cause"),
-            html.Th("Sentiment"), html.Th("Resolution"), html.Th("Urgency"),
-            html.Th("Tags"), html.Th("Action"),
-        ])
-    ]
     for _, row in topic_conversations.iterrows():
-        tags_display = "No tags"
-        if "consolidated_tags" in row and pd.notna(row["consolidated_tags"]):
-            tags = [tag.strip() for tag in row["consolidated_tags"].split(",") if tag.strip()]
-            tags_display = html.Div([
-                html.Span(tag, className="dialog-tag-small", style={"backgroundColor": "#6c757d", "color": "white"}) for tag in tags[:3]
-            ] + ([html.Span(f"+{len(tags) - 3}", className="dialog-tag-small", style={"backgroundColor": "#6c757d", "color": "white"})] if len(tags) > 3 else []))
         table_rows.append(
-            html.Tr([
-                html.Td(row["id"], style={"fontFamily": "monospace", "fontSize": "0.8rem"}),
-                html.Td(row.get("Summary", "No summary"), className="dialog-summary-cell"),
-                html.Td(html.Span(str(row.get("Root_Cause", "Unknown")).capitalize() if pd.notna(row.get("Root_Cause")) else "Unknown", className="dialog-tag-small", style={"backgroundColor": "#8B4513", "color": "white"})),
-                html.Td(html.Span(row.get("Sentiment", "Unknown").capitalize(), className="dialog-tag-small", style={"backgroundColor": "#dc3545" if row.get("Sentiment") == "negative" else "#6c757d", "color": "white"})),
-                html.Td(html.Span(row.get("Resolution", "Unknown").capitalize(), className="dialog-tag-small", style={"backgroundColor": "#dc3545" if row.get("Resolution") == "unresolved" else "#6c757d", "color": "white"})),
-                html.Td(html.Span(row.get("Urgency", "Unknown").capitalize(), className="dialog-tag-small", style={"backgroundColor": "#dc3545" if row.get("Urgency") == "urgent" else "#6c757d", "color": "white"})),
-                html.Td(tags_display, className="dialog-tags-cell"),
-                html.Td(html.Button([html.I(className="fas fa-eye", style={"marginRight": "0.25rem"}), "View chat"], id={"type": "open-chat-btn", "index": row["id"]}, className="open-chat-btn")),
-            ])
         )
     table = html.Table(table_rows, className="dialogs-table")
-    modal_title = f"All dialogs in Topic: {topic_name} ({len(topic_conversations)} dialogs)"
     return {"display": "flex"}, modal_title, table
-# Callback to close dialogs table modal (no changes needed)
 @callback(
     Output("dialogs-table-modal", "style", allow_duplicate=True),
     [Input("close-dialogs-modal-btn", "n_clicks")],
@@ -1858,10 +2468,10 @@ def open_dialogs_table_modal(n_clicks, selected_topic_data, raw_data):
 def close_dialogs_table_modal(n_clicks):
     if n_clicks:
         return {"display": "none"}
-    return dash.no_update
-# NEW: Updated to use raw-data store
 @callback(
     [
         Output("conversation-modal", "style", allow_duplicate=True),
@@ -1869,34 +2479,68 @@ def close_dialogs_table_modal(n_clicks):
         Output("conversation-subheader", "children", allow_duplicate=True),
     ],
     [Input({"type": "open-chat-btn", "index": dash.dependencies.ALL}, "n_clicks")],
-    [State("raw-data", "data")],
     prevent_initial_call=True,
 )
-def open_conversation_from_table(n_clicks_list, raw_data):
-    if not any(n_clicks_list) or not raw_data:
         return {"display": "none"}, "", ""
     ctx = dash.callback_context
     if not ctx.triggered:
         return {"display": "none"}, "", ""
     triggered_id = ctx.triggered[0]["prop_id"]
     chat_id = json.loads(triggered_id.split(".")[0])["index"]
-    df_full = pd.DataFrame(raw_data)
     conversation_row = df_full[df_full["id"] == chat_id]
     if len(conversation_row) == 0:
-        conversation_text = f"Conversation not found for Chat ID: {chat_id}"
         subheader_content = f"Chat ID: {chat_id} (Not Found)"
     else:
-        row = conversation_row.iloc[0]
-        conversation_text = row.get("conversation", "No conversation data available.")
-        subheader_content = f"Chat ID: {chat_id} | Topic: {row.get('deduplicated_topic_name', 'Unknown')} | Sentiment: {row.get('Sentiment', 'Unknown')} | Resolution: {row.get('Resolution', 'Unknown')}"
     return {"display": "flex"}, conversation_text, subheader_content
-# NEW: Updated to use raw-data store
 @callback(
     [
         Output("root-cause-modal", "style"),
@@ -1904,64 +2548,176 @@ def open_conversation_from_table(n_clicks_list, raw_data):
         Output("root-cause-table-content", "children"),
     ],
     [Input({"type": "root-cause-icon", "index": dash.dependencies.ALL}, "n_clicks")],
-    [State("selected-topic-store", "data"), State("raw-data", "data")],
     prevent_initial_call=True,
 )
-def open_root_cause_modal(n_clicks_list, selected_topic_data, raw_data):
-    if not any(n_clicks_list) or not selected_topic_data or not raw_data:
         return {"display": "none"}, "", ""
     ctx = dash.callback_context
     if not ctx.triggered:
         return {"display": "none"}, "", ""
     triggered_id = ctx.triggered[0]["prop_id"]
     root_cause = json.loads(triggered_id.split(".")[0])["index"]
     topic_name = selected_topic_data["topic_name"]
-    df_full = pd.DataFrame(raw_data)
     filtered_conversations = df_full[
         (df_full["deduplicated_topic_name"] == topic_name)
         & (df_full["root_cause_subcluster"] == root_cause)
     ]
-    table_rows = [
-        html.Tr([
-            html.Th("Chat ID"), html.Th("Summary"), html.Th("Sentiment"),
-            html.Th("Resolution"), html.Th("Urgency"), html.Th("Tags"), html.Th("Action"),
-        ])
-    ]
     for _, row in filtered_conversations.iterrows():
-        tags_display = "No tags"
-        if "consolidated_tags" in row and pd.notna(row["consolidated_tags"]):
-            tags = [tag.strip() for tag in row["consolidated_tags"].split(",") if tag.strip()]
-            tags_display = html.Div([
-                html.Span(tag, className="dialog-tag-small", style={"backgroundColor": "#6c757d", "color": "white"}) for tag in tags[:3]
-            ] + ([html.Span(f"+{len(tags) - 3}", className="dialog-tag-small", style={"backgroundColor": "#6c757d", "color": "white"})] if len(tags) > 3 else []))
         table_rows.append(
-            html.Tr([
-                html.Td(row["id"], style={"fontFamily": "monospace", "fontSize": "0.8rem"}),
-                html.Td(row.get("Summary", "No summary"), className="dialog-summary-cell"),
-                html.Td(html.Span(row.get("Sentiment", "Unknown").capitalize(), className="dialog-tag-small", style={"backgroundColor": "#dc3545" if row.get("Sentiment") == "negative" else "#6c757d", "color": "white"})),
-                html.Td(html.Span(row.get("Resolution", "Unknown").capitalize(), className="dialog-tag-small", style={"backgroundColor": "#dc3545" if row.get("Resolution") == "unresolved" else "#6c757d", "color": "white"})),
-                html.Td(html.Span(row.get("Urgency", "Unknown").capitalize(), className="dialog-tag-small", style={"backgroundColor": "#dc3545" if row.get("Urgency") == "urgent" else "#6c757d", "color": "white"})),
-                html.Td(tags_display, className="dialog-tags-cell"),
-                html.Td(html.Button([html.I(className="fas fa-eye", style={"marginRight": "0.25rem"}), "View chat"], id={"type": "open-chat-btn-rc", "index": row["id"]}, className="open-chat-btn")),
-            ])
         )
     table = html.Table(table_rows, className="dialogs-table")
-    modal_title = f"Dialogs for Root Cause: {root_cause} (in Topic: {topic_name})"
     count_info = html.P(
-        f"Found {len(filtered_conversations)} dialogs with this root cause.",
-        style={"margin": "0 0 1rem 0", "color": "var(--muted-foreground)", "fontSize": "0.875rem"},
     )
     content = html.Div([count_info, table])
     return {"display": "flex"}, modal_title, content
-# Callback to close root cause modal (no changes needed)
 @callback(
     Output("root-cause-modal", "style", allow_duplicate=True),
     [Input("close-root-cause-modal-btn", "n_clicks")],
@@ -1970,10 +2726,10 @@ def open_root_cause_modal(n_clicks_list, selected_topic_data, raw_data):
 def close_root_cause_modal(n_clicks):
     if n_clicks:
         return {"display": "none"}
-    return dash.no_update
-# NEW: Updated to use raw-data store
 @callback(
     [
         Output("conversation-modal", "style", allow_duplicate=True),
@@ -1981,39 +2737,69 @@ def close_root_cause_modal(n_clicks):
         Output("conversation-subheader", "children", allow_duplicate=True),
     ],
     [Input({"type": "open-chat-btn-rc", "index": dash.dependencies.ALL}, "n_clicks")],
-    [State("raw-data", "data")],
     prevent_initial_call=True,
 )
-def open_conversation_from_root_cause_table(n_clicks_list, raw_data):
-    if not any(n_clicks_list) or not raw_data:
         return {"display": "none"}, "", ""
     ctx = dash.callback_context
     if not ctx.triggered:
         return {"display": "none"}, "", ""
     triggered_id = ctx.triggered[0]["prop_id"]
     chat_id = json.loads(triggered_id.split(".")[0])["index"]
-    df_full = pd.DataFrame(raw_data)
     conversation_row = df_full[df_full["id"] == chat_id]
     if len(conversation_row) == 0:
         conversation_row = df_full[df_full["id"].astype(str) == str(chat_id)]
     if len(conversation_row) == 0:
         conversation_text = f"Conversation not found for Chat ID: {chat_id}"
         subheader_content = f"Chat ID: {chat_id} (Not Found)"
     else:
         row = conversation_row.iloc[0]
         conversation_text = row.get("conversation", "No conversation data available.")
         root_cause = row.get("root_cause_subcluster", "Unknown")
         cluster_name = row.get("deduplicated_topic_name", "Unknown cluster")
-        subheader_content = html.Div([
-            html.Span(f"Chat ID: {chat_id}", style={"fontWeight": "600", "marginRight": "1rem"}),
-            html.Span(f"Cluster: {cluster_name}", style={"color": "hsl(215.4, 16.3%, 46.9%)", "marginRight": "1rem"}),
-            html.Span(f"Root Cause: {root_cause}", style={"color": "#8b6f47", "fontWeight": "500"}),
-        ])
     return {"display": "flex"}, conversation_text, subheader_content
 # IMPORTANT: Expose the server for Gunicorn

 import io
 import json
 import random
+import time
+import uuid
 import dash
 import numpy as np
 # Initialize the Dash app
 app = dash.Dash(__name__, suppress_callback_exceptions=True)
+# In-memory server-side cache for uploaded DataFrames, held in a module-level dict.
+# Keys are UUID strings handed out by _cache_put_df (one per stored DataFrame).
+# Each entry: {"df": DataFrame, "created": float, "last_access": float}
+# where both timestamps are time.time() values.
+_DF_CACHE: dict[str, dict] = {}
+# Maximum number of cached DataFrames; _cache_prune evicts the least recently
+# accessed entries beyond this.
+_CACHE_MAX_ENTRIES = 5
+# Entry lifetime, measured from "created" (not from "last_access").
+_CACHE_TTL_SECONDS = 10 * 60 * 60  # 10 hours
+def _cache_prune() -> None:
+    """Remove expired entries from ``_DF_CACHE``, then enforce the size cap.
+
+    Expiry is judged on each entry's "created" timestamp. When more than
+    ``_CACHE_MAX_ENTRIES`` entries remain, those with the oldest
+    "last_access" are evicted first. Mutates the module-level dict in place.
+    """
+    now = time.time()
+    # Remove expired entries
+    # (an entry without "created" gets age 0 here and is never treated as expired)
+    expired_keys = [
+        k
+        for k, v in _DF_CACHE.items()
+        if (now - v.get("created", now)) > _CACHE_TTL_SECONDS
+    ]
+    for k in expired_keys:
+        _DF_CACHE.pop(k, None)
+    # Enforce max size with LRU eviction by last_access
+    if len(_DF_CACHE) > _CACHE_MAX_ENTRIES:
+        # Sort (key, entry) pairs by last_access ascending (least recently used first),
+        # falling back to "created" when "last_access" is missing
+        keys_by_lru = sorted(
+            _DF_CACHE.items(),
+            key=lambda kv: kv[1].get("last_access", kv[1].get("created", 0)),
+        )
+        to_evict = len(_DF_CACHE) - _CACHE_MAX_ENTRIES
+        for i in range(to_evict):
+            _DF_CACHE.pop(keys_by_lru[i][0], None)
+def _cache_put_df(df: pd.DataFrame) -> str:
+    """Store ``df`` in ``_DF_CACHE`` under a fresh UUID4 key and return that key.
+
+    The cache is pruned both before and after the insert, so it does not stay
+    above ``_CACHE_MAX_ENTRIES`` once this function returns.
+    """
+    _cache_prune()
+    key = str(uuid.uuid4())
+    now = time.time()
+    # A new entry starts with identical creation and last-access timestamps
+    _DF_CACHE[key] = {"df": df, "created": now, "last_access": now}
+    _cache_prune()
+    return key
+def _cache_get_df(key: str | None) -> pd.DataFrame | None:
+    """Return the cached DataFrame for ``key``, or ``None`` if it is unavailable.
+
+    ``None`` is returned for a falsy key, an unknown key, or an entry older
+    than ``_CACHE_TTL_SECONDS`` (which is also removed from the cache).
+    A successful lookup refreshes the entry's "last_access" timestamp.
+    """
+    if not key:
+        return None
+    entry = _DF_CACHE.get(key)
+    if not entry:
+        return None
+    # TTL check
+    now = time.time()
+    if (now - entry.get("created", now)) > _CACHE_TTL_SECONDS:
+        # Expired
+        _DF_CACHE.pop(key, None)
+        return None
+    # Update last access and return
+    entry["last_access"] = now
+    return entry["df"]
 # Define app layout
 app.layout = html.Div(
     [
                                     children="Sessions Observatory",
                                     className="section-header",
                                 ),
+                                # dcc.Graph(id="bubble-chart", style={"height": "80vh"}),
                                 dcc.Graph(
                                     id="bubble-chart",
                                     style={"height": "calc(100% - 154px)"},
+                                ),  # this does not work for some reason
                                 html.Div(
                                     [
+                                        # Only keep Color by
                                         html.Div(
                                             [
                                                 html.Div(
                                             ],
                                             className="control-labels-row",
                                         ),
+                                        # Only keep Color by options
                                         html.Div(
                                             [
                                                 html.Div(
                                                                 html.I(
                                                                     className="fas fa-info-circle",
                                                                     title="Root cause detection is experimental and may require manual review since it is generated by AI models. Root causes are only shown in clusters with identifiable root causes.",
+                                                                    # Added title for info icon
                                                                     style={
                                                                         "marginLeft": "0.2rem",
+                                                                        "color": "#6c757d",  # General gray
                                                                         "fontSize": "0.9rem",
                                                                         "cursor": "pointer",
                                                                         "verticalAlign": "middle",
                                                         ),
                                                     ],
                                                     id="root-causes-section",
+                                                    style={
+                                                        "display": "none"
+                                                    },  # Initially hidden
                                                 ),
                                                 # Added Tags section
                                                 html.Div(
                                                         ),
                                                     ],
                                                     id="tags-section",
+                                                    style={
+                                                        "display": "none"
+                                                    },  # Initially hidden
                                                 ),
                                             ],
                                             className="details-section",
                                                 ),
                                                 html.H3("No topic selected"),
                                                 html.P(
+                                                    "Click or hover on a bubble to view topic details."
                                                 ),
                                             ],
                                             className="no-selection-message",
         ),
         # Store the processed data
         dcc.Store(id="stored-data"),
+        # Server-side cache key for full uploaded DataFrame
+        dcc.Store(id="data-cache-key"),
         # Store the current selected topic for dialogs modal
         dcc.Store(id="selected-topic-store"),
         # Store the current selected root cause for root cause modal
     className="app-container",
 )
+# Define CSS for the app
 app.index_string = """
 <!DOCTYPE html>
 <html>
 )
 def update_topic_distribution_header(data):
+    """Return the section header text, including the total dialog count if data exists.
+
+    Args:
+        data: Records convertible with ``pd.DataFrame(data)`` that carry a
+            "count" column; a falsy value means nothing has been loaded yet.
+            (Presumably the "stored-data" records - the decorator is not fully
+            visible here; confirm against app.py.)
+
+    Returns:
+        "Sessions Observatory" when ``data`` is falsy, otherwise
+        "Sessions Observatory (<sum of count> dialogs)".
+    """
     if not data:
+        return "Sessions Observatory"  # Default when no data is available
     df = pd.DataFrame(data)
+    total_dialogs = df["count"].sum()  # Sum up the 'count' column
     return f"Sessions Observatory ({total_dialogs} dialogs)"
 @callback(
     [
         Output("stored-data", "data"),
         Output("upload-status", "children"),
+        Output("upload-status", "style"),  # Add style output for visibility
         Output("main-content", "style"),
+        Output("data-cache-key", "data"),
     ],
     [Input("upload-data", "contents")],
     [State("upload-data", "filename")],
 )
 def process_upload(contents, filename):
+    """Parse an uploaded CSV/Excel file and publish topic statistics plus a cache key.
+
+    Args:
+        contents: Upload payload of the form "<header>,<base64 data>"
+            (it is split on "," and the second part is base64-decoded);
+            ``None`` when nothing has been uploaded.
+        filename: Name of the uploaded file; only checked for the substrings
+            "csv" / "xls" (case-insensitive) to pick the parser.
+
+    Returns:
+        One value per declared output, in order: stored-data records,
+        upload-status children, upload-status style, main-content style and
+        the server-side cache key of the full DataFrame.
+
+    Any exception raised while processing is caught and shown in the
+    upload-status area instead of propagating.
+    """
     if contents is None:
+        return None, "", {"display": "none"}, {"display": "none"}, None  # Keep hidden
     try:
+        # Parse uploaded file
         content_type, content_string = contents.split(",")
         decoded = base64.b64decode(content_string)
         if "csv" in filename.lower():
+            df = pd.read_csv(
+                io.StringIO(decoded.decode("utf-8")), dtype={"Root_Cause": str}
+            )
         elif "xls" in filename.lower():
             df = pd.read_excel(io.BytesIO(decoded), dtype={"Root_Cause": str})
+        # DEBUG
+        # --- Print unique root_cause_subcluster values for each deduplicated_topic_name ---
+        if (
+            "deduplicated_topic_name" in df.columns
+            and "root_cause_subcluster" in df.columns
+        ):
+            print(
+                "\n[INFO] Unique root_cause_subcluster values for each deduplicated_topic_name:"
             )
+            for topic in df["deduplicated_topic_name"].unique():
+                subclusters = (
+                    df[df["deduplicated_topic_name"] == topic]["root_cause_subcluster"]
+                    .dropna()
+                    .unique()
+                )
+                print(f"- {topic}:")
+                for sub in subclusters:
+                    print(f"    - {sub}")
+            print()
+        # --- End of DEBUG ---
+        # Hardcoded flag to exclude 'Unclustered' topics
         EXCLUDE_UNCLUSTERED = True
         if EXCLUDE_UNCLUSTERED and "deduplicated_topic_name" in df.columns:
             df = df[df["deduplicated_topic_name"] != "Unclustered"].copy()
+            # If we strip leading and trailing `"` or `'` from the topic name here, then
+            # we will have a problem with the deduplicated names, as they will not match the
+            # original topic names in the dataset.
+            # Better do it in the first script.
+        # NOTE(review): as rendered here this `else` pairs with the
+        # EXCLUDE_UNCLUSTERED check, although its message is about the file
+        # type. Its return also carries four values while the callback
+        # declares five outputs (nothing for "data-cache-key"). Context lines
+        # may be elided in this view - confirm both points against app.py.
         else:
             return (
                 None,
                 html.Div(
+                    [
+                        html.I(
+                            className="fas fa-exclamation-circle",
+                            style={"color": "var(--destructive)", "marginRight": "8px"},
+                        ),
+                        "Please upload a CSV or Excel file.",
+                    ],
                     style={"color": "var(--destructive)"},
                 ),
+                {"display": "block"},  # Make visible after error
                 {"display": "none"},
             )
+        # Process the dataframe to get topic statistics
         topic_stats = analyze_topics(df)
+        # Put full DataFrame in server-side cache and return key
         cache_key = _cache_put_df(df)
         return (
             topic_stats.to_dict("records"),
             html.Div(
                 [
                     html.I(
                         className="fas fa-check-circle",
+                        style={
+                            "color": "hsl(142.1, 76.2%, 36.3%)",
+                            "marginRight": "8px",
+                        },
                     ),
                     f'Successfully uploaded "{filename}"',
                 ],
                 style={"color": "hsl(142.1, 76.2%, 36.3%)"},
             ),
+            {"display": "block"},  # maybe add the above line here too #TODO
+            {
+                "display": "block",
+                "height": "calc(100vh - 40px)",
+            },  # Make visible after successful upload
+            cache_key,
         )
+    # Report any processing failure in the upload-status area and keep the
+    # main content hidden.
     except Exception as e:
         return (
             None,
             html.Div(
                 [
+                    # NOTE(review): the opening `html.I(` of this icon appears to be
+                    # missing from this view (context line elided?) - confirm.
                         className="fas fa-exclamation-triangle",
                         style={"color": "var(--destructive)", "marginRight": "8px"},
                     ),
+                    f"Error processing file: {str(e)}",
                 ],
                 style={"color": "var(--destructive)"},
             ),
+            {"display": "block"},  # Make visible after error
             {"display": "none"},
+            None,
         )
 # Function to analyze the topics and create statistics
 def analyze_topics(df):
+    """Aggregate per-topic statistics and attach bubble-layout coordinates.
+
+    Groups ``df`` by "deduplicated_topic_name", counts "id" per topic, derives
+    percentage rates rounded to one decimal and passes the result through
+    ``apply_binned_layout``.
+
+    NOTE(review): the rate calculations read "negative_count",
+    "unresolved_count" and "urgent_count"; the aggregations that produce them
+    are not visible in this view (context lines appear elided) - confirm
+    against app.py.
+    """
+    # Group by topic name and calculate metrics
     topic_stats = (
+        # IMPORTANT!
+        # The deduplicated_topic_name column holds one of: the deduplicated name (if enabled by the process),
+        # the kmeans_reclustered name (where available), or the ClusterNames value.
         df.groupby("deduplicated_topic_name")
         .agg(
             count=("id", "count"),
         )
         .reset_index()
     )
+    # Calculate rates (percentage of the topic's dialogs, one decimal)
+    topic_stats["negative_rate"] = (
+        topic_stats["negative_count"] / topic_stats["count"] * 100
+    ).round(1)
+    topic_stats["unresolved_rate"] = (
+        topic_stats["unresolved_count"] / topic_stats["count"] * 100
+    ).round(1)
+    topic_stats["urgent_rate"] = (
+        topic_stats["urgent_count"] / topic_stats["count"] * 100
+    ).round(1)
+    # Apply binned layout
     topic_stats = apply_binned_layout(topic_stats)
     return topic_stats
+# New binned layout function
 def apply_binned_layout(df, padding=0, bin_config=None, max_items_per_row=6):
     """Lay topics out in horizontally centred rows grouped by dialog count.
     Each topic goes to the first bin in ``bin_config`` whose inclusive
     ``[lower, upper]`` range contains its ``count``; a count matching no
     range falls into the last bin. A bin holding more than
     ``max_items_per_row`` topics is split into evenly sized rows named
     ``"<bin>_<k>"``. Rows are stacked in bin order on a 0-100 scale.
     Args:
         df: DataFrame with a numeric ``count`` column (one row per topic).
         padding: Space kept free at both ends of the 0-100 y range.
         bin_config: List of ``(lower, upper, description)`` tuples in row
             order; ``upper=None`` means no upper limit.
             Example: [(300, None, "300+ dialogs"), (250, 299, "250-299 dialogs"), ...]
         max_items_per_row: Maximum number of bubbles in a single row.
     Returns:
         A copy of ``df`` ordered row by row (count descending inside a row)
         with ``bin``, ``bin_description``, ``y``, ``x`` and ``size_rank``
         set. An empty input yields an empty frame with those columns.
     Raises:
         ValueError: If ``bin_config`` is an empty list.
     """
     # Default bin configuration; 8 rows x 6 bubbles is usually good
     if bin_config is None:
         bin_config = [
             (100, None, "100+ dialogs"),
             (50, 99, "50-99 dialogs"),
             (25, 49, "25-49 dialogs"),
             (9, 24, "9-24 dialogs"),
             (7, 8, "7-8 dialogs"),
             (5, 6, "5-6 dialogs"),
             (4, 4, "4 dialogs"),
             (0, 3, "0-3 dialogs"),
         ]
     if not bin_config:
         raise ValueError("bin_config must define at least one bin")
     bin_names = [f"Bin {i + 1}" for i in range(len(bin_config))]
     bin_descriptions = {
         name: description
         for name, (_, _, description) in zip(bin_names, bin_config)
     }
     # Work on a copy so the caller's frame is never modified
     layout = df.copy()
     if layout.empty:
         # Nothing to place; still return the columns callers expect
         return layout.assign(bin=[], bin_description=[], y=[], x=[], size_rank=[])
     counts = layout["count"]
     conditions = [
         counts >= lower if upper is None else (counts >= lower) & (counts <= upper)
         for lower, upper, _ in bin_config
     ]
     # np.select keeps the first matching bin; unmatched counts go to the last one
     layout["bin"] = np.select(conditions, bin_names, default=bin_names[-1])
     layout["bin_description"] = layout["bin"].map(bin_descriptions)
     # Build the chart rows explicitly in bin order. Sorting on the bin *names*
     # would be lexicographic and put "Bin 8_10" ahead of "Bin 8_2".
     rows = []
     for bin_name in bin_names:
         # Stable sort keeps the input order of topics with equal counts
         members = layout[layout["bin"] == bin_name].sort_values(
             "count", ascending=False, kind="mergesort"
         )
         bin_size = len(members)
         if bin_size == 0:
             continue
         if bin_size <= max_items_per_row:
             rows.append(members)
             continue
         # Split into the fewest rows that fit, with sizes differing by at most one
         num_sub_bins = -(-bin_size // max_items_per_row)
         base_size, remainder = divmod(bin_size, num_sub_bins)
         start_idx = 0
         for part in range(num_sub_bins):
             end_idx = start_idx + base_size + (1 if part < remainder else 0)
             rows.append(
                 members.iloc[start_idx:end_idx].assign(
                     bin=f"{bin_name}_{part + 1}",
                     bin_description=(
                         f"{bin_descriptions[bin_name]} ({part + 1}/{num_sub_bins})"
                     ),
                 )
             )
             start_idx = end_idx
     # Every row gets an equal share of the available height and sits in the
     # middle of its share
     row_height = (100 - (2 * padding)) / len(rows)
     center_point = 50  # Middle of the chart (0-100 scale)
     placed = []
     for row_index, row in enumerate(rows):
         num_topics_in_row = len(row)
         if num_topics_in_row == 1:
             # A single bubble goes in the center
             positions = [float(center_point)]
         else:
             # Rows that are not full get slightly wider gaps (17.5 units)
             # than full rows (15 units); the group is centred either way
             spacing = 17.5 if num_topics_in_row < max_items_per_row else 15
             start_pos = center_point - ((num_topics_in_row - 1) * spacing / 2)
             positions = [start_pos + (i * spacing) for i in range(num_topics_in_row)]
         placed.append(
             row.assign(
                 y=padding + row_index * row_height + (row_height / 2),
                 x=positions,
             )
         )
     result = pd.concat(placed)
     # Rank follows the final row-by-row order
     result["size_rank"] = range(1, len(result) + 1)
     return result
+# Layout hook for the bubble chart; currently it always applies the binned layout
 def update_bubble_positions(df: pd.DataFrame) -> pd.DataFrame:
+    """Return ``df`` laid out by ``apply_binned_layout`` with its default settings."""
+    # For the main chart, we always use the binned layout
     return apply_binned_layout(df)
+# Callback to update the bubble chart
 @callback(
     Output("bubble-chart", "figure"),
     [
     df = pd.DataFrame(data)
+    # Update positions using binned layout
+    df = update_bubble_positions(df)
+    # Always use count for sizing
     size_values = df["count"]
     raw_sizes = df["count"]
     size_title = "Dialog Count"
+    # Apply log scaling to the size values for better visualization
+    # To make the smallest bubble bigger, increase the min_size value (currently 1).
+    min_size = 1  # Minimum bubble size
     if size_values.max() > size_values.min():
+        # Log-scale the sizes
         log_sizes = np.log1p(size_values)
+        # Scale to a reasonable range for visualization
+        # To make the biggest bubble smaller, reduce the multiplier (currently 50).
+        size_values = (
+            min_size
+            + (log_sizes - log_sizes.min()) / (log_sizes.max() - log_sizes.min()) * 50
+        )
     else:
+        # If all values are the same, use a default size
         size_values = np.ones(len(df)) * 12.5
+    # DEBUG: Print sizes of bubbles in the first and second bins
+    bins = sorted(df["bin"].unique())
+    if len(bins) >= 1:
+        # first_bin = bins[0]
+        # print(f"DEBUG - First bin '{first_bin}' bubble sizes:")
+        # first_bin_df = df[df["bin"] == first_bin]
+        # for idx, row in first_bin_df.iterrows():
+        #     print(
+        #         f"  Topic: {row['deduplicated_topic_name']}, Raw size: {row['count']}, Displayed size: {size_values[idx]}"
+        #     )
+        pass
+    if len(bins) >= 2:
+        # second_bin = bins[1]
+        # print(f"DEBUG - Second bin '{second_bin}' bubble sizes:")
+        # second_bin_df = df[df["bin"] == second_bin]
+        # for idx, row in second_bin_df.iterrows():
+        #     print(
+        #         f"  Topic: {row['deduplicated_topic_name']}, Raw size: {row['count']}, Displayed size: {size_values[idx]}"
+        #     )
+        pass
+    # Determine color based on selected metric
     if color_metric == "negative_rate":
         color_values = df["negative_rate"]
+        # color_title = "Negative Sentiment (%)"
         color_title = "Negativity (%)"
+        # color_scale = "RdBu"  # no ice, RdBu - og is Reds - matter is good too
+        # color_scale = "Portland"
+        # color_scale = "RdYlGn_r"
+        # color_scale = "Teal"
         color_scale = "Teal"
     elif color_metric == "unresolved_rate":
         color_values = df["unresolved_rate"]
         color_title = "Unresolved (%)"
+        # color_scale = "Burg"  # og is YlOrRd
+        # color_scale = "Temps"
+        # color_scale = "Armyrose"
+        # color_scale = "YlOrRd"
         color_scale = "Teal"
+    else:
         color_values = df["urgent_rate"]
         color_title = "Urgency (%)"
+        # color_scale = "Magenta"  # og is Blues
+        # color_scale = "Tealrose"
+        # color_scale = "Portland"
         color_scale = "Teal"
+    # Create enhanced hover text that includes bin information
     hover_text = [
         f"Topic: {topic}<br>{size_title}: {raw:.1f}<br>{color_title}: {color:.1f}<br>Group: {bin_desc}"
+        for topic, raw, color, bin_desc in zip(
+            df["deduplicated_topic_name"],
+            raw_sizes,
+            color_values,
+            df["bin_description"],
+        )
     ]
+    # Create bubble chart
     fig = px.scatter(
         df,
+        x="x",
+        y="y",
         size=size_values,
         color=color_values,
+        # text="deduplicated_topic_name", # Remove text here
         hover_name="deduplicated_topic_name",
+        hover_data={
+            "x": False,
+            "y": False,
+            "bin_description": True,
+        },
+        size_max=42.5,  # Maximum size of the bubbles, change this to adjust the size
         color_continuous_scale=color_scale,
+        custom_data=[
+            "deduplicated_topic_name",
+            "count",
+            "negative_rate",
+            "unresolved_rate",
+            "urgent_rate",
+            "bin_description",
+        ],
     )
+    # Update traces: Remove text related properties
     fig.update_traces(
+        mode="markers",  # Remove '+text'
         marker=dict(sizemode="area", opacity=0.8, line=dict(width=1, color="white")),
         hovertemplate="%{hovertext}<extra></extra>",
         hovertext=hover_text,
     )
+    # Create annotations for the bubbles
     annotations = []
     for i, row in df.iterrows():
+        # Wrap text every 4 words
         words = row["deduplicated_topic_name"].split()
+        wrapped_text = "<br>".join(
+            [" ".join(words[i : i + 4]) for i in range(0, len(words), 4)]
+        )
+        # Calculate size for vertical offset (approximately based on the bubble size)
+        # Add vertical offset based on bubble size to place text below the bubble
+        marker_size = (
+            size_values[i] / 20  # type: ignore  # FIXME: size_values[df.index.get_loc(i)] / 20
+        )  # Adjust this divisor as needed to get proper spacing
         annotations.append(
             dict(
+                x=row["x"],
+                y=row["y"]
+                + 0.125  # Adding this so in a row with maximum bubbles, the left one does not overlap with the bin label
+                + marker_size,  # Add vertical offset to position text below the bubble
+                text=wrapped_text,
+                showarrow=False,
+                textangle=0,
+                font=dict(
+                    # size=10,
+                    # size=15,
+                    size=9,
+                    color="var(--foreground)",
+                    family="Arial, sans-serif",
+                    weight="bold",
+                ),
+                xanchor="center",
+                yanchor="top",  # Anchor to top of text box so it hangs below the bubble
+                bgcolor="rgba(255,255,255,0.7)",  # Add semi-transparent background for better readability
+                bordercolor="rgba(0,0,0,0.1)",  # Add a subtle border color
+                borderwidth=1,
+                borderpad=1,
+                # TODO: Radius for rounded corners
             )
         )
+    # Add bin labels and separator lines
     unique_bins = sorted(df["bin"].unique())
+    bin_y_positions = [
+        df[df["bin"] == bin_name]["y"].mean() for bin_name in unique_bins
+    ]
+    # Dynamically extract bin descriptions
     bin_descriptions = df.set_index("bin")["bin_description"].to_dict()
     for bin_name, bin_y in zip(unique_bins, bin_y_positions):
+        # Add horizontal line
+        fig.add_shape(
+            type="line",
+            x0=0,
+            y0=bin_y,
+            x1=100,
+            y1=bin_y,
+            line=dict(color="rgba(0,0,0,0.1)", width=1, dash="dot"),
+            layer="below",
+        )
+    # Add subtle lines for each bin and bin labels
+    for bin_name, bin_y in zip(unique_bins, bin_y_positions):
+        # Add horizontal line
+        fig.add_shape(
+            type="line",
+            x0=0,
+            y0=bin_y,
+            x1=100,
+            y1=bin_y,
+            line=dict(color="rgba(0,0,0,0.1)", width=1, dash="dot"),
+            layer="below",
+        )
+        # Add bin label annotation
         annotations.append(
             dict(
+                x=0,  # Position the label on the left side
+                y=bin_y,
+                xref="x",
+                yref="y",
+                text=bin_descriptions[bin_name],
+                showarrow=False,
                 font=dict(size=8.25, color="var(--muted-foreground)"),
+                align="left",
+                xanchor="left",
+                yanchor="middle",
+                bgcolor="rgba(255,255,255,0.7)",
+                borderpad=1,
             )
         )
     fig.update_layout(
         title=None,
+        xaxis=dict(
+            showgrid=False,
+            zeroline=False,
+            showticklabels=False,
+            title=None,
+            range=[0, 100],
+        ),
+        yaxis=dict(
+            showgrid=False,
+            zeroline=False,
+            showticklabels=False,
+            title=None,
+            range=[0, 100],
+            autorange="reversed",  # Keep largest at top
+        ),
         hovermode="closest",
         margin=dict(l=0, r=0, t=10, b=10),
+        coloraxis_colorbar=dict(
+            title=color_title,
+            title_font=dict(size=9),
+            tickfont=dict(size=8),
+            thickness=10,
+            len=0.6,
+            yanchor="middle",
+            y=0.5,
+            xpad=0,
+        ),
         legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
         paper_bgcolor="rgba(0,0,0,0)",
         plot_bgcolor="rgba(0,0,0,0)",
         hoverlabel=dict(bgcolor="white", font_size=12, font_family="Inter"),
+        annotations=annotations,  # Add bubble text labels and bin labels as annotations
     )
     return fig
+# Topic details are driven only by bubble clicks or the refresh button (no hover triggers)
 @callback(
     [
         Output("topic-title", "children"),
         Output("selected-topic-store", "data"),
     ],
     [
+        Input("bubble-chart", "clickData"),
         Input("refresh-dialogs-btn", "n_clicks"),
     ],
+    [
+        State("stored-data", "data"),
+        State("data-cache-key", "data"),
+        State("selected-topic-store", "data"),
+    ],
 )
+def update_topic_details(
+    click_data, refresh_clicks, stored_data, cache_key, selected_topic_state
+):
+    # Determine topic based on click selection or prior selection on refresh
+    topic_name = None
     ctx = dash.callback_context
+    triggered = ctx.triggered[0]["prop_id"].split(".")[0] if ctx.triggered else None
+    if triggered == "bubble-chart":
+        if click_data and "points" in click_data and click_data["points"]:
+            topic_name = click_data["points"][0]["customdata"][0]
+    elif triggered == "refresh-dialogs-btn":
+        if selected_topic_state and selected_topic_state.get("topic_name"):
+            topic_name = selected_topic_state["topic_name"]
+    if not topic_name or not stored_data or not cache_key:
+        return (
+            "",
+            [],
+            [],
+            "",
+            {"display": "none"},
+            "",
+            {"display": "none"},
+            [],
+            {"display": "flex"},
+            None,
+        )
+    # Get stored data for this topic
     df_stored = pd.DataFrame(stored_data)
     topic_data = df_stored[df_stored["deduplicated_topic_name"] == topic_name].iloc[0]
+    # Get original data to sample conversations from server-side cache
+    df_full = _cache_get_df(cache_key)
+    if df_full is None:
+        return (
+            "",
+            [],
+            [],
+            "",
+            {"display": "none"},
+            "",
+            {"display": "none"},
+            [html.Div("Session expired. Please re-upload file.")],
+            {"display": "flex"},
+            None,
+        )
+    # Filter to this topic
     topic_conversations = df_full[df_full["deduplicated_topic_name"] == topic_name]
+    # Create the title
     title = html.Div([html.Span(topic_name)])
+    # Create metadata items
     metadata_items = [
         html.Div(
             [
                 html.Span(f"{int(topic_data['count'])} dialogs"),
                 html.Button(
                     [
+                        html.I(
+                            className="fas fa-table", style={"marginRight": "0.25rem"}
+                        ),
+                        "Show all dialogs inside",
                     ],
                     id="show-all-dialogs-btn",
                     className="show-dialogs-btn",
             style={"display": "flex", "alignItems": "center", "width": "100%"},
         ),
     ]
+    # Create metrics boxes
     metrics_boxes = [
         html.Div(
             [
         ),
     ]
+    # Extract and process root causes
     root_causes_output = ""
     root_causes_section_style = {"display": "none"}
+    # Check if root_cause_subcluster column exists in the data
     if "root_cause_subcluster" in topic_conversations.columns:
+        # Get unique root causes for this specific cluster
+        root_causes = topic_conversations["root_cause_subcluster"].dropna().unique()
+        # Filter out common non-informative values including "Unclustered"
         filtered_root_causes = [
+            rc
+            for rc in root_causes
+            if rc
+            not in [
+                "Sub-clustering disabled",
+                "Not eligible for sub-clustering",
+                "No valid root causes",
+                "No Subcluster",
+                "Unclustered",
+                "",
+            ]
         ]
+        # Debug: Print the unique root causes for this cluster
+        print(f"\n[DEBUG] Root causes for cluster '{topic_name}':")
+        print(f"  All root causes: {list(root_causes)}")
+        print(f"  Filtered root causes: {filtered_root_causes}")
         if filtered_root_causes:
+            # Create beautifully styled root cause tags with clickable icons
             root_causes_output = html.Div(
                 [
                     html.Div(
                         [
+                            html.I(
+                                className="fas fa-exclamation-triangle root-cause-tag-icon"
+                            ),
                             html.Span(root_cause, style={"marginRight": "6px"}),
                             html.I(
                                 className="fas fa-external-link-alt root-cause-click-icon",
                                 id={"type": "root-cause-icon", "index": root_cause},
                                 title="Click to see specific chats assigned with this root cause.",
+                                style={
+                                    "cursor": "pointer",
+                                    "fontSize": "0.55rem",
+                                    "opacity": "0.8",
+                                },
                             ),
                         ],
                         className="root-cause-tag",
             )
             root_causes_section_style = {"display": "block"}
+    # Extract and process consolidated_tags with improved styling
     tags_list = []
+    for _, row in topic_conversations.iterrows():
+        tags_str = row.get("consolidated_tags", "")
+        if pd.notna(tags_str):
+            tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()]
+            tags_list.extend(tags)
+    # Count tag frequencies for better insight
     tag_counts = {}
     for tag in tags_list:
         tag_counts[tag] = tag_counts.get(tag, 0) + 1
+    # Sort by frequency (most common first) and then alphabetically for ties
+    sorted_tags = sorted(tag_counts.items(), key=lambda x: (-x[1], x[0]))
+    # Keep only the top K tags
+    TOP_K = 15
+    sorted_tags = sorted_tags[:TOP_K]
+    # Set tags section visibility and output
     tags_section_style = {"display": "none"}
     if sorted_tags:
+        # Create beautifully styled tags with count indicators and consistent color
         tags_output = html.Div(
             [
                 html.Div(
         tags_section_style = {"display": "block"}
     else:
         tags_output = html.Div(
+            [
+                html.I(className="fas fa-info-circle", style={"marginRight": "5px"}),
+                "No tags found for this topic",
+            ],
             className="no-tags-message",
         )
+    # Sample up to 5 random dialogs
     sample_size = min(5, len(topic_conversations))
     if sample_size > 0:
+        sample_indices = random.sample(range(len(topic_conversations)), sample_size)
+        samples = topic_conversations.iloc[sample_indices]
         dialog_items = []
         for _, row in samples.iterrows():
+            # Create dialog item with tags
+            sentiment_tag = html.Span(
+                row["Sentiment"], className="dialog-tag tag-sentiment"
+            )
+            resolution_tag = html.Span(
+                row["Resolution"], className="dialog-tag tag-resolution"
+            )
+            urgency_tag = html.Span(row["Urgency"], className="dialog-tag tag-urgency")
+            # Add Chat ID tag if 'id' column exists
+            chat_id_tag = None
             if "id" in row:
+                chat_id_tag = html.Span(
+                    [
+                        f"Chat ID: {row['id']} ",
+                        html.I(
+                            className="fas fa-arrow-up-right-from-square conversation-icon",
+                            id={"type": "conversation-icon", "index": row["id"]},
+                            title="View full conversation",
+                            style={"marginLeft": "0.25rem"},
+                        ),
+                    ],
+                    className="dialog-tag tag-chat-id",
+                    style={"display": "inline-flex", "alignItems": "center"},
+                )
+            # Add Root Cause tag if the 'Root_Cause' column exists and holds a real value (not NaN or "na")
+            root_cause_tag = None
+            if (
+                "Root_Cause" in row
+                and pd.notna(row["Root_Cause"])
+                and row["Root_Cause"] != "na"
+            ):
+                root_cause_tag = html.Span(
+                    f"Root Cause: {row['Root_Cause']}",
+                    className="dialog-tag tag-root-cause",
+                )
+            # Compile all tags, including the new Chat ID and Root Cause tags if available
+            tags = [sentiment_tag, resolution_tag, urgency_tag]
+            if chat_id_tag:
+                tags.append(chat_id_tag)
+            if root_cause_tag:
+                tags.append(root_cause_tag)
             dialog_items.append(
                 html.Div(
+                    [
+                        html.Div(row["Summary"], className="dialog-summary"),
+                        html.Div(
+                            tags,
+                            className="dialog-metadata",
+                        ),
+                    ],
                     className="dialog-item",
                 )
             )
         sample_dialogs = dialog_items
     else:
+        sample_dialogs = [
+            html.Div(
+                "No sample dialogs available for this topic.",
+                style={"color": "var(--muted-foreground)"},
+            )
+        ]
     return (
         title,
         tags_section_style,
         sample_dialogs,
         {"display": "none"},
+        {"topic_name": topic_name, "cache_key": cache_key},
     )
+# Callback to open modal when conversation icon is clicked
 @callback(
     [
         Output("conversation-modal", "style"),
         Output("conversation-subheader", "children"),
     ],
     [Input({"type": "conversation-icon", "index": dash.dependencies.ALL}, "n_clicks")],
+    [State("data-cache-key", "data")],
     prevent_initial_call=True,
 )
+def open_conversation_modal(n_clicks_list, cache_key):
+    # Check if any icon was clicked
+    if not any(n_clicks_list) or not cache_key:
         return {"display": "none"}, "", ""
+    # Get which icon was clicked
     ctx = dash.callback_context
     if not ctx.triggered:
+        return (
+            {"display": "none"},
+            "",
+            "",
+        )
+    # Extract the chat ID from the triggered input
     triggered_id = ctx.triggered[0]["prop_id"]
     chat_id = json.loads(triggered_id.split(".")[0])["index"]
+    # Get the full conversation from the cached DataFrame
+    df_full = _cache_get_df(cache_key)
+    if df_full is None:
+        return {"display": "none"}, "Session expired. Re-upload file.", ""
+    # Find the conversation with this chat ID
     conversation_row = df_full[df_full["id"] == chat_id]
     if len(conversation_row) == 0:
         conversation_text = "Conversation not found."
     else:
         row = conversation_row.iloc[0]
         conversation_text = row.get("conversation", "No conversation data available.")
+        # Get cluster name if available
         cluster_name = row.get("deduplicated_topic_name", "Unknown cluster")
+        # Create subheader with both Chat ID and cluster name
         subheader_content = html.Div(
             [
+                html.Span(
+                    f"Chat ID: {chat_id}",
+                    style={"fontWeight": "600", "marginRight": "1rem"},
+                ),
+                html.Span(
+                    f"Cluster: {cluster_name}",
+                    style={"color": "hsl(215.4, 16.3%, 46.9%)"},
+                ),
             ]
         )
     return {"display": "flex"}, conversation_text, subheader_content
+# Callback to close modal
 @callback(
     Output("conversation-modal", "style", allow_duplicate=True),
     [Input("close-modal-btn", "n_clicks")],
 def close_conversation_modal(n_clicks):
     if n_clicks:
         return {"display": "none"}
+    return {"display": "none"}
+# Callback to open dialogs table modal when "Show all dialogs inside" button is clicked
 @callback(
     [
         Output("dialogs-table-modal", "style"),
         Output("dialogs-table-content", "children"),
     ],
     [Input("show-all-dialogs-btn", "n_clicks")],
+    [State("selected-topic-store", "data")],
     prevent_initial_call=True,
 )
+def open_dialogs_table_modal(n_clicks, selected_topic_data):
+    if not n_clicks or not selected_topic_data:
         return {"display": "none"}, "", ""
     topic_name = selected_topic_data["topic_name"]
+    cache_key = selected_topic_data.get("cache_key")
+    # Get the full data from cache
+    df_full = _cache_get_df(cache_key)
+    if df_full is None:
+        return (
+            {"display": "none"},
+            "",
+            html.Div("Session expired. Please re-upload file."),
+        )
+    # Filter to this topic
     topic_conversations = df_full[df_full["deduplicated_topic_name"] == topic_name]
+    # Create the table
+    table_rows = []
+    # Header row
+    table_rows.append(
+        html.Tr(
+            [
+                html.Th("Chat ID"),
+                html.Th("Summary"),
+                html.Th("Root Cause"),
+                html.Th("Sentiment"),
+                html.Th("Resolution"),
+                html.Th("Urgency"),
+                html.Th("Tags"),
+                html.Th("Action"),
+            ]
+        )
+    )
+    # Data rows
     for _, row in topic_conversations.iterrows():
+        # Process tags
+        tags_str = row.get("consolidated_tags", "")
+        if pd.notna(tags_str):
+            tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()]
+            tags_display = html.Div(
+                [
+                    html.Span(
+                        tag,
+                        className="dialog-tag-small",
+                        style={"backgroundColor": "#6c757d", "color": "white"},
+                    )
+                    for tag in tags[:3]  # Show only first 3 tags
+                ]
+                + (
+                    [
+                        html.Span(
+                            f"+{len(tags) - 3}",
+                            className="dialog-tag-small",
+                            style={"backgroundColor": "#6c757d", "color": "white"},
+                        )
+                    ]
+                    if len(tags) > 3
+                    else []
+                ),
+                className="dialog-tags-cell",
+            )
+        else:
+            tags_display = html.Span(
+                "No tags",
+                style={"color": "var(--muted-foreground)", "fontStyle": "italic"},
+            )
         table_rows.append(
+            html.Tr(
+                [
+                    html.Td(
+                        row["id"],
+                        style={"fontFamily": "monospace", "fontSize": "0.8rem"},
+                    ),
+                    html.Td(
+                        row.get("Summary", "No summary"),
+                        className="dialog-summary-cell",
+                    ),
+                    html.Td(
+                        html.Span(
+                            str(row.get("Root_Cause", "Unknown")).capitalize()
+                            if not pd.isna(row.get("Root_Cause"))
+                            else "Unknown",
+                            className="dialog-tag-small",
+                            style={
+                                "backgroundColor": "#8B4513",  # Brown color for root cause
+                                "color": "white",
+                            },
+                        )
+                    ),
+                    html.Td(
+                        html.Span(  # if sentiment is negative, color it red, otherwise grey
+                            row.get("Sentiment", "Unknown").capitalize(),
+                            className="dialog-tag-small",
+                            style={
+                                "backgroundColor": "#dc3545"
+                                if row.get("Sentiment") == "negative"
+                                else "#6c757d",
+                                "color": "white",
+                            },
+                        )
+                    ),
+                    html.Td(
+                        html.Span(  # if resolution is unresolved, color it red, otherwise grey
+                            row.get("Resolution", "Unknown").capitalize(),
+                            className="dialog-tag-small",
+                            style={
+                                "backgroundColor": "#dc3545"
+                                if row.get("Resolution") == "unresolved"
+                                else "#6c757d",
+                                "color": "white",
+                            },
+                        )
+                    ),
+                    html.Td(
+                        html.Span(  # if urgency is urgent, color it red, otherwise grey
+                            row.get("Urgency", "Unknown").capitalize(),
+                            className="dialog-tag-small",
+                            style={
+                                "backgroundColor": "#dc3545"
+                                if row.get("Urgency") == "urgent"
+                                else "#6c757d",
+                                "color": "white",
+                            },
+                        )
+                    ),
+                    html.Td(tags_display),
+                    html.Td(
+                        html.Button(
+                            [
+                                html.I(
+                                    className="fas fa-eye",
+                                    style={"marginRight": "0.25rem"},
+                                ),
+                                "View chat session",
+                            ],
+                            id={"type": "open-chat-btn", "index": row["id"]},
+                            className="open-chat-btn",
+                            n_clicks=0,
+                        )
+                    ),
+                ]
+            )
         )
     table = html.Table(table_rows, className="dialogs-table")
+    modal_title = (
+        f"All dialogs in Topic: {topic_name} ({len(topic_conversations)} dialogs)"
+    )
     return {"display": "flex"}, modal_title, table
+# Callback to close dialogs table modal
 @callback(
     Output("dialogs-table-modal", "style", allow_duplicate=True),
     [Input("close-dialogs-modal-btn", "n_clicks")],
 def close_dialogs_table_modal(n_clicks):
     if n_clicks:
         return {"display": "none"}
+    return {"display": "none"}
+# Callback to open conversation modal from dialogs table
 @callback(
     [
         Output("conversation-modal", "style", allow_duplicate=True),
         Output("conversation-subheader", "children", allow_duplicate=True),
     ],
     [Input({"type": "open-chat-btn", "index": dash.dependencies.ALL}, "n_clicks")],
+    [State("data-cache-key", "data")],
     prevent_initial_call=True,
 )
+def open_conversation_from_table(n_clicks_list, cache_key):
+    # Check if any button was clicked
+    if not any(n_clicks_list) or not cache_key:
         return {"display": "none"}, "", ""
+    # Get which button was clicked
     ctx = dash.callback_context
     if not ctx.triggered:
         return {"display": "none"}, "", ""
+    # Extract the chat ID from the triggered input
     triggered_id = ctx.triggered[0]["prop_id"]
     chat_id = json.loads(triggered_id.split(".")[0])["index"]
+    # Debug: print the chat_id to understand its type and value
+    print(f"DEBUG: Looking for chat_id: {chat_id} (type: {type(chat_id)})")
+    # Get the full conversation from the cached DataFrame
+    df_full = _cache_get_df(cache_key)
+    if df_full is None:
+        return {"display": "none"}, "Session expired. Re-upload file.", ""
+    # Debug: print some info about the dataframe
+    print(f"DEBUG: DataFrame shape: {df_full.shape}")
+    print(f"DEBUG: Available chat IDs (first 5): {df_full['id'].head().tolist()}")
+    print(f"DEBUG: Chat ID types in df: {df_full['id'].dtype}")
+    # Try to match with different data type conversions
     conversation_row = df_full[df_full["id"] == chat_id]
+    # If not found, try converting types
     if len(conversation_row) == 0:
+        # Try converting chat_id to string
+        conversation_row = df_full[df_full["id"].astype(str) == str(chat_id)]
+    # If still not found, try converting chat_id to int
+    if len(conversation_row) == 0:
+        try:
+            conversation_row = df_full[df_full["id"] == int(chat_id)]
+        except (ValueError, TypeError):
+            pass
+    if len(conversation_row) == 0:
+        conversation_text = f"Conversation not found for Chat ID: {chat_id}. Available IDs: {df_full['id'].head(10).tolist()}"
         subheader_content = f"Chat ID: {chat_id} (Not Found)"
     else:
+        conversation_row = conversation_row.iloc[0]
+        conversation_text = conversation_row.get(
+            "conversation",
+            "No conversation available, oopsie.",  # TODO: fix the conversation status here
+        )
+        # Create subheader with metadata
+        subheader_content = f"Chat ID: {chat_id} | Topic: {conversation_row.get('deduplicated_topic_name', 'Unknown')} | Sentiment: {conversation_row.get('Sentiment', 'Unknown')} | Resolution: {conversation_row.get('Resolution', 'Unknown')}"
     return {"display": "flex"}, conversation_text, subheader_content
+# Callback to open root cause modal when root cause icon is clicked
 @callback(
     [
         Output("root-cause-modal", "style"),
         Output("root-cause-table-content", "children"),
     ],
     [Input({"type": "root-cause-icon", "index": dash.dependencies.ALL}, "n_clicks")],
+    [State("selected-topic-store", "data")],
     prevent_initial_call=True,
 )
+def open_root_cause_modal(n_clicks_list, selected_topic_data):
+    # Check if any icon was clicked
+    if not any(n_clicks_list) or not selected_topic_data:
         return {"display": "none"}, "", ""
+    # Get which icon was clicked
     ctx = dash.callback_context
     if not ctx.triggered:
         return {"display": "none"}, "", ""
     triggered_id = ctx.triggered[0]["prop_id"]
     root_cause = json.loads(triggered_id.split(".")[0])["index"]
     topic_name = selected_topic_data["topic_name"]
+    cache_key = selected_topic_data.get("cache_key")
+    # Get the full data from cache
+    df_full = _cache_get_df(cache_key)
+    if df_full is None:
+        return (
+            {"display": "none"},
+            "",
+            html.Div("Session expired. Please re-upload file."),
+        )
+    # Filter to this topic and root cause
     filtered_conversations = df_full[
         (df_full["deduplicated_topic_name"] == topic_name)
         & (df_full["root_cause_subcluster"] == root_cause)
     ]
+    # Create the table
+    table_rows = []
+    # Header row
+    table_rows.append(
+        html.Tr(
+            [
+                html.Th("Chat ID"),
+                html.Th("Summary"),
+                html.Th("Sentiment"),
+                html.Th("Resolution"),
+                html.Th("Urgency"),
+                html.Th("Tags"),
+                html.Th("Action"),
+            ]
+        )
+    )
+    # Data rows
     for _, row in filtered_conversations.iterrows():
+        # Process tags
+        tags_str = row.get("consolidated_tags", "")
+        if pd.notna(tags_str):
+            tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()]
+            tags_display = html.Div(
+                [
+                    html.Span(
+                        tag,
+                        className="dialog-tag-small",
+                        style={"backgroundColor": "#6c757d", "color": "white"},
+                    )
+                    for tag in tags[:3]  # Show only first 3 tags
+                ]
+                + (
+                    [
+                        html.Span(
+                            f"+{len(tags) - 3}",
+                            className="dialog-tag-small",
+                            style={"backgroundColor": "#6c757d", "color": "white"},
+                        )
+                    ]
+                    if len(tags) > 3
+                    else []
+                ),
+                className="dialog-tags-cell",
+            )
+        else:
+            tags_display = html.Span(
+                "No tags",
+                style={"color": "var(--muted-foreground)", "fontStyle": "italic"},
+            )
         table_rows.append(
+            html.Tr(
+                [
+                    html.Td(
+                        row["id"],
+                        style={"fontFamily": "monospace", "fontSize": "0.8rem"},
+                    ),
+                    html.Td(
+                        row.get("Summary", "No summary"),
+                        className="dialog-summary-cell",
+                    ),
+                    html.Td(
+                        html.Span(
+                            row.get("Sentiment", "Unknown").capitalize(),
+                            className="dialog-tag-small",
+                            style={
+                                "backgroundColor": "#dc3545"
+                                if row.get("Sentiment") == "negative"
+                                else "#6c757d",
+                                "color": "white",
+                            },
+                        )
+                    ),
+                    html.Td(
+                        html.Span(
+                            row.get("Resolution", "Unknown").capitalize(),
+                            className="dialog-tag-small",
+                            style={
+                                "backgroundColor": "#dc3545"
+                                if row.get("Resolution") == "unresolved"
+                                else "#6c757d",
+                                "color": "white",
+                            },
+                        )
+                    ),
+                    html.Td(
+                        html.Span(
+                            row.get("Urgency", "Unknown").capitalize(),
+                            className="dialog-tag-small",
+                            style={
+                                "backgroundColor": "#dc3545"
+                                if row.get("Urgency") == "urgent"
+                                else "#6c757d",
+                                "color": "white",
+                            },
+                        )
+                    ),
+                    html.Td(tags_display),
+                    html.Td(
+                        html.Button(
+                            [
+                                html.I(
+                                    className="fas fa-eye",
+                                    style={"marginRight": "0.25rem"},
+                                ),
+                                "View chat",
+                            ],
+                            id={"type": "open-chat-btn-rc", "index": row["id"]},
+                            className="open-chat-btn",
+                            n_clicks=0,
+                        )
+                    ),
+                ]
+            )
         )
     table = html.Table(table_rows, className="dialogs-table")
+    modal_title = f"Dialogs with Root Cause: {root_cause} (Topic: {topic_name})"
     count_info = html.P(
+        f"Found {len(filtered_conversations)} dialogs with this root cause",
+        style={
+            "margin": "0 0 1rem 0",
+            "color": "var(--muted-foreground)",
+            "fontSize": "0.875rem",
+        },
     )
     content = html.Div([count_info, table])
     return {"display": "flex"}, modal_title, content
+# Callback to close root cause modal
 @callback(
     Output("root-cause-modal", "style", allow_duplicate=True),
     [Input("close-root-cause-modal-btn", "n_clicks")],
 def close_root_cause_modal(n_clicks):
     if n_clicks:
         return {"display": "none"}
+    return {"display": "none"}
+# Callback to open conversation modal from root cause table
 @callback(
     [
         Output("conversation-modal", "style", allow_duplicate=True),
         Output("conversation-subheader", "children", allow_duplicate=True),
     ],
     [Input({"type": "open-chat-btn-rc", "index": dash.dependencies.ALL}, "n_clicks")],
+    [State("data-cache-key", "data")],
     prevent_initial_call=True,
 )
+def open_conversation_from_root_cause_table(n_clicks_list, cache_key):
+    # Check if any button was clicked
+    if not any(n_clicks_list) or not cache_key:
         return {"display": "none"}, "", ""
+    # Get which button was clicked
     ctx = dash.callback_context
     if not ctx.triggered:
         return {"display": "none"}, "", ""
     triggered_id = ctx.triggered[0]["prop_id"]
     chat_id = json.loads(triggered_id.split(".")[0])["index"]
+    # Get the full conversation from the cached DataFrame
+    df_full = _cache_get_df(cache_key)
+    if df_full is None:
+        return {"display": "none"}, "Session expired. Re-upload file.", ""
+    # Find the conversation with this chat ID
     conversation_row = df_full[df_full["id"] == chat_id]
+    # If not found, try converting types
     if len(conversation_row) == 0:
         conversation_row = df_full[df_full["id"].astype(str) == str(chat_id)]
+    if len(conversation_row) == 0:
+        try:
+            conversation_row = df_full[df_full["id"] == int(chat_id)]
+        except (ValueError, TypeError):
+            pass
     if len(conversation_row) == 0:
         conversation_text = f"Conversation not found for Chat ID: {chat_id}"
         subheader_content = f"Chat ID: {chat_id} (Not Found)"
     else:
         row = conversation_row.iloc[0]
         conversation_text = row.get("conversation", "No conversation data available.")
+        # Get additional metadata
         root_cause = row.get("root_cause_subcluster", "Unknown")
         cluster_name = row.get("deduplicated_topic_name", "Unknown cluster")
+        # Create subheader with metadata including root cause
+        subheader_content = html.Div(
+            [
+                html.Span(
+                    f"Chat ID: {chat_id}",
+                    style={"fontWeight": "600", "marginRight": "1rem"},
+                ),
+                html.Span(
+                    f"Cluster: {cluster_name}",
+                    style={"color": "hsl(215.4, 16.3%, 46.9%)", "marginRight": "1rem"},
+                ),
+                html.Span(
+                    f"Root Cause: {root_cause}",
+                    style={"color": "#8b6f47", "fontWeight": "500"},
+                ),
+            ]
+        )
     return {"display": "flex"}, conversation_text, subheader_content
 # IMPORTANT: Expose the server for Gunicorn