eloukas commited on
Commit
62c9d32
·
verified ·
1 Parent(s): 8cbcb64

Optimize user interaction latency

Browse files
Files changed (1) hide show
  1. app.py +1038 -252
app.py CHANGED
@@ -2,6 +2,8 @@ import base64
2
  import io
3
  import json
4
  import random
 
 
5
 
6
  import dash
7
  import numpy as np
@@ -13,6 +15,62 @@ from dash import Input, Output, State, callback, dcc, html
13
  # Initialize the Dash app
14
  app = dash.Dash(__name__, suppress_callback_exceptions=True)
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  # Define app layout
17
  app.layout = html.Div(
18
  [
@@ -83,12 +141,14 @@ app.layout = html.Div(
83
  children="Sessions Observatory",
84
  className="section-header",
85
  ),
 
86
  dcc.Graph(
87
  id="bubble-chart",
88
  style={"height": "calc(100% - 154px)"},
89
- ),
90
  html.Div(
91
  [
 
92
  html.Div(
93
  [
94
  html.Div(
@@ -101,6 +161,7 @@ app.layout = html.Div(
101
  ],
102
  className="control-labels-row",
103
  ),
 
104
  html.Div(
105
  [
106
  html.Div(
@@ -185,9 +246,10 @@ app.layout = html.Div(
185
  html.I(
186
  className="fas fa-info-circle",
187
  title="Root cause detection is experimental and may require manual review since it is generated by AI models. Root causes are only shown in clusters with identifiable root causes.",
 
188
  style={
189
  "marginLeft": "0.2rem",
190
- "color": "#6c757d",
191
  "fontSize": "0.9rem",
192
  "cursor": "pointer",
193
  "verticalAlign": "middle",
@@ -202,7 +264,9 @@ app.layout = html.Div(
202
  ),
203
  ],
204
  id="root-causes-section",
205
- style={"display": "none"},
 
 
206
  ),
207
  # Added Tags section
208
  html.Div(
@@ -217,7 +281,9 @@ app.layout = html.Div(
217
  ),
218
  ],
219
  id="tags-section",
220
- style={"display": "none"},
 
 
221
  ),
222
  ],
223
  className="details-section",
@@ -268,7 +334,7 @@ app.layout = html.Div(
268
  ),
269
  html.H3("No topic selected"),
270
  html.P(
271
- "Click a bubble to view topic details."
272
  ),
273
  ],
274
  className="no-selection-message",
@@ -387,8 +453,8 @@ app.layout = html.Div(
387
  ),
388
  # Store the processed data
389
  dcc.Store(id="stored-data"),
390
- # NEW: Store for the minimal raw dataframe
391
- dcc.Store(id="raw-data"),
392
  # Store the current selected topic for dialogs modal
393
  dcc.Store(id="selected-topic-store"),
394
  # Store the current selected root cause for root cause modal
@@ -397,7 +463,7 @@ app.layout = html.Div(
397
  className="app-container",
398
  )
399
 
400
- # Define CSS for the app (no changes needed here, so it's omitted for brevity)
401
  app.index_string = """
402
  <!DOCTYPE html>
403
  <html>
@@ -1221,10 +1287,10 @@ app.index_string = """
1221
  )
1222
  def update_topic_distribution_header(data):
1223
  if not data:
1224
- return "Sessions Observatory"
1225
 
1226
  df = pd.DataFrame(data)
1227
- total_dialogs = df["count"].sum()
1228
  return f"Sessions Observatory ({total_dialogs} dialogs)"
1229
 
1230
 
@@ -1232,91 +1298,107 @@ def update_topic_distribution_header(data):
1232
  @callback(
1233
  [
1234
  Output("stored-data", "data"),
1235
- Output("raw-data", "data"),
1236
  Output("upload-status", "children"),
1237
- Output("upload-status", "style"),
1238
  Output("main-content", "style"),
 
1239
  ],
1240
  [Input("upload-data", "contents")],
1241
  [State("upload-data", "filename")],
1242
  )
1243
  def process_upload(contents, filename):
1244
  if contents is None:
1245
- return None, None, "", {"display": "none"}, {"display": "none"}
1246
 
1247
  try:
 
1248
  content_type, content_string = contents.split(",")
1249
  decoded = base64.b64decode(content_string)
1250
 
1251
  if "csv" in filename.lower():
1252
- df = pd.read_csv(io.StringIO(decoded.decode("utf-8")), dtype={"Root_Cause": str})
 
 
1253
  elif "xls" in filename.lower():
1254
  df = pd.read_excel(io.BytesIO(decoded), dtype={"Root_Cause": str})
1255
- else:
1256
- return (
1257
- None,
1258
- None,
1259
- html.Div(
1260
- ["Unsupported file. Please upload a CSV or Excel file."],
1261
- style={"color": "var(--destructive)"},
1262
- ),
1263
- {"display": "block"},
1264
- {"display": "none"},
1265
  )
 
 
 
 
 
 
 
 
 
 
 
1266
 
 
1267
  EXCLUDE_UNCLUSTERED = True
1268
  if EXCLUDE_UNCLUSTERED and "deduplicated_topic_name" in df.columns:
1269
  df = df[df["deduplicated_topic_name"] != "Unclustered"].copy()
 
 
 
 
1270
  else:
1271
  return (
1272
- None,
1273
  None,
1274
  html.Div(
1275
- ["Please upload a CSV or Excel file with a 'deduplicated_topic_name' column."],
 
 
 
 
 
 
1276
  style={"color": "var(--destructive)"},
1277
  ),
1278
- {"display": "block"},
1279
  {"display": "none"},
1280
  )
1281
 
1282
- # Compute aggregated topic stats once
1283
  topic_stats = analyze_topics(df)
1284
 
1285
- # Store only the columns you use elsewhere to keep payload smaller
1286
- needed_cols = [
1287
- "id",
1288
- "conversation",
1289
- "deduplicated_topic_name",
1290
- "consolidated_tags",
1291
- "Root_Cause",
1292
- "root_cause_subcluster",
1293
- "Sentiment",
1294
- "Resolution",
1295
- "Urgency",
1296
- "Summary",
1297
- ]
1298
- df_min = df[[c for c in needed_cols if c in df.columns]].copy()
1299
 
1300
  return (
1301
  topic_stats.to_dict("records"),
1302
- df_min.to_dict("records"),
1303
  html.Div(
1304
  [
1305
  html.I(
1306
  className="fas fa-check-circle",
1307
- style={"color": "hsl(142.1, 76.2%, 36.3%)", "marginRight": "8px"},
 
 
 
1308
  ),
1309
  f'Successfully uploaded "{filename}"',
1310
  ],
1311
  style={"color": "hsl(142.1, 76.2%, 36.3%)"},
1312
  ),
1313
- {"display": "block"},
1314
- {"display": "block", "height": "calc(100vh - 40px)"},
 
 
 
 
1315
  )
1316
 
1317
  except Exception as e:
1318
  return (
1319
- None,
1320
  None,
1321
  html.Div(
1322
  [
@@ -1324,18 +1406,23 @@ def process_upload(contents, filename):
1324
  className="fas fa-exclamation-triangle",
1325
  style={"color": "var(--destructive)", "marginRight": "8px"},
1326
  ),
1327
- f"Error: {e}",
1328
  ],
1329
  style={"color": "var(--destructive)"},
1330
  ),
1331
- {"display": "block"},
1332
  {"display": "none"},
 
1333
  )
1334
 
1335
 
1336
  # Function to analyze the topics and create statistics
1337
  def analyze_topics(df):
 
1338
  topic_stats = (
 
 
 
1339
  df.groupby("deduplicated_topic_name")
1340
  .agg(
1341
  count=("id", "count"),
@@ -1345,94 +1432,204 @@ def analyze_topics(df):
1345
  )
1346
  .reset_index()
1347
  )
1348
- topic_stats["negative_rate"] = (topic_stats["negative_count"] / topic_stats["count"] * 100).round(1)
1349
- topic_stats["unresolved_rate"] = (topic_stats["unresolved_count"] / topic_stats["count"] * 100).round(1)
1350
- topic_stats["urgent_rate"] = (topic_stats["urgent_count"] / topic_stats["count"] * 100).round(1)
 
 
 
 
 
 
 
 
 
 
1351
  topic_stats = apply_binned_layout(topic_stats)
 
1352
  return topic_stats
1353
 
1354
 
1355
- # New binned layout function (no changes needed)
 
 
1356
  def apply_binned_layout(df, padding=0, bin_config=None, max_items_per_row=6):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1357
  df_sorted = df.copy()
 
 
 
1358
  if bin_config is None:
1359
  bin_config = [
1360
- (100, None, "100+ dialogs"), (50, 99, "50-99 dialogs"),
1361
- (25, 49, "25-49 dialogs"), (9, 24, "9-24 dialogs"),
1362
- (7, 8, "7-8 dialogs"), (5, 6, "5-6 dialogs"),
1363
- (4, 4, "4 dialogs"), (0, 3, "0-3 dialogs"),
 
 
 
 
1364
  ]
 
 
1365
  bin_descriptions = {}
1366
  conditions = []
1367
  bin_values = []
 
1368
  for i, (lower, upper, description) in enumerate(bin_config):
1369
  bin_name = f"Bin {i + 1}"
1370
  bin_descriptions[bin_name] = description
1371
  bin_values.append(bin_name)
1372
- if upper is None:
 
1373
  conditions.append(df_sorted["count"] >= lower)
1374
  else:
1375
- conditions.append((df_sorted["count"] >= lower) & (df_sorted["count"] <= upper))
1376
- df_sorted["bin"] = np.select(conditions, bin_values, default=f"Bin {len(bin_config)}")
 
 
 
 
1377
  df_sorted["bin_description"] = df_sorted["bin"].map(bin_descriptions)
 
 
1378
  df_sorted = df_sorted.sort_values(by=["bin", "count"], ascending=[True, False])
 
 
1379
  original_bins = df_sorted["bin"].unique()
1380
  new_rows = []
1381
  new_bin_descriptions = bin_descriptions.copy()
 
1382
  for bin_name in original_bins:
1383
  bin_mask = df_sorted["bin"] == bin_name
1384
  bin_group = df_sorted[bin_mask]
1385
  bin_size = len(bin_group)
 
 
1386
  if bin_size > max_items_per_row:
 
1387
  num_sub_bins = (bin_size + max_items_per_row - 1) // max_items_per_row
 
 
1388
  items_per_sub_bin = [bin_size // num_sub_bins] * num_sub_bins
 
 
1389
  remainder = bin_size % num_sub_bins
1390
  for i in range(remainder):
1391
  items_per_sub_bin[i] += 1
 
 
1392
  original_description = bin_descriptions[bin_name]
 
 
1393
  start_idx = 0
1394
  for i in range(num_sub_bins):
 
1395
  new_bin_name = f"{bin_name}_{i + 1}"
 
 
1396
  new_description = f"{original_description} ({i + 1}/{num_sub_bins})"
1397
  new_bin_descriptions[new_bin_name] = new_description
 
 
1398
  end_idx = start_idx + items_per_sub_bin[i]
1399
  sub_bin_rows = bin_group.iloc[start_idx:end_idx].copy()
 
 
1400
  sub_bin_rows["bin"] = new_bin_name
1401
  sub_bin_rows["bin_description"] = new_description
 
 
1402
  new_rows.append(sub_bin_rows)
 
 
1403
  start_idx = end_idx
 
 
1404
  df_sorted = df_sorted[~bin_mask]
 
 
1405
  if new_rows:
1406
  df_sorted = pd.concat([df_sorted] + new_rows)
 
 
1407
  df_sorted = df_sorted.sort_values(by=["bin", "count"], ascending=[True, False])
 
 
1408
  bins_with_topics = sorted(df_sorted["bin"].unique())
1409
  num_rows = len(bins_with_topics)
 
1410
  available_height = 100 - (2 * padding)
1411
  row_height = available_height / num_rows
1412
- row_positions = {bin_name: padding + i * row_height + (row_height / 2) for i, bin_name in enumerate(bins_with_topics)}
 
 
 
 
 
 
 
1413
  df_sorted["y"] = df_sorted["bin"].map(row_positions)
1414
- center_point = 50
 
 
1415
  for bin_name in bins_with_topics:
 
1416
  bin_mask = df_sorted["bin"] == bin_name
1417
  num_topics_in_bin = bin_mask.sum()
 
1418
  if num_topics_in_bin == 1:
 
1419
  df_sorted.loc[bin_mask, "x"] = center_point
1420
  else:
1421
- spacing = 17.5 if num_topics_in_bin < max_items_per_row else 15
1422
- total_width = (num_topics_in_bin - 1) * spacing
1423
- start_pos = center_point - (total_width / 2)
1424
- positions = [start_pos + (i * spacing) for i in range(num_topics_in_bin)]
1425
- df_sorted.loc[bin_mask, "x"] = positions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1426
  df_sorted["size_rank"] = range(1, len(df_sorted) + 1)
 
1427
  return df_sorted
1428
 
1429
 
1430
- # function to update positions based on selected size metric (no changes needed)
1431
  def update_bubble_positions(df: pd.DataFrame) -> pd.DataFrame:
 
1432
  return apply_binned_layout(df)
1433
 
1434
 
1435
- # Callback to update the bubble chart (no changes needed)
1436
  @callback(
1437
  Output("bubble-chart", "figure"),
1438
  [
@@ -1446,109 +1643,258 @@ def update_bubble_chart(data, color_metric):
1446
 
1447
  df = pd.DataFrame(data)
1448
 
1449
- # Note: `update_bubble_positions` is now called inside `analyze_topics` once
1450
- # and the results are stored. We don't call it here anymore.
1451
- # The 'x' and 'y' values are already in the `data`.
1452
- # df = update_bubble_positions(df) # This line can be removed if positions are pre-calculated
1453
 
 
1454
  size_values = df["count"]
1455
  raw_sizes = df["count"]
1456
  size_title = "Dialog Count"
1457
- min_size = 1
 
 
 
1458
  if size_values.max() > size_values.min():
 
1459
  log_sizes = np.log1p(size_values)
1460
- size_values = (min_size + (log_sizes - log_sizes.min()) / (log_sizes.max() - log_sizes.min()) * 50)
 
 
 
 
 
1461
  else:
 
1462
  size_values = np.ones(len(df)) * 12.5
1463
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1464
  if color_metric == "negative_rate":
1465
  color_values = df["negative_rate"]
 
1466
  color_title = "Negativity (%)"
 
 
 
 
1467
  color_scale = "Teal"
 
1468
  elif color_metric == "unresolved_rate":
1469
  color_values = df["unresolved_rate"]
1470
  color_title = "Unresolved (%)"
 
 
 
 
1471
  color_scale = "Teal"
1472
- else: # urgent_rate
1473
  color_values = df["urgent_rate"]
1474
  color_title = "Urgency (%)"
 
 
 
1475
  color_scale = "Teal"
1476
 
 
1477
  hover_text = [
1478
  f"Topic: {topic}<br>{size_title}: {raw:.1f}<br>{color_title}: {color:.1f}<br>Group: {bin_desc}"
1479
- for topic, raw, color, bin_desc in zip(df["deduplicated_topic_name"], raw_sizes, color_values, df["bin_description"])
 
 
 
 
 
1480
  ]
1481
 
 
1482
  fig = px.scatter(
1483
  df,
1484
- x="x", y="y",
 
1485
  size=size_values,
1486
  color=color_values,
 
1487
  hover_name="deduplicated_topic_name",
1488
- hover_data={"x": False, "y": False, "bin_description": True},
1489
- size_max=42.5,
 
 
 
 
1490
  color_continuous_scale=color_scale,
1491
- custom_data=["deduplicated_topic_name", "count", "negative_rate", "unresolved_rate", "urgent_rate", "bin_description"],
 
 
 
 
 
 
 
1492
  )
1493
 
 
1494
  fig.update_traces(
1495
- mode="markers",
1496
  marker=dict(sizemode="area", opacity=0.8, line=dict(width=1, color="white")),
1497
  hovertemplate="%{hovertext}<extra></extra>",
1498
  hovertext=hover_text,
1499
  )
1500
 
 
1501
  annotations = []
1502
  for i, row in df.iterrows():
 
1503
  words = row["deduplicated_topic_name"].split()
1504
- wrapped_text = "<br>".join([" ".join(words[i : i + 4]) for i in range(0, len(words), 4)])
1505
- # Use df.index.get_loc(i) to safely get the index position for size_values
1506
- marker_size = (size_values[df.index.get_loc(i)] / 20)
 
 
 
 
 
 
 
1507
  annotations.append(
1508
  dict(
1509
- x=row["x"], y=row["y"] + 0.125 + marker_size,
1510
- text=wrapped_text, showarrow=False, textangle=0,
1511
- font=dict(size=9, color="var(--foreground)", family="Arial, sans-serif", weight="bold"),
1512
- xanchor="center", yanchor="top",
1513
- bgcolor="rgba(255,255,255,0.7)", bordercolor="rgba(0,0,0,0.1)",
1514
- borderwidth=1, borderpad=1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1515
  )
1516
  )
1517
 
 
1518
  unique_bins = sorted(df["bin"].unique())
1519
- bin_y_positions = [df[df["bin"] == bin_name]["y"].mean() for bin_name in unique_bins]
 
 
 
 
1520
  bin_descriptions = df.set_index("bin")["bin_description"].to_dict()
1521
 
1522
  for bin_name, bin_y in zip(unique_bins, bin_y_positions):
1523
- fig.add_shape(type="line", x0=0, y0=bin_y, x1=100, y1=bin_y, line=dict(color="rgba(0,0,0,0.1)", width=1, dash="dot"), layer="below")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1524
  annotations.append(
1525
  dict(
1526
- x=0, y=bin_y, xref="x", yref="y",
1527
- text=bin_descriptions[bin_name], showarrow=False,
 
 
 
 
1528
  font=dict(size=8.25, color="var(--muted-foreground)"),
1529
- align="left", xanchor="left", yanchor="middle",
1530
- bgcolor="rgba(255,255,255,0.7)", borderpad=1,
 
 
 
1531
  )
1532
  )
1533
 
1534
  fig.update_layout(
1535
  title=None,
1536
- xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, title=None, range=[0, 100]),
1537
- yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, title=None, range=[0, 100], autorange="reversed"),
 
 
 
 
 
 
 
 
 
 
 
 
 
1538
  hovermode="closest",
1539
  margin=dict(l=0, r=0, t=10, b=10),
1540
- coloraxis_colorbar=dict(title=color_title, title_font=dict(size=9), tickfont=dict(size=8), thickness=10, len=0.6, yanchor="middle", y=0.5, xpad=0),
 
 
 
 
 
 
 
 
 
1541
  legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
1542
  paper_bgcolor="rgba(0,0,0,0)",
1543
  plot_bgcolor="rgba(0,0,0,0)",
1544
  hoverlabel=dict(bgcolor="white", font_size=12, font_family="Inter"),
1545
- annotations=annotations,
1546
  )
1547
 
1548
  return fig
1549
 
1550
 
1551
- # NEW: Update the topic details callback to be CLICK-ONLY and use the raw-data store
1552
  @callback(
1553
  [
1554
  Output("topic-title", "children"),
@@ -1563,39 +1909,71 @@ def update_bubble_chart(data, color_metric):
1563
  Output("selected-topic-store", "data"),
1564
  ],
1565
  [
1566
- Input("bubble-chart", "clickData"), # Changed from hoverData
1567
  Input("refresh-dialogs-btn", "n_clicks"),
1568
  ],
1569
- [State("stored-data", "data"), State("raw-data", "data")],
 
 
 
 
1570
  )
1571
- def update_topic_details(click_data, refresh_clicks, stored_data, raw_data):
1572
- # This callback now only fires on click or refresh
 
 
 
1573
  ctx = dash.callback_context
1574
- triggered_id = ctx.triggered[0]["prop_id"].split(".")[0]
1575
-
1576
- # If nothing triggered this, or data is missing, show the initial message
1577
- if not triggered_id or not stored_data or not raw_data:
1578
- return "", [], [], "", {"display": "none"}, "", {"display": "none"}, [], {"display": "flex"}, None
1579
-
1580
- # We need to know which topic is currently selected if we are refreshing
1581
- if triggered_id == "refresh-dialogs-btn":
1582
- # To refresh, we would need to know the current topic. This requires
1583
- # getting it from a store. For simplicity, we can just use the last clickData.
1584
- # A more robust solution would use another dcc.Store for the *active* topic.
1585
- # For now, if there is no click_data, a refresh does nothing.
1586
- if not click_data:
1587
- return dash.no_update
1588
-
1589
- topic_name = click_data["points"][0]["customdata"][0]
 
 
 
 
 
 
 
 
1590
  df_stored = pd.DataFrame(stored_data)
1591
  topic_data = df_stored[df_stored["deduplicated_topic_name"] == topic_name].iloc[0]
1592
 
1593
- # Use the pre-processed data from the store - this is the fast part!
1594
- df_full = pd.DataFrame(raw_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1595
  topic_conversations = df_full[df_full["deduplicated_topic_name"] == topic_name]
1596
 
1597
- # --- From here, all the UI building code is the same ---
1598
  title = html.Div([html.Span(topic_name)])
 
 
1599
  metadata_items = [
1600
  html.Div(
1601
  [
@@ -1603,8 +1981,10 @@ def update_topic_details(click_data, refresh_clicks, stored_data, raw_data):
1603
  html.Span(f"{int(topic_data['count'])} dialogs"),
1604
  html.Button(
1605
  [
1606
- html.I(className="fas fa-table", style={"marginRight": "0.25rem"}),
1607
- "Show all dialogs",
 
 
1608
  ],
1609
  id="show-all-dialogs-btn",
1610
  className="show-dialogs-btn",
@@ -1615,6 +1995,8 @@ def update_topic_details(click_data, refresh_clicks, stored_data, raw_data):
1615
  style={"display": "flex", "alignItems": "center", "width": "100%"},
1616
  ),
1617
  ]
 
 
1618
  metrics_boxes = [
1619
  html.Div(
1620
  [
@@ -1639,25 +2021,54 @@ def update_topic_details(click_data, refresh_clicks, stored_data, raw_data):
1639
  ),
1640
  ]
1641
 
 
1642
  root_causes_output = ""
1643
  root_causes_section_style = {"display": "none"}
 
 
1644
  if "root_cause_subcluster" in topic_conversations.columns:
 
 
 
 
1645
  filtered_root_causes = [
1646
- rc for rc in topic_conversations["root_cause_subcluster"].dropna().unique()
1647
- if rc not in ["Sub-clustering disabled", "Not eligible for sub-clustering", "No valid root causes", "No Subcluster", "Unclustered", ""]
 
 
 
 
 
 
 
 
 
1648
  ]
 
 
 
 
 
 
1649
  if filtered_root_causes:
 
1650
  root_causes_output = html.Div(
1651
  [
1652
  html.Div(
1653
  [
1654
- html.I(className="fas fa-exclamation-triangle root-cause-tag-icon"),
 
 
1655
  html.Span(root_cause, style={"marginRight": "6px"}),
1656
  html.I(
1657
  className="fas fa-external-link-alt root-cause-click-icon",
1658
  id={"type": "root-cause-icon", "index": root_cause},
1659
  title="Click to see specific chats assigned with this root cause.",
1660
- style={"cursor": "pointer", "fontSize": "0.55rem", "opacity": "0.8"},
 
 
 
 
1661
  ),
1662
  ],
1663
  className="root-cause-tag",
@@ -1669,19 +2080,30 @@ def update_topic_details(click_data, refresh_clicks, stored_data, raw_data):
1669
  )
1670
  root_causes_section_style = {"display": "block"}
1671
 
 
1672
  tags_list = []
1673
- if "consolidated_tags" in topic_conversations.columns:
1674
- for tags_str in topic_conversations["consolidated_tags"].dropna():
1675
- tags_list.extend([tag.strip() for tag in tags_str.split(",") if tag.strip()])
1676
-
 
 
 
1677
  tag_counts = {}
1678
  for tag in tags_list:
1679
  tag_counts[tag] = tag_counts.get(tag, 0) + 1
1680
 
1681
- sorted_tags = sorted(tag_counts.items(), key=lambda x: (-x[1], x[0]))[:15]
 
1682
 
 
 
 
 
 
1683
  tags_section_style = {"display": "none"}
1684
  if sorted_tags:
 
1685
  tags_output = html.Div(
1686
  [
1687
  html.Div(
@@ -1698,37 +2120,87 @@ def update_topic_details(click_data, refresh_clicks, stored_data, raw_data):
1698
  tags_section_style = {"display": "block"}
1699
  else:
1700
  tags_output = html.Div(
1701
- [html.I(className="fas fa-info-circle", style={"marginRight": "5px"}), "No tags found for this topic"],
 
 
 
1702
  className="no-tags-message",
1703
  )
1704
 
 
1705
  sample_size = min(5, len(topic_conversations))
1706
  if sample_size > 0:
1707
- samples = topic_conversations.sample(n=sample_size)
 
 
1708
  dialog_items = []
1709
  for _, row in samples.iterrows():
1710
- tags = [
1711
- html.Span(row["Sentiment"], className="dialog-tag tag-sentiment"),
1712
- html.Span(row["Resolution"], className="dialog-tag tag-resolution"),
1713
- html.Span(row["Urgency"], className="dialog-tag tag-urgency"),
1714
- ]
 
 
 
 
 
 
1715
  if "id" in row:
1716
- tags.append(html.Span(
1717
- [f"Chat ID: {row['id']} ", html.I(className="fas fa-arrow-up-right-from-square conversation-icon", id={"type": "conversation-icon", "index": row["id"]}, title="View full conversation", style={"marginLeft": "0.25rem"})],
1718
- className="dialog-tag tag-chat-id", style={"display": "inline-flex", "alignItems": "center"}
1719
- ))
1720
- if "Root_Cause" in row and pd.notna(row["Root_Cause"]) and row["Root_Cause"] != "na":
1721
- tags.append(html.Span(f"Root Cause: {row['Root_Cause']}", className="dialog-tag tag-root-cause"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1722
 
1723
  dialog_items.append(
1724
  html.Div(
1725
- [html.Div(row["Summary"], className="dialog-summary"), html.Div(tags, className="dialog-metadata")],
 
 
 
 
 
 
1726
  className="dialog-item",
1727
  )
1728
  )
 
1729
  sample_dialogs = dialog_items
1730
  else:
1731
- sample_dialogs = [html.Div("No sample dialogs available for this topic.", style={"color": "var(--muted-foreground)"})]
 
 
 
 
 
1732
 
1733
  return (
1734
  title,
@@ -1740,11 +2212,11 @@ def update_topic_details(click_data, refresh_clicks, stored_data, raw_data):
1740
  tags_section_style,
1741
  sample_dialogs,
1742
  {"display": "none"},
1743
- {"topic_name": topic_name}, # Pass only the topic name
1744
  )
1745
 
1746
 
1747
- # NEW: Updated to use raw-data store
1748
  @callback(
1749
  [
1750
  Output("conversation-modal", "style"),
@@ -1752,22 +2224,31 @@ def update_topic_details(click_data, refresh_clicks, stored_data, raw_data):
1752
  Output("conversation-subheader", "children"),
1753
  ],
1754
  [Input({"type": "conversation-icon", "index": dash.dependencies.ALL}, "n_clicks")],
1755
- [State("raw-data", "data")],
1756
  prevent_initial_call=True,
1757
  )
1758
- def open_conversation_modal(n_clicks_list, raw_data):
1759
- if not any(n_clicks_list) or not raw_data:
 
1760
  return {"display": "none"}, "", ""
1761
 
 
1762
  ctx = dash.callback_context
1763
  if not ctx.triggered:
1764
- return {"display": "none"}, "", ""
1765
-
 
 
 
1766
  triggered_id = ctx.triggered[0]["prop_id"]
1767
  chat_id = json.loads(triggered_id.split(".")[0])["index"]
1768
 
1769
- df_full = pd.DataFrame(raw_data)
1770
-
 
 
 
 
1771
  conversation_row = df_full[df_full["id"] == chat_id]
1772
  if len(conversation_row) == 0:
1773
  conversation_text = "Conversation not found."
@@ -1775,17 +2256,28 @@ def open_conversation_modal(n_clicks_list, raw_data):
1775
  else:
1776
  row = conversation_row.iloc[0]
1777
  conversation_text = row.get("conversation", "No conversation data available.")
 
 
1778
  cluster_name = row.get("deduplicated_topic_name", "Unknown cluster")
 
 
1779
  subheader_content = html.Div(
1780
  [
1781
- html.Span(f"Chat ID: {chat_id}", style={"fontWeight": "600", "marginRight": "1rem"}),
1782
- html.Span(f"Cluster: {cluster_name}", style={"color": "hsl(215.4, 16.3%, 46.9%)"}),
 
 
 
 
 
 
1783
  ]
1784
  )
 
1785
  return {"display": "flex"}, conversation_text, subheader_content
1786
 
1787
 
1788
- # Callback to close modal (no changes needed)
1789
  @callback(
1790
  Output("conversation-modal", "style", allow_duplicate=True),
1791
  [Input("close-modal-btn", "n_clicks")],
@@ -1794,10 +2286,10 @@ def open_conversation_modal(n_clicks_list, raw_data):
1794
  def close_conversation_modal(n_clicks):
1795
  if n_clicks:
1796
  return {"display": "none"}
1797
- return dash.no_update
1798
 
1799
 
1800
- # NEW: Updated to use raw-data store
1801
  @callback(
1802
  [
1803
  Output("dialogs-table-modal", "style"),
@@ -1805,51 +2297,169 @@ def close_conversation_modal(n_clicks):
1805
  Output("dialogs-table-content", "children"),
1806
  ],
1807
  [Input("show-all-dialogs-btn", "n_clicks")],
1808
- [State("selected-topic-store", "data"), State("raw-data", "data")],
1809
  prevent_initial_call=True,
1810
  )
1811
- def open_dialogs_table_modal(n_clicks, selected_topic_data, raw_data):
1812
- if not n_clicks or not selected_topic_data or not raw_data:
1813
  return {"display": "none"}, "", ""
1814
 
1815
  topic_name = selected_topic_data["topic_name"]
1816
- df_full = pd.DataFrame(raw_data)
 
 
 
 
 
 
 
 
 
1817
 
 
1818
  topic_conversations = df_full[df_full["deduplicated_topic_name"] == topic_name]
1819
-
1820
- table_rows = [
1821
- html.Tr([
1822
- html.Th("Chat ID"), html.Th("Summary"), html.Th("Root Cause"),
1823
- html.Th("Sentiment"), html.Th("Resolution"), html.Th("Urgency"),
1824
- html.Th("Tags"), html.Th("Action"),
1825
- ])
1826
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
1827
  for _, row in topic_conversations.iterrows():
1828
- tags_display = "No tags"
1829
- if "consolidated_tags" in row and pd.notna(row["consolidated_tags"]):
1830
- tags = [tag.strip() for tag in row["consolidated_tags"].split(",") if tag.strip()]
1831
- tags_display = html.Div([
1832
- html.Span(tag, className="dialog-tag-small", style={"backgroundColor": "#6c757d", "color": "white"}) for tag in tags[:3]
1833
- ] + ([html.Span(f"+{len(tags) - 3}", className="dialog-tag-small", style={"backgroundColor": "#6c757d", "color": "white"})] if len(tags) > 3 else []))
1834
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1835
  table_rows.append(
1836
- html.Tr([
1837
- html.Td(row["id"], style={"fontFamily": "monospace", "fontSize": "0.8rem"}),
1838
- html.Td(row.get("Summary", "No summary"), className="dialog-summary-cell"),
1839
- html.Td(html.Span(str(row.get("Root_Cause", "Unknown")).capitalize() if pd.notna(row.get("Root_Cause")) else "Unknown", className="dialog-tag-small", style={"backgroundColor": "#8B4513", "color": "white"})),
1840
- html.Td(html.Span(row.get("Sentiment", "Unknown").capitalize(), className="dialog-tag-small", style={"backgroundColor": "#dc3545" if row.get("Sentiment") == "negative" else "#6c757d", "color": "white"})),
1841
- html.Td(html.Span(row.get("Resolution", "Unknown").capitalize(), className="dialog-tag-small", style={"backgroundColor": "#dc3545" if row.get("Resolution") == "unresolved" else "#6c757d", "color": "white"})),
1842
- html.Td(html.Span(row.get("Urgency", "Unknown").capitalize(), className="dialog-tag-small", style={"backgroundColor": "#dc3545" if row.get("Urgency") == "urgent" else "#6c757d", "color": "white"})),
1843
- html.Td(tags_display, className="dialog-tags-cell"),
1844
- html.Td(html.Button([html.I(className="fas fa-eye", style={"marginRight": "0.25rem"}), "View chat"], id={"type": "open-chat-btn", "index": row["id"]}, className="open-chat-btn")),
1845
- ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1846
  )
 
1847
  table = html.Table(table_rows, className="dialogs-table")
1848
- modal_title = f"All dialogs in Topic: {topic_name} ({len(topic_conversations)} dialogs)"
 
 
 
 
1849
  return {"display": "flex"}, modal_title, table
1850
 
1851
 
1852
- # Callback to close dialogs table modal (no changes needed)
1853
  @callback(
1854
  Output("dialogs-table-modal", "style", allow_duplicate=True),
1855
  [Input("close-dialogs-modal-btn", "n_clicks")],
@@ -1858,10 +2468,10 @@ def open_dialogs_table_modal(n_clicks, selected_topic_data, raw_data):
1858
  def close_dialogs_table_modal(n_clicks):
1859
  if n_clicks:
1860
  return {"display": "none"}
1861
- return dash.no_update
1862
 
1863
 
1864
- # NEW: Updated to use raw-data store
1865
  @callback(
1866
  [
1867
  Output("conversation-modal", "style", allow_duplicate=True),
@@ -1869,34 +2479,68 @@ def close_dialogs_table_modal(n_clicks):
1869
  Output("conversation-subheader", "children", allow_duplicate=True),
1870
  ],
1871
  [Input({"type": "open-chat-btn", "index": dash.dependencies.ALL}, "n_clicks")],
1872
- [State("raw-data", "data")],
1873
  prevent_initial_call=True,
1874
  )
1875
- def open_conversation_from_table(n_clicks_list, raw_data):
1876
- if not any(n_clicks_list) or not raw_data:
 
1877
  return {"display": "none"}, "", ""
1878
 
 
1879
  ctx = dash.callback_context
1880
  if not ctx.triggered:
1881
  return {"display": "none"}, "", ""
1882
 
 
1883
  triggered_id = ctx.triggered[0]["prop_id"]
1884
  chat_id = json.loads(triggered_id.split(".")[0])["index"]
1885
 
1886
- df_full = pd.DataFrame(raw_data)
1887
-
 
 
 
 
 
 
 
 
 
 
 
 
1888
  conversation_row = df_full[df_full["id"] == chat_id]
 
 
1889
  if len(conversation_row) == 0:
1890
- conversation_text = f"Conversation not found for Chat ID: {chat_id}"
 
 
 
 
 
 
 
 
 
 
 
1891
  subheader_content = f"Chat ID: {chat_id} (Not Found)"
1892
  else:
1893
- row = conversation_row.iloc[0]
1894
- conversation_text = row.get("conversation", "No conversation data available.")
1895
- subheader_content = f"Chat ID: {chat_id} | Topic: {row.get('deduplicated_topic_name', 'Unknown')} | Sentiment: {row.get('Sentiment', 'Unknown')} | Resolution: {row.get('Resolution', 'Unknown')}"
 
 
 
 
 
 
1896
  return {"display": "flex"}, conversation_text, subheader_content
1897
 
1898
 
1899
- # NEW: Updated to use raw-data store
1900
  @callback(
1901
  [
1902
  Output("root-cause-modal", "style"),
@@ -1904,64 +2548,176 @@ def open_conversation_from_table(n_clicks_list, raw_data):
1904
  Output("root-cause-table-content", "children"),
1905
  ],
1906
  [Input({"type": "root-cause-icon", "index": dash.dependencies.ALL}, "n_clicks")],
1907
- [State("selected-topic-store", "data"), State("raw-data", "data")],
1908
  prevent_initial_call=True,
1909
  )
1910
- def open_root_cause_modal(n_clicks_list, selected_topic_data, raw_data):
1911
- if not any(n_clicks_list) or not selected_topic_data or not raw_data:
 
1912
  return {"display": "none"}, "", ""
1913
 
 
1914
  ctx = dash.callback_context
1915
  if not ctx.triggered:
1916
  return {"display": "none"}, "", ""
1917
 
1918
  triggered_id = ctx.triggered[0]["prop_id"]
1919
  root_cause = json.loads(triggered_id.split(".")[0])["index"]
 
1920
  topic_name = selected_topic_data["topic_name"]
1921
- df_full = pd.DataFrame(raw_data)
1922
-
 
 
 
 
 
 
 
 
 
 
1923
  filtered_conversations = df_full[
1924
  (df_full["deduplicated_topic_name"] == topic_name)
1925
  & (df_full["root_cause_subcluster"] == root_cause)
1926
  ]
1927
 
1928
- table_rows = [
1929
- html.Tr([
1930
- html.Th("Chat ID"), html.Th("Summary"), html.Th("Sentiment"),
1931
- html.Th("Resolution"), html.Th("Urgency"), html.Th("Tags"), html.Th("Action"),
1932
- ])
1933
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
1934
  for _, row in filtered_conversations.iterrows():
1935
- tags_display = "No tags"
1936
- if "consolidated_tags" in row and pd.notna(row["consolidated_tags"]):
1937
- tags = [tag.strip() for tag in row["consolidated_tags"].split(",") if tag.strip()]
1938
- tags_display = html.Div([
1939
- html.Span(tag, className="dialog-tag-small", style={"backgroundColor": "#6c757d", "color": "white"}) for tag in tags[:3]
1940
- ] + ([html.Span(f"+{len(tags) - 3}", className="dialog-tag-small", style={"backgroundColor": "#6c757d", "color": "white"})] if len(tags) > 3 else []))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1941
 
1942
  table_rows.append(
1943
- html.Tr([
1944
- html.Td(row["id"], style={"fontFamily": "monospace", "fontSize": "0.8rem"}),
1945
- html.Td(row.get("Summary", "No summary"), className="dialog-summary-cell"),
1946
- html.Td(html.Span(row.get("Sentiment", "Unknown").capitalize(), className="dialog-tag-small", style={"backgroundColor": "#dc3545" if row.get("Sentiment") == "negative" else "#6c757d", "color": "white"})),
1947
- html.Td(html.Span(row.get("Resolution", "Unknown").capitalize(), className="dialog-tag-small", style={"backgroundColor": "#dc3545" if row.get("Resolution") == "unresolved" else "#6c757d", "color": "white"})),
1948
- html.Td(html.Span(row.get("Urgency", "Unknown").capitalize(), className="dialog-tag-small", style={"backgroundColor": "#dc3545" if row.get("Urgency") == "urgent" else "#6c757d", "color": "white"})),
1949
- html.Td(tags_display, className="dialog-tags-cell"),
1950
- html.Td(html.Button([html.I(className="fas fa-eye", style={"marginRight": "0.25rem"}), "View chat"], id={"type": "open-chat-btn-rc", "index": row["id"]}, className="open-chat-btn")),
1951
- ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1952
  )
1953
-
1954
  table = html.Table(table_rows, className="dialogs-table")
1955
- modal_title = f"Dialogs for Root Cause: {root_cause} (in Topic: {topic_name})"
 
1956
  count_info = html.P(
1957
- f"Found {len(filtered_conversations)} dialogs with this root cause.",
1958
- style={"margin": "0 0 1rem 0", "color": "var(--muted-foreground)", "fontSize": "0.875rem"},
 
 
 
 
1959
  )
 
1960
  content = html.Div([count_info, table])
 
1961
  return {"display": "flex"}, modal_title, content
1962
 
1963
 
1964
- # Callback to close root cause modal (no changes needed)
1965
  @callback(
1966
  Output("root-cause-modal", "style", allow_duplicate=True),
1967
  [Input("close-root-cause-modal-btn", "n_clicks")],
@@ -1970,10 +2726,10 @@ def open_root_cause_modal(n_clicks_list, selected_topic_data, raw_data):
1970
  def close_root_cause_modal(n_clicks):
1971
  if n_clicks:
1972
  return {"display": "none"}
1973
- return dash.no_update
1974
 
1975
 
1976
- # NEW: Updated to use raw-data store
1977
  @callback(
1978
  [
1979
  Output("conversation-modal", "style", allow_duplicate=True),
@@ -1981,39 +2737,69 @@ def close_root_cause_modal(n_clicks):
1981
  Output("conversation-subheader", "children", allow_duplicate=True),
1982
  ],
1983
  [Input({"type": "open-chat-btn-rc", "index": dash.dependencies.ALL}, "n_clicks")],
1984
- [State("raw-data", "data")],
1985
  prevent_initial_call=True,
1986
  )
1987
- def open_conversation_from_root_cause_table(n_clicks_list, raw_data):
1988
- if not any(n_clicks_list) or not raw_data:
 
1989
  return {"display": "none"}, "", ""
1990
 
 
1991
  ctx = dash.callback_context
1992
  if not ctx.triggered:
1993
  return {"display": "none"}, "", ""
1994
-
1995
  triggered_id = ctx.triggered[0]["prop_id"]
1996
  chat_id = json.loads(triggered_id.split(".")[0])["index"]
1997
 
1998
- df_full = pd.DataFrame(raw_data)
1999
-
 
 
 
 
2000
  conversation_row = df_full[df_full["id"] == chat_id]
 
 
2001
  if len(conversation_row) == 0:
2002
  conversation_row = df_full[df_full["id"].astype(str) == str(chat_id)]
2003
 
 
 
 
 
 
 
2004
  if len(conversation_row) == 0:
2005
  conversation_text = f"Conversation not found for Chat ID: {chat_id}"
2006
  subheader_content = f"Chat ID: {chat_id} (Not Found)"
2007
  else:
2008
  row = conversation_row.iloc[0]
2009
  conversation_text = row.get("conversation", "No conversation data available.")
 
 
2010
  root_cause = row.get("root_cause_subcluster", "Unknown")
2011
  cluster_name = row.get("deduplicated_topic_name", "Unknown cluster")
2012
- subheader_content = html.Div([
2013
- html.Span(f"Chat ID: {chat_id}", style={"fontWeight": "600", "marginRight": "1rem"}),
2014
- html.Span(f"Cluster: {cluster_name}", style={"color": "hsl(215.4, 16.3%, 46.9%)", "marginRight": "1rem"}),
2015
- html.Span(f"Root Cause: {root_cause}", style={"color": "#8b6f47", "fontWeight": "500"}),
2016
- ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2017
  return {"display": "flex"}, conversation_text, subheader_content
2018
 
2019
  # IMPORTANT: Expose the server for Gunicorn
 
2
  import io
3
  import json
4
  import random
5
+ import time
6
+ import uuid
7
 
8
  import dash
9
  import numpy as np
 
15
  # Initialize the Dash app
16
  app = dash.Dash(__name__, suppress_callback_exceptions=True)
17
 
18
+ # In-memory server-side cache for uploaded DataFrames (per-session key)
19
+ # Each entry: {"df": DataFrame, "created": float, "last_access": float}
20
+ _DF_CACHE: dict[str, dict] = {}
21
+ _CACHE_MAX_ENTRIES = 5
22
+ _CACHE_TTL_SECONDS = 10 * 60 * 60 # 10 hours
23
+
24
+
25
def _cache_prune() -> None:
    """Drop expired cache entries, then trim the cache to its size limit.

    An entry is expired when more than ``_CACHE_TTL_SECONDS`` have passed
    since its ``created`` timestamp. If more than ``_CACHE_MAX_ENTRIES``
    entries remain afterwards, the least recently accessed ones are removed
    until the limit is met.
    """
    checked_at = time.time()

    # Collect first, delete afterwards: the dict must not change size while
    # it is being iterated.
    stale = [
        cache_key
        for cache_key, entry in _DF_CACHE.items()
        if checked_at - entry.get("created", checked_at) > _CACHE_TTL_SECONDS
    ]
    for cache_key in stale:
        _DF_CACHE.pop(cache_key, None)

    overflow = len(_DF_CACHE) - _CACHE_MAX_ENTRIES
    if overflow <= 0:
        return

    def _recency(cache_key):
        # Entries without a last_access stamp fall back to their creation time.
        entry = _DF_CACHE[cache_key]
        return entry.get("last_access", entry.get("created", 0))

    # Oldest access first; evict exactly as many as exceed the limit.
    for cache_key in sorted(_DF_CACHE, key=_recency)[:overflow]:
        _DF_CACHE.pop(cache_key, None)
46
+
47
+
48
def _cache_put_df(df: pd.DataFrame) -> str:
    """Store ``df`` in the server-side cache and return its new key.

    The cache is pruned both before and after the insertion, so the returned
    key refers to an entry that was added to an already size-limited cache.

    Args:
        df: DataFrame to keep on the server.

    Returns:
        A freshly generated UUID4 string identifying the cached entry.
    """
    _cache_prune()

    cache_key = str(uuid.uuid4())
    stamp = time.time()
    # A new entry counts as "just accessed" so it is the last LRU candidate.
    _DF_CACHE[cache_key] = dict(df=df, created=stamp, last_access=stamp)

    _cache_prune()
    return cache_key
55
+
56
+
57
def _cache_get_df(key: str | None) -> pd.DataFrame | None:
    """Look up a cached DataFrame by key.

    Args:
        key: Cache key previously returned by ``_cache_put_df``; may be
            ``None`` or empty.

    Returns:
        The cached DataFrame, or ``None`` when the key is empty, unknown, or
        its entry is older than ``_CACHE_TTL_SECONDS`` (in which case the
        entry is also removed). A successful lookup refreshes the entry's
        ``last_access`` timestamp.
    """
    entry = _DF_CACHE.get(key) if key else None
    if not entry:
        return None

    looked_up_at = time.time()
    age = looked_up_at - entry.get("created", looked_up_at)
    if age > _CACHE_TTL_SECONDS:
        # Expired: forget the entry so it no longer occupies a cache slot.
        _DF_CACHE.pop(key, None)
        return None

    entry["last_access"] = looked_up_at
    return entry["df"]
72
+
73
+
74
  # Define app layout
75
  app.layout = html.Div(
76
  [
 
141
  children="Sessions Observatory",
142
  className="section-header",
143
  ),
144
+ # dcc.Graph(id="bubble-chart", style={"height": "80vh"}),
145
  dcc.Graph(
146
  id="bubble-chart",
147
  style={"height": "calc(100% - 154px)"},
148
+ ), # this does not work for some reason
149
  html.Div(
150
  [
151
+ # Only keep Color by
152
  html.Div(
153
  [
154
  html.Div(
 
161
  ],
162
  className="control-labels-row",
163
  ),
164
+ # Only keep Color by options
165
  html.Div(
166
  [
167
  html.Div(
 
246
  html.I(
247
  className="fas fa-info-circle",
248
  title="Root cause detection is experimental and may require manual review since it is generated by AI models. Root causes are only shown in clusters with identifiable root causes.",
249
+ # Added title for info icon
250
  style={
251
  "marginLeft": "0.2rem",
252
+ "color": "#6c757d", # General gray
253
  "fontSize": "0.9rem",
254
  "cursor": "pointer",
255
  "verticalAlign": "middle",
 
264
  ),
265
  ],
266
  id="root-causes-section",
267
+ style={
268
+ "display": "none"
269
+ }, # Initially hidden
270
  ),
271
  # Added Tags section
272
  html.Div(
 
281
  ),
282
  ],
283
  id="tags-section",
284
+ style={
285
+ "display": "none"
286
+ }, # Initially hidden
287
  ),
288
  ],
289
  className="details-section",
 
334
  ),
335
  html.H3("No topic selected"),
336
  html.P(
337
+ "Click or hover on a bubble to view topic details."
338
  ),
339
  ],
340
  className="no-selection-message",
 
453
  ),
454
  # Store the processed data
455
  dcc.Store(id="stored-data"),
456
+ # Server-side cache key for full uploaded DataFrame
457
+ dcc.Store(id="data-cache-key"),
458
  # Store the current selected topic for dialogs modal
459
  dcc.Store(id="selected-topic-store"),
460
  # Store the current selected root cause for root cause modal
 
463
  className="app-container",
464
  )
465
 
466
+ # Define CSS for the app
467
  app.index_string = """
468
  <!DOCTYPE html>
469
  <html>
 
1287
  )
1288
  def update_topic_distribution_header(data):
1289
  if not data:
1290
+ return "Sessions Observatory" # Default when no data is available
1291
 
1292
  df = pd.DataFrame(data)
1293
+ total_dialogs = df["count"].sum() # Sum up the 'count' column
1294
  return f"Sessions Observatory ({total_dialogs} dialogs)"
1295
 
1296
 
 
def _upload_failure(icon_class, message):
    """Build the five callback outputs for a failed upload.

    Args:
        icon_class: Font Awesome class string for the status icon.
        message: Text shown next to the icon.

    Returns:
        A 5-tuple matching the outputs of ``process_upload``: no stored data,
        the status element, a visible status style, a hidden main-content
        style, and no cache key.
    """
    status = html.Div(
        [
            html.I(
                className=icon_class,
                style={"color": "var(--destructive)", "marginRight": "8px"},
            ),
            message,
        ],
        style={"color": "var(--destructive)"},
    )
    return (
        None,
        status,
        {"display": "block"},  # Make visible after error
        {"display": "none"},
        None,
    )


@callback(
    [
        Output("stored-data", "data"),
        Output("upload-status", "children"),
        Output("upload-status", "style"),  # Add style output for visibility
        Output("main-content", "style"),
        Output("data-cache-key", "data"),
    ],
    [Input("upload-data", "contents")],
    [State("upload-data", "filename")],
)
def process_upload(contents, filename):
    """Parse an uploaded CSV/Excel file and publish its topic statistics.

    Every exit path returns exactly five values, one per declared Output.

    Args:
        contents: Upload payload as ``"<header>,<base64 data>"``, or ``None``
            when nothing has been uploaded yet.
        filename: Name of the uploaded file; its text decides the parser
            ("csv" or "xls" must appear in it, case-insensitively).

    Returns:
        A 5-tuple of (topic statistics records, status element, status
        style, main-content style, server-side cache key). On failure the
        records and cache key are ``None`` and the main content stays hidden.
    """
    if contents is None:
        return None, "", {"display": "none"}, {"display": "none"}, None  # Keep hidden

    try:
        # Parse uploaded file; only the base64 part after the first comma is needed
        _, content_string = contents.split(",", 1)
        decoded = base64.b64decode(content_string)

        lowered_name = filename.lower()
        if "csv" in lowered_name:
            df = pd.read_csv(
                io.StringIO(decoded.decode("utf-8")), dtype={"Root_Cause": str}
            )
        elif "xls" in lowered_name:
            df = pd.read_excel(io.BytesIO(decoded), dtype={"Root_Cause": str})
        else:
            # Unsupported file type: must still return a value for every Output.
            return _upload_failure(
                "fas fa-exclamation-circle",
                "Please upload a CSV or Excel file.",
            )

        # DEBUG
        # --- Print unique root_cause_subcluster values for each deduplicated_topic_name ---
        if (
            "deduplicated_topic_name" in df.columns
            and "root_cause_subcluster" in df.columns
        ):
            print(
                "\n[INFO] Unique root_cause_subcluster values for each deduplicated_topic_name:"
            )
            for topic in df["deduplicated_topic_name"].unique():
                subclusters = (
                    df[df["deduplicated_topic_name"] == topic]["root_cause_subcluster"]
                    .dropna()
                    .unique()
                )
                print(f"- {topic}:")
                for sub in subclusters:
                    print(f"  - {sub}")
            print()
        # --- End of DEBUG ---

        # Hardcoded flag to exclude 'Unclustered' topics.
        # Applied to CSV and Excel uploads alike.
        EXCLUDE_UNCLUSTERED = True
        if EXCLUDE_UNCLUSTERED and "deduplicated_topic_name" in df.columns:
            df = df[df["deduplicated_topic_name"] != "Unclustered"].copy()
            # If we strip leading and trailing `"` or `'` from the topic name here, then
            # we will have a problem with the deduplicated names, as they will not match the
            # original topic names in the dataset.
            # Better do it in the first script.

        # Process the dataframe to get topic statistics
        topic_stats = analyze_topics(df)

        # Put full DataFrame in server-side cache and return key
        cache_key = _cache_put_df(df)

        success_color = "hsl(142.1, 76.2%, 36.3%)"
        return (
            topic_stats.to_dict("records"),
            html.Div(
                [
                    html.I(
                        className="fas fa-check-circle",
                        style={
                            "color": success_color,
                            "marginRight": "8px",
                        },
                    ),
                    f'Successfully uploaded "{filename}"',
                ],
                style={"color": success_color},
            ),
            {"display": "block"},  # maybe add the above line here too #TODO
            {
                "display": "block",
                "height": "calc(100vh - 40px)",
            },  # Make visible after successful upload
            cache_key,
        )

    except Exception as e:
        # Top-level boundary of the callback: surface any parsing/processing
        # problem to the user instead of failing the request.
        return _upload_failure(
            "fas fa-exclamation-triangle",
            f"Error processing file: {str(e)}",
        )
1417
 
1418
 
1419
  # Function to analyze the topics and create statistics
1420
  def analyze_topics(df):
1421
+ # Group by topic name and calculate metrics
1422
  topic_stats = (
1423
+ # IMPORTANT!
1424
+ # As deduplicated_topic_name, we have either the deduplicated names (if enabled by the process),
1425
+ # either the kmeans_reclustered name (where available) and the ClusterNames.
1426
  df.groupby("deduplicated_topic_name")
1427
  .agg(
1428
  count=("id", "count"),
 
1432
  )
1433
  .reset_index()
1434
  )
1435
+
1436
+ # Calculate rates
1437
+ topic_stats["negative_rate"] = (
1438
+ topic_stats["negative_count"] / topic_stats["count"] * 100
1439
+ ).round(1)
1440
+ topic_stats["unresolved_rate"] = (
1441
+ topic_stats["unresolved_count"] / topic_stats["count"] * 100
1442
+ ).round(1)
1443
+ topic_stats["urgent_rate"] = (
1444
+ topic_stats["urgent_count"] / topic_stats["count"] * 100
1445
+ ).round(1)
1446
+
1447
+ # Apply binned layout
1448
  topic_stats = apply_binned_layout(topic_stats)
1449
+
1450
  return topic_stats
1451
 
1452
 
1453
# Binned layout: bubbles are grouped into rows by dialog count
def apply_binned_layout(df, padding=0, bin_config=None, max_items_per_row=6):
    """
    Apply a binned layout where bubbles are grouped into rows based on dialog count.
    Bubbles in each row are centered horizontally around x=50.

    Rows are ordered by bin number and then by sub-bin number (numerically,
    not alphabetically), so "Bin 8_2" always precedes "Bin 8_10".

    Args:
        df: DataFrame containing the topic data; must have a "count" column.
        padding: Space kept free at the top and bottom of the 0-100 vertical
            range, in the same units.
        bin_config: List of (lower, upper, description) tuples, bounds inclusive,
            checked in order (first match wins). ``upper=None`` means no upper
            limit. Rows matching no bin fall into the last configured bin.
            Example: [(300, None, "300+ dialogs"), (250, 299, "250-299 dialogs"), ...]
        max_items_per_row: Maximum number of items to display in a single row;
            larger bins are split into balanced sub-rows.

    Returns:
        A copy of ``df`` sorted by row and descending count, with the columns
        "bin", "bin_description", "y", "x" and "size_rank" set.

    Raises:
        ValueError: If ``bin_config`` is an empty list.
    """

    def _bin_order(bin_name):
        # "Bin 3" -> (3, 0); "Bin 3_12" -> (3, 12)
        major, _, minor = bin_name[len("Bin ") :].partition("_")
        return (int(major), int(minor) if minor else 0)

    def _sort_rows(frame):
        # Sort by numeric bin order, then by count (descending) within each bin.
        order = [_bin_order(name) for name in frame["bin"]]
        return (
            frame.assign(
                _bin_major=[major for major, _ in order],
                _bin_minor=[minor for _, minor in order],
            )
            .sort_values(
                by=["_bin_major", "_bin_minor", "count"],
                ascending=[True, True, False],
            )
            .drop(columns=["_bin_major", "_bin_minor"])
        )

    # Work on a copy of the dataframe to avoid modifying the original
    df_sorted = df.copy()

    # Default bin configuration if none is provided
    # 8 rows x 6 bubbles is usually good
    if bin_config is None:
        bin_config = [
            (100, None, "100+ dialogs"),
            (50, 99, "50-99 dialogs"),
            (25, 49, "25-49 dialogs"),
            (9, 24, "9-24 dialogs"),
            (7, 8, "7-8 dialogs"),
            (5, 6, "5-6 dialogs"),
            (4, 4, "4 dialogs"),
            (0, 3, "0-3 dialogs"),
        ]
    if not bin_config:
        raise ValueError("bin_config must contain at least one bin")

    # Generate bin names, descriptions and membership conditions
    bin_descriptions = {}
    conditions = []
    bin_values = []
    counts = df_sorted["count"]

    for position, (lower, upper, description) in enumerate(bin_config, start=1):
        bin_name = f"Bin {position}"
        bin_descriptions[bin_name] = description
        bin_values.append(bin_name)

        in_bin = counts >= lower
        if upper is not None:
            in_bin = in_bin & (counts <= upper)
        conditions.append(in_bin.to_numpy())

    # Rows that match no condition go to the last configured bin, whatever
    # the number of bins is.
    df_sorted["bin"] = np.select(conditions, bin_values, default=bin_values[-1])
    df_sorted["bin_description"] = df_sorted["bin"].map(bin_descriptions)
    df_sorted = _sort_rows(df_sorted)

    # Split bins that have more than max_items_per_row items into sub-bins
    new_rows = []
    for bin_name in df_sorted["bin"].unique():
        bin_mask = df_sorted["bin"] == bin_name
        bin_group = df_sorted[bin_mask]
        bin_size = len(bin_group)
        if bin_size <= max_items_per_row:
            continue

        # Ceiling division gives the number of sub-bins; items are spread as
        # evenly as possible, the first `extra` sub-bins taking one more item.
        num_sub_bins = -(-bin_size // max_items_per_row)
        base_size, extra = divmod(bin_size, num_sub_bins)
        original_description = bin_descriptions[bin_name]

        start_idx = 0
        for sub_idx in range(num_sub_bins):
            end_idx = start_idx + base_size + (1 if sub_idx < extra else 0)
            sub_bin_rows = bin_group.iloc[start_idx:end_idx].copy()
            sub_bin_rows["bin"] = f"{bin_name}_{sub_idx + 1}"
            sub_bin_rows["bin_description"] = (
                f"{original_description} ({sub_idx + 1}/{num_sub_bins})"
            )
            new_rows.append(sub_bin_rows)
            start_idx = end_idx

        # The original, oversized bin is replaced by its sub-bins
        df_sorted = df_sorted[~bin_mask]

    # Combine the untouched small bins with the new split bins and re-sort
    if new_rows:
        kept = [df_sorted] if not df_sorted.empty else []
        df_sorted = _sort_rows(pd.concat(kept + new_rows))

    # Vertical positions: one equally high band per row, bubble centered in it
    bins_with_topics = sorted(df_sorted["bin"].unique(), key=_bin_order)
    num_rows = len(bins_with_topics)
    available_height = 100 - (2 * padding)
    row_height = available_height / num_rows if num_rows else 0
    row_positions = {
        bin_name: padding + row_idx * row_height + (row_height / 2)
        for row_idx, bin_name in enumerate(bins_with_topics)
    }
    df_sorted["y"] = df_sorted["bin"].map(row_positions)

    # Horizontal positions: center each row around the middle of the chart
    center_point = 50  # Middle of the chart (0-100 scale)
    sparse_spacing = 17.5  # units between bubbles in a row that is not full
    full_spacing = 15  # units between bubbles in a full row
    df_sorted["x"] = np.nan
    for bin_name in bins_with_topics:
        bin_mask = df_sorted["bin"] == bin_name
        num_topics_in_bin = int(bin_mask.sum())
        spacing = (
            sparse_spacing if num_topics_in_bin < max_items_per_row else full_spacing
        )
        # A single bubble yields a zero-width group, i.e. exactly the center
        start_pos = center_point - (num_topics_in_bin - 1) * spacing / 2
        df_sorted.loc[bin_mask, "x"] = [
            start_pos + slot * spacing for slot in range(num_topics_in_bin)
        ]

    # Add rank in final display order for reference
    df_sorted["size_rank"] = range(1, len(df_sorted) + 1)

    return df_sorted
1624
 
1625
 
1626
# Recompute bubble coordinates for the main chart
def update_bubble_positions(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` positioned with the binned layout.

    The main chart always uses ``apply_binned_layout`` with its default
    settings.
    """
    positioned = apply_binned_layout(df)
    return positioned
1630
 
1631
 
1632
+ # Callback to update the bubble chart
1633
  @callback(
1634
  Output("bubble-chart", "figure"),
1635
  [
 
1643
 
1644
  df = pd.DataFrame(data)
1645
 
1646
+ # Update positions using binned layout
1647
+ df = update_bubble_positions(df)
 
 
1648
 
1649
+ # Always use count for sizing
1650
  size_values = df["count"]
1651
  raw_sizes = df["count"]
1652
  size_title = "Dialog Count"
1653
+
1654
+ # Apply log scaling to the size values for better visualization
1655
+ # To make the smallest bubble bigger, increase the min_size value (currently 1).
1656
+ min_size = 1 # Minimum bubble size
1657
  if size_values.max() > size_values.min():
1658
+ # Log-scale the sizes
1659
  log_sizes = np.log1p(size_values)
1660
+ # Scale to a reasonable range for visualization
1661
+ # To make the biggest bubble smaller, reduce the multiplier (currently 50).
1662
+ size_values = (
1663
+ min_size
1664
+ + (log_sizes - log_sizes.min()) / (log_sizes.max() - log_sizes.min()) * 50
1665
+ )
1666
  else:
1667
+ # If all values are the same, use a default size
1668
  size_values = np.ones(len(df)) * 12.5
1669
 
1670
+ # DEBUG: Print sizes of bubbles in the first and second bins
1671
+ bins = sorted(df["bin"].unique())
1672
+ if len(bins) >= 1:
1673
+ # first_bin = bins[0]
1674
+ # print(f"DEBUG - First bin '{first_bin}' bubble sizes:")
1675
+ # first_bin_df = df[df["bin"] == first_bin]
1676
+ # for idx, row in first_bin_df.iterrows():
1677
+ # print(
1678
+ # f" Topic: {row['deduplicated_topic_name']}, Raw size: {row['count']}, Displayed size: {size_values[idx]}"
1679
+ # )
1680
+ pass
1681
+
1682
+ if len(bins) >= 2:
1683
+ # second_bin = bins[1]
1684
+ # print(f"DEBUG - Second bin '{second_bin}' bubble sizes:")
1685
+ # second_bin_df = df[df["bin"] == second_bin]
1686
+ # for idx, row in second_bin_df.iterrows():
1687
+ # print(
1688
+ # f" Topic: {row['deduplicated_topic_name']}, Raw size: {row['count']}, Displayed size: {size_values[idx]}"
1689
+ # )
1690
+ pass
1691
+
1692
+ # Determine color based on selected metric
1693
  if color_metric == "negative_rate":
1694
  color_values = df["negative_rate"]
1695
+ # color_title = "Negative Sentiment (%)"
1696
  color_title = "Negativity (%)"
1697
+ # color_scale = "RdBu" # no ice, RdBu - og is Reds - matter is good too
1698
+ # color_scale = "Portland"
1699
+ # color_scale = "RdYlGn_r"
1700
+ # color_scale = "Teal"
1701
  color_scale = "Teal"
1702
+
1703
  elif color_metric == "unresolved_rate":
1704
  color_values = df["unresolved_rate"]
1705
  color_title = "Unresolved (%)"
1706
+ # color_scale = "Burg" # og is YlOrRd
1707
+ # color_scale = "Temps"
1708
+ # color_scale = "Armyrose"
1709
+ # color_scale = "YlOrRd"
1710
  color_scale = "Teal"
1711
+ else:
1712
  color_values = df["urgent_rate"]
1713
  color_title = "Urgency (%)"
1714
+ # color_scale = "Magenta" # og is Blues
1715
+ # color_scale = "Tealrose"
1716
+ # color_scale = "Portland"
1717
  color_scale = "Teal"
1718
 
1719
+ # Create enhanced hover text that includes bin information
1720
  hover_text = [
1721
  f"Topic: {topic}<br>{size_title}: {raw:.1f}<br>{color_title}: {color:.1f}<br>Group: {bin_desc}"
1722
+ for topic, raw, color, bin_desc in zip(
1723
+ df["deduplicated_topic_name"],
1724
+ raw_sizes,
1725
+ color_values,
1726
+ df["bin_description"],
1727
+ )
1728
  ]
1729
 
1730
+ # Create bubble chart
1731
  fig = px.scatter(
1732
  df,
1733
+ x="x",
1734
+ y="y",
1735
  size=size_values,
1736
  color=color_values,
1737
+ # text="deduplicated_topic_name", # Remove text here
1738
  hover_name="deduplicated_topic_name",
1739
+ hover_data={
1740
+ "x": False,
1741
+ "y": False,
1742
+ "bin_description": True,
1743
+ },
1744
+ size_max=42.5, # Maximum size of the bubbles, change this to adjust the size
1745
  color_continuous_scale=color_scale,
1746
+ custom_data=[
1747
+ "deduplicated_topic_name",
1748
+ "count",
1749
+ "negative_rate",
1750
+ "unresolved_rate",
1751
+ "urgent_rate",
1752
+ "bin_description",
1753
+ ],
1754
  )
1755
 
1756
+ # Update traces: Remove text related properties
1757
  fig.update_traces(
1758
+ mode="markers", # Remove '+text'
1759
  marker=dict(sizemode="area", opacity=0.8, line=dict(width=1, color="white")),
1760
  hovertemplate="%{hovertext}<extra></extra>",
1761
  hovertext=hover_text,
1762
  )
1763
 
1764
+ # Create annotations for the bubbles
1765
  annotations = []
1766
  for i, row in df.iterrows():
1767
+ # Wrap text every 4 words
1768
  words = row["deduplicated_topic_name"].split()
1769
+ wrapped_text = "<br>".join(
1770
+ [" ".join(words[i : i + 4]) for i in range(0, len(words), 4)]
1771
+ )
1772
+
1773
+ # Calculate size for vertical offset (approximately based on the bubble size)
1774
+ # Add vertical offset based on bubble size to place text below the bubble
1775
+ marker_size = (
1776
+ size_values[i] / 20 # type: ignore # FIXME: size_values[df.index.get_loc(i)] / 20
1777
+ ) # Adjust this divisor as needed to get proper spacing
1778
+
1779
  annotations.append(
1780
  dict(
1781
+ x=row["x"],
1782
+ y=row["y"]
1783
+ + 0.125 # Adding this so in a row with maximum bubbles, the left one does not overlap with the bin label
1784
+ + marker_size, # Add vertical offset to position text below the bubble
1785
+ text=wrapped_text,
1786
+ showarrow=False,
1787
+ textangle=0,
1788
+ font=dict(
1789
+ # size=10,
1790
+ # size=15,
1791
+ size=9,
1792
+ color="var(--foreground)",
1793
+ family="Arial, sans-serif",
1794
+ weight="bold",
1795
+ ),
1796
+ xanchor="center",
1797
+ yanchor="top", # Anchor to top of text box so it hangs below the bubble
1798
+ bgcolor="rgba(255,255,255,0.7)", # Add semi-transparent background for better readability
1799
+ bordercolor="rgba(0,0,0,0.1)", # Add a subtle border color
1800
+ borderwidth=1,
1801
+ borderpad=1,
1802
+ # TODO: Radius for rounded corners
1803
  )
1804
  )
1805
 
1806
+ # Add bin labels and separator lines
1807
  unique_bins = sorted(df["bin"].unique())
1808
+ bin_y_positions = [
1809
+ df[df["bin"] == bin_name]["y"].mean() for bin_name in unique_bins
1810
+ ]
1811
+
1812
+ # Dynamically extract bin descriptions
1813
  bin_descriptions = df.set_index("bin")["bin_description"].to_dict()
1814
 
1815
  for bin_name, bin_y in zip(unique_bins, bin_y_positions):
1816
+ # Add horizontal line
1817
+ fig.add_shape(
1818
+ type="line",
1819
+ x0=0,
1820
+ y0=bin_y,
1821
+ x1=100,
1822
+ y1=bin_y,
1823
+ line=dict(color="rgba(0,0,0,0.1)", width=1, dash="dot"),
1824
+ layer="below",
1825
+ )
1826
+
1827
+ # Add subtle lines for each bin and bin labels
1828
+ for bin_name, bin_y in zip(unique_bins, bin_y_positions):
1829
+ # Add horizontal line
1830
+ fig.add_shape(
1831
+ type="line",
1832
+ x0=0,
1833
+ y0=bin_y,
1834
+ x1=100,
1835
+ y1=bin_y,
1836
+ line=dict(color="rgba(0,0,0,0.1)", width=1, dash="dot"),
1837
+ layer="below",
1838
+ )
1839
+
1840
+ # Add bin label annotation
1841
  annotations.append(
1842
  dict(
1843
+ x=0, # Position the label on the left side
1844
+ y=bin_y,
1845
+ xref="x",
1846
+ yref="y",
1847
+ text=bin_descriptions[bin_name],
1848
+ showarrow=False,
1849
  font=dict(size=8.25, color="var(--muted-foreground)"),
1850
+ align="left",
1851
+ xanchor="left",
1852
+ yanchor="middle",
1853
+ bgcolor="rgba(255,255,255,0.7)",
1854
+ borderpad=1,
1855
  )
1856
  )
1857
 
1858
  fig.update_layout(
1859
  title=None,
1860
+ xaxis=dict(
1861
+ showgrid=False,
1862
+ zeroline=False,
1863
+ showticklabels=False,
1864
+ title=None,
1865
+ range=[0, 100],
1866
+ ),
1867
+ yaxis=dict(
1868
+ showgrid=False,
1869
+ zeroline=False,
1870
+ showticklabels=False,
1871
+ title=None,
1872
+ range=[0, 100],
1873
+ autorange="reversed", # Keep largest at top
1874
+ ),
1875
  hovermode="closest",
1876
  margin=dict(l=0, r=0, t=10, b=10),
1877
+ coloraxis_colorbar=dict(
1878
+ title=color_title,
1879
+ title_font=dict(size=9),
1880
+ tickfont=dict(size=8),
1881
+ thickness=10,
1882
+ len=0.6,
1883
+ yanchor="middle",
1884
+ y=0.5,
1885
+ xpad=0,
1886
+ ),
1887
  legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
1888
  paper_bgcolor="rgba(0,0,0,0)",
1889
  plot_bgcolor="rgba(0,0,0,0)",
1890
  hoverlabel=dict(bgcolor="white", font_size=12, font_family="Inter"),
1891
+ annotations=annotations, # Add bin labels as annotations
1892
  )
1893
 
1894
  return fig
1895
 
1896
 
1897
+ # Topic details should be click-driven only (no hover triggers)
1898
  @callback(
1899
  [
1900
  Output("topic-title", "children"),
 
1909
  Output("selected-topic-store", "data"),
1910
  ],
1911
  [
1912
+ Input("bubble-chart", "clickData"),
1913
  Input("refresh-dialogs-btn", "n_clicks"),
1914
  ],
1915
+ [
1916
+ State("stored-data", "data"),
1917
+ State("data-cache-key", "data"),
1918
+ State("selected-topic-store", "data"),
1919
+ ],
1920
  )
1921
+ def update_topic_details(
1922
+ click_data, refresh_clicks, stored_data, cache_key, selected_topic_state
1923
+ ):
1924
+ # Determine topic based on click selection or prior selection on refresh
1925
+ topic_name = None
1926
  ctx = dash.callback_context
1927
+ triggered = ctx.triggered[0]["prop_id"].split(".")[0] if ctx.triggered else None
1928
+
1929
+ if triggered == "bubble-chart":
1930
+ if click_data and "points" in click_data and click_data["points"]:
1931
+ topic_name = click_data["points"][0]["customdata"][0]
1932
+ elif triggered == "refresh-dialogs-btn":
1933
+ if selected_topic_state and selected_topic_state.get("topic_name"):
1934
+ topic_name = selected_topic_state["topic_name"]
1935
+
1936
+ if not topic_name or not stored_data or not cache_key:
1937
+ return (
1938
+ "",
1939
+ [],
1940
+ [],
1941
+ "",
1942
+ {"display": "none"},
1943
+ "",
1944
+ {"display": "none"},
1945
+ [],
1946
+ {"display": "flex"},
1947
+ None,
1948
+ )
1949
+
1950
+ # Get stored data for this topic
1951
  df_stored = pd.DataFrame(stored_data)
1952
  topic_data = df_stored[df_stored["deduplicated_topic_name"] == topic_name].iloc[0]
1953
 
1954
+ # Get original data to sample conversations from server-side cache
1955
+ df_full = _cache_get_df(cache_key)
1956
+ if df_full is None:
1957
+ return (
1958
+ "",
1959
+ [],
1960
+ [],
1961
+ "",
1962
+ {"display": "none"},
1963
+ "",
1964
+ {"display": "none"},
1965
+ [html.Div("Session expired. Please re-upload file.")],
1966
+ {"display": "flex"},
1967
+ None,
1968
+ )
1969
+
1970
+ # Filter to this topic
1971
  topic_conversations = df_full[df_full["deduplicated_topic_name"] == topic_name]
1972
 
1973
+ # Create the title
1974
  title = html.Div([html.Span(topic_name)])
1975
+
1976
+ # Create metadata items
1977
  metadata_items = [
1978
  html.Div(
1979
  [
 
1981
  html.Span(f"{int(topic_data['count'])} dialogs"),
1982
  html.Button(
1983
  [
1984
+ html.I(
1985
+ className="fas fa-table", style={"marginRight": "0.25rem"}
1986
+ ),
1987
+ "Show all dialogs inside",
1988
  ],
1989
  id="show-all-dialogs-btn",
1990
  className="show-dialogs-btn",
 
1995
  style={"display": "flex", "alignItems": "center", "width": "100%"},
1996
  ),
1997
  ]
1998
+
1999
+ # Create metrics boxes
2000
  metrics_boxes = [
2001
  html.Div(
2002
  [
 
2021
  ),
2022
  ]
2023
 
2024
+ # Extract and process root causes
2025
  root_causes_output = ""
2026
  root_causes_section_style = {"display": "none"}
2027
+
2028
+ # Check if root_cause_subcluster column exists in the data
2029
  if "root_cause_subcluster" in topic_conversations.columns:
2030
+ # Get unique root causes for this specific cluster
2031
+ root_causes = topic_conversations["root_cause_subcluster"].dropna().unique()
2032
+
2033
+ # Filter out common non-informative values including "Unclustered"
2034
  filtered_root_causes = [
2035
+ rc
2036
+ for rc in root_causes
2037
+ if rc
2038
+ not in [
2039
+ "Sub-clustering disabled",
2040
+ "Not eligible for sub-clustering",
2041
+ "No valid root causes",
2042
+ "No Subcluster",
2043
+ "Unclustered",
2044
+ "",
2045
+ ]
2046
  ]
2047
+
2048
+ # Debug: Print the unique root causes for this cluster
2049
+ print(f"\n[DEBUG] Root causes for cluster '{topic_name}':")
2050
+ print(f" All root causes: {list(root_causes)}")
2051
+ print(f" Filtered root causes: {filtered_root_causes}")
2052
+
2053
  if filtered_root_causes:
2054
+ # Create beautifully styled root cause tags with clickable icons
2055
  root_causes_output = html.Div(
2056
  [
2057
  html.Div(
2058
  [
2059
+ html.I(
2060
+ className="fas fa-exclamation-triangle root-cause-tag-icon"
2061
+ ),
2062
  html.Span(root_cause, style={"marginRight": "6px"}),
2063
  html.I(
2064
  className="fas fa-external-link-alt root-cause-click-icon",
2065
  id={"type": "root-cause-icon", "index": root_cause},
2066
  title="Click to see specific chats assigned with this root cause.",
2067
+ style={
2068
+ "cursor": "pointer",
2069
+ "fontSize": "0.55rem",
2070
+ "opacity": "0.8",
2071
+ },
2072
  ),
2073
  ],
2074
  className="root-cause-tag",
 
2080
  )
2081
  root_causes_section_style = {"display": "block"}
2082
 
2083
+ # Extract and process consolidated_tags with improved styling
2084
  tags_list = []
2085
+ for _, row in topic_conversations.iterrows():
2086
+ tags_str = row.get("consolidated_tags", "")
2087
+ if pd.notna(tags_str):
2088
+ tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()]
2089
+ tags_list.extend(tags)
2090
+
2091
+ # Count tag frequencies for better insight
2092
  tag_counts = {}
2093
  for tag in tags_list:
2094
  tag_counts[tag] = tag_counts.get(tag, 0) + 1
2095
 
2096
+ # Sort by frequency (most common first) and then alphabetically for ties
2097
+ sorted_tags = sorted(tag_counts.items(), key=lambda x: (-x[1], x[0]))
2098
 
2099
+ # Keep only the top K tags
2100
+ TOP_K = 15
2101
+ sorted_tags = sorted_tags[:TOP_K]
2102
+
2103
+ # Set tags section visibility and output
2104
  tags_section_style = {"display": "none"}
2105
  if sorted_tags:
2106
+ # Create beautifully styled tags with count indicators and consistent color
2107
  tags_output = html.Div(
2108
  [
2109
  html.Div(
 
2120
  tags_section_style = {"display": "block"}
2121
  else:
2122
  tags_output = html.Div(
2123
+ [
2124
+ html.I(className="fas fa-info-circle", style={"marginRight": "5px"}),
2125
+ "No tags found for this topic",
2126
+ ],
2127
  className="no-tags-message",
2128
  )
2129
 
2130
+ # Sample up to 5 random dialogs
2131
  sample_size = min(5, len(topic_conversations))
2132
  if sample_size > 0:
2133
+ sample_indices = random.sample(range(len(topic_conversations)), sample_size)
2134
+ samples = topic_conversations.iloc[sample_indices]
2135
+
2136
  dialog_items = []
2137
  for _, row in samples.iterrows():
2138
+ # Create dialog item with tags
2139
+ sentiment_tag = html.Span(
2140
+ row["Sentiment"], className="dialog-tag tag-sentiment"
2141
+ )
2142
+ resolution_tag = html.Span(
2143
+ row["Resolution"], className="dialog-tag tag-resolution"
2144
+ )
2145
+ urgency_tag = html.Span(row["Urgency"], className="dialog-tag tag-urgency")
2146
+
2147
+ # Add Chat ID tag if 'id' column exists
2148
+ chat_id_tag = None
2149
  if "id" in row:
2150
+ chat_id_tag = html.Span(
2151
+ [
2152
+ f"Chat ID: {row['id']} ",
2153
+ html.I(
2154
+ className="fas fa-arrow-up-right-from-square conversation-icon",
2155
+ id={"type": "conversation-icon", "index": row["id"]},
2156
+ title="View full conversation",
2157
+ style={"marginLeft": "0.25rem"},
2158
+ ),
2159
+ ],
2160
+ className="dialog-tag tag-chat-id",
2161
+ style={"display": "inline-flex", "alignItems": "center"},
2162
+ )
2163
+
2164
+ # Add Root Cause tag if 'Root Cause' column exists
2165
+ root_cause_tag = None
2166
+ if (
2167
+ "Root_Cause" in row
2168
+ and pd.notna(row["Root_Cause"])
2169
+ and row["Root_Cause"] != "na"
2170
+ ):
2171
+ root_cause_tag = html.Span(
2172
+ f"Root Cause: {row['Root_Cause']}",
2173
+ className="dialog-tag tag-root-cause",
2174
+ )
2175
+
2176
+ # Compile all tags, including the new Chat ID and Root Cause tags if available
2177
+ tags = [sentiment_tag, resolution_tag, urgency_tag]
2178
+ if chat_id_tag:
2179
+ tags.append(chat_id_tag)
2180
+ if root_cause_tag:
2181
+ tags.append(root_cause_tag)
2182
 
2183
  dialog_items.append(
2184
  html.Div(
2185
+ [
2186
+ html.Div(row["Summary"], className="dialog-summary"),
2187
+ html.Div(
2188
+ tags,
2189
+ className="dialog-metadata",
2190
+ ),
2191
+ ],
2192
  className="dialog-item",
2193
  )
2194
  )
2195
+
2196
  sample_dialogs = dialog_items
2197
  else:
2198
+ sample_dialogs = [
2199
+ html.Div(
2200
+ "No sample dialogs available for this topic.",
2201
+ style={"color": "var(--muted-foreground)"},
2202
+ )
2203
+ ]
2204
 
2205
  return (
2206
  title,
 
2212
  tags_section_style,
2213
  sample_dialogs,
2214
  {"display": "none"},
2215
+ {"topic_name": topic_name, "cache_key": cache_key},
2216
  )
2217
 
2218
 
2219
+ # Callback to open modal when conversation icon is clicked
2220
  @callback(
2221
  [
2222
  Output("conversation-modal", "style"),
 
2224
  Output("conversation-subheader", "children"),
2225
  ],
2226
  [Input({"type": "conversation-icon", "index": dash.dependencies.ALL}, "n_clicks")],
2227
+ [State("data-cache-key", "data")],
2228
  prevent_initial_call=True,
2229
  )
2230
+ def open_conversation_modal(n_clicks_list, cache_key):
2231
+ # Check if any icon was clicked
2232
+ if not any(n_clicks_list) or not cache_key:
2233
  return {"display": "none"}, "", ""
2234
 
2235
+ # Get which icon was clicked
2236
  ctx = dash.callback_context
2237
  if not ctx.triggered:
2238
+ return (
2239
+ {"display": "none"},
2240
+ "",
2241
+ "",
2242
+ ) # Extract the chat ID from the triggered input
2243
  triggered_id = ctx.triggered[0]["prop_id"]
2244
  chat_id = json.loads(triggered_id.split(".")[0])["index"]
2245
 
2246
+ # Get the full conversation from the cached DataFrame
2247
+ df_full = _cache_get_df(cache_key)
2248
+ if df_full is None:
2249
+ return {"display": "none"}, "Session expired. Re-upload file.", ""
2250
+
2251
+ # Find the conversation with this chat ID
2252
  conversation_row = df_full[df_full["id"] == chat_id]
2253
  if len(conversation_row) == 0:
2254
  conversation_text = "Conversation not found."
 
2256
  else:
2257
  row = conversation_row.iloc[0]
2258
  conversation_text = row.get("conversation", "No conversation data available.")
2259
+
2260
+ # Get cluster name if available
2261
  cluster_name = row.get("deduplicated_topic_name", "Unknown cluster")
2262
+
2263
+ # Create subheader with both Chat ID and cluster name
2264
  subheader_content = html.Div(
2265
  [
2266
+ html.Span(
2267
+ f"Chat ID: {chat_id}",
2268
+ style={"fontWeight": "600", "marginRight": "1rem"},
2269
+ ),
2270
+ html.Span(
2271
+ f"Cluster: {cluster_name}",
2272
+ style={"color": "hsl(215.4, 16.3%, 46.9%)"},
2273
+ ),
2274
  ]
2275
  )
2276
+
2277
  return {"display": "flex"}, conversation_text, subheader_content
2278
 
2279
 
2280
+ # Callback to close modal
2281
  @callback(
2282
  Output("conversation-modal", "style", allow_duplicate=True),
2283
  [Input("close-modal-btn", "n_clicks")],
 
2286
  def close_conversation_modal(n_clicks):
2287
  if n_clicks:
2288
  return {"display": "none"}
2289
+ return {"display": "none"}
2290
 
2291
 
2292
+ # Callback to open dialogs table modal when "Show all dialogs inside" button is clicked
2293
  @callback(
2294
  [
2295
  Output("dialogs-table-modal", "style"),
 
2297
  Output("dialogs-table-content", "children"),
2298
  ],
2299
  [Input("show-all-dialogs-btn", "n_clicks")],
2300
+ [State("selected-topic-store", "data")],
2301
  prevent_initial_call=True,
2302
  )
2303
def open_dialogs_table_modal(n_clicks, selected_topic_data):
    """Open the modal that lists every dialog of the selected topic.

    Args:
        n_clicks: Click count of the "show-all-dialogs-btn" button.
        selected_topic_data: Value of "selected-topic-store"; its
            "topic_name" and "cache_key" entries are read here.

    Returns:
        A ``(modal_style, modal_title, modal_content)`` tuple. The style is
        ``{"display": "none"}`` when nothing was clicked, no topic is stored,
        or the cached DataFrame is no longer available; otherwise it is
        ``{"display": "flex"}`` together with the title and the dialogs table.
    """
    if not n_clicks or not selected_topic_data:
        return {"display": "none"}, "", ""

    topic_name = selected_topic_data["topic_name"]
    cache_key = selected_topic_data.get("cache_key")

    # Get the full data from the server-side cache
    df_full = _cache_get_df(cache_key)
    if df_full is None:
        return (
            {"display": "none"},
            "",
            html.Div("Session expired. Please re-upload file."),
        )

    # Filter to this topic
    topic_conversations = df_full[df_full["deduplicated_topic_name"] == topic_name]

    header_row = html.Tr(
        [
            html.Th(label)
            for label in (
                "Chat ID",
                "Summary",
                "Root Cause",
                "Sentiment",
                "Resolution",
                "Urgency",
                "Tags",
                "Action",
            )
        ]
    )
    table_rows = [header_row]
    table_rows.extend(
        _dialogs_table_row(row) for _, row in topic_conversations.iterrows()
    )
    table = html.Table(table_rows, className="dialogs-table")

    modal_title = (
        f"All dialogs in Topic: {topic_name} ({len(topic_conversations)} dialogs)"
    )

    return {"display": "flex"}, modal_title, table


def _dialogs_table_label(value):
    """Return a capitalised display label, or "Unknown" for a missing value."""
    # row.get(..., default) only covers a missing column; a NaN cell still
    # comes back as a float, so missing values are checked explicitly.
    if pd.isna(value):
        return "Unknown"
    return str(value).capitalize()


def _dialogs_table_badge(value, background):
    """Render one small coloured badge for a dialogs-table cell."""
    return html.Span(
        _dialogs_table_label(value),
        className="dialog-tag-small",
        style={"backgroundColor": background, "color": "white"},
    )


def _dialogs_table_alert_badge(value, alert_value):
    """Render a badge that is red when value equals alert_value, grey otherwise."""
    background = "#dc3545" if value == alert_value else "#6c757d"
    return _dialogs_table_badge(value, background)


def _dialogs_table_tags_cell(tags_value):
    """Render up to three tag chips plus a "+N" overflow chip, or "No tags"."""
    tags = []
    if pd.notna(tags_value):
        tags = [tag.strip() for tag in str(tags_value).split(",") if tag.strip()]

    if not tags:
        return html.Span(
            "No tags",
            style={"color": "var(--muted-foreground)", "fontStyle": "italic"},
        )

    # Show only the first 3 tags; the remainder is summarised as "+N"
    chips = [
        html.Span(
            tag,
            className="dialog-tag-small",
            style={"backgroundColor": "#6c757d", "color": "white"},
        )
        for tag in tags[:3]
    ]
    if len(tags) > 3:
        chips.append(
            html.Span(
                f"+{len(tags) - 3}",
                className="dialog-tag-small",
                style={"backgroundColor": "#6c757d", "color": "white"},
            )
        )
    return html.Div(chips, className="dialog-tags-cell")


def _dialogs_table_row(row):
    """Render one conversation row of the "all dialogs" table."""
    return html.Tr(
        [
            html.Td(
                row["id"],
                style={"fontFamily": "monospace", "fontSize": "0.8rem"},
            ),
            html.Td(
                row.get("Summary", "No summary"),
                className="dialog-summary-cell",
            ),
            # Brown badge for the root cause
            html.Td(_dialogs_table_badge(row.get("Root_Cause"), "#8B4513")),
            html.Td(_dialogs_table_alert_badge(row.get("Sentiment"), "negative")),
            html.Td(_dialogs_table_alert_badge(row.get("Resolution"), "unresolved")),
            html.Td(_dialogs_table_alert_badge(row.get("Urgency"), "urgent")),
            html.Td(_dialogs_table_tags_cell(row.get("consolidated_tags", ""))),
            html.Td(
                html.Button(
                    [
                        html.I(
                            className="fas fa-eye",
                            style={"marginRight": "0.25rem"},
                        ),
                        "View chat session",
                    ],
                    id={"type": "open-chat-btn", "index": row["id"]},
                    className="open-chat-btn",
                    n_clicks=0,
                )
            ),
        ]
    )
2460
 
2461
 
2462
+ # Callback to close dialogs table modal
2463
  @callback(
2464
  Output("dialogs-table-modal", "style", allow_duplicate=True),
2465
  [Input("close-dialogs-modal-btn", "n_clicks")],
 
2468
  def close_dialogs_table_modal(n_clicks):
2469
  if n_clicks:
2470
  return {"display": "none"}
2471
+ return {"display": "none"}
2472
 
2473
 
2474
+ # Callback to open conversation modal from dialogs table
2475
  @callback(
2476
  [
2477
  Output("conversation-modal", "style", allow_duplicate=True),
 
2479
  Output("conversation-subheader", "children", allow_duplicate=True),
2480
  ],
2481
  [Input({"type": "open-chat-btn", "index": dash.dependencies.ALL}, "n_clicks")],
2482
+ [State("data-cache-key", "data")],
2483
  prevent_initial_call=True,
2484
  )
2485
def open_conversation_from_table(n_clicks_list, cache_key):
    """Open the conversation modal for a row clicked in the dialogs table.

    Args:
        n_clicks_list: Click counts of all "open-chat-btn" buttons.
        cache_key: Value of "data-cache-key", used to fetch the cached
            DataFrame through ``_cache_get_df``.

    Returns:
        A ``(modal_style, conversation_text, subheader)`` tuple. The modal
        stays hidden when no button has a truthy click count, when there is
        no cache key or triggering input, or when the cached DataFrame is
        gone; otherwise it is shown with either the conversation or a
        "not found" message.
    """
    import logging

    # Check if any button was clicked
    if not any(n_clicks_list) or not cache_key:
        return {"display": "none"}, "", ""

    # Get which button was clicked
    ctx = dash.callback_context
    if not ctx.triggered:
        return {"display": "none"}, "", ""

    # prop_id is "<JSON component id>.<property>". Split on the LAST dot only,
    # so a chat id that itself contains "." does not truncate the JSON.
    triggered_id = ctx.triggered[0]["prop_id"]
    chat_id = json.loads(triggered_id.rsplit(".", 1)[0])["index"]

    # Get the full conversation from the cached DataFrame
    df_full = _cache_get_df(cache_key)
    if df_full is None:
        return {"display": "none"}, "Session expired. Re-upload file.", ""

    matches = _match_chat_rows_for_table(df_full, chat_id)
    if len(matches) == 0:
        # Diagnostics go to the log instead of stdout / the user-facing text,
        # which previously listed other sessions' chat IDs.
        logging.getLogger(__name__).debug(
            "Chat ID %r (%s) not found; id dtype=%s, shape=%s",
            chat_id,
            type(chat_id).__name__,
            df_full["id"].dtype,
            df_full.shape,
        )
        conversation_text = f"Conversation not found for Chat ID: {chat_id}"
        subheader_content = f"Chat ID: {chat_id} (Not Found)"
    else:
        row = matches.iloc[0]
        conversation_text = row.get(
            "conversation", "No conversation data available."
        )

        # Create subheader with metadata
        subheader_content = (
            f"Chat ID: {chat_id}"
            f" | Topic: {row.get('deduplicated_topic_name', 'Unknown')}"
            f" | Sentiment: {row.get('Sentiment', 'Unknown')}"
            f" | Resolution: {row.get('Resolution', 'Unknown')}"
        )

    return {"display": "flex"}, conversation_text, subheader_content


def _match_chat_rows_for_table(df_full, chat_id):
    """Return the rows whose "id" matches chat_id, tolerating str/int mismatches."""
    ids = df_full["id"]
    matches = df_full[ids == chat_id]

    # If not found, compare both sides as strings
    if len(matches) == 0:
        matches = df_full[ids.astype(str) == str(chat_id)]

    # If still not found, compare against the integer form of chat_id
    if len(matches) == 0:
        try:
            matches = df_full[ids == int(chat_id)]
        except (ValueError, TypeError):
            # chat_id is not integer-like; keep the empty result
            pass

    return matches
2541
 
2542
 
2543
+ # Callback to open root cause modal when root cause icon is clicked
2544
  @callback(
2545
  [
2546
  Output("root-cause-modal", "style"),
 
2548
  Output("root-cause-table-content", "children"),
2549
  ],
2550
  [Input({"type": "root-cause-icon", "index": dash.dependencies.ALL}, "n_clicks")],
2551
+ [State("selected-topic-store", "data")],
2552
  prevent_initial_call=True,
2553
  )
2554
def open_root_cause_modal(n_clicks_list, selected_topic_data):
    """Open the modal listing the dialogs of one root cause in the selected topic.

    Args:
        n_clicks_list: Click counts of all "root-cause-icon" components.
        selected_topic_data: Value of "selected-topic-store"; its
            "topic_name" and "cache_key" entries are read here.

    Returns:
        A ``(modal_style, modal_title, modal_content)`` tuple. The style is
        ``{"display": "none"}`` when no icon has a truthy click count, no
        topic is stored, nothing triggered the callback, or the cached
        DataFrame is gone; otherwise ``{"display": "flex"}`` with the title
        and a count line followed by the dialogs table.
    """
    # Check if any icon was clicked
    if not any(n_clicks_list) or not selected_topic_data:
        return {"display": "none"}, "", ""

    # Get which icon was clicked
    ctx = dash.callback_context
    if not ctx.triggered:
        return {"display": "none"}, "", ""

    # prop_id is "<JSON component id>.<property>". Root-cause labels are free
    # text and may contain ".", so split on the LAST dot only.
    triggered_id = ctx.triggered[0]["prop_id"]
    root_cause = json.loads(triggered_id.rsplit(".", 1)[0])["index"]

    topic_name = selected_topic_data["topic_name"]
    cache_key = selected_topic_data.get("cache_key")

    # Get the full data from the server-side cache
    df_full = _cache_get_df(cache_key)
    if df_full is None:
        return (
            {"display": "none"},
            "",
            html.Div("Session expired. Please re-upload file."),
        )

    # Filter to this topic and root cause
    filtered_conversations = df_full[
        (df_full["deduplicated_topic_name"] == topic_name)
        & (df_full["root_cause_subcluster"] == root_cause)
    ]

    header_row = html.Tr(
        [
            html.Th(label)
            for label in (
                "Chat ID",
                "Summary",
                "Sentiment",
                "Resolution",
                "Urgency",
                "Tags",
                "Action",
            )
        ]
    )
    table_rows = [header_row]
    table_rows.extend(
        _root_cause_table_row(row) for _, row in filtered_conversations.iterrows()
    )
    table = html.Table(table_rows, className="dialogs-table")

    modal_title = f"Dialogs with Root Cause: {root_cause} (Topic: {topic_name})"
    count_info = html.P(
        f"Found {len(filtered_conversations)} dialogs with this root cause",
        style={
            "margin": "0 0 1rem 0",
            "color": "var(--muted-foreground)",
            "fontSize": "0.875rem",
        },
    )

    content = html.Div([count_info, table])

    return {"display": "flex"}, modal_title, content


def _root_cause_table_alert_badge(value, alert_value):
    """Render a badge that is red when value equals alert_value, grey otherwise."""
    # row.get(..., default) only covers a missing column; a NaN cell still
    # comes back as a float, so missing values are mapped to "Unknown" here.
    label = "Unknown" if pd.isna(value) else str(value).capitalize()
    background = "#dc3545" if value == alert_value else "#6c757d"
    return html.Span(
        label,
        className="dialog-tag-small",
        style={"backgroundColor": background, "color": "white"},
    )


def _root_cause_table_tags_cell(tags_value):
    """Render up to three tag chips plus a "+N" overflow chip, or "No tags"."""
    tags = []
    if pd.notna(tags_value):
        tags = [tag.strip() for tag in str(tags_value).split(",") if tag.strip()]

    if not tags:
        return html.Span(
            "No tags",
            style={"color": "var(--muted-foreground)", "fontStyle": "italic"},
        )

    # Show only the first 3 tags; the remainder is summarised as "+N"
    chips = [
        html.Span(
            tag,
            className="dialog-tag-small",
            style={"backgroundColor": "#6c757d", "color": "white"},
        )
        for tag in tags[:3]
    ]
    if len(tags) > 3:
        chips.append(
            html.Span(
                f"+{len(tags) - 3}",
                className="dialog-tag-small",
                style={"backgroundColor": "#6c757d", "color": "white"},
            )
        )
    return html.Div(chips, className="dialog-tags-cell")


def _root_cause_table_row(row):
    """Render one conversation row of the root-cause dialogs table."""
    return html.Tr(
        [
            html.Td(
                row["id"],
                style={"fontFamily": "monospace", "fontSize": "0.8rem"},
            ),
            html.Td(
                row.get("Summary", "No summary"),
                className="dialog-summary-cell",
            ),
            html.Td(_root_cause_table_alert_badge(row.get("Sentiment"), "negative")),
            html.Td(
                _root_cause_table_alert_badge(row.get("Resolution"), "unresolved")
            ),
            html.Td(_root_cause_table_alert_badge(row.get("Urgency"), "urgent")),
            html.Td(_root_cause_table_tags_cell(row.get("consolidated_tags", ""))),
            html.Td(
                html.Button(
                    [
                        html.I(
                            className="fas fa-eye",
                            style={"marginRight": "0.25rem"},
                        ),
                        "View chat",
                    ],
                    id={"type": "open-chat-btn-rc", "index": row["id"]},
                    className="open-chat-btn",
                    n_clicks=0,
                )
            ),
        ]
    )
2718
 
2719
 
2720
+ # Callback to close root cause modal
2721
  @callback(
2722
  Output("root-cause-modal", "style", allow_duplicate=True),
2723
  [Input("close-root-cause-modal-btn", "n_clicks")],
 
2726
  def close_root_cause_modal(n_clicks):
2727
  if n_clicks:
2728
  return {"display": "none"}
2729
+ return {"display": "none"}
2730
 
2731
 
2732
+ # Callback to open conversation modal from root cause table
2733
  @callback(
2734
  [
2735
  Output("conversation-modal", "style", allow_duplicate=True),
 
2737
  Output("conversation-subheader", "children", allow_duplicate=True),
2738
  ],
2739
  [Input({"type": "open-chat-btn-rc", "index": dash.dependencies.ALL}, "n_clicks")],
2740
+ [State("data-cache-key", "data")],
2741
  prevent_initial_call=True,
2742
  )
2743
def open_conversation_from_root_cause_table(n_clicks_list, cache_key):
    """Open the conversation modal for a row clicked in the root-cause table.

    Args:
        n_clicks_list: Click counts of all "open-chat-btn-rc" buttons.
        cache_key: Value of "data-cache-key", used to fetch the cached
            DataFrame through ``_cache_get_df``.

    Returns:
        A ``(modal_style, conversation_text, subheader)`` tuple. The modal
        stays hidden when no button has a truthy click count, when there is
        no cache key or triggering input, or when the cached DataFrame is
        gone; otherwise it is shown with either the conversation (subheader
        carrying chat ID, cluster and root cause) or a "not found" message.
    """
    # Check if any button was clicked
    if not any(n_clicks_list) or not cache_key:
        return {"display": "none"}, "", ""

    # Get which button was clicked
    ctx = dash.callback_context
    if not ctx.triggered:
        return {"display": "none"}, "", ""

    # prop_id is "<JSON component id>.<property>". Split on the LAST dot only,
    # so a chat id that itself contains "." does not truncate the JSON.
    triggered_id = ctx.triggered[0]["prop_id"]
    chat_id = json.loads(triggered_id.rsplit(".", 1)[0])["index"]

    # Get the full conversation from the cached DataFrame
    df_full = _cache_get_df(cache_key)
    if df_full is None:
        return {"display": "none"}, "Session expired. Re-upload file.", ""

    matches = _match_chat_rows_for_root_cause(df_full, chat_id)
    if len(matches) == 0:
        conversation_text = f"Conversation not found for Chat ID: {chat_id}"
        subheader_content = f"Chat ID: {chat_id} (Not Found)"
        return {"display": "flex"}, conversation_text, subheader_content

    row = matches.iloc[0]
    conversation_text = row.get("conversation", "No conversation data available.")

    # Get additional metadata
    root_cause = row.get("root_cause_subcluster", "Unknown")
    cluster_name = row.get("deduplicated_topic_name", "Unknown cluster")

    # Create subheader with metadata including root cause
    subheader_content = html.Div(
        [
            html.Span(
                f"Chat ID: {chat_id}",
                style={"fontWeight": "600", "marginRight": "1rem"},
            ),
            html.Span(
                f"Cluster: {cluster_name}",
                style={"color": "hsl(215.4, 16.3%, 46.9%)", "marginRight": "1rem"},
            ),
            html.Span(
                f"Root Cause: {root_cause}",
                style={"color": "#8b6f47", "fontWeight": "500"},
            ),
        ]
    )

    return {"display": "flex"}, conversation_text, subheader_content


def _match_chat_rows_for_root_cause(df_full, chat_id):
    """Return the rows whose "id" matches chat_id, tolerating str/int mismatches."""
    ids = df_full["id"]
    matches = df_full[ids == chat_id]

    # If not found, compare both sides as strings
    if len(matches) == 0:
        matches = df_full[ids.astype(str) == str(chat_id)]

    # If still not found, compare against the integer form of chat_id
    if len(matches) == 0:
        try:
            matches = df_full[ids == int(chat_id)]
        except (ValueError, TypeError):
            # chat_id is not integer-like; keep the empty result
            pass

    return matches
2804
 
2805
  # IMPORTANT: Expose the server for Gunicorn