anderson-ufrj committed on
Commit
640d7fb
·
1 Parent(s): f8a1f65

feat(monitoring): add comprehensive SLO/SLA Grafana dashboard

Browse files

- Create dashboard with key SLO indicators (99% availability, P95 < 200ms)
- Add gauges for real-time SLO tracking
- Include trend charts for availability and latency over 24h
- Add tables for worst performing endpoints by error rate and latency
- Create agent task success rate heatmap
- Include monthly SLA summary report table
- Track cache hit rate SLO (90% target)
- Monitor agent success rate SLO (99% target)
- Add request rate breakdown by status code
- Display agent response time percentiles
- Configure auto-refresh and time range controls

monitoring/grafana/dashboards/slo-sla-dashboard.json ADDED
@@ -0,0 +1,1003 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "annotations": {
3
+ "list": [
4
+ {
5
+ "builtIn": 1,
6
+ "datasource": "-- Grafana --",
7
+ "enable": true,
8
+ "hide": true,
9
+ "iconColor": "rgba(0, 211, 255, 1)",
10
+ "name": "Annotations & Alerts",
11
+ "type": "dashboard"
12
+ }
13
+ ]
14
+ },
15
+ "editable": true,
16
+ "gnetId": null,
17
+ "graphTooltip": 0,
18
+ "id": null,
19
+ "links": [],
20
+ "panels": [
21
+ {
22
+ "datasource": "Prometheus",
23
+ "fieldConfig": {
24
+ "defaults": {
25
+ "color": {
26
+ "mode": "thresholds"
27
+ },
28
+ "mappings": [],
29
+ "thresholds": {
30
+ "mode": "absolute",
31
+ "steps": [
32
+ {
33
+ "color": "red",
34
+ "value": null
35
+ },
36
+ {
37
+ "color": "yellow",
38
+ "value": 95
39
+ },
40
+ {
41
+ "color": "green",
42
+ "value": 99
43
+ }
44
+ ]
45
+ },
46
+ "unit": "percent"
47
+ },
48
+ "overrides": []
49
+ },
50
+ "gridPos": {
51
+ "h": 4,
52
+ "w": 6,
53
+ "x": 0,
54
+ "y": 0
55
+ },
56
+ "id": 1,
57
+ "options": {
58
+ "orientation": "auto",
59
+ "reduceOptions": {
60
+ "calcs": [
61
+ "lastNotNull"
62
+ ],
63
+ "fields": "",
64
+ "values": false
65
+ },
66
+ "showThresholdLabels": false,
67
+ "showThresholdMarkers": true
68
+ },
69
+ "pluginVersion": "8.0.0",
70
+ "targets": [
71
+ {
72
+ "expr": "(1 - (sum(rate(cidadao_ai_http_errors_total[5m])) / sum(rate(cidadao_ai_http_requests_total[5m])))) * 100",
73
+ "refId": "A"
74
+ }
75
+ ],
76
+ "title": "API Availability SLO (99%)",
77
+ "type": "gauge"
78
+ },
79
+ {
80
+ "datasource": "Prometheus",
81
+ "fieldConfig": {
82
+ "defaults": {
83
+ "color": {
84
+ "mode": "thresholds"
85
+ },
86
+ "mappings": [],
87
+ "thresholds": {
88
+ "mode": "absolute",
89
+ "steps": [
90
+ {
91
+ "color": "red",
92
+ "value": null
93
+ },
94
+ {
95
+ "color": "yellow",
96
+ "value": 90
97
+ },
98
+ {
99
+ "color": "green",
100
+ "value": 95
101
+ }
102
+ ]
103
+ },
104
+ "unit": "percent"
105
+ },
106
+ "overrides": []
107
+ },
108
+ "gridPos": {
109
+ "h": 4,
110
+ "w": 6,
111
+ "x": 6,
112
+ "y": 0
113
+ },
114
+ "id": 2,
115
+ "options": {
116
+ "orientation": "auto",
117
+ "reduceOptions": {
118
+ "calcs": [
119
+ "lastNotNull"
120
+ ],
121
+ "fields": "",
122
+ "values": false
123
+ },
124
+ "showThresholdLabels": false,
125
+ "showThresholdMarkers": true
126
+ },
127
+ "pluginVersion": "8.0.0",
128
+ "targets": [
129
+ {
130
+ "expr": "(sum(rate(cidadao_ai_request_duration_seconds_bucket{le=\"0.2\"}[5m])) / sum(rate(cidadao_ai_request_duration_seconds_count[5m]))) * 100",
131
+ "refId": "A"
132
+ }
133
+ ],
134
+ "title": "Latency SLO - P95 < 200ms (95%)",
135
+ "type": "gauge"
136
+ },
137
+ {
138
+ "datasource": "Prometheus",
139
+ "fieldConfig": {
140
+ "defaults": {
141
+ "color": {
142
+ "mode": "thresholds"
143
+ },
144
+ "mappings": [],
145
+ "thresholds": {
146
+ "mode": "absolute",
147
+ "steps": [
148
+ {
149
+ "color": "red",
150
+ "value": null
151
+ },
152
+ {
153
+ "color": "yellow",
154
+ "value": 85
155
+ },
156
+ {
157
+ "color": "green",
158
+ "value": 90
159
+ }
160
+ ]
161
+ },
162
+ "unit": "percent"
163
+ },
164
+ "overrides": []
165
+ },
166
+ "gridPos": {
167
+ "h": 4,
168
+ "w": 6,
169
+ "x": 12,
170
+ "y": 0
171
+ },
172
+ "id": 3,
173
+ "options": {
174
+ "orientation": "auto",
175
+ "reduceOptions": {
176
+ "calcs": [
177
+ "lastNotNull"
178
+ ],
179
+ "fields": "",
180
+ "values": false
181
+ },
182
+ "showThresholdLabels": false,
183
+ "showThresholdMarkers": true
184
+ },
185
+ "pluginVersion": "8.0.0",
186
+ "targets": [
187
+ {
188
+ "expr": "(sum(rate(cidadao_ai_cache_operations_total{result=\"hit\"}[5m])) / sum(rate(cidadao_ai_cache_operations_total[5m]))) * 100",
189
+ "refId": "A"
190
+ }
191
+ ],
192
+ "title": "Cache Hit Rate SLO (90%)",
193
+ "type": "gauge"
194
+ },
195
+ {
196
+ "datasource": "Prometheus",
197
+ "fieldConfig": {
198
+ "defaults": {
199
+ "color": {
200
+ "mode": "thresholds"
201
+ },
202
+ "mappings": [],
203
+ "thresholds": {
204
+ "mode": "absolute",
205
+ "steps": [
206
+ {
207
+ "color": "red",
208
+ "value": null
209
+ },
210
+ {
211
+ "color": "yellow",
212
+ "value": 95
213
+ },
214
+ {
215
+ "color": "green",
216
+ "value": 99
217
+ }
218
+ ]
219
+ },
220
+ "unit": "percent"
221
+ },
222
+ "overrides": []
223
+ },
224
+ "gridPos": {
225
+ "h": 4,
226
+ "w": 6,
227
+ "x": 18,
228
+ "y": 0
229
+ },
230
+ "id": 4,
231
+ "options": {
232
+ "orientation": "auto",
233
+ "reduceOptions": {
234
+ "calcs": [
235
+ "lastNotNull"
236
+ ],
237
+ "fields": "",
238
+ "values": false
239
+ },
240
+ "showThresholdLabels": false,
241
+ "showThresholdMarkers": true
242
+ },
243
+ "pluginVersion": "8.0.0",
244
+ "targets": [
245
+ {
246
+ "expr": "(sum(rate(cidadao_ai_agent_tasks_total{status=\"completed\"}[5m])) / sum(rate(cidadao_ai_agent_tasks_total[5m]))) * 100",
247
+ "refId": "A"
248
+ }
249
+ ],
250
+ "title": "Agent Success Rate SLO (99%)",
251
+ "type": "gauge"
252
+ },
253
+ {
254
+ "datasource": "Prometheus",
255
+ "fieldConfig": {
256
+ "defaults": {
257
+ "color": {
258
+ "mode": "palette-classic"
259
+ },
260
+ "custom": {
261
+ "axisLabel": "",
262
+ "axisPlacement": "auto",
263
+ "barAlignment": 0,
264
+ "drawStyle": "line",
265
+ "fillOpacity": 10,
266
+ "gradientMode": "none",
267
+ "hideFrom": {
268
+ "tooltip": false,
269
+ "viz": false,
270
+ "legend": false
271
+ },
272
+ "lineInterpolation": "linear",
273
+ "lineWidth": 1,
274
+ "pointSize": 5,
275
+ "scaleDistribution": {
276
+ "type": "linear"
277
+ },
278
+ "showPoints": "never",
279
+ "spanNulls": true,
280
+ "stacking": {
281
+ "group": "A",
282
+ "mode": "none"
283
+ },
284
+ "thresholdsStyle": {
285
+ "mode": "off"
286
+ }
287
+ },
288
+ "mappings": [],
289
+ "thresholds": {
290
+ "mode": "absolute",
291
+ "steps": [
292
+ {
293
+ "color": "green",
294
+ "value": null
295
+ }
296
+ ]
297
+ },
298
+ "unit": "percentunit"
299
+ },
300
+ "overrides": [
301
+ {
302
+ "matcher": {
303
+ "id": "byName",
304
+ "options": "SLO Target"
305
+ },
306
+ "properties": [
307
+ {
308
+ "id": "color",
309
+ "value": {
310
+ "fixedColor": "red",
311
+ "mode": "fixed"
312
+ }
313
+ },
314
+ {
315
+ "id": "custom.drawStyle",
316
+ "value": "line"
317
+ },
318
+ {
319
+ "id": "custom.lineStyle",
320
+ "value": {
321
+ "dash": [10, 10],
322
+ "fill": "dash"
323
+ }
324
+ }
325
+ ]
326
+ }
327
+ ]
328
+ },
329
+ "gridPos": {
330
+ "h": 8,
331
+ "w": 12,
332
+ "x": 0,
333
+ "y": 4
334
+ },
335
+ "id": 5,
336
+ "options": {
337
+ "tooltip": {
338
+ "mode": "single"
339
+ },
340
+ "legend": {
341
+ "calcs": [
342
+ "mean",
343
+ "min"
344
+ ],
345
+ "displayMode": "table",
346
+ "placement": "bottom"
347
+ }
348
+ },
349
+ "pluginVersion": "8.0.0",
350
+ "targets": [
351
+ {
352
+ "expr": "1 - (sum(rate(cidadao_ai_http_errors_total[5m])) / sum(rate(cidadao_ai_http_requests_total[5m])))",
353
+ "legendFormat": "API Availability",
354
+ "refId": "A"
355
+ },
356
+ {
357
+ "expr": "0.99",
358
+ "legendFormat": "SLO Target",
359
+ "refId": "B"
360
+ }
361
+ ],
362
+ "title": "API Availability Trend (24h)",
363
+ "type": "timeseries"
364
+ },
365
+ {
366
+ "datasource": "Prometheus",
367
+ "fieldConfig": {
368
+ "defaults": {
369
+ "color": {
370
+ "mode": "palette-classic"
371
+ },
372
+ "custom": {
373
+ "axisLabel": "",
374
+ "axisPlacement": "auto",
375
+ "barAlignment": 0,
376
+ "drawStyle": "line",
377
+ "fillOpacity": 10,
378
+ "gradientMode": "none",
379
+ "hideFrom": {
380
+ "tooltip": false,
381
+ "viz": false,
382
+ "legend": false
383
+ },
384
+ "lineInterpolation": "linear",
385
+ "lineWidth": 1,
386
+ "pointSize": 5,
387
+ "scaleDistribution": {
388
+ "type": "linear"
389
+ },
390
+ "showPoints": "never",
391
+ "spanNulls": true,
392
+ "stacking": {
393
+ "group": "A",
394
+ "mode": "none"
395
+ },
396
+ "thresholdsStyle": {
397
+ "mode": "off"
398
+ }
399
+ },
400
+ "mappings": [],
401
+ "thresholds": {
402
+ "mode": "absolute",
403
+ "steps": [
404
+ {
405
+ "color": "green",
406
+ "value": null
407
+ }
408
+ ]
409
+ },
410
+ "unit": "s"
411
+ },
412
+ "overrides": [
413
+ {
414
+ "matcher": {
415
+ "id": "byName",
416
+ "options": "SLO Target"
417
+ },
418
+ "properties": [
419
+ {
420
+ "id": "color",
421
+ "value": {
422
+ "fixedColor": "red",
423
+ "mode": "fixed"
424
+ }
425
+ },
426
+ {
427
+ "id": "custom.lineStyle",
428
+ "value": {
429
+ "dash": [10, 10],
430
+ "fill": "dash"
431
+ }
432
+ }
433
+ ]
434
+ }
435
+ ]
436
+ },
437
+ "gridPos": {
438
+ "h": 8,
439
+ "w": 12,
440
+ "x": 12,
441
+ "y": 4
442
+ },
443
+ "id": 6,
444
+ "options": {
445
+ "tooltip": {
446
+ "mode": "single"
447
+ },
448
+ "legend": {
449
+ "calcs": [
450
+ "mean",
451
+ "max"
452
+ ],
453
+ "displayMode": "table",
454
+ "placement": "bottom"
455
+ }
456
+ },
457
+ "pluginVersion": "8.0.0",
458
+ "targets": [
459
+ {
460
+ "expr": "histogram_quantile(0.95, sum(rate(cidadao_ai_request_duration_seconds_bucket[5m])) by (le))",
461
+ "legendFormat": "P95 Latency",
462
+ "refId": "A"
463
+ },
464
+ {
465
+ "expr": "0.2",
466
+ "legendFormat": "SLO Target",
467
+ "refId": "B"
468
+ }
469
+ ],
470
+ "title": "P95 Latency Trend (24h)",
471
+ "type": "timeseries"
472
+ },
473
+ {
474
+ "datasource": "Prometheus",
475
+ "fieldConfig": {
476
+ "defaults": {
477
+ "color": {
478
+ "mode": "thresholds"
479
+ },
480
+ "custom": {
481
+ "align": "auto",
482
+ "displayMode": "auto"
483
+ },
484
+ "mappings": [],
485
+ "thresholds": {
486
+ "mode": "absolute",
487
+ "steps": [
488
+ {
489
+ "color": "green",
490
+ "value": null
491
+ },
492
+ {
493
+ "color": "red",
494
+ "value": 80
495
+ }
496
+ ]
497
+ },
498
+ "unit": "percent"
499
+ },
500
+ "overrides": [
501
+ {
502
+ "matcher": {
503
+ "id": "byName",
504
+ "options": "Endpoint"
505
+ },
506
+ "properties": [
507
+ {
508
+ "id": "custom.width",
509
+ "value": 300
510
+ }
511
+ ]
512
+ }
513
+ ]
514
+ },
515
+ "gridPos": {
516
+ "h": 8,
517
+ "w": 12,
518
+ "x": 0,
519
+ "y": 12
520
+ },
521
+ "id": 7,
522
+ "options": {
523
+ "showHeader": true,
524
+ "sortBy": [
525
+ {
526
+ "desc": false,
527
+ "displayName": "Error Rate"
528
+ }
529
+ ]
530
+ },
531
+ "pluginVersion": "8.0.0",
532
+ "targets": [
533
+ {
534
+ "expr": "topk(10, (sum by (endpoint) (rate(cidadao_ai_http_errors_total[5m])) / sum by (endpoint) (rate(cidadao_ai_http_requests_total[5m]))) * 100)",
535
+ "format": "table",
536
+ "instant": true,
537
+ "refId": "A"
538
+ }
539
+ ],
540
+ "title": "Top 10 Endpoints by Error Rate",
541
+ "transformations": [
542
+ {
543
+ "id": "organize",
544
+ "options": {
545
+ "excludeByName": {
546
+ "Time": true
547
+ },
548
+ "indexByName": {},
549
+ "renameByName": {
550
+ "endpoint": "Endpoint",
551
+ "Value": "Error Rate"
552
+ }
553
+ }
554
+ }
555
+ ],
556
+ "type": "table"
557
+ },
558
+ {
559
+ "datasource": "Prometheus",
560
+ "fieldConfig": {
561
+ "defaults": {
562
+ "color": {
563
+ "mode": "thresholds"
564
+ },
565
+ "custom": {
566
+ "align": "auto",
567
+ "displayMode": "auto"
568
+ },
569
+ "mappings": [],
570
+ "thresholds": {
571
+ "mode": "absolute",
572
+ "steps": [
573
+ {
574
+ "color": "green",
575
+ "value": null
576
+ },
577
+ {
578
+ "color": "red",
579
+ "value": 1
580
+ }
581
+ ]
582
+ },
583
+ "unit": "s"
584
+ },
585
+ "overrides": []
586
+ },
587
+ "gridPos": {
588
+ "h": 8,
589
+ "w": 12,
590
+ "x": 12,
591
+ "y": 12
592
+ },
593
+ "id": 8,
594
+ "options": {
595
+ "showHeader": true,
596
+ "sortBy": [
597
+ {
598
+ "desc": true,
599
+ "displayName": "P95 Latency"
600
+ }
601
+ ]
602
+ },
603
+ "pluginVersion": "8.0.0",
604
+ "targets": [
605
+ {
606
+ "expr": "topk(10, histogram_quantile(0.95, sum by (endpoint, le) (rate(cidadao_ai_request_duration_seconds_bucket[5m]))))",
607
+ "format": "table",
608
+ "instant": true,
609
+ "refId": "A"
610
+ }
611
+ ],
612
+ "title": "Top 10 Slowest Endpoints (P95)",
613
+ "transformations": [
614
+ {
615
+ "id": "organize",
616
+ "options": {
617
+ "excludeByName": {
618
+ "Time": true
619
+ },
620
+ "indexByName": {},
621
+ "renameByName": {
622
+ "endpoint": "Endpoint",
623
+ "Value": "P95 Latency"
624
+ }
625
+ }
626
+ }
627
+ ],
628
+ "type": "table"
629
+ },
630
+ {
631
+ "datasource": "Prometheus",
632
+ "fieldConfig": {
633
+ "defaults": {
634
+ "color": {
635
+ "mode": "palette-classic"
636
+ },
637
+ "custom": {
638
+ "axisLabel": "",
639
+ "axisPlacement": "auto",
640
+ "barAlignment": 0,
641
+ "drawStyle": "bars",
642
+ "fillOpacity": 100,
643
+ "gradientMode": "none",
644
+ "hideFrom": {
645
+ "tooltip": false,
646
+ "viz": false,
647
+ "legend": false
648
+ },
649
+ "lineInterpolation": "linear",
650
+ "lineWidth": 1,
651
+ "pointSize": 5,
652
+ "scaleDistribution": {
653
+ "type": "linear"
654
+ },
655
+ "showPoints": "never",
656
+ "spanNulls": true,
657
+ "stacking": {
658
+ "group": "A",
659
+ "mode": "normal"
660
+ },
661
+ "thresholdsStyle": {
662
+ "mode": "off"
663
+ }
664
+ },
665
+ "mappings": [],
666
+ "thresholds": {
667
+ "mode": "absolute",
668
+ "steps": [
669
+ {
670
+ "color": "green",
671
+ "value": null
672
+ }
673
+ ]
674
+ },
675
+ "unit": "short"
676
+ },
677
+ "overrides": []
678
+ },
679
+ "gridPos": {
680
+ "h": 8,
681
+ "w": 12,
682
+ "x": 0,
683
+ "y": 20
684
+ },
685
+ "id": 9,
686
+ "options": {
687
+ "tooltip": {
688
+ "mode": "multi"
689
+ },
690
+ "legend": {
691
+ "calcs": [],
692
+ "displayMode": "list",
693
+ "placement": "bottom"
694
+ }
695
+ },
696
+ "pluginVersion": "8.0.0",
697
+ "targets": [
698
+ {
699
+ "expr": "sum by (status_code) (rate(cidadao_ai_http_requests_total[5m]))",
700
+ "legendFormat": "{{status_code}}",
701
+ "refId": "A"
702
+ }
703
+ ],
704
+ "title": "Request Rate by Status Code",
705
+ "type": "timeseries"
706
+ },
707
+ {
708
+ "datasource": "Prometheus",
709
+ "fieldConfig": {
710
+ "defaults": {
711
+ "color": {
712
+ "mode": "palette-classic"
713
+ },
714
+ "custom": {
715
+ "axisLabel": "",
716
+ "axisPlacement": "auto",
717
+ "barAlignment": 0,
718
+ "drawStyle": "line",
719
+ "fillOpacity": 10,
720
+ "gradientMode": "none",
721
+ "hideFrom": {
722
+ "tooltip": false,
723
+ "viz": false,
724
+ "legend": false
725
+ },
726
+ "lineInterpolation": "linear",
727
+ "lineWidth": 1,
728
+ "pointSize": 5,
729
+ "scaleDistribution": {
730
+ "type": "linear"
731
+ },
732
+ "showPoints": "never",
733
+ "spanNulls": true,
734
+ "stacking": {
735
+ "group": "A",
736
+ "mode": "none"
737
+ },
738
+ "thresholdsStyle": {
739
+ "mode": "off"
740
+ }
741
+ },
742
+ "mappings": [],
743
+ "thresholds": {
744
+ "mode": "absolute",
745
+ "steps": [
746
+ {
747
+ "color": "green",
748
+ "value": null
749
+ }
750
+ ]
751
+ },
752
+ "unit": "s"
753
+ },
754
+ "overrides": []
755
+ },
756
+ "gridPos": {
757
+ "h": 8,
758
+ "w": 12,
759
+ "x": 12,
760
+ "y": 20
761
+ },
762
+ "id": 10,
763
+ "options": {
764
+ "tooltip": {
765
+ "mode": "single"
766
+ },
767
+ "legend": {
768
+ "calcs": [],
769
+ "displayMode": "list",
770
+ "placement": "bottom"
771
+ }
772
+ },
773
+ "pluginVersion": "8.0.0",
774
+ "targets": [
775
+ {
776
+ "expr": "histogram_quantile(0.5, sum(rate(cidadao_ai_agent_task_duration_seconds_bucket[5m])) by (agent_name, le))",
777
+ "legendFormat": "{{agent_name}} P50",
778
+ "refId": "A"
779
+ },
780
+ {
781
+ "expr": "histogram_quantile(0.95, sum(rate(cidadao_ai_agent_task_duration_seconds_bucket[5m])) by (agent_name, le))",
782
+ "legendFormat": "{{agent_name}} P95",
783
+ "refId": "B"
784
+ }
785
+ ],
786
+ "title": "Agent Response Time",
787
+ "type": "timeseries"
788
+ },
789
+ {
790
+ "datasource": "Prometheus",
791
+ "fieldConfig": {
792
+ "defaults": {
793
+ "color": {
794
+ "mode": "continuous-GrYlRd"
795
+ },
796
+ "custom": {
797
+ "hideFrom": {
798
+ "tooltip": false,
799
+ "viz": false,
800
+ "legend": false
801
+ },
802
+ "scaleDistribution": {
803
+ "type": "linear"
804
+ }
805
+ },
806
+ "mappings": [],
807
+ "thresholds": {
808
+ "mode": "absolute",
809
+ "steps": [
810
+ {
811
+ "color": "green",
812
+ "value": null
813
+ }
814
+ ]
815
+ },
816
+ "unit": "percentunit"
817
+ },
818
+ "overrides": []
819
+ },
820
+ "gridPos": {
821
+ "h": 8,
822
+ "w": 12,
823
+ "x": 0,
824
+ "y": 28
825
+ },
826
+ "id": 11,
827
+ "options": {
828
+ "calculate": false,
829
+ "cellGap": 1,
830
+ "color": {
831
+ "exponent": 0.5,
832
+ "fill": "dark-orange",
833
+ "mode": "scheme",
834
+ "reverse": false,
835
+ "scale": "exponential",
836
+ "scheme": "RdYlGn",
837
+ "steps": 64
838
+ },
839
+ "exemplar": {
840
+ "color": "rgba(255,0,255,0.7)"
841
+ },
842
+ "filterValues": {
843
+ "le": 1e-9
844
+ },
845
+ "legend": {
846
+ "show": false
847
+ },
848
+ "rowsFrame": {
849
+ "layout": "auto"
850
+ },
851
+ "tooltip": {
852
+ "show": true,
853
+ "yHistogram": false
854
+ },
855
+ "yAxis": {
856
+ "axisPlacement": "left",
857
+ "reverse": false
858
+ }
859
+ },
860
+ "pluginVersion": "8.0.0",
861
+ "targets": [
862
+ {
863
+ "expr": "sum by(agent_name, task_type) (rate(cidadao_ai_agent_tasks_total{status=\"completed\"}[5m])) / sum by(agent_name, task_type) (rate(cidadao_ai_agent_tasks_total[5m]))",
864
+ "refId": "A"
865
+ }
866
+ ],
867
+ "title": "Agent Task Success Rate Heatmap",
868
+ "type": "heatmap"
869
+ },
870
+ {
871
+ "datasource": "Prometheus",
872
+ "description": "Monthly SLA Report Summary",
873
+ "fieldConfig": {
874
+ "defaults": {
875
+ "color": {
876
+ "mode": "thresholds"
877
+ },
878
+ "custom": {
879
+ "align": "auto",
880
+ "displayMode": "color-background"
881
+ },
882
+ "mappings": [],
883
+ "thresholds": {
884
+ "mode": "absolute",
885
+ "steps": [
886
+ {
887
+ "color": "red",
888
+ "value": null
889
+ },
890
+ {
891
+ "color": "yellow",
892
+ "value": 95
893
+ },
894
+ {
895
+ "color": "green",
896
+ "value": 99
897
+ }
898
+ ]
899
+ },
900
+ "unit": "percent"
901
+ },
902
+ "overrides": []
903
+ },
904
+ "gridPos": {
905
+ "h": 8,
906
+ "w": 12,
907
+ "x": 12,
908
+ "y": 28
909
+ },
910
+ "id": 12,
911
+ "options": {
912
+ "showHeader": true
913
+ },
914
+ "pluginVersion": "8.0.0",
915
+ "targets": [
916
+ {
917
+ "expr": "(1 - (sum(increase(cidadao_ai_http_errors_total[30d])) / sum(increase(cidadao_ai_http_requests_total[30d])))) * 100",
918
+ "format": "table",
919
+ "instant": true,
920
+ "legendFormat": "API Availability",
921
+ "refId": "A"
922
+ },
923
+ {
924
+ "expr": "(sum(increase(cidadao_ai_request_duration_seconds_bucket{le=\"0.2\"}[30d])) / sum(increase(cidadao_ai_request_duration_seconds_count[30d]))) * 100",
925
+ "format": "table",
926
+ "instant": true,
927
+ "legendFormat": "Latency SLA",
928
+ "refId": "B"
929
+ },
930
+ {
931
+ "expr": "(sum(increase(cidadao_ai_cache_operations_total{result=\"hit\"}[30d])) / sum(increase(cidadao_ai_cache_operations_total[30d]))) * 100",
932
+ "format": "table",
933
+ "instant": true,
934
+ "legendFormat": "Cache Hit Rate",
935
+ "refId": "C"
936
+ },
937
+ {
938
+ "expr": "(sum(increase(cidadao_ai_agent_tasks_total{status=\"completed\"}[30d])) / sum(increase(cidadao_ai_agent_tasks_total[30d]))) * 100",
939
+ "format": "table",
940
+ "instant": true,
941
+ "legendFormat": "Agent Success Rate",
942
+ "refId": "D"
943
+ }
944
+ ],
945
+ "title": "Monthly SLA Summary",
946
+ "transformations": [
947
+ {
948
+ "id": "merge",
949
+ "options": {}
950
+ },
951
+ {
952
+ "id": "organize",
953
+ "options": {
954
+ "excludeByName": {
955
+ "Time": true
956
+ },
957
+ "indexByName": {},
958
+ "renameByName": {
959
+ "Value #A": "API Availability",
960
+ "Value #B": "Latency < 200ms",
961
+ "Value #C": "Cache Hit Rate",
962
+ "Value #D": "Agent Success Rate"
963
+ }
964
+ }
965
+ }
966
+ ],
967
+ "type": "table"
968
+ }
969
+ ],
970
+ "refresh": "10s",
971
+ "schemaVersion": 30,
972
+ "style": "dark",
973
+ "tags": [
974
+ "slo",
975
+ "sla",
976
+ "cidadao-ai"
977
+ ],
978
+ "templating": {
979
+ "list": []
980
+ },
981
+ "time": {
982
+ "from": "now-24h",
983
+ "to": "now"
984
+ },
985
+ "timepicker": {
986
+ "refresh_intervals": [
987
+ "5s",
988
+ "10s",
989
+ "30s",
990
+ "1m",
991
+ "5m",
992
+ "15m",
993
+ "30m",
994
+ "1h",
995
+ "2h",
996
+ "1d"
997
+ ]
998
+ },
999
+ "timezone": "",
1000
+ "title": "Cidadão.AI - SLO/SLA Dashboard",
1001
+ "uid": "cidadao-ai-slo-sla",
1002
+ "version": 0
1003
+ }