# paragraph_annotation_tool.py (new UI)
"""
Paragraph-level annotation tool with per-model comments
────────────────────────────────────────────────────────────────────────────
• Upload a CSV containing at least:
Content_Paragraph,
<model>_prompt1, <model>_prompt2 … for each model
• Enter your annotator name and click "Start / Resume".
• Rate each prompt A / B / C, optionally leave a comment for every model,
navigate Back / Next, download the annotated CSV.
New in this version
───────────────────
• One **comment textbox per model** (shown next to the model's two ratings).
• Comments are stored in the CSV under columns named ``comment_<model>``.
• Blank comments are saved as the literal string ``"no comment"``.
"""
from __future__ import annotations
import gradio as gr, pandas as pd
import random, time, os, shutil, uuid
from typing import List
# ─── CONFIG ────────────────────────────────────────────────────────────────
MAX_MODELS = 10 # pre-allocate up to this many models
CONTENT_COL = "Content_Paragraph"
PROMPT1_SUFFIX = "_prompt1"
PROMPT2_SUFFIX = "_prompt2"
COMMENT_PREFIX = "comment_" # <COMMENT_PREFIX><model>
PERM_COL = "perm_models"
RATING_OPTS = ["A", "B", "C"]
# ─── GLOBALS (filled after CSV load) ───────────────────────────────────────
df: pd.DataFrame | None = None
csv_path: str | None = None
models: List[str] = []
TOTAL = 0
annotator = ""
current_start: float | None = None
# ─── CSV HELPERS ───────────────────────────────────────────────────────────
def load_csv(path: str):
"""Read CSV, discover model columns, add helper columns if needed."""
global df, models, TOTAL, csv_path
csv_path = path
df = pd.read_csv(csv_path, keep_default_na=False)
TOTAL = len(df)
models.clear()
for col in df.columns:
if col.endswith(PROMPT1_SUFFIX) and not col.startswith("rating_"):
m = col[: -len(PROMPT1_SUFFIX)]
if f"{m}{PROMPT2_SUFFIX}" not in df.columns:
raise ValueError(f"Found '{col}' but no '{m}{PROMPT2_SUFFIX}'")
models.append(m)
if not models:
raise ValueError(f"No '*{PROMPT1_SUFFIX}' columns found")
if len(models) > MAX_MODELS:
raise ValueError(
f"CSV has {len(models)} models but MAX_MODELS is {MAX_MODELS}")
# helper columns
if PERM_COL not in df.columns:
df[PERM_COL] = ""
for m in models:
# rating columns per prompt
for p in ("prompt1", "prompt2"):
rc = f"rating_{m}__{p}"
if rc not in df.columns:
df[rc] = ""
# NEW: comment column per model
cc = f"{COMMENT_PREFIX}{m}"
if cc not in df.columns:
df[cc] = "no comment" # default value
for col in ("annotator", "annotation_time"):
if col not in df.columns:
df[col] = "" if col == "annotator" else 0.0
# ─── BOOK-KEEPING ──────────────────────────────────────────────────────────
def first_incomplete() -> int:
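    """Return the index of the first row missing any rating (0 if every row is done)."""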
for i, row in df.iterrows():
for m in models:
if (
row[f"rating_{m}__prompt1"] == "" or
row[f"rating_{m}__prompt2"] == ""
):
return i
return 0
def get_perm(idx: int) -> List[str]:
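    """Return the shuffled model order for row *idx*, creating it on first access
    and persisting it pipe-joined in ``perm_models`` (e.g. ``"modelA|modelB"``)."""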
cell = str(df.at[idx, PERM_COL])
if not cell:
seq = models.copy()
random.shuffle(seq)
df.at[idx, PERM_COL] = "|".join(seq)
df.to_csv(csv_path, index=False)
return df.at[idx, PERM_COL].split("|")
def build_row(idx: int):
"""Return fixed-length tuple of widget values for example *idx*."""
global current_start
row = df.loc[idx]
order = get_perm(idx)
outs, rates, comms = [], [], []
for m in order:
outs.append(row[f"{m}{PROMPT1_SUFFIX}"])
outs.append(row[f"{m}{PROMPT2_SUFFIX}"])
rates.append(row[f"rating_{m}__prompt1"] or None)
rates.append(row[f"rating_{m}__prompt2"] or None)
val = row[f"{COMMENT_PREFIX}{m}"]
comms.append("" if val == "no comment" else val)
# pad up to MAX_MODELS
outs += [""] * (MAX_MODELS*2 - len(outs))
rates += [None]* (MAX_MODELS*2 - len(rates))
comms += ["" ] * (MAX_MODELS - len(comms))
ready = all(r in RATING_OPTS for r in rates[: 2*len(models)])
current_start = time.time()
header = f"Example {idx+1}/{TOTAL}"
return (
idx, header, row[CONTENT_COL],
*outs, *rates, *comms,
gr.update(visible=True), # back_btn update
gr.update(visible=True, interactive=ready) # next_btn update
)
def save_row(idx: int, ratings: List[str], comments: List[str]):
"""Persist ratings & comments for example *idx* β CSV."""
if not all(r in RATING_OPTS for r in ratings[: 2*len(models)]):
return
elapsed = time.time() - current_start if current_start else 0.0
order = get_perm(idx)
p = 0 # rating pointer
for m in order:
df.at[idx, f"rating_{m}__prompt1"] = ratings[p]; p += 1
df.at[idx, f"rating_{m}__prompt2"] = ratings[p]; p += 1
# comments
for m, c in zip(order, comments):
clean = (c or "").strip()
df.at[idx, f"{COMMENT_PREFIX}{m}"] = clean or "no comment"
df.at[idx, "annotator"] = annotator
df.at[idx, "annotation_time"] = float(elapsed)
df.to_csv(csv_path, index=False)
def _writable_dir() -> str:
"""Return /data on Spaces, /tmp elsewhere β whichever is writeable."""
for d in ("/data", "/tmp"):
try:
os.makedirs(d, exist_ok=True)
with open(os.path.join(d, ".touch"), "w"):
pass
return d
except PermissionError:
continue
raise PermissionError("No writable directory found.")
# ─── GRADIO UI ─────────────────────────────────────────────────────────────
with gr.Blocks(title="Paragraph Annotation Tool") as demo:
# shared state
idx_state = gr.State(0) # current example index
nmodels_state = gr.State(0) # how many model slots are active
gr.Markdown("## Paragraph Annotation Tool")
with gr.Row():
upload_box = gr.File(label="Upload / Resume CSV", file_types=[".csv"])
annot_box = gr.Textbox(label="Annotator name")
start_btn = gr.Button("Start / Resume")
annotator_label = gr.Markdown(visible=False)
annotation_area = gr.Column(visible=False)
with annotation_area:
idx_box = gr.Number(label="Index", interactive=False)
hdr_box = gr.Markdown()
para_box = gr.Textbox(lines=6, interactive=False,
label="Content Paragraph")
# Pre-allocate up to MAX_MODELS slots
out_boxes, radio_widgets, comment_boxes = [], [], []
for _ in range(MAX_MODELS):
with gr.Row():
# prompts + ratings
with gr.Column(scale=2):
out1 = gr.Textbox(lines=6, interactive=False)
rad1 = gr.Radio(RATING_OPTS, label="Rating (P1)", value=None)
with gr.Column(scale=2):
out2 = gr.Textbox(lines=6, interactive=False)
rad2 = gr.Radio(RATING_OPTS, label="Rating (P2)", value=None)
# NEW: comment textbox
with gr.Column(scale=1):
com = gr.Textbox(lines=2, label="Comment", placeholder="Optional…")
out_boxes.extend((out1, out2))
radio_widgets.extend((rad1, rad2))
comment_boxes.append(com)
back_btn = gr.Button("◀ Back", visible=False)
next_btn = gr.Button("Save & Next ▶", visible=False)
download_btn = gr.Button("💾 Download CSV", visible=False)
# Enable NEXT when visible radios are filled (comments are optional)
def toggle_next(model_cnt: int, *vals):
needed = vals[: model_cnt*2] # only rating radios
return gr.update(interactive=all(v in RATING_OPTS for v in needed))
for r in radio_widgets:
r.change(toggle_next,
inputs=[nmodels_state]+radio_widgets,
outputs=next_btn)
# ── navigation callbacks ───────────────────────────────────────────────────
def goto(step: int):
def _fn(idx: int, model_cnt: int, *vals):
"""Handle Back / Next logic."""
# *vals* layout: radios (MAX_MODELS*2) + comments (MAX_MODELS) + next_btn value
RADIO_COUNT = MAX_MODELS * 2
ratings = list(vals[: model_cnt * 2])
comments = list(vals[RADIO_COUNT : RADIO_COUNT + model_cnt])
# save current row unless we attempted to go back without finishing ratings
if step != -1 or all(r in RATING_OPTS for r in ratings):
save_row(idx, ratings, comments)
new_idx = max(0, min(idx+step, TOTAL-1))
return build_row(new_idx)
return _fn
back_btn.click(
goto(-1),
inputs=[idx_state, nmodels_state]+radio_widgets+comment_boxes+[next_btn],
outputs=[idx_state, hdr_box, para_box,
*out_boxes, *radio_widgets, *comment_boxes,
back_btn, next_btn],
)
next_btn.click(
goto(1),
inputs=[idx_state, nmodels_state]+radio_widgets+comment_boxes+[next_btn],
outputs=[idx_state, hdr_box, para_box,
*out_boxes, *radio_widgets, *comment_boxes,
back_btn, next_btn],
)
# CSV download
def make_download():
if df is None:
raise gr.Error("No CSV loaded yet.")
tmp = os.path.join(_writable_dir(),
f"annotations_{uuid.uuid4().hex}.csv")
df.to_csv(tmp, index=False)
return tmp
download_btn.click(make_download, outputs=gr.File())
# ── Start / Resume ─────────────────────────────────────────────────────────
def start_app(csv_file, name):
global annotator
if csv_file is None or not name.strip():
raise gr.Error("Please upload a CSV and enter your name.")
new_path = os.path.join(_writable_dir(), f"{uuid.uuid4().hex}.csv")
shutil.copy(csv_file.name, new_path)
load_csv(new_path)
annotator = name.strip()
# visibility flags β one boolean per model slot
vis_flags = [i < len(models) for i in range(MAX_MODELS)]
# build first row values
row_vals = build_row(first_incomplete())
idx_val, hdr_val, para_val = row_vals[:3]
outs = row_vals[3 : 3 + MAX_MODELS*2]
rates = row_vals[3 + MAX_MODELS*2 : 3 + MAX_MODELS*4]
comms = row_vals[3 + MAX_MODELS*4 : 3 + MAX_MODELS*5]
back_update, next_update = row_vals[-2:]
# updates for textboxes, radios, comments
out_updates = [
gr.update(value=outs[i], visible=vis_flags[i//2])
for i in range(MAX_MODELS*2)
]
radio_updates = [
gr.update(value=rates[i], visible=vis_flags[i//2])
for i in range(MAX_MODELS*2)
]
comment_updates = [
gr.update(value=comms[i], visible=vis_flags[i])
for i in range(MAX_MODELS)
]
return (
first_incomplete(), # idx_state
len(models), # nmodels_state
gr.update(value=idx_val), # idx_box
gr.update(value=hdr_val), # hdr_box
gr.update(value=para_val), # para_box
*out_updates,
*radio_updates,
*comment_updates,
back_update, next_update, # nav buttons
gr.update(visible=True,
value=f"**Annotator:** {annotator}"),
gr.update(visible=True), # download_btn
gr.update(visible=True) # annotation_area
)
start_btn.click(
start_app,
inputs=[upload_box, annot_box],
outputs=[
idx_state, nmodels_state,
idx_box, hdr_box, para_box,
*out_boxes, *radio_widgets, *comment_boxes,
back_btn, next_btn,
annotator_label,
download_btn,
annotation_area
],
)
# ─── RUN ───────────────────────────────────────────────────────────────────
if __name__ == "__main__":
demo.queue()
demo.launch() # keep share=False on HF Spaces