|
from unsloth import FastLanguageModel, FastModel |
|
import torch |
|
from datasets import load_dataset |
|
from trl import GRPOConfig, GRPOTrainer |
|
from transformers.trainer_callback import TrainerCallback, TrainerControl, TrainerState |
|
from transformers.training_args import TrainingArguments |
|
from unsloth.chat_templates import standardize_data_formats, train_on_responses_only |
|
import re |
|
from lexicalrichness import LexicalRichness |
|
from collections import deque |
|
|
|
|
|
|
|
# Source strings for the regular expressions that flag overused prose
# constructions.  `[^.!?]*?` keeps a match inside a single sentence.
_regexes = [
    # "voice barely audible" / "voice barely above a whisper"
    r"voice barely (audible|above a whisper)",
    # "wasn't X, it ..." / "was not X; it ..."
    r"(wasn['’]t|was not)[^.!?]*?[;,] it ",
    # "down" or "up" followed later in the same sentence by "spine"
    r"(down|up)[^.!?]*?spine",
    # "wasn't / hadn't / weren't X, but ..."
    r"(wasn['’]t|was not|hadn['’]t|had not|weren['’]t|were not)[^.!?]*?[;,] but ",
    # "weren't X, <word> were ..."
    r"(weren['’]t|were not)[^.!?]*?[;,]\s*\w*? were ",
    # "not X, but ..."
    r"not[^.!?]*?[;,] but ",
    # "cross/crosses/crossed <word> arms|legs"
    r"cross(es|ed)? \w+ (arms|legs)",
    # a sentence-internal span containing four or more commas
    r"[^.!?]*([^.!?]*,){4,}[^.!?]*",
    # "crackle/crackled/crackles with tension"
    r"crackle(d|s)? with tension",
    # "mix.../blend of X and Y"
    r"(mix\w*|blend) of \w+ and \w+",
    # "as X as it/they was/is/were/are Y"
    r"as \w+ as (it|they) (was|is|were|are) \w+",
    # "widened/narrowed <word> eyes"
    r"(widen(ed|s)?|narrow(ed|s)?) \w+ eyes",
    # "eyes widened/narrowed"
    r"eyes (widen(ed|s)?|narrow(ed|s)?)",
    # "here ... topic:" (looks like a response preamble -- TODO confirm)
    r"here['’\s\w]+? topic:",
]

# Compiled, case-insensitive counterpart of every pattern above, in the same
# order.
REGEXES = tuple(re.compile(pattern, flags=re.IGNORECASE) for pattern in _regexes)
|
# Lower-case substrings that score_slop counts as slop.  Most entries are
# written without apostrophes or other punctuation (e.g. "wasnt simply").
# The list is kept in its original order; "---" entries sit between the
# hand-picked, three-word and two-word groups and are themselves members.
PHRASES = {
    # --- hand-picked phrases ---
    "a testament to",
    "living rock",
    "air hung",
    "hung heavy",
    "hung thick",
    "air thick",
    "air was thick",
    "clung",
    "palpable",
    "unsettling stillness",
    "forged in the fires",
    "suffocating blanket",
    ", almost",
    "tasted of",
    "monument to",
    "settled over",
    "chilling ",
    "beacon of",
    "mirrored the",
    "something uniquely",
    "of ozone",
    "heady scent",
    "breath hitch",
    "---",
    # --- three-word phrases ---
    "felt like a",
    "a stark contrast",
    "desperate attempt to",
    "stark contrast to",
    "a desperate attempt",
    "rain hammered against",
    "word piece exploring",
    "hung thick with",
    "a carefully constructed",
    "air hung thick",
    "the rain continued",
    "a constant reminder",
    "a desperate need",
    "beneath the surface",
    "continued to fall",
    "something far more",
    "rain continued to",
    "constant reminder of",
    "metallic tang of",
    "desperate need to",
    "a stark reminder",
    "the metallic tang",
    "felt less like",
    "wasnt simply a",
    "face etched with",
    "chilling certainty that",
    "wasnt merely a",
    "to fall washing",
    "a chilling certainty",
    "profound sense of",
    "palpable sense of",
    "said his voice",
    "a profound sense",
    "it felt like",
    "stark reminder of",
    "a slow deliberate",
    "a subtle shift",
    "subtle shifts in",
    "the carefully constructed",
    "less like a",
    "a desperate gamble",
    "a palpable sense",
    "the rain intensified",
    "acutely aware of",
    "voice a low",
    "the subtle shifts",
    "the encroaching darkness",
    "flicker of something",
    "subtle shift in",
    "air crackled with",
    "a desperate plea",
    "sense of unease",
    "a slow agonizing",
    "threatened to consume",
    "a carefully orchestrated",
    "a chilling realization",
    "silent testament to",
    "a chilling reminder",
    "the unsettling feeling",
    "inextricably linked to",
    "the traditional sense",
    "washing away the",
    "desperate need for",
    "it wasnt simply",
    "desperately trying to",
    "a silent testament",
    "his face etched",
    "a different kind",
    "hung thick and",
    "different kind of",
    "leaving behind a",
    "reminder that even",
    "carefully constructed facade",
    "something akin to",
    "wasnt a simple",
    "scent of damp",
    "of damp earth",
    "voice low and",
    "rain hammered down",
    "relentless pursuit of",
    "his voice low",
    "chilling reminder of",
    "hung heavy in",
    "last vestiges of",
    "faces etched with",
    "the last vestiges",
    "tang of blood",
    "the air grew",
    "damp earth and",
    "a strange unsettling",
    "his carefully constructed",
    "swirling vortex of",
    "air thick with",
    "growing sense of",
    "a solitary figure",
    "gaze fixed on",
    "the air thick",
    "the delicate balance",
    "exploring the scenario",
    "the sheer volume",
    "the corrugated iron",
    "a silent observer",
    "threatened to unravel",
    "honed by years",
    "the sheer scale",
    "felt like an",
    "find a way",
    "need to understand",
    "sheer scale of",
    "leaving behind only",
    "a swirling vortex",
    "clung to everything",
    "word exploration of",
    "a word exploration",
    "now felt like",
    "sheer volume of",
    "it wasnt merely",
    "wasnt driven by",
    "futile attempt to",
    "a growing sense",
    "scent of pine",
    "amidst the chaos",
    "the scenario youve",
    "rising tide of",
    "fall washing away",
    "sense of dread",
    "chipped away at",
    "voice laced with",
    "a physical blow",
    "yet beneath the",
    "threatened to overwhelm",
    "lingering scent of",
    "felt a strange",
    "their faces etched",
    "mirroring the frantic",
    "a suffocating blanket",
    "a desperate scramble",
    "felt a profound",
    "barely audible above",
    "enduring power of",
    "desperate plea for",
    "stubborn refusal to",
    "voice barely audible",
    "desperately tried to",
    "wasnt born of",
    "the enduring power",
    "skeletal remains of",
    "the conventional sense",
    "a subtle almost",
    "a grim determination",
    "seemed to absorb",
    "brute force but",
    "air thickened with",
    "the skeletal remains",
    "mirroring the relentless",
    "of something akin",
    "a silent promise",
    "the looming threat",
    "chilling testament to",
    "corrugated iron roof",
    "what felt like",
    "prickle of unease",
    "a meticulously crafted",
    "a slow insidious",
    "like a physical",
    "physical manifestation of",
    "a chilling testament",
    "far more complex",
    "a relentless grey",
    "deep within the",
    "a palpable tension",
    "a delicate dance",
    "a low rumble",
    "delicate balance of",
    "chilling clarity that",
    "a desperate hope",
    "his voice barely",
    "the natural world",
    "utterly devoid of",
    "the established order",
    "always seemed to",
    "a deliberate attempt",
    "stumbled upon a",
    "like a shroud",
    "a futile attempt",
    "a silent watchful",
    "like an eternity",
    "response exploring the",
    "a quiet almost",
    "his gaze fixed",
    "a physical manifestation",
    "deliberate attempt to",
    "profound and unsettling",
    "wasnt a warrior",
    "wasnt interested in",
    "beneath the veneer",
    "the air thickened",
    "something ancient and",
    "rustle of leaves",
    "desperately wanted to",
    "a stubborn refusal",
    "the storm brewing",
    "a calculated risk",
    "mirroring the storm",
    "a relentless drumming",
    "eyes burning with",
    "wasnt simply about",
    "trapped within the",
    "face a mask",
    "far more sinister",
    "the lingering scent",
    "act of defiance",
    "a primal fear",
    "a primal instinct",
    "the rising tide",
    "swift and brutal",
    "the air crackled",
    "he wasnt simply",
    "wasnt a grand",
    "unsettling feeling that",
    "a desperate almost",
    "a life lived",
    "his hand instinctively",
    "this wasnt simply",
    "primal need to",
    "her voice barely",
    "the first time",
    "hardened by years",
    "spent the last",
    "perhaps focusing on",
    "something older something",
    "sickening certainty that",
    "deliberate act of",
    "a desperate dance",
    "a desperate struggle",
    "a specific aspect",
    "desperate scramble for",
    "devastating consequences of",
    "continued its relentless",
    "the devastating consequences",
    "the chilling realization",
    "a gilded cage",
    "a bygone era",
    "something beyond the",
    "a chilling clarity",
    "hadnt stopped for",
    "the relentless pursuit",
    "fabric of reality",
    "far more insidious",
    "a deliberate act",
    "a silent acknowledgment",
    "wasnt a dramatic",
    "unsettling feeling of",
    "a deliberate almost",
    "the desperate need",
    "fall washing over",
    "a deeply ingrained",
    "frantic beat of",
    "stark reminder that",
    "the relentless drumming",
    "his voice strained",
    "a strange almost",
    "like a betrayal",
    "couldnt shake the",
    "silent promise of",
    "the oppressive atmosphere",
    "sliver of hope",
    "the frantic beat",
    "simple act of",
    "in hushed tones",
    "something else something",
    "a sickening certainty",
    "a word description",
    "a primal need",
    "was inextricably linked",
    "a grim reminder",
    "whirlwind of steel",
    "hung heavy with",
    "veteran of countless",
    "a relentless percussion",
    "a delicate balance",
    "chilling realization dawned",
    "it felt less",
    "drip of water",
    "within the confines",
    "a silent acknowledgement",
    "felt the weight",
    "fall washing the",
    "air thrummed with",
    "the air around",
    "voice barely a",
    "word response on",
    "silent acknowledgment of",
    "a cold dread",
    "grim testament to",
    "a fragile shield",
    "with brutal efficiency",
    "a small almost",
    "of countless battles",
    "a grim testament",
    "carried the weight",
    "force of nature",
    "air hung heavy",
    "far more dangerous",
    "exploring the provided",
    "felt a prickle",
    "seemed to mirror",
    "a small intricately",
    "rain hadnt stopped",
    "her carefully constructed",
    "chilling realization that",
    "small intricately carved",
    "and perhaps even",
    "a poignant reminder",
    "nestled amongst the",
    "voice tight with",
    "he couldnt quite",
    "shake the feeling",
    "radiating an unsettling",
    "a low resonant",
    "unwavering belief in",
    "seemed to amplify",
    "specific aspect of",
    "uncanny ability to",
    "a grizzled veteran",
    "shadows across the",
    "devoid of emotion",
    "seemed to vibrate",
    "the oppressive silence",
    "the unsettling truth",
    "fueled by adrenaline",
    "display of power",
    "storm brewing within",
    "wasnt a gentle",
    "face of overwhelming",
    "silent acknowledgement of",
    "yet amidst the",
    "grim reminder of",
    "a brutal reminder",
    "key to unlocking",
    "far more unsettling",
    "held the key",
    "steel and fury",
    "a profound unsettling",
    "struggle for survival",
    "crushing weight of",
    "brutal reminder of",
    "a silent sentinel",
    "a mirror reflecting",
    "eyes fixed on",
    "rain intensified blurring",
    "found himself increasingly",
    "exploring the described",
    "the subtle shift",
    "the air thrummed",
    "subtle almost imperceptible",
    "forgotten corner of",
    "perhaps just perhaps",
    "a grotesque parody",
    "a man built",
    "the constant threat",
    "glimmer of hope",
    "the unsettling realization",
    "the storm raging",
    "a living breathing",
    "seemed to shift",
    "with terrifying speed",
    "the watchful eyes",
    "looming threat of",
    "and something else",
    "of immense power",
    "the darkness within",
    "a temporary reprieve",
    "watchful eyes of",
    "mirroring the tempest",
    "eyes scanning the",
    "with chilling certainty",
    "barely a whisper",
    "a constant mournful",
    "the true nature",
    "air was thick",
    "rain intensified washing",
    "grotesque parody of",
    "a constant unsettling",
    "a final desperate",
    "the slow agonizing",
    "with chilling clarity",
    "true nature of",
    "spent his life",
    "flicker of hope",
    "weight of responsibility",
    "ever the pragmatist",
    "---",
    # --- two-word phrases ---
    "carefully constructed",
    "wasnt simply",
    "stark contrast",
    "rain hammered",
    "hung thick",
    "desperate attempt",
    "wasnt merely",
    "piece exploring",
    "something far",
    "air hung",
    "desperate need",
    "rain continued",
    "metallic tang",
    "constant reminder",
    "chilling certainty",
    "leaving behind",
    "less like",
    "slow deliberate",
    "stark reminder",
    "felt less",
    "subtle shift",
    "meticulously crafted",
    "brute force",
    "hung heavy",
    "chilling realization",
    "face etched",
    "almost imperceptible",
    "slow agonizing",
    "damp earth",
    "carefully orchestrated",
    "fall washing",
    "subtle shifts",
    "profound sense",
    "palpable sense",
    "trapped within",
    "acutely aware",
    "relentless pursuit",
    "desperate plea",
    "unsettling feeling",
    "grim determination",
    "desperate gamble",
    "brutal efficiency",
    "rain intensified",
    "delicate balance",
    "something else",
    "deep within",
    "encroaching darkness",
    "inextricably linked",
    "voice low",
    "relentless drumming",
    "washing away",
    "chilling reminder",
    "profoundly unsettling",
    "werent simply",
    "air crackled",
    "stumbled upon",
    "perhaps even",
    "silent testament",
    "voice barely",
    "desperate hope",
    "traditional sense",
    "stone walls",
    "unsettling stillness",
    "desperately trying",
    "different kind",
    "unwavering loyalty",
    "felt increasingly",
    "something beyond",
    "constructed facade",
    "something akin",
    "desperate struggle",
    "three days",
    "barely audible",
    "strange unsettling",
    "relentless assault",
    "settled upon",
    "faces etched",
    "air grew",
    "last vestiges",
    "something ancient",
    "felt profoundly",
    "solitary figure",
    "hadnt simply",
    "swirling vortex",
    "didnt offer",
    "far beyond",
    "air thick",
    "chilling clarity",
    "quiet strength",
    "raw power",
    "intricately carved",
    "desperate scramble",
    "young man",
    "growing sense",
    "sheer scale",
    "hadnt stopped",
    "wasnt born",
    "primal fear",
    "gaze fixed",
    "werent merely",
    "hand instinctively",
    "almost unsettling",
    "offered little",
    "silent observer",
    "sheer volume",
    "corrugated iron",
    "couldnt quite",
    "subtle almost",
    "crumbling stone",
    "air thickened",
    "suffocating blanket",
    "primal instinct",
    "yet beneath",
    "carefully crafted",
    "gilded cage",
    "didnt understand",
    "countless battles",
    "deeply ingrained",
    "looming threat",
    "rising tide",
    "futile attempt",
    "silent watchful",
    "palpable tension",
    "meticulously constructed",
    "wasnt driven",
    "impending doom",
    "carefully cultivated",
    "chaotic energy",
    "slow insidious",
    "brewing within",
    "conventional sense",
    "remained stubbornly",
    "far greater",
    "something deeper",
    "primal need",
    "physical blow",
    "lingering scent",
    "suffocating weight",
    "meticulously documented",
    "chipped away",
    "skeletal remains",
    "voice laced",
    "grey stone",
    "perpetual twilight",
    "simmering resentment",
    "unlike anything",
    "delicate dance",
    "unsettling beauty",
    "enduring power",
    "stubborn refusal",
    "former self",
    "realization dawned",
    "established order",
    "unsettling silence",
    "unsettling truth",
    "wasnt seeking",
    "ruthless efficiency",
    "relentless grey",
    "nestled amongst",
    "pressed onward",
    "voice strained",
    "silent promise",
    "bruised purple",
    "something profoundly",
    "calculated risk",
    "desperately tried",
    "isnt simply",
    "meticulously planned",
    "quiet almost",
    "deliberate almost",
    "felt utterly",
    "air around",
    "storm brewing",
    "growing unease",
    "man whose",
    "every step",
    "white tower",
    "something older",
    "desperate dance",
    "response exploring",
    "never truly",
    "always felt",
    "older something",
    "years spent",
    "far older",
    "chilling testament",
    "iron roof",
    "devastating consequences",
    "physical manifestation",
    "deliberate act",
    "always seemed",
    "low rumble",
    "fragile hope",
    "deeply unsettling",
    "natural world",
    "descended upon",
    "mirror reflecting",
    "rain hadnt",
    "desolate landscape",
    "hed witnessed",
    "deliberate attempt",
    "utterly devoid",
    "immense power",
    "oppressive atmosphere",
    "desperately wanted",
    "voice tight",
    "spoke volumes",
    "sickening certainty",
    "watchful eyes",
    "specific aspect",
    "wasnt interested",
    "eyes burning",
    "damp stone",
    "hidden beneath",
    "small almost",
    "desperate almost",
    "simple act",
    "unsettling energy",
    "cold calculating",
    "grizzled veteran",
    "dust motes",
    "didnt simply",
    "true nature",
    "almost ritualistic",
    "unwavering belief",
    "fragile peace",
    "strange almost",
    "first time",
    "unsettling quiet",
    "left behind",
    "chilling precision",
    "life lived",
    "constant threat",
    "felt hollow",
    "grey sky",
    "stepped forward",
    "frantic energy",
    "yet amidst",
    "initially dismissed",
    "unsettling grace",
    "raw untamed",
    "growing dread",
    "perhaps focusing",
    "quiet observation",
    "wasnt sure",
    "silent acknowledgment",
    "couldnt shake",
    "didnt speak",
    "unsettling realization",
    "hidden within",
    "genuine connection",
    "unwavering resolve",
    "surged forward",
    "focal point",
    "cold dread",
    "almost obsessive",
    "oppressive silence",
    "final desperate",
    "wasnt built",
    "rain mirrored",
    "shift occurred",
    "bygone era",
    "tapestry woven",
    "darkness within",
    "frantic beat",
    "carefully curated",
    "quiet dignity",
    "hushed tones",
    "shadows across",
    "small intricately",
    "profound unsettling",
    "almost clinical",
    "potential threat",
    "unsettling atmosphere",
    "unsettling presence",
    "began subtly",
    "chaotic dance",
    "crushing weight",
    "desperately needed",
    "centuries ago",
    "else something",
    "momentarily stunned",
    "forgotten lore",
    "polished obsidian",
    "grim reminder",
    "desperate act",
    "silent acknowledgement",
    "brute strength",
    "direct confrontation",
    "etched onto",
    "meticulously documenting",
    "relentless percussion",
    "something darker",
    "grim testament",
    "fragile shield",
    "relentless advance",
    "fragmented memories",
    "deeply rooted",
    "chilling efficiency",
    "brutal reality",
    "treacherous currents",
    "biting wind",
    "air thrummed",
    "something vaguely",
    "spent years",
    "profound understanding",
    "wasnt entirely",
    "couldnt afford",
    "felt heavy",
    "raging within",
    "dancing shadows",
    "poignant reminder",
    "brutal reminder",
    "drip drip",
    "turning point",
    "eyes scanning",
    "unnerving stillness",
    "brow furrowed",
    "quiet intensity",
    "immediate threat",
    "shared understanding",
    "relentless pressure",
    "watchful presence",
    "forgotten corner",
    "unsettling calm",
    "profound sadness",
    "low resonant",
    "ancient texts",
    "constant unsettling",
    "salt spray",
    "silent sentinel",
    "fleeting moment",
    "eyes reflecting",
    "chilling awareness",
    "silas blackwood",
    "simmering rage",
    "hed initially",
    "intricate patterns",
    "vantage point",
    "felt different",
    "swift decisive",
    "world around",
    "horrifying truth",
    "desperately sought",
    "storm raging",
    "strategically placed",
    "faces grim",
    "watchful gaze",
    "relentless tide",
    "centered around",
    "shared experience",
    "flickering light",
    "barely perceptible",
    "precarious balance",
    "overwhelming odds",
    "uncanny ability",
    "silent witness",
    "living breathing",
    "casting long",
    "frantic rhythm",
    "ground beneath",
    "silence punctuated",
    "swirling around",
    "felt strangely",
    "buried beneath",
    "desperately seeking",
    "eyes fixed",
    "intensified blurring",
    "temporary reprieve",
    "swift brutal",
    "terrifying speed",
    "every movement",
    "another layer",
    "something within",
    "hed anticipated",
    "unsettling power",
    "irrevocably altered",
    "chilling echo",
    "shifting sands",
    "almost frantic",
    "metallic scent",
    "blackwood manor",
    "damp air",
    "stripping away",
    "chilling premonition",
    "face pale",
    "perilous journey",
    "face grim",
    "constant companion",
    "however remained",
    "stripped bare",
    "flickering candlelight",
    "suddenly felt",
    "brief respite",
    "grotesque parody",
    "almost mournful",
    "subtle unsettling",
    "practiced grace",
    "constant mournful",
    "brutal dance",
    "nervous energy",
    "almost palpable",
    "isnt merely",
    "precious seconds",
    "desperate yearning",
    "desperate desire",
    "constructed illusion",
    "almost unbearable",
    "unpredictable nature",
    "internal struggle",
    "intensified washing",
    "unwavering focus",
    "weathered stone",
    "something metallic",
    "utterly alien",
    "every move",
    "oppressive darkness",
    "grim reality",
    "man sculpted",
    "jagged peaks",
    "something shifted",
    "overwhelming force",
}
|
|
|
# Single characters whose every occurrence counts as slop:
# em dash, en dash, asterisk, hash and horizontal ellipsis.
PUNC = list("—–*#…")

# Number of times slop_reward has been invoked so far.
COUNT = 0
|
|
|
|
|
def score_lexical(text: str):
    """Score lexical diversity of *text* from its MTLD value.

    The score is ``mtld - 100`` capped at 20; there is no lower bound, so
    text with an MTLD below 100 scores negative.  The MTLD and the score
    are printed for monitoring.
    """
    richness = LexicalRichness(text)
    mtld = richness.mtld()

    score = mtld - 100.0
    if score > 20.0:
        # Cap the bonus so diversity cannot dominate the total reward.
        score = 20.0

    print(f"MTLD = {mtld}, score = {score}")
    return score
|
|
|
|
|
def score_slop(prompt: str, completion: str):
    """Score *completion* by how few "slop" markers it contains.

    Every hit costs 5 points from a starting score of 25:

    * each "rain" / "air" substring counts twice, unless the prompt itself
      contains that word;
    * each character listed in ``PUNC`` counts once;
    * each occurrence of a ``PHRASES`` entry counts once;
    * each match of a ``REGEXES`` pattern counts once.

    Matching is case-insensitive throughout.  The phrase list is entirely
    lower-case, so without lower-casing the text first, sentence-initial
    phrases and capitalised names ("Silas Blackwood") would never match.

    Args:
        prompt: The user prompt the completion answers.
        completion: The generated text to score.

    Returns:
        ``25.0 - 5.0 * hits``; 25.0 for text with no hits, unbounded below.
    """
    prompt_lc = prompt.lower()
    completion_lc = completion.lower()
    # Letters and single spaces only: this is the form the punctuation-free
    # phrases ("wasnt simply") are written in.
    letters_only = re.sub(r"[^a-z ]", "", completion_lc)

    count = 0

    # NOTE(review): these are substring counts, so "train" or "hair" also
    # count as hits -- confirm whether whole-word matching is wanted.
    for word in ("rain", "air"):
        if word in prompt_lc:
            continue
        hits = completion_lc.count(word)
        if hits:
            count += hits * 2
            print(f"{word} = {hits}")

    count += sum(completion.count(p) for p in PUNC)

    for phrase in PHRASES:
        needle = phrase.lower()
        if re.fullmatch(r"[a-z ]*", needle):
            count += letters_only.count(needle)
        else:
            # Phrases containing punctuation (", almost", "---") cannot
            # occur in the stripped text; look for them in the raw text.
            count += completion_lc.count(needle)

    count += sum(len(r.findall(completion)) for r in REGEXES)

    score = 25.0 - (count * 5.0)
    print(f"slop count = {count}, score = {score}")
    return score
|
|
|
|
|
def score_word_count(prompt: str, completion: str):
    """Score how closely *completion* matches the requested word count.

    The requested count is read from the second space-separated token of
    *prompt* (e.g. ``"Write 500 words ..."`` -- presumably the dataset's
    fixed prompt format; verify against the data).

    Words in the completion are counted by splitting on any whitespace
    run, so newlines and tabs separate words and repeated spaces do not
    create empty "words".

    Args:
        prompt: Prompt whose second token is the requested word count.
        completion: Generated text to measure.

    Returns:
        ``20.0 - 200.0 * relative_error``; 20.0 for an exact match, and
        0.0 when the count is off by 10%.

    Raises:
        IndexError: If the prompt has fewer than two space-separated tokens.
        ValueError: If the second token is not an integer.
    """
    requested = int(prompt.split(" ")[1])
    # split() with no argument splits on every whitespace run; split(" ")
    # would glue together words separated only by a line break.
    actual = len(completion.split())
    # The tiny epsilon avoids a ZeroDivisionError for a requested count of 0.
    diff = abs(requested - actual) / (requested + 1e-9)
    score = 20.0 - (diff * 200.0)
    print(
        f"requested words = {requested}, actual = {actual}, diff = {diff:.3f}, score = {score}"
    )
    return score
|
|
|
|
|
def slop_reward(prompts, completions, **kwargs) -> list[float]:
    """Reward function: slop score plus word-count score, later plus MTLD.

    Each call increments the module-level ``COUNT``.  For the first 199
    calls a reward is ``score_slop + score_word_count``; from call 200
    onward ``score_lexical`` is added as well.

    Args:
        prompts: One chat-message list per sample; the text of the first
            message is used.
        completions: One chat-message list per sample; the text of the
            first message is scored.
        **kwargs: Accepted and ignored.

    Returns:
        One float reward per completion, in input order.
    """
    global COUNT
    COUNT += 1
    with_lexical = not COUNT < 200

    texts = [messages[0]["content"] for messages in completions]

    rewards = []
    for idx, text in enumerate(texts):
        prompt_text = prompts[idx][0]["content"]
        total = score_slop(prompt_text, text) + score_word_count(prompt_text, text)
        if with_lexical:
            total = total + score_lexical(text)
        rewards.append(total)
    return rewards
|
|
|
|
|
# Number of most recent logged rewards considered for early stopping.
MAXLEN = 3

# Sliding window of the last MAXLEN logged rewards (older ones fall off).
REWARDS = deque([], MAXLEN)

# Highest attainable total reward: 25 (slop) + 20 (word count) + 20 (lexical).
REWARD_MAX = 25.0 + 20.0 + 20.0
|
|
|
|
|
class MonitorRewardCallback(TrainerCallback):
    """Stop training once the recent average reward is close to REWARD_MAX.

    Every logged ``"reward"`` value is pushed into the module-level
    ``REWARDS`` window.  Training is asked to stop when the window's sum
    divided by ``MAXLEN`` reaches ``REWARD_MAX - 5.0``.
    """

    def on_log(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        **kwargs,
    ):
        """Record the logged reward and request a stop when it is high enough.

        Args:
            args: Training arguments (unused).
            state: Trainer state (unused).
            control: Control object; ``should_training_stop`` is set to
                ``True`` when the stopping condition is met.
            **kwargs: Expected to carry the log dict under ``"logs"``.
        """
        logs = kwargs.get("logs")
        if not logs:
            return

        reward = logs.get("reward")
        if reward is None:
            # Log entries without a reward must not dilute the window.
            return

        REWARDS.append(reward)
        # Dividing by MAXLEN rather than len(REWARDS) deliberately keeps
        # the average low while the window is still filling up.
        mean = sum(REWARDS) / float(MAXLEN)
        if mean >= REWARD_MAX - 5.0:
            control.should_training_stop = True
|
|
|
|
|
# Total sequence budget; also used below to derive the completion length.
max_seq_length = 1200

# Load the base model and tokenizer without 4-bit or 8-bit quantisation.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="google/gemma-3-4b-it",
    max_seq_length=max_seq_length,
    load_in_4bit=False,
    load_in_8bit=False,
)

# Wrap the model with LoRA adapters: rank 64, alpha 128, rank-stabilised
# scaling, no dropout, no bias terms.  Vision layers are left untouched.
model = FastLanguageModel.get_peft_model(
    model,
    finetune_vision_layers=False,
    finetune_language_layers=True,
    finetune_attention_modules=True,
    finetune_mlp_modules=True,
    r=64,
    lora_alpha=128,
    lora_dropout=0,
    bias="none",
    random_state=888,
    use_rslora=True,
    use_gradient_checkpointing="unsloth",
)
|
|
|
# Training prompts, read from a local JSON file (machine-specific path).
dataset = load_dataset(
    "json", data_files="/home/anon/dataset_small.json", split="train"
)
|
|
|
# Share of the sequence budget reserved for the prompt.
max_prompt_length = 200

training_args = GRPOConfig(
    # Sampling settings for the generated completions.
    temperature=1.5,
    min_p=0.1,
    # Loss, optimiser and learning-rate schedule.
    loss_type="bnpo",
    learning_rate=3e-6,
    adam_beta1=0.9,
    adam_beta2=0.9,
    weight_decay=5e-6,
    warmup_ratio=0.1,
    lr_scheduler_type="cosine",
    optim="adamw_torch_fused",
    scale_rewards=False,
    # Log every step; MonitorRewardCallback reads "reward" from these logs.
    logging_steps=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    num_generations=15,
    max_prompt_length=max_prompt_length,
    # Whatever remains of the sequence budget: 1200 - 200 = 1000.
    max_completion_length=max_seq_length - max_prompt_length,
    num_train_epochs=1,
    max_grad_norm=0.1,
    report_to="wandb",
    output_dir="outputs",
)
|
|
|
# GRPO trainer with slop_reward as the only reward function and the
# early-stopping callback attached.
trainer = GRPOTrainer(
    model=model,
    processing_class=tokenizer,
    reward_funcs=[slop_reward],
    args=training_args,
    train_dataset=dataset,
    callbacks=[MonitorRewardCallback()],
)
|
|
|
def _to_gib(num_bytes):
    """Convert a byte count to GiB, rounded to three decimals."""
    return round(num_bytes / 1024 / 1024 / 1024, 3)


# Snapshot GPU 0 before training so the memory added by training can be
# reported afterwards.
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = _to_gib(torch.cuda.max_memory_reserved())
max_memory = _to_gib(gpu_stats.total_memory)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

trainer_stats = trainer.train()

# Peak reserved memory after training, in absolute terms and relative to the
# pre-training snapshot and the device total.
used_memory = _to_gib(torch.cuda.max_memory_reserved())
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)

print(
    f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training."
)
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")
|
|
|
# Save the model and tokenizer to "gemma3_tune", then write a merged copy
# to "gemma3_tune_merged".
model.save_pretrained("gemma3_tune")
tokenizer.save_pretrained("gemma3_tune")
model.save_pretrained_merged("gemma3_tune_merged", tokenizer)
|
|