Spaces:

AIML-TUDA
/

VerifiableRewardsForScalableLogicalReasoning

Sleeping

App Files Files Community

Lukas Helff committed on Aug 14

Commit

f567a45

1 Parent(s): 62ba87c

fix validation program

Browse files

Files changed (1) hide show

VerifiableRewardsForScalableLogicalReasoning.py +16 -24

VerifiableRewardsForScalableLogicalReasoning.py CHANGED Viewed

@@ -99,19 +99,21 @@ Returns:
     detailed_results (`list` of `dict`): Per-example results including correctness, partial score, execution time, and any errors encountered.
 """
-def validate_rule_no_hardcoded_cars(prediction):
-    """Reject rules that hardcode specific car identifiers"""
-    import re
-    # Look for has_car with a constant (lowercase) in second position
-    hardcoded_pattern = r'has_car\([^,]+,\s*([a-z][a-z0-9_]*)\)'
-    matches = re.findall(hardcoded_pattern, prediction)
-    if matches:
-        return False, f"Cars must be variables: {matches[0]}"
-    return True, "Rule is valid"
 def _evaluate_with_prolog(prediction, validation_program, eval_config, timeout=5):
@@ -128,15 +130,6 @@ def _evaluate_with_prolog(prediction, validation_program, eval_config, timeout=5
     # extract predicate from rule_to_evaluate
     rule_to_evaluate = extract_ilp_from_text_v2(prediction, positive_pred, allow_multiple_rules)
-    is_valid, validation_msg = validate_rule_no_hardcoded_cars(rule_to_evaluate)
-    if not is_valid:
-        return {
-            "is_correct": False,
-            "partial_score": 0.0,
-            "syntax_valid": False,
-            "error": f"Rule validation failed: {validation_msg}"
-        }
     if positive_pred not in rule_to_evaluate:
         p = prediction.replace('\n', ' ')
         return {
@@ -175,8 +168,7 @@ check_count(Count) :-
 check_all :- forall((pos({vars});neg({vars})), check({vars})).
     """
     # Add the rule to evaluate
-    validation_program = re.sub(rf'\b{positive_pred}\b', 'pos', validation_program)
-    validation_program = re.sub(rf'\b{negative_pred}\b', 'neg', validation_program)
     pos_negs = validation_program.count("pos(") + validation_program.count("neg(")
     validation_program = '\n'.join(sorted(validation_program.splitlines()))

     detailed_results (`list` of `dict`): Per-example results including correctness, partial score, execution time, and any errors encountered.
 """
def fix_validation_program(validation_program, positive_pred="eastbound", negative_pred="westbound"):
    """
    Anonymize a validation program so labels and instance names are generic.

    - Renames the positive / negative head predicates to ``pos`` / ``neg``.
    - Renames train constants that open an argument list
      (``(train...`` -> ``(mytrain...``).
    - Renames car constants that start an argument
      (``(car...`` -> ``(mycar...``, ``, car...`` -> ``, mycar...``), and also
      ``car<digit>...`` after a comma with no or several whitespace characters.

    Args:
        validation_program (str): Program text to anonymize.
        positive_pred (str): Name of the positive head predicate.
        negative_pred (str): Name of the negative head predicate.

    Returns:
        str: The anonymized program text. Applying the function twice yields
        the same result as applying it once for the instance renames.
    """
    # Escape the predicate names: they are plain identifiers, not patterns, so
    # a name containing a regex metacharacter must only match itself.
    validation_program = re.sub(rf'\b{re.escape(positive_pred)}\b', 'pos', validation_program)
    validation_program = re.sub(rf'\b{re.escape(negative_pred)}\b', 'neg', validation_program)
    # Trains are expected to look like train\d+ and cars like car\d+_\d+.
    validation_program = validation_program.replace('(train', '(mytrain')
    validation_program = validation_program.replace('(car', '(mycar').replace(', car', ', mycar')
    # The literal ', car' replacement above only covers exactly one space after
    # the comma; also catch ',car0_1' and ',  car0_1'. The digit lookahead keeps
    # this extra rule from touching predicates such as car_color(...).
    validation_program = re.sub(r'(,\s*)car(?=\d)', r'\1mycar', validation_program)
    return validation_program
 def _evaluate_with_prolog(prediction, validation_program, eval_config, timeout=5):
     # extract predicate from rule_to_evaluate
     rule_to_evaluate = extract_ilp_from_text_v2(prediction, positive_pred, allow_multiple_rules)
     if positive_pred not in rule_to_evaluate:
         p = prediction.replace('\n', ' ')
         return {
 check_all :- forall((pos({vars});neg({vars})), check({vars})).
     """
     # Add the rule to evaluate
+    validation_program = fix_validation_program(validation_program, positive_pred, negative_pred)
     pos_negs = validation_program.count("pos(") + validation_program.count("neg(")
     validation_program = '\n'.join(sorted(validation_program.splitlines()))