Rename
- .gitignore +1 -1
- README.md +2 -2
- app.py +1 -1
- images/{solbench.svg → soliditybench.svg} +67 -45
- soliditybench.svg +105 -0
- src/about.py +1 -1
- src/display/utils.py +1 -1
- src/envs.py +1 -1
- src/leaderboard/read_evals.py +3 -3
- src/populate.py +1 -1
.gitignore
CHANGED
```diff
@@ -1,4 +1,4 @@
-
+soliditybench/
 ignore/
 auto_evals/
 venv/
```
README.md
CHANGED
```diff
@@ -7,11 +7,11 @@ sdk: gradio
 app_file: app.py
 pinned: true
 datasets:
-- braindao/
+- braindao/soliditybench-naive-judge-openzeppelin-v1
 - braindao/humaneval-for-solidity-25
 license: apache-2.0
 sdk_version: 4.40.0
 thumbnail: >-
   https://cdn-uploads.huggingface.co/production/uploads/5f19edf678d261307936f4c8/4v6TPbN8qa6JptyCFUy-J.png
-short_description:
+short_description: SolidityBench Leaderboard
 ---
```
app.py
CHANGED
```diff
@@ -254,5 +254,5 @@ scheduler.add_job(restart_space, "interval", seconds=900)
 scheduler.start()
 demo.queue(default_concurrency_limit=40).launch(
     server_name="0.0.0.0",
-    allowed_paths=["images/
+    allowed_paths=["images/soliditybench.svg"],
 )
```
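The `allowed_paths` update pairs with the `TITLE` change in `src/about.py` below: Gradio only serves local files that are explicitly whitelisted, so an `<img>` pointing at `file/images/soliditybench.svg` renders only if that path is passed to `launch()`. A minimal sketch of the pattern (a stand-in app, not the Space's actual code):

```python
import gradio as gr

# Stand-in demo: a local image referenced through the "file/" route is
# only served when its path appears in launch(allowed_paths=...).
TITLE = '<img src="file/images/soliditybench.svg" width="500">'

with gr.Blocks() as demo:
    gr.HTML(TITLE)

demo.launch(
    server_name="0.0.0.0",
    allowed_paths=["images/soliditybench.svg"],  # omit this and the image 404s
)
```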
images/{solbench.svg → soliditybench.svg}
RENAMED
File without changes
soliditybench.svg
ADDED
src/about.py
CHANGED
```diff
@@ -29,7 +29,7 @@ class Tasks(Enum):
 # ---------------------------------------------------
 
 # Your leaderboard name
-TITLE = """<br><img src="file/images/
+TITLE = """<br><img src="file/images/soliditybench.svg" width="500" style="display: block; margin-left: auto; margin-right: auto;">
 <h2 align="center" id="space-title">Solidity Leaderboard | Powered by IQ</h2>"""
 
 # What does your leaderboard evaluate?
```
src/display/utils.py
CHANGED
```diff
@@ -38,7 +38,7 @@ auto_eval_column_dict = [
         "", "str", True, never_hidden=True)),
     ("model", ColumnContent, create_column_content(
         "Model", "markdown", True, never_hidden=True)),
-    ("
+    ("soliditybench", ColumnContent, create_column_content("Score", "number", True)),
     # ("average", ColumnContent, create_column_content("Average", "number", True)),
 ]
 
```
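For context, here is a self-contained sketch of how such a column list typically becomes the `AutoEvalColumn` namespace that `read_evals.py` and `populate.py` reference. It follows the stock Hugging Face leaderboard template; `ColumnContent` and `create_column_content` are assumed to look roughly like this, since their definitions are not part of this diff:

```python
from dataclasses import dataclass, make_dataclass

# Assumed shapes, modeled on the stock HF leaderboard template.
@dataclass(frozen=True)
class ColumnContent:
    name: str              # header shown in the table, e.g. "Score"
    type: str              # "str", "markdown", "number", ...
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

def create_column_content(name, col_type, displayed, hidden=False, never_hidden=False):
    return ColumnContent(name, col_type, displayed, hidden, never_hidden)

auto_eval_column_dict = [
    ("model", ColumnContent, create_column_content("Model", "markdown", True, never_hidden=True)),
    ("soliditybench", ColumnContent, create_column_content("Score", "number", True)),
]

# Each (attribute, type, default) triple becomes a field on a generated class,
# so the new column is reachable as AutoEvalColumn.soliditybench.name == "Score".
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
print(AutoEvalColumn.soliditybench.name)  # Score
```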
src/envs.py
CHANGED
```diff
@@ -18,7 +18,7 @@ REQUESTS_REPO = os.environ.get("REQUESTS_REPO")
 RESULTS_REPO = os.environ.get("RESULTS_REPO")
 
 # If you setup a cache later, just change HF_HOME
-CACHE_PATH = os.path.join(os.getenv("HF_HOME", "."), "
+CACHE_PATH = os.path.join(os.getenv("HF_HOME", "."), "soliditybench")
 
 # Local caches
 EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "requests")
```
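This cache rename is also why `.gitignore` gained the `soliditybench/` entry above: with `HF_HOME` unset, the cache falls back to a `soliditybench/` directory in the repo root. A quick sketch of the fallback behavior (only the `requests` subdirectory is shown in this diff):

```python
import os

# With HF_HOME unset, os.getenv falls back to ".", so CACHE_PATH resolves
# to ./soliditybench and the eval-request cache nests underneath it.
cache_path = os.path.join(os.getenv("HF_HOME", "."), "soliditybench")
print(os.path.join(cache_path, "requests"))  # ./soliditybench/requests
```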
src/leaderboard/read_evals.py
CHANGED
```diff
@@ -127,7 +127,7 @@ class EvalResult:
             'human_eval_solidity_pass_3': self.results.get('human_eval_solidity_pass_3', 0)
         }
 
-
+        soliditybench = 0
        non_zero_scores = {k: v for k, v in scores.items() if v != 0}
         if non_zero_scores:
             weights = {
@@ -136,7 +136,7 @@ class EvalResult:
                 'human_eval_solidity_pass_3': 0.2
             }
             total_weight = sum(weights[k] for k in non_zero_scores)
-
+            soliditybench = sum(scores[k] * weights[k] / total_weight for k in non_zero_scores)
 
         data_dict = {
             "eval_name": self.eval_name,  # not a column, just a save name,
@@ -148,7 +148,7 @@ class EvalResult:
             AutoEvalColumn.model.name: make_clickable_model(self.model_name),
             AutoEvalColumn.revision.name: self.revision,
             # AutoEvalColumn.average.name: average,
-            AutoEvalColumn.
+            AutoEvalColumn.soliditybench.name: soliditybench,
             AutoEvalColumn.license.name: self.license,
             AutoEvalColumn.likes.name: self.likes,
             AutoEvalColumn.params.name: self.num_params,
```
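In other words, the new `soliditybench` score is a weighted average over whichever benchmarks actually produced a non-zero score, with the weights renormalized so a missing benchmark doesn't drag the average down. Only the `human_eval_solidity_pass_3` weight (0.2) is visible in this hunk; the remaining key and weights below are placeholders for illustration:

```python
# Standalone sketch of the renormalized weighted average added above.
# Only human_eval_solidity_pass_3's weight (0.2) appears in the diff;
# the other key and weights are made-up placeholders.
scores = {
    "naive_judge_openzeppelin": 85.0,    # placeholder key and value
    "human_eval_solidity_pass_1": 0.0,   # missing benchmark -> excluded
    "human_eval_solidity_pass_3": 60.0,  # placeholder value
}
weights = {
    "naive_judge_openzeppelin": 0.5,     # placeholder
    "human_eval_solidity_pass_1": 0.3,   # placeholder
    "human_eval_solidity_pass_3": 0.2,   # from the diff
}

soliditybench = 0
non_zero_scores = {k: v for k, v in scores.items() if v != 0}
if non_zero_scores:
    total_weight = sum(weights[k] for k in non_zero_scores)  # 0.5 + 0.2 = 0.7
    soliditybench = sum(scores[k] * weights[k] / total_weight for k in non_zero_scores)

print(round(soliditybench, 2))  # (85*0.5 + 60*0.2) / 0.7 = 77.86
```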
src/populate.py
CHANGED
```diff
@@ -20,7 +20,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
 
     df = pd.DataFrame.from_records(all_data_json)
     # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
-    df = df.sort_values(by=[AutoEvalColumn.
+    df = df.sort_values(by=[AutoEvalColumn.soliditybench.name], ascending=False)
     df = df[cols].round(decimals=2)
 
     # filter out if any of the benchmarks have not been produced
```