Spaces:
Build error
Build error
AgentVerse
committed on
Commit
·
906ccdd
1
Parent(s):
083e7b4
code cleaning
Browse files- dataloader/__init__.py +10 -0
- dataloader/commongen.py +21 -0
- dataloader/dataloader.py +19 -0
- dataloader/gsm8k.py +22 -0
- dataloader/humaneval.py +21 -0
- dataloader/logic_grid.py +22 -0
- dataloader/mgsm.py +23 -0
- dataloader/responsegen.py +21 -0
dataloader/__init__.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from agentverse.registry import Registry
|
2 |
+
|
3 |
+
# Shared registry that the loader classes attach themselves to via
# ``@dataloader_registry.register(...)``.  It must be created before the
# loader modules are imported below, because each of them does
# ``from . import dataloader_registry`` at import time.
dataloader_registry = Registry(name="dataloader")
|
4 |
+
|
5 |
+
from .gsm8k import GSM8KLoader
|
6 |
+
from .responsegen import ResponseGenLoader
|
7 |
+
from .humaneval import HumanevalLoader
|
8 |
+
from .commongen import CommongenLoader
|
9 |
+
from .mgsm import MGSMLoader
|
10 |
+
from .logic_grid import LogicGridLoader
|
dataloader/commongen.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .dataloader import DataLoader
|
2 |
+
from . import dataloader_registry
|
3 |
+
import json
|
4 |
+
|
5 |
+
|
6 |
+
@dataloader_registry.register("tasksolving/commongen/gpt-4")
@dataloader_registry.register("tasksolving/commongen/gpt-3.5")
class CommongenLoader(DataLoader):
    """Load a JSON Lines file whose records carry a ``"concepts"`` field.

    The class is registered under both the ``gpt-4`` and ``gpt-3.5``
    commongen keys.  Every record becomes
    ``{"input": record["concepts"], "answer": None}``; no reference answer
    is read from the file.
    """

    def __init__(self, path: str):
        """Create the loader for the JSON Lines file at ``path``."""
        super().__init__(path)

    def load(self):
        """Append one example per non-blank line of ``self.path``.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            KeyError: If a record has no ``"concepts"`` key.
        """
        # Explicit UTF-8 keeps decoding independent of the platform locale.
        with open(self.path, encoding="utf-8") as f:
            for raw in f:
                # Blank lines (e.g. a trailing empty line) are not records.
                if not raw.strip():
                    continue
                record = json.loads(raw)
                self.examples.append(
                    {
                        "input": record["concepts"],
                        "answer": None,
                    }
                )
|
dataloader/dataloader.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
from abc import abstractmethod
|
3 |
+
|
4 |
+
|
5 |
+
class DataLoader:
    """Base loader that reads a JSON Lines file into ``self.examples``.

    Loading happens eagerly: ``__init__`` calls :meth:`load`.  A subclass
    that needs attributes of its own while loading must therefore assign
    them *before* calling ``super().__init__(path)``.
    """

    def __init__(self, path: str):
        """Remember ``path``, start with no examples, then load the file.

        Args:
            path: Location of the JSON Lines file to read.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        self.path = path
        self.examples = []
        self.load()

    # NOTE: this class does not use ``ABCMeta``, so ``@abstractmethod`` is
    # not enforced; the body below acts as a usable default implementation.
    @abstractmethod
    def load(self):
        """Make sure that each example is formatted as {"input": ..., "answer": ...}

        The default implementation appends every non-blank line of
        ``self.path``, parsed as JSON, to ``self.examples`` unchanged.
        """
        # Explicit UTF-8 keeps decoding independent of the platform locale.
        with open(self.path, encoding="utf-8") as f:
            for line in f:
                # Blank lines (e.g. a trailing empty line) are not records
                # and would otherwise make ``json.loads`` raise.
                if not line.strip():
                    continue
                self.examples.append(json.loads(line))

    def __iter__(self):
        """Iterate over the loaded examples in file order."""
        return iter(self.examples)
|
dataloader/gsm8k.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .dataloader import DataLoader
|
2 |
+
from . import dataloader_registry
|
3 |
+
import json
|
4 |
+
import re
|
5 |
+
|
6 |
+
|
7 |
+
@dataloader_registry.register("tasksolving/gsm8k")
class GSM8KLoader(DataLoader):
    """Load a JSON Lines file with ``"question"`` and ``"answer"`` fields.

    Each record becomes ``{"input": question, "answer": tail}`` where
    ``tail`` is the text after the last ``"#### "`` marker in the record's
    answer (or the whole answer when the marker is absent).
    """

    def __init__(self, path: str):
        """Create the loader for the JSON Lines file at ``path``."""
        # Assigned before ``super().__init__`` because the base constructor
        # calls ``load()``.  ``load`` below does not use this pattern; it is
        # kept so existing code reading the attribute continues to work.
        self.answer_pat = re.compile(r"#### (-?\d+)")
        super().__init__(path)

    def load(self):
        """Append one example per non-blank line of ``self.path``.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            KeyError: If a record lacks ``"question"`` or ``"answer"``.
        """
        # Explicit UTF-8 keeps decoding independent of the platform locale.
        with open(self.path, encoding="utf-8") as f:
            for raw in f:
                # Blank lines (e.g. a trailing empty line) are not records.
                if not raw.strip():
                    continue
                record = json.loads(raw)
                self.examples.append(
                    {
                        "input": record["question"],
                        "answer": record["answer"].split('#### ')[-1],
                    }
                )
|
dataloader/humaneval.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .dataloader import DataLoader
|
2 |
+
from . import dataloader_registry
|
3 |
+
import json
|
4 |
+
|
5 |
+
|
6 |
+
@dataloader_registry.register("tasksolving/humaneval/gpt-4")
@dataloader_registry.register("tasksolving/humaneval/gpt-3.5")
class HumanevalLoader(DataLoader):
    """Load a JSON Lines file with ``"prompt"`` and ``"test"`` fields.

    The class is registered under both the ``gpt-4`` and ``gpt-3.5``
    humaneval keys.  Each record becomes
    ``{"input": record["prompt"], "answer": record["test"]}``.
    """

    def __init__(self, path: str):
        """Create the loader for the JSON Lines file at ``path``."""
        super().__init__(path)

    def load(self):
        """Append one example per non-blank line of ``self.path``.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            KeyError: If a record lacks ``"prompt"`` or ``"test"``.
        """
        # Explicit UTF-8 keeps decoding independent of the platform locale.
        with open(self.path, encoding="utf-8") as f:
            for raw in f:
                # Blank lines (e.g. a trailing empty line) are not records.
                if not raw.strip():
                    continue
                record = json.loads(raw)
                self.examples.append(
                    {
                        "input": record["prompt"],
                        "answer": record["test"],
                    }
                )
|
dataloader/logic_grid.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .dataloader import DataLoader
|
2 |
+
from . import dataloader_registry
|
3 |
+
import json
|
4 |
+
import re
|
5 |
+
|
6 |
+
|
7 |
+
@dataloader_registry.register("tasksolving/logic_grid/gpt-4")
class LogicGridLoader(DataLoader):
    """Load a JSON Lines file with ``"inputs"`` and ``"targets"`` fields.

    Each record becomes
    ``{"input": record["inputs"], "answer": record["targets"][0]}``; only
    the first entry of ``"targets"`` is kept.
    """

    def __init__(self, path: str):
        """Create the loader for the JSON Lines file at ``path``."""
        # Assigned before ``super().__init__`` because the base constructor
        # calls ``load()``.  ``load`` below does not use this pattern; it is
        # kept so existing code reading the attribute continues to work.
        self.answer_pat = re.compile(r"#### (-?\d+)")
        super().__init__(path)

    def load(self):
        """Append one example per non-blank line of ``self.path``.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            KeyError: If a record lacks ``"inputs"`` or ``"targets"``.
            IndexError: If ``"targets"`` is an empty sequence.
        """
        # Explicit UTF-8 keeps decoding independent of the platform locale.
        with open(self.path, encoding="utf-8") as f:
            for raw in f:
                # Blank lines (e.g. a trailing empty line) are not records.
                if not raw.strip():
                    continue
                record = json.loads(raw)
                self.examples.append(
                    {
                        "input": record["inputs"],
                        "answer": record["targets"][0],
                    }
                )
|
dataloader/mgsm.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .dataloader import DataLoader
|
2 |
+
from . import dataloader_registry
|
3 |
+
import json
|
4 |
+
import re
|
5 |
+
|
6 |
+
|
7 |
+
@dataloader_registry.register("tasksolving/mgsm/gpt-4")
@dataloader_registry.register("tasksolving/mgsm/gpt-3.5")
class MGSMLoader(DataLoader):
    """Load a JSON Lines file with ``"question"`` and ``"answer_number"``.

    The class is registered under both the ``gpt-4`` and ``gpt-3.5`` mgsm
    keys.  Each record becomes
    ``{"input": record["question"], "answer": record["answer_number"]}``.
    """

    def __init__(self, path: str):
        """Create the loader for the JSON Lines file at ``path``."""
        # Assigned before ``super().__init__`` because the base constructor
        # calls ``load()``.  ``load`` below does not use this pattern; it is
        # kept so existing code reading the attribute continues to work.
        self.answer_pat = re.compile(r"#### (-?\d+)")
        super().__init__(path)

    def load(self):
        """Append one example per non-blank line of ``self.path``.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            KeyError: If a record lacks ``"question"`` or
                ``"answer_number"``.
        """
        # Explicit UTF-8: question text may be non-ASCII, and the platform
        # default encoding is not guaranteed to decode it.
        with open(self.path, encoding="utf-8") as f:
            for raw in f:
                # Blank lines (e.g. a trailing empty line) are not records.
                if not raw.strip():
                    continue
                record = json.loads(raw)
                self.examples.append(
                    {
                        "input": record["question"],
                        "answer": record["answer_number"],
                    }
                )
|
dataloader/responsegen.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .dataloader import DataLoader
|
2 |
+
from . import dataloader_registry
|
3 |
+
import json
|
4 |
+
|
5 |
+
|
6 |
+
@dataloader_registry.register("tasksolving/responsegen/gpt-3.5")
@dataloader_registry.register("tasksolving/responsegen/gpt-4")
class ResponseGenLoader(DataLoader):
    """Load a JSON Lines file with ``"input"`` and ``"answer"`` fields.

    The class is registered under both the ``gpt-3.5`` and ``gpt-4``
    responsegen keys.  Each record is reduced to exactly
    ``{"input": record["input"], "answer": record["answer"]}``; any other
    keys in the record are dropped.
    """

    def __init__(self, path: str):
        """Create the loader for the JSON Lines file at ``path``."""
        super().__init__(path)

    def load(self):
        """Append one example per non-blank line of ``self.path``.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            KeyError: If a record lacks ``"input"`` or ``"answer"``.
        """
        # Explicit UTF-8 keeps decoding independent of the platform locale.
        with open(self.path, encoding="utf-8") as f:
            for raw in f:
                # Blank lines (e.g. a trailing empty line) are not records.
                if not raw.strip():
                    continue
                record = json.loads(raw)
                self.examples.append(
                    {
                        "input": record["input"],
                        "answer": record["answer"],
                    }
                )
|