Upload folder using huggingface_hub
Browse files
- data/capybara_sharegpt.json +3 -0
- data/merged_all.json +3 -0
- data/remove_empty_output.py +13 -0
- data/synthia-v1.3_sharegpt_12500.json +3 -0
data/capybara_sharegpt.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1947d28999416a2f468d1e09654cfdfc9bab8ccd03aa184598d20f0000dd6e4
|
3 |
+
size 76361785
|
data/merged_all.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f915219dc691de207f9b8b448f005386330035f3c7f6cc6379dd183ac7662d2f
|
3 |
+
size 306656943
|
data/remove_empty_output.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Remove rows whose "output" field is an empty string from merged_all.json.

The file is rewritten in place. The row count is printed before and after
filtering.
"""
import json
import os

DATA_PATH = 'merged_all.json'


def remove_empty_output(rows):
    """Return a new list with the rows whose "output" is not the empty string.

    Args:
        rows: Iterable of mappings, each of which has an "output" key.

    Returns:
        A list of the rows, in their original order, whose "output" value
        differs from "".

    Raises:
        KeyError: If a row has no "output" key.
    """
    return [row for row in rows if row["output"] != ""]


def main(path=DATA_PATH):
    """Filter the JSON list stored at *path* and write it back in place.

    Args:
        path: Location of the JSON file to clean; defaults to DATA_PATH.
    """
    # Explicit UTF-8 so the read does not depend on the platform's locale
    # encoding (e.g. cp1252 on Windows).
    with open(path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    print(f"Normal len: {len(data)}")

    data = remove_empty_output(data)

    print(f"After len: {len(data)}")

    # Opening *path* with 'w' would truncate it before the dump completes, so
    # a failure mid-write would lose the dataset. Write to a sibling file and
    # swap it into place only once the dump has succeeded.
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as file:
            json.dump(data, file, indent=1)
        os.replace(tmp_path, path)
    finally:
        # Only present if the dump or the replace failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


if __name__ == "__main__":
    main()
|
data/synthia-v1.3_sharegpt_12500.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dbdbc7413a3c7fc65a900518f0db8627bb5ced53e1e8ee82613d09856c1b3b70
|
3 |
+
size 30638009
|