Weyaxi committed on
Commit
27f3843
·
verified ·
1 Parent(s): be80ca1

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ capybara_sharegpt.json filter=lfs diff=lfs merge=lfs -text
37
+ merged_all.json filter=lfs diff=lfs merge=lfs -text
38
+ synthia-v1.3_sharegpt_12500.json filter=lfs diff=lfs merge=lfs -text
capybara_sharegpt.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1947d28999416a2f468d1e09654cfdfc9bab8ccd03aa184598d20f0000dd6e4
3
+ size 76361785
merged_all.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f915219dc691de207f9b8b448f005386330035f3c7f6cc6379dd183ac7662d2f
3
+ size 306656943
remove_empty_output.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Remove records with an empty ``output`` field from ``merged_all.json``.

Run as a script to rewrite the dataset file in place. The record counts
before and after filtering are printed to stdout.
"""
import json
import os

# Dataset rewritten by default when the script is executed directly.
DATASET_PATH = 'merged_all.json'


def drop_empty_output(rows):
    """Return the rows whose ``"output"`` value is not the empty string.

    Args:
        rows: Iterable of mappings, each with an ``"output"`` key
            (assumes the dataset is a JSON array of objects -- confirm
            against the actual file).

    Returns:
        A new list holding the kept rows in their original order.

    Raises:
        KeyError: If a row has no ``"output"`` key.
    """
    return [row for row in rows if row["output"] != ""]


def clean_file(path=DATASET_PATH):
    """Filter the JSON file at *path* in place and report the counts.

    Args:
        path: Location of the JSON file to load, filter and overwrite.

    Returns:
        A ``(before, after)`` tuple with the number of records before and
        after filtering.
    """
    # Explicit UTF-8 so the result does not depend on the platform's
    # default text encoding.
    with open(path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    before = len(data)
    print(f"Normal len: {before}")

    data = drop_empty_output(data)

    after = len(data)
    print(f"After len: {after}")

    # Write to a sibling temp file first and swap it in afterwards, so an
    # error in the middle of the dump cannot leave the dataset truncated.
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as file:
            json.dump(data, file, indent=1)
        os.replace(tmp_path, path)
    finally:
        # After a successful replace the temp file no longer exists; this
        # only cleans up the partial file left behind by a failed dump.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    return before, after


if __name__ == "__main__":
    clean_file()
synthia-v1.3_sharegpt_12500.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbdbc7413a3c7fc65a900518f0db8627bb5ced53e1e8ee82613d09856c1b3b70
3
+ size 30638009