Spaces:
Running
Running
victormiller
committed on
Update curated.py
Browse files - curated.py +36 -26
curated.py
CHANGED
@@ -438,6 +438,36 @@ phil_filter = pd.DataFrame(
|
|
438 |
table_html_phil = phil_filter.to_html(index=False, border=0)
|
439 |
table_div_phil = Div(NotStr(table_html_phil), style="margin: 40px;")
|
440 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
441 |
filtering_process = Div(
|
442 |
Section(
|
443 |
H3("This section contains the specific filtering steps taken for all 14 curated datasets."),
|
@@ -622,6 +652,11 @@ filtering_process = Div(
|
|
622 |
Li("Local dedup was done within freelaw itself which removed 90%+ duplicates"),
|
623 |
),
|
624 |
table_div_freelaw,
|
|
|
|
|
|
|
|
|
|
|
625 |
),
|
626 |
),
|
627 |
Section(
|
@@ -920,27 +955,7 @@ def get_data(data_source: str = "Freelaw", doc_id: int = 3, target: str = "foo")
|
|
920 |
|
921 |
|
922 |
|
923 |
-
def get_freelaw_data(data_source: str = "Freelaw", doc_id: int = 3, target: str = "foo"):
|
924 |
-
doc_id = max(0, min(int(doc_id), 9))
|
925 |
|
926 |
-
if data_source == "Freelaw":
|
927 |
-
raw_sample_doc = json.load(open("data/curated_samples/freelaw_raw.json"))
|
928 |
-
extracted_sample_doc = json.load(
|
929 |
-
open("data/curated_samples/freelaw_extract.json")
|
930 |
-
)
|
931 |
-
else:
|
932 |
-
raw_sample_doc = extracted_sample_doc = [{} for _ in range(10)]
|
933 |
-
|
934 |
-
raw_json = raw_sample_doc[doc_id]
|
935 |
-
extracted_json = extracted_sample_doc[doc_id]
|
936 |
-
return view_data(
|
937 |
-
raw_json,
|
938 |
-
extracted_json,
|
939 |
-
doc_id=doc_id,
|
940 |
-
data_source=data_source,
|
941 |
-
data_sources=data_sources,
|
942 |
-
target=target,
|
943 |
-
)
|
944 |
|
945 |
|
946 |
def update(target: str, request):
|
@@ -1000,12 +1015,7 @@ fig.update_layout(
|
|
1000 |
# Show the plot
|
1001 |
diff2_stacked_bar = fig
|
1002 |
|
1003 |
-
|
1004 |
-
Div(
|
1005 |
-
get_freelaw_data(target=gen_random_id()),
|
1006 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
1007 |
-
),
|
1008 |
-
)
|
1009 |
|
1010 |
def curated(request):
|
1011 |
|
|
|
438 |
table_html_phil = phil_filter.to_html(index=False, border=0)
|
439 |
table_div_phil = Div(NotStr(table_html_phil), style="margin: 40px;")
|
440 |
|
441 |
+
|
442 |
+
def get_freelaw_data(data_source: str = "Freelaw", doc_id: int = 3, target: str = "foo"):
|
443 |
+
doc_id = max(0, min(int(doc_id), 9))
|
444 |
+
|
445 |
+
if data_source == "Freelaw":
|
446 |
+
raw_sample_doc = json.load(open("data/curated_samples/freelaw_raw.json"))
|
447 |
+
extracted_sample_doc = json.load(
|
448 |
+
open("data/curated_samples/freelaw_extract.json")
|
449 |
+
)
|
450 |
+
else:
|
451 |
+
raw_sample_doc = extracted_sample_doc = [{} for _ in range(10)]
|
452 |
+
|
453 |
+
raw_json = raw_sample_doc[doc_id]
|
454 |
+
extracted_json = extracted_sample_doc[doc_id]
|
455 |
+
return view_data(
|
456 |
+
raw_json,
|
457 |
+
extracted_json,
|
458 |
+
doc_id=doc_id,
|
459 |
+
data_source="Freelaw",
|
460 |
+
data_sources=data_sources,
|
461 |
+
target=target,
|
462 |
+
)
|
463 |
+
|
464 |
+
freelaw_examples = Div(
|
465 |
+
Div(
|
466 |
+
get_freelaw_data(target=gen_random_id()),
|
467 |
+
style="border: 1px solid #ccc; padding: 20px;",
|
468 |
+
),
|
469 |
+
)
|
470 |
+
|
471 |
filtering_process = Div(
|
472 |
Section(
|
473 |
H3("This section contains the specific filtering steps taken for all 14 curated datasets."),
|
|
|
652 |
Li("Local dedup was done within freelaw itself which removed 90%+ duplicates"),
|
653 |
),
|
654 |
table_div_freelaw,
|
655 |
+
Details(
|
656 |
+
Summary("FreeLaw Filtering Examples")
|
657 |
+
freelaw_examples,
|
658 |
+
)
|
659 |
+
|
660 |
),
|
661 |
),
|
662 |
Section(
|
|
|
955 |
|
956 |
|
957 |
|
|
|
|
|
958 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
959 |
|
960 |
|
961 |
def update(target: str, request):
|
|
|
1015 |
# Show the plot
|
1016 |
diff2_stacked_bar = fig
|
1017 |
|
1018 |
+
|
|
|
|
|
|
|
|
|
|
|
1019 |
|
1020 |
def curated(request):
|
1021 |
|