erikjm committed on
Commit
78938b7
·
verified ·
1 Parent(s): 2b87df4

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +37 -25
  2. interface_utils.py +18 -11
app.py CHANGED
@@ -8,33 +8,37 @@ checkbox_choices = [
8
  ["Yes", "No", "NA"]
9
  ]
10
 
11
- conversation_data = load_from_jsonl('./data/conversations_unlabeled.jsonl')
12
- max_conversation_length = max([len(conversation['transcript']) for conversation in conversation_data])
13
- conversation = get_conversation(conversation_data)
14
 
 
15
 
16
- def save_labels(conv_id, skipped, submaxim_0=None):
 
17
  data = {
18
  'conv_id': conv_id,
 
19
  'maxim': maxim,
20
  'skipped': skipped,
21
- 'submaxim_0': submaxim_0
22
  }
23
  os.makedirs("./labels", exist_ok=True)
24
 
25
- with open(f"./labels/{maxim}_human_labels_{conv_id}.json", 'w') as f:
26
  json.dump(data, f, indent=4)
27
 
28
 
29
  def update_interface(new_conversation):
30
  new_conv_id = new_conversation['conv_id']
31
- new_transcript = pad_transcript(new_conversation['transcript'], max_conversation_length)
 
32
 
33
  markdown_blocks = [None] * max_conversation_length
34
  for i in range(max_conversation_length):
35
- if new_transcript[i]['speaker'] != '':
36
- markdown_blocks[i] = gr.Markdown(f"""  **{new_transcript[i]['speaker']}**:      {new_transcript[i]['response']}""",
37
- visible=True)
 
38
  else:
39
  markdown_blocks[i] = gr.Markdown("", visible=False)
40
 
@@ -51,32 +55,34 @@ def update_interface(new_conversation):
51
  visible=True)
52
  conv_len = gr.Number(value=len(new_transcript), visible=False)
53
 
54
- return [new_conv_id] + list(markdown_blocks) + [new_last_response] + [new_radio_0_base] + [conv_len]
55
 
56
 
57
  def submit(*args):
58
  conv_id = args[0]
 
59
  submaxim_0 = args[-2]
60
 
61
- save_labels(conv_id, skipped=False, submaxim_0=submaxim_0)
62
 
63
- new_conversation = get_conversation(conversation_data)
64
  return update_interface(new_conversation)
65
 
66
 
67
  def skip(*args):
68
  conv_id = args[0]
69
- save_labels(conv_id, skipped=True)
 
70
 
71
- new_conversation = get_conversation(conversation_data)
72
- return update_interface(new_conversation)
73
 
74
 
75
  with gr.Blocks(theme=gr.themes.Default()) as interface:
76
  conv_id = conversation['conv_id']
 
77
  transcript = conversation['transcript']
78
  conv_len = gr.Number(value=len(transcript), visible=False)
79
- padded_transcript = pad_transcript(transcript, max_conversation_length)
80
 
81
  markdown_blocks = [None] * max_conversation_length
82
  with gr.Column(scale=1, min_width=600):
@@ -84,7 +90,11 @@ with gr.Blocks(theme=gr.themes.Default()) as interface:
84
  gr.Markdown("""<span style='font-size: 16px;'>&nbsp;&nbsp;&nbsp;&nbsp;**Conversational context** </span>""",
85
  visible=True)
86
  for i in range(max_conversation_length):
87
- markdown_blocks[i] = gr.Markdown(f"""&nbsp;&nbsp;**{padded_transcript[i]['speaker']}**: &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;{padded_transcript[i]['response']}""")
 
 
 
 
88
  if i >= conv_len.value:
89
  markdown_blocks[i].visible = False
90
 
@@ -109,7 +119,9 @@ with gr.Blocks(theme=gr.themes.Default()) as interface:
109
  skip_button = gr.Button("Skip")
110
 
111
  conv_id_element = gr.Text(value=conv_id, visible=False)
 
112
  input_list = [conv_id_element] + \
 
113
  markdown_blocks + \
114
  [last_response] + \
115
  [radio_submaxim_0_base] + \
@@ -118,6 +130,7 @@ with gr.Blocks(theme=gr.themes.Default()) as interface:
118
  fn=submit,
119
  inputs=input_list,
120
  outputs=[conv_id_element,
 
121
  *markdown_blocks,
122
  last_response,
123
  radio_submaxim_0_base,
@@ -127,6 +140,7 @@ with gr.Blocks(theme=gr.themes.Default()) as interface:
127
  fn=skip,
128
  inputs=input_list,
129
  outputs=[conv_id_element,
 
130
  *markdown_blocks,
131
  last_response,
132
  radio_submaxim_0_base,
@@ -135,17 +149,15 @@ with gr.Blocks(theme=gr.themes.Default()) as interface:
135
 
136
  css = """
137
  #textbox_id textarea {
138
- background-color: white;
139
  }
140
 
141
  .bottom-aligned-group {
142
- display: flex;
143
- flex-direction: column;
144
- justify-content: flex-end;
145
- height: 100%;
146
  }
147
  """
148
  interface.css = css
149
  interface.launch()
150
-
151
-
 
8
  ["Yes", "No", "NA"]
9
  ]
10
 
11
+ conversation_data = load_from_jsonl('./data/conversations_unlabeled_sliced.jsonl')
12
+ max_conversation_length = max([len(conversation['transcript']) for conversation in conversation_data_sliced])
 
13
 
14
+ conversation = get_conversation(conversation_data_sliced)
15
 
16
+
17
+ def save_labels(conv_id, slice_idx, skipped, submaxim_0=None):
18
  data = {
19
  'conv_id': conv_id,
20
+ 'slice_idx': int(slice_idx),
21
  'maxim': maxim,
22
  'skipped': skipped,
23
+ 'submaxim_0': submaxim_0,
24
  }
25
  os.makedirs("./labels", exist_ok=True)
26
 
27
+ with open(f"./labels/{maxim}_human_labels_{conv_id}_{slice_idx}.json", 'w') as f:
28
  json.dump(data, f, indent=4)
29
 
30
 
31
  def update_interface(new_conversation):
32
  new_conv_id = new_conversation['conv_id']
33
+ new_slice_idx = new_conversation['slice_idx']
34
+ new_transcript = new_conversation['transcript']
35
 
36
  markdown_blocks = [None] * max_conversation_length
37
  for i in range(max_conversation_length):
38
+ if i < len(new_transcript) and new_transcript[i]['speaker'] != '':
39
+ markdown_blocks[i] = gr.Markdown(
40
+ f"""&nbsp;&nbsp;**{new_transcript[i]['speaker']}**: &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;{new_transcript[i]['response']}""",
41
+ visible=True)
42
  else:
43
  markdown_blocks[i] = gr.Markdown("", visible=False)
44
 
 
55
  visible=True)
56
  conv_len = gr.Number(value=len(new_transcript), visible=False)
57
 
58
+ return [new_conv_id] + [new_slice_idx] + list(markdown_blocks) + [new_last_response] + [new_radio_0_base] + [conv_len]
59
 
60
 
61
  def submit(*args):
62
  conv_id = args[0]
63
+ slice_idx = args[1]
64
  submaxim_0 = args[-2]
65
 
66
+ save_labels(conv_id, slice_idx, skipped=False, submaxim_0=submaxim_0)
67
 
68
+ new_conversation = get_conversation(conversation_data_sliced)
69
  return update_interface(new_conversation)
70
 
71
 
72
  def skip(*args):
73
  conv_id = args[0]
74
+ slice_idx = args[1]
75
+ save_labels(conv_id, slice_idx, skipped=True)
76
 
77
+ new_conversation = get_conversation(conversation_data_sliced)
78
+ return update_interface(new_conversation, slice_idx)
79
 
80
 
81
  with gr.Blocks(theme=gr.themes.Default()) as interface:
82
  conv_id = conversation['conv_id']
83
+ slice_idx = conversation['slice_idx']
84
  transcript = conversation['transcript']
85
  conv_len = gr.Number(value=len(transcript), visible=False)
 
86
 
87
  markdown_blocks = [None] * max_conversation_length
88
  with gr.Column(scale=1, min_width=600):
 
90
  gr.Markdown("""<span style='font-size: 16px;'>&nbsp;&nbsp;&nbsp;&nbsp;**Conversational context** </span>""",
91
  visible=True)
92
  for i in range(max_conversation_length):
93
+ if i < len(transcript):
94
+ markdown_blocks[i] = gr.Markdown(
95
+ f"""&nbsp;&nbsp;**{transcript[i]['speaker']}**: &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;{transcript[i]['response']}""")
96
+ else:
97
+ markdown_blocks[i] = gr.Markdown("")
98
  if i >= conv_len.value:
99
  markdown_blocks[i].visible = False
100
 
 
119
  skip_button = gr.Button("Skip")
120
 
121
  conv_id_element = gr.Text(value=conv_id, visible=False)
122
+ slice_idx_element = gr.Text(value=slice_idx, visible=False)
123
  input_list = [conv_id_element] + \
124
+ [slice_idx_element] + \
125
  markdown_blocks + \
126
  [last_response] + \
127
  [radio_submaxim_0_base] + \
 
130
  fn=submit,
131
  inputs=input_list,
132
  outputs=[conv_id_element,
133
+ slice_idx_element,
134
  *markdown_blocks,
135
  last_response,
136
  radio_submaxim_0_base,
 
140
  fn=skip,
141
  inputs=input_list,
142
  outputs=[conv_id_element,
143
+ slice_idx_element,
144
  *markdown_blocks,
145
  last_response,
146
  radio_submaxim_0_base,
 
149
 
150
  css = """
151
  #textbox_id textarea {
152
+ background-color: white;
153
  }
154
 
155
  .bottom-aligned-group {
156
+ display: flex;
157
+ flex-direction: column;
158
+ justify-content: flex-end;
159
+ height: 100%;
160
  }
161
  """
162
  interface.css = css
163
  interface.launch()
 
 
interface_utils.py CHANGED
@@ -31,17 +31,24 @@ def save_to_jsonl(data, filename):
31
  file.write(json_line + '\n')
32
 
33
 
34
- def get_conversation(conversation_data):
35
- conv = random.choice(conversation_data)
36
- return conv
37
-
38
-
39
- def pad_transcript(transcript, max_length):
40
- padding_count = max_length - len(transcript)
41
- if padding_count > 0:
42
- for _ in range(padding_count):
43
- transcript.append({'speaker': '', 'response': ''})
44
- return transcript
 
 
 
 
 
 
 
45
 
46
 
47
  def get_last_response(transcript):
 
31
  file.write(json_line + '\n')
32
 
33
 
34
+ def get_conversation(data, min_length=0):
35
+ conv = random.choice(data)
36
+ transcript = conv['transcript']
37
+ slice_index = random.randint(min_length, len(transcript) - 1)
38
+ conv_slice = transcript[slice_index]
39
+ return {
40
+ 'conv_id': conv['conv_id'],
41
+ 'slice_idx': slice_index,
42
+ 'transcript': conv_slice
43
+ }
44
+
45
+
46
+ # def pad_transcript(transcript, max_length):
47
+ # padding_count = max_length - len(transcript)
48
+ # if padding_count > 0:
49
+ # for _ in range(padding_count):
50
+ # transcript.append({'speaker': '', 'response': ''})
51
+ # return transcript
52
 
53
 
54
  def get_last_response(transcript):