giseldo commited on
Commit
cfa1f4f
·
1 Parent(s): 1dcc182

ultima versao

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. __pycache__/util.cpython-310.pyc +0 -0
  2. _gerador_planilha.ipynb +423 -0
  3. _hiper_param.ipynb +145 -0
  4. _metrics generator.ipynb +149 -0
  5. app.py +37 -12
  6. database/tawos/README.md +48 -0
  7. database/tawos/deep/ALOY_deep-se.csv +0 -0
  8. database/tawos/deep/APSTUD_deep-se.csv +0 -0
  9. database/tawos/deep/CLI_deep-se.csv +0 -0
  10. database/tawos/deep/CLOV_deep-se.csv +0 -0
  11. database/tawos/deep/COMPASS_deep-se.csv +0 -0
  12. database/tawos/deep/CONFCLOUD_deep-se.csv +0 -0
  13. database/tawos/deep/CONFSERVER_deep-se.csv +0 -0
  14. database/tawos/deep/DAEMON_deep-se.csv +0 -0
  15. database/tawos/deep/DM_deep-se.csv +0 -0
  16. database/tawos/deep/DNN_deep-se.csv +0 -0
  17. database/tawos/deep/DURACLOUD_deep-se.csv +0 -0
  18. database/tawos/deep/EVG_deep-se.csv +0 -0
  19. database/tawos/deep/FAB_deep-se.csv +0 -0
  20. database/tawos/deep/MDL_deep-se.csv +0 -0
  21. database/tawos/deep/MESOS_deep-se.csv +0 -0
  22. database/tawos/deep/MULE_deep-se.csv +0 -0
  23. database/tawos/deep/NEXUS_deep-se.csv +0 -0
  24. database/tawos/deep/SERVER_deep-se.csv +0 -0
  25. database/tawos/deep/STL_deep-se.csv +0 -0
  26. database/tawos/deep/TIDOC_deep-se.csv +0 -0
  27. database/tawos/deep/TIMOB_deep-se.csv +0 -0
  28. database/tawos/deep/TISTUD_deep-se.csv +0 -0
  29. database/tawos/deep/XD_deep-se.csv +0 -0
  30. database/tawos/deep/metricas_NEXUS_MbR.csv +300 -0
  31. database/tawos/deep/metricas_NEXUS_NEOSP_SVR.csv +300 -0
  32. database/tawos/tfidf/ALOY_tfidf-se.csv +0 -0
  33. database/tawos/tfidf/APSTUD_tfidf-se.csv +0 -0
  34. database/tawos/tfidf/CLI_tfidf-se.csv +0 -0
  35. database/tawos/tfidf/CLOV_tfidf-se.csv +0 -0
  36. database/tawos/tfidf/COMPASS_tfidf-se.csv +0 -0
  37. database/tawos/tfidf/CONFCLOUD_tfidf-se.csv +0 -0
  38. database/tawos/tfidf/CONFSERVER_tfidf-se.csv +0 -0
  39. database/tawos/tfidf/DAEMON_tfidf-se.csv +0 -0
  40. database/tawos/tfidf/DM_tfidf-se.csv +0 -0
  41. database/tawos/tfidf/DNN_tfidf-se.csv +0 -0
  42. database/tawos/tfidf/DURACLOUD_tfidf-se.csv +0 -0
  43. database/tawos/tfidf/EVG_tfidf-se.csv +0 -0
  44. database/tawos/tfidf/FAB_tfidf-se.csv +0 -0
  45. database/tawos/tfidf/MDL_tfidf-se.csv +0 -0
  46. database/tawos/tfidf/MESOS_tfidf-se.csv +0 -0
  47. database/tawos/tfidf/MULE_tfidf-se.csv +0 -0
  48. database/tawos/tfidf/NEXUS_tfidf-se.csv +0 -0
  49. database/tawos/tfidf/SERVER_tfidf-se.csv +0 -0
  50. database/tawos/tfidf/STL_tfidf-se.csv +0 -0
__pycache__/util.cpython-310.pyc ADDED
Binary file (2.55 kB). View file
 
_gerador_planilha.ipynb ADDED
@@ -0,0 +1,423 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 9,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "import csv\n",
11
+ "import numpy as np\n",
12
+ "import pandas as pd\n",
13
+ "\n",
14
+ "def gerar_metricas(nome_projeto):\n",
15
+ " list_output_MbR = []\n",
16
+ " with open(\"metricas_{}_MbR.csv\".format(nome_projeto), \"r\") as arquivo:\n",
17
+ " arquivo_csv = csv.reader(arquivo)\n",
18
+ " for i, linha in enumerate(arquivo_csv):\n",
19
+ " list_output_MbR.append(float(linha[0]))\n",
20
+ " list_output_NEOSP_SVR = []\n",
21
+ " with open(\"metricas_{}_NEOSP_SVR.csv\".format(nome_projeto), \"r\") as arquivo:\n",
22
+ " arquivo_csv = csv.reader(arquivo)\n",
23
+ " for i, linha in enumerate(arquivo_csv):\n",
24
+ " list_output_NEOSP_SVR.append(float(linha[0]))\n",
25
+ " list_output_TFIDF_SVR = []\n",
26
+ " with open(\"metricas_{}_TFIDF.csv\".format(nome_projeto), \"r\") as arquivo:\n",
27
+ " arquivo_csv = csv.reader(arquivo)\n",
28
+ " for i, linha in enumerate(arquivo_csv):\n",
29
+ " list_output_TFIDF_SVR.append(float(linha[0]))\n",
30
+ " \n",
31
+ " list_results = [[\"MbR Regressor\", np.mean(list_output_MbR)], [\"NEOSP-SVR Regressor\", np.mean(list_output_NEOSP_SVR)], [\"TFIDF-SVR Regressor\", np.mean(list_output_TFIDF_SVR)]]\n",
32
+ " \n",
33
+ " df = pd.DataFrame(list_results, columns=[\"Model\", \"MAE\"])\n",
34
+ " \n",
35
+ " df_list_output_MbR = pd.DataFrame(list_output_MbR, columns=[\"MAE\"])\n",
36
+ " df_list_output_NEOSP = pd.DataFrame(list_output_NEOSP_SVR, columns=[\"MAE\"])\n",
37
+ " df_list_output_TFIDF = pd.DataFrame(list_output_TFIDF_SVR, columns=[\"MAE\"])\n",
38
+ " \n",
39
+ " \n",
40
+ " return df_list_output_MbR, df_list_output_NEOSP, df_list_output_TFIDF"
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": 10,
46
+ "metadata": {},
47
+ "outputs": [
48
+ {
49
+ "name": "stdout",
50
+ "output_type": "stream",
51
+ "text": [
52
+ " MAE\n",
53
+ "0 1.719630\n",
54
+ "1 1.619240\n",
55
+ "2 2.035138\n",
56
+ "3 2.714286\n",
57
+ "4 1.690284\n",
58
+ ".. ...\n",
59
+ "295 1.877688\n",
60
+ "296 1.825845\n",
61
+ "297 2.281874\n",
62
+ "298 1.448541\n",
63
+ "299 2.011329\n",
64
+ "\n",
65
+ "[300 rows x 1 columns]\n",
66
+ " MAE\n",
67
+ "0 3.991628\n",
68
+ "1 5.509540\n",
69
+ "2 4.276285\n",
70
+ "3 3.210280\n",
71
+ "4 4.419198\n",
72
+ ".. ...\n",
73
+ "295 4.515966\n",
74
+ "296 4.156574\n",
75
+ "297 4.072459\n",
76
+ "298 3.360661\n",
77
+ "299 4.133661\n",
78
+ "\n",
79
+ "[300 rows x 1 columns]\n",
80
+ " MAE\n",
81
+ "0 1.749430\n",
82
+ "1 1.615589\n",
83
+ "2 1.891128\n",
84
+ "3 1.352403\n",
85
+ "4 2.137931\n",
86
+ ".. ...\n",
87
+ "295 1.505617\n",
88
+ "296 1.497649\n",
89
+ "297 1.944096\n",
90
+ "298 1.594697\n",
91
+ "299 1.889890\n",
92
+ "\n",
93
+ "[300 rows x 1 columns]\n",
94
+ " MAE\n",
95
+ "0 6.520257\n",
96
+ "1 7.011882\n",
97
+ "2 3.508960\n",
98
+ "3 3.912252\n",
99
+ "4 6.778048\n",
100
+ ".. ...\n",
101
+ "295 5.067004\n",
102
+ "296 3.978298\n",
103
+ "297 4.770977\n",
104
+ "298 3.592259\n",
105
+ "299 3.753875\n",
106
+ "\n",
107
+ "[300 rows x 1 columns]\n",
108
+ " MAE\n",
109
+ "0 1.279750\n",
110
+ "1 1.747535\n",
111
+ "2 1.429980\n",
112
+ "3 1.367193\n",
113
+ "4 1.769231\n",
114
+ ".. ...\n",
115
+ "295 1.736029\n",
116
+ "296 1.297173\n",
117
+ "297 1.407627\n",
118
+ "298 1.742932\n",
119
+ "299 1.625247\n",
120
+ "\n",
121
+ "[300 rows x 1 columns]\n",
122
+ " MAE\n",
123
+ "0 1.484524\n",
124
+ "1 0.870833\n",
125
+ "2 1.362500\n",
126
+ "3 1.141270\n",
127
+ "4 1.118586\n",
128
+ ".. ...\n",
129
+ "295 1.162477\n",
130
+ "296 1.446631\n",
131
+ "297 1.257882\n",
132
+ "298 2.020915\n",
133
+ "299 1.351226\n",
134
+ "\n",
135
+ "[300 rows x 1 columns]\n",
136
+ " MAE\n",
137
+ "0 1.562142\n",
138
+ "1 1.121421\n",
139
+ "2 1.011506\n",
140
+ "3 1.309862\n",
141
+ "4 1.386479\n",
142
+ ".. ...\n",
143
+ "295 1.217656\n",
144
+ "296 1.166991\n",
145
+ "297 1.309732\n",
146
+ "298 1.546553\n",
147
+ "299 1.458637\n",
148
+ "\n",
149
+ "[300 rows x 1 columns]\n",
150
+ " MAE\n",
151
+ "0 3.183489\n",
152
+ "1 3.254141\n",
153
+ "2 2.820652\n",
154
+ "3 3.147516\n",
155
+ "4 3.724896\n",
156
+ ".. ...\n",
157
+ "295 3.343784\n",
158
+ "296 2.467027\n",
159
+ "297 2.586486\n",
160
+ "298 2.901351\n",
161
+ "299 2.793243\n",
162
+ "\n",
163
+ "[300 rows x 1 columns]\n",
164
+ " MAE\n",
165
+ "0 3.085758\n",
166
+ "1 2.960811\n",
167
+ "2 2.402817\n",
168
+ "3 3.173362\n",
169
+ "4 3.209479\n",
170
+ ".. ...\n",
171
+ "295 2.802939\n",
172
+ "296 2.611202\n",
173
+ "297 2.317784\n",
174
+ "298 3.191521\n",
175
+ "299 2.972561\n",
176
+ "\n",
177
+ "[300 rows x 1 columns]\n",
178
+ " MAE\n",
179
+ "0 0.817816\n",
180
+ "1 1.292632\n",
181
+ "2 0.798383\n",
182
+ "3 0.766996\n",
183
+ "4 0.811808\n",
184
+ ".. ...\n",
185
+ "295 0.764333\n",
186
+ "296 0.871069\n",
187
+ "297 0.830722\n",
188
+ "298 1.263252\n",
189
+ "299 0.915469\n",
190
+ "\n",
191
+ "[300 rows x 1 columns]\n",
192
+ " MAE\n",
193
+ "0 0.861718\n",
194
+ "1 1.440051\n",
195
+ "2 0.793965\n",
196
+ "3 1.079316\n",
197
+ "4 0.765291\n",
198
+ ".. ...\n",
199
+ "295 1.312637\n",
200
+ "296 0.746560\n",
201
+ "297 0.861487\n",
202
+ "298 0.844606\n",
203
+ "299 0.893629\n",
204
+ "\n",
205
+ "[300 rows x 1 columns]\n",
206
+ " MAE\n",
207
+ "0 0.651576\n",
208
+ "1 0.649636\n",
209
+ "2 0.624728\n",
210
+ "3 0.599720\n",
211
+ "4 0.670485\n",
212
+ ".. ...\n",
213
+ "295 0.625787\n",
214
+ "296 0.620482\n",
215
+ "297 0.617713\n",
216
+ "298 0.653711\n",
217
+ "299 0.671403\n",
218
+ "\n",
219
+ "[300 rows x 1 columns]\n",
220
+ " MAE\n",
221
+ "0 1.843216\n",
222
+ "1 1.947581\n",
223
+ "2 1.326613\n",
224
+ "3 1.818193\n",
225
+ "4 1.829548\n",
226
+ ".. ...\n",
227
+ "295 1.692674\n",
228
+ "296 1.968132\n",
229
+ "297 3.074359\n",
230
+ "298 1.361050\n",
231
+ "299 2.180586\n",
232
+ "\n",
233
+ "[300 rows x 1 columns]\n",
234
+ " MAE\n",
235
+ "0 8.977660\n",
236
+ "1 13.935714\n",
237
+ "2 11.211688\n",
238
+ "3 10.565220\n",
239
+ "4 10.296718\n",
240
+ ".. ...\n",
241
+ "295 12.521110\n",
242
+ "296 11.826106\n",
243
+ "297 9.696185\n",
244
+ "298 11.501992\n",
245
+ "299 10.755109\n",
246
+ "\n",
247
+ "[300 rows x 1 columns]\n",
248
+ " MAE\n",
249
+ "0 1.563933\n",
250
+ "1 1.527785\n",
251
+ "2 1.514647\n",
252
+ "3 1.419521\n",
253
+ "4 1.537474\n",
254
+ ".. ...\n",
255
+ "295 1.508380\n",
256
+ "296 1.713129\n",
257
+ "297 1.520726\n",
258
+ "298 1.499336\n",
259
+ "299 1.515071\n",
260
+ "\n",
261
+ "[300 rows x 1 columns]\n",
262
+ " MAE\n",
263
+ "0 3.124855\n",
264
+ "1 2.894180\n",
265
+ "2 3.055756\n",
266
+ "3 2.742524\n",
267
+ "4 2.910823\n",
268
+ ".. ...\n",
269
+ "295 2.968017\n",
270
+ "296 2.876247\n",
271
+ "297 2.786590\n",
272
+ "298 2.969128\n",
273
+ "299 2.820308\n",
274
+ "\n",
275
+ "[300 rows x 1 columns]\n",
276
+ " MAE\n",
277
+ "0 1.275430\n",
278
+ "1 0.953375\n",
279
+ "2 0.943311\n",
280
+ "3 1.077534\n",
281
+ "4 0.880661\n",
282
+ ".. ...\n",
283
+ "295 0.955650\n",
284
+ "296 0.951646\n",
285
+ "297 0.953232\n",
286
+ "298 0.975788\n",
287
+ "299 1.029818\n",
288
+ "\n",
289
+ "[300 rows x 1 columns]\n",
290
+ " MAE\n",
291
+ "0 2.097472\n",
292
+ "1 1.692843\n",
293
+ "2 1.335077\n",
294
+ "3 1.525819\n",
295
+ "4 1.529649\n",
296
+ ".. ...\n",
297
+ "295 1.723645\n",
298
+ "296 1.577294\n",
299
+ "297 1.154258\n",
300
+ "298 1.946014\n",
301
+ "299 1.708333\n",
302
+ "\n",
303
+ "[300 rows x 1 columns]\n",
304
+ " MAE\n",
305
+ "0 0.933591\n",
306
+ "1 0.871815\n",
307
+ "2 0.946461\n",
308
+ "3 0.765251\n",
309
+ "4 0.945946\n",
310
+ ".. ...\n",
311
+ "295 0.565508\n",
312
+ "296 0.833871\n",
313
+ "297 1.040323\n",
314
+ "298 1.303763\n",
315
+ "299 0.954301\n",
316
+ "\n",
317
+ "[300 rows x 1 columns]\n",
318
+ " MAE\n",
319
+ "0 2.833315\n",
320
+ "1 2.558738\n",
321
+ "2 3.003724\n",
322
+ "3 2.535398\n",
323
+ "4 2.797720\n",
324
+ ".. ...\n",
325
+ "295 2.425558\n",
326
+ "296 2.596320\n",
327
+ "297 2.665249\n",
328
+ "298 2.642077\n",
329
+ "299 3.175072\n",
330
+ "\n",
331
+ "[300 rows x 1 columns]\n",
332
+ " MAE\n",
333
+ "0 2.732684\n",
334
+ "1 2.229971\n",
335
+ "2 2.475378\n",
336
+ "3 2.495666\n",
337
+ "4 2.462650\n",
338
+ ".. ...\n",
339
+ "295 2.512257\n",
340
+ "296 2.619655\n",
341
+ "297 2.390718\n",
342
+ "298 2.241314\n",
343
+ "299 2.395140\n",
344
+ "\n",
345
+ "[300 rows x 1 columns]\n",
346
+ " MAE\n",
347
+ "0 2.290064\n",
348
+ "1 2.309586\n",
349
+ "2 2.167920\n",
350
+ "3 2.191076\n",
351
+ "4 2.249119\n",
352
+ ".. ...\n",
353
+ "295 2.204164\n",
354
+ "296 2.246984\n",
355
+ "297 2.521996\n",
356
+ "298 2.094582\n",
357
+ "299 2.248426\n",
358
+ "\n",
359
+ "[300 rows x 1 columns]\n",
360
+ " MAE\n",
361
+ "0 1.699304\n",
362
+ "1 1.956807\n",
363
+ "2 1.673685\n",
364
+ "3 1.788314\n",
365
+ "4 1.908642\n",
366
+ ".. ...\n",
367
+ "295 1.915542\n",
368
+ "296 2.274142\n",
369
+ "297 1.923457\n",
370
+ "298 1.721884\n",
371
+ "299 1.901150\n",
372
+ "\n",
373
+ "[300 rows x 1 columns]\n"
374
+ ]
375
+ }
376
+ ],
377
+ "source": [
378
+ "LIBRARIES = [\"ALOY\", \"APSTUD\", \"CLI\", \"CLOV\", \"COMPASS\", \"CONFCLOUD\", \"CONFSERVER\", \"DAEMON\", \"DM\", \"DNN\", \"DURACLOUD\", \"EVG\", \"FAB\", \n",
379
+ " \"MDL\", \"MESOS\" ,\"MULE\", \"NEXUS\", \"SERVER\", \"STL\", \"TIDOC\", \"TIMOB\", \"TISTUD\", \"XD\"]\n",
380
+ "\n",
381
+ "for lp in LIBRARIES:\n",
382
+ " df_list_output_MbR, df_list_output_NEOSP, df_list_output_TFIDF = gerar_metricas(lp)\n",
383
+ " print(df_list_output_MbR)"
384
+ ]
385
+ },
386
+ {
387
+ "cell_type": "code",
388
+ "execution_count": null,
389
+ "metadata": {},
390
+ "outputs": [],
391
+ "source": []
392
+ },
393
+ {
394
+ "cell_type": "code",
395
+ "execution_count": null,
396
+ "metadata": {},
397
+ "outputs": [],
398
+ "source": []
399
+ }
400
+ ],
401
+ "metadata": {
402
+ "kernelspec": {
403
+ "display_name": "Python 3",
404
+ "language": "python",
405
+ "name": "python3"
406
+ },
407
+ "language_info": {
408
+ "codemirror_mode": {
409
+ "name": "ipython",
410
+ "version": 3
411
+ },
412
+ "file_extension": ".py",
413
+ "mimetype": "text/x-python",
414
+ "name": "python",
415
+ "nbconvert_exporter": "python",
416
+ "pygments_lexer": "ipython3",
417
+ "version": "3.10.11"
418
+ },
419
+ "orig_nbformat": 4
420
+ },
421
+ "nbformat": 4,
422
+ "nbformat_minor": 2
423
+ }
_hiper_param.ipynb ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 5,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from sklearn.dummy import DummyRegressor\n",
10
+ "from nltk.corpus import stopwords\n",
11
+ "from textblob import TextBlob\n",
12
+ "import textstat\n",
13
+ "from sklearn.feature_extraction.text import TfidfVectorizer\n",
14
+ "from sklearn import svm\n",
15
+ "from sklearn.linear_model import LinearRegression\n",
16
+ "from sklearn.feature_selection import SelectKBest\n",
17
+ "import pandas as pd\n",
18
+ "from util import escape_tags_and_content, escape_tags, escape_strings, escape_links, escape_hex_character_codes, escape_punctuation_boundaries, escape_odd_spaces\n",
19
+ "from sklearn.model_selection import RepeatedKFold\n",
20
+ "from sklearn.svm import SVR\n",
21
+ "from sklearn.model_selection import GridSearchCV\n",
22
+ "import numpy as np\n",
23
+ "import util"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": 6,
29
+ "metadata": {},
30
+ "outputs": [],
31
+ "source": [
32
+ "def gerar_metricas(project_name):\n",
33
+ "\n",
34
+ " # carregando os dados\n",
35
+ " df = pd.read_csv(\"database\\\\tawos\\\\deep\\\\{}_deep-se.csv\".format(project_name))\n",
36
+ "\n",
37
+ " # criação de uma nova coluna\n",
38
+ " df[\"context\"] = df[\"title\"] + df[\"description\"]\n",
39
+ "\n",
40
+ " # pré-processamento\n",
41
+ " df[\"context\"] = df[\"context\"].apply(lambda x: escape_tags_and_content(x))\n",
42
+ " df[\"context\"] = df[\"context\"].apply(lambda x: escape_tags(x))\n",
43
+ " df[\"context\"] = df[\"context\"].apply(lambda x: escape_strings(x))\n",
44
+ " df[\"context\"] = df[\"context\"].apply(lambda x: escape_links(x))\n",
45
+ " df[\"context\"] = df[\"context\"].apply(lambda x: escape_hex_character_codes(x))\n",
46
+ " df[\"context\"] = df[\"context\"].apply(lambda x: escape_punctuation_boundaries(x))\n",
47
+ " df[\"context\"] = df[\"context\"].apply(lambda x: escape_odd_spaces(x))\n",
48
+ "\n",
49
+ " # removendo stop-words\n",
50
+ " stop = stopwords.words('english')\n",
51
+ " df['context'] = df['context'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))\n",
52
+ "\n",
53
+ " # renomeando as colunas porque senão dá um problema com a extração de features do NEOSP\n",
54
+ " df = df.rename(columns={ \"issuekey\": \"issuekey_\", \"created\": \"created_\", \"description\": \"description_\", \"title\": \"title_\", \"context\": \"context_\", \"storypoint\": \"storypoint_\"})\n",
55
+ " y = df[\"storypoint_\"]\n",
56
+ " df = df.drop(columns=['storypoint_'])\n",
57
+ "\n",
58
+ " # 5º coluna -> extração das features para o neosp\n",
59
+ " df[\"gunning_fog_\"] = df['context_'].apply(textstat.gunning_fog)\n",
60
+ " df[\"flesch_reading_ease_\"] = df['context_'].apply(textstat.flesch_reading_ease)\n",
61
+ " df[\"flesch_kincaid_grade_\"] = df['context_'].apply(textstat.flesch_kincaid_grade)\n",
62
+ " df[\"smog_index_\"] = df['context_'].apply(textstat.smog_index)\n",
63
+ " df[\"coleman_liau_index_\"] = df['context_'].apply(textstat.coleman_liau_index)\n",
64
+ " df[\"automated_readability_index_\"] = df['context_'].apply(textstat.automated_readability_index)\n",
65
+ " df[\"dale_chall_readability_score_\"] = df['context_'].apply(textstat.dale_chall_readability_score)\n",
66
+ " df[\"difficult_words_\"] = df['context_'].apply(textstat.difficult_words)\n",
67
+ " df[\"linsear_write_formula_\"] = df['context_'].apply(textstat.linsear_write_formula)\n",
68
+ " df[\"polarity_\"] = df[\"context_\"].apply(lambda x: TextBlob(x).sentiment.polarity)\n",
69
+ " df[\"subjectivity_\"] = df[\"context_\"].apply(lambda x: TextBlob(x).sentiment.subjectivity)\n",
70
+ " # 16º colunas\n",
71
+ "\n",
72
+ " # Extração das features para o TFIDF\n",
73
+ " vectorizer = TfidfVectorizer()\n",
74
+ " X_vec = vectorizer.fit_transform(df[\"context_\"])\n",
75
+ "\n",
76
+ " df_vec = pd.DataFrame(data = X_vec.toarray(), columns = vectorizer.get_feature_names_out())\n",
77
+ "\n",
78
+ " # Juntando as features do neosp com o tfidf\n",
79
+ " df = df.join(df_vec)\n",
80
+ " X = df\n",
81
+ " \n",
82
+ " grid = GridSearchCV(\n",
83
+ " estimator=SVR(kernel='rbf'),\n",
84
+ " param_grid={\n",
85
+ " 'C': [1.1, 5.4, 170, 1001],\n",
86
+ " 'epsilon': [0.0003, 0.007, 0.0109, 0.019, 0.14, 0.05, 8, 0.2, 3, 2, 7],\n",
87
+ " 'gamma': [0.7001, 0.008, 0.001, 3.1, 1, 1.3, 5]\n",
88
+ " }, \n",
89
+ " cv=10, scoring='neg_mean_squared_error', verbose=0, n_jobs=-1)\n",
90
+ "\n",
91
+ " #print the best parameters from all possible combinations\n",
92
+ " grid.fit(X[X.columns[5:16]], y)\n",
93
+ " print(grid.best_params_)"
94
+ ]
95
+ },
96
+ {
97
+ "cell_type": "code",
98
+ "execution_count": 7,
99
+ "metadata": {},
100
+ "outputs": [
101
+ {
102
+ "name": "stdout",
103
+ "output_type": "stream",
104
+ "text": [
105
+ "{'C': 5.4, 'epsilon': 0.2, 'gamma': 5}\n",
106
+ "{'C': 1.1, 'epsilon': 2, 'gamma': 0.7001}\n"
107
+ ]
108
+ }
109
+ ],
110
+ "source": [
111
+ "import pandas as pd\n",
112
+ "\n",
113
+ "LIBRARIES = [\"ALOY\", \"CLI\"]\n",
114
+ " #\"APSTUD\", \n",
115
+ " #\"CLOV\", \"COMPASS\", \"CONFCLOUD\", \"CONFSERVER\", \"DAEMON\", \"DM\", \"DNN\", \"DURACLOUD\", \"EVG\", \"FAB\", \n",
116
+ " #\"MDL\", \"MESOS\" ,\"MULE\", \"NEXUS\", \"SERVER\", \"STL\", \"TIDOC\", \"TIMOB\", \"TISTUD\", \"XD\"]\n",
117
+ "\n",
118
+ "for lp in LIBRARIES:\n",
119
+ " gerar_metricas(lp)"
120
+ ]
121
+ }
122
+ ],
123
+ "metadata": {
124
+ "kernelspec": {
125
+ "display_name": "Python 3",
126
+ "language": "python",
127
+ "name": "python3"
128
+ },
129
+ "language_info": {
130
+ "codemirror_mode": {
131
+ "name": "ipython",
132
+ "version": 3
133
+ },
134
+ "file_extension": ".py",
135
+ "mimetype": "text/x-python",
136
+ "name": "python",
137
+ "nbconvert_exporter": "python",
138
+ "pygments_lexer": "ipython3",
139
+ "version": "3.10.11"
140
+ },
141
+ "orig_nbformat": 4
142
+ },
143
+ "nbformat": 4,
144
+ "nbformat_minor": 2
145
+ }
_metrics generator.ipynb ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 13,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from sklearn.dummy import DummyRegressor\n",
10
+ "from nltk.corpus import stopwords\n",
11
+ "from textblob import TextBlob\n",
12
+ "import textstat\n",
13
+ "from sklearn.feature_extraction.text import TfidfVectorizer\n",
14
+ "from sklearn import svm\n",
15
+ "from sklearn.linear_model import LinearRegression\n",
16
+ "from sklearn.feature_selection import SelectKBest\n",
17
+ "import pandas as pd\n",
18
+ "from util import escape_tags_and_content, escape_tags, escape_strings, escape_links, escape_hex_character_codes, escape_punctuation_boundaries, escape_odd_spaces\n",
19
+ "from sklearn.model_selection import cross_val_score\n",
20
+ "from sklearn.model_selection import RepeatedKFold\n",
21
+ "from sklearn.pipeline import make_pipeline\n",
22
+ "from sklearn.preprocessing import StandardScaler\n",
23
+ "from sklearn.feature_selection import f_classif, f_regression"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": 14,
29
+ "metadata": {},
30
+ "outputs": [],
31
+ "source": [
32
+ "def gerar_metricas(project_name):\n",
33
+ "\n",
34
+ " # carregando os dados\n",
35
+ " df = pd.read_csv(\"database\\\\tawos\\\\deep\\\\{}_deep-se.csv\".format(project_name))\n",
36
+ "\n",
37
+ " # criação de uma nova coluna\n",
38
+ " df[\"context\"] = df[\"title\"] + df[\"description\"]\n",
39
+ "\n",
40
+ " # pré-processamento\n",
41
+ " df[\"context\"] = df[\"context\"].apply(lambda x: escape_tags_and_content(x))\n",
42
+ " df[\"context\"] = df[\"context\"].apply(lambda x: escape_tags(x))\n",
43
+ " df[\"context\"] = df[\"context\"].apply(lambda x: escape_strings(x))\n",
44
+ " df[\"context\"] = df[\"context\"].apply(lambda x: escape_links(x))\n",
45
+ " df[\"context\"] = df[\"context\"].apply(lambda x: escape_hex_character_codes(x))\n",
46
+ " df[\"context\"] = df[\"context\"].apply(lambda x: escape_punctuation_boundaries(x))\n",
47
+ " df[\"context\"] = df[\"context\"].apply(lambda x: escape_odd_spaces(x))\n",
48
+ "\n",
49
+ " # removendo stop-words\n",
50
+ " stop = stopwords.words('english')\n",
51
+ " df['context'] = df['context'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))\n",
52
+ "\n",
53
+ " # renomeando as colunas porque senão dá um problema com a extração de features do NEOSP\n",
54
+ " df = df.rename(columns={ \"issuekey\": \"issuekey_\", \"created\": \"created_\", \"description\": \"description_\", \"title\": \"title_\", \"context\": \"context_\", \"storypoint\": \"storypoint_\"})\n",
55
+ " y = df[\"storypoint_\"]\n",
56
+ " df = df.drop(columns=['storypoint_'])\n",
57
+ "\n",
58
+ " # 5º coluna -> extração das features para o neosp\n",
59
+ " df[\"gunning_fog_\"] = df['context_'].apply(textstat.gunning_fog)\n",
60
+ " df[\"flesch_reading_ease_\"] = df['context_'].apply(textstat.flesch_reading_ease)\n",
61
+ " df[\"flesch_kincaid_grade_\"] = df['context_'].apply(textstat.flesch_kincaid_grade)\n",
62
+ " df[\"smog_index_\"] = df['context_'].apply(textstat.smog_index)\n",
63
+ " df[\"coleman_liau_index_\"] = df['context_'].apply(textstat.coleman_liau_index)\n",
64
+ " df[\"automated_readability_index_\"] = df['context_'].apply(textstat.automated_readability_index)\n",
65
+ " df[\"dale_chall_readability_score_\"] = df['context_'].apply(textstat.dale_chall_readability_score)\n",
66
+ " df[\"difficult_words_\"] = df['context_'].apply(textstat.difficult_words)\n",
67
+ " df[\"linsear_write_formula_\"] = df['context_'].apply(textstat.linsear_write_formula)\n",
68
+ " df[\"polarity_\"] = df[\"context_\"].apply(lambda x: TextBlob(x).sentiment.polarity)\n",
69
+ " df[\"subjectivity_\"] = df[\"context_\"].apply(lambda x: TextBlob(x).sentiment.subjectivity)\n",
70
+ " # 16º colunas\n",
71
+ "\n",
72
+ " # Extração das features para o TFIDF\n",
73
+ " vectorizer = TfidfVectorizer()\n",
74
+ " X_vec = vectorizer.fit_transform(df[\"context_\"])\n",
75
+ "\n",
76
+ " df_vec = pd.DataFrame(data = X_vec.toarray(), columns = vectorizer.get_feature_names_out())\n",
77
+ "\n",
78
+ " # Juntando as features do neosp com o tfidf\n",
79
+ " df = df.join(df_vec)\n",
80
+ " X = df\n",
81
+ "\n",
82
+ " results = list()\n",
83
+ "\n",
84
+ " rkf = RepeatedKFold(n_splits=10, n_repeats=30, random_state=2652124)\n",
85
+ " \n",
86
+ " #model = DummyRegressor(strategy=\"mean\")\n",
87
+ " #results.append(cross_val_score(model, X[X.columns[5:16]], y, cv = rkf, scoring=\"neg_mean_absolute_error\"))\n",
88
+ " \n",
89
+ " model = make_pipeline(SelectKBest(f_regression, k=50), StandardScaler(), svm.SVR())\n",
90
+ " results.append(cross_val_score(model, X[X.columns[16:]], y, cv = rkf, scoring=\"neg_mean_absolute_error\"))\n",
91
+ " \n",
92
+ " list_results_TFIDF_MbR = results[0]\n",
93
+ " df_results_MAE_TFIDF = pd.DataFrame(list_results_TFIDF_MbR, columns = [\"MAE\"])\n",
94
+ " df_results_MAE_TFIDF = df_results_MAE_TFIDF.apply(lambda x: x*-1)\n",
95
+ " df_results_MAE_TFIDF.to_csv(\"metricas_{}_TFIDF.csv\".format(project_name),index = False, header=False)\n",
96
+ " \n",
97
+ " #list_results = [ [\"MbR\", results[0].mean()*-1], [\"NEOSP-SVR\", results[1].mean()*-1] ]\n",
98
+ " #df = pd.DataFrame(list_results, columns = [\"Model\",\"MAE\"])\n",
99
+ "\n",
100
+ " #list_results_MAE_MbR = results[0]\n",
101
+ " #df_results_MAE_MbR = pd.DataFrame(list_results_MAE_MbR, columns = [\"MAE\"])\n",
102
+ " #df_results_MAE_MbR = df_results_MAE_MbR.apply(lambda x: x*-1)\n",
103
+ " #df_results_MAE_MbR.to_csv(\"metricas_{}_MbR.csv\".format(project_name),index = False, header=False)\n",
104
+ "\n",
105
+ " #list_results_MAE_NEOSP = results[1]\n",
106
+ " #df_results_MAE_NEOSP = pd.DataFrame(list_results_MAE_NEOSP, columns = [\"MAE\"])\n",
107
+ " #df_results_MAE_NEOSP = df_results_MAE_NEOSP.apply(lambda x: x*-1)\n",
108
+ " #df_results_MAE_NEOSP.to_csv(\"metricas_{}_NEOSP_SVR.csv\".format(project_name), index = False, header=False)"
109
+ ]
110
+ },
111
+ {
112
+ "cell_type": "code",
113
+ "execution_count": 15,
114
+ "metadata": {},
115
+ "outputs": [],
116
+ "source": [
117
+ "import pandas as pd\n",
118
+ "\n",
119
+ "LIBRARIES = [\"ALOY\", \"APSTUD\", \"CLI\", \"CLOV\", \"COMPASS\", \"CONFCLOUD\", \"CONFSERVER\", \"DAEMON\", \"DM\", \"DNN\", \"DURACLOUD\", \"EVG\", \"FAB\", \n",
120
+ " \"MDL\", \"MESOS\" ,\"MULE\", \"NEXUS\", \"SERVER\", \"STL\", \"TIDOC\", \"TIMOB\", \"TISTUD\", \"XD\"]\n",
121
+ "\n",
122
+ "for lp in LIBRARIES:\n",
123
+ " gerar_metricas(lp)"
124
+ ]
125
+ }
126
+ ],
127
+ "metadata": {
128
+ "kernelspec": {
129
+ "display_name": "Python 3",
130
+ "language": "python",
131
+ "name": "python3"
132
+ },
133
+ "language_info": {
134
+ "codemirror_mode": {
135
+ "name": "ipython",
136
+ "version": 3
137
+ },
138
+ "file_extension": ".py",
139
+ "mimetype": "text/x-python",
140
+ "name": "python",
141
+ "nbconvert_exporter": "python",
142
+ "pygments_lexer": "ipython3",
143
+ "version": "3.10.11"
144
+ },
145
+ "orig_nbformat": 4
146
+ },
147
+ "nbformat": 4,
148
+ "nbformat_minor": 2
149
+ }
app.py CHANGED
@@ -9,11 +9,15 @@ import matplotlib.gridspec as gridspec
9
  LIBRARIES = ["ALOY", "APSTUD", "CLI", "CLOV", "COMPASS", "CONFCLOUD", "CONFSERVER", "DAEMON", "DM", "DNN", "DURACLOUD", "EVG", "FAB",
10
  "MDL", "MESOS" ,"MULE", "NEXUS", "SERVER", "STL", "TIDOC", "TIMOB", "TISTUD", "XD"]
11
 
12
- def grafico(list_output_MbR, list_output_NEOSP, nome_projeto, pip_choices):
13
- list_results = [["MbR Regressor", np.mean(list_output_MbR)], ["NEOSP-SVR Regressor", np.mean(list_output_NEOSP)]]
 
14
  df = pd.DataFrame(list_results, columns=["Model", "MAE"])
 
15
  df_list_output_MbR = pd.DataFrame(list_output_MbR, columns=["MAE"])
16
  df_list_output_NEOSP = pd.DataFrame(list_output_NEOSP, columns=["MAE"])
 
 
17
  fig, ax = plt.subplots(2, 2)
18
 
19
  G = gridspec.GridSpec(2, 2)
@@ -31,16 +35,21 @@ def grafico(list_output_MbR, list_output_NEOSP, nome_projeto, pip_choices):
31
  axes_1.scatter(range(1,51), df_list_output_MbR["MAE"].loc[1:50], label="MbR Regressor", color="red", alpha=0.5,)
32
  if "NEOSP-SVR Regressor" in pip_choices:
33
  axes_1.scatter(range(1,51), df_list_output_NEOSP["MAE"].loc[1:50], label="NEOSP-SVR Regressor", color = "blue", alpha=0.5)
 
 
34
 
35
  # ax2
36
  axes_2.set_ylabel("MAE Médio")
37
  axes_2.set_xlabel("Modelos")
38
  if "MbR Regressor" in pip_choices:
39
  graf1 = axes_2.bar(df["Model"].iloc[[0]], df["MAE"].iloc[[0]], color="red", alpha=0.5)
40
- axes_2.bar_label(graf1, fmt="%.01f", size=10, label_type="edge")
41
  if "NEOSP-SVR Regressor" in pip_choices:
42
  graf2 = axes_2.bar(df["Model"].iloc[[1]], df["MAE"].iloc[[1]], color = "blue", alpha=0.5)
43
- axes_2.bar_label(graf2, fmt="%.01f", size=10, label_type="edge")
 
 
 
44
 
45
  # ax3
46
  axes_3.set_xlabel("MAE")
@@ -49,6 +58,8 @@ def grafico(list_output_MbR, list_output_NEOSP, nome_projeto, pip_choices):
49
  axes_3.hist(df_list_output_MbR["MAE"], color="red", alpha=0.5)
50
  if "NEOSP-SVR Regressor" in pip_choices:
51
  axes_3.hist(df_list_output_NEOSP["MAE"], color="blue", alpha=0.5)
 
 
52
 
53
  # graficos geral
54
  fig.set_figwidth(15)
@@ -56,12 +67,18 @@ def grafico(list_output_MbR, list_output_NEOSP, nome_projeto, pip_choices):
56
  fig.suptitle("Projeto {}".format(nome_projeto))
57
  # text
58
 
59
- resultado = ""
60
- if (pip_choices == ["NEOSP-SVR Regressor", "MbR Regressor"]) or (pip_choices == ["MbR Regressor", "NEOSP-SVR Regressor"]):
61
- res = wilcoxon(list_output_MbR, list_output_NEOSP)
62
- resultado = "MbR vs. NEOSP-SVR => Statistics: {} | valor-p: {}".format(res.statistic, res.pvalue)
 
 
 
 
 
 
63
 
64
- return gr.update(value=plt, visible=True), gr.update(value=resultado, visible=True)
65
 
66
 
67
  def create_pip_plot(libraries, pip_choices):
@@ -76,7 +93,13 @@ def create_pip_plot(libraries, pip_choices):
76
  arquivo_csv = csv.reader(arquivo)
77
  for i, linha in enumerate(arquivo_csv):
78
  list_output_NEOSP_SVR.append(float(linha[0]))
79
- return grafico(list_output_MbR, list_output_NEOSP_SVR, nome_projeto, pip_choices)
 
 
 
 
 
 
80
 
81
 
82
  demo = gr.Blocks()
@@ -88,7 +111,7 @@ with demo:
88
  libraries = gr.Dropdown(choices=LIBRARIES, label="Projeto", value="ALOY")
89
  with gr.Column():
90
  gr.Markdown("## Gráficos")
91
- pip = gr.CheckboxGroup(choices=["MbR Regressor", "NEOSP-SVR Regressor"], label="Modelos Preditivos")
92
  # stars = gr.CheckboxGroup(choices=["Stars", "Week over Week"], label="")
93
  # issues = gr.CheckboxGroup(choices=["Issue", "Exclude org members", "week over week"], label="")
94
  with gr.Row():
@@ -96,11 +119,13 @@ with demo:
96
  with gr.Row():
97
  with gr.Column():
98
  star_plot = gr.Text(visible=False, label="Wilcoxon Test")
 
 
99
  pip_plot = gr.Plot(visible=False)
100
 
101
  # issue_plot = gr.Plot(visible=False)
102
 
103
- fetch.click(create_pip_plot, inputs=[libraries, pip], outputs=[pip_plot, star_plot])
104
  #fetch.click(create_star_plot, inputs=[libraries, pip], outputs=star_plot)
105
  # fetch.click(create_issue_plot, inputs=[libraries, issues], outputs=issue_plot)
106
 
 
9
  LIBRARIES = ["ALOY", "APSTUD", "CLI", "CLOV", "COMPASS", "CONFCLOUD", "CONFSERVER", "DAEMON", "DM", "DNN", "DURACLOUD", "EVG", "FAB",
10
  "MDL", "MESOS" ,"MULE", "NEXUS", "SERVER", "STL", "TIDOC", "TIMOB", "TISTUD", "XD"]
11
 
12
+ def grafico(list_output_MbR, list_output_NEOSP, list_output_TFIDF, nome_projeto, pip_choices):
13
+ list_results = [["MbR Regressor", np.mean(list_output_MbR)], ["NEOSP-SVR Regressor", np.mean(list_output_NEOSP)], ["TFIDF-SVR Regressor", np.mean(list_output_TFIDF)]]
14
+
15
  df = pd.DataFrame(list_results, columns=["Model", "MAE"])
16
+
17
  df_list_output_MbR = pd.DataFrame(list_output_MbR, columns=["MAE"])
18
  df_list_output_NEOSP = pd.DataFrame(list_output_NEOSP, columns=["MAE"])
19
+ df_list_output_TFIDF = pd.DataFrame(list_output_TFIDF, columns=["MAE"])
20
+
21
  fig, ax = plt.subplots(2, 2)
22
 
23
  G = gridspec.GridSpec(2, 2)
 
35
  axes_1.scatter(range(1,51), df_list_output_MbR["MAE"].loc[1:50], label="MbR Regressor", color="red", alpha=0.5,)
36
  if "NEOSP-SVR Regressor" in pip_choices:
37
  axes_1.scatter(range(1,51), df_list_output_NEOSP["MAE"].loc[1:50], label="NEOSP-SVR Regressor", color = "blue", alpha=0.5)
38
+ if "TFIDF-SVR Regressor" in pip_choices:
39
+ axes_1.scatter(range(1,51), df_list_output_TFIDF["MAE"].loc[1:50], label="TFIDF-SVR Regressor", color = "green", alpha=0.5)
40
 
41
  # ax2
42
  axes_2.set_ylabel("MAE Médio")
43
  axes_2.set_xlabel("Modelos")
44
  if "MbR Regressor" in pip_choices:
45
  graf1 = axes_2.bar(df["Model"].iloc[[0]], df["MAE"].iloc[[0]], color="red", alpha=0.5)
46
+ axes_2.bar_label(graf1, fmt="%.03f", size=10, label_type="edge")
47
  if "NEOSP-SVR Regressor" in pip_choices:
48
  graf2 = axes_2.bar(df["Model"].iloc[[1]], df["MAE"].iloc[[1]], color = "blue", alpha=0.5)
49
+ axes_2.bar_label(graf2, fmt="%.03f", size=10, label_type="edge")
50
+ if "TFIDF-SVR Regressor" in pip_choices:
51
+ graf3 = axes_2.bar(df["Model"].iloc[[2]], df["MAE"].iloc[[2]], color = "green", alpha=0.5)
52
+ axes_2.bar_label(graf3, fmt="%.03f", size=10, label_type="edge")
53
 
54
  # ax3
55
  axes_3.set_xlabel("MAE")
 
58
  axes_3.hist(df_list_output_MbR["MAE"], color="red", alpha=0.5)
59
  if "NEOSP-SVR Regressor" in pip_choices:
60
  axes_3.hist(df_list_output_NEOSP["MAE"], color="blue", alpha=0.5)
61
+ if "TFIDF-SVR Regressor" in pip_choices:
62
+ axes_3.hist(df_list_output_TFIDF["MAE"], color="green", alpha=0.5)
63
 
64
  # graficos geral
65
  fig.set_figwidth(15)
 
67
  fig.suptitle("Projeto {}".format(nome_projeto))
68
  # text
69
 
70
+ resultado, resultado2 = "", ""
71
+ #if (pip_choices == ["NEOSP-SVR Regressor", "MbR Regressor"]) or (pip_choices == ["MbR Regressor", "NEOSP-SVR Regressor"]):
72
+ res = wilcoxon(list_output_NEOSP, list_output_MbR)
73
+ resultado = "NEOSP-SVR vs. MbR => Statistics: {} | valor-p: {}".format(res.statistic, res.pvalue)
74
+ #if (pip_choices == ["NEOSP-SVR Regressor", "MbR Regressor"]) or (pip_choices == ["MbR Regressor", "NEOSP-SVR Regressor"]):
75
+ res = wilcoxon(list_output_NEOSP, list_output_TFIDF)
76
+ resultado2 = "NEOSP-SVR vs. TFIDF-SVR => Statistics: {} | valor-p: {}".format(res.statistic, res.pvalue)
77
+
78
+ res = wilcoxon(list_output_TFIDF, list_output_MbR)
79
+ resultado3 = "TFIDF-SVR vs. MbR => Statistics: {} | valor-p: {}".format(res.statistic, res.pvalue)
80
 
81
+ return gr.update(value=plt, visible=True), gr.update(value=resultado, visible=True), gr.update(value=resultado2, visible=True), gr.update(value=resultado3, visible=True)
82
 
83
 
84
  def create_pip_plot(libraries, pip_choices):
 
93
  arquivo_csv = csv.reader(arquivo)
94
  for i, linha in enumerate(arquivo_csv):
95
  list_output_NEOSP_SVR.append(float(linha[0]))
96
+ list_output_TFIDF_SVR = []
97
+ with open("metricas_{}_TFIDF.csv".format(nome_projeto), "r") as arquivo:
98
+ arquivo_csv = csv.reader(arquivo)
99
+ for i, linha in enumerate(arquivo_csv):
100
+ list_output_TFIDF_SVR.append(float(linha[0]))
101
+
102
+ return grafico(list_output_MbR, list_output_NEOSP_SVR, list_output_TFIDF_SVR, nome_projeto, pip_choices)
103
 
104
 
105
  demo = gr.Blocks()
 
111
  libraries = gr.Dropdown(choices=LIBRARIES, label="Projeto", value="ALOY")
112
  with gr.Column():
113
  gr.Markdown("## Gráficos")
114
+ pip = gr.CheckboxGroup(choices=["MbR Regressor", "NEOSP-SVR Regressor", "TFIDF-SVR Regressor"], label="Modelos Preditivos")
115
  # stars = gr.CheckboxGroup(choices=["Stars", "Week over Week"], label="")
116
  # issues = gr.CheckboxGroup(choices=["Issue", "Exclude org members", "week over week"], label="")
117
  with gr.Row():
 
119
  with gr.Row():
120
  with gr.Column():
121
  star_plot = gr.Text(visible=False, label="Wilcoxon Test")
122
+ star_plot2 = gr.Text(visible=False, label="Wilcoxon Test")
123
+ star_plot3 = gr.Text(visible=False, label="Wilcoxon Test")
124
  pip_plot = gr.Plot(visible=False)
125
 
126
  # issue_plot = gr.Plot(visible=False)
127
 
128
+ fetch.click(create_pip_plot, inputs=[libraries, pip], outputs=[pip_plot, star_plot, star_plot2, star_plot3])
129
  #fetch.click(create_star_plot, inputs=[libraries, pip], outputs=star_plot)
130
  # fetch.click(create_issue_plot, inputs=[libraries, issues], outputs=issue_plot)
131
 
database/tawos/README.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Tawosi Dataset
2
+
3
+ This directory consists of 46 files, two files for each of the 26-3\*=23 projects:
4
+ - 23 files with "\_deep-se" suffix are prepared to be used by Deep-SE.
5
+ - 23 files with "\_tfidf-se" suffix are prepared to be used by TF/IDF-SE.
6
+
7
+ \* <sub>One of the repositories including three projects has been removed from the public domain during the time that the manuscript for this study [1] was under revision. Therefore, although the paper reports the results for all 26 projects, the replication package includes 23 projects as we refrain from publishing the data for the three remaining projects in accordance with The General Data Protection Regulation.</sub>
8
+
9
+ The issues for these 23 projects were collected from 12 open-source repositories by Tawosi et al. up until August 2020.
10
+ The files are named after their project key as "[project key]\_[approach].csv", e.g. MESOS_deep-se.csv, which is the set of issues collected from the Mesos project in the Apache repository and contains the features that Deep-SE needs for prediction. The following table shows the list of projects and the repository from which each project was collected.
11
+
12
+ ## Project list
13
+
14
+ | Repository | Project | Key | File for Deep-SE | File for TF/IDF-SE |
15
+ |--------------|-----------------------------------|------------|---------------------------|---------------------------|
16
+ | Apache | Mesos | MESOS | MESOS_deep-se.csv | MESOS_tfidf-se.csv |
17
+ | Apache | Alloy | ALOY | ALOY_deep-se.csv | ALOY_tfidf-se.csv |
18
+ | Appcelerator | Appcelerator studio | TISTUD | TISTUD_deep-se.csv | TISTUD_tfidf-se.csv |
19
+ | Appcelerator | Aptana studio | APSTUD | APSTUD_deep-se.csv | APSTUD_tfidf-se.csv |
20
+ | Appcelerator | Command-Line Interface | CLI | CLI_deep-se.csv | CLI_tfidf-se.csv |
21
+ | Appcelerator | Daemon | DAEMON | DAEMON_deep-se.csv | DAEMON_tfidf-se.csv |
22
+ | Appcelerator | Documentation | TIDOC | TIDOC_deep-se.csv | TIDOC_tfidf-se.csv |
23
+ | Appcelerator | Titanium | TIMOB | TIMOB_deep-se.csv | TIMOB_tfidf-se.csv |
24
+ | Atlassian | Clover | CLOV | CLOV_deep-se.csv | CLOV_tfidf-se.csv |
25
+ | Atlassian | Confluence Cloud | CONFCLOUD | CONFCLOUD_deep-se.csv | CONFCLOUD_tfidf-se.csv |
26
+ | Atlassian | Confluence Server and Data Center | CONFSERVER | CONFSERVER_deep-se.csv | CONFSERVER_tfidf-se.csv |
27
+ | DNNSoftware | DNN | DNN | DNN_deep-se.csv | DNN_tfidf-se.csv |
28
+ | Duraspace | Duracloud | DURACLOUD | DURACLOUD_deep-se.csv | DURACLOUD_tfidf-se.csv |
29
+ | Hyperledger | Fabric | FAB | FAB_deep-se.csv | FAB_tfidf-se.csv |
30
+ | Hyperledger | Sawtooth | STL | STL_deep-se.csv | STL_tfidf-se.csv |
31
+ | Lsstcorp | Data management | DM | DM_deep-se.csv | DM_tfidf-se.csv |
32
+ | MongoDB | Compass | COMPASS | COMPASS_deep-se.csv | COMPASS_tfidf-se.csv |
33
+ | MongoDB | Core Server | SERVER | SERVER_deep-se.csv | SERVER_tfidf-se.csv |
34
+ | MongoDB | Evergreen | EVG | EVG_deep-se.csv | EVG_tfidf-se.csv |
35
+ | Moodle | Moodle | MDL | MDL_deep-se.csv | MDL_tfidf-se.csv |
36
+ | Mulesoft | Mule | MULE | MULE_deep-se.csv | MULE_tfidf-se.csv |
37
+ | Sonatype | Sonatype’s Nexus | NEXUS | NEXUS_deep-se.csv | NEXUS_tfidf-se.csv |
38
+ | Spring | Spring XD | XD | XD_deep-se.csv | XD_tfidf-se.csv |
39
+
40
+ ## Content of the files
41
+
42
+ - Each csv file for the Deep-SE approach contains 5 columns: *issuekey*, *created*, *title*, *description*, and *storypoint*.
43
+
44
+ - Each csv file for TF/IDF-SE approach contains more than 4 columns: starting with *issuekey*, *created*, *storypoint*, *context*, *codesnippet*, and a set of one-hot columns for issue type (header starting with t\_) followed by component(s) (header starting with c\_).
45
+
46
+ - The issues are sorted by creation time (i.e., an issue that appears earlier in the file was created before the issues that follow it).
47
+
48
+ [1] Vali Tawosi, Rebecca Moussa, and Federica Sarro. "Agile Effort Estimation: Have We Solved the Problem Yet? Insights From A Replication Study." IEEE Transactions on Software Engineering, no. TBA (2022): pp. TBA.
database/tawos/deep/ALOY_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/APSTUD_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/CLI_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/CLOV_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/COMPASS_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/CONFCLOUD_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/CONFSERVER_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/DAEMON_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/DM_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/DNN_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/DURACLOUD_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/EVG_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/FAB_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/MDL_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/MESOS_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/MULE_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/NEXUS_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/SERVER_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/STL_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/TIDOC_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/TIMOB_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/TISTUD_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/XD_deep-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/deep/metricas_NEXUS_MbR.csv ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 0.9232760219499689
2
+ 1.0214754044707242
3
+ 1.2796930059020544
4
+ 1.0425662481044697
5
+ 0.870375724119874
6
+ 1.2029848616249328
7
+ 1.1264422074144007
8
+ 1.0686551107110316
9
+ 1.0611490454809922
10
+ 1.0216262500960556
11
+ 1.0316785398688675
12
+ 1.0164215659535472
13
+ 1.1173946957878316
14
+ 1.0193207728309133
15
+ 0.9270179898105017
16
+ 1.409685705817132
17
+ 0.9975327412644224
18
+ 1.1267523300363365
19
+ 0.9221921552698891
20
+ 1.0456758477599817
21
+ 0.9248579034070454
22
+ 1.1553462138485537
23
+ 1.0811396092207326
24
+ 1.0859043452647197
25
+ 0.8713957649215059
26
+ 1.0458130701590682
27
+ 1.0008233343945199
28
+ 1.2413687110974498
29
+ 1.0448607467094069
30
+ 1.1712096428924288
31
+ 1.0825660299139237
32
+ 0.9964653131579807
33
+ 1.025817396332217
34
+ 1.438955740047784
35
+ 0.9019615330067748
36
+ 0.9927134906084989
37
+ 1.0340531105573427
38
+ 0.993196513453284
39
+ 1.155440044789391
40
+ 1.0006449452757071
41
+ 0.9843148271385402
42
+ 1.3817598158471796
43
+ 0.9542290782540391
44
+ 1.0492237871333034
45
+ 0.927950754393812
46
+ 1.0144467741758425
47
+ 0.9415267913011977
48
+ 1.3784291877531751
49
+ 0.9974751078568057
50
+ 0.9935999473065987
51
+ 1.1535379597002065
52
+ 1.0342177323456576
53
+ 0.9484524835538877
54
+ 1.0390124695896925
55
+ 1.0765603351406785
56
+ 0.9191073957384212
57
+ 1.2250090566783398
58
+ 1.0575812631047394
59
+ 1.0056755184262238
60
+ 1.1572102137376088
61
+ 1.028711148445938
62
+ 1.2392841168192184
63
+ 1.1528533868627475
64
+ 0.9198667946717868
65
+ 0.9708470156988099
66
+ 1.04692457159167
67
+ 1.1560795011691347
68
+ 1.1096214857343594
69
+ 1.0004336227811137
70
+ 0.9870434610782387
71
+ 1.153726149045962
72
+ 0.9795446363309078
73
+ 1.0696027841113647
74
+ 1.021489041379837
75
+ 0.9977417278509323
76
+ 1.1207941334679943
77
+ 1.132770904460277
78
+ 1.2018074934407692
79
+ 1.0306115727882492
80
+ 0.9089227492782099
81
+ 1.1736005803868517
82
+ 1.1017449788900646
83
+ 0.9784455014564218
84
+ 0.9582846950241642
85
+ 1.23775405561677
86
+ 0.9749184898949426
87
+ 1.051334899498315
88
+ 0.9718995971150362
89
+ 1.2895804287925525
90
+ 0.8923764723963422
91
+ 1.1122835822523809
92
+ 0.9552763928738969
93
+ 1.068950939855776
94
+ 1.1003540141605663
95
+ 1.164174748808134
96
+ 0.9429429264597718
97
+ 0.9814585094354124
98
+ 1.1913785911101842
99
+ 0.9525292832599648
100
+ 1.1499511488259253
101
+ 0.9862867241962406
102
+ 0.987759510380415
103
+ 0.9237178578052214
104
+ 0.9452369003851062
105
+ 1.0241482386568188
106
+ 1.0599469772649928
107
+ 1.221798052539712
108
+ 0.9753932793957822
109
+ 1.1154452043515968
110
+ 1.38041616809195
111
+ 1.1612100847670273
112
+ 1.2166523024557347
113
+ 1.0060111495368906
114
+ 0.9225505383851716
115
+ 0.979440995821651
116
+ 1.2812537736159748
117
+ 1.0349560339433326
118
+ 1.0403900409471638
119
+ 0.8553428913308377
120
+ 1.1184860527153568
121
+ 1.1964860412598322
122
+ 1.0433735531239432
123
+ 1.058218692384059
124
+ 0.9226487241307835
125
+ 1.0598769405321666
126
+ 0.9863738157706959
127
+ 1.0053571624603426
128
+ 0.9031539196206074
129
+ 1.2612247922452877
130
+ 1.175469026160078
131
+ 1.157435388324624
132
+ 0.9202213543087178
133
+ 1.0184452832658761
134
+ 1.0802532101284053
135
+ 0.9754726552698472
136
+ 1.0301642277672267
137
+ 1.0307789841151351
138
+ 1.1540650763505427
139
+ 1.0905146388855345
140
+ 1.1553028223903044
141
+ 1.3041821672866916
142
+ 1.0170925018818935
143
+ 0.9933942812257944
144
+ 1.2464216750488202
145
+ 1.005473855317849
146
+ 0.9698714500565357
147
+ 0.9650274993687771
148
+ 1.2525907588947558
149
+ 0.9202435972028588
150
+ 0.9404866455161209
151
+ 1.3407209015633352
152
+ 1.054274898268658
153
+ 0.9704324536617827
154
+ 1.1889802864841867
155
+ 0.9465514984235732
156
+ 1.0683175436092784
157
+ 0.9992370434610783
158
+ 0.9608339828526891
159
+ 0.9778468158914515
160
+ 1.1140373025369679
161
+ 1.2547947372440353
162
+ 0.9001014586037985
163
+ 0.9846448403390682
164
+ 1.091768216183193
165
+ 0.9604775100094916
166
+ 1.188483198489456
167
+ 1.2245699450012624
168
+ 1.031920674475536
169
+ 1.042706355043747
170
+ 0.934188137398044
171
+ 1.1585672517809804
172
+ 1.1207902861569008
173
+ 1.0137923698766134
174
+ 1.0363996378036942
175
+ 1.0777358367061955
176
+ 0.9339109481518889
177
+ 1.1579155368689142
178
+ 1.1943178948986202
179
+ 0.942756303997014
180
+ 0.9780965606577892
181
+ 0.9724261697740638
182
+ 1.190071239213205
183
+ 0.8739295026346509
184
+ 1.0714301299324698
185
+ 0.9589201749888177
186
+ 1.14809041309431
187
+ 1.0279961138616578
188
+ 1.2843879332111139
189
+ 0.9980898642047139
190
+ 1.0910882285137167
191
+ 1.1910994621603048
192
+ 1.1976779071162846
193
+ 1.0270638098251204
194
+ 1.2220934291917132
195
+ 0.9611484459378375
196
+ 0.9871587278934714
197
+ 0.9060246122095001
198
+ 1.0959843237131284
199
+ 0.9700223946955309
200
+ 1.0498803420679965
201
+ 1.0475382651669705
202
+ 1.0059266007003917
203
+ 1.0714737680416309
204
+ 0.951384419013124
205
+ 0.9479888286440549
206
+ 1.0542577365988604
207
+ 1.016238898707914
208
+ 1.0995054504736916
209
+ 1.1477940127122832
210
+ 1.2866411250041168
211
+ 1.0650098731221977
212
+ 0.9860548967413242
213
+ 0.9771063569815518
214
+ 1.1689613039067017
215
+ 1.2131012513227801
216
+ 0.9157838692325425
217
+ 1.196035919335185
218
+ 0.9984466424423392
219
+ 1.0338884436784386
220
+ 1.0604190223178511
221
+ 1.0778312950699847
222
+ 1.0141905676227048
223
+ 1.1351854074162966
224
+ 1.0823751131863455
225
+ 1.0774467342330059
226
+ 1.1988599563083882
227
+ 1.0597823103860888
228
+ 0.9490575565630728
229
+ 1.0265415564313394
230
+ 0.9928424796636406
231
+ 0.9649558709621114
232
+ 1.1639729225532656
233
+ 1.2690971275214644
234
+ 0.9947852459552926
235
+ 1.1224921724141694
236
+ 1.0579984191979626
237
+ 1.1126019562425213
238
+ 1.0158244870626723
239
+ 0.9555618982797799
240
+ 0.9588195580340969
241
+ 1.0645762194124129
242
+ 1.0415816632665307
243
+ 1.1812427042536247
244
+ 1.0065975366287379
245
+ 0.960924800628389
246
+ 1.3573079160857586
247
+ 0.9866235605370336
248
+ 0.9916212003117695
249
+ 0.9010352057787099
250
+ 1.121277156312779
251
+ 0.9437804784918672
252
+ 0.996091661848292
253
+ 0.9487661324634803
254
+ 1.1527688380262484
255
+ 0.9992745164352029
256
+ 1.1975508546211016
257
+ 0.9293414422623033
258
+ 1.0797344472132873
259
+ 0.9049405552567156
260
+ 1.4611880166423323
261
+ 1.0274947361530822
262
+ 1.2240380524311882
263
+ 1.0168988577724927
264
+ 0.8988195891472022
265
+ 1.0475682663670183
266
+ 1.0856048214462144
267
+ 1.1092784297366427
268
+ 0.990279165248702
269
+ 1.230371708034646
270
+ 0.993492913835311
271
+ 1.2045836378909702
272
+ 0.9515726083588797
273
+ 1.0752648287749693
274
+ 1.012390495619825
275
+ 1.0192225870853018
276
+ 1.0912666176325294
277
+ 0.9729562095880036
278
+ 1.1183845081400325
279
+ 1.1544739990998212
280
+ 1.011480025907589
281
+ 1.0130941601300414
282
+ 1.0422444170494096
283
+ 0.943715021328126
284
+ 1.1456803726694522
285
+ 1.1858219783336788
286
+ 1.1180551743822247
287
+ 1.2189108932629291
288
+ 1.0211240161153987
289
+ 0.8945939863655825
290
+ 1.0306115727882492
291
+ 1.0799777445643282
292
+ 1.0963747640814723
293
+ 1.1204684551018405
294
+ 1.162824694805974
295
+ 0.9947143340279065
296
+ 0.9049295774647885
297
+ 0.9464722865642804
298
+ 0.9961934506493363
299
+ 1.0163322099392929
300
+ 1.2927969218271438
database/tawos/deep/metricas_NEXUS_NEOSP_SVR.csv ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 0.8062983011257235
2
+ 1.032432282578063
3
+ 1.2057998735271693
4
+ 0.944706263679146
5
+ 0.8498769193979343
6
+ 0.9195844550517124
7
+ 0.7898471521104582
8
+ 0.8854971101703727
9
+ 0.8602599380336894
10
+ 1.0864997730132382
11
+ 0.9047051224946047
12
+ 0.830785127821595
13
+ 0.968528602823701
14
+ 0.9647427013409863
15
+ 0.8803396144523119
16
+ 0.8255218351240419
17
+ 1.2845506655908328
18
+ 0.8051110278846986
19
+ 0.9427991039091772
20
+ 1.005890546016643
21
+ 0.7573702741076634
22
+ 0.8910354720452858
23
+ 0.8532160135640033
24
+ 1.1444878549523763
25
+ 1.200291770273267
26
+ 0.7739849924768679
27
+ 0.9866980663533863
28
+ 0.9730769325677584
29
+ 0.9064133611041554
30
+ 0.9282202415375351
31
+ 1.0486656268899255
32
+ 1.1985104953142145
33
+ 0.8749949418910377
34
+ 0.83204613878934
35
+ 0.8959416966154746
36
+ 1.437771437808204
37
+ 0.7972563445081011
38
+ 0.7975273573133316
39
+ 0.8222179756366598
40
+ 0.6852410576316454
41
+ 0.9072674952237534
42
+ 0.9097594731194828
43
+ 0.9293712908418625
44
+ 0.8186130143598132
45
+ 0.9883094445636065
46
+ 0.9874950726126849
47
+ 0.9067444630521493
48
+ 0.8147424643808405
49
+ 0.9769770365163615
50
+ 1.1553146441052748
51
+ 0.9248074923021079
52
+ 0.779245503302422
53
+ 1.171052186094203
54
+ 0.9635483710837806
55
+ 0.8179008239979184
56
+ 0.6706407685947057
57
+ 0.9624294750099868
58
+ 1.2197978757110461
59
+ 0.7909531661194524
60
+ 1.0803593199940296
61
+ 0.9114802914406473
62
+ 0.8252694331948631
63
+ 0.9347535309765345
64
+ 0.8405034094880942
65
+ 0.8211541907990811
66
+ 0.7787658854356531
67
+ 1.009785380988038
68
+ 1.008226110084284
69
+ 1.2914676591475316
70
+ 0.9751104999980018
71
+ 1.0706215209428913
72
+ 0.8772282116130847
73
+ 0.8593988642651856
74
+ 0.9079857814376465
75
+ 0.9575733886415408
76
+ 1.1830808195812754
77
+ 0.9340204958762801
78
+ 0.8068959312713764
79
+ 0.8785332027825754
80
+ 0.9444707697242278
81
+ 0.7996509768420726
82
+ 0.9528596740965873
83
+ 1.1181609590985686
84
+ 1.261248935906939
85
+ 0.8912968642131908
86
+ 0.8752395772477681
87
+ 0.8888225813829433
88
+ 1.0407737904180023
89
+ 0.8013724379315329
90
+ 0.7708916952796265
91
+ 1.1070191918175212
92
+ 0.8380851426639113
93
+ 1.0374894873156406
94
+ 0.9320872369155058
95
+ 0.8220542074025031
96
+ 0.9198507502198694
97
+ 0.8291189746539606
98
+ 1.1506964191835722
99
+ 0.8566249054892338
100
+ 0.9138515917941085
101
+ 0.9300386138019211
102
+ 0.9094946453335703
103
+ 0.7549057366322102
104
+ 1.292293219118459
105
+ 1.0339805275036393
106
+ 0.9073673092656004
107
+ 0.7778625962124713
108
+ 1.0298527587901136
109
+ 0.920483759137768
110
+ 0.8193841392290081
111
+ 1.0389378821239195
112
+ 0.8238613534551746
113
+ 0.8101942552706594
114
+ 0.9317092158950279
115
+ 1.0984604526123922
116
+ 1.1358523733087718
117
+ 0.9452041917676606
118
+ 0.8699528570757884
119
+ 0.8409362714783831
120
+ 0.8936521097306183
121
+ 0.7758223400733447
122
+ 0.8772174708652997
123
+ 0.8406740142916599
124
+ 1.040694658204138
125
+ 0.9273182074179123
126
+ 0.7603688674090336
127
+ 0.9751945109955814
128
+ 1.084143802894714
129
+ 1.2693484262096395
130
+ 0.8354019193843212
131
+ 0.907543709539674
132
+ 1.0558465727293567
133
+ 0.8098147953254534
134
+ 0.9455703161148263
135
+ 0.7079852370456268
136
+ 0.8473878587815239
137
+ 1.0673654702360211
138
+ 0.955726481508599
139
+ 1.2230726452990561
140
+ 0.8886914699421246
141
+ 0.7804290260424386
142
+ 1.202118855066672
143
+ 0.8807908998171425
144
+ 0.9116630434799675
145
+ 0.8483734995133959
146
+ 0.769211807302471
147
+ 1.023532758777747
148
+ 1.0791785484281715
149
+ 0.9144447420127926
150
+ 0.9710861453625045
151
+ 1.0995850108613672
152
+ 0.9234888197385054
153
+ 0.8912959890653649
154
+ 0.8994072287620939
155
+ 0.7472749005262459
156
+ 1.0741407140097319
157
+ 1.0966061372874463
158
+ 0.8467868821555279
159
+ 0.8451718913921347
160
+ 0.9861835319749075
161
+ 0.8186757593792123
162
+ 0.9397285981399016
163
+ 0.9146047477560897
164
+ 1.159060349392317
165
+ 0.8503272703797968
166
+ 1.0105263054193905
167
+ 0.9536999081229803
168
+ 0.7993847773267928
169
+ 0.8893513243889266
170
+ 1.0382695432055158
171
+ 0.9839238899744519
172
+ 0.6675479590080553
173
+ 1.2622108526908364
174
+ 0.7574483075036766
175
+ 1.0199103711971074
176
+ 0.8317914180574144
177
+ 1.0047217448917451
178
+ 1.0026569141034547
179
+ 0.972001136328143
180
+ 0.8950288784648693
181
+ 0.7934024113133407
182
+ 0.7867116845217534
183
+ 1.0033144664203992
184
+ 0.9999694902310314
185
+ 0.7711521636440622
186
+ 0.8703573258222197
187
+ 0.9800476904481517
188
+ 0.9566390554883409
189
+ 1.2894743620335434
190
+ 0.9365329133029027
191
+ 1.0707039511039709
192
+ 0.9102437248688723
193
+ 0.984623302325445
194
+ 0.9165021125842072
195
+ 0.9929743127229799
196
+ 0.8953514979233843
197
+ 0.9526667988313307
198
+ 0.8391000273235487
199
+ 0.906741725448664
200
+ 0.9470387767561318
201
+ 1.1158151696482175
202
+ 0.9131595541107055
203
+ 1.1191124888663837
204
+ 0.9375500838294942
205
+ 1.0746952559298346
206
+ 0.7855234715998117
207
+ 0.7219982214660823
208
+ 0.8126066743597553
209
+ 1.0144470523753417
210
+ 0.8745130481615869
211
+ 0.8747406848331973
212
+ 0.9374340214256928
213
+ 0.9114517219303082
214
+ 1.1745071374999996
215
+ 0.9381349516761602
216
+ 1.1205979899220924
217
+ 0.7319877791095145
218
+ 0.8465155380408272
219
+ 0.9750574246276522
220
+ 0.9382537130916152
221
+ 0.9983632120749328
222
+ 0.9673934322532552
223
+ 0.9041894369630821
224
+ 0.8645259183532821
225
+ 0.83241964885402
226
+ 0.9164686438155271
227
+ 1.1618951759179148
228
+ 0.900018125852499
229
+ 0.883026573437026
230
+ 0.9623901873030714
231
+ 1.0239433550593133
232
+ 1.0280591942417596
233
+ 1.0089593617703552
234
+ 0.9164704412474767
235
+ 1.0202054944422532
236
+ 0.8154127399049739
237
+ 0.9007610664592686
238
+ 1.0454781791028918
239
+ 0.8077225216446633
240
+ 0.8161118640140392
241
+ 0.7469815532665242
242
+ 0.9686866292175995
243
+ 1.1237386561981368
244
+ 0.7824729157794813
245
+ 0.8666679857701298
246
+ 0.9021121210099953
247
+ 1.0415137899498605
248
+ 1.0349081000175604
249
+ 0.9775219992005959
250
+ 0.9348422460410203
251
+ 0.915344763863182
252
+ 0.8615701041051325
253
+ 1.015979522300568
254
+ 0.9969287393217002
255
+ 0.9778850399979674
256
+ 0.8178266380135665
257
+ 1.148272402601704
258
+ 0.7779209600702256
259
+ 0.9576367447673856
260
+ 0.9214056966523123
261
+ 0.8096689908517556
262
+ 0.7788213250890322
263
+ 1.031452887130781
264
+ 0.8219926626328246
265
+ 0.9197182177159859
266
+ 1.2908804092941828
267
+ 0.7994061306066543
268
+ 0.9376222436231869
269
+ 1.0644538120570135
270
+ 0.9417870761480787
271
+ 0.8107359878993705
272
+ 1.0258002728053766
273
+ 0.8590877381484838
274
+ 0.9012475400390169
275
+ 0.9587117084461273
276
+ 0.9118721042185678
277
+ 0.9278792165901055
278
+ 1.138065220528389
279
+ 0.9019604707421052
280
+ 0.9725894652112981
281
+ 0.7387274090609192
282
+ 0.8466713388045666
283
+ 1.1938142160256782
284
+ 1.0033000967880104
285
+ 1.029468496912473
286
+ 0.8156563978430295
287
+ 1.0966776784782355
288
+ 0.8421186310574211
289
+ 0.8839992503338451
290
+ 0.9254241126751358
291
+ 0.8718575268230014
292
+ 1.0434012110915953
293
+ 0.8738917107699167
294
+ 0.8924406897892762
295
+ 0.923814367476265
296
+ 0.9198097772768266
297
+ 0.8318099509930444
298
+ 1.2262911636345184
299
+ 0.9122391372452223
300
+ 0.8687238804048997
database/tawos/tfidf/ALOY_tfidf-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/tfidf/APSTUD_tfidf-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/tfidf/CLI_tfidf-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/tfidf/CLOV_tfidf-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/tfidf/COMPASS_tfidf-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/tfidf/CONFCLOUD_tfidf-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/tfidf/CONFSERVER_tfidf-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/tfidf/DAEMON_tfidf-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/tfidf/DM_tfidf-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/tfidf/DNN_tfidf-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/tfidf/DURACLOUD_tfidf-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/tfidf/EVG_tfidf-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/tfidf/FAB_tfidf-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/tfidf/MDL_tfidf-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/tfidf/MESOS_tfidf-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/tfidf/MULE_tfidf-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/tfidf/NEXUS_tfidf-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/tfidf/SERVER_tfidf-se.csv ADDED
The diff for this file is too large to render. See raw diff
 
database/tawos/tfidf/STL_tfidf-se.csv ADDED
The diff for this file is too large to render. See raw diff