Spaces:

giseldo
/

story_point_estimator_metrics

Sleeping

App Files Community

giseldo committed on Sep 3, 2023

Commit

cfa1f4f

1 Parent(s): 1dcc182

ultima versao

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

__pycache__/util.cpython-310.pyc +0 -0
_gerador_planilha.ipynb +423 -0
_hiper_param.ipynb +145 -0
_metrics generator.ipynb +149 -0
app.py +37 -12
database/tawos/README.md +48 -0
database/tawos/deep/ALOY_deep-se.csv +0 -0
database/tawos/deep/APSTUD_deep-se.csv +0 -0
database/tawos/deep/CLI_deep-se.csv +0 -0
database/tawos/deep/CLOV_deep-se.csv +0 -0
database/tawos/deep/COMPASS_deep-se.csv +0 -0
database/tawos/deep/CONFCLOUD_deep-se.csv +0 -0
database/tawos/deep/CONFSERVER_deep-se.csv +0 -0
database/tawos/deep/DAEMON_deep-se.csv +0 -0
database/tawos/deep/DM_deep-se.csv +0 -0
database/tawos/deep/DNN_deep-se.csv +0 -0
database/tawos/deep/DURACLOUD_deep-se.csv +0 -0
database/tawos/deep/EVG_deep-se.csv +0 -0
database/tawos/deep/FAB_deep-se.csv +0 -0
database/tawos/deep/MDL_deep-se.csv +0 -0
database/tawos/deep/MESOS_deep-se.csv +0 -0
database/tawos/deep/MULE_deep-se.csv +0 -0
database/tawos/deep/NEXUS_deep-se.csv +0 -0
database/tawos/deep/SERVER_deep-se.csv +0 -0
database/tawos/deep/STL_deep-se.csv +0 -0
database/tawos/deep/TIDOC_deep-se.csv +0 -0
database/tawos/deep/TIMOB_deep-se.csv +0 -0
database/tawos/deep/TISTUD_deep-se.csv +0 -0
database/tawos/deep/XD_deep-se.csv +0 -0
database/tawos/deep/metricas_NEXUS_MbR.csv +300 -0
database/tawos/deep/metricas_NEXUS_NEOSP_SVR.csv +300 -0
database/tawos/tfidf/ALOY_tfidf-se.csv +0 -0
database/tawos/tfidf/APSTUD_tfidf-se.csv +0 -0
database/tawos/tfidf/CLI_tfidf-se.csv +0 -0
database/tawos/tfidf/CLOV_tfidf-se.csv +0 -0
database/tawos/tfidf/COMPASS_tfidf-se.csv +0 -0
database/tawos/tfidf/CONFCLOUD_tfidf-se.csv +0 -0
database/tawos/tfidf/CONFSERVER_tfidf-se.csv +0 -0
database/tawos/tfidf/DAEMON_tfidf-se.csv +0 -0
database/tawos/tfidf/DM_tfidf-se.csv +0 -0
database/tawos/tfidf/DNN_tfidf-se.csv +0 -0
database/tawos/tfidf/DURACLOUD_tfidf-se.csv +0 -0
database/tawos/tfidf/EVG_tfidf-se.csv +0 -0
database/tawos/tfidf/FAB_tfidf-se.csv +0 -0
database/tawos/tfidf/MDL_tfidf-se.csv +0 -0
database/tawos/tfidf/MESOS_tfidf-se.csv +0 -0
database/tawos/tfidf/MULE_tfidf-se.csv +0 -0
database/tawos/tfidf/NEXUS_tfidf-se.csv +0 -0
database/tawos/tfidf/SERVER_tfidf-se.csv +0 -0
database/tawos/tfidf/STL_tfidf-se.csv +0 -0

__pycache__/util.cpython-310.pyc ADDED Viewed

Binary file (2.55 kB). View file

_gerador_planilha.ipynb ADDED Viewed

	@@ -0,0 +1,423 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import csv\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "def gerar_metricas(nome_projeto):\n",
+    "    list_output_MbR = []\n",
+    "    with open(\"metricas_{}_MbR.csv\".format(nome_projeto), \"r\") as arquivo:\n",
+    "        arquivo_csv = csv.reader(arquivo)\n",
+    "        for i, linha in enumerate(arquivo_csv):\n",
+    "            list_output_MbR.append(float(linha[0]))\n",
+    "    list_output_NEOSP_SVR = []\n",
+    "    with open(\"metricas_{}_NEOSP_SVR.csv\".format(nome_projeto), \"r\") as arquivo:\n",
+    "        arquivo_csv = csv.reader(arquivo)\n",
+    "        for i, linha in enumerate(arquivo_csv):\n",
+    "            list_output_NEOSP_SVR.append(float(linha[0]))\n",
+    "    list_output_TFIDF_SVR = []\n",
+    "    with open(\"metricas_{}_TFIDF.csv\".format(nome_projeto), \"r\") as arquivo:\n",
+    "        arquivo_csv = csv.reader(arquivo)\n",
+    "        for i, linha in enumerate(arquivo_csv):\n",
+    "            list_output_TFIDF_SVR.append(float(linha[0]))\n",
+    "            \n",
+    "    list_results = [[\"MbR Regressor\", np.mean(list_output_MbR)], [\"NEOSP-SVR Regressor\", np.mean(list_output_NEOSP_SVR)], [\"TFIDF-SVR Regressor\", np.mean(list_output_TFIDF_SVR)]]\n",
+    "    \n",
+    "    df = pd.DataFrame(list_results, columns=[\"Model\", \"MAE\"])\n",
+    "    \n",
+    "    df_list_output_MbR = pd.DataFrame(list_output_MbR, columns=[\"MAE\"])\n",
+    "    df_list_output_NEOSP = pd.DataFrame(list_output_NEOSP_SVR, columns=[\"MAE\"])\n",
+    "    df_list_output_TFIDF = pd.DataFrame(list_output_TFIDF_SVR, columns=[\"MAE\"])\n",
+    "    \n",
+    "    \n",
+    "    return df_list_output_MbR, df_list_output_NEOSP, df_list_output_TFIDF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "          MAE\n",
+      "0    1.719630\n",
+      "1    1.619240\n",
+      "2    2.035138\n",
+      "3    2.714286\n",
+      "4    1.690284\n",
+      "..        ...\n",
+      "295  1.877688\n",
+      "296  1.825845\n",
+      "297  2.281874\n",
+      "298  1.448541\n",
+      "299  2.011329\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    3.991628\n",
+      "1    5.509540\n",
+      "2    4.276285\n",
+      "3    3.210280\n",
+      "4    4.419198\n",
+      "..        ...\n",
+      "295  4.515966\n",
+      "296  4.156574\n",
+      "297  4.072459\n",
+      "298  3.360661\n",
+      "299  4.133661\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    1.749430\n",
+      "1    1.615589\n",
+      "2    1.891128\n",
+      "3    1.352403\n",
+      "4    2.137931\n",
+      "..        ...\n",
+      "295  1.505617\n",
+      "296  1.497649\n",
+      "297  1.944096\n",
+      "298  1.594697\n",
+      "299  1.889890\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    6.520257\n",
+      "1    7.011882\n",
+      "2    3.508960\n",
+      "3    3.912252\n",
+      "4    6.778048\n",
+      "..        ...\n",
+      "295  5.067004\n",
+      "296  3.978298\n",
+      "297  4.770977\n",
+      "298  3.592259\n",
+      "299  3.753875\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    1.279750\n",
+      "1    1.747535\n",
+      "2    1.429980\n",
+      "3    1.367193\n",
+      "4    1.769231\n",
+      "..        ...\n",
+      "295  1.736029\n",
+      "296  1.297173\n",
+      "297  1.407627\n",
+      "298  1.742932\n",
+      "299  1.625247\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    1.484524\n",
+      "1    0.870833\n",
+      "2    1.362500\n",
+      "3    1.141270\n",
+      "4    1.118586\n",
+      "..        ...\n",
+      "295  1.162477\n",
+      "296  1.446631\n",
+      "297  1.257882\n",
+      "298  2.020915\n",
+      "299  1.351226\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    1.562142\n",
+      "1    1.121421\n",
+      "2    1.011506\n",
+      "3    1.309862\n",
+      "4    1.386479\n",
+      "..        ...\n",
+      "295  1.217656\n",
+      "296  1.166991\n",
+      "297  1.309732\n",
+      "298  1.546553\n",
+      "299  1.458637\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    3.183489\n",
+      "1    3.254141\n",
+      "2    2.820652\n",
+      "3    3.147516\n",
+      "4    3.724896\n",
+      "..        ...\n",
+      "295  3.343784\n",
+      "296  2.467027\n",
+      "297  2.586486\n",
+      "298  2.901351\n",
+      "299  2.793243\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    3.085758\n",
+      "1    2.960811\n",
+      "2    2.402817\n",
+      "3    3.173362\n",
+      "4    3.209479\n",
+      "..        ...\n",
+      "295  2.802939\n",
+      "296  2.611202\n",
+      "297  2.317784\n",
+      "298  3.191521\n",
+      "299  2.972561\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    0.817816\n",
+      "1    1.292632\n",
+      "2    0.798383\n",
+      "3    0.766996\n",
+      "4    0.811808\n",
+      "..        ...\n",
+      "295  0.764333\n",
+      "296  0.871069\n",
+      "297  0.830722\n",
+      "298  1.263252\n",
+      "299  0.915469\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    0.861718\n",
+      "1    1.440051\n",
+      "2    0.793965\n",
+      "3    1.079316\n",
+      "4    0.765291\n",
+      "..        ...\n",
+      "295  1.312637\n",
+      "296  0.746560\n",
+      "297  0.861487\n",
+      "298  0.844606\n",
+      "299  0.893629\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    0.651576\n",
+      "1    0.649636\n",
+      "2    0.624728\n",
+      "3    0.599720\n",
+      "4    0.670485\n",
+      "..        ...\n",
+      "295  0.625787\n",
+      "296  0.620482\n",
+      "297  0.617713\n",
+      "298  0.653711\n",
+      "299  0.671403\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    1.843216\n",
+      "1    1.947581\n",
+      "2    1.326613\n",
+      "3    1.818193\n",
+      "4    1.829548\n",
+      "..        ...\n",
+      "295  1.692674\n",
+      "296  1.968132\n",
+      "297  3.074359\n",
+      "298  1.361050\n",
+      "299  2.180586\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "           MAE\n",
+      "0     8.977660\n",
+      "1    13.935714\n",
+      "2    11.211688\n",
+      "3    10.565220\n",
+      "4    10.296718\n",
+      "..         ...\n",
+      "295  12.521110\n",
+      "296  11.826106\n",
+      "297   9.696185\n",
+      "298  11.501992\n",
+      "299  10.755109\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    1.563933\n",
+      "1    1.527785\n",
+      "2    1.514647\n",
+      "3    1.419521\n",
+      "4    1.537474\n",
+      "..        ...\n",
+      "295  1.508380\n",
+      "296  1.713129\n",
+      "297  1.520726\n",
+      "298  1.499336\n",
+      "299  1.515071\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    3.124855\n",
+      "1    2.894180\n",
+      "2    3.055756\n",
+      "3    2.742524\n",
+      "4    2.910823\n",
+      "..        ...\n",
+      "295  2.968017\n",
+      "296  2.876247\n",
+      "297  2.786590\n",
+      "298  2.969128\n",
+      "299  2.820308\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    1.275430\n",
+      "1    0.953375\n",
+      "2    0.943311\n",
+      "3    1.077534\n",
+      "4    0.880661\n",
+      "..        ...\n",
+      "295  0.955650\n",
+      "296  0.951646\n",
+      "297  0.953232\n",
+      "298  0.975788\n",
+      "299  1.029818\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    2.097472\n",
+      "1    1.692843\n",
+      "2    1.335077\n",
+      "3    1.525819\n",
+      "4    1.529649\n",
+      "..        ...\n",
+      "295  1.723645\n",
+      "296  1.577294\n",
+      "297  1.154258\n",
+      "298  1.946014\n",
+      "299  1.708333\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    0.933591\n",
+      "1    0.871815\n",
+      "2    0.946461\n",
+      "3    0.765251\n",
+      "4    0.945946\n",
+      "..        ...\n",
+      "295  0.565508\n",
+      "296  0.833871\n",
+      "297  1.040323\n",
+      "298  1.303763\n",
+      "299  0.954301\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    2.833315\n",
+      "1    2.558738\n",
+      "2    3.003724\n",
+      "3    2.535398\n",
+      "4    2.797720\n",
+      "..        ...\n",
+      "295  2.425558\n",
+      "296  2.596320\n",
+      "297  2.665249\n",
+      "298  2.642077\n",
+      "299  3.175072\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    2.732684\n",
+      "1    2.229971\n",
+      "2    2.475378\n",
+      "3    2.495666\n",
+      "4    2.462650\n",
+      "..        ...\n",
+      "295  2.512257\n",
+      "296  2.619655\n",
+      "297  2.390718\n",
+      "298  2.241314\n",
+      "299  2.395140\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    2.290064\n",
+      "1    2.309586\n",
+      "2    2.167920\n",
+      "3    2.191076\n",
+      "4    2.249119\n",
+      "..        ...\n",
+      "295  2.204164\n",
+      "296  2.246984\n",
+      "297  2.521996\n",
+      "298  2.094582\n",
+      "299  2.248426\n",
+      "\n",
+      "[300 rows x 1 columns]\n",
+      "          MAE\n",
+      "0    1.699304\n",
+      "1    1.956807\n",
+      "2    1.673685\n",
+      "3    1.788314\n",
+      "4    1.908642\n",
+      "..        ...\n",
+      "295  1.915542\n",
+      "296  2.274142\n",
+      "297  1.923457\n",
+      "298  1.721884\n",
+      "299  1.901150\n",
+      "\n",
+      "[300 rows x 1 columns]\n"
+     ]
+    }
+   ],
+   "source": [
+    "LIBRARIES = [\"ALOY\", \"APSTUD\", \"CLI\", \"CLOV\", \"COMPASS\", \"CONFCLOUD\", \"CONFSERVER\", \"DAEMON\", \"DM\", \"DNN\", \"DURACLOUD\", \"EVG\", \"FAB\", \n",
+    "             \"MDL\", \"MESOS\" ,\"MULE\", \"NEXUS\", \"SERVER\", \"STL\", \"TIDOC\", \"TIMOB\", \"TISTUD\", \"XD\"]\n",
+    "\n",
+    "for lp in LIBRARIES:\n",
+    "    df_list_output_MbR, df_list_output_NEOSP, df_list_output_TFIDF = gerar_metricas(lp)\n",
+    "    print(df_list_output_MbR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

_hiper_param.ipynb ADDED Viewed

	@@ -0,0 +1,145 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.dummy import DummyRegressor\n",
+    "from nltk.corpus import stopwords\n",
+    "from textblob import TextBlob\n",
+    "import textstat\n",
+    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+    "from sklearn import svm\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "from sklearn.feature_selection import SelectKBest\n",
+    "import pandas as pd\n",
+    "from util import escape_tags_and_content, escape_tags, escape_strings, escape_links, escape_hex_character_codes, escape_punctuation_boundaries, escape_odd_spaces\n",
+    "from sklearn.model_selection import RepeatedKFold\n",
+    "from sklearn.svm import SVR\n",
+    "from sklearn.model_selection import GridSearchCV\n",
+    "import numpy as np\n",
+    "import util"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def gerar_metricas(project_name):\n",
+    "\n",
+    "    # carregando os dados\n",
+    "    df = pd.read_csv(\"database\\\\tawos\\\\deep\\\\{}_deep-se.csv\".format(project_name))\n",
+    "\n",
+    "    # criação de uma nova coluna\n",
+    "    df[\"context\"] = df[\"title\"] + df[\"description\"]\n",
+    "\n",
+    "    # pré-processamento\n",
+    "    df[\"context\"] = df[\"context\"].apply(lambda x: escape_tags_and_content(x))\n",
+    "    df[\"context\"] = df[\"context\"].apply(lambda x: escape_tags(x))\n",
+    "    df[\"context\"] = df[\"context\"].apply(lambda x: escape_strings(x))\n",
+    "    df[\"context\"] = df[\"context\"].apply(lambda x: escape_links(x))\n",
+    "    df[\"context\"] = df[\"context\"].apply(lambda x: escape_hex_character_codes(x))\n",
+    "    df[\"context\"] = df[\"context\"].apply(lambda x: escape_punctuation_boundaries(x))\n",
+    "    df[\"context\"] = df[\"context\"].apply(lambda x: escape_odd_spaces(x))\n",
+    "\n",
+    "    # removendo stop-words\n",
+    "    stop = stopwords.words('english')\n",
+    "    df['context'] = df['context'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))\n",
+    "\n",
+    "    # renomeando as colunas porque senão dá um problema com a extração de features do NEOSP\n",
+    "    df = df.rename(columns={ \"issuekey\": \"issuekey_\", \"created\": \"created_\", \"description\": \"description_\", \"title\": \"title_\", \"context\": \"context_\", \"storypoint\": \"storypoint_\"})\n",
+    "    y = df[\"storypoint_\"]\n",
+    "    df = df.drop(columns=['storypoint_'])\n",
+    "\n",
+    "    # 5º coluna -> extração das features para o neosp\n",
+    "    df[\"gunning_fog_\"] = df['context_'].apply(textstat.gunning_fog)\n",
+    "    df[\"flesch_reading_ease_\"] = df['context_'].apply(textstat.flesch_reading_ease)\n",
+    "    df[\"flesch_kincaid_grade_\"] = df['context_'].apply(textstat.flesch_kincaid_grade)\n",
+    "    df[\"smog_index_\"] = df['context_'].apply(textstat.smog_index)\n",
+    "    df[\"coleman_liau_index_\"] = df['context_'].apply(textstat.coleman_liau_index)\n",
+    "    df[\"automated_readability_index_\"] = df['context_'].apply(textstat.automated_readability_index)\n",
+    "    df[\"dale_chall_readability_score_\"] = df['context_'].apply(textstat.dale_chall_readability_score)\n",
+    "    df[\"difficult_words_\"] = df['context_'].apply(textstat.difficult_words)\n",
+    "    df[\"linsear_write_formula_\"] = df['context_'].apply(textstat.linsear_write_formula)\n",
+    "    df[\"polarity_\"] = df[\"context_\"].apply(lambda x: TextBlob(x).sentiment.polarity)\n",
+    "    df[\"subjectivity_\"] = df[\"context_\"].apply(lambda x: TextBlob(x).sentiment.subjectivity)\n",
+    "    # 16º colunas\n",
+    "\n",
+    "    # Extração das features para o TFIDF\n",
+    "    vectorizer = TfidfVectorizer()\n",
+    "    X_vec = vectorizer.fit_transform(df[\"context_\"])\n",
+    "\n",
+    "    df_vec = pd.DataFrame(data = X_vec.toarray(), columns = vectorizer.get_feature_names_out())\n",
+    "\n",
+    "    # Juntando as features do neosp com o tfidf\n",
+    "    df = df.join(df_vec)\n",
+    "    X = df\n",
+    "   \n",
+    "    grid = GridSearchCV(\n",
+    "            estimator=SVR(kernel='rbf'),\n",
+    "            param_grid={\n",
+    "                'C': [1.1, 5.4, 170, 1001],\n",
+    "                'epsilon': [0.0003, 0.007, 0.0109, 0.019, 0.14, 0.05, 8, 0.2, 3, 2, 7],\n",
+    "                'gamma': [0.7001, 0.008, 0.001, 3.1, 1, 1.3, 5]\n",
+    "            }, \n",
+    "            cv=10, scoring='neg_mean_squared_error', verbose=0, n_jobs=-1)\n",
+    "\n",
+    "    #print the best parameters from all possible combinations\n",
+    "    grid.fit(X[X.columns[5:16]], y)\n",
+    "    print(grid.best_params_)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'C': 5.4, 'epsilon': 0.2, 'gamma': 5}\n",
+      "{'C': 1.1, 'epsilon': 2, 'gamma': 0.7001}\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "LIBRARIES = [\"ALOY\", \"CLI\"]\n",
+    "             #\"APSTUD\", \n",
+    "             #\"CLOV\", \"COMPASS\", \"CONFCLOUD\", \"CONFSERVER\", \"DAEMON\", \"DM\", \"DNN\", \"DURACLOUD\", \"EVG\", \"FAB\", \n",
+    "             #\"MDL\", \"MESOS\" ,\"MULE\", \"NEXUS\", \"SERVER\", \"STL\", \"TIDOC\", \"TIMOB\", \"TISTUD\", \"XD\"]\n",
+    "\n",
+    "for lp in LIBRARIES:\n",
+    "    gerar_metricas(lp)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

_metrics generator.ipynb ADDED Viewed

	@@ -0,0 +1,149 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.dummy import DummyRegressor\n",
+    "from nltk.corpus import stopwords\n",
+    "from textblob import TextBlob\n",
+    "import textstat\n",
+    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+    "from sklearn import svm\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "from sklearn.feature_selection import SelectKBest\n",
+    "import pandas as pd\n",
+    "from util import escape_tags_and_content, escape_tags, escape_strings, escape_links, escape_hex_character_codes, escape_punctuation_boundaries, escape_odd_spaces\n",
+    "from sklearn.model_selection import cross_val_score\n",
+    "from sklearn.model_selection import RepeatedKFold\n",
+    "from sklearn.pipeline import make_pipeline\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from sklearn.feature_selection import f_classif, f_regression"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def gerar_metricas(project_name):\n",
+    "\n",
+    "    # carregando os dados\n",
+    "    df = pd.read_csv(\"database\\\\tawos\\\\deep\\\\{}_deep-se.csv\".format(project_name))\n",
+    "\n",
+    "    # criação de uma nova coluna\n",
+    "    df[\"context\"] = df[\"title\"] + df[\"description\"]\n",
+    "\n",
+    "    # pré-processamento\n",
+    "    df[\"context\"] = df[\"context\"].apply(lambda x: escape_tags_and_content(x))\n",
+    "    df[\"context\"] = df[\"context\"].apply(lambda x: escape_tags(x))\n",
+    "    df[\"context\"] = df[\"context\"].apply(lambda x: escape_strings(x))\n",
+    "    df[\"context\"] = df[\"context\"].apply(lambda x: escape_links(x))\n",
+    "    df[\"context\"] = df[\"context\"].apply(lambda x: escape_hex_character_codes(x))\n",
+    "    df[\"context\"] = df[\"context\"].apply(lambda x: escape_punctuation_boundaries(x))\n",
+    "    df[\"context\"] = df[\"context\"].apply(lambda x: escape_odd_spaces(x))\n",
+    "\n",
+    "    # removendo stop-words\n",
+    "    stop = stopwords.words('english')\n",
+    "    df['context'] = df['context'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))\n",
+    "\n",
+    "    # renomeando as colunas porque senão dá um problema com a extração de features do NEOSP\n",
+    "    df = df.rename(columns={ \"issuekey\": \"issuekey_\", \"created\": \"created_\", \"description\": \"description_\", \"title\": \"title_\", \"context\": \"context_\", \"storypoint\": \"storypoint_\"})\n",
+    "    y = df[\"storypoint_\"]\n",
+    "    df = df.drop(columns=['storypoint_'])\n",
+    "\n",
+    "    # 5º coluna -> extração das features para o neosp\n",
+    "    df[\"gunning_fog_\"] = df['context_'].apply(textstat.gunning_fog)\n",
+    "    df[\"flesch_reading_ease_\"] = df['context_'].apply(textstat.flesch_reading_ease)\n",
+    "    df[\"flesch_kincaid_grade_\"] = df['context_'].apply(textstat.flesch_kincaid_grade)\n",
+    "    df[\"smog_index_\"] = df['context_'].apply(textstat.smog_index)\n",
+    "    df[\"coleman_liau_index_\"] = df['context_'].apply(textstat.coleman_liau_index)\n",
+    "    df[\"automated_readability_index_\"] = df['context_'].apply(textstat.automated_readability_index)\n",
+    "    df[\"dale_chall_readability_score_\"] = df['context_'].apply(textstat.dale_chall_readability_score)\n",
+    "    df[\"difficult_words_\"] = df['context_'].apply(textstat.difficult_words)\n",
+    "    df[\"linsear_write_formula_\"] = df['context_'].apply(textstat.linsear_write_formula)\n",
+    "    df[\"polarity_\"] = df[\"context_\"].apply(lambda x: TextBlob(x).sentiment.polarity)\n",
+    "    df[\"subjectivity_\"] = df[\"context_\"].apply(lambda x: TextBlob(x).sentiment.subjectivity)\n",
+    "    # 16º colunas\n",
+    "\n",
+    "    # Extração das features para o TFIDF\n",
+    "    vectorizer = TfidfVectorizer()\n",
+    "    X_vec = vectorizer.fit_transform(df[\"context_\"])\n",
+    "\n",
+    "    df_vec = pd.DataFrame(data = X_vec.toarray(), columns = vectorizer.get_feature_names_out())\n",
+    "\n",
+    "    # Juntando as features do neosp com o tfidf\n",
+    "    df = df.join(df_vec)\n",
+    "    X = df\n",
+    "\n",
+    "    results = list()\n",
+    "\n",
+    "    rkf = RepeatedKFold(n_splits=10, n_repeats=30, random_state=2652124)\n",
+    "    \n",
+    "    #model = DummyRegressor(strategy=\"mean\")\n",
+    "    #results.append(cross_val_score(model, X[X.columns[5:16]], y, cv = rkf, scoring=\"neg_mean_absolute_error\"))\n",
+    "    \n",
+    "    model = make_pipeline(SelectKBest(f_regression, k=50), StandardScaler(), svm.SVR())\n",
+    "    results.append(cross_val_score(model, X[X.columns[16:]], y, cv = rkf, scoring=\"neg_mean_absolute_error\"))\n",
+    "    \n",
+    "    list_results_TFIDF_MbR = results[0]\n",
+    "    df_results_MAE_TFIDF = pd.DataFrame(list_results_TFIDF_MbR, columns = [\"MAE\"])\n",
+    "    df_results_MAE_TFIDF = df_results_MAE_TFIDF.apply(lambda x: x*-1)\n",
+    "    df_results_MAE_TFIDF.to_csv(\"metricas_{}_TFIDF.csv\".format(project_name),index = False, header=False)\n",
+    "    \n",
+    "    #list_results = [ [\"MbR\", results[0].mean()*-1], [\"NEOSP-SVR\", results[1].mean()*-1] ]\n",
+    "    #df = pd.DataFrame(list_results, columns = [\"Model\",\"MAE\"])\n",
+    "\n",
+    "    #list_results_MAE_MbR = results[0]\n",
+    "    #df_results_MAE_MbR = pd.DataFrame(list_results_MAE_MbR, columns = [\"MAE\"])\n",
+    "    #df_results_MAE_MbR = df_results_MAE_MbR.apply(lambda x: x*-1)\n",
+    "    #df_results_MAE_MbR.to_csv(\"metricas_{}_MbR.csv\".format(project_name),index = False, header=False)\n",
+    "\n",
+    "    #list_results_MAE_NEOSP = results[1]\n",
+    "    #df_results_MAE_NEOSP = pd.DataFrame(list_results_MAE_NEOSP, columns = [\"MAE\"])\n",
+    "    #df_results_MAE_NEOSP = df_results_MAE_NEOSP.apply(lambda x: x*-1)\n",
+    "    #df_results_MAE_NEOSP.to_csv(\"metricas_{}_NEOSP_SVR.csv\".format(project_name), index = False, header=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "LIBRARIES = [\"ALOY\", \"APSTUD\", \"CLI\", \"CLOV\", \"COMPASS\", \"CONFCLOUD\", \"CONFSERVER\", \"DAEMON\", \"DM\", \"DNN\", \"DURACLOUD\", \"EVG\", \"FAB\", \n",
+    "             \"MDL\", \"MESOS\" ,\"MULE\", \"NEXUS\", \"SERVER\", \"STL\", \"TIDOC\", \"TIMOB\", \"TISTUD\", \"XD\"]\n",
+    "\n",
+    "for lp in LIBRARIES:\n",
+    "    gerar_metricas(lp)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

app.py CHANGED Viewed

@@ -9,11 +9,15 @@ import matplotlib.gridspec as gridspec
 LIBRARIES = ["ALOY", "APSTUD", "CLI", "CLOV", "COMPASS", "CONFCLOUD", "CONFSERVER", "DAEMON", "DM", "DNN", "DURACLOUD", "EVG", "FAB",
              "MDL", "MESOS" ,"MULE", "NEXUS", "SERVER", "STL", "TIDOC", "TIMOB", "TISTUD", "XD"]
-def grafico(list_output_MbR, list_output_NEOSP, nome_projeto, pip_choices):
-    list_results = [["MbR Regressor", np.mean(list_output_MbR)], ["NEOSP-SVR Regressor", np.mean(list_output_NEOSP)]]
     df = pd.DataFrame(list_results, columns=["Model", "MAE"])
     df_list_output_MbR = pd.DataFrame(list_output_MbR, columns=["MAE"])
     df_list_output_NEOSP = pd.DataFrame(list_output_NEOSP, columns=["MAE"])
     fig, ax = plt.subplots(2, 2)
     G = gridspec.GridSpec(2, 2)
@@ -31,16 +35,21 @@ def grafico(list_output_MbR, list_output_NEOSP, nome_projeto, pip_choices):
         axes_1.scatter(range(1,51), df_list_output_MbR["MAE"].loc[1:50],  label="MbR Regressor", color="red", alpha=0.5,)
     if "NEOSP-SVR Regressor" in pip_choices:
         axes_1.scatter(range(1,51), df_list_output_NEOSP["MAE"].loc[1:50], label="NEOSP-SVR Regressor", color = "blue",  alpha=0.5)
     # ax2
     axes_2.set_ylabel("MAE Médio")
     axes_2.set_xlabel("Modelos")
     if "MbR Regressor" in pip_choices:
         graf1 = axes_2.bar(df["Model"].iloc[[0]], df["MAE"].iloc[[0]], color="red", alpha=0.5)
-        axes_2.bar_label(graf1, fmt="%.01f", size=10, label_type="edge")
     if "NEOSP-SVR Regressor" in pip_choices:
         graf2 = axes_2.bar(df["Model"].iloc[[1]], df["MAE"].iloc[[1]], color = "blue",  alpha=0.5)
-        axes_2.bar_label(graf2, fmt="%.01f", size=10, label_type="edge")
     # ax3
     axes_3.set_xlabel("MAE")
@@ -49,6 +58,8 @@ def grafico(list_output_MbR, list_output_NEOSP, nome_projeto, pip_choices):
         axes_3.hist(df_list_output_MbR["MAE"], color="red",  alpha=0.5)
     if "NEOSP-SVR Regressor" in pip_choices:
         axes_3.hist(df_list_output_NEOSP["MAE"], color="blue", alpha=0.5)
     # graficos geral
     fig.set_figwidth(15)
@@ -56,12 +67,18 @@ def grafico(list_output_MbR, list_output_NEOSP, nome_projeto, pip_choices):
     fig.suptitle("Projeto {}".format(nome_projeto))
     # text
-    resultado = ""
-    if  (pip_choices == ["NEOSP-SVR Regressor", "MbR Regressor"]) or (pip_choices == ["MbR Regressor", "NEOSP-SVR Regressor"]):
-        res = wilcoxon(list_output_MbR, list_output_NEOSP)
-        resultado = "MbR vs. NEOSP-SVR => Statistics: {} | valor-p: {}".format(res.statistic, res.pvalue)
-    return gr.update(value=plt, visible=True), gr.update(value=resultado, visible=True)
 def create_pip_plot(libraries, pip_choices):
@@ -76,7 +93,13 @@ def create_pip_plot(libraries, pip_choices):
         arquivo_csv = csv.reader(arquivo)
         for i, linha in enumerate(arquivo_csv):
             list_output_NEOSP_SVR.append(float(linha[0]))
-    return grafico(list_output_MbR, list_output_NEOSP_SVR, nome_projeto, pip_choices)
 demo = gr.Blocks()
@@ -88,7 +111,7 @@ with demo:
             libraries = gr.Dropdown(choices=LIBRARIES, label="Projeto", value="ALOY")
         with gr.Column():
             gr.Markdown("## Gráficos")
-            pip = gr.CheckboxGroup(choices=["MbR Regressor", "NEOSP-SVR Regressor"], label="Modelos Preditivos")
             # stars = gr.CheckboxGroup(choices=["Stars", "Week over Week"], label="")
             # issues = gr.CheckboxGroup(choices=["Issue", "Exclude org members", "week over week"], label="")
     with gr.Row():
@@ -96,11 +119,13 @@ with demo:
     with gr.Row():
         with gr.Column():
             star_plot = gr.Text(visible=False, label="Wilcoxon Test")
             pip_plot = gr.Plot(visible=False)
             # issue_plot = gr.Plot(visible=False)
-    fetch.click(create_pip_plot, inputs=[libraries, pip], outputs=[pip_plot, star_plot])
     #fetch.click(create_star_plot, inputs=[libraries, pip], outputs=star_plot)
     # fetch.click(create_issue_plot, inputs=[libraries, issues], outputs=issue_plot)

 LIBRARIES = ["ALOY", "APSTUD", "CLI", "CLOV", "COMPASS", "CONFCLOUD", "CONFSERVER", "DAEMON", "DM", "DNN", "DURACLOUD", "EVG", "FAB",
              "MDL", "MESOS" ,"MULE", "NEXUS", "SERVER", "STL", "TIDOC", "TIMOB", "TISTUD", "XD"]
+def grafico(list_output_MbR, list_output_NEOSP, list_output_TFIDF, nome_projeto, pip_choices):
+    list_results = [["MbR Regressor", np.mean(list_output_MbR)], ["NEOSP-SVR Regressor", np.mean(list_output_NEOSP)], ["TFIDF-SVR Regressor", np.mean(list_output_TFIDF)]]
     df = pd.DataFrame(list_results, columns=["Model", "MAE"])
     df_list_output_MbR = pd.DataFrame(list_output_MbR, columns=["MAE"])
     df_list_output_NEOSP = pd.DataFrame(list_output_NEOSP, columns=["MAE"])
+    df_list_output_TFIDF = pd.DataFrame(list_output_TFIDF, columns=["MAE"])
     fig, ax = plt.subplots(2, 2)
     G = gridspec.GridSpec(2, 2)
         axes_1.scatter(range(1,51), df_list_output_MbR["MAE"].loc[1:50],  label="MbR Regressor", color="red", alpha=0.5,)
     if "NEOSP-SVR Regressor" in pip_choices:
         axes_1.scatter(range(1,51), df_list_output_NEOSP["MAE"].loc[1:50], label="NEOSP-SVR Regressor", color = "blue",  alpha=0.5)
+    if "TFIDF-SVR Regressor" in pip_choices:
+        axes_1.scatter(range(1,51), df_list_output_TFIDF["MAE"].loc[1:50], label="TFIDF-SVR Regressor", color = "green",  alpha=0.5)
     # ax2
     axes_2.set_ylabel("MAE Médio")
     axes_2.set_xlabel("Modelos")
     if "MbR Regressor" in pip_choices:
         graf1 = axes_2.bar(df["Model"].iloc[[0]], df["MAE"].iloc[[0]], color="red", alpha=0.5)
+        axes_2.bar_label(graf1, fmt="%.03f", size=10, label_type="edge")
     if "NEOSP-SVR Regressor" in pip_choices:
         graf2 = axes_2.bar(df["Model"].iloc[[1]], df["MAE"].iloc[[1]], color = "blue",  alpha=0.5)
+        axes_2.bar_label(graf2, fmt="%.03f", size=10, label_type="edge")
+    if "TFIDF-SVR Regressor" in pip_choices:
+        graf3 = axes_2.bar(df["Model"].iloc[[2]], df["MAE"].iloc[[2]], color = "green",  alpha=0.5)
+        axes_2.bar_label(graf3, fmt="%.03f", size=10, label_type="edge")
     # ax3
     axes_3.set_xlabel("MAE")
         axes_3.hist(df_list_output_MbR["MAE"], color="red",  alpha=0.5)
     if "NEOSP-SVR Regressor" in pip_choices:
         axes_3.hist(df_list_output_NEOSP["MAE"], color="blue", alpha=0.5)
+    if "TFIDF-SVR Regressor" in pip_choices:
+        axes_3.hist(df_list_output_TFIDF["MAE"], color="green", alpha=0.5)
     # graficos geral
     fig.set_figwidth(15)
     fig.suptitle("Projeto {}".format(nome_projeto))
     # text
+    resultado, resultado2 = "", ""
+    #if  (pip_choices == ["NEOSP-SVR Regressor", "MbR Regressor"]) or (pip_choices == ["MbR Regressor", "NEOSP-SVR Regressor"]):
+    res = wilcoxon(list_output_NEOSP, list_output_MbR)
+    resultado = "NEOSP-SVR vs. MbR => Statistics: {} | valor-p: {}".format(res.statistic, res.pvalue)
+    #if  (pip_choices == ["NEOSP-SVR Regressor", "MbR Regressor"]) or (pip_choices == ["MbR Regressor", "NEOSP-SVR Regressor"]):
+    res = wilcoxon(list_output_NEOSP, list_output_TFIDF)
+    resultado2 = "NEOSP-SVR vs. TFIDF-SVR  => Statistics: {} | valor-p: {}".format(res.statistic, res.pvalue)
+    res = wilcoxon(list_output_TFIDF, list_output_MbR)
+    resultado3 = "TFIDF-SVR vs. MbR  => Statistics: {} | valor-p: {}".format(res.statistic, res.pvalue)
+    return gr.update(value=plt, visible=True), gr.update(value=resultado, visible=True), gr.update(value=resultado2, visible=True), gr.update(value=resultado3, visible=True)
 def create_pip_plot(libraries, pip_choices):
         arquivo_csv = csv.reader(arquivo)
         for i, linha in enumerate(arquivo_csv):
             list_output_NEOSP_SVR.append(float(linha[0]))
+    list_output_TFIDF_SVR = []
+    with open("metricas_{}_TFIDF.csv".format(nome_projeto), "r") as arquivo:
+        arquivo_csv = csv.reader(arquivo)
+        for i, linha in enumerate(arquivo_csv):
+            list_output_TFIDF_SVR.append(float(linha[0]))
+    return grafico(list_output_MbR, list_output_NEOSP_SVR, list_output_TFIDF_SVR, nome_projeto, pip_choices)
 demo = gr.Blocks()
             libraries = gr.Dropdown(choices=LIBRARIES, label="Projeto", value="ALOY")
         with gr.Column():
             gr.Markdown("## Gráficos")
+            pip = gr.CheckboxGroup(choices=["MbR Regressor", "NEOSP-SVR Regressor", "TFIDF-SVR Regressor"], label="Modelos Preditivos")
             # stars = gr.CheckboxGroup(choices=["Stars", "Week over Week"], label="")
             # issues = gr.CheckboxGroup(choices=["Issue", "Exclude org members", "week over week"], label="")
     with gr.Row():
     with gr.Row():
         with gr.Column():
             star_plot = gr.Text(visible=False, label="Wilcoxon Test")
+            star_plot2 = gr.Text(visible=False, label="Wilcoxon Test")
+            star_plot3 = gr.Text(visible=False, label="Wilcoxon Test")
             pip_plot = gr.Plot(visible=False)
             # issue_plot = gr.Plot(visible=False)
+    fetch.click(create_pip_plot, inputs=[libraries, pip], outputs=[pip_plot, star_plot, star_plot2, star_plot3])
     #fetch.click(create_star_plot, inputs=[libraries, pip], outputs=star_plot)
     # fetch.click(create_issue_plot, inputs=[libraries, issues], outputs=issue_plot)

database/tawos/README.md ADDED Viewed

	@@ -0,0 +1,48 @@

+# Tawosi Dataset
+This directory consists of 46 files, two for each of the 26-3\*=23 projects:
+- 23 files with "\_deep-se" suffix are prepared to be used by Deep-SE.
+- 23 files with "\_tfidf-se" suffix are prepared to be used by TF/IDF-SE.
+\* <sub>One of the repositories including three projects has been removed from the public domain during the time that the manuscript for this study [1] was under revision. Therefore, although the paper reports the results for all 26 projects, the replication package includes 23 projects as we refrain from publishing the data for the three remaining projects in accordance with The General Data Protection Regulation.</sub>
+The issues for these 23 projects were collected from 12 open source repositories by Tawosi et al. up until August 2020.
+The files are named after their project key as "[project key]\_[approach].csv", e.g. MESOS_deep-se.csv, which is the set of issues collected from the Mesos project in the Apache repository and contains the features that Deep-SE needs for prediction. The following table lists the projects and the repository from which each project was collected.
+## Project list
+| Repository   | Project                           | Key        | File for Deep-SE          | File for TF/IDF-SE        |
+|--------------|-----------------------------------|------------|---------------------------|---------------------------|
+| Apache       | Mesos                             | MESOS      | MESOS_deep-se.csv         | MESOS_tfidf-se.csv        |
+| Apache       | Alloy                             | ALOY       | ALOY_deep-se.csv          | ALOY_tfidf-se.csv         |
+| Appcelerator | Appcelerator studio               | TISTUD     | TISTUD_deep-se.csv        | TISTUD_tfidf-se.csv       |
+| Appcelerator | Aptana studio                     | APSTUD     | APSTUD_deep-se.csv        | APSTUD_tfidf-se.csv       |
+| Appcelerator | Command-Line Interface            | CLI        | CLI_deep-se.csv           | CLI_tfidf-se.csv          |
+| Appcelerator | Daemon                            | DAEMON     | DAEMON_deep-se.csv        | DAEMON_tfidf-se.csv       |
+| Appcelerator | Documentation                     | TIDOC      | TIDOC_deep-se.csv         | TIDOC_tfidf-se.csv        |
+| Appcelerator | Titanium                          | TIMOB      | TIMOB_deep-se.csv         | TIMOB_tfidf-se.csv        |
+| Atlassian    | Clover                            | CLOV       | CLOV_deep-se.csv          | CLOV_tfidf-se.csv         |
+| Atlassian    | Confluence Cloud                  | CONFCLOUD  | CONFCLOUD_deep-se.csv     | CONFCLOUD_tfidf-se.csv    |
+| Atlassian    | Confluence Server and Data Center | CONFSERVER | CONFSERVER_deep-se.csv    | CONFSERVER_tfidf-se.csv   |
+| DNNSoftware  | DNN                               | DNN        | DNN_deep-se.csv           | DNN_tfidf-se.csv          |
+| Duraspace    | Duracloud                         | DURACLOUD  | DURACLOUD_deep-se.csv     | DURACLOUD_tfidf-se.csv    |
+| Hyperledger  | Fabric                            | FAB        | FAB_deep-se.csv           | FAB_tfidf-se.csv          |
+| Hyperledger  | Sawtooth                          | STL        | STL_deep-se.csv           | STL_tfidf-se.csv          |
+| Lsstcorp     | Data management                   | DM         | DM_deep-se.csv            | DM_tfidf-se.csv           |
+| MongoDB      | Compass                           | COMPASS    | COMPASS_deep-se.csv       | COMPASS_tfidf-se.csv      |
+| MongoDB      | Core Server                       | SERVER     | SERVER_deep-se.csv        | SERVER_tfidf-se.csv       |
+| MongoDB      | Evergreen                         | EVG        | EVG_deep-se.csv           | EVG_tfidf-se.csv          |
+| Moodle       | Moodle                            | MDL        | MDL_deep-se.csv           | MDL_tfidf-se.csv          |
+| Mulesoft     | Mule                              | MULE       | MULE_deep-se.csv          | MULE_tfidf-se.csv         |
+| Sonatype     | Sonatype’s Nexus                  | NEXUS      | NEXUS_deep-se.csv         | NEXUS_tfidf-se.csv        |
+| Spring       | Spring XD                         | XD         | XD_deep-se.csv            | XD_tfidf-se.csv           |
+## Content of the files
+- Each csv file for the Deep-SE approach contains 5 columns: *issuekey*, *created*, *title*, *description*, and *storypoint*.
+- Each csv file for TF/IDF-SE approach contains more than 4 columns: starting with *issuekey*, *created*, *storypoint*, *context*, *codesnippet*, and a set of one-hot columns for issue type (header starting with t\_) followed by component(s) (header starting with c\_).
+- The issues are sorted by creation time (i.e. issues listed earlier were created before issues listed later).
+[1] Vali Tawosi, Rebecca Moussa, and Federica Sarro. "Agile Effort Estimation: Have We Solved the Problem Yet? Insights From A Replication Study." IEEE Transactions on Software Engineering, no. TBA (2022): pp. TBA.

database/tawos/deep/ALOY_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/APSTUD_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/CLI_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/CLOV_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/COMPASS_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/CONFCLOUD_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/CONFSERVER_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/DAEMON_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/DM_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/DNN_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/DURACLOUD_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/EVG_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/FAB_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/MDL_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/MESOS_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/MULE_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/NEXUS_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/SERVER_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/STL_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/TIDOC_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/TIMOB_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/TISTUD_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/XD_deep-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/deep/metricas_NEXUS_MbR.csv ADDED Viewed

	@@ -0,0 +1,300 @@

+0.9232760219499689
+1.0214754044707242
+1.2796930059020544
+1.0425662481044697
+0.870375724119874
+1.2029848616249328
+1.1264422074144007
+1.0686551107110316
+1.0611490454809922
+1.0216262500960556
+1.0316785398688675
+1.0164215659535472
+1.1173946957878316
+1.0193207728309133
+0.9270179898105017
+1.409685705817132
+0.9975327412644224
+1.1267523300363365
+0.9221921552698891
+1.0456758477599817
+0.9248579034070454
+1.1553462138485537
+1.0811396092207326
+1.0859043452647197
+0.8713957649215059
+1.0458130701590682
+1.0008233343945199
+1.2413687110974498
+1.0448607467094069
+1.1712096428924288
+1.0825660299139237
+0.9964653131579807
+1.025817396332217
+1.438955740047784
+0.9019615330067748
+0.9927134906084989
+1.0340531105573427
+0.993196513453284
+1.155440044789391
+1.0006449452757071
+0.9843148271385402
+1.3817598158471796
+0.9542290782540391
+1.0492237871333034
+0.927950754393812
+1.0144467741758425
+0.9415267913011977
+1.3784291877531751
+0.9974751078568057
+0.9935999473065987
+1.1535379597002065
+1.0342177323456576
+0.9484524835538877
+1.0390124695896925
+1.0765603351406785
+0.9191073957384212
+1.2250090566783398
+1.0575812631047394
+1.0056755184262238
+1.1572102137376088
+1.028711148445938
+1.2392841168192184
+1.1528533868627475
+0.9198667946717868
+0.9708470156988099
+1.04692457159167
+1.1560795011691347
+1.1096214857343594
+1.0004336227811137
+0.9870434610782387
+1.153726149045962
+0.9795446363309078
+1.0696027841113647
+1.021489041379837
+0.9977417278509323
+1.1207941334679943
+1.132770904460277
+1.2018074934407692
+1.0306115727882492
+0.9089227492782099
+1.1736005803868517
+1.1017449788900646
+0.9784455014564218
+0.9582846950241642
+1.23775405561677
+0.9749184898949426
+1.051334899498315
+0.9718995971150362
+1.2895804287925525
+0.8923764723963422
+1.1122835822523809
+0.9552763928738969
+1.068950939855776
+1.1003540141605663
+1.164174748808134
+0.9429429264597718
+0.9814585094354124
+1.1913785911101842
+0.9525292832599648
+1.1499511488259253
+0.9862867241962406
+0.987759510380415
+0.9237178578052214
+0.9452369003851062
+1.0241482386568188
+1.0599469772649928
+1.221798052539712
+0.9753932793957822
+1.1154452043515968
+1.38041616809195
+1.1612100847670273
+1.2166523024557347
+1.0060111495368906
+0.9225505383851716
+0.979440995821651
+1.2812537736159748
+1.0349560339433326
+1.0403900409471638
+0.8553428913308377
+1.1184860527153568
+1.1964860412598322
+1.0433735531239432
+1.058218692384059
+0.9226487241307835
+1.0598769405321666
+0.9863738157706959
+1.0053571624603426
+0.9031539196206074
+1.2612247922452877
+1.175469026160078
+1.157435388324624
+0.9202213543087178
+1.0184452832658761
+1.0802532101284053
+0.9754726552698472
+1.0301642277672267
+1.0307789841151351
+1.1540650763505427
+1.0905146388855345
+1.1553028223903044
+1.3041821672866916
+1.0170925018818935
+0.9933942812257944
+1.2464216750488202
+1.005473855317849
+0.9698714500565357
+0.9650274993687771
+1.2525907588947558
+0.9202435972028588
+0.9404866455161209
+1.3407209015633352
+1.054274898268658
+0.9704324536617827
+1.1889802864841867
+0.9465514984235732
+1.0683175436092784
+0.9992370434610783
+0.9608339828526891
+0.9778468158914515
+1.1140373025369679
+1.2547947372440353
+0.9001014586037985
+0.9846448403390682
+1.091768216183193
+0.9604775100094916
+1.188483198489456
+1.2245699450012624
+1.031920674475536
+1.042706355043747
+0.934188137398044
+1.1585672517809804
+1.1207902861569008
+1.0137923698766134
+1.0363996378036942
+1.0777358367061955
+0.9339109481518889
+1.1579155368689142
+1.1943178948986202
+0.942756303997014
+0.9780965606577892
+0.9724261697740638
+1.190071239213205
+0.8739295026346509
+1.0714301299324698
+0.9589201749888177
+1.14809041309431
+1.0279961138616578
+1.2843879332111139
+0.9980898642047139
+1.0910882285137167
+1.1910994621603048
+1.1976779071162846
+1.0270638098251204
+1.2220934291917132
+0.9611484459378375
+0.9871587278934714
+0.9060246122095001
+1.0959843237131284
+0.9700223946955309
+1.0498803420679965
+1.0475382651669705
+1.0059266007003917
+1.0714737680416309
+0.951384419013124
+0.9479888286440549
+1.0542577365988604
+1.016238898707914
+1.0995054504736916
+1.1477940127122832
+1.2866411250041168
+1.0650098731221977
+0.9860548967413242
+0.9771063569815518
+1.1689613039067017
+1.2131012513227801
+0.9157838692325425
+1.196035919335185
+0.9984466424423392
+1.0338884436784386
+1.0604190223178511
+1.0778312950699847
+1.0141905676227048
+1.1351854074162966
+1.0823751131863455
+1.0774467342330059
+1.1988599563083882
+1.0597823103860888
+0.9490575565630728
+1.0265415564313394
+0.9928424796636406
+0.9649558709621114
+1.1639729225532656
+1.2690971275214644
+0.9947852459552926
+1.1224921724141694
+1.0579984191979626
+1.1126019562425213
+1.0158244870626723
+0.9555618982797799
+0.9588195580340969
+1.0645762194124129
+1.0415816632665307
+1.1812427042536247
+1.0065975366287379
+0.960924800628389
+1.3573079160857586
+0.9866235605370336
+0.9916212003117695
+0.9010352057787099
+1.121277156312779
+0.9437804784918672
+0.996091661848292
+0.9487661324634803
+1.1527688380262484
+0.9992745164352029
+1.1975508546211016
+0.9293414422623033
+1.0797344472132873
+0.9049405552567156
+1.4611880166423323
+1.0274947361530822
+1.2240380524311882
+1.0168988577724927
+0.8988195891472022
+1.0475682663670183
+1.0856048214462144
+1.1092784297366427
+0.990279165248702
+1.230371708034646
+0.993492913835311
+1.2045836378909702
+0.9515726083588797
+1.0752648287749693
+1.012390495619825
+1.0192225870853018
+1.0912666176325294
+0.9729562095880036
+1.1183845081400325
+1.1544739990998212
+1.011480025907589
+1.0130941601300414
+1.0422444170494096
+0.943715021328126
+1.1456803726694522
+1.1858219783336788
+1.1180551743822247
+1.2189108932629291
+1.0211240161153987
+0.8945939863655825
+1.0306115727882492
+1.0799777445643282
+1.0963747640814723
+1.1204684551018405
+1.162824694805974
+0.9947143340279065
+0.9049295774647885
+0.9464722865642804
+0.9961934506493363
+1.0163322099392929
+1.2927969218271438

database/tawos/deep/metricas_NEXUS_NEOSP_SVR.csv ADDED Viewed

	@@ -0,0 +1,300 @@

+0.8062983011257235
+1.032432282578063
+1.2057998735271693
+0.944706263679146
+0.8498769193979343
+0.9195844550517124
+0.7898471521104582
+0.8854971101703727
+0.8602599380336894
+1.0864997730132382
+0.9047051224946047
+0.830785127821595
+0.968528602823701
+0.9647427013409863
+0.8803396144523119
+0.8255218351240419
+1.2845506655908328
+0.8051110278846986
+0.9427991039091772
+1.005890546016643
+0.7573702741076634
+0.8910354720452858
+0.8532160135640033
+1.1444878549523763
+1.200291770273267
+0.7739849924768679
+0.9866980663533863
+0.9730769325677584
+0.9064133611041554
+0.9282202415375351
+1.0486656268899255
+1.1985104953142145
+0.8749949418910377
+0.83204613878934
+0.8959416966154746
+1.437771437808204
+0.7972563445081011
+0.7975273573133316
+0.8222179756366598
+0.6852410576316454
+0.9072674952237534
+0.9097594731194828
+0.9293712908418625
+0.8186130143598132
+0.9883094445636065
+0.9874950726126849
+0.9067444630521493
+0.8147424643808405
+0.9769770365163615
+1.1553146441052748
+0.9248074923021079
+0.779245503302422
+1.171052186094203
+0.9635483710837806
+0.8179008239979184
+0.6706407685947057
+0.9624294750099868
+1.2197978757110461
+0.7909531661194524
+1.0803593199940296
+0.9114802914406473
+0.8252694331948631
+0.9347535309765345
+0.8405034094880942
+0.8211541907990811
+0.7787658854356531
+1.009785380988038
+1.008226110084284
+1.2914676591475316
+0.9751104999980018
+1.0706215209428913
+0.8772282116130847
+0.8593988642651856
+0.9079857814376465
+0.9575733886415408
+1.1830808195812754
+0.9340204958762801
+0.8068959312713764
+0.8785332027825754
+0.9444707697242278
+0.7996509768420726
+0.9528596740965873
+1.1181609590985686
+1.261248935906939
+0.8912968642131908
+0.8752395772477681
+0.8888225813829433
+1.0407737904180023
+0.8013724379315329
+0.7708916952796265
+1.1070191918175212
+0.8380851426639113
+1.0374894873156406
+0.9320872369155058
+0.8220542074025031
+0.9198507502198694
+0.8291189746539606
+1.1506964191835722
+0.8566249054892338
+0.9138515917941085
+0.9300386138019211
+0.9094946453335703
+0.7549057366322102
+1.292293219118459
+1.0339805275036393
+0.9073673092656004
+0.7778625962124713
+1.0298527587901136
+0.920483759137768
+0.8193841392290081
+1.0389378821239195
+0.8238613534551746
+0.8101942552706594
+0.9317092158950279
+1.0984604526123922
+1.1358523733087718
+0.9452041917676606
+0.8699528570757884
+0.8409362714783831
+0.8936521097306183
+0.7758223400733447
+0.8772174708652997
+0.8406740142916599
+1.040694658204138
+0.9273182074179123
+0.7603688674090336
+0.9751945109955814
+1.084143802894714
+1.2693484262096395
+0.8354019193843212
+0.907543709539674
+1.0558465727293567
+0.8098147953254534
+0.9455703161148263
+0.7079852370456268
+0.8473878587815239
+1.0673654702360211
+0.955726481508599
+1.2230726452990561
+0.8886914699421246
+0.7804290260424386
+1.202118855066672
+0.8807908998171425
+0.9116630434799675
+0.8483734995133959
+0.769211807302471
+1.023532758777747
+1.0791785484281715
+0.9144447420127926
+0.9710861453625045
+1.0995850108613672
+0.9234888197385054
+0.8912959890653649
+0.8994072287620939
+0.7472749005262459
+1.0741407140097319
+1.0966061372874463
+0.8467868821555279
+0.8451718913921347
+0.9861835319749075
+0.8186757593792123
+0.9397285981399016
+0.9146047477560897
+1.159060349392317
+0.8503272703797968
+1.0105263054193905
+0.9536999081229803
+0.7993847773267928
+0.8893513243889266
+1.0382695432055158
+0.9839238899744519
+0.6675479590080553
+1.2622108526908364
+0.7574483075036766
+1.0199103711971074
+0.8317914180574144
+1.0047217448917451
+1.0026569141034547
+0.972001136328143
+0.8950288784648693
+0.7934024113133407
+0.7867116845217534
+1.0033144664203992
+0.9999694902310314
+0.7711521636440622
+0.8703573258222197
+0.9800476904481517
+0.9566390554883409
+1.2894743620335434
+0.9365329133029027
+1.0707039511039709
+0.9102437248688723
+0.984623302325445
+0.9165021125842072
+0.9929743127229799
+0.8953514979233843
+0.9526667988313307
+0.8391000273235487
+0.906741725448664
+0.9470387767561318
+1.1158151696482175
+0.9131595541107055
+1.1191124888663837
+0.9375500838294942
+1.0746952559298346
+0.7855234715998117
+0.7219982214660823
+0.8126066743597553
+1.0144470523753417
+0.8745130481615869
+0.8747406848331973
+0.9374340214256928
+0.9114517219303082
+1.1745071374999996
+0.9381349516761602
+1.1205979899220924
+0.7319877791095145
+0.8465155380408272
+0.9750574246276522
+0.9382537130916152
+0.9983632120749328
+0.9673934322532552
+0.9041894369630821
+0.8645259183532821
+0.83241964885402
+0.9164686438155271
+1.1618951759179148
+0.900018125852499
+0.883026573437026
+0.9623901873030714
+1.0239433550593133
+1.0280591942417596
+1.0089593617703552
+0.9164704412474767
+1.0202054944422532
+0.8154127399049739
+0.9007610664592686
+1.0454781791028918
+0.8077225216446633
+0.8161118640140392
+0.7469815532665242
+0.9686866292175995
+1.1237386561981368
+0.7824729157794813
+0.8666679857701298
+0.9021121210099953
+1.0415137899498605
+1.0349081000175604
+0.9775219992005959
+0.9348422460410203
+0.915344763863182
+0.8615701041051325
+1.015979522300568
+0.9969287393217002
+0.9778850399979674
+0.8178266380135665
+1.148272402601704
+0.7779209600702256
+0.9576367447673856
+0.9214056966523123
+0.8096689908517556
+0.7788213250890322
+1.031452887130781
+0.8219926626328246
+0.9197182177159859
+1.2908804092941828
+0.7994061306066543
+0.9376222436231869
+1.0644538120570135
+0.9417870761480787
+0.8107359878993705
+1.0258002728053766
+0.8590877381484838
+0.9012475400390169
+0.9587117084461273
+0.9118721042185678
+0.9278792165901055
+1.138065220528389
+0.9019604707421052
+0.9725894652112981
+0.7387274090609192
+0.8466713388045666
+1.1938142160256782
+1.0033000967880104
+1.029468496912473
+0.8156563978430295
+1.0966776784782355
+0.8421186310574211
+0.8839992503338451
+0.9254241126751358
+0.8718575268230014
+1.0434012110915953
+0.8738917107699167
+0.8924406897892762
+0.923814367476265
+0.9198097772768266
+0.8318099509930444
+1.2262911636345184
+0.9122391372452223
+0.8687238804048997

database/tawos/tfidf/ALOY_tfidf-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/tfidf/APSTUD_tfidf-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/tfidf/CLI_tfidf-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/tfidf/CLOV_tfidf-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/tfidf/COMPASS_tfidf-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/tfidf/CONFCLOUD_tfidf-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/tfidf/CONFSERVER_tfidf-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/tfidf/DAEMON_tfidf-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/tfidf/DM_tfidf-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/tfidf/DNN_tfidf-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/tfidf/DURACLOUD_tfidf-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/tfidf/EVG_tfidf-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/tfidf/FAB_tfidf-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/tfidf/MDL_tfidf-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/tfidf/MESOS_tfidf-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/tfidf/MULE_tfidf-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/tfidf/NEXUS_tfidf-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/tfidf/SERVER_tfidf-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

database/tawos/tfidf/STL_tfidf-se.csv ADDED Viewed

The diff for this file is too large to render. See raw diff