diff --git a/src/datasets/__init__.py b/src/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..446a3d278b7f86d608e6ff6c85dae19b2ebbe222 --- /dev/null +++ b/src/datasets/__init__.py @@ -0,0 +1,7 @@ +from src.datasets.toxic_spans_tokens import * +from src.datasets.toxic_spans_tokens_3cls import * +from src.datasets.toxic_spans_spans import * +from src.datasets.toxic_spans_tokens_spans import * +from src.datasets.toxic_spans_multi_spans import * +from src.datasets.toxic_spans_crf_tokens import * +from src.datasets.toxic_spans_crf_3cls_tokens import * \ No newline at end of file diff --git a/src/datasets/__pycache__/__init__.cpython-38.pyc b/src/datasets/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..02e06f5d9e62aa916dc0e261dc691618b2f66182 Binary files /dev/null and b/src/datasets/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/datasets/__pycache__/toxic_spans_crf_3cls_tokens.cpython-38.pyc b/src/datasets/__pycache__/toxic_spans_crf_3cls_tokens.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..370e8050d79e33e517e81571457655bc88984f49 Binary files /dev/null and b/src/datasets/__pycache__/toxic_spans_crf_3cls_tokens.cpython-38.pyc differ diff --git a/src/datasets/__pycache__/toxic_spans_crf_tokens.cpython-38.pyc b/src/datasets/__pycache__/toxic_spans_crf_tokens.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8a81958897e7fbad60c3e7865e91a342bc6d5511 Binary files /dev/null and b/src/datasets/__pycache__/toxic_spans_crf_tokens.cpython-38.pyc differ diff --git a/src/datasets/__pycache__/toxic_spans_multi_spans.cpython-38.pyc b/src/datasets/__pycache__/toxic_spans_multi_spans.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..997f999d2c6507e501cd7240f5896ebb8e6d44e7 Binary files /dev/null and b/src/datasets/__pycache__/toxic_spans_multi_spans.cpython-38.pyc differ diff --git a/src/datasets/__pycache__/toxic_spans_spans.cpython-38.pyc b/src/datasets/__pycache__/toxic_spans_spans.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ebf1e3e783a8f886b92c6a51d27a2b1c0327ee38 Binary files /dev/null and b/src/datasets/__pycache__/toxic_spans_spans.cpython-38.pyc differ diff --git a/src/datasets/__pycache__/toxic_spans_tokens.cpython-38.pyc b/src/datasets/__pycache__/toxic_spans_tokens.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..109c2cc9ce22c4ca60c4602054d3fbd6c50fcccb Binary files /dev/null and b/src/datasets/__pycache__/toxic_spans_tokens.cpython-38.pyc differ diff --git a/src/datasets/__pycache__/toxic_spans_tokens_3cls.cpython-38.pyc b/src/datasets/__pycache__/toxic_spans_tokens_3cls.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..755457bea7152b0255933132d414fb8498957795 Binary files /dev/null and b/src/datasets/__pycache__/toxic_spans_tokens_3cls.cpython-38.pyc differ diff --git a/src/datasets/__pycache__/toxic_spans_tokens_spans.cpython-38.pyc b/src/datasets/__pycache__/toxic_spans_tokens_spans.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1a66bc618223b8d580d6692b2175cd4f447bb287 Binary files /dev/null and b/src/datasets/__pycache__/toxic_spans_tokens_spans.cpython-38.pyc differ diff --git a/src/datasets/toxic_spans_crf_3cls_tokens.py b/src/datasets/toxic_spans_crf_3cls_tokens.py new file mode 100644 index 
0000000000000000000000000000000000000000..81442d2a76926154bd4df106a84ddb7e17b18c4f --- /dev/null +++ b/src/datasets/toxic_spans_crf_3cls_tokens.py @@ -0,0 +1,132 @@ +from src.utils.mapper import configmapper +from transformers import AutoTokenizer +from datasets import load_dataset +import numpy as np + + +@configmapper.map("datasets", "toxic_spans_crf_3cls_tokens") +class ToxicSpansCRF3ClsTokenDataset: + def __init__(self, config): + self.config = config + self.tokenizer = AutoTokenizer.from_pretrained( + self.config.model_checkpoint_name + ) + self.dataset = load_dataset("csv", data_files=dict(self.config.train_files)) + self.test_dataset = load_dataset("csv", data_files=dict(self.config.eval_files)) + + self.tokenized_inputs = self.dataset.map( + self.tokenize_and_align_labels_for_train, batched=True + ) + self.test_tokenized_inputs = self.test_dataset.map( + self.tokenize_for_test, batched=True + ) + + def tokenize_and_align_labels_for_train(self, examples): + tokenized_inputs = self.tokenizer( + examples["text"], **self.config.tokenizer_params + ) + + # tokenized_inputs["text"] = examples["text"] + example_spans = [] + labels = [] + prediction_mask = np.zeros_like(np.array(tokenized_inputs["input_ids"])) + offsets_mapping = tokenized_inputs["offset_mapping"] + + ## Wrong Code + # for i, offset_mapping in enumerate(offsets_mapping): + # j = 0 + # while j < len(offset_mapping): # [tok1, tok2, tok3] [(0,5),(1,4),(5,7)] + # if tokenized_inputs["input_ids"][i][j] in [ + # self.tokenizer.sep_token_id, + # self.tokenizer.pad_token_id, + # self.tokenizer.cls_token_id, + # ]: + # j = j + 1 + # continue + # else: + # k = j + 1 + # while self.tokenizer.convert_ids_to_tokens( + # tokenized_inputs["input_ids"][i][k] + # ).startswith("##"): + # offset_mapping[i][j][1] = offset_mapping[i][k][1] + # j = k + + for i, offset_mapping in enumerate(offsets_mapping): + labels.append([]) + + spans = eval(examples["spans"][i]) + Bs = eval(examples["Bs"][i]) + Is = eval(examples["Is"][i]) + + example_spans.append(spans) + # cls_label = 2 ## DUMMY LABEL + cls_label = 3 ## DUMMY LABEL + for j, offsets in enumerate(offset_mapping): + if tokenized_inputs["input_ids"][i][j] in [ + self.tokenizer.sep_token_id, + self.tokenizer.pad_token_id, + ]: + tokenized_inputs["attention_mask"][i][j] = 0 + + if tokenized_inputs["input_ids"][i][j] == self.tokenizer.cls_token_id: + labels[-1].append(cls_label) + prediction_mask[i][j] = 1 + + elif offsets[0] == offsets[1] and offsets[0] == 0: + # labels[-1].append(2) ## DUMMY + labels[-1].append(cls_label) ## DUMMY + + else: + # toxic_offsets = [x in spans for x in range(offsets[0], offsets[1])] + # ## If any part of the the token is in span, mark it as Toxic + # if ( + # len(toxic_offsets) > 0 + # and sum(toxic_offsets) / len(toxic_offsets) > 0.0 + # ): + # labels[-1].append(1) + # else: + # labels[-1].append(0) + # prediction_mask[i][j] = 1 + + b_off = [x in Bs for x in range(offsets[0], offsets[1])] + b_off = sum(b_off) + i_off = [x in Is for x in range(offsets[0], offsets[1])] + i_off = sum(i_off) + # if len(b_off) == len(i_off) and len(i_off) == 0: + if b_off == 0 and i_off == 0: + labels[-1].append(0) + # elif len(b_off) >= len(i_off) == 1: + elif b_off >= i_off: + labels[-1].append(1) + # print(b_off) + # print(i_off) + # print(j) + else: + labels[-1].append(2) + + tokenized_inputs["labels"] = labels + tokenized_inputs["prediction_mask"] = prediction_mask + return tokenized_inputs + + def tokenize_for_test(self, examples): + tokenized_inputs = self.tokenizer( + 
examples["text"], **self.config.tokenizer_params + ) + prediction_mask = np.zeros_like(np.array(tokenized_inputs["input_ids"])) + labels = np.zeros_like(np.array(tokenized_inputs["input_ids"])) + + offsets_mapping = tokenized_inputs["offset_mapping"] + + for i, offset_mapping in enumerate(offsets_mapping): + for j, offsets in enumerate(offset_mapping): + if tokenized_inputs["input_ids"][i][j] in [ + self.tokenizer.sep_token_id, + self.tokenizer.pad_token_id, + ]: + tokenized_inputs["attention_mask"][i][j] = 0 + else: + prediction_mask[i][j] = 1 + + tokenized_inputs["prediction_mask"] = prediction_mask + tokenized_inputs["labels"] = labels + return tokenized_inputs diff --git a/src/datasets/toxic_spans_crf_tokens.py b/src/datasets/toxic_spans_crf_tokens.py new file mode 100644 index 0000000000000000000000000000000000000000..ef62e2ee52aee96725e832bcfef52bd57d60612c --- /dev/null +++ b/src/datasets/toxic_spans_crf_tokens.py @@ -0,0 +1,111 @@ +from src.utils.mapper import configmapper +from transformers import AutoTokenizer +from datasets import load_dataset +import numpy as np + + +@configmapper.map("datasets", "toxic_spans_crf_tokens") +class ToxicSpansCRFTokenDataset: + def __init__(self, config): + self.config = config + self.tokenizer = AutoTokenizer.from_pretrained( + self.config.model_checkpoint_name + ) + self.dataset = load_dataset("csv", data_files=dict(self.config.train_files)) + self.test_dataset = load_dataset("csv", data_files=dict(self.config.eval_files)) + + self.tokenized_inputs = self.dataset.map( + self.tokenize_and_align_labels_for_train, batched=True + ) + self.test_tokenized_inputs = self.test_dataset.map( + self.tokenize_for_test, batched=True + ) + + def tokenize_and_align_labels_for_train(self, examples): + tokenized_inputs = self.tokenizer( + examples["text"], **self.config.tokenizer_params + ) + + # tokenized_inputs["text"] = examples["text"] + example_spans = [] + labels = [] + prediction_mask = np.zeros_like(np.array(tokenized_inputs["input_ids"])) + offsets_mapping = tokenized_inputs["offset_mapping"] + + ## Wrong Code + # for i, offset_mapping in enumerate(offsets_mapping): + # j = 0 + # while j < len(offset_mapping): # [tok1, tok2, tok3] [(0,5),(1,4),(5,7)] + # if tokenized_inputs["input_ids"][i][j] in [ + # self.tokenizer.sep_token_id, + # self.tokenizer.pad_token_id, + # self.tokenizer.cls_token_id, + # ]: + # j = j + 1 + # continue + # else: + # k = j + 1 + # while self.tokenizer.convert_ids_to_tokens( + # tokenized_inputs["input_ids"][i][k] + # ).startswith("##"): + # offset_mapping[i][j][1] = offset_mapping[i][k][1] + # j = k + + for i, offset_mapping in enumerate(offsets_mapping): + labels.append([]) + + spans = eval(examples["spans"][i]) + example_spans.append(spans) + cls_label = 2 ## DUMMY LABEL + for j, offsets in enumerate(offset_mapping): + if tokenized_inputs["input_ids"][i][j] in [ + self.tokenizer.sep_token_id, + self.tokenizer.pad_token_id, + ]: + tokenized_inputs["attention_mask"][i][j] = 0 + + if tokenized_inputs["input_ids"][i][j] == self.tokenizer.cls_token_id: + labels[-1].append(cls_label) + prediction_mask[i][j] = 1 + + elif offsets[0] == offsets[1] and offsets[0] == 0: + labels[-1].append(2) ## DUMMY + + else: + toxic_offsets = [x in spans for x in range(offsets[0], offsets[1])] + ## If any part of the the token is in span, mark it as Toxic + if ( + len(toxic_offsets) > 0 + and sum(toxic_offsets) / len(toxic_offsets) > 0.0 + ): + labels[-1].append(1) + else: + labels[-1].append(0) + prediction_mask[i][j] = 1 + + tokenized_inputs["labels"] 
= labels + tokenized_inputs["prediction_mask"] = prediction_mask + return tokenized_inputs + + def tokenize_for_test(self, examples): + tokenized_inputs = self.tokenizer( + examples["text"], **self.config.tokenizer_params + ) + prediction_mask = np.zeros_like(np.array(tokenized_inputs["input_ids"])) + labels = np.zeros_like(np.array(tokenized_inputs["input_ids"])) + + offsets_mapping = tokenized_inputs["offset_mapping"] + + for i, offset_mapping in enumerate(offsets_mapping): + for j, offsets in enumerate(offset_mapping): + if tokenized_inputs["input_ids"][i][j] in [ + self.tokenizer.sep_token_id, + self.tokenizer.pad_token_id, + ]: + tokenized_inputs["attention_mask"][i][j] = 0 + else: + prediction_mask[i][j] = 1 + + tokenized_inputs["prediction_mask"] = prediction_mask + tokenized_inputs["labels"] = labels + return tokenized_inputs diff --git a/src/datasets/toxic_spans_multi_spans.py b/src/datasets/toxic_spans_multi_spans.py new file mode 100644 index 0000000000000000000000000000000000000000..445e9f66316a037c59cec136ff5c08b017e78d7a --- /dev/null +++ b/src/datasets/toxic_spans_multi_spans.py @@ -0,0 +1,237 @@ +from src.utils.mapper import configmapper +from transformers import AutoTokenizer +import pandas as pd +from datasets import load_dataset, Dataset +from evaluation.fix_spans import _contiguous_ranges + + +@configmapper.map("datasets", "toxic_spans_multi_spans") +class ToxicSpansMultiSpansDataset: + def __init__(self, config): + self.config = config + self.tokenizer = AutoTokenizer.from_pretrained( + self.config.model_checkpoint_name + ) + + self.dataset = load_dataset("csv", data_files=dict(self.config.train_files)) + self.test_dataset = load_dataset("csv", data_files=dict(self.config.eval_files)) + + temp_key_train = list(self.dataset.keys())[0] + self.intermediate_dataset = self.dataset.map( + self.create_train_features, + batched=True, + batch_size=1000000, ##Unusually Large Batch Size ## Needed For Correct ID mapping + remove_columns=self.dataset[temp_key_train].column_names, + ) + + temp_key_test = list(self.test_dataset.keys())[0] + self.intermediate_test_dataset = self.test_dataset.map( + self.create_test_features, + batched=True, + batch_size=1000000, ##Unusually Large Batch Size ## Needed For Correct ID mapping + remove_columns=self.test_dataset[temp_key_test].column_names, + ) + + self.tokenized_inputs = self.intermediate_dataset.map( + self.prepare_train_features, + batched=True, + remove_columns=self.intermediate_dataset[temp_key_train].column_names, + ) + self.test_tokenized_inputs = self.intermediate_test_dataset.map( + self.prepare_test_features, + batched=True, + remove_columns=self.intermediate_test_dataset[temp_key_test].column_names, + ) + + def create_train_features(self, examples): + features = { + "context": [], + "id": [], + "question": [], + "title": [], + "start_positions": [], + "end_positions": [], + } + id = 0 + # print(examples) + for row_number in range(len(examples["text"])): + context = examples["text"][row_number] + question = "offense" + title = context.split(" ")[0] + start_positions = [] + end_positions = [] + span = eval(examples["spans"][row_number]) + contiguous_spans = _contiguous_ranges(span) + for lst in contiguous_spans: + lst = list(lst) + dict_to_write = {} + + start_positions.append(lst[0]) + end_positions.append(lst[1]) + + features["context"].append(context) + features["id"].append(str(id)) + features["question"].append(question) + features["title"].append(title) + features["start_positions"].append(start_positions) + 
features["end_positions"].append(end_positions) + id += 1 + + return features + + def create_test_features(self, examples): + features = {"context": [], "id": [], "question": [], "title": []} + id = 0 + for row_number in range(len(examples["text"])): + context = examples["text"][row_number] + question = "offense" + title = context.split(" ")[0] + features["context"].append(context) + features["id"].append(str(id)) + features["question"].append(question) + features["title"].append(title) + id += 1 + return features + + def prepare_train_features(self, examples): + """Generate tokenized features from examples. + + Args: + examples (dict): The examples to be tokenized. + + Returns: + transformers.tokenization_utils_base.BatchEncoding: + The tokenized features/examples after processing. + """ + # Tokenize our examples with truncation and padding, but keep the + # overflows using a stride. This results in one example possible + # giving several features when a context is long, each of those + # features having a context that overlaps a bit the context + # of the previous feature. + pad_on_right = self.tokenizer.padding_side == "right" + print("### Batch Tokenizing Examples ###") + tokenized_examples = self.tokenizer( + examples["question" if pad_on_right else "context"], + examples["context" if pad_on_right else "question"], + **dict(self.config.tokenizer_params), + ) + + # Since one example might give us several features if it has + # a long context, we need a map from a feature to + # its corresponding example. This key gives us just that. + sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping") + # The offset mappings will give us a map from token to + # character position in the original context. This will + # help us compute the start_positions and end_positions. + offset_mapping = tokenized_examples.pop("offset_mapping") + + # Let's label those examples! + tokenized_examples["start_positions"] = [] + tokenized_examples["end_positions"] = [] + + for i, offsets in enumerate(offset_mapping): + # We will label impossible answers with the index of the CLS token. + input_ids = tokenized_examples["input_ids"][i] + + # Grab the sequence corresponding to that example + # (to know what is the context and what is the question). + sequence_ids = tokenized_examples.sequence_ids(i) + + # One example can give several spans, this is the index of + # the example containing this span of text. + sample_index = sample_mapping[i] + start_positions = examples["start_positions"][sample_index] + end_positions = examples["end_positions"][sample_index] + + start_positions_token_wise = [0 for x in range(len(input_ids))] + end_positions_token_wise = [0 for x in range(len(input_ids))] + # If no answers are given, set the cls_index as answer. + if len(start_positions) != 0: + for position in range(len(start_positions)): + start_char = start_positions[position] + end_char = end_positions[position] + 1 + + # Start token index of the current span in the text. + token_start_index = 0 + while sequence_ids[token_start_index] != (1 if pad_on_right else 0): + token_start_index += 1 + + # End token index of the current span in the text. + token_end_index = len(input_ids) - 1 + while sequence_ids[token_end_index] != (1 if pad_on_right else 0): + token_end_index -= 1 + + # Detect if the answer is out of the span (in which case we continue). 
+ if not ( + offsets[token_start_index][0] <= start_char + and offsets[token_end_index][1] >= end_char + ): + continue + else: + # Otherwise move the token_start_index and token_end_index to the two ends of the answer. + # Note: we could go after the last offset if the answer is the last word (edge case). + while ( + token_start_index < len(offsets) + and offsets[token_start_index][0] <= start_char + ): + token_start_index += 1 + start_positions_token_wise[token_start_index - 1] = 1 + while offsets[token_end_index][1] >= end_char: + token_end_index -= 1 + end_positions_token_wise[token_end_index + 1] = 1 + tokenized_examples["start_positions"].append(start_positions_token_wise) + tokenized_examples["end_positions"].append(end_positions_token_wise) + return tokenized_examples + + def prepare_test_features(self, examples): + + """Generate tokenized validation features from examples. + + Args: + examples (dict): The validation examples to be tokenized. + + Returns: + transformers.tokenization_utils_base.BatchEncoding: + The tokenized features/examples for validation set after processing. + """ + + # Tokenize our examples with truncation and maybe + # padding, but keep the overflows using a stride. + # This results in one example possible giving several features + # when a context is long, each of those features having a + # context that overlaps a bit the context of the previous feature. + print("### Tokenizing Validation Examples") + pad_on_right = self.tokenizer.padding_side == "right" + tokenized_examples = self.tokenizer( + examples["question" if pad_on_right else "context"], + examples["context" if pad_on_right else "question"], + **dict(self.config.tokenizer_params), + ) + + # Since one example might give us several features if it has a long context, + # we need a map from a feature to its corresponding example. This key gives us just that. + sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping") + + # We keep the example_id that gave us this feature and we will store the offset mappings. + tokenized_examples["example_id"] = [] + + for i in range(len(tokenized_examples["input_ids"])): + # Grab the sequence corresponding to that example + # (to know what is the context and what is the question). + sequence_ids = tokenized_examples.sequence_ids(i) + context_index = 1 if pad_on_right else 0 + + # One example can give several spans, + # this is the index of the example containing this span of text. + sample_index = sample_mapping[i] + tokenized_examples["example_id"].append(str(examples["id"][sample_index])) + + # Set to None the offset_mapping that are not part + # of the context so it's easy to determine if a token + # position is part of the context or not.
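+ # Question and special tokens keep None here, which downstream post-processing can presumably use to restrict span reconstruction to context tokens.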
+ tokenized_examples["offset_mapping"][i] = [ + (o if sequence_ids[k] == context_index else None) + for k, o in enumerate(tokenized_examples["offset_mapping"][i]) + ] + + return tokenized_examples diff --git a/src/datasets/toxic_spans_spans.py b/src/datasets/toxic_spans_spans.py new file mode 100644 index 0000000000000000000000000000000000000000..9f034ee428a56569eaec41b84a8d7078a442dd03 --- /dev/null +++ b/src/datasets/toxic_spans_spans.py @@ -0,0 +1,238 @@ +from src.utils.mapper import configmapper +from transformers import AutoTokenizer +import pandas as pd +from datasets import load_dataset, Dataset +from evaluation.fix_spans import _contiguous_ranges + + +@configmapper.map("datasets", "toxic_spans_spans") +class ToxicSpansSpansDataset: + def __init__(self, config): + # print("### ToxicSpansSpansDataset ###"); exit() + self.config = config + self.tokenizer = AutoTokenizer.from_pretrained( + self.config.model_checkpoint_name + ) + + self.dataset = load_dataset("csv", data_files=dict(self.config.train_files)) + self.test_dataset = load_dataset("csv", data_files=dict(self.config.eval_files)) + + temp_key_train = list(self.dataset.keys())[0] + self.intermediate_dataset = self.dataset.map( + self.create_train_features, + batched=True, + batch_size=1000000, ##Unusually Large Batch Size ## Needed For Correct ID mapping + remove_columns=self.dataset[temp_key_train].column_names, + ) + + temp_key_test = list(self.test_dataset.keys())[0] + self.intermediate_test_dataset = self.test_dataset.map( + self.create_test_features, + batched=True, + batch_size=1000000, ##Unusually Large Batch Size ## Needed For Correct ID mapping + remove_columns=self.test_dataset[temp_key_test].column_names, + ) + + self.tokenized_inputs = self.intermediate_dataset.map( + self.prepare_train_features, + batched=True, + remove_columns=self.intermediate_dataset[temp_key_train].column_names, + ) + self.test_tokenized_inputs = self.intermediate_test_dataset.map( + self.prepare_test_features, + batched=True, + remove_columns=self.intermediate_test_dataset[temp_key_test].column_names, + ) + + def create_train_features(self, examples): + features = {"context": [], "id": [], "question": [], "title": []} + id = 0 + # print(examples) + for row_number in range(len(examples["text"])): + context = examples["text"][row_number] + # question = "offense" + question = "ভুল" + title = context.split(" ")[0] + span = eval(examples["spans"][row_number]) + contiguous_spans = _contiguous_ranges(span) + for lst in contiguous_spans: + lst = list(lst) + dict_to_write = {} + + dict_to_write["answer_start"] = [lst[0]] + dict_to_write["text"] = [context[lst[0] : lst[-1] + 1]] + # print(dict_to_write) + if "answers" in features.keys(): + features["answers"].append(dict_to_write) + else: + features["answers"] = [ + dict_to_write, + ] + features["context"].append(context) + features["id"].append(str(id)) + features["question"].append(question) + features["title"].append(title) + id += 1 + + return features + + def create_test_features(self, examples): + features = {"context": [], "id": [], "question": [], "title": []} + id = 0 + for row_number in range(len(examples["text"])): + context = examples["text"][row_number] + # question = "offense" + question = "ভুল" + title = context.split(" ")[0] + features["context"].append(context) + features["id"].append(str(id)) + features["question"].append(question) + features["title"].append(title) + id += 1 + return features + + def prepare_train_features(self, examples): + """Generate tokenized features from examples. 
+ + Args: + examples (dict): The examples to be tokenized. + + Returns: + transformers.tokenization_utils_base.BatchEncoding: + The tokenized features/examples after processing. + """ + # Tokenize our examples with truncation and padding, but keep the + # overflows using a stride. This results in one example possible + # giving several features when a context is long, each of those + # features having a context that overlaps a bit the context + # of the previous feature. + pad_on_right = self.tokenizer.padding_side == "right" + print("### Batch Tokenizing Examples ###") + tokenized_examples = self.tokenizer( + examples["question" if pad_on_right else "context"], + examples["context" if pad_on_right else "question"], + **dict(self.config.tokenizer_params), + ) + + # Since one example might give us several features if it has + # a long context, we need a map from a feature to + # its corresponding example. This key gives us just that. + sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping") + # The offset mappings will give us a map from token to + # character position in the original context. This will + # help us compute the start_positions and end_positions. + offset_mapping = tokenized_examples.pop("offset_mapping") + + # Let's label those examples! + tokenized_examples["start_positions"] = [] + tokenized_examples["end_positions"] = [] + + for i, offsets in enumerate(offset_mapping): + # We will label impossible answers with the index of the CLS token. + input_ids = tokenized_examples["input_ids"][i] + cls_index = input_ids.index(self.tokenizer.cls_token_id) + + # Grab the sequence corresponding to that example + # (to know what is the context and what is the question). + sequence_ids = tokenized_examples.sequence_ids(i) + + # One example can give several spans, this is the index of + # the example containing this span of text. + sample_index = sample_mapping[i] + answers = examples["answers"][sample_index] + # If no answers are given, set the cls_index as answer. + if len(answers["answer_start"]) == 0: + tokenized_examples["start_positions"].append(cls_index) + tokenized_examples["end_positions"].append(cls_index) + else: + # Start/end character index of the answer in the text. + start_char = answers["answer_start"][0] + end_char = start_char + len(answers["text"][0]) + + # Start token index of the current span in the text. + token_start_index = 0 + while sequence_ids[token_start_index] != (1 if pad_on_right else 0): + token_start_index += 1 + + # End token index of the current span in the text. + token_end_index = len(input_ids) - 1 + while sequence_ids[token_end_index] != (1 if pad_on_right else 0): + token_end_index -= 1 + + # Detect if the answer is out of the span + # (in which case this feature is labeled with the CLS index). + if not ( + offsets[token_start_index][0] <= start_char + and offsets[token_end_index][1] >= end_char + ): + tokenized_examples["start_positions"].append(cls_index) + tokenized_examples["end_positions"].append(cls_index) + else: + # Otherwise move the token_start_index and + # stoken_end_index to the two ends of the answer. + # Note: we could go after the last offset + # if the answer is the last word (edge case). 
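+ # The two loops below advance past the answer and then step back one token, so the recorded start/end indices land on the first and last tokens that overlap the answer characters.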
+ while ( + token_start_index < len(offsets) + and offsets[token_start_index][0] <= start_char + ): + token_start_index += 1 + tokenized_examples["start_positions"].append(token_start_index - 1) + while offsets[token_end_index][1] >= end_char: + token_end_index -= 1 + tokenized_examples["end_positions"].append(token_end_index + 1) + + return tokenized_examples + + def prepare_test_features(self, examples): + + """Generate tokenized validation features from examples. + + Args: + examples (dict): The validation examples to be tokenized. + + Returns: + transformers.tokenization_utils_base.BatchEncoding: + The tokenized features/examples for validation set after processing. + """ + + # Tokenize our examples with truncation and maybe + # padding, but keep the overflows using a stride. + # This results in one example possible giving several features + # when a context is long, each of those features having a + # context that overlaps a bit the context of the previous feature. + print("### Tokenizing Validation Examples") + pad_on_right = self.tokenizer.padding_side == "right" + tokenized_examples = self.tokenizer( + examples["question" if pad_on_right else "context"], + examples["context" if pad_on_right else "question"], + **dict(self.config.tokenizer_params), + ) + + # Since one example might give us several features if it has a long context, + # we need a map from a feature to its corresponding example. This key gives us just that. + sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping") + + # We keep the example_id that gave us this feature and we will store the offset mappings. + tokenized_examples["example_id"] = [] + + for i in range(len(tokenized_examples["input_ids"])): + # Grab the sequence corresponding to that example + # (to know what is the context and what is the question). + sequence_ids = tokenized_examples.sequence_ids(i) + context_index = 1 if pad_on_right else 0 + + # One example can give several spans, + # this is the index of the example containing this span of text. + sample_index = sample_mapping[i] + tokenized_examples["example_id"].append(str(examples["id"][sample_index])) + + # Set to None the offset_mapping that are not part + # of the context so it's easy to determine if a token + # position is part of the context or not. 
+ tokenized_examples["offset_mapping"][i] = [ + (o if sequence_ids[k] == context_index else None) + for k, o in enumerate(tokenized_examples["offset_mapping"][i]) + ] + + return tokenized_examples diff --git a/src/datasets/toxic_spans_tokens.py b/src/datasets/toxic_spans_tokens.py new file mode 100644 index 0000000000000000000000000000000000000000..d48c2b7d66e57ca27bb33da80373ac86c9856c15 --- /dev/null +++ b/src/datasets/toxic_spans_tokens.py @@ -0,0 +1,81 @@ +from src.utils.mapper import configmapper +from transformers import AutoTokenizer +from datasets import load_dataset + +# import pdb + +@configmapper.map("datasets", "toxic_spans_tokens") +class ToxicSpansTokenDataset: + def __init__(self, config): + # print("### ToxicSpansTokenDataset ###"); exit() + self.config = config + self.tokenizer = AutoTokenizer.from_pretrained( + self.config.model_checkpoint_name + ) + # if self.config.model_checkpoint_name == "sberbank-ai/mGPT": + # self.tokenizer.add_special_tokens({'pad_token': '[PAD]'}) + self.dataset = load_dataset("csv", data_files=dict(self.config.train_files)) + self.test_dataset = load_dataset("csv", data_files=dict(self.config.eval_files)) + + self.tokenized_inputs = self.dataset.map( + self.tokenize_and_align_labels_for_train, batched=True + ) + self.test_tokenized_inputs = self.test_dataset.map( + self.tokenize_for_test, batched=True + ) + + def tokenize_and_align_labels_for_train(self, examples): + + tokenized_inputs = self.tokenizer( + examples["text"], **self.config.tokenizer_params + ) + + # tokenized_inputs["text"] = examples["text"] + example_spans = [] + labels = [] + + offsets_mapping = tokenized_inputs["offset_mapping"] + # pdb.set_trace() + for i, offset_mapping in enumerate(offsets_mapping): + labels.append([]) + + spans = eval(examples["spans"][i]) + example_spans.append(spans) + if self.config.label_cls: + cls_label = ( + 1 + if ( + len(examples["text"][i]) > 0 + and len(spans) / len(examples["text"][i]) + > self.config.cls_threshold + ) + else 0 + ) ## Make class label based on threshold + else: + cls_label = -100 + for j, offsets in enumerate(offset_mapping): + if tokenized_inputs["input_ids"][i][j] == self.tokenizer.cls_token_id: + labels[-1].append(cls_label) + elif offsets[0] == offsets[1] and offsets[0] == 0: # All zero + labels[-1].append(-100) ## SPECIAL TOKEN + else: + toxic_offsets = [x in spans for x in range(offsets[0], offsets[1])] + ## If any part of the the token is in span, mark it as Toxic + if ( + len(toxic_offsets) > 0 + and sum(toxic_offsets) / len(toxic_offsets) + > self.config.token_threshold + ): + labels[-1].append(1) + else: + labels[-1].append(0) + + tokenized_inputs["labels"] = labels + # print("tokenized_inputs", tokenized_inputs); exit() + return tokenized_inputs + + def tokenize_for_test(self, examples): + tokenized_inputs = self.tokenizer( + examples["text"], **self.config.tokenizer_params + ) + return tokenized_inputs diff --git a/src/datasets/toxic_spans_tokens_3cls.py b/src/datasets/toxic_spans_tokens_3cls.py new file mode 100644 index 0000000000000000000000000000000000000000..7bdb664aca1834d9514e5f98dc7effd1564560f5 --- /dev/null +++ b/src/datasets/toxic_spans_tokens_3cls.py @@ -0,0 +1,102 @@ +from src.utils.mapper import configmapper +from transformers import AutoTokenizer +from datasets import load_dataset + +import pdb + +@configmapper.map("datasets", "toxic_spans_tokens_3cls") +class ToxicSpansToken3CLSDataset: + def __init__(self, config): + # print("### ToxicSpansTokenDataset ###"); exit() + self.config = config + 
self.tokenizer = AutoTokenizer.from_pretrained( + self.config.model_checkpoint_name + ) + # if self.config.model_checkpoint_name == "sberbank-ai/mGPT": + # self.tokenizer.add_special_tokens({'pad_token': '[PAD]'}) + self.dataset = load_dataset("csv", data_files=dict(self.config.train_files)) + self.test_dataset = load_dataset("csv", data_files=dict(self.config.eval_files)) + + self.tokenized_inputs = self.dataset.map( + self.tokenize_and_align_labels_for_train, batched=True + ) + self.test_tokenized_inputs = self.test_dataset.map( + self.tokenize_for_test, batched=True + ) + + def tokenize_and_align_labels_for_train(self, examples): + + tokenized_inputs = self.tokenizer( + examples["text"], **self.config.tokenizer_params + ) + + # tokenized_inputs["text"] = examples["text"] + example_spans = [] + labels = [] + + offsets_mapping = tokenized_inputs["offset_mapping"] + # pdb.set_trace() + for i, offset_mapping in enumerate(offsets_mapping): + labels.append([]) + + spans = eval(examples["spans"][i]) + Bs = eval(examples["Bs"][i]) + Is = eval(examples["Is"][i]) + example_spans.append(spans) + if self.config.label_cls: + cls_label = ( + 1 + if ( + len(examples["text"][i]) > 0 + and len(spans) / len(examples["text"][i]) + > self.config.cls_threshold + ) + else 0 + ) ## Make class label based on threshold + else: + cls_label = -100 + for j, offsets in enumerate(offset_mapping): + if tokenized_inputs["input_ids"][i][j] == self.tokenizer.cls_token_id: + labels[-1].append(cls_label) + elif offsets[0] == offsets[1] and offsets[0] == 0: # All zero + labels[-1].append(-100) ## SPECIAL TOKEN + else: + # toxic_offsets = [x in spans for x in range(offsets[0], offsets[1])] + ## If any part of the the token is in span, mark it as Toxic + # if ( + # len(toxic_offsets) > 0 + # and sum(toxic_offsets) / len(toxic_offsets) + # > self.config.token_threshold + # ): + # labels[-1].append(1) + # else: + # labels[-1].append(0) + b_off = [x in Bs for x in range(offsets[0], offsets[1])] + b_off = sum(b_off) + i_off = [x in Is for x in range(offsets[0], offsets[1])] + i_off = sum(i_off) + # if len(b_off) == len(i_off) and len(i_off) == 0: + if b_off == 0 and i_off == 0: + labels[-1].append(0) + # elif len(b_off) >= len(i_off) == 1: + elif b_off >= i_off: + labels[-1].append(1) + # print(b_off) + # print(i_off) + # print(j) + else: + labels[-1].append(2) + + # pdb.set_trace() + + + + tokenized_inputs["labels"] = labels + # print("tokenized_inputs", tokenized_inputs); exit() + return tokenized_inputs + + def tokenize_for_test(self, examples): + tokenized_inputs = self.tokenizer( + examples["text"], **self.config.tokenizer_params + ) + return tokenized_inputs diff --git a/src/datasets/toxic_spans_tokens_spans.py b/src/datasets/toxic_spans_tokens_spans.py new file mode 100644 index 0000000000000000000000000000000000000000..bd1b6e68d3b115b1cd0be0b417e34da31ab4ee0c --- /dev/null +++ b/src/datasets/toxic_spans_tokens_spans.py @@ -0,0 +1,269 @@ +from src.utils.mapper import configmapper +from transformers import AutoTokenizer +import pandas as pd +from datasets import load_dataset, Dataset +from evaluation.fix_spans import _contiguous_ranges + + +@configmapper.map("datasets", "toxic_spans_tokens_spans") +class ToxicSpansTokensSpansDataset: + def __init__(self, config): + self.config = config + self.tokenizer = AutoTokenizer.from_pretrained( + self.config.model_checkpoint_name + ) + + self.dataset = load_dataset("csv", data_files=dict(self.config.train_files)) + self.test_dataset = load_dataset("csv", 
data_files=dict(self.config.eval_files)) + + temp_key_train = list(self.dataset.keys())[0] + self.intermediate_dataset = self.dataset.map( + self.create_train_features, + batched=True, + batch_size=1000000, ##Unusually Large Batch Size ## Needed For Correct ID mapping + remove_columns=self.dataset[temp_key_train].column_names, + ) + + temp_key_test = list(self.test_dataset.keys())[0] + self.intermediate_test_dataset = self.test_dataset.map( + self.create_test_features, + batched=True, + batch_size=1000000, ##Unusually Large Batch Size ## Needed For Correct ID mapping + remove_columns=self.test_dataset[temp_key_test].column_names, + ) + + self.tokenized_inputs = self.intermediate_dataset.map( + self.prepare_train_features, + batched=True, + remove_columns=self.intermediate_dataset[temp_key_train].column_names, + ) + self.test_tokenized_inputs = self.intermediate_test_dataset.map( + self.prepare_test_features, + batched=True, + remove_columns=self.intermediate_test_dataset[temp_key_test].column_names, + ) + + def create_train_features(self, examples): + features = {"context": [], "id": [], "question": [], "title": [], "spans": []} + id = 0 + # print(examples) + for row_number in range(len(examples["text"])): + context = examples["text"][row_number] + question = "offense" + title = context.split(" ")[0] + span = eval(examples["spans"][row_number]) + contiguous_spans = _contiguous_ranges(span) + for lst in contiguous_spans: + lst = list(lst) + dict_to_write = {} + + dict_to_write["answer_start"] = [lst[0]] + dict_to_write["text"] = [context[lst[0] : lst[-1] + 1]] + # print(dict_to_write) + if "answers" in features.keys(): + features["answers"].append(dict_to_write) + else: + features["answers"] = [ + dict_to_write, + ] + features["context"].append(context) + features["id"].append(str(id)) + features["question"].append(question) + features["title"].append(title) + features["spans"].append(span) + id += 1 + + return features + + def create_test_features(self, examples): + features = {"context": [], "id": [], "question": [], "title": []} + id = 0 + for row_number in range(len(examples["text"])): + context = examples["text"][row_number] + question = "offense" + title = context.split(" ")[0] + features["context"].append(context) + features["id"].append(str(id)) + features["question"].append(question) + features["title"].append(title) + id += 1 + return features + + def prepare_train_features(self, examples): + """Generate tokenized features from examples. + + Args: + examples (dict): The examples to be tokenized. + + Returns: + transformers.tokenization_utils_base.BatchEncoding: + The tokenized features/examples after processing. + """ + # Tokenize our examples with truncation and padding, but keep the + # overflows using a stride. This results in one example possible + # giving several features when a context is long, each of those + # features having a context that overlaps a bit the context + # of the previous feature. + pad_on_right = self.tokenizer.padding_side == "right" + print("### Batch Tokenizing Examples ###") + tokenized_examples = self.tokenizer( + examples["question" if pad_on_right else "context"], + examples["context" if pad_on_right else "question"], + **dict(self.config.tokenizer_params), + ) + + # Since one example might give us several features if it has + # a long context, we need a map from a feature to + # its corresponding example. This key gives us just that. 
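+ # (overflow_to_sample_mapping and offset_mapping are returned only when tokenizer_params sets return_overflowing_tokens=True and return_offsets_mapping=True, which this config presumably does)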
+ sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping") + # The offset mappings will give us a map from token to + # character position in the original context. This will + # help us compute the start_positions and end_positions. + offset_mapping = tokenized_examples.pop("offset_mapping") + + # Let's label those examples! + token_labels = [] + tokenized_examples["start_positions"] = [] + tokenized_examples["end_positions"] = [] + + for i, offsets in enumerate(offset_mapping): + # We will label impossible answers with the index of the CLS token. + + token_labels.append([]) + input_ids = tokenized_examples["input_ids"][i] + spans = examples["spans"][i] + if self.config.label_cls: + cls_label = ( + 1 + if ( + len(examples["context"][i]) > 0 + and len(spans) / len(examples["context"][i]) + > self.config.cls_threshold + ) + else 0 + ) ## Make class label based on threshold + else: + cls_label = -100 + for j, offset in enumerate(offsets): + if tokenized_examples["input_ids"][i][j] == self.tokenizer.cls_token_id: + token_labels[-1].append(cls_label) + elif offset[0] == offset[1] and offset[0] == 0: + token_labels[-1].append(-100) ## SPECIAL TOKEN + else: + toxic_offsets = [x in spans for x in range(offset[0], offset[1])] + ## If any part of the the token is in span, mark it as Toxic + if ( + len(toxic_offsets) > 0 + and sum(toxic_offsets) / len(toxic_offsets) + > self.config.token_threshold + ): + token_labels[-1].append(1) + else: + token_labels[-1].append(0) + + cls_index = input_ids.index(self.tokenizer.cls_token_id) + + # Grab the sequence corresponding to that example + # (to know what is the context and what is the question). + sequence_ids = tokenized_examples.sequence_ids(i) + + # One example can give several spans, this is the index of + # the example containing this span of text. + sample_index = sample_mapping[i] + answers = examples["answers"][sample_index] + # If no answers are given, set the cls_index as answer. + if len(answers["answer_start"]) == 0: + tokenized_examples["start_positions"].append(cls_index) + tokenized_examples["end_positions"].append(cls_index) + else: + # Start/end character index of the answer in the text. + start_char = answers["answer_start"][0] + end_char = start_char + len(answers["text"][0]) + + # Start token index of the current span in the text. + token_start_index = 0 + while sequence_ids[token_start_index] != (1 if pad_on_right else 0): + token_start_index += 1 + + # End token index of the current span in the text. + token_end_index = len(input_ids) - 1 + while sequence_ids[token_end_index] != (1 if pad_on_right else 0): + token_end_index -= 1 + + # Detect if the answer is out of the span + # (in which case this feature is labeled with the CLS index). + if not ( + offsets[token_start_index][0] <= start_char + and offsets[token_end_index][1] >= end_char + ): + tokenized_examples["start_positions"].append(cls_index) + tokenized_examples["end_positions"].append(cls_index) + else: + # Otherwise move the token_start_index and + # stoken_end_index to the two ends of the answer. + # Note: we could go after the last offset + # if the answer is the last word (edge case). 
+ while ( + token_start_index < len(offsets) + and offsets[token_start_index][0] <= start_char + ): + token_start_index += 1 + tokenized_examples["start_positions"].append(token_start_index - 1) + while offsets[token_end_index][1] >= end_char: + token_end_index -= 1 + tokenized_examples["end_positions"].append(token_end_index + 1) + tokenized_examples["labels"] = token_labels + return tokenized_examples + + def prepare_test_features(self, examples): + + """Generate tokenized validation features from examples. + + Args: + examples (dict): The validation examples to be tokenized. + + Returns: + transformers.tokenization_utils_base.BatchEncoding: + The tokenized features/examples for validation set after processing. + """ + + # Tokenize our examples with truncation and maybe + # padding, but keep the overflows using a stride. + # This results in one example possible giving several features + # when a context is long, each of those features having a + # context that overlaps a bit the context of the previous feature. + print("### Tokenizing Validation Examples") + pad_on_right = self.tokenizer.padding_side == "right" + tokenized_examples = self.tokenizer( + examples["question" if pad_on_right else "context"], + examples["context" if pad_on_right else "question"], + **dict(self.config.tokenizer_params), + ) + + # Since one example might give us several features if it has a long context, + # we need a map from a feature to its corresponding example. This key gives us just that. + sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping") + + # We keep the example_id that gave us this feature and we will store the offset mappings. + tokenized_examples["example_id"] = [] + + for i in range(len(tokenized_examples["input_ids"])): + # Grab the sequence corresponding to that example + # (to know what is the context and what is the question). + sequence_ids = tokenized_examples.sequence_ids(i) + context_index = 1 if pad_on_right else 0 + + # One example can give several spans, + # this is the index of the example containing this span of text. + sample_index = sample_mapping[i] + tokenized_examples["example_id"].append(str(examples["id"][sample_index])) + + # Set to None the offset_mapping that are not part + # of the context so it's easy to determine if a token + # position is part of the context or not. 
+ tokenized_examples["offset_mapping"][i] = [ + (o if sequence_ids[k] == context_index else None) + for k, o in enumerate(tokenized_examples["offset_mapping"][i]) + ] + + return tokenized_examples diff --git a/src/models/__init__.py b/src/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..aa5c0b1e176859f857aff494aa67e27206039c84 --- /dev/null +++ b/src/models/__init__.py @@ -0,0 +1,7 @@ +from src.models.auto_models import * +from src.models.bert_token_spans import * +from src.models.roberta_token_spans import * +from src.models.bert_multi_spans import * +from src.models.roberta_multi_spans import * +from src.models.bert_crf_token import * +from src.models.roberta_crf_token import * \ No newline at end of file diff --git a/src/models/__pycache__/__init__.cpython-38.pyc b/src/models/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd2ee719d2d18a32aaa262804ca6a88c398036b2 Binary files /dev/null and b/src/models/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/models/__pycache__/auto_models.cpython-38.pyc b/src/models/__pycache__/auto_models.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0ead5a4736f87412e204c35e68cfc6a2b319b4d0 Binary files /dev/null and b/src/models/__pycache__/auto_models.cpython-38.pyc differ diff --git a/src/models/__pycache__/bert_crf_token.cpython-38.pyc b/src/models/__pycache__/bert_crf_token.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c4200b2d3bb4040bf4684bd71b4f10183827148 Binary files /dev/null and b/src/models/__pycache__/bert_crf_token.cpython-38.pyc differ diff --git a/src/models/__pycache__/bert_multi_spans.cpython-38.pyc b/src/models/__pycache__/bert_multi_spans.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..961f794199c3c98733b67c464015ec260459c767 Binary files /dev/null and b/src/models/__pycache__/bert_multi_spans.cpython-38.pyc differ diff --git a/src/models/__pycache__/bert_token_spans.cpython-38.pyc b/src/models/__pycache__/bert_token_spans.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4347408c463b2e9f55cc51f13265e14c841eb995 Binary files /dev/null and b/src/models/__pycache__/bert_token_spans.cpython-38.pyc differ diff --git a/src/models/__pycache__/roberta_crf_token.cpython-38.pyc b/src/models/__pycache__/roberta_crf_token.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1e0dba29bbbb76c516cd9cb12227ec9d1da6937c Binary files /dev/null and b/src/models/__pycache__/roberta_crf_token.cpython-38.pyc differ diff --git a/src/models/__pycache__/roberta_multi_spans.cpython-38.pyc b/src/models/__pycache__/roberta_multi_spans.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ac653a17f849575090449f0eb40c9240ec6182c7 Binary files /dev/null and b/src/models/__pycache__/roberta_multi_spans.cpython-38.pyc differ diff --git a/src/models/__pycache__/roberta_token_spans.cpython-38.pyc b/src/models/__pycache__/roberta_token_spans.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f18266dedc1620d040c529b989143ee88d6d74a9 Binary files /dev/null and b/src/models/__pycache__/roberta_token_spans.cpython-38.pyc differ diff --git a/src/models/auto_models.py b/src/models/auto_models.py new file mode 100644 index 0000000000000000000000000000000000000000..df89ff6548111df9bca5a070a039814767aad8de --- /dev/null +++ 
b/src/models/auto_models.py @@ -0,0 +1,6 @@ +from transformers import AutoModelForTokenClassification, AutoModelForQuestionAnswering +from src.utils.mapper import configmapper + +configmapper.map("models", "autotoken")(AutoModelForTokenClassification) +configmapper.map("models", "autotoken_3cls")(AutoModelForTokenClassification) +configmapper.map("models", "autospans")(AutoModelForQuestionAnswering) diff --git a/src/models/bert_crf_token.py b/src/models/bert_crf_token.py new file mode 100644 index 0000000000000000000000000000000000000000..71466dcb2da1a5aaedd31faa761e3343f7c0ed5c --- /dev/null +++ b/src/models/bert_crf_token.py @@ -0,0 +1,72 @@ +import torch +# from transformers import BertForTokenClassification +from transformers import ElectraForTokenClassification +from torchcrf import CRF +from src.utils.mapper import configmapper +# import pdb + + +@configmapper.map("models", "bert_crf_token") +# class BertLSTMCRF(BertForTokenClassification): +class BertLSTMCRF(ElectraForTokenClassification): + def __init__(self, config, lstm_hidden_size, lstm_layers): + super().__init__(config) + # ipdb.set_trace() + self.lstm = torch.nn.LSTM( + input_size=config.hidden_size, + hidden_size=lstm_hidden_size, + num_layers=lstm_layers, + dropout=0.2, + batch_first=True, + bidirectional=True, + ) + self.crf = CRF(config.num_labels, batch_first=True) + + del self.classifier + self.classifier = torch.nn.Linear(2 * lstm_hidden_size, config.num_labels) + + def forward( + self, + input_ids, + attention_mask=None, + token_type_ids=None, + labels=None, + prediction_mask=None, + ): + # pdb.set_trace() + + # outputs = self.bert( + outputs = self.electra( + input_ids, + attention_mask, + token_type_ids, + output_hidden_states=True, + return_dict=False, + ) + # seq_output, all_hidden_states, all_self_attntions, all_cross_attentions + + sequence_output = outputs[0] # outputs[1] is pooled output which is none. 
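+ # The contextual embeddings go through dropout, a bidirectional LSTM, and a linear layer to produce per-token emission scores, which the CRF consumes together with prediction_mask.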
+ + sequence_output = self.dropout(sequence_output) + + lstm_out, *_ = self.lstm(sequence_output) + sequence_output = self.dropout(lstm_out) + + logits = self.classifier(sequence_output) + + ## CRF + mask = prediction_mask + mask = mask[:, : logits.size(1)].contiguous() + + # print(logits) + + if labels is not None: + labels = labels[:, : logits.size(1)].contiguous() + loss = -self.crf(logits, labels, mask=mask.bool(), reduction="token_mean") + + tags = self.crf.decode(logits, mask.bool()) + # print(tags) + if labels is not None: + return (loss, logits, tags) + else: + return (logits, tags) diff --git a/src/models/bert_multi_spans.py b/src/models/bert_multi_spans.py new file mode 100644 index 0000000000000000000000000000000000000000..4d9c92803af525958bf453735880db1681743afc --- /dev/null +++ b/src/models/bert_multi_spans.py @@ -0,0 +1,84 @@ +import torch.nn as nn +from torch.nn import BCEWithLogitsLoss +# from transformers import BertModel, BertPreTrainedModel +from transformers import ElectraPreTrainedModel, ElectraModel +from src.utils.mapper import configmapper + + +@configmapper.map("models", "bert_multi_spans") +# class BertForMultiSpans(BertPreTrainedModel): +class BertForMultiSpans(ElectraPreTrainedModel): + def __init__(self, config): + super(BertForMultiSpans, self).__init__(config) + # self.bert = BertModel(config) + self.bert = ElectraModel(config) + self.num_labels = config.num_labels + + # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version + # self.dropout = nn.Dropout(config.hidden_dropout_prob) + self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels) + self.init_weights() + + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + start_positions=None, + end_positions=None, + output_attentions=None, + output_hidden_states=None, + ): + outputs = self.bert( + input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=None, + ) + + sequence_output = outputs[0] + + logits = self.qa_outputs(sequence_output) + start_logits, end_logits = logits.split(1, dim=-1) + start_logits = start_logits.squeeze(-1) + end_logits = end_logits.squeeze(-1) # batch_size + # print(start_logits.shape, end_logits.shape, start_positions.shape, end_positions.shape) + + total_loss = None + if ( + start_positions is not None and end_positions is not None + ): # [batch_size/seq_length] + # # If we are on multi-GPU, split add a dimension + # if len(start_positions.size()) > 1: + # start_positions = start_positions.squeeze(-1) + # if len(end_positions.size()) > 1: + # end_positions = end_positions.squeeze(-1) + # sometimes the start/end positions are outside our model inputs, we ignore these terms + # ignored_index = start_logits.size(1) + # start_positions.clamp_(0, ignored_index) + # end_positions.clamp_(0, ignored_index) + + # start_positions = start_logits.view() + + loss_fct = BCEWithLogitsLoss() + + start_loss = loss = loss_fct( + start_logits, + start_positions.float(), + ) + end_loss = loss = loss_fct( + end_logits, + end_positions.float(), + ) + total_loss = (start_loss + end_loss) / 2 + + output = (start_logits, end_logits) + outputs[2:] + return ((total_loss,) + output) if total_loss is not None else output \ No newline at end of file diff 
--git a/src/models/bert_token_spans.py b/src/models/bert_token_spans.py new file mode 100644 index 0000000000000000000000000000000000000000..b2d2d6fc4f2e12c557a5fe2a30e41a14085acdd8 --- /dev/null +++ b/src/models/bert_token_spans.py @@ -0,0 +1,100 @@ +import torch.nn as nn +import torch +from torch.nn import CrossEntropyLoss +# from transformers import BertPreTrainedModel, BertModel +from transformers import ElectraPreTrainedModel, ElectraModel +from src.utils.mapper import configmapper + + +@configmapper.map("models", "bert_token_spans") +# class BertModelForTokenAndSpans(BertPreTrainedModel): +class BertModelForTokenAndSpans(ElectraPreTrainedModel): + def __init__(self, config, num_token_labels=2, num_qa_labels=2): + super(BertModelForTokenAndSpans, self).__init__(config) + # self.bert = BertModel(config) + self.bert = ElectraModel(config) + self.num_token_labels = num_token_labels + self.num_qa_labels = num_qa_labels + # print("Number of Token Labels: ", num_token_labels); exit() + + self.dropout = nn.Dropout(config.hidden_dropout_prob) + self.classifier = nn.Linear(config.hidden_size, num_token_labels) + self.qa_outputs = nn.Linear(config.hidden_size, num_qa_labels) + self.init_weights() + + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + start_positions=None, + end_positions=None, + labels=None, # Token Wise Labels + output_attentions=None, + output_hidden_states=None, + ): + + outputs = self.bert( + input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=None, + ) + + sequence_output = outputs[0] + + qa_logits = self.qa_outputs(sequence_output) + start_logits, end_logits = qa_logits.split(1, dim=-1) + start_logits = start_logits.squeeze(-1) + end_logits = end_logits.squeeze(-1) + + sequence_output = self.dropout(sequence_output) + token_logits = self.classifier(sequence_output) + + total_loss = None + if ( + start_positions is not None + and end_positions is not None + and labels is not None + ): + # If we are on multi-GPU, split add a dimension + if len(start_positions.size()) > 1: + start_positions = start_positions.squeeze(-1) + if len(end_positions.size()) > 1: + end_positions = end_positions.squeeze(-1) + + ignored_index = start_logits.size(1) + start_positions.clamp_(0, ignored_index) + end_positions.clamp_(0, ignored_index) + + loss_fct = CrossEntropyLoss(ignore_index=ignored_index) + start_loss = loss_fct(start_logits, start_positions) + end_loss = loss_fct(end_logits, end_positions) + + loss_fct = CrossEntropyLoss() + if attention_mask is not None: + active_loss = attention_mask.view(-1) == 1 + active_logits = token_logits.view(-1, self.num_token_labels) + active_labels = torch.where( + active_loss, + labels.view(-1), + torch.tensor(loss_fct.ignore_index).type_as(labels), + ) + token_loss = loss_fct(active_logits, active_labels) + else: + token_loss = loss_fct( + token_logits.view(-1, self.num_token_labels), labels.view(-1) + ) + + total_loss = (start_loss + end_loss) / 2 + token_loss + + output = (start_logits, end_logits, token_logits) + outputs[2:] + return ((total_loss,) + output) if total_loss is not None else output \ No newline at end of file diff --git a/src/models/roberta_crf_token.py b/src/models/roberta_crf_token.py new file mode 100644 index 
0000000000000000000000000000000000000000..1dc34e96dd9d5798eda2634f18c10d98a05514b6 --- /dev/null +++ b/src/models/roberta_crf_token.py @@ -0,0 +1,66 @@ +import torch +from transformers import RobertaForTokenClassification +from torchcrf import CRF +from src.utils.mapper import configmapper + + +@configmapper.map("models", "roberta_crf_token") +class RobertaLSTMCRF(RobertaForTokenClassification): + def __init__(self, config, lstm_hidden_size, lstm_layers): + super().__init__(config) + self.lstm = torch.nn.LSTM( + input_size=config.hidden_size, + hidden_size=lstm_hidden_size, + num_layers=lstm_layers, + dropout=0.2, + batch_first=True, + bidirectional=True, + ) + self.crf = CRF(config.num_labels, batch_first=True) + + del self.classifier + self.classifier = torch.nn.Linear(2 * lstm_hidden_size, config.num_labels) + + def forward( + self, + input_ids, + attention_mask=None, + token_type_ids=None, + labels=None, + prediction_mask=None, + ): + + outputs = self.roberta( + input_ids, + attention_mask, + token_type_ids, + output_hidden_states=True, + return_dict=False, + ) + # seq_output, all_hidden_states, all_self_attntions, all_cross_attentions + + sequence_output = outputs[0] # outputs[1] is pooled output which is none. + + sequence_output = self.dropout(sequence_output) + + lstm_out, *_ = self.lstm(sequence_output) + sequence_output = self.dropout(lstm_out) + + logits = self.classifier(sequence_output) + + ## CRF + mask = prediction_mask + mask = mask[:, : logits.size(1)].contiguous() + + # print(logits) + + if labels is not None: + labels = labels[:, : logits.size(1)].contiguous() + loss = -self.crf(logits, labels, mask=mask.bool(), reduction="token_mean") + + tags = self.crf.decode(logits, mask.bool()) + # print(tags) + if labels is not None: + return (loss, logits, tags) + else: + return (logits, tags) diff --git a/src/models/roberta_multi_spans.py b/src/models/roberta_multi_spans.py new file mode 100644 index 0000000000000000000000000000000000000000..a3514552138df7ceb518a7c99c148c37659f5178 --- /dev/null +++ b/src/models/roberta_multi_spans.py @@ -0,0 +1,82 @@ +import torch.nn as nn +from torch.nn import BCEWithLogitsLoss +from transformers import RobertaModel +from transformers.models.roberta.modeling_roberta import RobertaPreTrainedModel +from src.utils.mapper import configmapper + + +@configmapper.map("models", "roberta_multi_spans") +class RobertaForMultiSpans(RobertaPreTrainedModel): + def __init__(self, config): + super(RobertaForMultiSpans, self).__init__(config) + self.roberta = RobertaModel(config) + self.num_labels = config.num_labels + + # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version + # self.dropout = nn.Dropout(config.hidden_dropout_prob) + self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels) + self.init_weights() + + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + start_positions=None, + end_positions=None, + output_attentions=None, + output_hidden_states=None, + ): + outputs = self.roberta( + input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=None, + ) + + sequence_output = outputs[0] + + logits = self.qa_outputs(sequence_output) + start_logits, end_logits = logits.split(1, dim=-1) + start_logits 
= start_logits.squeeze(-1) + end_logits = end_logits.squeeze(-1) # batch_size + # print(start_logits.shape, end_logits.shape, start_positions.shape, end_positions.shape) + + total_loss = None + if ( + start_positions is not None and end_positions is not None + ): # [batch_size/seq_length] + # # If we are on multi-GPU, split add a dimension + # if len(start_positions.size()) > 1: + # start_positions = start_positions.squeeze(-1) + # if len(end_positions.size()) > 1: + # end_positions = end_positions.squeeze(-1) + # sometimes the start/end positions are outside our model inputs, we ignore these terms + # ignored_index = start_logits.size(1) + # start_positions.clamp_(0, ignored_index) + # end_positions.clamp_(0, ignored_index) + + # start_positions = start_logits.view() + + loss_fct = BCEWithLogitsLoss() + + start_loss = loss = loss_fct( + start_logits, + start_positions.float(), + ) + end_loss = loss = loss_fct( + end_logits, + end_positions.float(), + ) + total_loss = (start_loss + end_loss) / 2 + + output = (start_logits, end_logits) + outputs[2:] + return ((total_loss,) + output) if total_loss is not None else output \ No newline at end of file diff --git a/src/models/roberta_token_spans.py b/src/models/roberta_token_spans.py new file mode 100644 index 0000000000000000000000000000000000000000..8e010a42b1a006cbcbf691315bbe64c46dd450f5 --- /dev/null +++ b/src/models/roberta_token_spans.py @@ -0,0 +1,97 @@ +import torch.nn as nn +import torch +from torch.nn import CrossEntropyLoss +from transformers import RobertaModel +from transformers.models.roberta.modeling_roberta import RobertaPreTrainedModel +from src.utils.mapper import configmapper + + +@configmapper.map("models", "roberta_token_spans") +class RobertaModelForTokenAndSpans(RobertaPreTrainedModel): + def __init__(self, config, num_token_labels=2, num_qa_labels=2): + super(RobertaModelForTokenAndSpans, self).__init__(config) + self.roberta = RobertaModel(config) + self.num_token_labels = num_token_labels + self.num_qa_labels = num_qa_labels + + self.dropout = nn.Dropout(config.hidden_dropout_prob) + self.classifier = nn.Linear(config.hidden_size, num_token_labels) + self.qa_outputs = nn.Linear(config.hidden_size, num_qa_labels) + self.init_weights() + + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + start_positions=None, + end_positions=None, + labels=None, # Token Wise Labels + output_attentions=None, + output_hidden_states=None, + ): + + outputs = self.roberta( + input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=None, + ) + + sequence_output = outputs[0] + + qa_logits = self.qa_outputs(sequence_output) + start_logits, end_logits = qa_logits.split(1, dim=-1) + start_logits = start_logits.squeeze(-1) + end_logits = end_logits.squeeze(-1) + + sequence_output = self.dropout(sequence_output) + token_logits = self.classifier(sequence_output) + + total_loss = None + if ( + start_positions is not None + and end_positions is not None + and labels is not None + ): + # If we are on multi-GPU, split add a dimension + if len(start_positions.size()) > 1: + start_positions = start_positions.squeeze(-1) + if len(end_positions.size()) > 1: + end_positions = end_positions.squeeze(-1) + + ignored_index = start_logits.size(1) + 
start_positions.clamp_(0, ignored_index) + end_positions.clamp_(0, ignored_index) + + loss_fct = CrossEntropyLoss(ignore_index=ignored_index) + start_loss = loss_fct(start_logits, start_positions) + end_loss = loss_fct(end_logits, end_positions) + + loss_fct = CrossEntropyLoss() + if attention_mask is not None: + active_loss = attention_mask.view(-1) == 1 + active_logits = token_logits.view(-1, self.num_token_labels) + active_labels = torch.where( + active_loss, + labels.view(-1), + torch.tensor(loss_fct.ignore_index).type_as(labels), + ) + token_loss = loss_fct(active_logits, active_labels) + else: + token_loss = loss_fct( + token_logits.view(-1, self.num_token_labels), labels.view(-1) + ) + + total_loss = (start_loss + end_loss) / 2 + token_loss + + output = (start_logits, end_logits, token_logits) + outputs[2:] + return ((total_loss,) + output) if total_loss is not None else output \ No newline at end of file diff --git a/src/models/two_layer_nn.py b/src/models/two_layer_nn.py new file mode 100644 index 0000000000000000000000000000000000000000..969f22490cdca2eb483a3b650d0be9f9a996a433 --- /dev/null +++ b/src/models/two_layer_nn.py @@ -0,0 +1,46 @@ +"""Implements a two layer Neural Network.""" + +from torch.nn import Module, Linear, ReLU +from src.utils.mapper import configmapper + + +@configmapper.map("models", "two_layer_nn") +class TwoLayerNN(Module): + """Implements two layer neural network. + + Methods: + forward(x_input): Returns the output of the neural network. + """ + + def __init__(self, embedding, dims): + """Construct the two layer Neural Network. + + This method is used to initialize the two layer neural network, + with a given embedding type and corresponding arguments. + + Args: + embedding (torch.nn.Module): The embedding layer for the model. + dims (list): List of dimensions for the neural network, input to output. + """ + super(TwoLayerNN, self).__init__() + + self.embedding = embedding + self.linear1 = Linear(dims[0], dims[1]) + self.relu = ReLU() + self.linear2 = Linear(dims[1], dims[2]) + + def forward(self, x_input): + """ + Return the output of the neural network for an input. + + Args: + x_input (torch.Tensor): The input tensor to the neural network. + + Returns: + x_output (torch.Tensor): The output tensor for the neural network. 
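+
+        Example (an illustrative sketch; the dimensions are assumptions, and
+        dims[0] must equal the flattened output size of the embedding layer):
+            model = TwoLayerNN(embedding, dims=[1200, 128, 2])
+            logits = model(token_ids)  # token_ids: LongTensor of shape (batch, seq_len)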
+ """ + output = self.embedding(x_input) + output = self.linear1(output) + output = self.relu(output) + x_output = self.linear2(output) + return x_output diff --git a/src/modules/__init__.py b/src/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/modules/__pycache__/__init__.cpython-38.pyc b/src/modules/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5290fbff4f68909ec67e34b430d32141ca8c6daa Binary files /dev/null and b/src/modules/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/modules/__pycache__/embeddings.cpython-38.pyc b/src/modules/__pycache__/embeddings.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16d8441cca8e5ee5156823a8b4eabe8a3865be80 Binary files /dev/null and b/src/modules/__pycache__/embeddings.cpython-38.pyc differ diff --git a/src/modules/__pycache__/preprocessors.cpython-38.pyc b/src/modules/__pycache__/preprocessors.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1a33a8d44f2f41672102d52a9afd86ace0056684 Binary files /dev/null and b/src/modules/__pycache__/preprocessors.cpython-38.pyc differ diff --git a/src/modules/__pycache__/tokenizers.cpython-38.pyc b/src/modules/__pycache__/tokenizers.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8613f355e4892f3ef3e4f89789fdd7420d8f3052 Binary files /dev/null and b/src/modules/__pycache__/tokenizers.cpython-38.pyc differ diff --git a/src/modules/activations.py b/src/modules/activations.py new file mode 100644 index 0000000000000000000000000000000000000000..34bc248e43817ca9ab0b674611580076a78fb066 --- /dev/null +++ b/src/modules/activations.py @@ -0,0 +1,6 @@ +import torch.nn as nn +from src.utils.mapper import configmapper + +configmapper.map("activations", "relu")(nn.ReLU) +configmapper.map("activations", "logsoftmax")(nn.LogSoftmax) +configmapper.map("activations", "softmax")(nn.Softmax) diff --git a/src/modules/embeddings.py b/src/modules/embeddings.py new file mode 100644 index 0000000000000000000000000000000000000000..6799ba751625036f5ca49d861a397a2bb4267bf8 --- /dev/null +++ b/src/modules/embeddings.py @@ -0,0 +1,37 @@ +"""Contains various kinds of embeddings like Glove, BERT, etc.""" + +from torch.nn import Module, Embedding, Flatten +from src.utils.mapper import configmapper + + +@configmapper.map("embeddings", "glove") +class GloveEmbedding(Module): + """Implement Glove based Word Embedding.""" + + def __init__(self, embedding_matrix, padding_idx, static=True): + """Construct GloveEmbedding. + + Args: + embedding_matrix (torch.Tensor): The matrix contrainining the embedding weights + padding_idx (int): The padding index in the tokenizer. + static (bool): Whether or not to freeze embeddings. + """ + super(GloveEmbedding, self).__init__() + self.embedding = Embedding.from_pretrained(embedding_matrix) + self.embedding.padding_idx = padding_idx + if static: + self.embedding.weight.required_grad = False + self.flatten = Flatten(start_dim=1) + + def forward(self, x_input): + """Pass the input through the embedding. 
+ + Args: + x_input (torch.Tensor): The numericalized tokenized input + + Returns: + x_output (torch.Tensor): The output from the embedding + """ + x_output = self.embedding(x_input) + x_output = self.flatten(x_output) + return x_output diff --git a/src/modules/losses.py b/src/modules/losses.py new file mode 100644 index 0000000000000000000000000000000000000000..0b470ac132400279cbd847ff3db04dd4aae97f31 --- /dev/null +++ b/src/modules/losses.py @@ -0,0 +1,6 @@ +"All criterion functions." +from torch.nn import MSELoss, CrossEntropyLoss +from src.utils.mapper import configmapper + +configmapper.map("losses", "mse")(MSELoss) +configmapper.map("losses", "CrossEntropyLoss")(CrossEntropyLoss) diff --git a/src/modules/metrics.py b/src/modules/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..24db6ab3a3905a1de2df85c91f848b4dfa889df0 --- /dev/null +++ b/src/modules/metrics.py @@ -0,0 +1,17 @@ +"""Metrics.""" +from sklearn.metrics import ( + mean_squared_error, + f1_score, + precision_score, + recall_score, + roc_auc_score, + accuracy_score, +) +from src.utils.mapper import configmapper + +configmapper.map("metrics", "sklearn_f1")(f1_score) +configmapper.map("metrics", "sklearn_p")(precision_score) +configmapper.map("metrics", "sklearn_r")(recall_score) +configmapper.map("metrics", "sklearn_roc")(roc_auc_score) +configmapper.map("metrics", "sklearn_acc")(accuracy_score) +configmapper.map("metrics", "sklearn_mse")(mean_squared_error) diff --git a/src/modules/optimizers.py b/src/modules/optimizers.py new file mode 100644 index 0000000000000000000000000000000000000000..ac2fd7365666b9386304f2d27c4f783f456e7cbb --- /dev/null +++ b/src/modules/optimizers.py @@ -0,0 +1,7 @@ +" Method containing activation functions" +from torch.optim import Adam, AdamW, SGD +from src.utils.mapper import configmapper + +configmapper.map("optimizers", "adam")(Adam) +configmapper.map("optimizers", "adam_w")(AdamW) +configmapper.map("optimizers", "sgd")(SGD) diff --git a/src/modules/preprocessors.py b/src/modules/preprocessors.py new file mode 100644 index 0000000000000000000000000000000000000000..d14359cc10417b857b9cf92c200ae061dc8fac71 --- /dev/null +++ b/src/modules/preprocessors.py @@ -0,0 +1,112 @@ +from src.modules.tokenizers import * +from src.modules.embeddings import * +from src.utils.mapper import configmapper + + +class Preprocessor: + def preprocess(self): + pass + + +@configmapper.map("preprocessors", "glove") +class GlovePreprocessor(Preprocessor): + """GlovePreprocessor.""" + + def __init__(self, config): + """ + Args: + config (src.utils.module.Config): configuration for preprocessor + """ + super(GlovePreprocessor, self).__init__() + self.config = config + self.tokenizer = configmapper.get_object( + "tokenizers", self.config.main.preprocessor.tokenizer.name + )(**self.config.main.preprocessor.tokenizer.init_params.as_dict()) + self.tokenizer_params = ( + self.config.main.preprocessor.tokenizer.init_vector_params.as_dict() + ) + + self.tokenizer.initialize_vectors(**self.tokenizer_params) + self.embeddings = configmapper.get_object( + "embeddings", self.config.main.preprocessor.embedding.name + )( + self.tokenizer.text_field.vocab.vectors, + self.tokenizer.text_field.vocab.stoi[self.tokenizer.text_field.pad_token], + ) + + def preprocess(self, model_config, data_config): + train_dataset = configmapper.get_object("datasets", data_config.main.name)( + data_config.train, self.tokenizer + ) + val_dataset = configmapper.get_object("datasets", data_config.main.name)( + data_config.val, 
self.tokenizer + ) + model = configmapper.get_object("models", model_config.name)( + self.embeddings, **model_config.params.as_dict() + ) + + return model, train_dataset, val_dataset + + +@configmapper.map("preprocessors", "clozePreprocessor") +class ClozePreprocessor(Preprocessor): + """GlovePreprocessor.""" + + def __init__(self, config): + """ + Args: + config (src.utils.module.Config): configuration for preprocessor + """ + super(ClozePreprocessor, self).__init__() + self.config = config + self.tokenizer = configmapper.get_object( + "tokenizers", self.config.main.preprocessor.tokenizer.name + ).from_pretrained( + **self.config.main.preprocessor.tokenizer.init_params.as_dict() + ) + + def preprocess(self, model_config, data_config): + train_dataset = configmapper.get_object("datasets", data_config.main.name)( + data_config.train, self.tokenizer + ) + val_dataset = configmapper.get_object("datasets", data_config.main.name)( + data_config.val, self.tokenizer + ) + model = configmapper.get_object("models", model_config.name).from_pretrained( + **model_config.params.as_dict() + ) + + return model, train_dataset, val_dataset + + +@configmapper.map("preprocessors", "transformersConcretenessPreprocessor") +class TransformersConcretenessPreprocessor(Preprocessor): + """BertConcretenessPreprocessor.""" + + def __init__(self, config): + """ + Args: + config (src.utils.module.Config): configuration for preprocessor + """ + super(TransformersConcretenessPreprocessor, self).__init__() + self.config = config + self.tokenizer = configmapper.get_object( + "tokenizers", self.config.main.preprocessor.tokenizer.name + ).from_pretrained( + **self.config.main.preprocessor.tokenizer.init_params.as_dict() + ) + + def preprocess(self, model_config, data_config): + + train_dataset = configmapper.get_object("datasets", data_config.main.name)( + data_config.train, self.tokenizer + ) + val_dataset = configmapper.get_object("datasets", data_config.main.name)( + data_config.val, self.tokenizer + ) + + model = configmapper.get_object("models", model_config.name)( + **model_config.params.as_dict() + ) + + return model, train_dataset, val_dataset diff --git a/src/modules/schedulers.py b/src/modules/schedulers.py new file mode 100644 index 0000000000000000000000000000000000000000..a41bb049aae0d00349c5493d3a17fcc017b76063 --- /dev/null +++ b/src/modules/schedulers.py @@ -0,0 +1,14 @@ +from torch.optim.lr_scheduler import ( + StepLR, + CosineAnnealingLR, + ReduceLROnPlateau, + CyclicLR, + CosineAnnealingWarmRestarts, +) +from src.utils.mapper import configmapper + +configmapper.map("schedulers", "step")(StepLR) +configmapper.map("schedulers", "cosineanneal")(CosineAnnealingLR) +configmapper.map("schedulers", "reduceplateau")(ReduceLROnPlateau) +configmapper.map("schedulers", "cyclic")(CyclicLR) +configmapper.map("schedulers", "cosineannealrestart")(CosineAnnealingWarmRestarts) diff --git a/src/modules/tokenizers.py b/src/modules/tokenizers.py new file mode 100644 index 0000000000000000000000000000000000000000..b7009261fbce92dad162591c8045cb53be268e0a --- /dev/null +++ b/src/modules/tokenizers.py @@ -0,0 +1,107 @@ +"""Contains tokenizers like GloveTokenizers and BERT Tokenizer.""" + +import torch +# from torchtext.vocab import GloVe +# from torchtext.data import Field, TabularDataset +from src.utils.mapper import configmapper +from transformers import AutoTokenizer + + +class Tokenizer: + """Abstract Class for Tokenizers.""" + + def tokenize(self): + """Abstract Method for tokenization.""" + + 
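+# NOTE: GloveTokenizer below relies on GloVe, Field and TabularDataset from
+# torchtext, whose imports are commented out at the top of this file; they must
+# be re-enabled (or supplied some other way) before this class can be used.
+# Illustrative usage (a sketch only; the file path and field name are placeholders):
+#     tokenizer = GloveTokenizer(name="840B", dim="300", cache="../embeddings/")
+#     tokenizer.initialize_vectors(fix_length=4, tokenize="spacy",
+#                                  tokenizer_file_paths=["../data/train.tsv"],
+#                                  file_format="tsv", fields=["text"])
+#     ids = tokenizer.tokenize("an example sentence")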
+@configmapper.map("tokenizers", "glove") +class GloveTokenizer(Tokenizer): + """Implement GloveTokenizer for tokenizing text for Glove Embeddings. + + Attributes: + embeddings (torchtext.vocab.Vectors): Loaded pre-trained embeddings. + text_field (torchtext.data.Field): Text_field for vector creation. + + Methods: + __init__(self, name='840B', dim='300', cache='../embeddings/') : Constructor method + initialize_vectors(fix_length=4, tokenize='spacy', file_path="../data/imperceptibility + /Concreteness Ratings/train/forty.csv", + file_format='tsv', fields=None): Initialize vocab vectors based on data. + + tokenize(x_input, **initializer_params): Tokenize given input and return the output. + """ + + def __init__(self, name="840B", dim="300", cache="../embeddings/"): + """Construct GloveTokenizer. + + Args: + name (str): Name of the GloVe embedding file + dim (str): Dimensions of the Glove embedding file + cache (str): Path to the embeddings directory + """ + super(GloveTokenizer, self).__init__() + self.embeddings = GloVe(name=name, dim=dim, cache=cache) + self.text_field = None + + def initialize_vectors( + self, + fix_length=4, + tokenize="spacy", + tokenizer_file_paths=None, + file_format="tsv", + fields=None, + ): + """Initialize words/sequences based on GloVe embedding. + + Args: + fields (list): The list containing the fields to be taken + and processed from the file (see documentation for + torchtext.data.TabularDataset) + fix_length (int): The length of the tokenized text, + padding or cropping is done accordingly + tokenize (function or string): Method to tokenize the data. + If 'spacy' uses spacy tokenizer, + else the specified method. + tokenizer_file_paths (list of str): The paths of the files containing the data + format (str): The format of the file : 'csv', 'tsv' or 'json' + """ + text_field = Field(batch_first=True, fix_length=fix_length, tokenize=tokenize) + tab_dats = [ + TabularDataset( + i, format=file_format, fields={k: (k, text_field) for k in fields} + ) + for i in tokenizer_file_paths + ] + text_field.build_vocab(*tab_dats) + text_field.vocab.load_vectors(self.embeddings) + self.text_field = text_field + + def tokenize(self, x_input, **init_vector__params): + """Tokenize given input based on initialized vectors. + + Initialize the vectors with given parameters if not already initialized. 
+ + Args: + x_input (str): Unprocessed input text to be tokenized + **initializer_params (Keyword arguments): Parameters to initialize vectors + + Returns: + x_output (str): Processed and tokenized text + """ + if self.text_field is None: + self.initialize_vectors(**init_vector__params) + try: + x_output = torch.squeeze( + self.text_field.process([self.text_field.preprocess(x_input)]) + ) + except Exception as e: + print(x_input) + print(self.text_field.preprocess(x_input)) + print(e) + return x_output + + +@configmapper.map("tokenizers", "AutoTokenizer") +class AutoTokenizer(AutoTokenizer): + def __init__(self, *args): + super(AutoTokenizer, self).__init__() diff --git a/src/trainers/__init__.py b/src/trainers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/trainers/base_trainer.py b/src/trainers/base_trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..a3867b5ad6a440f2c8f033f57ee8e58f5e731e85 --- /dev/null +++ b/src/trainers/base_trainer.py @@ -0,0 +1,563 @@ +import math +import os +import torch +from src.modules.optimizers import * +from src.modules.embeddings import * +from src.modules.schedulers import * +from src.modules.tokenizers import * +from src.modules.metrics import * +from src.modules.losses import * +from src.utils.misc import * +from src.utils.logger import Logger +from src.utils.mapper import configmapper +from src.utils.configuration import Config + +from torch.utils.data import DataLoader +from tqdm import tqdm + + +@configmapper.map("trainers", "base") +class BaseTrainer: + def __init__(self, config): + self._config = config + self.metrics = { + configmapper.get_object("metrics", metric["type"]): metric["params"] + for metric in self._config.main_config.metrics + } + self.train_config = self._config.train + self.val_config = self._config.val + self.log_label = self.train_config.log.log_label + if self.train_config.log_and_val_interval is not None: + self.val_log_together = True + print("Logging with label: ", self.log_label) + + def train(self, model, train_dataset, val_dataset=None, logger=None): + device = torch.device(self._config.main_config.device.name) + model.to(device) + optim_params = self.train_config.optimizer.params + if optim_params: + optimizer = configmapper.get_object( + "optimizers", self.train_config.optimizer.type + )(model.parameters(), **optim_params.as_dict()) + else: + optimizer = configmapper.get_object( + "optimizers", self.train_config.optimizer.type + )(model.parameters()) + + if self.train_config.scheduler is not None: + scheduler_params = self.train_config.scheduler.params + if scheduler_params: + scheduler = configmapper.get_object( + "schedulers", self.train_config.scheduler.type + )(optimizer, **scheduler_params.as_dict()) + else: + scheduler = configmapper.get_object( + "schedulers", self.train_config.scheduler.type + )(optimizer) + + criterion_params = self.train_config.criterion.params + if criterion_params: + criterion = configmapper.get_object( + "losses", self.train_config.criterion.type + )(**criterion_params.as_dict()) + else: + criterion = configmapper.get_object( + "losses", self.train_config.criterion.type + )() + if "custom_collate_fn" in dir(train_dataset): + train_loader = DataLoader( + dataset=train_dataset, + collate_fn=train_dataset.custom_collate_fn, + **self.train_config.loader_params.as_dict(), + ) + else: + train_loader = DataLoader( + dataset=train_dataset, **self.train_config.loader_params.as_dict() 
+ ) + # train_logger = Logger(**self.train_config.log.logger_params.as_dict()) + + max_epochs = self.train_config.max_epochs + batch_size = self.train_config.loader_params.batch_size + + if self.val_log_together: + val_interval = self.train_config.log_and_val_interval + log_interval = val_interval + else: + val_interval = self.train_config.val_interval + log_interval = self.train_config.log.log_interval + + if logger is None: + train_logger = Logger(**self.train_config.log.logger_params.as_dict()) + else: + train_logger = logger + + train_log_values = self.train_config.log.values.as_dict() + + best_score = ( + -math.inf if self.train_config.save_on.desired == "max" else math.inf + ) + save_on_score = self.train_config.save_on.score + best_step = -1 + best_model = None + + best_hparam_list = None + best_hparam_name_list = None + best_metrics_list = None + best_metrics_name_list = None + + # print("\nTraining\n") + # print(max_steps) + + global_step = 0 + for epoch in range(1, max_epochs + 1): + print( + "Epoch: {}/{}, Global Step: {}".format(epoch, max_epochs, global_step) + ) + train_loss = 0 + val_loss = 0 + + if(self.train_config.label_type=='float'): + all_labels = torch.FloatTensor().to(device) + else: + all_labels = torch.LongTensor().to(device) + + all_outputs = torch.Tensor().to(device) + + train_scores = None + val_scores = None + + pbar = tqdm(total=math.ceil(len(train_dataset) / batch_size)) + pbar.set_description("Epoch " + str(epoch)) + + val_counter = 0 + + for step, batch in enumerate(train_loader): + model.train() + optimizer.zero_grad() + inputs, labels = batch + + if(self.train_config.label_type=='float'): ##Specific to Float Type + labels = labels.float() + + for key in inputs: + inputs[key] = inputs[key].to(device) + labels = labels.to(device) + outputs = model(inputs) + loss = criterion(torch.squeeze(outputs), labels) + loss.backward() + + all_labels = torch.cat((all_labels, labels), 0) + + if (self.train_config.label_type=='float'): + all_outputs = torch.cat((all_outputs, outputs), 0) + else: + all_outputs = torch.cat((all_outputs, torch.argmax(outputs, axis=1)), 0) + + + train_loss += loss.item() + optimizer.step() + + if self.train_config.scheduler is not None: + if isinstance(scheduler, ReduceLROnPlateau): + scheduler.step(train_loss / (step + 1)) + else: + scheduler.step() + + # print(train_loss) + # print(step+1) + + pbar.set_postfix_str(f"Train Loss: {train_loss /(step+1)}") + pbar.update(1) + + global_step += 1 + + # Need to check if we want global_step or local_step + + if val_dataset is not None and (global_step - 1) % val_interval == 0: + # print("\nEvaluating\n") + val_scores = self.val( + model, + val_dataset, + criterion, + device, + global_step, + train_logger, + train_log_values, + ) + + #save_flag = 0 + if self.train_config.save_on is not None: + + ## BEST SCORES UPDATING + + train_scores = self.get_scores( + train_loss, + global_step, + self.train_config.criterion.type, + all_outputs, + all_labels, + ) + + best_score, best_step, save_flag = self.check_best( + val_scores, save_on_score, best_score, global_step + ) + + store_dict = { + "model_state_dict": model.state_dict(), + "best_step": best_step, + "best_score": best_score, + "save_on_score": save_on_score, + } + + path = self.train_config.save_on.best_path.format( + self.log_label + ) + + self.save(store_dict, path, save_flag) + + if save_flag and train_log_values["hparams"] is not None: + ( + best_hparam_list, + best_hparam_name_list, + best_metrics_list, + best_metrics_name_list, + ) = 
self.update_hparams( + train_scores, val_scores, desc="best_val" + ) + # pbar.close() + if (global_step - 1) % log_interval == 0: + # print("\nLogging\n") + train_loss_name = self.train_config.criterion.type + metric_list = [ + metric(all_labels.cpu(), all_outputs.detach().cpu(), **self.metrics[metric]) + for metric in self.metrics + ] + metric_name_list = [ + metric['type'] for metric in self._config.main_config.metrics + ] + + train_scores = self.log( + train_loss / (step + 1), + train_loss_name, + metric_list, + metric_name_list, + train_logger, + train_log_values, + global_step, + append_text=self.train_config.append_text, + ) + pbar.close() + if not os.path.exists(self.train_config.checkpoint.checkpoint_dir): + os.makedirs(self.train_config.checkpoint.checkpoint_dir) + + if self.train_config.save_after_epoch: + store_dict = { + "model_state_dict": model.state_dict(), + } + + path = f"{self.train_config.checkpoint.checkpoint_dir}_{str(self.train_config.log.log_label)}_{str(epoch)}.pth" + + self.save(store_dict, path, save_flag=1) + + if epoch == max_epochs: + # print("\nEvaluating\n") + val_scores = self.val( + model, + val_dataset, + criterion, + device, + global_step, + train_logger, + train_log_values, + ) + + # print("\nLogging\n") + train_loss_name = self.train_config.criterion.type + metric_list = [ + metric(all_labels.cpu(), all_outputs.detach().cpu(),**self.metrics[metric]) + for metric in self.metrics + ] + metric_name_list = [metric['type'] for metric in self._config.main_config.metrics] + + train_scores = self.log( + train_loss / len(train_loader), + train_loss_name, + metric_list, + metric_name_list, + train_logger, + train_log_values, + global_step, + append_text=self.train_config.append_text, + ) + + if self.train_config.save_on is not None: + + ## BEST SCORES UPDATING + + train_scores = self.get_scores( + train_loss, + len(train_loader), + self.train_config.criterion.type, + all_outputs, + all_labels, + ) + + best_score, best_step, save_flag = self.check_best( + val_scores, save_on_score, best_score, global_step + ) + + store_dict = { + "model_state_dict": model.state_dict(), + "best_step": best_step, + "best_score": best_score, + "save_on_score": save_on_score, + } + + path = self.train_config.save_on.best_path.format(self.log_label) + + self.save(store_dict, path, save_flag) + + if save_flag and train_log_values["hparams"] is not None: + ( + best_hparam_list, + best_hparam_name_list, + best_metrics_list, + best_metrics_name_list, + ) = self.update_hparams(train_scores, val_scores, desc="best_val") + + ## FINAL SCORES UPDATING + STORING + train_scores = self.get_scores( + train_loss, + len(train_loader), + self.train_config.criterion.type, + all_outputs, + all_labels, + ) + + store_dict = { + "model_state_dict": model.state_dict(), + "final_step": global_step, + "final_score": train_scores[save_on_score], + "save_on_score": save_on_score, + } + + path = self.train_config.save_on.final_path.format(self.log_label) + + self.save(store_dict, path, save_flag=1) + if train_log_values["hparams"] is not None: + ( + final_hparam_list, + final_hparam_name_list, + final_metrics_list, + final_metrics_name_list, + ) = self.update_hparams(train_scores, val_scores, desc="final") + train_logger.save_hyperparams( + best_hparam_list, + best_hparam_name_list, + [int(self.log_label),] + best_metrics_list + final_metrics_list, + ["hparams/log_label",] + + best_metrics_name_list + + final_metrics_name_list, + ) + # + + ## Need to check if we want same loggers of different loggers for train 
and eval + ## Evaluate + + def get_scores(self, loss, divisor, loss_name, all_outputs, all_labels): + + avg_loss = loss / divisor + + metric_list = [ + metric(all_labels.cpu(), all_outputs.detach().cpu(), **self.metrics[metric]) + for metric in self.metrics + ] + metric_name_list = [metric['type'] for metric in self._config.main_config.metrics] + + return dict(zip([loss_name,] + metric_name_list, [avg_loss,] + metric_list,)) + + def check_best(self, val_scores, save_on_score, best_score, global_step): + save_flag = 0 + best_step = global_step + if self.train_config.save_on.desired == "min": + if val_scores[save_on_score] < best_score: + save_flag = 1 + best_score = val_scores[save_on_score] + best_step = global_step + else: + if val_scores[save_on_score] > best_score: + save_flag = 1 + best_score = val_scores[save_on_score] + best_step = global_step + return best_score, best_step, save_flag + + def update_hparams(self, train_scores, val_scores, desc): + hparam_list = [] + hparam_name_list = [] + for hparam in self.train_config.log.values.hparams: + hparam_list.append(get_item_in_config(self._config, hparam["path"])) + if isinstance(hparam_list[-1], Config): + hparam_list[-1] = hparam_list[-1].as_dict() + hparam_name_list.append(hparam["name"]) + + val_keys, val_values = zip(*val_scores.items()) + train_keys, train_values = zip(*train_scores.items()) + val_keys = list(val_keys) + train_keys = list(train_keys) + val_values = list(val_values) + train_values = list(train_values) + for i, key in enumerate(val_keys): + val_keys[i] = f"hparams/{desc}_val_" + val_keys[i] + for i, key in enumerate(train_keys): + train_keys[i] = f"hparams/{desc}_train_" + train_keys[i] + # train_logger.save_hyperparams(hparam_list, hparam_name_list,train_values+val_values,train_keys+val_keys, ) + return ( + hparam_list, + hparam_name_list, + train_values + val_values, + train_keys + val_keys, + ) + + def save(self, store_dict, path, save_flag=0): + if save_flag: + dirs = "/".join(path.split("/")[:-1]) + if not os.path.exists(dirs): + os.makedirs(dirs) + torch.save(store_dict, path) + + def log( + self, + loss, + loss_name, + metric_list, + metric_name_list, + logger, + log_values, + global_step, + append_text, + ): + + return_dic = dict(zip([loss_name,] + metric_name_list, [loss,] + metric_list,)) + + loss_name = f"{append_text}_{self.log_label}_{loss_name}" + if log_values["loss"]: + logger.save_params( + [loss], + [loss_name], + combine=True, + combine_name="losses", + global_step=global_step, + ) + + for i in range(len(metric_name_list)): + metric_name_list[ + i + ] = f"{append_text}_{self.log_label}_{metric_name_list[i]}" + if log_values["metrics"]: + logger.save_params( + metric_list, + metric_name_list, + combine=True, + combine_name="metrics", + global_step=global_step, + ) + # print(hparams_list) + # print(hparam_name_list) + + # for k,v in dict(zip([loss_name],[loss])).items(): + # print(f"{k}:{v}") + # for k,v in dict(zip(metric_name_list,metric_list)).items(): + # print(f"{k}:{v}") + return return_dic + + def val( + self, + model, + dataset, + criterion, + device, + global_step, + train_logger=None, + train_log_values=None, + log=True, + ): + append_text = self.val_config.append_text + if train_logger is not None: + val_logger = train_logger + else: + val_logger = Logger(**self.val_config.log.logger_params.as_dict()) + + if train_log_values is not None: + val_log_values = train_log_values + else: + val_log_values = self.val_config.log.values.as_dict() + if "custom_collate_fn" in dir(dataset): + val_loader 
= DataLoader( + dataset=dataset, + collate_fn=dataset.custom_collate_fn, + **self.val_config.loader_params.as_dict(), + ) + else: + val_loader = DataLoader( + dataset=dataset, **self.val_config.loader_params.as_dict() + ) + + all_outputs = torch.Tensor().to(device) + if(self.train_config.label_type=='float'): + all_labels = torch.FloatTensor().to(device) + else: + all_labels = torch.LongTensor().to(device) + + batch_size = self.val_config.loader_params.batch_size + + with torch.no_grad(): + model.eval() + val_loss = 0 + for j, batch in enumerate(val_loader): + + inputs, labels = batch + + if(self.train_config.label_type=='float'): + labels = labels.float() + + for key in inputs: + inputs[key] = inputs[key].to(device) + labels = labels.to(device) + + outputs = model(inputs) + loss = criterion(torch.squeeze(outputs), labels) + val_loss += loss.item() + + all_labels = torch.cat((all_labels, labels), 0) + + if (self.train_config.label_type=='float'): + all_outputs = torch.cat((all_outputs, outputs), 0) + else: + all_outputs = torch.cat((all_outputs, torch.argmax(outputs, axis=1)), 0) + + val_loss = val_loss / len(val_loader) + + val_loss_name = self.train_config.criterion.type + + # print(all_outputs, all_labels) + metric_list = [ + metric(all_labels.cpu(), all_outputs.detach().cpu(), **self.metrics[metric]) + for metric in self.metrics + ] + metric_name_list = [metric['type'] for metric in self._config.main_config.metrics] + return_dic = dict( + zip([val_loss_name,] + metric_name_list, [val_loss,] + metric_list,) + ) + if log: + val_scores = self.log( + val_loss, + val_loss_name, + metric_list, + metric_name_list, + val_logger, + val_log_values, + global_step, + append_text, + ) + return val_scores + return return_dic diff --git a/src/utils/__init__.py b/src/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/utils/__pycache__/__init__.cpython-38.pyc b/src/utils/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5cf23705da07e60e9505befbf9d3e82ee8960aaf Binary files /dev/null and b/src/utils/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/utils/__pycache__/configuration.cpython-38.pyc b/src/utils/__pycache__/configuration.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..83c333b3ea2455fc6e8f8736cff709eac97799e6 Binary files /dev/null and b/src/utils/__pycache__/configuration.cpython-38.pyc differ diff --git a/src/utils/__pycache__/mapper.cpython-38.pyc b/src/utils/__pycache__/mapper.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6ad78799089eba998401b9c2c9c78f678c355f0e Binary files /dev/null and b/src/utils/__pycache__/mapper.cpython-38.pyc differ diff --git a/src/utils/__pycache__/postprocess_predictions.cpython-38.pyc b/src/utils/__pycache__/postprocess_predictions.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e4dc51a4c1c3ab7fe4fa3dc9d0aadfde4d79a539 Binary files /dev/null and b/src/utils/__pycache__/postprocess_predictions.cpython-38.pyc differ diff --git a/src/utils/combine_preds.py b/src/utils/combine_preds.py new file mode 100644 index 0000000000000000000000000000000000000000..e7a910422f044e919baa2ff65561e7a5f047526d --- /dev/null +++ b/src/utils/combine_preds.py @@ -0,0 +1,60 @@ +import os +import argparse +from omegaconf import OmegaConf + + +def binary_intersection(lst1, lst2): + lst3 = list(set([value for value in lst1 
if value in lst2])) + return lst3 + + +def binary_union(lst1, lst2): + lst3 = list(set(lst1 + lst2)) + return lst3 + + +def combine(files, type="union"): + text = {} + if type == "union": + fn = binary_union + else: + fn = binary_intersection + for fil in files: + with open(fil, "r") as f: + for line in f: + line_split = line.split("\t") + if int(line_split[0]) in text: + text[int(line_split[0])] = fn( + text[int(line_split[0])], eval(line_split[1]) + ) + else: + text[int(line_split[0])] = eval(line_split[1]) + return text + + +def write_dict_to_file(text, path): + with open(path, "w") as f: + for id, spans in text.items(): + if id != len(text) - 1: + f.write(f"{id}\t{str(spans)}\n") + else: + f.write(f"{id}\t{str(spans)}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + prog="combine_preds.py", description="Combine span predictions." + ) + parser.add_argument( + "--config", + type=str, + action="store", + help="The configuration for combining predictions.", + ) + args = parser.parse_args() + combine_config = OmegaConf.load(args.config) + text = combine(combine_config.files, combine_config.type) + dir = "/".join(combine_config.path.split("/")[:-1]) + if not os.path.exists(dir): + os.makedirs(dir) + write_dict_to_file(text, combine_config.path) diff --git a/src/utils/combine_preds_3cls.py b/src/utils/combine_preds_3cls.py new file mode 100644 index 0000000000000000000000000000000000000000..3d1d09458fc925f6812228dba03c303c20135e7c --- /dev/null +++ b/src/utils/combine_preds_3cls.py @@ -0,0 +1,82 @@ +import os +import argparse +from omegaconf import OmegaConf + + +def binary_intersection(lst1, lst2): + lst3 = list(set([value for value in lst1 if value in lst2])) + return lst3 + + +def binary_union(lst1, lst2): + lst3 = list(set(lst1 + lst2)) + return lst3 + + +def combine(files, type="union"): + text = {} + if type == "union": + fn = binary_union + else: + fn = binary_intersection + for fil in files: + with open(fil, "r") as f: + for line in f: + line_split = line.split("\t") + if int(line_split[0]) in text: + text[int(line_split[0])] = fn( + text[int(line_split[0])], eval(line_split[1]) + ) + else: + text[int(line_split[0])] = eval(line_split[1]) + return text + + +def combine_I(files, type="union"): + text = {} + if type == "union": + fn = binary_union + else: + fn = binary_intersection + for fil in files: + with open(fil, "r") as f: + for line in f: + line_split = line.split("\t") + if int(line_split[0]) in text: + text[int(line_split[0])] = fn( + text[int(line_split[0])], eval(line_split[2]) + ) + else: + text[int(line_split[0])] = eval(line_split[2]) + return text + + +def write_dict_to_file(text, text_I, path): + with open(path, "w") as f: + for id, spans in text.items(): + # if id != len(text) - 1: + if 1: + f.write(f"{id}\t{str(spans)}\t{str(text_I[id])}\n") + # else: + # f.write(f"{id}\t{str(spans)}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + prog="combine_preds.py", description="Combine span predictions." 
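+        # The OmegaConf YAML passed via --config is expected to define `files`
+        # (the prediction files to merge), `type` ("union", else intersection)
+        # and `path` (the output file).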
+ ) + parser.add_argument( + "--config", + type=str, + action="store", + help="The configuration for combining predictions.", + ) + args = parser.parse_args() + combine_config = OmegaConf.load(args.config) + text = combine(combine_config.files, combine_config.type) + text_I = combine_I(combine_config.files, combine_config.type) + + dir = "/".join(combine_config.path.split("/")[:-1]) + if not os.path.exists(dir): + os.makedirs(dir) + write_dict_to_file(text, text_I, combine_config.path) diff --git a/src/utils/configuration.py b/src/utils/configuration.py new file mode 100644 index 0000000000000000000000000000000000000000..7213e723414897f112a6dcbbc5a7500ea42d8797 --- /dev/null +++ b/src/utils/configuration.py @@ -0,0 +1,148 @@ +import yaml +import copy +from src.utils.mapper import configmapper + + +def load_yaml(path): + """ + Function to load a yaml file and + return the collected dict(s) + + Parameters + ---------- + path : str + The path to the yaml config file + + Returns + ------- + result : dict + The dictionary from the config file + """ + + assert isinstance(path, str), "Provided path is not a string" + try: + f = open(path, "r") + result = yaml.load(f, Loader=yaml.Loader) + except FileNotFoundError as e: + # Adding this for future functionality + raise e + return result + + +def convert_params_to_dict(params): + dic = {} + for k, v in params.as_dict(): + try: + obj = configmapper.get_object("params", v) + dic[k] = v + except: + print( + f"Undefined {v} for the given key: {k} in mapper ,storing original value" + ) + dic[k] = v + return value + + +class Config: + """Config Class to be used with YAML configuration files + + This class can be used to address keys as attributes. + Ensure that there are no spaces between the keys. + Only objects of type dict can be converted to config. + + Attributes + ---------- + _config : dict, + The dictionary which is formed from the + yaml file or custom dictionary + + Methods + ------- + as_dict(), + Return the config object as dictionary + + Possible update: + ## Can be converted using __getattr__ to use **kwargs + ## with the Config object directly. + + set_value(attr,value) + Set the value of a particular attribute. + """ + + def __init__(self, *, path=None, dic=None): + """ + Initializer for the Config class + + Needs either path or the dict object to create the config + + Parameters + ---------- + path: str, optional + The path to the config YAML file. + Default value is None. + dic : dict, optional + The dictionary containing the configuration. + Default value is None. + """ + if path: + self._config = load_yaml(path) + elif dict: + self._config = dic + else: + raise Exception("Need either path or dict object to instantiate object.") + # self.keys = self._config.keys() + + def __getattr__(self, attr): + """ + Get method for Config class. Helps get keys as attributes. + + Parameters + ---------- + attr: The attribute name passed as .attr + + Returns + ------- + self._config[attr]: object or Config object + The value of the given key if it exists. + If the value is a dict object, + a Config object of that dict is returned. + Otherwise, the exact value is returned. + + Raises + ------ + + KeyError() if the given key is not defined. + """ + if attr in self._config: + if isinstance(self._config[attr], dict): + return Config(dic=self._config[attr]) + else: + return self._config[attr] + else: + raise KeyError(f"Key:{attr} not defined.") + + def set_value(self, attr, value): + """ + Set method for Config class. Helps set keys in the _config. 
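+
+        For example, config.set_value("batch_size", 16) makes the value
+        available afterwards as config.batch_size.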
+ + Parameters + ---------- + attr: The attribute name passed as .attr + value: The value to be stored as the attr. + """ + self._config[attr] = value + + def __str__(self): + """Function to print the dictionary + contained in the object.""" + return self._config.__str__() + + def __repr__(self): + return f"Config(dic={self._config})" + + def __deepcopy__(self, memo): + return Config(dic=copy.deepcopy(self._config)) + + def as_dict(self): + """Function to get the config as dictionary object""" + return dict(self._config) diff --git a/src/utils/logger.py b/src/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..b03c303e2e4d701d27c924d6cc95ed8196c0f1b8 --- /dev/null +++ b/src/utils/logger.py @@ -0,0 +1,135 @@ +import os +import json +import torch +from torch.autograd import Variable +from torch.utils.tensorboard import SummaryWriter + +# from torchvision.utils import make_grid +# from torchviz import make_dot + + +class Logger: + """""" + + def __init__(self, model, trainer, log_dir, comment=None): + """Initializer for Logger Class + Args: + + """ + self.model_path = os.path.join(log_dir, model, trainer) + self.writer = SummaryWriter(log_dir=self.model_path, comment=comment) + try: + if not os.path.exists(log_dir): + os.makedirs(log_dir) + if not (os.path.exists(self.model_path)): + os.makedirs(self.model_path) + else: + pass + # print("Directory Already Exists.") + except Exception as e: + print(e) + print("Failed to Create Log Directory.") + + def save_params( + self, + param_list, + param_name_list, + epoch=None, + batch_size=None, + batch=None, + combine=False, + combine_name=None, + global_step=None, + ): + if combine == False: + for i in range(len(param_list)): + if isinstance(param_list[i], Variable): + param_list[i] = param_list[i].data.cpu().numpy() + + if global_step is None: + self.writer.add_scalar( + param_name_list[i], + param_list[i], + Logger._global_step(epoch, batch_size, batch), + ) + else: + self.writer.add_scalar( + param_name_list[i], param_list[i], global_step + ) + + else: + scalar_dict = dict(zip(param_name_list, param_list)) + if global_step is None: + self.writer.add_scalars( + combine_name, + scalar_dict, + Logger._global_step(epoch, batch_size, batch), + ) + else: + self.writer.add_scalars(combine_name, scalar_dict, global_step) + + def save_batch_images( + self, image_name, image_batch, epoch, batch_size, batch=None, dataformats="CHW" + ): + self.writer.add_images( + image_name, + image_batch, + Logger._global_step(epoch, batch_size, batch), + dataformats=dataformats, + ) + + def save_prcurve(self, labels, preds, epoch, batch_size, batch=None): + self.writer.add_pr_curve( + "pr_curve", labels, preds, Logger._global_step(epoch, batch_size, batch) + ) + + def save_hyperparams( + self, hparam_list, hparam_name_list, metric_list, metric_name_list + ): + + for i in range(len(hparam_list)): + if isinstance(hparam_list[i], list): + hparam_list[i] = ",".join(list(map(str, hparam_list[i]))) + if isinstance(hparam_list[i], dict): + hparam_list[i] = json.dumps(hparam_list[i]) + if hparam_list[i] is None: + hparam_list[i] = "None" + print(hparam_list, hparam_name_list, metric_list, metric_name_list) + self.writer.add_hparams( + dict(zip(hparam_name_list, hparam_list)), + dict(zip(metric_name_list, metric_list)), + ) + + def save_models(self, model_list, model_names_list, epoch): + for model_name, model in zip(model_names_list, model_list): + torch.save(model.state_dict(), os.path.join(self.model_path, model_name)) + + def save_fig(self, fig, 
fig_name, epoch, batch_size, batch=None): + self.writer.add_figure( + fig_name, fig, Logger._global_step(epoch, batch_size, batch) + ) + + # def display_params(self, + # params_list, params_name_list, epoch, num_epochs, batch_size, batch + # ): + # for i in range(len(params_list)): + # if isinstance(params_list[i], Variable): + # params_list[i] = params_list[i].data.cpu().numpy() + # print("Epoch: {}/{}, Batch: {}/{}".format(epoch, num_epochs, batch, batch_size)) + # for i in range(len(params_list)): + # print("{}:{}".format(params_name_list[i], params_list[i])) + # + # def draw_model_architecture(self,model, output, input, input_name, save_name): + # make_dot( + # output, params=dict(list(model.named_parameters())) + [(input_name, input)] + # ) + + def close(self): + self.writer.close() + + @staticmethod + def _global_step(epoch, batch_size, batch): + if batch: + return epoch * batch_size + batch + else: + return epoch diff --git a/src/utils/mapper.py b/src/utils/mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..eebcebd3a17cba63df5c2889b1f6664c4732a791 --- /dev/null +++ b/src/utils/mapper.py @@ -0,0 +1,64 @@ +class ConfigMapper: + """Class for creating ConfigMapper objects. + + This class can be used to create custom configuration names using YAML files. + For each class or object instantiated in any modules, + the ConfigMapper object can be used either with the functions, + or as a decorator to store the mapping in the function. + + Attributes + ---------- + + Methods + ------- + + """ + + dicts = { + "models": {}, + "trainers": {}, + "metrics": {}, + "losses": {}, + "optimizers": {}, + "schedulers": {}, + "devices": {}, + "embeddings": {}, + "params": {}, + "datasets": {}, + "preprocessors": {}, + "tokenizers": {}, + } + + @classmethod + def map(cls, key, name): + """ + Map a particular name to an object, in the specified key + + Parameters + ---------- + name : str + The name of the object which will be used. + key : str + The key of the mapper to be used. + """ + + def wrap(obj): + if key in cls.dicts.keys(): + cls.dicts[key][name] = obj + else: + cls.dicts[key] = {} + cls.dicts[key][name] = obj + return obj + + return wrap + + @classmethod + def get_object(cls, key, name): + """""" + try: + return cls.dicts[key][name] + except: + raise NotImplementedError("Key:{name} Undefined".format(name=name)) + + +configmapper = ConfigMapper() diff --git a/src/utils/misc.py b/src/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..f80cfff5f1e8612ecfe54ac2da8da213d952c42f --- /dev/null +++ b/src/utils/misc.py @@ -0,0 +1,154 @@ +"""Miscellaneous utility functions.""" + +import random +import numpy as np +import torch +import copy +import itertools + + +def seed(value=42): + """Set random seed for everything. 
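+
+    Seeds random, numpy and torch, and puts cuDNN into deterministic,
+    non-benchmarking mode so that runs are reproducible.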
+ + Args: + value (int): Seed + """ + np.random.seed(value) + torch.manual_seed(value) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + random.seed(value) + + +def map_dict_to_obj(dic): + result_dic = {} + if dic is not None: + for k, v in dic.items(): + if isinstance(v, dict): + result_dic[k] = map_dict_to_obj(v) + else: + try: + obj = configmapper.get_object("params", v) + result_dic[k] = obj + except: + result_dic[k] = v + return result_dic + + +def get_item_in_config(config, path): + ## config is a dictionary + curr = config + if isinstance(config, dict): + for step in path: + curr = curr[step] + if curr is None: + break + else: + for step in path: + curr = curr.__getattr__(step) + if curr is None: + break + return curr + + +# init = train_config.grid_search +# curr = get_item_in_config(init,['hyperparams','loader_params']) +# curr.set_value('batch_size',1) +# print(train_config.grid_search) + + +def generate_grid_search_configs(main_config, grid_config, root="hyperparams"): + ## DFS + locations_values_pair = {} + init = grid_config.as_dict() + # print(init) + stack = [root] + visited = [stack[-1]] + + log_label_path = None + hparams_path = None + + # root = init[stack[-1]] + while len(stack) != 0: + root = get_item_in_config(init, stack) + flag = 0 + # print(visited) + # print(stack) + if ( + not isinstance(root, dict) and "hparams" not in stack + ): ## Meaning it is a leaf node + # print(stack) + if isinstance(root, list): + locations_values_pair[ + tuple(copy.deepcopy(stack)) + ] = root ## Append the current stack, and the list values + else: + locations_values_pair[tuple(copy.deepcopy(stack))] = [ + root, + ] ## Append the current stack, and the list values + + _ = stack.pop() ## Pop this root because we don't need it. 
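+            # Leaf value recorded above; backtrack and continue the DFS from the parent.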
+ else: + if isinstance(root, list) and "hparams" in stack: + hparams_path = copy.deepcopy(stack) + visited.append(".".join(stack)) + stack.pop() + continue + + if "log_label" in root.keys(): + log_label_path = copy.deepcopy( + stack + + [ + "log_label", + ] + ) + + if "log_label" in root.keys(): + log_label_path = copy.deepcopy( + stack + + [ + "log_label", + ] + ) + parent = root ## Otherwise it has children + + for key in parent.keys(): ## For the children + if ( + ".".join( + stack + + [ + key, + ] + ) + not in visited + ): ## Check if I have visited these children + flag = 1 ## If not, we need to repeat the process for this key + stack.append(key) ## Append this key to the stack + visited.append(".".join(stack)) + break + if flag == 0: + stack.pop() + + paths = list(locations_values_pair.keys()) + values = itertools.product(*list(locations_values_pair.values())) + + result_configs = [] + for value in values: + for item_index in range(len(value)): + curr_path = paths[item_index] + curr_item = value[item_index] + + curr_config_item = get_item_in_config(main_config, curr_path[1:-1]) + curr_config_item.set_value(curr_path[-1], curr_item) + + log_item = get_item_in_config(main_config, log_label_path[1:-1]) + log_item.set_value(log_label_path[-1], str(len(result_configs) + 1)) + + hparam_item = get_item_in_config(main_config, hparams_path[1:-1]) + hparam_item.set_value( + hparams_path[-1], + get_item_in_config(grid_config.hyperparams, hparams_path[1:]), + ) + + result_configs.append(copy.deepcopy(main_config)) + return result_configs diff --git a/src/utils/postprocess_predictions.py b/src/utils/postprocess_predictions.py new file mode 100644 index 0000000000000000000000000000000000000000..fec93fb1581116b2f8979bb3e4cfbec30d7dbf20 --- /dev/null +++ b/src/utils/postprocess_predictions.py @@ -0,0 +1,230 @@ +import collections +import numpy as np +from tqdm.auto import tqdm + + +def postprocess_token_span_predictions( + features, + examples, + raw_predictions, + tokenizer, + n_best_size=20, + max_answer_length=30, + squad_v2=False, +): + all_start_logits, all_end_logits, token_logits = raw_predictions + # Build a map example to its corresponding features. + example_id_to_index = {k: i for i, k in enumerate(list(examples["id"]))} + features_per_example = collections.defaultdict(list) + for i, feature in enumerate(features): + features_per_example[example_id_to_index[feature["example_id"]]].append(i) + + # The dictionaries we have to fill. + predictions = collections.OrderedDict() + + # Logging. + print( + f"Post-processing {len(examples)} example predictions split into {len(features)} features." + ) + + # Let's loop over all the examples! + for example_index in tqdm(range(len(examples))): + # Those are the indices of the features associated to the current example. + feature_indices = features_per_example[example_index] + + min_null_score = None # Only used if squad_v2 is True. + valid_answers = [] + + context = examples[example_index]["context"] + # Looping through all the features associated to the current example. + for feature_index in feature_indices: + # We grab the predictions of the model for this feature. + start_logits = all_start_logits[feature_index] + end_logits = all_end_logits[feature_index] + # This is what will allow us to map some the positions in our logits to span of texts in the original + # context. + offset_mapping = features[feature_index]["offset_mapping"] + + # Update minimum null prediction. 
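+            # The CLS-token start+end score acts as the "no span" baseline that
+            # candidate answers are compared against below.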
+ cls_index = features[feature_index]["input_ids"].index( + tokenizer.cls_token_id + ) + feature_null_score = start_logits[cls_index] + end_logits[cls_index] + if min_null_score is None or min_null_score < feature_null_score: + min_null_score = feature_null_score + + # Go through all possibilities for the `n_best_size` greater start and end logits. + start_indexes = np.argsort(start_logits)[ + -1 : -n_best_size - 1 : -1 + ].tolist() + end_indexes = np.argsort(end_logits)[-1 : -n_best_size - 1 : -1].tolist() + for start_index in start_indexes: + for end_index in end_indexes: + # Don't consider out-of-scope answers, either because the indices are out of bounds or correspond + # to part of the input_ids that are not in the context. + if ( + start_index >= len(offset_mapping) + or end_index >= len(offset_mapping) + or offset_mapping[start_index] is None + or offset_mapping[end_index] is None + ): + continue + # Don't consider answers with a length that is either < 0 or > max_answer_length. + if ( + end_index < start_index + or end_index - start_index + 1 > max_answer_length + ): + continue + + start_char = offset_mapping[start_index][0] + end_char = offset_mapping[end_index][1] + valid_answers.append( + { + "qa_score": ( + start_logits[start_index] + end_logits[end_index] + ) + / 2, + "token_score": np.mean( + [ + token_logits[example_index][token_index][1] + for token_index in range(start_index, end_index + 1) + ] + ), + "score": (start_logits[start_index] + end_logits[end_index]) + / 2 + + np.mean( + [ + token_logits[example_index][token_index][1] + for token_index in range(start_index, end_index + 1) + ] + ), + "text": context[start_char:end_char], + "start": start_char, + "end": end_char, + } + ) + + if len(valid_answers) > 0: + sorted_answers = sorted( + valid_answers, key=lambda x: x["score"], reverse=True + ) + else: + # In the very rare edge case we have not a single non-null prediction, we create a fake prediction to avoid + # failure. + sorted_answers = [{"text": "", "score": 0.0, "start": None, "end": None}] + # Let's pick our final answer: the best one or the null answer (only for squad_v2) + if sorted_answers[0]["score"] <= min_null_score: + sorted_answers = [ + {"text": "", "score": min_null_score, "start": None, "end": None}, + ] + sorted_answers + predictions[examples[example_index]["id"]] = sorted_answers + + return predictions + + +def postprocess_multi_span_predictions( + features, + examples, + raw_predictions, + tokenizer, + n_best_size=20, + max_answer_length=30, + squad_v2=False, +): + + all_start_logits, all_end_logits = raw_predictions + # Build a map example to its corresponding features. + example_id_to_index = {k: i for i, k in enumerate(list(examples["id"]))} + features_per_example = collections.defaultdict(list) + for i, feature in enumerate(features): + features_per_example[example_id_to_index[feature["example_id"]]].append(i) + + # The dictionaries we have to fill. + predictions = collections.OrderedDict() + + # Logging. + print( + f"Post-processing {len(examples)} example predictions split into {len(features)} features." + ) + + # Let's loop over all the examples! + for example_index in tqdm(range(len(examples))): + # Those are the indices of the features associated to the current example. + feature_indices = features_per_example[example_index] + + min_null_score = None # Only used if squad_v2 is True. + valid_answers = [] + + context = examples[example_index]["context"] + # Looping through all the features associated to the current example. 
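+        # Unlike postprocess_token_span_predictions above, this variant scores candidate
+        # spans with the raw start + end logits only; no token-level toxicity term is
+        # mixed into the ranking score.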
+ for feature_index in feature_indices: + # We grab the predictions of the model for this feature. + start_logits = all_start_logits[feature_index] + end_logits = all_end_logits[feature_index] + # This is what will allow us to map some the positions + # in our logits to span of texts in the original context. + offset_mapping = features[feature_index]["offset_mapping"] + + # Update minimum null prediction. + cls_index = features[feature_index]["input_ids"].index( + tokenizer.cls_token_id + ) + feature_null_score = start_logits[cls_index] + end_logits[cls_index] + if min_null_score is None or min_null_score < feature_null_score: + min_null_score = feature_null_score + + # Go through all possibilities for the `n_best_size` greater start and end logits. + start_indexes = np.argsort(start_logits)[ + -1 : -n_best_size - 1 : -1 + ].tolist() + end_indexes = np.argsort(end_logits)[-1 : -n_best_size - 1 : -1].tolist() + for start_index in start_indexes: + for end_index in end_indexes: + # Don't consider out-of-scope answers, + # either because the indices are out of bounds or correspond + # to part of the input_ids that are not in the context. + if ( + start_index >= len(offset_mapping) + or end_index >= len(offset_mapping) + or offset_mapping[start_index] is None + or offset_mapping[end_index] is None + ): + continue + # Don't consider answers with a length that + # is either < 0 or > max_answer_length. + if ( + end_index < start_index + or end_index - start_index + 1 > max_answer_length + ): + continue + + start_char = offset_mapping[start_index][0] + end_char = offset_mapping[end_index][1] + valid_answers.append( + { + "score": start_logits[start_index] + end_logits[end_index], + "text": context[start_char:end_char], + "start": start_char, + "end": end_char, + } + ) + + if len(valid_answers) > 0: + sorted_answers = sorted( + valid_answers, key=lambda x: x["score"], reverse=True + ) + else: + # In the very rare edge case we have not a single non-null prediction, + # we create a fake prediction to avoid failure. + sorted_answers = [{"text": "", "score": 0.0, "start": None, "end": None}] + + # Let's pick our final answer: the best one or the null answer (only for squad_v2) + + if sorted_answers[0]["score"] <= min_null_score: + sorted_answers = [ + {"text": "", "score": min_null_score, "start": None, "end": None}, + ] + sorted_answers + + predictions[examples[example_index]["id"]] = sorted_answers + + return predictions \ No newline at end of file diff --git a/src/utils/viz.py b/src/utils/viz.py new file mode 100644 index 0000000000000000000000000000000000000000..c0857de2ff5634c489db2285806ee560c98fcec2 --- /dev/null +++ b/src/utils/viz.py @@ -0,0 +1,102 @@ +"""Visualization utils.""" + +import numpy as np +from IPython.core.display import HTML, display + + +def _get_color(attr): + # clip values to prevent CSS errors (Values should be from [-1,1]) + attr = max(-1, min(1, attr)) + if attr > 0: + hue = 220 + sat = 100 + lig = 100 - int(90 * attr) + else: + hue = 220 + sat = 100 + lig = 100 - int(-125 * attr) + return "hsl({}, {}%, {}%)".format(hue, sat, lig) + + +def format_special_tokens(token): + """Convert <> to # if there are any HTML syntax tags. + + Example: '' will be converted to '#Hello' to avoid confusion + with HTML tags. + + Args: + token (str): The token to be formatted. + Returns: + (str): The formatted token. 
+ """ + if token.startswith("<") and token.endswith(">"): + return "#" + token.strip("<>") + return token + + +def format_word_importances( + words, + importances, + ground_text_spans, + predicted_text_spans, +): + if np.isnan(importances[0]): + importances = np.zeros_like(importances) + + assert len(words) <= len(importances) + tags = ["
Text: "] + + for word_index, (word, importance) in enumerate( + zip(words, importances[: len(words)]) + ): + word = format_special_tokens(word) + for character in word: ## Printing Weird Words + if ord(character) >= 128: + print(word) + break + color = _get_color(importance) + + unwrapped_tag = f' {word}\ + ' + tags.append(unwrapped_tag) + tags.append("
") + tags.append("
Ground Spans: [ ") + for i, span in enumerate(ground_text_spans): + if i != len(ground_text_spans) - 1: + tags.append(f"'{span}',") + else: + tags.append(f"'{span}'") + tags.append(" ]") + tags.append("
Predicted Spans: [ ") + for i, span in enumerate(predicted_text_spans): + if i != len(predicted_text_spans) - 1: + tags.append(f"'{span}',") + else: + tags.append(f"'{span}'") + tags.append(" ]") + return HTML("".join(tags)) + + +def format_word_colors(words, colors): + assert len(words) == len(colors) + tags = ["
"] + for word, color in zip(words, colors): + word = format_special_tokens(word) + unwrapped_tag = ' {word}\ + '.format( + color=color, word=word + ) + tags.append(unwrapped_tag) + tags.append("
") + return HTML("".join(tags)) + + +def display_html(html): + display(html) + + +def save_to_file(html, path): + with open(path, "w") as f: + f.write(html.data)