Commit b6c73ac (verified) · mfromm committed · 1 Parent(s): 85ef05e

Update gptx_tokenizer.py

Files changed (1)
  1. gptx_tokenizer.py (+4 −4)
gptx_tokenizer.py CHANGED
@@ -245,10 +245,9 @@ class HFGPTXTokenizer(PreTrainedTokenizer):
         """
         output = self.tok.decode(input=token_ids, num_threads=num_threads)
         if skip_special_tokens:
-            warnings.warn(
-                "skip_special_tokens currently not implemented"
-            )
-
+            for substring in self.additional_special_tokens:
+                output = output.replace(substring, "")
+
         if clean_up_tokenization_spaces:
             warnings.warn(
                 "when cleaning up tokenization spaces, this will not behave "
@@ -259,6 +258,7 @@ class HFGPTXTokenizer(PreTrainedTokenizer):
 
         return output
 
+
     def _convert_id_to_token(self, index: int) -> str:
         """
         Convert a token ID to its corresponding token string.
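
In effect, the commit makes `skip_special_tokens=True` strip each string in `self.additional_special_tokens` from the decoded output, where the previous code only emitted a "not implemented" warning. Below is a minimal, runnable sketch of the new branch; `_FakeBackend` and the sample special-token list are hypothetical stand-ins for the real `self.tok` backend and `self.additional_special_tokens`, which the diff does not show.

# Minimal sketch of the patched skip_special_tokens behavior.
# `_FakeBackend` and the toy vocabulary are hypothetical stand-ins;
# only the `skip_special_tokens` branch mirrors the commit.

class _FakeBackend:
    # Stand-in for the backend tokenizer held in `self.tok`.
    _vocab = {0: "<s>", 1: "Hello", 2: " world", 3: "</s>"}

    def decode(self, input, num_threads=None):
        # Toy decode: map each ID straight to its string.
        return "".join(self._vocab[i] for i in input)

def decode(token_ids, skip_special_tokens=False, num_threads=None):
    tok = _FakeBackend()
    additional_special_tokens = ["<s>", "</s>"]  # hypothetical values
    output = tok.decode(input=token_ids, num_threads=num_threads)
    if skip_special_tokens:
        # New behavior from this commit: strip each special-token
        # substring from the decoded text instead of warning that
        # the option is unimplemented.
        for substring in additional_special_tokens:
            output = output.replace(substring, "")
    return output

print(decode([0, 1, 2, 3]))                            # <s>Hello world</s>
print(decode([0, 1, 2, 3], skip_special_tokens=True))  # Hello world

One trade-off of this string-level approach, visible in the diff itself: `str.replace` removes every occurrence of each special-token string, including any that happen to appear literally in the decoded text.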