Source code for text_machina.src.tokenizers.hf_local
from typing import List
from transformers import AutoTokenizer
from .base import Tokenizer
class HuggingFaceLocalTokenizer(Tokenizer):
    """
    Tokenizer wrapper around a HuggingFace ``AutoTokenizer``.

    The wrapped tokenizer is loaded once at construction time and kept
    on the instance as ``self.tokenizer``.
    """

    def __init__(self, model_name: str):
        """
        Load the HuggingFace tokenizer for the given model.

        Args:
            model_name: Name forwarded to the base ``Tokenizer``
                initializer; ``self.model_name`` is then passed to
                ``AutoTokenizer.from_pretrained``.
        """
        super().__init__(model_name)
        # NOTE(review): relies on the base initializer having set
        # ``self.model_name`` -- confirm against ``Tokenizer`` in .base.
        pretrained = AutoTokenizer.from_pretrained(self.model_name)
        self.tokenizer = pretrained

    def decode(self, tokens: List[int]) -> str:
        """
        Convert token ids to text using the wrapped tokenizer.

        Args:
            tokens: Token ids to decode.

        Returns:
            The result of the wrapped tokenizer's ``decode`` call, made
            with ``skip_special_tokens=True``.
        """
        text = self.tokenizer.decode(tokens, skip_special_tokens=True)
        return text