Source code for text_machina.src.extractors.combined

import importlib
from functools import reduce
from typing import Dict, List

from datasets import Dataset

from ..config import InputConfig
from ..types import TaskType
from .base import Extractor


class Combined(Extractor):
    """
    Extractor that combines multiple extractors.

    This extractor does not need specific template placeholders,
    just the placeholders of the extractors being combined.

    This extractor does not need specific arguments,
    just the arguments for the extractors being combined.
    """

    def __init__(self, input_config: InputConfig, task_type: TaskType):
        """
        Instantiate every extractor listed in the input config.

        Each entry of ``input_config.extractors_list`` is split at its last
        dot into ``<module>.<ClassName>``. The module is imported relative to
        the package containing this module, and the class is constructed
        with the same ``(input_config, task_type)`` arguments given here.

        Args:
            input_config: Configuration forwarded to the base class and to
                every combined extractor.
            task_type: Task type forwarded to the base class and to every
                combined extractor.

        Raises:
            ValueError: If an entry of ``extractors_list`` contains no dot,
                so it cannot be split into module and class name.
        """
        super().__init__(input_config, task_type)

        # Parent package of this module; loop-invariant, so computed once.
        package = __name__.rsplit(".", 1)[0]

        self.extractors = []
        for extractor_name in self.input_config.extractors_list:
            module, class_ = extractor_name.rsplit(".", 1)
            extractor_cls = getattr(
                importlib.import_module("." + module, package=package),
                class_,
            )
            self.extractors.append(
                extractor_cls(self.input_config, task_type)
            )

    def _extract(self, dataset: Dataset) -> Dict[str, List[str]]:
        """
        Run every combined extractor on ``dataset`` and merge the results.

        Outputs are merged in the order the extractors were listed; when two
        extractors produce the same key, the later one wins.

        Args:
            dataset: Dataset passed unchanged to each extractor's
                ``_extract``.

        Returns:
            A new dictionary holding the union of all extractor outputs.
            It is empty when no extractors are configured.
        """
        # Accumulate into a fresh dict instead of ``reduce`` without an
        # initializer: that form raises TypeError on an empty extractor list
        # and, with a single extractor, hands back that extractor's own dict.
        merged: Dict[str, List[str]] = {}
        for extractor in self.extractors:
            merged.update(extractor._extract(dataset))
        return merged