nlp_dedup.types

Datatypes used in the project.

View Source
"""Datatypes used in the project."""

from typing import Any, Dict, Iterator, Protocol, Union


class Corpus(Protocol):
    """Structural type describing a corpus.

    The protocol declares both ``__iter__`` and ``__next__``. Every item is
    annotated as either a plain string or a dictionary with string keys and
    arbitrary values.
    """

    def __iter__(self) -> Iterator[Union[str, Dict[str, Any]]]:
        """Return an iterator over the items of the corpus."""

    def __next__(self) -> Union[str, Dict[str, Any]]:
        """Return the next item of the corpus."""
#   class Corpus(typing.Protocol):
View Source
class Corpus(Protocol):
    """Structural type describing a corpus.

    The protocol declares both ``__iter__`` and ``__next__``. Every item is
    annotated as either a plain string or a dictionary with string keys and
    arbitrary values.
    """

    def __iter__(self) -> Iterator[Union[str, Dict[str, Any]]]:
        """Return an iterator over the items of the corpus."""

    def __next__(self) -> Union[str, Dict[str, Any]]:
        """Return the next item of the corpus."""

Protocol for a corpus.

#   Corpus(*args, **kwargs)
View Source
def _no_init_or_replace_init(self, *args, **kwargs):
    """Placeholder ``__init__`` that replaces itself with a real initializer.

    Raises ``TypeError`` when the instance's class is flagged as a protocol
    (``_is_protocol`` is truthy). Otherwise, if this function is still the
    class's ``__init__``, the first ``__init__`` found in the ``__dict__`` of
    a class in the MRO that is not this placeholder is installed on the class
    and then invoked with the given arguments.
    """
    klass = type(self)

    if klass._is_protocol:
        raise TypeError('Protocols cannot be instantiated')

    placeholder = _no_init_or_replace_init

    # The class already carries its own `__init__`, so there is nothing to
    # resolve. Searching anyway can end in a RecursionError (bpo-45121).
    if klass.__init__ is not placeholder:
        return

    # A protocol subclass starts out with this placeholder as its `__init__`.
    # The first instantiation lands here, walks the MRO for a genuine
    # `__init__`, and stores it on the subclass so that every later
    # instantiation goes straight to that initializer.
    declared_inits = (
        base.__dict__.get('__init__', placeholder) for base in klass.__mro__
    )
    klass.__init__ = next(
        (found for found in declared_inits if found is not placeholder),
        object.__init__,  # fallback; should not happen
    )

    klass.__init__(self, *args, **kwargs)