Source code for jina.drivers.querylang.filter

__copyright__ = "Copyright (c) 2020 Jina AI Limited. All rights reserved."
__license__ = "Apache-2.0"

from typing import Dict, Any, Iterable

from ...types.querylang.queryset.lookup import Q
from .. import QuerySetReader, BaseRecursiveDriver, ContextAwareRecursiveMixin

if False:
    from ...types.sets import DocumentSet


[docs]class FilterQL(QuerySetReader, ContextAwareRecursiveMixin, BaseRecursiveDriver): """Filters incoming `docs` by evaluating a series of `lookup rules`. This is often useful when the proceeding Pods require only a signal, not the full message. Example :: - !FilterQL with: lookups: {modality: mode2} - !EncodeDriver with: method: encode ensures that the EncodeDriver will only get documents which modality field value is `mode2` by filtering those documents at the specific levels that do not comply with this condition :param lookups: (dict) a dictionary where keys are interpreted by ``:class:`LookupLeaf`` to form a an evaluation function. For instance, a dictionary ``{ modality__in: [mode1, mode2] }``, would create an evaluation function that will check if the field `modality` is found in `[mode1, mode2]` :param args: additional positional arguments which are just used for the parent initialization :param kwargs: additional key value arguments which are just used for the parent initialization """ def __init__(self, lookups: Dict[str, Any], *args, **kwargs): super().__init__(*args, **kwargs) self._lookups = lookups def _apply_all( self, doc_sequences: Iterable['DocumentSet'], *args, **kwargs ) -> None: for docs in doc_sequences: if self.lookups: _lookups = Q(**self.lookups) miss_idx = [] for idx, doc in enumerate(docs): if not _lookups.evaluate(doc): miss_idx.append(idx) # delete non-exit matches in reverse for j in reversed(miss_idx): del docs[j]