Source code for spokestack.nlu.parsers.integer

"""
This module contains the logic to parse integers from NLU results. Integers can be
in the form of words (e.g. one, two, three) or numbers (e.g. 1, 2, 3). Either form
will resolve to Python's built-in 'int' type. The metadata must contain a range
key containing the minimum and maximum values for the expected integer range. It is
important to note the difference between digits and integers. Integers are
counting numbers: 2 apples, a table for two. In contrast, digits
can be used for sequences of numbers like phone numbers or social security numbers.
"""
from typing import Any, Dict, Union

from spokestack.nlu.parsers import DIGIT_SPLIT_RE, maps


def parse(metadata: Dict[str, Any], raw_value: str) -> Union[int, None]:
    """Integer Parser

    Lower-cases the tagged text, splits it with ``DIGIT_SPLIT_RE`` and turns
    every token into a number: numerals are converted with ``int`` and
    anything else is handed to ``_parse_reduce`` together with the values
    collected so far. The collected values are summed and the total is
    validated against the slot's configured range.

    Args:
        metadata (Dict[str, Any]): metadata for the integer slot. The
            "range" entry, when present, is passed to ``_is_in_range`` to
            validate the result; when absent the result is not
            range-checked.
        raw_value (str): value tagged by the model

    Returns:
        Union[int, None]: integer if parsable, None if invalid

    """
    raw_range = metadata.get("range")
    normalized = raw_value.lower()

    parsed_values = []
    for token in DIGIT_SPLIT_RE.split(normalized):
        if not token:
            # A regex split may yield empty strings (for instance when the
            # text starts or ends with a separator). They carry no value, so
            # skip them rather than rejecting the whole utterance.
            continue
        try:
            parsed_values.append(int(token))
        except ValueError:
            # Not a numeral: try to interpret the token as a number word.
            # _parse_reduce updates parsed_values in place and returns None
            # when the word is unknown.
            if _parse_reduce(token, parsed_values) is None:
                return None

    if not parsed_values:
        # Nothing numeric was found (e.g. empty or separator-only input);
        # do not let sum([]) masquerade as a parsed zero.
        return None

    result = sum(parsed_values)
    # A slot without a configured range has nothing to validate against;
    # guarding here avoids subscripting None inside _is_in_range.
    if raw_range is None or _is_in_range(result, raw_range):
        return result
    return None
def _parse_reduce(number: Any, so_far: Any) -> Any:
    """Fold one number word into the running list of parsed values.

    Args:
        number (Any): a single lower-cased token that is not a numeral
        so_far (Any): list of values parsed so far; modified in place

    Returns:
        Any: the updated ``so_far`` list, or None when the word is not a
            key of ``maps.WORD_TO_NUM``

    """
    word = number
    if word.endswith("th"):
        # Drop the ordinal suffix so that e.g. "fourth" is looked up as
        # "four".
        word = word[:-2]

    if word not in maps.WORD_TO_NUM:
        return None

    if word in maps.MULTIPLIERS:
        # A multiplier scales what came before it; _collapse builds a new
        # list, which then replaces the contents of so_far in place so the
        # caller's reference stays valid.
        so_far[:] = _collapse(maps.MULTIPLIERS[word], so_far)
    else:
        so_far.append(maps.WORD_TO_NUM[word])
    return so_far


def _collapse(multiplier: int, so_far: Any) -> Any:
    """Apply a multiplier to the values collected so far.

    Values greater than ``multiplier`` are kept untouched; all remaining
    values are added together and that sum is multiplied. For example, with
    ``[1000, 2]`` and a multiplier of 100 the result is ``[1000, 200]``.

    Args:
        multiplier (int): numeric value of the multiplier word
        so_far (Any): list of values parsed so far; not modified

    Returns:
        Any: a new list holding the untouched larger values followed by the
            multiplied sum

    """
    kept = [number for number in so_far if number > multiplier]
    pending = sum(number for number in so_far if not number > multiplier)
    # With nothing to scale, the multiplier stands for itself: the sum is
    # raised to at least 1 before multiplying.
    kept.append(max(pending, 1) * multiplier)
    return kept


def _is_in_range(value: int, interval: Any) -> bool:
    """Check whether ``value`` lies inside ``interval``.

    The first item of ``interval`` is an inclusive lower bound and the
    second an exclusive upper bound, matching the semantics of the built-in
    ``range``. A missing interval (None) imposes no restriction.

    Args:
        value (int): the parsed integer
        interval (Any): two-item sequence of bounds, or None

    Returns:
        bool: True if the value is acceptable, False otherwise

    """
    if interval is None:
        return True
    lower, upper = interval[0], interval[1]
    # A chained comparison gives the same answer as ``value in range(...)``
    # for integer bounds and also accepts non-integer (e.g. float) bounds,
    # which ``range`` would reject.
    return lower <= value < upper