Source code for astropy.units.format.cds

# Licensed under a 3-clause BSD style license - see LICNSE.rst

# This module includes files automatically generated from ply (these end in
# _lextab.py and _parsetab.py). To generate these files, remove them from this
# folder, then build astropy and run the tests in-place:
#
#   python setup.py build_ext --inplace
#   pytest astropy/units
#
# You can then commit the changes to the re-generated _lextab.py and
# _parsetab.py files.

"""Handles the CDS string format for units."""

import re
from typing import ClassVar, Literal

from ply.lex import Lexer
from astropy.units.core import CompositeUnit, Unit, UnitBase
from astropy.units.enums import DeprecatedUnitAction
from astropy.units.utils import is_effectively_unity
from astropy.utils import classproperty, parsing
from astropy.utils.parsing import ThreadSafeParser

from .base import Base, _ParsingFormatMixin


[docs] class CDS(Base, _ParsingFormatMixin): """ Support the `Centre de Données astronomiques de Strasbourg <https://cds.unistra.fr/>`_ `Standards for Astronomical Catalogues 2.0 <https://vizier.unistra.fr/vizier/doc/catstd-3.2.htx>`_ format, and the `complete set of supported units <https://vizier.unistra.fr/viz-bin/Unit>`_. This format is used by VOTable up to version 1.2. """ _space: ClassVar[str] = "." _times: ClassVar[str] = "x" _scale_unit_separator: ClassVar[str] = "" _tokens: ClassVar[tuple[str, ...]] = ( "PRODUCT", "DIVISION", "OPEN_PAREN", "CLOSE_PAREN", "OPEN_BRACKET", "CLOSE_BRACKET", "X", "SIGN", "UINT", "UFLOAT", "UNIT", "DIMENSIONLESS", ) @classproperty(lazy=True) def _units(cls) -> dict[str, UnitBase]: from astropy import units as u from astropy.units import cds return {k: v for k, v in cds.__dict__.items() if isinstance(v, u.UnitBase)} @classproperty(lazy=True) def _lexer(cls) -> Lexer: tokens = cls._tokens t_PRODUCT = r"\." t_DIVISION = r"/" t_OPEN_PAREN = r"\(" t_CLOSE_PAREN = r"\)" t_OPEN_BRACKET = r"\[" t_CLOSE_BRACKET = r"\]" # NOTE THE ORDERING OF THESE RULES IS IMPORTANT!! # Regular expression rules for simple tokens def t_UFLOAT(t): r"((\d+\.?\d+)|(\.\d+))([eE][+-]?\d+)?" if not re.search(r"[eE\.]", t.value): t.type = "UINT" t.value = int(t.value) else: t.value = float(t.value) return t def t_UINT(t): r"\d+" t.value = int(t.value) return t def t_SIGN(t): r"[+-](?=\d)" t.value = float(t.value + "1") return t def t_X(t): # multiplication for factor in front of unit r"[x×]" return t # Most units are just combinations of letters with no numbers, but there # are a few special ones (\h is Planch constant) and three that end in 0. def t_UNIT(t): r"%|°|\\h|(a|eps|mu)0|((?!\d)\w)+" t.value = cls._get_unit(t) return t def t_DIMENSIONLESS(t): r"---|-" # These are separate from t_UNIT since they cannot have a prefactor. t.value = cls._get_unit(t) return t t_ignore = "" # Error handling rule def t_error(t): raise ValueError(f"Invalid character at col {t.lexpos}") return parsing.lex( lextab="cds_lextab", package="astropy/units", reflags=int(re.UNICODE) ) @classproperty(lazy=True) def _parser(cls) -> ThreadSafeParser: """ The grammar here is based on the description in the `Standards for Astronomical Catalogues 2.0 <https://vizier.unistra.fr/vizier/doc/catstd-3.2.htx>`_, which is not terribly precise. The exact grammar is here is based on the YACC grammar in the `unity library <https://purl.org/nxg/dist/unity/>`_. """ tokens = cls._tokens def p_main(p): """ main : factor combined_units | combined_units | DIMENSIONLESS | OPEN_BRACKET combined_units CLOSE_BRACKET | OPEN_BRACKET DIMENSIONLESS CLOSE_BRACKET | factor """ from astropy.units import dex if len(p) == 3: p[0] = CompositeUnit(p[1] * p[2].scale, p[2].bases, p[2].powers) elif len(p) == 4: p[0] = dex(p[2]) else: p[0] = Unit(p[1]) def p_combined_units(p): """ combined_units : product_of_units | division_of_units """ p[0] = p[1] def p_product_of_units(p): """ product_of_units : unit_expression PRODUCT combined_units | unit_expression """ if len(p) == 4: p[0] = p[1] * p[3] else: p[0] = p[1] def p_division_of_units(p): """ division_of_units : DIVISION unit_expression | combined_units DIVISION unit_expression """ if len(p) == 3: p[0] = p[2] ** -1 else: p[0] = p[1] / p[3] def p_unit_expression(p): """ unit_expression : unit_with_power | OPEN_PAREN combined_units CLOSE_PAREN """ if len(p) == 2: p[0] = p[1] else: p[0] = p[2] def p_factor(p): """ factor : signed_float X UINT SIGN UINT | UINT X UINT SIGN UINT | UINT SIGN UINT | UINT | signed_float """ match p[1:]: case factor, _, 10, sign, exponent: p[0] = factor * 10.0 ** (sign * exponent) case _, _, _, _, _: raise ValueError("Only base ten exponents are allowed in CDS") case 10, sign, exponent: p[0] = 10.0 ** (sign * exponent) case _, _, _: raise ValueError("Only base ten exponents are allowed in CDS") case _: p[0] = p[1] def p_unit_with_power(p): """ unit_with_power : UNIT numeric_power | UNIT """ if len(p) == 2: p[0] = p[1] else: p[0] = p[1] ** p[2] def p_numeric_power(p): """ numeric_power : sign UINT """ p[0] = p[1] * p[2] def p_sign(p): """ sign : SIGN | """ if len(p) == 2: p[0] = p[1] else: p[0] = 1.0 def p_signed_float(p): """ signed_float : sign UINT | sign UFLOAT """ p[0] = p[1] * p[2] def p_error(p): raise ValueError() return parsing.yacc(tabmodule="cds_parsetab", package="astropy/units")
[docs] @classmethod def parse(cls, s: str, debug: bool = False) -> UnitBase: if " " in s: raise ValueError("CDS unit must not contain whitespace") if not isinstance(s, str): s = s.decode("ascii") return cls._do_parse(s, debug)
@classmethod def _format_mantissa(cls, m: str) -> str: return "" if m == "1" else m @classmethod def _format_superscript(cls, number: str) -> str: return number if number.startswith("-") else "+" + number
[docs] @classmethod def to_string( cls, unit: UnitBase, fraction: bool | Literal["inline", "multiline"] = False, deprecations: DeprecatedUnitAction = DeprecatedUnitAction.WARN, ) -> str: # Remove units that aren't known to the format unit = cls._decompose_to_known_units(unit) if not unit.bases: if unit.scale == 1: return "---" elif is_effectively_unity(unit.scale * 100.0): return "%" return super().to_string(unit, fraction=fraction)