Source code for disdrodb.l0.check_metadata

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import yaml
from typing import Union
from disdrodb.l0.io import (
    get_disdrodb_dir,
    get_data_source,
    get_campaign_name,
)
from disdrodb.api.metadata import get_metadata_list, read_station_metadata
from disdrodb.l0.l0_reader import _check_metadata_reader
from disdrodb.l0.metadata import (
    _check_metadata_keys,
    _check_metadata_data_source,
    _check_metadata_campaign_name,
    _check_metadata_station_name,
    _check_metadata_sensor_name,
    check_metadata_compliance,
)

#### --------------------------------------------------------------------------.


def read_yaml(fpath: str) -> dict:
    """Read YAML file.

    Parameters
    ----------
    fpath : str
        Input YAML file path.

    Returns
    -------
    dict
        Attributes read from the YAML file.
    """
    # Decode explicitly as UTF-8 so that the parsed content does not depend
    # on the locale encoding of the machine reading the file.
    with open(fpath, "r", encoding="utf-8") as f:
        attrs = yaml.safe_load(f)
    return attrs
#### --------------------------------------------------------------------------.
#### Metadata Archive Missing Information
def check_metadata_geolocation(metadata) -> None:
    """Identify metadata with missing or wrong geolocation.

    Parameters
    ----------
    metadata : dict
        Station metadata. The ``longitude``, ``latitude`` and
        ``platform_type`` keys are read; a missing key is treated as ``None``.

    Raises
    ------
    TypeError
        If longitude or latitude is specified as a string.
    ValueError
        If longitude or latitude is ``None``; if ``platform_type`` is
        ``"mobile"`` and the coordinates are not both -9999; or, for any other
        platform type, if a coordinate is -9999, is NaN, or lies outside
        [-180, 180] (longitude) or [-90, 90] (latitude).
    """
    # Get longitude, latitude and platform type
    longitude = metadata.get("longitude")
    latitude = metadata.get("latitude")
    platform_type = metadata.get("platform_type")

    # Check type validity
    if isinstance(longitude, str):
        raise TypeError("longitude is not defined as numeric.")
    if isinstance(latitude, str):
        raise TypeError("latitude is not defined as numeric.")

    # Check is not none
    if longitude is None or latitude is None:
        raise ValueError("Unspecified longitude and latitude coordinates.")

    # Check value validity
    # - If mobile platform, both coordinates must be flagged with -9999
    if platform_type == "mobile":
        if longitude != -9999 or latitude != -9999:
            raise ValueError(
                "For mobile platform_type, specify latitude and longitude -9999"
            )
        return None

    # - If fixed platform
    if longitude == -9999 or latitude == -9999:
        raise ValueError("Missing lat lon coordinates (-9999).")
    # The inclusive chained comparisons evaluate to False for NaN, so a NaN
    # coordinate is rejected here instead of silently passing the checks.
    if not -180 <= longitude <= 180:
        raise ValueError("Unvalid longitude (outside [-180, 180])")
    if not -90 <= latitude <= 90:
        raise ValueError("Unvalid latitude (outside [-90, 90])")
    return None
def identify_missing_metadata_coords(metadata_fpaths: Union[str, list]) -> None:
    """Identify missing coordinates.

    Parameters
    ----------
    metadata_fpaths : Union[str, list]
        Input YAML file path, or list of YAML file paths.

    Raises
    ------
    TypeError
        Error if latitude or longitude coordinates are wrongly formatted
        (specified as strings).
    ValueError
        Error if latitude or longitude coordinates are not present or have
        invalid values.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(metadata_fpaths, str):
        metadata_fpaths = [metadata_fpaths]
    for fpath in metadata_fpaths:
        metadata = read_yaml(fpath)
        # Raises on the first file with missing or invalid coordinates.
        check_metadata_geolocation(metadata)
    return None
def identify_empty_metadata_keys(metadata_fpaths: list, keys: Union[str, list]) -> None:
    """Identify empty metadata keys.

    For every file and every key, a message is printed when the value of the
    key (or an empty string if the key is absent) has an empty string
    representation.

    Parameters
    ----------
    metadata_fpaths : list
        Input YAML file paths.
    keys : Union[str,list]
        Attributes to verify the presence.
    """
    if isinstance(keys, str):
        keys = [keys]

    for fpath in metadata_fpaths:
        # Read each file once rather than once per key.
        metadata = read_yaml(fpath)
        for key in keys:
            # Cast to string so that len() also works on numeric values.
            # NOTE(review): a value of None stringifies to "None" and is
            # therefore not reported as empty - confirm this is intended.
            if len(str(metadata.get(key, ""))) == 0:
                print(f"Empty {key} at: ", fpath)
    return None
def get_archive_metadata_key_value(disdrodb_dir, key, return_tuple=True):
    """Return the values of a metadata key for all the archive.

    Parameters
    ----------
    disdrodb_dir
        Archive directory passed to ``get_metadata_list`` to list the
        metadata files.
    key
        Metadata key whose value is collected for every station.
    return_tuple : bool, optional
        If True (the default), return one tuple
        ``(data_source, campaign_name, station_name, value)`` per station.
        If False, return only the values.

    Returns
    -------
    list
        One entry per metadata file, in the order given by
        ``get_metadata_list``.
    """
    records = []
    for metadata_fpath in get_metadata_list(disdrodb_dir):
        # Station identifiers are derived from the metadata file path.
        base_dir = get_disdrodb_dir(metadata_fpath)
        data_source = get_data_source(metadata_fpath)
        campaign_name = get_campaign_name(metadata_fpath)
        fname = os.path.basename(metadata_fpath)
        station_name = fname.replace(".yml", "")
        station_metadata = read_station_metadata(
            disdrodb_dir=base_dir,
            product_level="RAW",
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
        )
        records.append(
            (data_source, campaign_name, station_name, station_metadata[key])
        )

    if return_tuple:
        return records
    # Keep only the metadata value (last element of each record).
    return [record[3] for record in records]
#### --------------------------------------------------------------------------.
#### Metadata Archive Checks
def check_archive_metadata_keys(disdrodb_dir):
    """Run ``_check_metadata_keys`` on the metadata of every archive station.

    The metadata files are listed with ``get_metadata_list``. An exception
    raised by the check is caught and printed together with the station
    identifiers, and the loop moves on to the next station.

    Parameters
    ----------
    disdrodb_dir
        Archive directory passed to ``get_metadata_list``.
    """
    for metadata_fpath in get_metadata_list(disdrodb_dir):
        # Station identifiers are derived from the metadata file path.
        base_dir = get_disdrodb_dir(metadata_fpath)
        data_source = get_data_source(metadata_fpath)
        campaign_name = get_campaign_name(metadata_fpath)
        fname = os.path.basename(metadata_fpath)
        station_name = fname.replace(".yml", "")
        station_metadata = read_station_metadata(
            disdrodb_dir=base_dir,
            product_level="RAW",
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
        )
        try:
            _check_metadata_keys(station_metadata)
        except Exception as e:
            # Report the failure and continue with the remaining stations.
            print(f"Error for {data_source} {campaign_name} {station_name}.")
            print(f"The error is: {e}.")
def check_archive_metadata_campaign_name(disdrodb_dir):
    """Run ``_check_metadata_campaign_name`` on every archive station.

    The campaign name derived from each metadata file path is passed to the
    check as the expected name. An exception raised by the check is caught
    and printed together with the station identifiers, and the loop moves on
    to the next station.

    Parameters
    ----------
    disdrodb_dir
        Archive directory passed to ``get_metadata_list``.
    """
    for metadata_fpath in get_metadata_list(disdrodb_dir):
        # Station identifiers are derived from the metadata file path.
        base_dir = get_disdrodb_dir(metadata_fpath)
        data_source = get_data_source(metadata_fpath)
        campaign_name = get_campaign_name(metadata_fpath)
        fname = os.path.basename(metadata_fpath)
        station_name = fname.replace(".yml", "")
        station_metadata = read_station_metadata(
            disdrodb_dir=base_dir,
            product_level="RAW",
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
        )
        try:
            _check_metadata_campaign_name(
                station_metadata, expected_name=campaign_name
            )
        except Exception as e:
            # Report the failure and continue with the remaining stations.
            print(f"Error for {data_source} {campaign_name} {station_name}.")
            print(f"The error is: {e}.")
def check_archive_metadata_data_source(disdrodb_dir):
    """Run ``_check_metadata_data_source`` on every archive station.

    The data source derived from each metadata file path is passed to the
    check as the expected name. An exception raised by the check is caught
    and printed together with the station identifiers, and the loop moves on
    to the next station.

    Parameters
    ----------
    disdrodb_dir
        Archive directory passed to ``get_metadata_list``.
    """
    for metadata_fpath in get_metadata_list(disdrodb_dir):
        # Station identifiers are derived from the metadata file path.
        base_dir = get_disdrodb_dir(metadata_fpath)
        data_source = get_data_source(metadata_fpath)
        campaign_name = get_campaign_name(metadata_fpath)
        fname = os.path.basename(metadata_fpath)
        station_name = fname.replace(".yml", "")
        station_metadata = read_station_metadata(
            disdrodb_dir=base_dir,
            product_level="RAW",
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
        )
        try:
            _check_metadata_data_source(
                station_metadata, expected_name=data_source
            )
        except Exception as e:
            # Report the failure and continue with the remaining stations.
            print(f"Error for {data_source} {campaign_name} {station_name}.")
            print(f"The error is: {e}.")
def check_archive_metadata_sensor_name(disdrodb_dir):
    """Run ``_check_metadata_sensor_name`` on every archive station.

    The metadata files are listed with ``get_metadata_list``. An exception
    raised by the check is caught and printed together with the station
    identifiers, and the loop moves on to the next station.

    Parameters
    ----------
    disdrodb_dir
        Archive directory passed to ``get_metadata_list``.
    """
    for metadata_fpath in get_metadata_list(disdrodb_dir):
        # Station identifiers are derived from the metadata file path.
        base_dir = get_disdrodb_dir(metadata_fpath)
        data_source = get_data_source(metadata_fpath)
        campaign_name = get_campaign_name(metadata_fpath)
        fname = os.path.basename(metadata_fpath)
        station_name = fname.replace(".yml", "")
        station_metadata = read_station_metadata(
            disdrodb_dir=base_dir,
            product_level="RAW",
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
        )
        try:
            _check_metadata_sensor_name(station_metadata)
        except Exception as e:
            # Report the failure and continue with the remaining stations.
            print(f"Error for {data_source} {campaign_name} {station_name}.")
            print(f"The error is: {e}.")
def check_archive_metadata_station_name(disdrodb_dir):
    """Run ``_check_metadata_station_name`` on every archive station.

    The station name derived from each metadata file name (with ``.yml``
    removed) is passed to the check as the expected name. An exception raised
    by the check is caught and printed together with the station identifiers,
    and the loop moves on to the next station.

    Parameters
    ----------
    disdrodb_dir
        Archive directory passed to ``get_metadata_list``.
    """
    for metadata_fpath in get_metadata_list(disdrodb_dir):
        # Station identifiers are derived from the metadata file path.
        base_dir = get_disdrodb_dir(metadata_fpath)
        data_source = get_data_source(metadata_fpath)
        campaign_name = get_campaign_name(metadata_fpath)
        fname = os.path.basename(metadata_fpath)
        station_name = fname.replace(".yml", "")
        station_metadata = read_station_metadata(
            disdrodb_dir=base_dir,
            product_level="RAW",
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
        )
        try:
            _check_metadata_station_name(
                station_metadata, expected_name=station_name
            )
        except Exception as e:
            # Report the failure and continue with the remaining stations.
            print(f"Error for {data_source} {campaign_name} {station_name}.")
            print(f"The error is: {e}.")
def check_archive_metadata_reader(disdrodb_dir):
    """Run ``_check_metadata_reader`` on the metadata of every archive station.

    The metadata files are listed with ``get_metadata_list``. An exception
    raised by the check is caught and printed together with the station
    identifiers, and the loop moves on to the next station.

    Parameters
    ----------
    disdrodb_dir
        Archive directory passed to ``get_metadata_list``.
    """
    for metadata_fpath in get_metadata_list(disdrodb_dir):
        # Station identifiers are derived from the metadata file path.
        base_dir = get_disdrodb_dir(metadata_fpath)
        data_source = get_data_source(metadata_fpath)
        campaign_name = get_campaign_name(metadata_fpath)
        fname = os.path.basename(metadata_fpath)
        station_name = fname.replace(".yml", "")
        station_metadata = read_station_metadata(
            disdrodb_dir=base_dir,
            product_level="RAW",
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
        )
        try:
            _check_metadata_reader(station_metadata)
        except Exception as e:
            # Report the failure and continue with the remaining stations.
            print(f"Error for {data_source} {campaign_name} {station_name}.")
            print(f"The error is: {e}.")
def check_archive_metadata_compliance(disdrodb_dir):
    """Run ``check_metadata_compliance`` for every station of the archive.

    The metadata files are listed with ``get_metadata_list`` and the station
    identifiers derived from each file path are forwarded to the check. An
    exception raised by the check is caught and printed together with the
    station identifiers, and the loop moves on to the next station.

    Parameters
    ----------
    disdrodb_dir
        Archive directory passed to ``get_metadata_list``.
    """
    for metadata_fpath in get_metadata_list(disdrodb_dir):
        # Station identifiers are derived from the metadata file path.
        base_dir = get_disdrodb_dir(metadata_fpath)
        data_source = get_data_source(metadata_fpath)
        campaign_name = get_campaign_name(metadata_fpath)
        fname = os.path.basename(metadata_fpath)
        station_name = fname.replace(".yml", "")
        station_ids = {
            "disdrodb_dir": base_dir,
            "data_source": data_source,
            "campaign_name": campaign_name,
            "station_name": station_name,
        }
        try:
            check_metadata_compliance(**station_ids)
        except Exception as e:
            # Report the failure and continue with the remaining stations.
            print(f"Error for {data_source} {campaign_name} {station_name}.")
            print(f"The error is: {e}.")
def check_archive_metadata_geolocation(disdrodb_dir):
    """Run ``check_metadata_geolocation`` on every station of the archive.

    The metadata files are listed with ``get_metadata_list``. An exception
    raised by the check is caught and printed together with the station
    identifiers, and the loop moves on to the next station.

    Parameters
    ----------
    disdrodb_dir
        Archive directory passed to ``get_metadata_list``.
    """
    for metadata_fpath in get_metadata_list(disdrodb_dir):
        # Station identifiers are derived from the metadata file path.
        base_dir = get_disdrodb_dir(metadata_fpath)
        data_source = get_data_source(metadata_fpath)
        campaign_name = get_campaign_name(metadata_fpath)
        fname = os.path.basename(metadata_fpath)
        station_name = fname.replace(".yml", "")
        station_metadata = read_station_metadata(
            disdrodb_dir=base_dir,
            product_level="RAW",
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
        )
        try:
            check_metadata_geolocation(station_metadata)
        except Exception as e:
            # Report the failure and continue with the remaining stations.
            print(
                f"Missing information for {data_source} {campaign_name} {station_name}."
            )
            print(f"The error is: {e}.")