Source code for crawler.schemas

"""Schemas module for the definition and retrieval of ``"collection"`` and ``"item"``-type metadata dictionary
compliant with the following schemas/models:

==============  ===============================================  ============================================
Schema          Collection metadata                              Item metadata
==============  ===============================================  ============================================
``PDSSP_STAC``  :class:`crawler.schemas.PDSSP_STAC_Collection`   :class:`crawler.schemas.PDSSP_STAC_Item`
``PDSSP_WFS``   :class:`crawler.schemas.PDSSP_WFS_Layer`         :class:`crawler.schemas.PDSSP_WFS_Feature`
``PDSODE``      :class:`crawler.schemas.PDSODE_IIPTSet`          :class:`crawler.schemas.PDSODE_Product`
``EPNTAP``      :class:`crawler.schemas.EPNTAP_Collection`       :class:`crawler.schemas.EPNTAP_Granule`
``MARSSI_WFS``  :class:`crawler.schemas.MARSSI_WFS_Layer`        :class:`crawler.schemas.MARSSI_WFS_Feature`
==============  ===============================================  ============================================

References/examples:

- https://github.com/radiantearth/stac-spec/blob/master/item-spec
- https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md
- https://github.com/ivoa-std/EPNTAP/blob/master/example-record.xml
"""

from pydantic import BaseModel, Field
from typing import Dict, List, Union, Optional
# from datetime import datetime

# STAC specification version used as the default ``stac_version`` value of the PDSSP STAC
# collection and item models declared in this module.
STAC_VERSION = '1.0.0'

# PDSSP_STAC -> "proxy" schema to STAC model

# Spatial part of a collection extent: a list of bounding boxes, each one a list of floats.
# (Documented with a comment rather than a class docstring so the generated schema is unchanged.)
class PDSSP_STAC_SpatialExtent(BaseModel):
    bbox: list[list[float]]
    """Potential spatial extents covered by the Collection."""
# Temporal part of a collection extent: a list of intervals, each one a list of strings.
# (Documented with a comment rather than a class docstring so the generated schema is unchanged.)
class PDSSP_STAC_TemporalExtent(BaseModel):
    interval: list[list[str]]
    """Potential temporal extents covered by the Collection."""
# Extent of a collection, combining one spatial and one temporal extent object.
class PDSSP_STAC_Extent(BaseModel):
    spatial: PDSSP_STAC_SpatialExtent
    """Potential spatial extents covered by the Collection."""

    temporal: PDSSP_STAC_TemporalExtent
    """Potential temporal extents covered by the Collection."""
# Provider entry of a collection: only ``name`` is declared without ``Optional``.
class PDSSP_STAC_Provider(BaseModel):
    name: str
    """The name of the organization or the individual."""

    description: Optional[str]
    """Multi-line description to add further provider information such as processing details for processors and producers, hosting details for hosts or basic contact information. CommonMark 0.29 syntax MAY be used for rich text representation."""

    roles: Optional[list[str]]
    """Roles of the provider. Any of licensor, producer, processor or host."""
# "Proxy" model for a STAC collection (``'collection'`` entry of the ``PDSSP_STAC`` schema).
#
# NOTE(review): ``PDSSP_STAC_Link`` is not defined in the visible part of this module -- confirm it
# is declared or imported before this class is created.
# NOTE(review): the field below is spelled ``licence`` whereas ``PDSSP_STAC_Properties`` uses
# ``license`` -- confirm which spelling callers rely on before renaming anything.
class PDSSP_STAC_Collection(BaseModel):
    type: str
    stac_version: str = STAC_VERSION
    stac_extensions: Optional[list[str]]
    id: str
    title: Optional[str]
    description: str
    keywords: Optional[list[str]]
    licence: str
    providers: Optional[list[PDSSP_STAC_Provider]]
    extent: PDSSP_STAC_Extent
    summaries: Optional[dict]

    # WARNING: NOT optional following STAC standard -> created automatically by PySTAC.
    links: Optional[list[PDSSP_STAC_Link]]

    # Map<string, PDSSP_STAC_Asset>: dictionary of asset objects that can be downloaded,
    # each with a unique key.
    assets: Optional[dict]

    extra_fields: Optional[dict]
# Asset object: a mandatory ``href`` plus optional descriptive fields.
class PDSSP_STAC_Asset(BaseModel):
    href: str
    title: Optional[str]
    description: Optional[str]
    type: Optional[str]
    roles: Optional[list[str]]
# Item properties restricted to STAC Common Metadata; ``datetime`` is the only field declared
# without ``Optional``. All date/time values are carried as ISO 8601 strings.
class PDSSP_STAC_Properties(BaseModel):
    # --- STAC Common Metadata ---
    title: Optional[str]
    # description: Optional[str]
    datetime: str                     # ISO 8601 format
    created: Optional[str]            # ISO 8601 format
    updated: Optional[str]            # ISO 8601 format
    start_datetime: Optional[str]     # ISO 8601 format
    end_datetime: Optional[str]       # ISO 8601 format
    license: Optional[str]
    platform: Optional[str]           # PDS instrument_host_id
    instruments: Optional[list[str]]  # PDS instrument_id
    constellation: Optional[str]      # ?
    mission: Optional[str]            # PDS mission_id
    gsd: Optional[float]

    # Disabled fields kept from the original source:
    # ssys_targets: Optional[list[str]] = Field(None, alias='ssys:targets')
    # ssys_solar_longitude: Optional[float]
    # ssys_instrument_host: Optional[str]
# "Proxy" model for a STAC item (``'item'`` entry of the ``PDSSP_STAC`` schema).
#
# NOTE(review): ``PDSSP_STAC_Link`` is not defined in the visible part of this module -- confirm it
# is declared or imported before this class is created.
class PDSSP_STAC_Item(BaseModel):
    type: str
    stac_version: str = STAC_VERSION
    stac_extensions: Optional[list[str]]
    id: str
    geometry: object  # GeoJSON Geometry
    bbox: Union[list[float], None]
    properties: dict  # PDSSP_STAC_Properties

    # WARNING: NOT optional following STAC standard -> created automatically by PySTAC.
    links: Optional[list[PDSSP_STAC_Link]]

    # Map<string, PDSSP_STAC_Asset>: dictionary of asset objects that can be downloaded,
    # each with a unique key.
    assets: dict

    collection: Optional[str]
    extra_fields: Optional[dict]
# Properties carrying the ``ssys:`` prefix. Each attribute is mapped to its prefixed key through
# a pydantic alias, because ':' cannot appear in a Python identifier.
class PDSSP_STAC_SSYS_Properties(BaseModel):
    ssys_targets: Optional[list[str]] = Field(alias='ssys:targets')
    ssys_solar_longitude: Optional[float] = Field(alias='ssys:solar_longitude')
    ssys_solar_distance: Optional[float] = Field(alias='ssys:solar_distance')
    ssys_incidence_angle: Optional[float] = Field(alias='ssys:incidence_angle')
    ssys_emission_angle: Optional[float] = Field(alias='ssys:emission_angle')
    ssys_phase_angle: Optional[float] = Field(alias='ssys:phase_angle')

    # Disabled fields kept from the original source:
    # ssys_spatial_resolution: Optional[float] = Field(alias='ssys:spatial_resolution')
    # ssys_processing_level: Optional[str] = Field(alias='ssys:processing_level')
    # ssys_product_type: Optional[str] = Field(alias='ssys:product_type')
# Properties carrying the ``processing:`` prefix. Each attribute is mapped to its prefixed key
# through a pydantic alias, because ':' cannot appear in a Python identifier.
class PDSSP_STAC_Processing_Properties(BaseModel):
    processing_expression: Optional[object] = Field(alias='processing:expression')
    processing_lineage: Optional[str] = Field(alias='processing:lineage')
    processing_level: Optional[str] = Field(alias='processing:level')
    processing_facility: Optional[str] = Field(alias='processing:facility')
    processing_software: Optional[dict] = Field(alias='processing:software')
# Placeholder: registered as the ``'collection'`` model of the ``PDSSP_WFS`` schema,
# no field is declared yet.
class PDSSP_WFS_Layer(BaseModel):
    pass
# Placeholder: registered as the ``'item'`` model of the ``PDSSP_WFS`` schema,
# no field is declared yet.
class PDSSP_WFS_Feature(BaseModel):
    pass
# PDS ODE Metadata Schemas #
# Wrapper around the ``ValidTarget`` key, which holds either one string or a list of strings.
class PDSODE_ValidTargets(BaseModel):
    ValidTarget: Union[str, list[str]]
# Registered as the ``'collection'`` model of the ``PDSODE`` schema. Every field is mandatory.
# NOTE(review): the abbreviations presumably stand for instrument host (IH), instrument (I) and
# product type (PT), as in the ``ihid`` / ``iid`` / ``pt`` fields of ``PDSODE_Product`` -- confirm.
class PDSODE_IIPTSet(BaseModel):
    ODEMetaDB: str
    IHID: str
    IHName: str
    IID: str
    IName: str
    PT: str
    PTName: str
    DataSetId: str
    ValidTargets: PDSODE_ValidTargets
    NumberProducts: int
# Pairs a ``PDSODE_IIPTSet`` with an optional list of STAC extension identifiers.
class PDSODE_Collection(BaseModel):
    iiptset: PDSODE_IIPTSet
    stac_extensions: Optional[list[str]]
# Description of one file attached to an ODE product. All values are kept as strings
# (including ``KBytes``); the trailing comments show example values.
class PDSODE_Product_file(BaseModel):
    Description: str  # e.g. "MAP PROJECTION FILE"
    FileName: str     # e.g. "DSMAP.CAT"
    KBytes: str       # e.g. "7"
    Type: str         # e.g. "Referenced"
    URL: str          # e.g. "https://hirise.lpl.arizona.edu/PDS/CATALOG/DSMAP.CAT"
# Wrapper around the ``Product_file`` key, which holds the list of product file descriptions.
class PDSODE_Product_file_key(BaseModel):
    Product_file: list[PDSODE_Product_file]
# Registered as the ``'item'`` model of the ``PDSODE`` schema: metadata of one ODE product.
#
# The ``*_text`` fields hold the raw label text of a keyword. ``Phase_angle_text``,
# ``Incidence_angle_text``, ``Map_resolution_text``, ``Map_scale_text`` and ``Solar_distance_text``
# were previously typed ``Optional[float]``, which rejects non-numeric label text; they now accept
# ``Union[float, str]`` so such records validate (``float`` is listed first in the union).
#
# (Documented with comments rather than a class docstring so the generated schema description
# is not affected.)
class PDSODE_Product(BaseModel):
    ode_id: str
    """An internal ODE product identifier."""

    pdsid: str
    """PDS Product Id."""

    ihid: str
    """Instrument host id."""

    iid: str
    """Instrument id."""

    pt: str
    """ODE Product type."""

    Label_Product_Type: Optional[str]
    """Label product type (if it exists in the label and is different from the ODE_Product_Type)."""

    Data_Set_Id: str
    """PDS Data Set Id."""

    PDSVolume_Id: str
    """Volume Id."""

    RelativePathtoVol: str
    """The relative path from the volume root to the product label file."""

    LabelFileName: str
    """The file name of the product label."""

    Product_creation_time: str
    """Product creation time (UTC)."""

    Target_name: str
    """Product target (example: Mars)."""

    Product_version_id: Optional[str]
    """Product version."""

    Observation_id: Optional[str]
    """Identifies a scientific observation within a data set."""

    Observation_number: Optional[str]
    """Monotonically increasing ordinal counter of the EDRs generated for a particular OBSERVATION_ID."""

    Observation_type: Optional[str]
    """Identifies the general type of an observation."""

    Producer_id: Optional[str]
    """Producer id."""

    Producer_name: Optional[str]
    """Producer name."""

    Product_release_date: Optional[str]
    """Product release date."""

    Activity_id: Optional[str]
    """Label Activity id"""

    Predicted_dust_opacity: Optional[str]
    """Predicted dust opacity."""

    Predicted_dust_opacity_text: Optional[str]
    """Predicted dust opacity text."""

    Observation_time: Optional[str]
    """Observation time (mid-point between the start and end of the observation)."""

    SpaceCraft_clock_start_count: Optional[str]
    """Spacecraft clock start."""

    SpaceCraft_clock_stop_count: Optional[str]
    """Spacecraft clock stop."""

    Stop_orbit_number: Optional[str]
    """Stop orbit number."""

    UTC_start_time: str  # defined as mandatory for all observational data products
    """Observation start time in UTC."""

    UTC_stop_time: str  # defined as mandatory for all observational data products
    """Observation stop time in UTC."""

    Emission_angle: Optional[float]
    """Emission angle."""

    Emission_angle_text: Optional[str]
    """Emission angle text from the product label."""

    Phase_angle: Optional[float]
    """Phase angle."""

    Phase_angle_text: Optional[Union[float, str]]
    """Phase angle text from the product label."""

    Incidence_angle: Optional[float]
    """Incidence angle."""

    Incidence_angle_text: Optional[Union[float, str]]
    """Incidence angle text from the product label."""

    Map_resolution: Optional[float]
    """Map resolution."""

    Map_resolution_text: Optional[Union[float, str]]
    """Map resolution text from the product label."""

    Map_scale: Optional[float]
    """Map scale."""

    Map_scale_text: Optional[Union[float, str]]
    """Map scale text from the product label."""

    Solar_distance: Optional[float]
    """Solar distance."""

    Solar_distance_text: Optional[Union[float, str]]
    """Solar distance text from the product label."""

    Solar_longitude: Optional[float]
    """Solar longitude."""

    Center_georeferenced: Optional[str]
    """T if the product has a footprint center."""

    Center_latitude: Optional[str]
    """Planetocentric footprint center latitude."""

    Center_longitude: Optional[str]
    """Longitude 0-360 center longitude."""

    Center_latitude_text: Optional[str]
    """Text found in the center latitude label keyword if the center latitude is not a valid number."""

    Center_longitude_text: Optional[str]
    """Text found in the center longitude label keyword if the center longitude is not a valid number."""

    BB_georeferenced: Optional[str]
    """T if the product has a footprint bounding box."""

    Easternmost_longitude: Optional[str]
    """Longitude 0-360 Easternmost longitude of the footprint."""

    Maximum_latitude: Optional[str]
    """Planetocentric maximum latitude of the footprint."""

    Minimum_latitude: Optional[str]
    """Planetocentric minimum latitude of the footprint."""

    Westernmost_longitude: Optional[str]
    """Longitude 0-360 Westernmost longitude of the footprint."""

    Easternmost_longitude_text: Optional[str]
    """Text found in the easternmost longitude label keyword if the easternmost longitude is not a valid number."""

    Maximum_latitude_text: Optional[str]
    """Text found in the maximum latitude label keyword if the maximum latitude is not a valid number."""

    Minimum_latitude_text: Optional[str]
    """Text found in the minimum latitude label keyword if the minimum latitude is not a valid number."""

    Westernmost_longitude_text: Optional[str]
    """Text found in the westernmost longitude label keyword if the westernmost longitude is not a valid number."""

    Footprint_geometry: Optional[str]
    """Cylindrical projected planetocentric, longitude 0-360 product footprint in WKT format.
    Only if there is a valid footprint. Note - this is a cylindrical projected footprint.
    The footprint has been split into multiple polygons when crossing the 0/360 longitude line and
    any footprints that cross the poles have been adjusted to add points to and around the pole.
    It is meant for use in cylindrical projects and is not appropriate for spherical displays."""

    Footprint_C0_geometry: Optional[str]
    """Planetocentric, longitude -180-180 product footprint in WKT format.
    Only if there is a valid footprint. Note - this is a cylindrical projected footprint.
    The footprint has been split into multiple polygons when crossing the -180/180 longitude line and
    any footprints that cross the poles have been adjusted to add points to and around the pole.
    It is meant for use in cylindrical projects and is not appropriate for spherical displays."""

    Footprint_GL_geometry: Optional[str]
    """Planetocentric, longitude 0-360 product footprint in WKT format.
    Only if there is a valid footprint. This is not a projected footprint."""

    Footprint_NP_geometry: Optional[str]
    """Stereographic north polar projected footprint in WKT format.
    Only if there is a valid footprint.
    This footprint has been projected into meters in stereographic north polar projection."""

    Footprint_SP_geometry: Optional[str]
    """Stereographic south polar projected footprint in WKT format.
    Only if there is a valid footprint.
    This footprint has been projected into meters in stereographic south polar projection."""

    Footprints_cross_meridian: Optional[str]
    """T if the footprint crosses the 0/360 longitude line (anti-meridian)."""

    Pole_state: Optional[str]
    """String of "none", "north", or "south"."""

    # NOTE(review): the field name is spelled "souce"; kept as-is because renaming it would change
    # the accepted input key -- confirm against the upstream ODE response before touching it.
    Footprint_souce: Optional[str]
    """A brief description of where the footprint came from."""

    # USGS_Sites

    Comment: Optional[str]
    """Any associated comment."""

    Description: Optional[str]
    """Label description"""

    # ODE_notes

    External_url: Optional[str]
    """URL to an external reference to the product. Product type specific but usually something like the HiRISE site."""

    External_url2: Optional[str]
    """URL to an external reference to the product. Product type specific but usually something like the HiRISE site."""

    External_url3: Optional[str]
    """URL to an external reference to the product. Product type specific but usually something like the HiRISE site."""

    # NOTE(review): the three URL fields below carry empty docstrings in the original source;
    # their exact meaning is not established here and should be documented from the ODE reference.
    FilesURL: Optional[str]
    """"""

    ProductURL: Optional[str]
    """"""

    LabelURL: Optional[str]
    """"""

    Product_files: PDSODE_Product_file_key
    """Associated product files."""
# Placeholder: registered as the ``'collection'`` model of the ``EPNTAP`` schema,
# no field is declared yet.
class EPNTAP_Collection(BaseModel):
    pass
# Registered as the ``'item'`` model of the ``EPNTAP`` schema. Every field is a mandatory string;
# the blank lines below only group fields by name prefix and carry no meaning for validation.
class EPNTAP_Granule(BaseModel):
    granule_uid: str
    granule_gid: str
    obs_id: str
    dataproduct_type: str
    target_name: str
    target_class: str

    time_min: str
    time_max: str
    time_sampling_step_min: str
    time_sampling_step_max: str
    time_exp_min: str
    time_exp_max: str

    spectral_range_min: str
    spectral_range_max: str
    spectral_sampling_step_min: str
    spectral_sampling_step_max: str
    spectral_resolution_min: str
    spectral_resolution_max: str

    c1min: str
    c1max: str
    c2min: str
    c2max: str
    c3min: str
    c3max: str
    s_region: str
    c1_resol_min: str
    c1_resol_max: str
    c2_resol_min: str
    c2_resol_max: str
    c3_resol_min: str
    c3_resol_max: str
    spatial_frame_type: str

    incidence_min: str
    incidence_max: str
    emergence_min: str
    emergence_max: str
    phase_min: str
    phase_max: str

    instrument_host_name: str
    instrument_name: str
    measurement_type: str
    processing_level: str

    creation_date: str
    modification_date: str
    release_date: str

    service_title: str
    access_url: str
    access_format: str
    access_estsize: str
    file_name: str
    publisher: str
# Placeholder: registered as the ``'collection'`` model of the ``MARSSI_WFS`` schema,
# no field is declared yet.
class MARSSI_WFS_Layer(BaseModel):
    pass
# Placeholder: registered as the ``'item'`` model of the ``MARSSI_WFS`` schema,
# no field is declared yet.
class MARSSI_WFS_Feature(BaseModel):
    pass
def get_schema_names() -> list[str]:
    """Describe every registered metadata schema.

    :return: one string per entry of ``METADATA_SCHEMAS``, made of the schema name, a space, and
        the list of object types registered for it, e.g. ``"PDSODE ['collection', 'item']"``.
    """
    return [
        f'{schema} {list(object_types)}'
        for schema, object_types in METADATA_SCHEMAS.items()
    ]
def get_schema_json(name: str, object_type: str) -> Optional[str]:
    """Return the JSON schema of a registered metadata model.

    :param name: schema name, i.e. a key of ``METADATA_SCHEMAS``.
    :param object_type: object type registered for that schema, i.e. a key of
        ``METADATA_SCHEMAS[name]``. Only ``'collection'`` and ``'item'`` are registered by the
        mapping declared in this module.
    :return: the result of ``schema_json(indent=2)`` called on the matching model class (a JSON
        document, not a model instance), or ``None`` when the schema name or the object type is
        not registered. In the latter case a message is printed.
    """
    if name not in METADATA_SCHEMAS:
        print(f'Unknown metadata schema: {name}.')
        return None

    schema_classes = METADATA_SCHEMAS[name]
    if object_type not in schema_classes:
        # Report the schema *name*: the previous message interpolated the whole class mapping.
        print(f'No schema defined for `{name}` schema `{object_type}` object type.')
        return None

    return schema_classes[object_type].schema_json(indent=2)
def create_schema_object(metadata: dict, name: str, object_type: str) -> Optional[BaseModel]:
    """Create a collection or item metadata object from an input metadata dictionary.

    :param metadata: dictionary whose items are passed as keyword arguments to the model class.
    :param name: schema name, i.e. a key of ``METADATA_SCHEMAS``.
    :param object_type: object type registered for that schema, i.e. a key of
        ``METADATA_SCHEMAS[name]``.
    :return: an instance of the matching model class, or ``None`` when the schema name or the
        object type is not registered. In the latter case a message is printed.

    Any error raised by the model class while building the object is not caught here and
    propagates to the caller.
    """
    if name not in METADATA_SCHEMAS:
        print(f'Unknown metadata schema: {name}.')
        return None

    schema_classes = METADATA_SCHEMAS[name]
    if object_type not in schema_classes:
        # Report the schema *name*: the previous message interpolated the whole class mapping.
        print(f'No schema defined for `{name}` schema `{object_type}` object type.')
        return None

    return schema_classes[object_type](**metadata)
# Registry used by ``get_schema_names``, ``get_schema_json`` and ``create_schema_object``:
# schema name -> object type -> model class.
METADATA_SCHEMAS = {
    'PDSSP_STAC': {'collection': PDSSP_STAC_Collection, 'item': PDSSP_STAC_Item},
    'PDSSP_WFS': {'collection': PDSSP_WFS_Layer, 'item': PDSSP_WFS_Feature},
    'PDSODE': {'collection': PDSODE_IIPTSet, 'item': PDSODE_Product},
    'EPNTAP': {'collection': EPNTAP_Collection, 'item': EPNTAP_Granule},
    'MARSSI_WFS': {'collection': MARSSI_WFS_Layer, 'item': MARSSI_WFS_Feature},
}
"""Mapping of metadata schemas and their corresponding ``"collection"`` and ``"item"`` classes."""