Coverage for pds_crawler/load/pds_objects_parser.py: 90%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2# pds-crawler - ETL to index PDS data to pdssp
3# Copyright (C) 2023 - CNES (Jean-Christophe Malapert for Pôle Surfaces Planétaires)
4# This file is part of pds-crawler <https://github.com/pdssp/pds_crawler>
5# SPDX-License-Identifier: LGPL-3.0-or-later
6"""
7Module Name:
8 pds_archive
10Description:
11 the pds_archive module parses the PDS3 catalogs by providing the parser and stores
12 the information in the appropriate model.
14Classes:
15 GrammarEnum:
16 Abstract Enum to create a concrete grammar for Lark
17 PdsTransformer :
18 Common parser, used by other parsers.
19 MissionCatalogTransformer :
20 Parses the PDS3 mission catalog file that contains the mission information
21 and stores the information in the MissionModel class.
22 ReferenceCatalogTransformer :
23 Parses the PDS3 reference catalog file that contains the citations and
24 stores the information in the ReferencesModel class.
25 PersonCatalogTransformer :
26 Parses the PDS3 person catalog file that contains the points of contact and
27 stores the information in the PersonnelsModel model.
28 VolumeDescriptionTransformer :
29 Parses the PDS3 volume catalog file that contains the references to other
30 catalogs and stores the information in the VolumeModel model.
31 InstrumentCatalogTransformer :
32 Parses the PDS3 instrument catalog file that contains the instrument information
33 and stores the information in the InstrumentModel model.
34 InstrumentHostCatalogTransformer :
35 Parses the PDS3 platform catalog file that contains the platform description
36 and stores the information in the InstrumentHostModel model.
37 DataSetCatalogTransformer :
38 Parses the PDS3 dataset catalog file that contains the dataset description
39 and stores the information in the DataSetModel class.
40 PdsParserFactory :
41 Factory to select the right parser and the related Lark grammar.
44.. uml::
46 class PdsTransformer{
47 +__init__(visit_tokens: bool = True)
48 +result() : Any
49 }
50 class ProjectionDescriptionTransformer{
51 +__init__(visit_tokens: bool = True)
52 +result() : DataSetMapProjectionModel
53 -__result : DataSetMapProjectionModel
54 }
55 class MissionCatalogTransformer{
56 +__init__(visit_tokens: bool = True)
57 +result() : MissionModel
58 -__result : MissionModel
59 }
60 class ReferenceCatalogTransformer{
61 +__init__(visit_tokens: bool = True)
62 +result() : ReferencesModel
63 -__result : ReferencesModel
64 }
65 class PersonCatalogTransformer{
66 +__init__(visit_tokens: bool = True)
67 +result() : PersonnelsModel
68 -__result : PersonnelsModel
69 }
70 class VolumeDescriptionTransformer{
71 +__init__(visit_tokens: bool = True)
72 +result() : VolumeModel
73 -__result : VolumeModel
74 }
75 class InstrumentCatalogTransformer{
76 +__init__(visit_tokens: bool = True)
77 +result() : InstrumentModel
78 -__result : InstrumentModel
79 }
80 class DataSetCatalogTransformer{
81 +__init__(visit_tokens: bool = True)
82 +result() : DataSetModel
83 -__result : DataSetModel
84 }
85 PdsTransformer <|-- ProjectionDescriptionTransformer
86 PdsTransformer <|-- MissionCatalogTransformer
87 PdsTransformer <|-- ReferenceCatalogTransformer
88 PdsTransformer <|-- PersonCatalogTransformer
89 PdsTransformer <|-- VolumeDescriptionTransformer
90 PdsTransformer <|-- InstrumentCatalogTransformer
91 PdsTransformer <|-- DataSetCatalogTransformer
93Author:
94 Jean-Christophe Malapert
95"""
96import importlib
97import logging
98import os
99import signal
100from abc import ABC
101from abc import abstractproperty
102from contextlib import closing
103from enum import Enum
104from pathlib import Path
105from typing import Any
107from lark import Lark
108from lark import Transformer
109from lark import Tree
110from lark import v_args
112from ..exception import ParserTimeOutError
113from ..models import DataSetMapProjectionModel
114from ..models import DataSetModel
115from ..models import InstrumentHostModel
116from ..models import InstrumentModel
117from ..models import MissionModel
118from ..models import PersonnelsModel
119from ..models import ReferencesModel
120from ..models import VolumeModel
121from ..utils import requests_retry_session
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class GrammarEnum(Enum):
    """Enum base whose members carry a Lark grammar file and a class name.

    Each member is declared as a tuple ``(value, grammar, class_name[, doc])``.
    """

    def __new__(cls, *args):
        # NOTE(review): the member value is taken from the SECOND tuple item
        # (the grammar file name), not from the first one - confirm that this
        # is the intended value of the enum.
        member = object.__new__(cls)
        member._value_ = str(args[1])
        return member

    # ``value`` is accepted only to mirror the member tuple; the member value
    # itself has already been assigned by __new__.
    def __init__(  # pylint: disable=too-many-arguments
        self,
        value: str,
        grammar: str,
        class_name: str,
        doc=None,
    ):
        self._grammar_: str = grammar
        self._class_name_: str = class_name
        if doc is None:
            return
        self.__doc__ = doc

    @property
    def grammar(self) -> str:
        """Grammar file name given as the second item of the member tuple."""
        return self._grammar_

    @property
    def class_name(self) -> str:
        """Class name given as the third item of the member tuple."""
        return self._class_name_
class PdsTransformer(Transformer):
    """Base transformer holding the callbacks shared by every catalog parser."""

    def __init__(self, visit_tokens: bool = True) -> None:
        super().__init__(visit_tokens)

    @abstractproperty
    def result(self) -> Any:
        """Model built by the transformation; concrete classes override it."""
        raise NotImplementedError("Method def result(self) not implemented")

    @v_args(inline=True)
    def properties(self, *args):
        """Merge every child mapping into a single dictionary."""
        merged = {}
        for entry in args:
            merged.update(entry)
        return merged

    @v_args(inline=True)
    def property(self, keyword, value):
        """Build a one-item dictionary from a keyword and its value."""
        return {keyword: value}

    @v_args(inline=True)
    def keyword_property(self, name):
        """Return the keyword without leading/trailing whitespace."""
        return name.strip()

    @v_args(inline=True)
    def value_property(self, name):
        """Return the value unchanged."""
        return name

    # The delimiter callbacks below carry no information: each one returns an
    # empty string.

    @v_args(inline=True)
    def open_list(self, *args):
        return ""

    @v_args(inline=True)
    def close_list(self, *args):
        return ""

    @v_args(inline=True)
    def open_parenthesis(self, *args):
        return ""

    @v_args(inline=True)
    def close_parenthesis(self, *args):
        return ""

    @v_args(inline=True)
    def open_bracket(self, *args):
        return ""

    @v_args(inline=True)
    def close_bracket(self, *args):
        return ""

    @v_args(inline=True)
    def simple_value(self, name):
        """Return the value without its leading/trailing double quotes."""
        return name.strip('"')

    @v_args(inline=True)
    def standard_value(self, *args):
        """Concatenate all the children into one string."""
        return "".join(args)

    @v_args(inline=True)
    def tiret(self):
        return "-"

    @v_args(inline=True)
    def point(self):
        return "."

    @v_args(inline=True)
    def multi_values(self, *args):
        """Return the children as a list, dropping the empty strings."""
        return [item for item in args if item != ""]

    @v_args(inline=True)
    def common_comma(self, name):
        return ","

    @v_args(inline=True)
    def simple_value_comma(self, name, *args):
        """Return the first child and ignore the remaining ones."""
        return name

    @v_args(inline=True)
    def string(self, name):
        return name

    @v_args(inline=True)
    def multi_lines_string(self, name):
        return name

    @v_args(inline=True)
    def date_str(self, *args):
        """Concatenate the date fragments into one string."""
        return "".join(args)
class ProjectionDescriptionTransformer(PdsTransformer):
    """Transformer for the PDS3 map projection catalog.

    The parsed content is stored as a DataSetMapProjectionModel.
    """

    def __init__(self, visit_tokens: bool = True) -> None:
        super().__init__(visit_tokens)
        # Declared only; the value is assigned by data_set_map_projection().
        self.__result: DataSetMapProjectionModel

    @property
    def result(self) -> DataSetMapProjectionModel:
        """Model built by data_set_map_projection()."""
        return self.__result

    @v_args(inline=True)
    def data_set_map_projection(
        self, start, properties, data_set_map_projection_info, stop
    ):
        """Merge the properties and the info block, then build the model."""
        content = dict(properties)
        content.update(data_set_map_projection_info)
        self.__result = DataSetMapProjectionModel.from_dict(content)

    @v_args(inline=True)
    def data_set_map_projection_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_map_projection_stop(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_map_projection_info(
        self, start, properties, data_set_map_projection_refs_info, stop
    ):
        """Add the references to the properties and wrap them under one key."""
        properties.update(data_set_map_projection_refs_info)
        return {"DATA_SET_MAP_PROJECTION_INFO": properties}

    @v_args(inline=True)
    def data_set_map_projection_info_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_map_projection_info_stop(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_map_projection_refs_info(self, *args):
        """Group every reference block (as a tuple) under one key."""
        return {"DS_MAP_PROJECTION_REF_INFO": args}

    @v_args(inline=True)
    def data_set_map_projection_ref_info(self, start, properties, stop):
        """Keep the properties only; the start/stop markers are ignored."""
        return properties

    @v_args(inline=True)
    def data_set_map_projection_ref_info_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_map_projection_ref_info_stop(self, *args):
        return ""
class MissionCatalogTransformer(PdsTransformer):
    """Transformer for the PDS3 mission catalog.

    The parsed content is stored as a MissionModel.
    """

    def __init__(self, visit_tokens: bool = True) -> None:
        super().__init__(visit_tokens)
        # Declared only; the value is assigned by mission().
        self.__result: MissionModel

    @property
    def result(self) -> MissionModel:
        """Model built by mission()."""
        return self.__result

    @v_args(inline=True)
    def mission(
        self,
        start,
        properties,
        mission_information,
        mission_host,
        mission_reference_informations,
        stop,
    ):
        """Merge every section of the catalog and build the model."""
        content = dict(properties)
        for section in (
            mission_information,
            mission_host,
            mission_reference_informations,
        ):
            content.update(section)
        self.__result = MissionModel.from_dict(content)

    @v_args(inline=True)
    def mission_start(self, *args):
        return ""

    @v_args(inline=True)
    def mission_stop(self, *args):
        return ""

    @v_args(inline=True)
    def mission_information(self, start, properties, stop):
        """Wrap the properties under the MISSION_INFORMATION key."""
        return {"MISSION_INFORMATION": properties}

    @v_args(inline=True)
    def mission_information_start(self, *args):
        return ""

    @v_args(inline=True)
    def mission_information_stop(self, *args):
        return ""

    @v_args(inline=True)
    def mission_host(self, start, properties, mission_targets, stop):
        """Add the targets to the properties and wrap them under one key."""
        properties.update(mission_targets)
        return {"MISSION_HOST": properties}

    @v_args(inline=True)
    def mission_host_start(self, *args):
        return ""

    @v_args(inline=True)
    def mission_host_stop(self, *args):
        return ""

    @v_args(inline=True)
    def mission_targets(self, *args):
        """Group every target block (as a tuple) under one key."""
        return {"MISSION_TARGET": args}

    @v_args(inline=True)
    def mission_target(self, start, properties, stop):
        """Keep the properties only; the start/stop markers are ignored."""
        return properties

    @v_args(inline=True)
    def mission_target_start(self, *args):
        return ""

    @v_args(inline=True)
    def mission_target_stop(self, *args):
        return ""

    @v_args(inline=True)
    def mission_reference_informations(self, *args):
        """Group every reference block (as a tuple) under one key."""
        return {"MISSION_REFERENCE_INFORMATION": args}

    @v_args(inline=True)
    def mission_reference_information(self, start, properties, stop):
        """Keep the properties only; the start/stop markers are ignored."""
        return properties

    @v_args(inline=True)
    def mission_reference_information_start(self, *args):
        return ""

    @v_args(inline=True)
    def mission_reference_information_stop(self, *args):
        return ""
class ReferenceCatalogTransformer(PdsTransformer):
    """Parses the PDS3 reference catalog file that contains the citations and
    stores the information in the ReferencesModel class.
    """

    def __init__(self, visit_tokens: bool = True) -> None:
        super().__init__(visit_tokens)
        # Declared only; the value is assigned by references().
        self.__result: ReferencesModel

    @property
    def result(self) -> ReferencesModel:
        """Model built by references()."""
        return self.__result

    @v_args(inline=True)
    def references(self, *args):
        """Build the model from every reference block (passed as a tuple)."""
        self.__result = ReferencesModel.from_dict({"REFERENCES": args})

    @v_args(inline=True)
    def reference(self, start, properties, stop):
        """Keep the properties only; the start/stop markers are ignored."""
        return properties

    # ``self`` is now explicit on the two callbacks below, like on every other
    # callback of the transformers; it used to be swallowed by ``*args``.

    @v_args(inline=True)
    def reference_start(self, *args):
        return ""

    @v_args(inline=True)
    def reference_stop(self, *args):
        return ""
class PersonCatalogTransformer(PdsTransformer):
    """Transformer for the PDS3 person catalog (points of contact).

    The parsed content is stored as a PersonnelsModel.
    """

    def __init__(self, visit_tokens: bool = True) -> None:
        super().__init__(visit_tokens)
        # Declared only; the value is assigned by personnels().
        self.__result: PersonnelsModel

    @property
    def result(self) -> PersonnelsModel:
        """Model built by personnels()."""
        return self.__result

    @v_args(inline=True)
    def personnels(self, *args):
        """Build the model from every personnel block (passed as a tuple)."""
        self.__result = PersonnelsModel.from_dict({"PERSONNELS": args})

    @v_args(inline=True)
    def personnel(
        self,
        start,
        pds_user_id,
        personnel_information,
        personnel_electronic_mail,
        stop,
    ):
        """Merge the three sections describing one person."""
        person = dict(pds_user_id)
        person.update(personnel_information)
        person.update(personnel_electronic_mail)
        return person

    @v_args(inline=True)
    def personnel_start(self, *args):
        return ""

    @v_args(inline=True)
    def personnel_stop(self, *args):
        return ""

    @v_args(inline=True)
    def pds_user_value(self, name):
        return name

    @v_args(inline=True)
    def pds_user_id(self, name):
        """Wrap the identifier under the PDS_USER_ID key."""
        return {"PDS_USER_ID": name}

    @v_args(inline=True)
    def personnel_information(self, start, properties, stop):
        """Wrap the properties under the PERSONNEL_INFORMATION key."""
        return {"PERSONNEL_INFORMATION": properties}

    @v_args(inline=True)
    def personnel_information_stop(self, *args):
        return ""

    @v_args(inline=True)
    def personnel_information_start(self, *args):
        return ""

    @v_args(inline=True)
    def personnel_electronic_mail(self, start, name, stop):
        """Wrap the content under the PERSONNEL_ELECTRONIC_MAIL key."""
        return {"PERSONNEL_ELECTRONIC_MAIL": name}

    @v_args(inline=True)
    def personnel_electronic_mail_stop(self, *args):
        return ""

    @v_args(inline=True)
    def personnel_electronic_mail_start(self, *args):
        return ""
class VolumeDescriptionTransformer(PdsTransformer):
    """Transformer for the PDS3 volume description file.

    The parsed content is stored as a VolumeModel.
    """

    def __init__(self, visit_tokens: bool = True) -> None:
        super().__init__(visit_tokens)
        # Declared only; the value is assigned by volume().
        self.__result: VolumeModel

    @property
    def result(self) -> VolumeModel:
        """Model built by volume()."""
        return self.__result

    @v_args(inline=True)
    def volume(self, *args):
        """Merge every child that is not a Tree and build the model."""
        content = {}
        for child in args:
            # Children left as Tree are the start/stop markers: skip them.
            if not isinstance(child, Tree):
                content.update(child)
        self.__result = VolumeModel.from_dict(content)

    @v_args(inline=True)
    def volume_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_producer(self, start, properties, stop):
        """Wrap the properties under the DATA_PRODUCER key."""
        return {"DATA_PRODUCER": properties}

    @v_args(inline=True)
    def data_producer_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_producer_stop(self, *args):
        return ""

    @v_args(inline=True)
    def catalog(self, start, properties, stop):
        """Wrap the properties under the CATALOG key."""
        return {"CATALOG": properties}

    @v_args(inline=True)
    def catalog_start(self, *args):
        return ""

    @v_args(inline=True)
    def catalog_stop(self, *args):
        return ""

    @v_args(inline=True)
    def data_supplier(self, start, properties, stop):
        """Wrap the properties under the DATA_SUPPLIER key."""
        return {"DATA_SUPPLIER": properties}

    @v_args(inline=True)
    def data_supplier_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_supplier_stop(self, *args):
        return ""

    @v_args(inline=True)
    def files(self, *args):
        """Group every file block (as a tuple) under the FILE key."""
        return {"FILE": args}

    @v_args(inline=True)
    def file(self, start, properties, stop):
        """Keep the properties only; the start/stop markers are ignored."""
        return properties

    @v_args(inline=True)
    def file_start(self, *args):
        return ""

    @v_args(inline=True)
    def file_stop(self, *args):
        return ""

    @v_args(inline=True)
    def directories(self, *args):
        """Group every directory block (as a tuple) under the DIRECTORY key."""
        return {"DIRECTORY": args}

    @v_args(inline=True)
    def directory(self, start, properties, files, stop):
        """Add the files to the directory properties and return them."""
        properties.update(files)
        return properties

    @v_args(inline=True)
    def directory_start(self, *args):
        return ""

    @v_args(inline=True)
    def directory_stop(self, *args):
        return ""
class InstrumentCatalogTransformer(PdsTransformer):
    """Transformer for the PDS3 instrument catalog.

    The parsed content is stored as an InstrumentModel.
    """

    def __init__(self, visit_tokens: bool = True) -> None:
        super().__init__(visit_tokens)
        # Declared only; the value is assigned by instrument().
        self.__result: InstrumentModel

    @property
    def result(self) -> InstrumentModel:
        """Model built by instrument()."""
        return self.__result

    @v_args(inline=True)
    def instrument(
        self,
        start,
        properties,
        instrument_information,
        instrument_reference_infos,
        stop,
    ):
        """Merge every section of the catalog and build the model."""
        content = dict(properties)
        content.update(instrument_information)
        content.update(instrument_reference_infos)
        self.__result = InstrumentModel.from_dict(content)

    @v_args(inline=True)
    def instrument_start(self, *args):
        return ""

    @v_args(inline=True)
    def instrument_stop(self, *args):
        return ""

    @v_args(inline=True)
    def instrument_information(self, start, properties, stop):
        """Wrap the properties under the INSTRUMENT_INFORMATION key."""
        return {"INSTRUMENT_INFORMATION": properties}

    @v_args(inline=True)
    def instrument_information_start(self, *args):
        return ""

    @v_args(inline=True)
    def instrument_information_stop(self, *args):
        return ""

    @v_args(inline=True)
    def instrument_reference_infos(self, *args):
        """Group every reference block (as a tuple) under one key."""
        return {"INSTRUMENT_REFERENCE_INFO": args}

    @v_args(inline=True)
    def instrument_reference_info(self, start, properties, stop):
        """Keep the properties only; the start/stop markers are ignored."""
        return properties

    @v_args(inline=True)
    def instrument_reference_info_start(self, *args):
        return ""

    @v_args(inline=True)
    def instrument_reference_info_stop(self, *args):
        return ""
class InstrumentHostCatalogTransformer(PdsTransformer):
    """Transformer for the PDS3 instrument host (platform) catalog.

    The parsed content is stored as an InstrumentHostModel.
    """

    def __init__(self, visit_tokens: bool = True) -> None:
        super().__init__(visit_tokens)
        # Declared only; the value is assigned by instrument_host().
        self.__result: InstrumentHostModel

    @property
    def result(self) -> InstrumentHostModel:
        """Model built by instrument_host()."""
        return self.__result

    @v_args(inline=True)
    def instrument_host(
        self,
        start,
        properties,
        instrument_host_information,
        instrument_host_reference_infos,
        stop,
    ):
        """Merge every section of the catalog and build the model."""
        content = dict(properties)
        content.update(instrument_host_information)
        content.update(instrument_host_reference_infos)
        self.__result = InstrumentHostModel.from_dict(content)

    @v_args(inline=True)
    def instrument_host_start(self, *args):
        return ""

    @v_args(inline=True)
    def instrument_host_stop(self, *args):
        return ""

    @v_args(inline=True)
    def instrument_host_information(self, start, properties, stop):
        """Wrap the properties under the INSTRUMENT_HOST_INFORMATION key."""
        return {"INSTRUMENT_HOST_INFORMATION": properties}

    @v_args(inline=True)
    def instrument_host_information_start(self, *args):
        return ""

    @v_args(inline=True)
    def instrument_host_information_stop(self, *args):
        return ""

    @v_args(inline=True)
    def instrument_host_reference_infos(self, *args):
        """Group every reference block (as a tuple) under one key."""
        return {"INSTRUMENT_HOST_REFERENCE_INFO": args}

    @v_args(inline=True)
    def instrument_host_reference_info(self, start, properties, stop):
        """Keep the properties only; the start/stop markers are ignored."""
        return properties

    @v_args(inline=True)
    def instrument_host_reference_info_start(self, *args):
        return ""

    @v_args(inline=True)
    def instrument_host_reference_info_stop(self, *args):
        return ""
class DataSetCatalogTransformer(PdsTransformer):
    """Transformer for the PDS3 dataset catalog.

    The parsed content is stored as a DataSetModel.
    """

    def __init__(self, visit_tokens: bool = True) -> None:
        super().__init__(visit_tokens)
        # Declared only; the value is assigned by data_set().
        self.__result: DataSetModel

    @property
    def result(self) -> DataSetModel:
        """Model built by data_set()."""
        return self.__result

    @v_args(inline=True)
    def data_set_content(self, *args):
        """Merge every child into a single dictionary and return it."""
        merged = {}
        for part in args:
            merged.update(part)
        return merged

    @v_args(inline=True)
    def data_set(self, *args):
        """Merge every child into a single dictionary and build the model."""
        merged = {}
        for part in args:
            merged.update(part)
        self.__result = DataSetModel.from_dict(merged)

    @v_args(inline=True)
    def data_set_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_stop(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_host(self, start, properties, stop):
        """Wrap the properties under the DATA_SET_HOST key."""
        return {"DATA_SET_HOST": properties}

    @v_args(inline=True)
    def data_set_host_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_host_stop(self, *args):
        return ""

    @v_args(inline=True)
    def dataset_information(self, start, properties, stop):
        """Wrap the properties under the DATA_SET_INFORMATION key."""
        return {"DATA_SET_INFORMATION": properties}

    @v_args(inline=True)
    def dataset_information_start(self, *args):
        return ""

    @v_args(inline=True)
    def dataset_information_stop(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_targets(self, *args):
        """Group every target block (as a tuple) under one key."""
        return {"DATA_SET_TARGET": args}

    @v_args(inline=True)
    def data_set_target(self, start, properties, stop):
        """Keep the properties only; the start/stop markers are ignored."""
        return properties

    @v_args(inline=True)
    def data_set_target_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_target_stop(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_mission(self, start, properties, stop):
        """Wrap the properties under the DATA_SET_MISSION key."""
        return {"DATA_SET_MISSION": properties}

    @v_args(inline=True)
    def data_set_mission_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_mission_stop(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_reference_informations(self, *args):
        """Group every reference block (as a tuple) under one key."""
        return {"DATA_SET_REFERENCE_INFORMATION": args}

    @v_args(inline=True)
    def data_set_reference_information(self, start, properties, stop):
        """Keep the properties only; the start/stop markers are ignored."""
        return properties

    @v_args(inline=True)
    def data_set_reference_information_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_reference_information_stop(self, *args):
        return ""
class PdsParserFactory(ABC):
    """Factory to select the right parser and the related Lark grammar."""

    DEFAULT_PARSER_TIMEOUT: int = (
        30  # default timeout in seconds to parse a resource
    )

    class FileGrammary(GrammarEnum):
        """Mapping between enum, Lark grammar and implementation class."""

        REFERENCE_CATALOG = (
            "REFERENCE_CATALOG",
            "grammar_ref_cat.lark",
            "ReferenceCatalogTransformer",
            "Grammary for reference catalog",
        )
        MISSION_CATALOG = (
            "MISSION_CATALOG",
            "grammar_mission_cat.lark",
            "MissionCatalogTransformer",
            "Grammary for mission catalog",
        )
        PERSONNEL_CATALOG = (
            "PERSONNEL_CATALOG",
            "grammar_person_cat.lark",
            "PersonCatalogTransformer",
            "Grammary for person catalog",
        )
        INSTRUMENT_CATALOG = (
            "INSTRUMENT_CATALOG",
            "grammar_inst_cat.lark",
            "InstrumentCatalogTransformer",
            "Grammary for instrument catalog",
        )
        INSTRUMENT_HOST_CATALOG = (
            "INSTRUMENT_HOST_CATALOG",
            "grammar_inst_host.lark",
            "InstrumentHostCatalogTransformer",
            "Grammary for instrument host catalog",
        )
        DATA_SET_CATALOG = (
            "DATA_SET_CATALOG",
            "grammar_ds_cat.lark",
            "DataSetCatalogTransformer",
            "Grammary for dataset catalog",
        )
        VOL_DESC = (
            "VOL_DESC",
            "grammar_vol_desc.lark",
            "VolumeDescriptionTransformer",
            "Grammary for volume description",
        )
        DATA_SET_MAP_PROJECTION_CATALOG = (
            "DATA_SET_MAP_PROJECTION_CATALOG",
            "grammar_projection.lark",
            "ProjectionDescriptionTransformer",
            "Grammary for dataset map projection catalog",
        )

        @staticmethod
        def get_enum_from(name: str):
            """Return the member whose name is *name*.

            Args:
                name (str): Name of the member (e.g. ``"VOL_DESC"``)

            Raises:
                KeyError: No member has this name

            Returns:
                FileGrammary: The matching member
            """
            members = PdsParserFactory.FileGrammary._member_map_
            if name not in members:
                raise KeyError(f"File Grammary enum not found from {name}")
            return members[name]

    # Alias so that the lookup resolves both as
    # PdsParserFactory.get_enum_from(...) and as
    # PdsParserFactory.FileGrammary.get_enum_from(...).
    get_enum_from = staticmethod(FileGrammary.get_enum_from)

    @staticmethod
    def _read_content(uri: str) -> str:
        """Return the catalog text designated by *uri*.

        *uri* is handled, in this order, as:

        1. an http(s) URL: the resource is downloaded;
        2. a path on the file system when it does not contain the
           ``PDS_VERSION_ID`` keyword: the file is read;
        3. the catalog content itself otherwise.

        Raises:
            Exception: The download did not return a successful status
            OSError: The file cannot be opened
        """
        # The URL test comes first: an URL (almost) never contains
        # PDS_VERSION_ID, so testing the keyword first sends every URL to
        # open() and the download is never reached.
        if uri.lower().startswith(("http://", "https://")):
            logger.debug("[PdsParserFactory] URI is an URL")
            # NOTE(review): verify=False disables the TLS certificate
            # verification - kept as is, confirm that this is acceptable.
            with closing(
                requests_retry_session().get(
                    uri, stream=True, verify=False, timeout=(180, 1800)
                )
            ) as response:
                if not response.ok:
                    raise Exception(uri)
                return response.text

        # Testing the existence of the file is not reliable here because
        # *uri* may be a whole catalog; some catalogs start with
        # 'CCSD3ZF0000100000001NJPL3IF0PDSX00000001 instead of
        # PDS_VERSION_ID = PDS3, so the keyword is searched anywhere in *uri*.
        if "PDS_VERSION_ID" not in uri:
            logger.debug("[PdsParserFactory] URI is a file")
            with open(uri, encoding="utf8", errors="ignore") as f:
                return f.read()

        logger.debug("[PdsParserFactory] URI is a content")
        return uri

    @staticmethod
    def parse(uri: str, type_file: FileGrammary, **args) -> Any:
        """Parse the content of a file provided an URI by using a Lark grammar.

        Args:
            uri (str): URI of the file or directly content of the file
            type_file (FileGrammary): Type of file
            **args: Only ``timeout`` is read: maximum number of seconds
                allowed to parse the resource (DEFAULT_PARSER_TIMEOUT when
                it is not provided)

        Raises:
            NotImplementedError: Unknown implementation class
            ParserTimeOutError: The parsing did not finish within the timeout

        Note: The timeout relies on SIGALRM (signal.signal / signal.alarm).

        Returns:
            Any: One of the models
        """
        timeout: int = args.get(
            "timeout", PdsParserFactory.DEFAULT_PARSER_TIMEOUT
        )
        logger.debug(f"[PdsParserFactory] {uri}")
        content: str = PdsParserFactory._read_content(uri)

        grammary_file: str = os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            "grammar",
            type_file.grammar,
        )

        # Resolve the transformer class on its own, so that only a missing
        # class is reported as "not implemented": an AttributeError raised
        # later, while transforming, must not be mistaken for it.
        try:
            module = importlib.import_module(__name__)
            transformer_class = getattr(module, type_file.class_name)
        except (ModuleNotFoundError, AttributeError) as err:
            raise NotImplementedError(
                "Cannot load data products plugin with "
                + __name__
                + "."
                + type_file.class_name
            ) from err

        def timeout_handler(signum, frame):
            raise TimeoutError("Parsing took too long!")

        previous_handler = signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(timeout)
        try:
            # Everything that runs while the alarm is armed stays inside the
            # try block, so the alarm is always cancelled by ``finally``.
            parser: Lark = Lark.open(grammary_file, rel_to=__file__)
            transformer: PdsTransformer = transformer_class()
            transformer.transform(parser.parse(content))
            return transformer.result
        except TimeoutError as err:
            err_msg = f"Parsing {uri} took too long!"
            logger.critical(err_msg)
            raise ParserTimeOutError(err_msg) from err
        finally:
            signal.alarm(0)
            # Give SIGALRM back to its previous handler; None means that the
            # previous handler was not installed from Python and cannot be
            # reinstalled with signal.signal().
            if previous_handler is not None:
                signal.signal(signal.SIGALRM, previous_handler)