Coverage for pds_crawler/load/pds_objects_parser.py: 90%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

568 statements  

1# -*- coding: utf-8 -*- 

2# pds-crawler - ETL to index PDS data to pdssp 

3# Copyright (C) 2023 - CNES (Jean-Christophe Malapert for Pôle Surfaces Planétaires) 

4# This file is part of pds-crawler <https://github.com/pdssp/pds_crawler> 

5# SPDX-License-Identifier: LGPL-3.0-or-later 

6""" 

7Module Name: 

8 pds_objects_parser 

9 

10Description: 

11 the pds_objects_parser module parses the PDS3 catalogs by providing the parser and stores 

12 the information in the appropriate model. 

13 

14Classes: 

15 GrammarEnum: 

16 Abstract Enum to create a concrete grammar for Lark 

17 PdsTransformer : 

18 Common parser, used by others parsers. 

19 MissionCatalogTransformer : 

20 Parses the PDS3 mission catalog file that contains the mission information 

21 and stores the information in the MissionModel class. 

22 ReferenceCatalogTransformer : 

23 Parses the PDS3 reference catalog file that contains the citations and 

24 stores the information in the ReferencesModel class. 

25 PersonCatalogTransformer : 

26 Parses the PDS3 person catalog file that contains the points of contact and 

27 stores the information in the PersonnelsModel model. 

28 VolumeDescriptionTransformer : 

29 Parses the PDS3 volume catalog file that contains the references to others 

30 catalogs and stores the information in the VolumeModel model. 

31 InstrumentCatalogTransformer : 

32 Parses the PDS3 instrument catalog file that contains the instrument information 

33 and stores the information in the InstrumentModel model. 

34 InstrumentHostCatalogTransformer : 

35 Parses the PDS3 platform catalog file that contains the platform description 

36 and stores the information in the InstrumentHostModel model. 

37 DataSetCatalogTransformer : 

38 Parses the PDS3 dataset catalog file that contains the dataset description 

39 and stores the information in the DataSetModel class. 

40 PdsParserFactory : 

41 Factory to select the right parser and the related Lark grammar. 

42 

43 

44.. uml:: 

45 

46 class PdsTransformer{ 

47 +__init__(visit_tokens: bool = True) 

48 +result() : Any 

49 } 

50 class ProjectionDescriptionTransformer{ 

51 +__init__(visit_tokens: bool = True) 

52 +result() : DataSetMapProjectionModel 

53 -__result : DataSetMapProjectionModel 

54 } 

55 class MissionCatalogTransformer{ 

56 +__init__(visit_tokens: bool = True) 

57 +result() : MissionModel 

58 -__result : MissionModel 

59 } 

60 class ReferenceCatalogTransformer{ 

61 +__init__(visit_tokens: bool = True) 

62 +result() : ReferencesModel 

63 -__result : ReferencesModel 

64 } 

65 class PersonCatalogTransformer{ 

66 +__init__(visit_tokens: bool = True) 

67 +result() : PersonnelsModel 

68 -__result : PersonnelsModel 

69 } 

70 class VolumeDescriptionTransformer{ 

71 +__init__(visit_tokens: bool = True) 

72 +result() : VolumeModel 

73 -__result : VolumeModel 

74 } 

75 class InstrumentCatalogTransformer{ 

76 +__init__(visit_tokens: bool = True) 

77 +result() : InstrumentModel 

78 -__result : InstrumentModel 

79 } 

80 class DataSetCatalogTransformer{ 

81 +__init__(visit_tokens: bool = True) 

82 +result() : DataSetModel 

83 -__result : DataSetModel 

84 } 

85 PdsTransformer <|-- ProjectionDescriptionTransformer 

86 PdsTransformer <|-- MissionCatalogTransformer 

87 PdsTransformer <|-- ReferenceCatalogTransformer 

88 PdsTransformer <|-- PersonCatalogTransformer 

89 PdsTransformer <|-- VolumeDescriptionTransformer 

90 PdsTransformer <|-- InstrumentCatalogTransformer 

91 PdsTransformer <|-- DataSetCatalogTransformer 

92 

93Author: 

94 Jean-Christophe Malapert 

95""" 

96import importlib 

97import logging 

98import os 

99import signal 

100from abc import ABC 

101from abc import abstractproperty 

102from contextlib import closing 

103from enum import Enum 

104from pathlib import Path 

105from typing import Any 

106 

107from lark import Lark 

108from lark import Transformer 

109from lark import Tree 

110from lark import v_args 

111 

112from ..exception import ParserTimeOutError 

113from ..models import DataSetMapProjectionModel 

114from ..models import DataSetModel 

115from ..models import InstrumentHostModel 

116from ..models import InstrumentModel 

117from ..models import MissionModel 

118from ..models import PersonnelsModel 

119from ..models import ReferencesModel 

120from ..models import VolumeModel 

121from ..utils import requests_retry_session 

122 

123logger = logging.getLogger(__name__) 

124 

125 

class GrammarEnum(Enum):
    """Enum base class whose members also carry a grammar and a class name.

    Members are declared as tuples ``(value, grammar, class_name[, doc])``.
    The grammar and class name are exposed through the read-only
    ``grammar`` and ``class_name`` properties; the optional fourth item
    becomes the member's ``__doc__``.
    """

    def __new__(cls, *args):
        member = object.__new__(cls)
        # NOTE(review): the enum value is taken from the SECOND tuple item
        # (the grammar slot of ``__init__``), not from the first one.
        member._value_ = str(args[1])  # keep the value of the enum
        return member

    # ``value`` is accepted only to match the member tuple layout; the enum
    # value itself has already been assigned in ``__new__``.
    def __init__(  # pylint: disable=too-many-arguments
        self,
        value: str,
        grammar: str,
        class_name: str,
        doc=None,
    ):
        self._grammar_: str = grammar
        self._class_name_: str = class_name
        if doc is None:
            return
        self.__doc__ = doc

    @property
    def grammar(self) -> str:
        """Grammar entry given in the member declaration."""
        return self._grammar_

    @property
    def class_name(self) -> str:
        """Class name given in the member declaration."""
        return self._class_name_

154 

155 

class PdsTransformer(Transformer):
    """Base Lark transformer with the callbacks shared by the catalog parsers.

    Every callback is decorated with ``v_args(inline=True)``, so the children
    of the matched rule arrive as positional arguments. Subclasses expose the
    object they build through the ``result`` property.
    """

    def __init__(self, visit_tokens: bool = True) -> None:
        super().__init__(visit_tokens)

    # ``abc.abstractproperty`` is deprecated since Python 3.3; it is kept here
    # because it is the decorator this module imports.
    @abstractproperty
    def result(self) -> Any:
        """Object produced by the transformation (provided by subclasses)."""
        raise NotImplementedError("Method def result(self) not implemented")

    @v_args(inline=True)
    def properties(self, *args):
        """Merge every child mapping into a single dictionary."""
        merged = {}
        for entry in args:
            merged.update(entry)
        return merged

    @v_args(inline=True)
    def property(self, keyword, value):
        """Build a one-entry mapping ``{keyword: value}``."""
        return {keyword: value}

    @v_args(inline=True)
    def keyword_property(self, name):
        """Return the keyword without leading/trailing whitespace."""
        return name.strip()

    @v_args(inline=True)
    def value_property(self, name):
        """Pass the value through unchanged."""
        return name

    # Delimiter rules carry no data: each one collapses to an empty string.

    @v_args(inline=True)
    def open_list(self, *args):
        return ""

    @v_args(inline=True)
    def close_list(self, *args):
        return ""

    @v_args(inline=True)
    def open_parenthesis(self, *args):
        return ""

    @v_args(inline=True)
    def close_parenthesis(self, *args):
        return ""

    @v_args(inline=True)
    def open_bracket(self, *args):
        return ""

    @v_args(inline=True)
    def close_bracket(self, *args):
        return ""

    @v_args(inline=True)
    def simple_value(self, name):
        """Return the value with surrounding double quotes removed."""
        return name.strip('"')

    @v_args(inline=True)
    def standard_value(self, *args):
        """Concatenate all the children into one string."""
        return "".join(args)

    @v_args(inline=True)
    def tiret(self):
        """Return a dash."""
        return "-"

    @v_args(inline=True)
    def point(self):
        """Return a dot."""
        return "."

    @v_args(inline=True)
    def multi_values(self, *args):
        """Collect the children in a list, dropping empty strings."""
        return [item for item in args if item != ""]

    @v_args(inline=True)
    def common_comma(self, name):
        """Return a comma, whatever the matched child is."""
        return ","

    @v_args(inline=True)
    def simple_value_comma(self, name, *args):
        """Keep the first child and ignore the remaining ones."""
        return name

    @v_args(inline=True)
    def string(self, name):
        """Pass the string through unchanged."""
        return name

    @v_args(inline=True)
    def multi_lines_string(self, name):
        """Pass the multi-line string through unchanged."""
        return name

    @v_args(inline=True)
    def date_str(self, *args):
        """Concatenate the children into one string."""
        return "".join(args)

259 

260 

class ProjectionDescriptionTransformer(PdsTransformer):
    """Transformer for the PDS3 map projection catalog.

    The parsed tree is folded into a dictionary that is passed to
    ``DataSetMapProjectionModel.from_dict``; the resulting model is
    available through ``result``.
    """

    def __init__(self, visit_tokens: bool = True) -> None:
        super().__init__(visit_tokens)
        # Declaration only (no value assigned here): the attribute is set by
        # ``data_set_map_projection``.
        self.__result: DataSetMapProjectionModel

    @property
    def result(self) -> DataSetMapProjectionModel:
        """Model built by ``data_set_map_projection``."""
        return self.__result

    @v_args(inline=True)
    def data_set_map_projection(
        self, start, properties, data_set_map_projection_info, stop
    ):
        """Merge the properties with the info section and build the model."""
        merged = {}
        for part in (properties, data_set_map_projection_info):
            merged.update(part)
        self.__result = DataSetMapProjectionModel.from_dict(merged)

    @v_args(inline=True)
    def data_set_map_projection_info(
        self, start, properties, data_set_map_projection_refs_info, stop
    ):
        """Add the reference entries to the properties (in place) and wrap them."""
        properties.update(data_set_map_projection_refs_info)
        return {"DATA_SET_MAP_PROJECTION_INFO": properties}

    @v_args(inline=True)
    def data_set_map_projection_refs_info(self, *args):
        """Group all the reference children (as a tuple) under one key."""
        return {"DS_MAP_PROJECTION_REF_INFO": args}

    @v_args(inline=True)
    def data_set_map_projection_ref_info(self, start, properties, stop):
        """Keep only the properties of one reference entry."""
        return properties

    # Start/stop rules carry no data: each one collapses to an empty string.

    @v_args(inline=True)
    def data_set_map_projection_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_map_projection_stop(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_map_projection_info_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_map_projection_info_stop(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_map_projection_ref_info_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_map_projection_ref_info_stop(self, *args):
        return ""

319 

320 

class MissionCatalogTransformer(PdsTransformer):
    """Transformer for the PDS3 mission catalog.

    The parsed tree is folded into a dictionary that is passed to
    ``MissionModel.from_dict``; the resulting model is available through
    ``result``.
    """

    def __init__(self, visit_tokens: bool = True) -> None:
        super().__init__(visit_tokens)
        # Declaration only (no value assigned here): the attribute is set by
        # ``mission``.
        self.__result: MissionModel

    @property
    def result(self) -> MissionModel:
        """Model built by ``mission``."""
        return self.__result

    @v_args(inline=True)
    def mission(
        self,
        start,
        properties,
        mission_information,
        mission_host,
        mission_reference_informations,
        stop,
    ):
        """Merge all the mission sections, in order, and build the model."""
        sections = (
            properties,
            mission_information,
            mission_host,
            mission_reference_informations,
        )
        merged = {}
        for section in sections:
            merged.update(section)
        self.__result = MissionModel.from_dict(merged)

    @v_args(inline=True)
    def mission_information(self, start, properties, stop):
        """Wrap the properties under ``MISSION_INFORMATION``."""
        return {"MISSION_INFORMATION": properties}

    @v_args(inline=True)
    def mission_host(self, start, properties, mission_targets, stop):
        """Add the targets to the host properties (in place) and wrap them."""
        properties.update(mission_targets)
        return {"MISSION_HOST": properties}

    @v_args(inline=True)
    def mission_targets(self, *args):
        """Group all the target children (as a tuple) under one key."""
        return {"MISSION_TARGET": args}

    @v_args(inline=True)
    def mission_target(self, start, properties, stop):
        """Keep only the properties of one target."""
        return properties

    @v_args(inline=True)
    def mission_reference_informations(self, *args):
        """Group all the reference children (as a tuple) under one key."""
        return {"MISSION_REFERENCE_INFORMATION": args}

    @v_args(inline=True)
    def mission_reference_information(self, start, properties, stop):
        """Keep only the properties of one reference."""
        return properties

    # Start/stop rules carry no data: each one collapses to an empty string.

    @v_args(inline=True)
    def mission_start(self, *args):
        return ""

    @v_args(inline=True)
    def mission_stop(self, *args):
        return ""

    @v_args(inline=True)
    def mission_information_start(self, *args):
        return ""

    @v_args(inline=True)
    def mission_information_stop(self, *args):
        return ""

    @v_args(inline=True)
    def mission_host_start(self, *args):
        return ""

    @v_args(inline=True)
    def mission_host_stop(self, *args):
        return ""

    @v_args(inline=True)
    def mission_target_start(self, *args):
        return ""

    @v_args(inline=True)
    def mission_target_stop(self, *args):
        return ""

    @v_args(inline=True)
    def mission_reference_information_start(self, *args):
        return ""

    @v_args(inline=True)
    def mission_reference_information_stop(self, *args):
        return ""

415 

416 

class ReferenceCatalogTransformer(PdsTransformer):
    """Parses the PDS3 reference catalog file that contains the citations and
    stores the information in the ReferencesModel class.
    """

    def __init__(self, visit_tokens: bool = True) -> None:
        super().__init__(visit_tokens)
        # Declaration only (no value assigned here): the attribute is set by
        # ``references``.
        self.__result: ReferencesModel

    @property
    def result(self) -> ReferencesModel:
        """Model built by ``references``."""
        return self.__result

    @v_args(inline=True)
    def references(self, *args):
        """Build the model from all the reference children (kept as a tuple)."""
        self.__result = ReferencesModel.from_dict({"REFERENCES": args})

    @v_args(inline=True)
    def reference(self, start, properties, stop):
        """Keep only the properties of one reference."""
        return properties

    # The two callbacks below were declared without ``self`` and only worked
    # because ``*args`` absorbed the instance; they now follow the signature
    # used by every other start/stop callback.

    @v_args(inline=True)
    def reference_start(self, *args):
        """Start marker: carries no data."""
        return ""

    @v_args(inline=True)
    def reference_stop(self, *args):
        """Stop marker: carries no data."""
        return ""

445 

446 

class PersonCatalogTransformer(PdsTransformer):
    """Transformer for the PDS3 person catalog.

    The personnel entries are gathered and passed to
    ``PersonnelsModel.from_dict``; the resulting model is available through
    ``result``.
    """

    def __init__(self, visit_tokens: bool = True) -> None:
        super().__init__(visit_tokens)
        # Declaration only (no value assigned here): the attribute is set by
        # ``personnels``.
        self.__result: PersonnelsModel

    @property
    def result(self) -> PersonnelsModel:
        """Model built by ``personnels``."""
        return self.__result

    @v_args(inline=True)
    def personnels(self, *args):
        """Build the model from all the personnel children (kept as a tuple)."""
        self.__result = PersonnelsModel.from_dict({"PERSONNELS": args})

    @v_args(inline=True)
    def personnel(
        self,
        start,
        pds_user_id,
        personnel_information,
        personnel_electronic_mail,
        stop,
    ):
        """Merge the three sections of one personnel entry into one mapping."""
        entry = {}
        for section in (
            pds_user_id,
            personnel_information,
            personnel_electronic_mail,
        ):
            entry.update(section)
        return entry

    @v_args(inline=True)
    def pds_user_value(self, name):
        """Pass the user value through unchanged."""
        return name

    @v_args(inline=True)
    def pds_user_id(self, name):
        """Wrap the identifier under ``PDS_USER_ID``."""
        return {"PDS_USER_ID": name}

    @v_args(inline=True)
    def personnel_information(self, start, properties, stop):
        """Wrap the properties under ``PERSONNEL_INFORMATION``."""
        return {"PERSONNEL_INFORMATION": properties}

    @v_args(inline=True)
    def personnel_electronic_mail(self, start, name, stop):
        """Wrap the middle child under ``PERSONNEL_ELECTRONIC_MAIL``."""
        return {"PERSONNEL_ELECTRONIC_MAIL": name}

    # Start/stop rules carry no data: each one collapses to an empty string.

    @v_args(inline=True)
    def personnel_start(self, *args):
        return ""

    @v_args(inline=True)
    def personnel_stop(self, *args):
        return ""

    @v_args(inline=True)
    def personnel_information_start(self, *args):
        return ""

    @v_args(inline=True)
    def personnel_information_stop(self, *args):
        return ""

    @v_args(inline=True)
    def personnel_electronic_mail_start(self, *args):
        return ""

    @v_args(inline=True)
    def personnel_electronic_mail_stop(self, *args):
        return ""

518 

519 

class VolumeDescriptionTransformer(PdsTransformer):
    """Transformer for the PDS3 volume description file.

    The parsed tree is folded into a dictionary that is passed to
    ``VolumeModel.from_dict``; the resulting model is available through
    ``result``.
    """

    def __init__(self, visit_tokens: bool = True) -> None:
        super().__init__(visit_tokens)
        # Declaration only (no value assigned here): the attribute is set by
        # ``volume``.
        self.__result: VolumeModel

    @property
    def result(self) -> VolumeModel:
        """Model built by ``volume``."""
        return self.__result

    @v_args(inline=True)
    def volume(self, *args):
        """Merge every transformed child and build the model.

        Children that are still ``Tree`` instances (start or stop markers
        that no callback converted) are skipped.
        """
        merged = {}
        for child in args:
            if not isinstance(child, Tree):
                merged.update(child)
        self.__result = VolumeModel.from_dict(merged)

    @v_args(inline=True)
    def data_producer(self, start, properties, stop):
        """Wrap the properties under ``DATA_PRODUCER``."""
        return {"DATA_PRODUCER": properties}

    @v_args(inline=True)
    def catalog(self, start, properties, stop):
        """Wrap the properties under ``CATALOG``."""
        return {"CATALOG": properties}

    @v_args(inline=True)
    def data_supplier(self, start, properties, stop):
        """Wrap the properties under ``DATA_SUPPLIER``."""
        return {"DATA_SUPPLIER": properties}

    @v_args(inline=True)
    def files(self, *args):
        """Group all the file children (as a tuple) under ``FILE``."""
        return {"FILE": args}

    @v_args(inline=True)
    def file(self, start, properties, stop):
        """Keep only the properties of one file."""
        return properties

    @v_args(inline=True)
    def directories(self, *args):
        """Group all the directory children (as a tuple) under ``DIRECTORY``."""
        return {"DIRECTORY": args}

    @v_args(inline=True)
    def directory(self, start, properties, files, stop):
        """Add the files to the directory properties (in place)."""
        properties.update(files)
        return properties

    # Start/stop rules carry no data: each one collapses to an empty string.

    @v_args(inline=True)
    def volume_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_producer_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_producer_stop(self, *args):
        return ""

    @v_args(inline=True)
    def catalog_start(self, *args):
        return ""

    @v_args(inline=True)
    def catalog_stop(self, *args):
        return ""

    @v_args(inline=True)
    def data_supplier_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_supplier_stop(self, *args):
        return ""

    @v_args(inline=True)
    def file_start(self, *args):
        return ""

    @v_args(inline=True)
    def file_stop(self, *args):
        return ""

    @v_args(inline=True)
    def directory_start(self, *args):
        return ""

    @v_args(inline=True)
    def directory_stop(self, *args):
        return ""

615 

616 

class InstrumentCatalogTransformer(PdsTransformer):
    """Parses the PDS3 instrument catalog file that contains the instrument
    information and stores the information in the InstrumentModel model.
    """

    def __init__(self, visit_tokens: bool = True) -> None:
        super().__init__(visit_tokens)
        # Declaration only (no value assigned here): the attribute is set by
        # ``instrument``.
        self.__result: InstrumentModel

    @property
    def result(self) -> InstrumentModel:
        """Model built by ``instrument``."""
        return self.__result

    @v_args(inline=True)
    def instrument(
        self,
        start,
        properties,
        instrument_information,
        instrument_reference_infos,
        stop,
    ):
        """Merge all the instrument sections, in order, and build the model."""
        instrument = dict()
        instrument.update(properties)
        instrument.update(instrument_information)
        instrument.update(instrument_reference_infos)
        self.__result = InstrumentModel.from_dict(instrument)

    @v_args(inline=True)
    def instrument_start(self, *args):
        """Start marker: carries no data."""
        return ""

    @v_args(inline=True)
    def instrument_stop(self, *args):
        """Stop marker: carries no data."""
        return ""

    @v_args(inline=True)
    def instrument_information(self, start, properties, stop):
        """Wrap the properties under ``INSTRUMENT_INFORMATION``."""
        return {"INSTRUMENT_INFORMATION": properties}

    @v_args(inline=True)
    def instrument_information_start(self, *args):
        """Start marker: carries no data."""
        return ""

    @v_args(inline=True)
    def instrument_information_stop(self, *args):
        """Stop marker: carries no data."""
        return ""

    @v_args(inline=True)
    def instrument_reference_infos(self, *args):
        """Group all the reference children (as a tuple) under one key."""
        return {"INSTRUMENT_REFERENCE_INFO": args}

    @v_args(inline=True)
    def instrument_reference_info(self, start, properties, stop):
        """Keep only the properties of one reference."""
        return properties

    @v_args(inline=True)
    def instrument_reference_info_start(self, *args):
        """Start marker: carries no data."""
        return ""

    @v_args(inline=True)
    def instrument_reference_info_stop(self, *args):
        """Stop marker: carries no data."""
        return ""

680 

681 

class InstrumentHostCatalogTransformer(PdsTransformer):
    """Transformer for the PDS3 instrument host (platform) catalog.

    The parsed tree is folded into a dictionary that is passed to
    ``InstrumentHostModel.from_dict``; the resulting model is available
    through ``result``.
    """

    def __init__(self, visit_tokens: bool = True) -> None:
        super().__init__(visit_tokens)
        # Declaration only (no value assigned here): the attribute is set by
        # ``instrument_host``.
        self.__result: InstrumentHostModel

    @property
    def result(self) -> InstrumentHostModel:
        """Model built by ``instrument_host``."""
        return self.__result

    @v_args(inline=True)
    def instrument_host(
        self,
        start,
        properties,
        instrument_host_information,
        instrument_host_reference_infos,
        stop,
    ):
        """Merge all the host sections, in order, and build the model."""
        sections = (
            properties,
            instrument_host_information,
            instrument_host_reference_infos,
        )
        merged = {}
        for section in sections:
            merged.update(section)
        self.__result = InstrumentHostModel.from_dict(merged)

    @v_args(inline=True)
    def instrument_host_information(self, start, properties, stop):
        """Wrap the properties under ``INSTRUMENT_HOST_INFORMATION``."""
        return {"INSTRUMENT_HOST_INFORMATION": properties}

    @v_args(inline=True)
    def instrument_host_reference_infos(self, *args):
        """Group all the reference children (as a tuple) under one key."""
        return {"INSTRUMENT_HOST_REFERENCE_INFO": args}

    @v_args(inline=True)
    def instrument_host_reference_info(self, start, properties, stop):
        """Keep only the properties of one reference."""
        return properties

    # Start/stop rules carry no data: each one collapses to an empty string.

    @v_args(inline=True)
    def instrument_host_start(self, *args):
        return ""

    @v_args(inline=True)
    def instrument_host_stop(self, *args):
        return ""

    @v_args(inline=True)
    def instrument_host_information_start(self, *args):
        return ""

    @v_args(inline=True)
    def instrument_host_information_stop(self, *args):
        return ""

    @v_args(inline=True)
    def instrument_host_reference_info_start(self, *args):
        return ""

    @v_args(inline=True)
    def instrument_host_reference_info_stop(self, *args):
        return ""

745 

746 

class DataSetCatalogTransformer(PdsTransformer):
    """Transformer for the PDS3 dataset catalog.

    The parsed tree is folded into a dictionary that is passed to
    ``DataSetModel.from_dict``; the resulting model is available through
    ``result``.
    """

    def __init__(self, visit_tokens: bool = True) -> None:
        super().__init__(visit_tokens)
        # Declaration only (no value assigned here): the attribute is set by
        # ``data_set``.
        self.__result: DataSetModel

    @property
    def result(self) -> DataSetModel:
        """Model built by ``data_set``."""
        return self.__result

    @v_args(inline=True)
    def data_set_content(self, *args):
        """Merge every child into one dictionary and return it."""
        content = {}
        for child in args:
            content.update(child)
        return content

    @v_args(inline=True)
    def data_set(self, *args):
        """Merge every child into one dictionary and build the model.

        An empty-string child adds nothing: ``dict.update("")`` is a no-op.
        """
        merged = {}
        for child in args:
            merged.update(child)
        self.__result = DataSetModel.from_dict(merged)

    @v_args(inline=True)
    def data_set_host(self, start, properties, stop):
        """Wrap the properties under ``DATA_SET_HOST``."""
        return {"DATA_SET_HOST": properties}

    @v_args(inline=True)
    def dataset_information(self, start, properties, stop):
        """Wrap the properties under ``DATA_SET_INFORMATION``."""
        return {"DATA_SET_INFORMATION": properties}

    @v_args(inline=True)
    def data_set_targets(self, *args):
        """Group all the target children (as a tuple) under one key."""
        return {"DATA_SET_TARGET": args}

    @v_args(inline=True)
    def data_set_target(self, start, properties, stop):
        """Keep only the properties of one target."""
        return properties

    @v_args(inline=True)
    def data_set_mission(self, start, properties, stop):
        """Wrap the properties under ``DATA_SET_MISSION``."""
        return {"DATA_SET_MISSION": properties}

    @v_args(inline=True)
    def data_set_reference_informations(self, *args):
        """Group all the reference children (as a tuple) under one key."""
        return {"DATA_SET_REFERENCE_INFORMATION": args}

    @v_args(inline=True)
    def data_set_reference_information(self, start, properties, stop):
        """Keep only the properties of one reference."""
        return properties

    # Start/stop rules carry no data: each one collapses to an empty string.

    @v_args(inline=True)
    def data_set_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_stop(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_host_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_host_stop(self, *args):
        return ""

    @v_args(inline=True)
    def dataset_information_start(self, *args):
        return ""

    @v_args(inline=True)
    def dataset_information_stop(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_target_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_target_stop(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_mission_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_mission_stop(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_reference_information_start(self, *args):
        return ""

    @v_args(inline=True)
    def data_set_reference_information_stop(self, *args):
        return ""

849 

850 

class PdsParserFactory(ABC):
    """Factory to select the right parser and the related Lark grammar."""

    DEFAULT_PARSER_TIMEOUT: int = (
        30  # default timeout in seconds to parse a resource
    )

    class FileGrammary(GrammarEnum):
        """Mapping between enum, Lark grammar and implementation class."""

        REFERENCE_CATALOG = (
            "REFERENCE_CATALOG",
            "grammar_ref_cat.lark",
            "ReferenceCatalogTransformer",
            "Grammary for reference catalog",
        )
        MISSION_CATALOG = (
            "MISSION_CATALOG",
            "grammar_mission_cat.lark",
            "MissionCatalogTransformer",
            "Grammary for mission catalog",
        )
        PERSONNEL_CATALOG = (
            "PERSONNEL_CATALOG",
            "grammar_person_cat.lark",
            "PersonCatalogTransformer",
            "Grammary for person catalog",
        )
        INSTRUMENT_CATALOG = (
            "INSTRUMENT_CATALOG",
            "grammar_inst_cat.lark",
            "InstrumentCatalogTransformer",
            "Grammary for instrument catalog",
        )
        INSTRUMENT_HOST_CATALOG = (
            "INSTRUMENT_HOST_CATALOG",
            "grammar_inst_host.lark",
            "InstrumentHostCatalogTransformer",
            "Grammary for instrument host catalog",
        )
        DATA_SET_CATALOG = (
            "DATA_SET_CATALOG",
            "grammar_ds_cat.lark",
            "DataSetCatalogTransformer",
            "Grammary for dataset catalog",
        )
        VOL_DESC = (
            "VOL_DESC",
            "grammar_vol_desc.lark",
            "VolumeDescriptionTransformer",
            "Grammary for volume description",
        )
        DATA_SET_MAP_PROJECTION_CATALOG = (
            "DATA_SET_MAP_PROJECTION_CATALOG",
            "grammar_projection.lark",
            "ProjectionDescriptionTransformer",
            "Grammary for volume description",
        )

        @staticmethod
        def get_enum_from(name: str):
            """Return the member whose name is *name*.

            Args:
                name (str): Member name, e.g. ``"VOL_DESC"``

            Raises:
                KeyError: No member has this name

            Returns:
                FileGrammary: The matching member
            """
            members = PdsParserFactory.FileGrammary._member_map_
            if name in members:
                return members[name]
            else:
                raise KeyError(f"File Grammary enum not found from {name}")

    @staticmethod
    def _is_local_file(uri: str) -> bool:
        """Tell whether *uri* is the path of an existing regular file."""
        # A string holding PDS_VERSION_ID is catalog content, not a path.
        # The marker is searched anywhere in the string because some catalogs
        # start with 'CCSD3ZF0000100000001NJPL3IF0PDSX00000001' instead of
        # 'PDS_VERSION_ID = PDS3'.
        if "PDS_VERSION_ID" in uri:
            return False
        try:
            return Path(uri).is_file()
        except (OSError, ValueError):
            # e.g. a name too long for the file system: not a usable path
            return False

    @staticmethod
    def _read_content(uri: str) -> str:
        """Return the text to parse: file content, URL body or *uri* itself."""
        if PdsParserFactory._is_local_file(uri):
            logger.debug("[PdsParserFactory] URI is a file")
            with open(uri, encoding="utf8", errors="ignore") as f:
                return f.read()

        if uri.lower().startswith("http"):
            logger.debug("[PdsParserFactory] URI is an URL")
            # NOTE(review): verify=False disables the TLS certificate check.
            with closing(
                requests_retry_session().get(
                    uri, stream=True, verify=False, timeout=(180, 1800)
                )
            ) as response:
                if not response.ok:
                    raise Exception(uri)
                return response.text

        logger.debug("[PdsParserFactory] URI is a content")
        return uri

    @staticmethod
    def parse(uri: str, type_file: FileGrammary, **args) -> Any:
        """Parse the content of a file provided an URI by using a Lark grammar.

        Args:
            uri (str): Path of a local file, HTTP(S) URL, or directly the
                content of the file
            type_file (FileGrammary): Type of file
            **args: Optional keyword arguments. Only ``timeout`` is read: the
                maximum parsing time in seconds
                (default ``DEFAULT_PARSER_TIMEOUT``)

        Raises:
            NotImplementedError: Unknown implementation class
            ParserTimeOutError: The parsing exceeded the timeout
            Exception: The URL did not return a successful response

        Note: The timeout relies on ``signal.alarm``/``SIGALRM``, which is
        only available on Unix and only from the main thread.

        Returns:
            Any: One of the models
        """
        timeout: int = args.get(
            "timeout", PdsParserFactory.DEFAULT_PARSER_TIMEOUT
        )
        logger.debug("[PdsParserFactory] %s", uri)
        content: str = PdsParserFactory._read_content(uri)

        grammary_file: str = os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            "grammar",
            type_file.grammar,
        )

        # Resolve the transformer class on its own so that only a lookup
        # failure (not an error raised while parsing) is reported as a
        # missing implementation.
        try:
            module = importlib.import_module(__name__)
            transformer_class = getattr(module, type_file.class_name)
        except (ModuleNotFoundError, AttributeError) as err:
            raise NotImplementedError(
                "Cannot load data products plugin with "
                + __name__
                + "."
                + type_file.class_name
            ) from err

        def timeout_handler(signum, frame):
            raise TimeoutError("Parsing took too long!")

        previous_handler = signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(timeout)
        # Everything executed while the alarm is armed lives in the ``try``
        # so that the alarm is always cancelled, even if the grammar fails
        # to load.
        try:
            parser: Lark = Lark.open(grammary_file, rel_to=__file__)
            transformer: PdsTransformer = transformer_class()
            transformer.transform(parser.parse(content))
            return transformer.result
        except TimeoutError:
            err_msg = f"Parsing {uri} took too long!"
            logger.critical(err_msg)
            raise ParserTimeOutError(err_msg)
        finally:
            signal.alarm(0)
            # ``signal.signal`` returns None when the previous handler was
            # not installed from Python; it cannot be re-installed as is.
            signal.signal(
                signal.SIGALRM,
                signal.SIG_DFL
                if previous_handler is None
                else previous_handler,
            )