From 9928ab797ebc7882708c23e2769693da9c119afa Mon Sep 17 00:00:00 2001 From: Pascal Su Date: Fri, 31 Oct 2025 17:12:10 +0100 Subject: [PATCH 1/3] Prototype Pydantic declarative scheme (#3) * Update quickstart examples * Implementation dump: Pydantic decorators but still java api compatible * Cleanup for publishing --- .gitignore | 5 +- 0.2.x/examples/quickstart/QuickStartRead.java | 42 + .../{Quickstart.java => QuickStartWrite.java} | 25 +- .../lib-ro-crate-schema/.python-version | 1 - .../FULL_EXAMPLE_EXPLANATION.md | 76 + 0.2.x/lib/python/lib-ro-crate-schema/LICENSE | 190 ++ .../python/lib-ro-crate-schema/PUBLISHING.md | 239 ++ .../PUBLISHING_CHECKLIST.md | 130 ++ .../python/lib-ro-crate-schema/QUICKSTART.md | 163 ++ .../lib/python/lib-ro-crate-schema/README.md | 283 ++- .../lib-ro-crate-schema/architecture.puml | 110 + .../lib-ro-crate-schema/class_diagram.puml | 118 + .../examples/circular_import_test.py | 174 ++ .../examples/decorator_example.py | 185 ++ .../lib-ro-crate-schema/examples/examples.py | 135 ++ .../examples/experimental_observations.csv | 8 + .../examples/export_import_pydantic_demo.py | 224 ++ .../examples/full_example.py | 652 ++++++ .../examples/minimal_import_example.py | 36 + .../examples/python_quickstart_read.py | 48 + .../examples/python_quickstart_write.py | 140 ++ .../python/lib-ro-crate-schema/pyproject.toml | 63 +- .../lib-ro-crate-schema/run_all_tests.py | 77 + .../python/lib-ro-crate-schema/run_tests.py | 104 + .../src/lib_ro_crate_schema/__init__.py | 75 +- .../src/lib_ro_crate_schema/check.py | 64 +- .../src/lib_ro_crate_schema/crate/__init__.py | 41 + .../lib_ro_crate_schema/crate/decorators.py | 211 ++ .../crate/forward_ref_resolver.py | 355 +++ .../lib_ro_crate_schema/crate/jsonld_utils.py | 225 +- .../lib_ro_crate_schema/crate/literal_type.py | 40 +- .../crate/metadata_entry.py | 185 +- .../src/lib_ro_crate_schema/crate/prefix.py | 19 - .../crate/property_type.py | 8 + .../src/lib_ro_crate_schema/crate/rdf.py | 24 +- 
.../crate/reconstruction.py | 107 - .../src/lib_ro_crate_schema/crate/registry.py | 37 - .../lib_ro_crate_schema/crate/restriction.py | 87 +- .../lib_ro_crate_schema/crate/ro_constants.py | 22 - .../crate/schema_facade.py | 1918 ++++++++++++++++- .../crate/schema_registry.py | 186 ++ .../src/lib_ro_crate_schema/crate/type.py | 268 ++- .../crate/type_property.py | 643 ++---- .../lib_ro_crate_schema/example/examples.py | 90 - .../lib/python/lib-ro-crate-schema/test.shacl | 104 - .../lib-ro-crate-schema/tests/__init__.py | 1 + .../lib-ro-crate-schema/tests/schema.shacl | 324 +++ .../tests/test_context_detection.py | 138 ++ .../tests/test_decorator_id.py | 93 + .../test_duplicate_detection.py} | 0 .../tests/test_duplicate_integration.py} | 0 .../lib-ro-crate-schema/tests/test_export.py | 57 + .../tests/test_get_crate.py | 76 + .../tests/test_integration.py | 400 ++++ .../tests/test_metadata_entry.py | 272 +++ .../tests/test_pydantic_export.py | 209 ++ .../tests/test_restriction.py | 211 ++ .../tests/test_roundtrip.py | 397 ++++ .../tests/test_schema_facade.py | 337 +++ .../tests/test_standalone_elements.py | 129 ++ .../lib-ro-crate-schema/tests/test_type.py | 144 ++ .../tests/test_type_property.py | 187 ++ .../tests/test_unknown_namespaces.py | 247 +++ .../output/DELETE_ME | 0 .../readme.txt | 2 - .../DELETE_ME | 0 .../readme.txt | 2 - .../DELETE_ME | 0 .../readme.txt | 2 - .../input/DELETE_ME | 0 .../output/DELETE_ME | 0 .../readme.txt | 2 - .../DELETE_ME | 0 .../readme.txt | 2 - .../DELETE_ME | 0 .../readme.txt | 2 - .../input/DELETE_ME | 0 .../output/DELETE_ME | 0 .../input/DELETE_ME | 0 .../output/DELETE_ME | 0 .../input/DELETE_ME | 0 .../output/DELETE_ME | 0 .../input/DELETE_ME | 0 .../output/DELETE_ME | 0 .../input/DELETE_ME | 0 .../output/DELETE_ME | 0 .../input/DELETE_ME | 0 .../output/DELETE_ME | 0 88 files changed, 10021 insertions(+), 1150 deletions(-) create mode 100644 0.2.x/examples/quickstart/QuickStartRead.java rename 
0.2.x/examples/quickstart/{Quickstart.java => QuickStartWrite.java} (87%) delete mode 100644 0.2.x/lib/python/lib-ro-crate-schema/.python-version create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/FULL_EXAMPLE_EXPLANATION.md create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/LICENSE create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/PUBLISHING.md create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/PUBLISHING_CHECKLIST.md create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/QUICKSTART.md create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/architecture.puml create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/class_diagram.puml create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/examples/circular_import_test.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/examples/decorator_example.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/examples/examples.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/examples/experimental_observations.csv create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/examples/export_import_pydantic_demo.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/examples/full_example.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/examples/minimal_import_example.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/examples/python_quickstart_read.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/examples/python_quickstart_write.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/run_all_tests.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/run_tests.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/decorators.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/forward_ref_resolver.py delete mode 100644 0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/prefix.py create mode 100644 
0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/property_type.py delete mode 100644 0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/reconstruction.py delete mode 100644 0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/registry.py delete mode 100644 0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/ro_constants.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/schema_registry.py delete mode 100644 0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/example/examples.py delete mode 100644 0.2.x/lib/python/lib-ro-crate-schema/test.shacl create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/tests/__init__.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/tests/schema.shacl create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/tests/test_context_detection.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/tests/test_decorator_id.py rename 0.2.x/lib/python/lib-ro-crate-schema/{src/lib_ro_crate_schema/example/__init__.py => tests/test_duplicate_detection.py} (100%) rename 0.2.x/lib/{test-data/test-01-import-ro-crate-metadata/input/DELETE_ME => python/lib-ro-crate-schema/tests/test_duplicate_integration.py} (100%) create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/tests/test_export.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/tests/test_get_crate.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/tests/test_integration.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/tests/test_metadata_entry.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/tests/test_pydantic_export.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/tests/test_restriction.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/tests/test_roundtrip.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/tests/test_schema_facade.py create mode 100644 
0.2.x/lib/python/lib-ro-crate-schema/tests/test_standalone_elements.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/tests/test_type.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/tests/test_type_property.py create mode 100644 0.2.x/lib/python/lib-ro-crate-schema/tests/test_unknown_namespaces.py delete mode 100644 0.2.x/lib/test-data/test-01-import-ro-crate-metadata/output/DELETE_ME delete mode 100644 0.2.x/lib/test-data/test-01-import-ro-crate-metadata/readme.txt delete mode 100644 0.2.x/lib/test-data/test-02-export-ro-crate-metadata/DELETE_ME delete mode 100644 0.2.x/lib/test-data/test-02-export-ro-crate-metadata/readme.txt delete mode 100644 0.2.x/lib/test-data/test-03-import-export-ro-crate-metadata/DELETE_ME delete mode 100644 0.2.x/lib/test-data/test-03-import-export-ro-crate-metadata/readme.txt delete mode 100644 0.2.x/lib/test-data/test-04-import-ro-crate-metadata-data/input/DELETE_ME delete mode 100644 0.2.x/lib/test-data/test-04-import-ro-crate-metadata-data/output/DELETE_ME delete mode 100644 0.2.x/lib/test-data/test-04-import-ro-crate-metadata-data/readme.txt delete mode 100644 0.2.x/lib/test-data/test-05-export-ro-crate-metadata-data/DELETE_ME delete mode 100644 0.2.x/lib/test-data/test-05-export-ro-crate-metadata-data/readme.txt delete mode 100644 0.2.x/lib/test-data/test-06-import-export-ro-crate-metadata-data/DELETE_ME delete mode 100644 0.2.x/lib/test-data/test-06-import-export-ro-crate-metadata-data/readme.txt delete mode 100644 0.2.x/lib/test-data/test-07-import-ro-crate-metadata-data-types/input/DELETE_ME delete mode 100644 0.2.x/lib/test-data/test-07-import-ro-crate-metadata-data-types/output/DELETE_ME delete mode 100644 0.2.x/lib/test-data/test-08-import-ro-crate-metadata-classes/input/DELETE_ME delete mode 100644 0.2.x/lib/test-data/test-08-import-ro-crate-metadata-classes/output/DELETE_ME delete mode 100644 0.2.x/lib/test-data/test-09-import-ro-crate-metadata-classes-with-inheritance/input/DELETE_ME delete mode 100644 
0.2.x/lib/test-data/test-09-import-ro-crate-metadata-classes-with-inheritance/output/DELETE_ME delete mode 100644 0.2.x/lib/test-data/test-10-import-ro-crate-metadata-data-types/input/DELETE_ME delete mode 100644 0.2.x/lib/test-data/test-10-import-ro-crate-metadata-data-types/output/DELETE_ME delete mode 100644 0.2.x/lib/test-data/test-11-export-ro-crate-metadata-classes/input/DELETE_ME delete mode 100644 0.2.x/lib/test-data/test-11-export-ro-crate-metadata-classes/output/DELETE_ME delete mode 100644 0.2.x/lib/test-data/test-12-export-ro-crate-metadata-classes-with-inheritance/input/DELETE_ME delete mode 100644 0.2.x/lib/test-data/test-12-export-ro-crate-metadata-classes-with-inheritance/output/DELETE_ME diff --git a/.gitignore b/.gitignore index 7e99e36..c4c6f6c 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ -*.pyc \ No newline at end of file +*.pyc +.vscode/ +.github/ +*output_crates/ \ No newline at end of file diff --git a/0.2.x/examples/quickstart/QuickStartRead.java b/0.2.x/examples/quickstart/QuickStartRead.java new file mode 100644 index 0000000..9e827c9 --- /dev/null +++ b/0.2.x/examples/quickstart/QuickStartRead.java @@ -0,0 +1,42 @@ +package ch.eth.sis.rocrate.example.doc; + +import ch.eth.sis.rocrate.SchemaFacade; +import ch.eth.sis.rocrate.facade.IMetadataEntry; +import ch.eth.sis.rocrate.facade.IPropertyType; +import ch.eth.sis.rocrate.facade.IType; +import com.fasterxml.jackson.core.JsonProcessingException; +import edu.kit.datamanager.ro_crate.RoCrate; +import edu.kit.datamanager.ro_crate.reader.FolderReader; +import edu.kit.datamanager.ro_crate.reader.RoCrateReader; + +import java.util.List; + +public class QuickStartRead +{ + + public static void main(String[] args) throws JsonProcessingException + { + RoCrateReader reader = new RoCrateReader(new FolderReader()); + RoCrate crate = reader.readCrate(QuickStartWrite.TMP_EXAMPLE_CRATE); + SchemaFacade schemaFacade = SchemaFacade.of(crate); + + List<IType> types = schemaFacade.getTypes(); + + + /* 
Print every type, followed by the metadata entries that belong to it */ + for (IType type : types) + { + System.out.println(type); + for (IMetadataEntry entry : schemaFacade.getEntries(type.getId())) + { + System.out.println(entry); + } + } + /* Print every property type known to the schema */ + for (IPropertyType propertyType : schemaFacade.getPropertyTypes()) + { + System.out.println(propertyType); + } + + } +} diff --git a/0.2.x/examples/quickstart/Quickstart.java b/0.2.x/examples/quickstart/QuickStartWrite.java similarity index 87% rename from 0.2.x/examples/quickstart/Quickstart.java rename to 0.2.x/examples/quickstart/QuickStartWrite.java index 6211f4d..35adeb6 100644 --- a/0.2.x/examples/quickstart/Quickstart.java +++ b/0.2.x/examples/quickstart/QuickStartWrite.java @@ -1,3 +1,4 @@ +package ch.eth.sis.rocrate.example.doc; import ch.eth.sis.rocrate.SchemaFacade; import ch.eth.sis.rocrate.facade.*; @@ -7,14 +8,17 @@ import java.io.Serializable; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; -public class QuickStart +public class QuickStartWrite { - private static final String PREFIX = ""; + private static final String PREFIX = "Example"; private static final String SEPARATOR = ":"; + public static final String TMP_EXAMPLE_CRATE = "/tmp/example-crate"; + public static void main(String[] args) { /* Setting up an RO-Crate with the schema facade */ @@ -59,10 +63,10 @@ public class QuickStart } + Type experimentType = new Type(); /* Building our Experiment type */ { - Type experimentType = new Type(); experimentType.setId(PREFIX + SEPARATOR + "Experiment"); { @@ -98,36 +102,41 @@ public class QuickStart MetadataEntry personAndreas = new MetadataEntry(); personAndreas.setId("PERSON1"); Map properties = new LinkedHashMap<>(); + personAndreas.setTypes(Set.of(personType.getId())); properties.put("givenname", "Andreas"); properties.put("lastname", "Meier"); properties.put("identifier", "https://orcid.org/0009-0002-6541-4637"); personAndreas.setProps(properties); 
+ personAndreas.setReferences(new LinkedHashMap<>()); schemaFacade.addEntry(personAndreas); MetadataEntry personJuan = new MetadataEntry(); - personAndreas.setId("PERSON2"); + personJuan.setId("PERSON2"); + personJuan.setTypes(Set.of(personType.getId())); Map properties2 = new LinkedHashMap<>(); properties2.put("givenname", "Andreas"); properties2.put("lastname", "Meier"); properties2.put("identifier", "https://orcid.org/0009-0002-6541-4637"); - personAndreas.setProps(properties2); + personJuan.setProps(properties2); + personJuan.setReferences(new LinkedHashMap<>()); + schemaFacade.addEntry(personJuan); MetadataEntry experiment1 = new MetadataEntry(); experiment1.setId("EXPERIMENT1"); experiment1.setReferences(Map.of("creator", List.of(personAndreas.getId()))); + experiment1.setTypes(Set.of(experimentType.getId())); Map propertiesExperiment = new LinkedHashMap<>(); propertiesExperiment.put("name", "Example Experiment"); - propertiesExperiment.put("date", "2025-09-08 08:41:50.000"); // ISO 8601 + propertiesExperiment.put("date", "2025-09-08 08:41:50.000"); experiment1.setProps(propertiesExperiment); schemaFacade.addEntry(experiment1); } FolderWriter folderWriter = new FolderWriter(); - folderWriter.save(schemaFacade.getCrate(), "/tmp/example-crate"); + folderWriter.save(schemaFacade.getCrate(), TMP_EXAMPLE_CRATE); } } - diff --git a/0.2.x/lib/python/lib-ro-crate-schema/.python-version b/0.2.x/lib/python/lib-ro-crate-schema/.python-version deleted file mode 100644 index 24ee5b1..0000000 --- a/0.2.x/lib/python/lib-ro-crate-schema/.python-version +++ /dev/null @@ -1 +0,0 @@ -3.13 diff --git a/0.2.x/lib/python/lib-ro-crate-schema/FULL_EXAMPLE_EXPLANATION.md b/0.2.x/lib/python/lib-ro-crate-schema/FULL_EXAMPLE_EXPLANATION.md new file mode 100644 index 0000000..4486fdf --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/FULL_EXAMPLE_EXPLANATION.md @@ -0,0 +1,76 @@ +# 🧪 RO-Crate Full Example Guide + +**File:** `examples/full_example.py` + +Comprehensive example 
demonstrating advanced RO-Crate features: chemical synthesis workflow, circular relationships, SHACL validation, and dynamic updates. + +## 📊 **Data Model** + +#### **OpenBIS Entities** (`http://openbis.org/`) + +| Entity | Properties | Relationships | +|--------|------------|---------------| +| **Project** | code, name, description, created_date | → space | +| **Space** | name, description, created_date | → collections[] | +| **Collection** | name, sample_type, storage_conditions, created_date | _(leaf node)_ | +| **Equipment** | name, model, serial_number, created_date, configuration{} | → parent_equipment | + +#### **Schema.org Entities** (`https://schema.org/`) + +| Entity | Properties | Relationships | +|--------|------------|---------------| +| **Molecule** | name, **smiles**, molecular_weight, cas_number, created_date, experimental_notes | → contains_molecules[] | +| **Person** | name, orcid, email | → affiliation | +| **Organization** | name, country, website | _(referenced by Person)_ | +| **Publication** | title, doi, publication_date | → authors[], molecules[], equipment[], organization | + +## ⚡ **Workflow: Setup → Experiment → Export** + +**Created Entities:** +- 1 Project, 1 Space, 1 Collection, 2 Equipment (nested) +- 5 Molecules, 2 People, 1 Organization, 1 Publication + +**Key Features:** +- ✅ **Circular Relationships**: Person ↔ Person colleagues (auto-resolved) +- ✅ **Mixed Namespaces**: OpenBIS + schema.org with auto-context +- ✅ **SHACL Validation**: 100% compliance with 150+ rules +- ✅ **Dynamic Updates**: Experiment modifies molecules + adds new product + +## 🔧 **Key Technical Features** + +### **1. Circular Relationship Resolution** +```python +# Automatic resolution of Person ↔ Person colleagues +sarah = Person(colleagues=[marcus]) +marcus = Person(colleagues=[sarah]) +# → SchemaFacade.resolve_placeholders() merges duplicates +``` + +### **2. 
Chemical Data with SMILES** +- Benzene: `c1ccccc1` → Toluene: `Cc1ccccc1` → Product: `(c1ccccc1).(Cc1ccccc1)` + +### **3. Scale Metrics** +- **Entities**: 15 → 16 (after synthesis) +- **RDF Triples**: ~500 → ~530 +- **SHACL Validation**: 100% compliance + + +## 🚀 **Usage** + +```bash +PYTHONPATH=./src python examples/full_example.py +``` + +**Output:** +Initial Crate: `full_example_initial/` +Final Crate: `full_example_final/` including file [experimental_observations](examples/experimental_observations.csv) + +## ✅ **Testing** + +```bash +python -m pytest tests/ -v # Full suite (85 tests) +``` + +--- + +**Production-ready RO-Crate library with automatic relationship resolution, comprehensive validation, and modern architecture.** \ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/LICENSE b/0.2.x/lib/python/lib-ro-crate-schema/LICENSE new file mode 100644 index 0000000..378afb4 --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/LICENSE @@ -0,0 +1,190 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2025 Simone Baffelli, Pascal Su + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. diff --git a/0.2.x/lib/python/lib-ro-crate-schema/PUBLISHING.md b/0.2.x/lib/python/lib-ro-crate-schema/PUBLISHING.md new file mode 100644 index 0000000..e3a70cc --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/PUBLISHING.md @@ -0,0 +1,239 @@ +# Publishing Guide for lib-ro-crate-schema + +This guide walks through the process of publishing this package to PyPI (Python Package Index). + +## Prerequisites + +1. **PyPI Account**: Create accounts on both: + - Test PyPI: https://test.pypi.org/account/register/ + - Production PyPI: https://pypi.org/account/register/ + +2. **API Tokens**: Generate API tokens for authentication: + - Test PyPI: https://test.pypi.org/manage/account/token/ + - Production PyPI: https://pypi.org/manage/account/token/ + + Save these tokens securely - you'll use them instead of passwords. + +3. **Install Build Tools**: + ```bash + pip install --upgrade build twine + ``` + +## Pre-Publication Checklist + +Before publishing, ensure: + +- [ ] Version number updated in `pyproject.toml` (follow [Semantic Versioning](https://semver.org/)) +- [ ] `README.md` is up-to-date and renders correctly +- [ ] All tests pass: `python run_all_tests.py` +- [ ] `LICENSE` file is present +- [ ] Dependencies in `pyproject.toml` are correct and use appropriate version constraints +- [ ] Author information is correct +- [ ] Repository URLs are correct + +## Building the Package + +1. **Clean Previous Builds**: + ```bash + # Remove old build artifacts + rm -rf dist/ build/ *.egg-info + ``` + +2. **Build Distribution Files**: + ```bash + python -m build + ``` + + This creates two files in the `dist/` directory: + - `.tar.gz` - Source distribution + - `.whl` - Wheel (binary distribution) + +3. 
**Verify the Build**: + ```bash + # List generated files + ls dist/ + + # Check package contents (sdist names may be normalised to lib_ro_crate_schema-*) + tar -tzf dist/*.tar.gz + ``` + +## Testing on Test PyPI (Recommended First Step) + +Always test your package on Test PyPI before publishing to production: + +1. **Upload to Test PyPI**: + ```bash + python -m twine upload --repository testpypi dist/* + ``` + + When prompted: + - Username: `__token__` + - Password: Your Test PyPI API token (including the `pypi-` prefix) + +2. **Test Installation**: + ```bash + # Create a fresh virtual environment + python -m venv test_env + source test_env/bin/activate # On Windows: test_env\Scripts\activate + + # Install from Test PyPI + pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ lib-ro-crate-schema + + # Test the installation + python -c "from lib_ro_crate_schema import SchemaFacade, ro_crate_schema; print('Import successful!')" + ``` + +3. **Run Your Examples**: + ```bash + # Copy your examples to the test environment and run them + python examples/decorator_example.py + ``` + +## Publishing to Production PyPI + +Once you've verified everything works on Test PyPI: + +1. **Upload to PyPI**: + ```bash + python -m twine upload dist/* + ``` + + When prompted: + - Username: `__token__` + - Password: Your PyPI API token (including the `pypi-` prefix) + +2. **Verify on PyPI**: + - Visit: https://pypi.org/project/lib-ro-crate-schema/ + - Check that the README renders correctly + - Verify all links work + +3. 
**Test Installation**: + ```bash + # In a fresh environment + pip install lib-ro-crate-schema + python -c "from lib_ro_crate_schema import SchemaFacade; print('Success!')" + ``` + +## Using GitHub Actions (Automated Publishing) + +For automated publishing on release, create `.github/workflows/publish.yml`: + +```yaml +name: Publish to PyPI + +on: + release: + types: [published] + +jobs: + publish: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.13' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build twine + + - name: Build package + run: python -m build + working-directory: 0.2.x/lib/python/lib-ro-crate-schema + + - name: Publish to PyPI + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} + run: python -m twine upload dist/* + working-directory: 0.2.x/lib/python/lib-ro-crate-schema +``` + +Then add your PyPI API token as a GitHub secret named `PYPI_API_TOKEN`. + +## Post-Publication + +1. **Tag the Release**: + ```bash + git tag -a v0.2.0 -m "Release version 0.2.0" + git push origin v0.2.0 + ``` + +2. **Create GitHub Release**: + - Go to your repository's Releases page + - Create a new release from the tag + - Add release notes describing changes + +3. **Update Documentation**: + - Update any documentation that references installation + - Announce the release (if applicable) + +## Version Management + +Follow [Semantic Versioning](https://semver.org/): + +- **MAJOR** (X.0.0): Incompatible API changes +- **MINOR** (0.X.0): Add functionality (backwards-compatible) +- **PATCH** (0.0.X): Bug fixes (backwards-compatible) + +Update version in: +1. `pyproject.toml` - `version = "X.Y.Z"` +2. 
`src/lib_ro_crate_schema/__init__.py` - `__version__ = "X.Y.Z"` + +## Troubleshooting + +### "File already exists" Error +- You cannot upload the same version twice to PyPI +- Increment the version number (in both `pyproject.toml` and `__init__.py`) and rebuild + +### Import Errors After Installation +- Check that `__init__.py` files properly export all public APIs +- Verify package structure with: `python -m pip show -f lib-ro-crate-schema` + +### README Not Rendering +- Validate Markdown: Run `python -m twine check dist/*`, or use online tools or VS Code preview +- Ensure `readme = "README.md"` in `pyproject.toml` +- Check that README.md is in the same directory as pyproject.toml + +### Missing Dependencies +- Ensure all dependencies are listed in `pyproject.toml` +- Test in a clean virtual environment + +## Security Best Practices + +1. **Never commit API tokens** to version control +2. **Use API tokens** instead of passwords (more secure, can be revoked) +3. **Limit token scope** to this project only (possible once the project exists on PyPI) +4. **Rotate tokens** periodically +5.
**Use GitHub Secrets** for CI/CD automation + +## Resources + +- PyPI Help: https://pypi.org/help/ +- Python Packaging Guide: https://packaging.python.org/ +- Twine Documentation: https://twine.readthedocs.io/ +- Semantic Versioning: https://semver.org/ + +## Quick Reference Commands + +```bash +# Clean build +rm -rf dist/ build/ *.egg-info + +# Build package +python -m build + +# Upload to Test PyPI +python -m twine upload --repository testpypi dist/* + +# Upload to PyPI +python -m twine upload dist/* + +# Test installation +pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ lib-ro-crate-schema # Test PyPI +pip install lib-ro-crate-schema # Production PyPI +``` diff --git a/0.2.x/lib/python/lib-ro-crate-schema/PUBLISHING_CHECKLIST.md b/0.2.x/lib/python/lib-ro-crate-schema/PUBLISHING_CHECKLIST.md new file mode 100644 index 0000000..225ea1c --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/PUBLISHING_CHECKLIST.md @@ -0,0 +1,130 @@ +# PyPI Publishing Checklist + +Use this checklist when you're ready to publish to PyPI.
+ +## Pre-Publishing ✅ + +- [ ] All tests pass: `python run_all_tests.py` +- [ ] Examples run successfully: `python examples/decorator_example.py` +- [ ] Version updated in: + - [ ] `pyproject.toml` (line 3: `version = "X.Y.Z"`) + - [ ] `src/lib_ro_crate_schema/__init__.py` (line 25: `__version__ = "X.Y.Z"`) + - [ ] `src/lib_ro_crate_schema/crate/__init__.py` (line 4: `__version__ = "X.Y.Z"`) +- [ ] Changes committed to git +- [ ] Git tag created: `git tag -a vX.Y.Z -m "Release vX.Y.Z"` + +## Build Package ✅ + +```powershell +cd 0.2.x\lib\python\lib-ro-crate-schema  # run from the repository root + +# Clean old builds +Remove-Item -Recurse -Force dist, build -ErrorAction SilentlyContinue + +# Build +python -m build + +# Verify +python -m twine check dist/* +``` + +- [ ] Build completed without errors +- [ ] Twine check passed + +## Test on Test PyPI ✅ + +```bash +# Upload to Test PyPI +python -m twine upload --repository testpypi dist/* +``` + +When prompted: +- Username: `__token__` +- Password: `[Your Test PyPI API token]` + +- [ ] Uploaded successfully +- [ ] Check the page: https://test.pypi.org/project/lib-ro-crate-schema/ + +### Test Installation + +```powershell +# Create test environment +python -m venv test_env +test_env\Scripts\activate + +# Install from Test PyPI +pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ lib-ro-crate-schema + +# Test import +python -c "from lib_ro_crate_schema import SchemaFacade, ro_crate_schema; print('✅ Import successful!')" +``` + +- [ ] Installed without errors +- [ ] Import works correctly +- [ ] Basic functionality works + +## Publish to PyPI ✅ + +**⚠️ This cannot be undone!
Once published, you cannot upload the same version again.** + +```bash +# Upload to production PyPI +python -m twine upload dist/* +``` + +When prompted: +- Username: `__token__` +- Password: `[Your PyPI API token]` + +- [ ] Uploaded successfully +- [ ] Check the page: https://pypi.org/project/lib-ro-crate-schema/ +- [ ] README renders correctly on PyPI + +### Verify Installation + +```bash +# Fresh environment +python -m venv verify_env +verify_env\Scripts\activate + +# Install from PyPI +pip install lib-ro-crate-schema + +# Test +python -c "from lib_ro_crate_schema import SchemaFacade; print('✅ Success!')" +``` + +- [ ] Installed from PyPI successfully +- [ ] All imports work + +## Post-Publishing ✅ + +- [ ] Push git tag: `git push origin vX.Y.Z` +- [ ] Create GitHub release from tag +- [ ] Add release notes to GitHub release +- [ ] Update CHANGELOG (if you have one) +- [ ] Announce release (if applicable) + +## Quick Commands + +```bash +# All in one - Test PyPI +python -m build && python -m twine check dist/* && python -m twine upload --repository testpypi dist/* + +# All in one - Production PyPI +python -m build && python -m twine check dist/* && python -m twine upload dist/* +``` + +## Need Help? + +- **Test PyPI**: https://test.pypi.org/account/register/ +- **PyPI**: https://pypi.org/account/register/ +- **Full Guide**: See [PUBLISHING.md](PUBLISHING.md) +- **Package Docs**: See [README.md](README.md) + +--- + +**Remember**: +- Always test on Test PyPI first! +- You cannot reupload the same version to PyPI +- Keep your API tokens secure diff --git a/0.2.x/lib/python/lib-ro-crate-schema/QUICKSTART.md b/0.2.x/lib/python/lib-ro-crate-schema/QUICKSTART.md new file mode 100644 index 0000000..f808b4f --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/QUICKSTART.md @@ -0,0 +1,163 @@ +# Quick Start Guide + +Get started with `lib-ro-crate-schema` in 5 minutes! 
+ +## Installation + +```bash +pip install lib-ro-crate-schema +``` + +## Your First RO-Crate + +Create a file called `my_first_crate.py`: + +```python +from lib_ro_crate_schema import SchemaFacade, ro_crate_schema, Field +from pydantic import BaseModel + +# 1. Define your data model with decorators +@ro_crate_schema(ontology="https://schema.org/Person") +class Person(BaseModel): + name: str = Field(ontology="https://schema.org/name") + email: str = Field(ontology="https://schema.org/email") + +# 2. Create some data +alice = Person(name="Alice Smith", email="alice@example.com") + +# 3. Create and export an RO-Crate +facade = SchemaFacade() +facade.add_all_registered_models() # Register your models +facade.add_model_instance(alice, "person_001") # Add data +facade.write("my_first_crate") # Export! + +print("✅ RO-Crate created in ./my_first_crate/") +``` + +Run it: +```bash +python my_first_crate.py +``` + +This creates a folder `my_first_crate/` containing: +- `ro-crate-metadata.json` - Your data and schema in JSON-LD format +- Proper RDF/OWL type definitions +- Schema.org vocabulary mappings + +## Next Steps + +### Add Files to Your Crate + +```python +# Add a data file before writing +facade.add_file("data.csv", + name="Experimental Data", + description="Raw measurements") +facade.write("my_crate") +``` + +### Define Related Objects + +```python +@ro_crate_schema(ontology="https://schema.org/Organization") +class Organization(BaseModel): + name: str = Field(ontology="https://schema.org/name") + +@ro_crate_schema(ontology="https://schema.org/Person") +class Person(BaseModel): + name: str = Field(ontology="https://schema.org/name") + affiliation: Organization = Field(ontology="https://schema.org/affiliation") + +# Create related objects +mit = Organization(name="MIT") +alice = Person(name="Alice", affiliation=mit) + +# Export both +facade = SchemaFacade() +facade.add_all_registered_models() +facade.add_model_instance(mit, "org_001") +facade.add_model_instance(alice, 
"person_001") +facade.write("my_crate") +``` + +### Import and Modify Existing Crates + +```python +from lib_ro_crate_schema import SchemaFacade + +# Load existing crate +facade = SchemaFacade.from_ro_crate("existing_crate") + +# Modify it +# (add more instances, files, etc.) + +# Export modified version +facade.write("modified_crate") +``` + +## What Just Happened? + +When you use `@ro_crate_schema`: +1. Your Pydantic model is registered as an RO-Crate type +2. Field annotations map to ontology properties (like Schema.org) +3. The library generates proper RDF/OWL definitions +4. Your data is packaged following the RO-Crate specification + +## More Examples + +Check out the `examples/` directory: +- `decorator_example.py` - More complex schemas +- `full_example.py` - Scientific workflow with files +- `minimal_import_example.py` - Working with existing crates + +## Common Patterns + +### Optional Fields +```python +from typing import Optional + +@ro_crate_schema(ontology="https://schema.org/Person") +class Person(BaseModel): + name: str = Field(ontology="https://schema.org/name") + email: Optional[str] = Field(default=None, ontology="https://schema.org/email") +``` + +### Lists +```python +from typing import List + +@ro_crate_schema(ontology="https://schema.org/Dataset") +class Dataset(BaseModel): + name: str = Field(ontology="https://schema.org/name") + authors: List[Person] = Field(ontology="https://schema.org/author") +``` + +### Dates and Times +```python +from datetime import datetime + +@ro_crate_schema(ontology="https://schema.org/Event") +class Event(BaseModel): + name: str = Field(ontology="https://schema.org/name") + date: datetime = Field(ontology="https://schema.org/startDate") +``` + +## Need Help? 
+ +- **Full Documentation**: See [README.md](README.md) +- **API Reference**: Browse the [src/lib_ro_crate_schema/](src/lib_ro_crate_schema/) directory +- **Examples**: Check [examples/](examples/) for real-world usage +- **Issues**: Report bugs at [GitHub Issues](https://github.com/Snowwpanda/ro-crate-interoperability-profile/issues) + +## Understanding RO-Crate + +RO-Crate (Research Object Crate) is a standard for packaging research data with metadata. It uses: +- **JSON-LD**: Linked data format +- **Schema.org**: Standard vocabulary for describing things +- **RDF/OWL**: Semantic web technologies + +This library makes it easy to create RO-Crates from Python without needing to understand all these technologies! + +--- + +**Ready for more?** Check out the full [README.md](README.md) for advanced usage and API details. diff --git a/0.2.x/lib/python/lib-ro-crate-schema/README.md b/0.2.x/lib/python/lib-ro-crate-schema/README.md index 87a5f5f..df0bd54 100644 --- a/0.2.x/lib/python/lib-ro-crate-schema/README.md +++ b/0.2.x/lib/python/lib-ro-crate-schema/README.md @@ -1,113 +1,246 @@ -# Placeholder +# RO-Crate Schema Library -This is the Python implementation +[![Python 3.13+](https://img.shields.io/badge/python-3.13+-blue.svg)](https://www.python.org/downloads/) +[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) +A Pythonic library for creating and managing [RO-Crates](https://www.researchobject.org/ro-crate/) with schema definitions using Pydantic models. -## How to work on the project +**🚀 New to RO-Crate? Start with the [Quick Start Guide](QUICKSTART.md)!** + +## What is it? + +This library provides a clean, type-safe interface for creating RO-Crates (Research Object Crates) - a community standard for packaging research data with their metadata. It uses familiar Pydantic models with decorators to define schemas that automatically generate RDF/OWL definitions. 
+ +## Installation + +### From PyPI (recommended) -1. Make sure you install `astral-uv` -2. Move to the project folder [here](./) -3. Run the following commands ```bash -uv venv -source .venv/bin/activate -uv pip install -e . +pip install lib-ro-crate-schema ``` +### From Source -# Crate I/O API Guide +```bash +git clone https://github.com/Snowwpanda/ro-crate-interoperability-profile.git +cd ro-crate-interoperability-profile/0.2.x/lib/python/lib-ro-crate-schema +pip install -e . +``` -This library provides a Pythonic interface for importing and exporting objects to and from a RO-Crate using the extension profile. -Unlike the Java implementation, which relies heavily on builder patterns, this API integrates naturally with Pydantic models and standard Python workflows. +## Quick Start -The result is cleaner, more idiomatic code that avoids the verbosity and “stringly-typed” style typical of Java builders, while still ensuring full compatibility with the openBIS requirements. - ---- - -## Importing - -You can inspect the contents of a crate and deserialize objects into strongly typed Pydantic models. - -### List available types - -Assuming we have imported our crate into `crate`, we can do: - - ```python - from pydantic import BaseModel - crate.get_types() -> List[BaseModel] - ``` - -This returns all object types defined in the crate as a list of BaseModels. This could be used for codegen since a basemodel can be exported as a JSON Schema and used to generate the class definitions. 
- -### Read an object as a given type** -Assuming we have a an avialable `Molecule`, `BaseModel`, we can do: +Here's a minimal example to get you started: ```python -crate.read_as(Molecule, my_crate, id) -> Molecule | None +from lib_ro_crate_schema import SchemaFacade, ro_crate_schema, Field +from pydantic import BaseModel + +# Define your schema using decorators +@ro_crate_schema(ontology="https://schema.org/Person") +class Person(BaseModel): + name: str = Field(ontology="https://schema.org/name") + email: str = Field(ontology="https://schema.org/email") + +# Create an instance +person = Person(name="Dr. Alice Smith", email="alice@example.com") + +# Export to RO-Crate +facade = SchemaFacade() +facade.add_all_registered_models() # Register all @ro_crate_schema models +facade.add_model_instance(person, "person_001") +facade.write("my_research_crate") ``` -This call deserializes an object into the specified Pydantic model (`Molecule` in this case). +This creates a complete RO-Crate with: +- `ro-crate-metadata.json` containing your data and schema +- Proper RDF/OWL definitions +- Schema.org ontology mappings -This is a *static workflow*: it requires that the receiving side knows the type and that it is structurally compatible. +## Key Features -This approach lets developers work directly with familiar Python models rather than manually navigating RDF structures. +✨ **Pydantic Integration** - Define schemas using familiar Pydantic models +📦 **File Handling** - Include data files alongside metadata +🔄 **Round-trip Support** - Import and re-export RO-Crates without data loss +🏷️ **Ontology Mapping** - Map to standard vocabularies (Schema.org, custom ontologies) +🔒 **Type Safety** - Strong typing with automatic validation +📊 **RDF Export** - Generate RDFS/OWL schema definitions -If the class is not available, one needs to create them for example by inspecting the output of `get_types`. 
+## More Examples ---- - -## Exporting - -Exporting models to a crate is possible in two ways: - -### Register a schema only - -One can add a schema to a crate by passing a BaseModel: +### Including Data Files ```python -crate.add_to_schema(Molecule) +facade.add_file("experiment_data.csv", name="Experimental Results", + description="Raw data from chemical synthesis experiment") +facade.write("my_crate") ``` -This will add the definition to the crate. - -### Add an object instance - -One can also pass directly an instance of a `BaseModel`. +### Complex Relationships ```python -m1 = Molecule() -crate.add(m1) +@ro_crate_schema(ontology="https://schema.org/Organization") +class Organization(BaseModel): + name: str = Field(ontology="https://schema.org/name") + +@ro_crate_schema(ontology="https://schema.org/Person") +class Person(BaseModel): + name: str = Field(ontology="https://schema.org/name") + affiliation: Organization = Field(ontology="https://schema.org/affiliation") + +org = Organization(name="MIT") +person = Person(name="Alice", affiliation=org) + +facade = SchemaFacade() +facade.add_all_registered_models() +facade.add_model_instance(org, "org_001") +facade.add_model_instance(person, "person_001") +facade.write("my_crate") ``` -This automatically adds both the schema and the object’s metadata to the crate. Developers work with native Python objects, while the library ensures that valid RDF is generated and inserted. - ---- - -## Fine-Grained / Manual Mode - -For cases where strict parity with the Java API is required, the library also allows manual construction: +### Importing Existing RO-Crates ```python -p1 = Property(...) -t1 = Type(properties=[p1, ...]) +from lib_ro_crate_schema import SchemaFacade + +facade = SchemaFacade.from_ro_crate("path/to/existing_crate") +# Modify and re-export +facade.write("modified_crate") ``` -This low-level interface mirrors the Java implementation, but is rarely needed in typical Python workflows. 
+## Documentation ---- +- **[Full Examples](examples/)** - Comprehensive examples including scientific workflows +- **[API Reference](src/lib_ro_crate_schema/)** - Detailed API documentation +- **[Tests](tests/)** - Test suite demonstrating all features -## Conformity and Interoperability +### Running Examples -Internally, the library converts objects into `RdfsClasses` and `RdfTypes`. -A Java-style API is exposed where necessary to meet openBIS interoperability requirements. +```bash +# Simple decorator example +python examples/decorator_example.py -However, the **preferred approach in Python** is to work with Pydantic models and high-level functions (`read_as`, `add`, `add_to_schema`). This avoids boilerplate, reduces errors, and provides strong validation guarantees out of the box. +# Complex scientific workflow with file handling +python examples/full_example.py ---- +# Import/export demonstration +python examples/minimal_import_example.py +``` -## Why the Pythonic Approach Is Better +### Running Tests -* **Java style**: verbose builders, string references, manual wiring. -* **Python style**: typed models, declarative APIs, validation by design. 
+```bash +# All tests +python run_all_tests.py + +# Or with pytest +pytest tests/ +``` + +## Advanced Usage + +### Manual Construction (without decorators) + +For fine-grained control, you can manually construct Type, TypeProperty, and MetadataEntry objects: + +```python +from lib_ro_crate_schema import SchemaFacade, Type, TypeProperty, MetadataEntry + +# Create property definition +name_property = TypeProperty( + id="name", + range_includes=["http://www.w3.org/2001/XMLSchema#string"], + ontological_annotations=["https://schema.org/name"] +) + +# Create type definition +person_type = Type( + id="Person", + ontological_annotations=["https://schema.org/Person"], + rdfs_property=[name_property] +) + +# Create instance data +person_entry = MetadataEntry( + id="person_001", + class_id="Person", + properties={"name": "Alice"} +) + +facade = SchemaFacade() +facade.addType(person_type) +facade.addEntry(person_entry) +facade.write("manual_crate") +``` + +See [`examples/python_quickstart_write.py`](examples/python_quickstart_write.py) for more details. 
+ +## API Overview + +### Core Classes + +| Class | Purpose | +|-------|---------| +| `SchemaFacade` | Main interface for creating and exporting RO-Crates | +| `Type` | Define RDFS classes (schema types) | +| `TypeProperty` | Define RDF properties for types | +| `MetadataEntry` | Instance data conforming to a Type | +| `Restriction` | OWL cardinality and property restrictions | + +### Decorators (Recommended) + +| Decorator | Purpose | +|-----------|---------| +| `@ro_crate_schema` | Mark Pydantic model as RO-Crate schema type | +| `Field` | Enhanced Pydantic Field with ontology mapping | + +### SchemaFacade Methods + +```python +facade = SchemaFacade() + +# Decorator API +facade.add_all_registered_models() # Add all @ro_crate_schema models +facade.add_model_instance(instance, id) # Add Pydantic instance + +# Manual API +facade.addType(type_obj) # Add Type definition +facade.addEntry(entry) # Add MetadataEntry + +# File handling +facade.add_file(path, name, description) # Include data file + +# Export +facade.write(destination) # Write complete RO-Crate +facade.to_graph() # Get RDFLib Graph + +# Import +SchemaFacade.from_ro_crate(path) # Load existing RO-Crate +``` + +## Contributing + +Contributions are welcome! Please feel free to submit a Pull Request. + +## License + +This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details. 
+ +## Citation + +If you use this library in your research, please cite: + +```bibtex +@software{ro_crate_schema, + title = {RO-Crate Schema Library}, + author = {Baffelli, Simone and Su, Pascal}, + year = {2025}, + url = {https://github.com/Snowwpanda/ro-crate-interoperability-profile} +} +``` + +## Links + +- **Repository**: https://github.com/Snowwpanda/ro-crate-interoperability-profile +- **RO-Crate Specification**: https://www.researchobject.org/ro-crate/ +- **Pydantic Documentation**: https://docs.pydantic.dev/ -Both approaches remain interoperable, but the Pythonic path is safer, faster, and more natural for data-driven workflows. diff --git a/0.2.x/lib/python/lib-ro-crate-schema/architecture.puml b/0.2.x/lib/python/lib-ro-crate-schema/architecture.puml new file mode 100644 index 0000000..f56e8d5 --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/architecture.puml @@ -0,0 +1,110 @@ +@startuml RO-Crate Architecture + +!theme plain +skinparam backgroundColor white +skinparam componentStyle rectangle + +package "Input Sources" as inputs { + [SHACL Schema\nConstraints] as shacl + [Pydantic Models\n@ro_crate_schema] as pymod + [Manual Schema\nDefinition] as manual + [Existing RO-Crate\nMetadata] as rocin +} + +package "External Dependencies" as external { + [RDFLib\nRDF Graph Processing] as rdflib + [RO-Crate\nPython Library] as rocrate + [Pydantic\nData Validation] as pydantic + [JSON-LD\nLinked Data] as jsonld +} + +package "Core Library Components" as core { + + package "Schema Facade (Orchestrator)" as orchestrator { + [SchemaFacade\nMain API Controller] as sf + } + + package "Schema Components" as components { + [Type\nRDFS Classes] as type + [TypeProperty\nRDFS Properties] as prop + [MetadataEntry\nRDF Instances] as meta + [Restriction\nConstraints] as rest + } + + package "Registry & Discovery" as registry { + [SchemaRegistry\nDecorator System] as reg + [ForwardRefResolver\nReference Linking] as frr + } + + package "JSON-LD Processing" as jsonld_proc 
{ + [JSONLDUtils\nContext Generation] as jsonldutils + [Dynamic Context\nNamespace Detection] as ctx + } + + package "RDF Processing" as rdf_proc { + [RDF Module\nTriple Generation] as rdfp + [RDF Graph\nConversion] as graph + } +} + +package "API Interfaces" as apis { + [Python API\nadd_type(), get_entries()] as pyapi + [Java API Compatibility\naddType(), getEntries()] as japi + [Decorator API\n@ro_crate_schema] as decapi +} + +package "Output Formats" as outputs { + [RO-Crate\nJSON-LD Files] as rocout + [RDF/Turtle\nSerialization] as ttlout + [Pure JSON-LD\nSchema Export] as jsonout + [Data Files\nAttachment] as fileout +} + +package "Examples & Usage" as usage { + [Examples\nfull_example.py\nquickstart.py] as examples + [Test Suite\npytest Framework\n83 Tests] as tests +} + +' Data Flow Connections +shacl --> sf +pymod --> reg +manual --> sf +rocin --> sf + +reg --> sf +sf --> type +sf --> prop +sf --> meta +sf --> rest + +type --> rdfp +prop --> rdfp +meta --> rdfp +rest --> rdfp + +rdfp --> graph +graph --> jsonldutils +jsonldutils --> ctx + +frr --> sf +sf --> pyapi +sf --> japi +reg --> decapi + +sf --> rocout +graph --> ttlout +jsonldutils --> jsonout +sf --> fileout + +pyapi --> examples +japi --> examples +decapi --> examples +sf --> tests + +' External Dependencies +rdflib --> graph +rocrate --> sf +pydantic --> reg +jsonld --> jsonldutils + +@enduml \ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/class_diagram.puml b/0.2.x/lib/python/lib-ro-crate-schema/class_diagram.puml new file mode 100644 index 0000000..7cc7f45 --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/class_diagram.puml @@ -0,0 +1,118 @@ +@startuml RO-Crate Core Classes + +!theme plain +skinparam class { + BackgroundColor White + BorderColor Black + ArrowColor Black +} + +package "Core Schema Objects" { + + class SchemaFacade { + +types: List[Type] + +metadata_entries: List[MetadataEntry] + +standalone_properties: List[TypeProperty] + 
+standalone_restrictions: List[Restriction] + +prefix: str + -- + +addType(type: Type) + +addEntry(entry: MetadataEntry) + +add_property_type(prop: TypeProperty) + +get_crate(): ROCrate + +from_ro_crate(path): SchemaFacade + +write(destination: str) + +to_json(): dict + } + + class Type { + +id: str + +rdfs_property: List[TypeProperty] + +restrictions: List[Restriction] + +label: str + +comment: str + +sub_class_of: List[ForwardRef] + -- + +to_triples(): Generator[Triple] + } + + class TypeProperty { + +id: str + +range_includes: List[LiteralType] + +domain_includes: List[str] + +required: bool + +label: str + +comment: str + -- + +to_triples(): Generator[Triple] + } + + class MetadataEntry { + +id: str + +class_id: str + +properties: Dict[str, Any] + +label: str + +comment: str + -- + +to_triples(): Generator[Triple] + } + + class Restriction { + +id: str + +target_class: str + +target_property: str + +restriction_type: RestrictionType + +value: Any + -- + +to_triples(): Generator[Triple] + } +} + +package "Registry System" { + class SchemaRegistry { + +registered_models: Dict[str, TypeTemplate] + -- + +register_model(name: str, template: TypeTemplate) + +get_model(name: str): TypeTemplate + +list_models(): List[str] + } + + class TypeTemplate { + +name: str + +properties: List[TypePropertyTemplate] + +base_classes: List[str] + -- + +to_type(): Type + } +} + +package "Processing Utilities" { + class JSONLDUtils { + -- + +get_context(graph: Graph): List + +add_schema_to_crate(facade: SchemaFacade, crate: ROCrate): ROCrate + } + + class ForwardRefResolver { + -- + +resolve_ref(ref: Union[ForwardRef, str]): Any + } +} + +' Relationships +SchemaFacade ||--o{ Type : contains +SchemaFacade ||--o{ MetadataEntry : contains +SchemaFacade ||--o{ TypeProperty : "standalone properties" +SchemaFacade ||--o{ Restriction : "standalone restrictions" + +Type ||--o{ TypeProperty : defines +Type ||--o{ Restriction : constraints + +SchemaRegistry ||--o{ TypeTemplate : manages 
+TypeTemplate --> Type : generates + +SchemaFacade --> JSONLDUtils : uses +SchemaFacade --> ForwardRefResolver : uses +SchemaFacade --> SchemaRegistry : accesses + +@enduml \ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/examples/circular_import_test.py b/0.2.x/lib/python/lib-ro-crate-schema/examples/circular_import_test.py new file mode 100644 index 0000000..f468ec0 --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/examples/circular_import_test.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 +""" +Focused test for circular import handling in RO-Crate schema. + +This test specifically creates two people who are each other's colleagues +to verify how the system handles circular references during: +1. Schema creation +2. RDF serialization +3. JSON-LD export +4. Round-trip import/export +""" + +import sys +import json +from pathlib import Path +from typing import List, Optional + +# Add src to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +from pydantic import BaseModel +from lib_ro_crate_schema.crate.decorators import ro_crate_schema, Field +from lib_ro_crate_schema.crate.schema_facade import SchemaFacade + +@ro_crate_schema(ontology="https://schema.org/Organization") +class SimpleOrganization(BaseModel): + """Simple organization for testing""" + name: str = Field(ontology="https://schema.org/name") + country: str = Field(ontology="https://schema.org/addressCountry") + +@ro_crate_schema(ontology="https://schema.org/Person") +class SimplePerson(BaseModel): + """Person with circular colleague relationship""" + name: str = Field(ontology="https://schema.org/name") + email: str = Field(ontology="https://schema.org/email") + affiliation: SimpleOrganization = Field(ontology="https://schema.org/affiliation") + colleagues: List['SimplePerson'] = Field(default=[], ontology="https://schema.org/colleague") + +def test_circular_imports(): + """Test circular colleague relationships""" + + print("🧪 CIRCULAR 
IMPORT TEST") + print("=" * 50) + + # Create organization + org = SimpleOrganization( + name="Test University", + country="Switzerland" + ) + + # Create two people without colleagues initially + alice = SimplePerson( + name="Dr. Alice Johnson", + email="alice@test.edu", + affiliation=org, + colleagues=[] + ) + + bob = SimplePerson( + name="Prof. Bob Smith", + email="bob@test.edu", + affiliation=org, + colleagues=[] + ) + + print(f"✅ Created Alice (colleagues: {len(alice.colleagues)})") + print(f"✅ Created Bob (colleagues: {len(bob.colleagues)})") + + # Establish circular colleague relationship + alice = alice.model_copy(update={'colleagues': [bob]}) + bob = bob.model_copy(update={'colleagues': [alice]}) + + print(f"\n🔄 Circular relationships established:") + print(f" Alice colleagues: {[c.name for c in alice.colleagues]}") + print(f" Bob colleagues: {[c.name for c in bob.colleagues]}") + + # Test schema creation with circular refs + print(f"\n📊 Testing schema creation...") + facade = SchemaFacade() + facade.add_all_registered_models() + + print(f" ✅ Schema created with {len(facade.types)} types") + + # Add instances to facade + facade.add_model_instance(org, "test_org") + facade.add_model_instance(alice, "alice") + facade.add_model_instance(bob, "bob") + + print(f" ✅ Added {len(facade.metadata_entries)} instances to facade") + + # Test RDF generation + print(f"\n🕸️ Testing RDF generation...") + try: + graph = facade.to_graph() + print(f" ✅ Generated {len(graph)} RDF triples successfully") + except Exception as e: + print(f" ❌ RDF generation failed: {e}") + return False + + # Test JSON-LD export + print(f"\n📄 Testing RO-Crate export...") + try: + import os + output_dir = "output_crates" + os.makedirs(output_dir, exist_ok=True) + output_path = os.path.join(output_dir, "circular_test") + + facade.write(output_path, name="Circular Import Test", + description="Testing circular colleague relationships") + print(f" ✅ Exported to {output_path}") + except Exception as e: + 
print(f" ❌ Export failed: {e}") + return False + + # Test round-trip import + print(f"\n🔄 Testing round-trip import...") + try: + imported_facade = SchemaFacade.from_ro_crate(output_path) + print(f" ✅ Imported {len(imported_facade.types)} types, {len(imported_facade.metadata_entries)} entries") + + # Check if circular references are preserved + alice_entry = None + bob_entry = None + + for entry in imported_facade.metadata_entries: + if entry.id == "alice": + alice_entry = entry + elif entry.id == "bob": + bob_entry = entry + + if alice_entry and bob_entry: + print(f" ✅ Found Alice and Bob entries after import") + + # Check if colleague relationships survived + alice_colleagues = alice_entry.properties.get('colleagues', []) + bob_colleagues = bob_entry.properties.get('colleagues', []) + + print(f" Alice colleagues in imported data: {alice_colleagues}") + print(f" Bob colleagues in imported data: {bob_colleagues}") + else: + print(f" ⚠️ Could not find Alice/Bob entries after import") + + except Exception as e: + print(f" ❌ Import failed: {e}") + return False + + # Examine the actual JSON-LD structure + print(f"\n🔍 Examining generated JSON-LD structure...") + try: + with open(f"{output_path}/ro-crate-metadata.json", 'r') as f: + crate_data = json.load(f) + + # Find Person entities + person_entities = [] + for entity in crate_data.get("@graph", []): + if entity.get("@type") == "SimplePerson": + person_entities.append(entity) + + print(f" Found {len(person_entities)} Person entities:") + for person in person_entities: + person_id = person.get("@id", "unknown") + person_name = person.get("base:name", "unknown") + colleagues = person.get("base:colleagues", "none") + print(f" - {person_id}: {person_name}") + print(f" Colleagues: {colleagues}") + + except Exception as e: + print(f" ⚠️ Could not examine JSON-LD: {e}") + + print(f"\n🎉 Circular import test completed!") + return True + +if __name__ == "__main__": + test_circular_imports() \ No newline at end of file diff --git 
a/0.2.x/lib/python/lib-ro-crate-schema/examples/decorator_example.py b/0.2.x/lib/python/lib-ro-crate-schema/examples/decorator_example.py new file mode 100644 index 0000000..837f0d2 --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/examples/decorator_example.py @@ -0,0 +1,185 @@ +""" +Example demonstrating the decorator-based model registration system. +""" +from datetime import datetime +from typing import List, Optional +from pydantic import BaseModel + +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +from lib_ro_crate_schema.crate.decorators import ro_crate_schema, Field +from lib_ro_crate_schema.crate.schema_facade import SchemaFacade +from lib_ro_crate_schema.crate.schema_registry import get_schema_registry + + +# Example 1: Basic model with ontology annotations (required and optional fields) +@ro_crate_schema(ontology="https://schema.org/Person") +class Person(BaseModel): + """A person in the research project""" + # Required fields (minCardinality: 1) + name: str = Field(ontology="https://schema.org/name", comment="Person's full name") + email: str = Field(ontology="https://schema.org/email", comment="Contact email address") + + # Optional fields (minCardinality: 0) + orcid: Optional[str] = Field(default=None, ontology="https://orcid.org/", comment="ORCID identifier") + phone: Optional[str] = Field(default=None, ontology="https://schema.org/telephone", comment="Phone number") + affiliation: Optional[str] = Field(default=None, ontology="https://schema.org/affiliation", comment="Institution affiliation") + + +# Example 2: Model with relationships and mixed required/optional fields +@ro_crate_schema(ontology="https://schema.org/Dataset") +class Dataset(BaseModel): + """A research dataset""" + # Required fields (minCardinality: 1) + title: str = Field(ontology="https://schema.org/name", comment="Dataset title") + description: str = Field(ontology="https://schema.org/description", comment="Dataset 
description") + authors: List[Person] = Field(ontology="https://schema.org/author", comment="Dataset authors") + created_date: datetime = Field(ontology="https://schema.org/dateCreated", comment="Creation date") + + # Optional fields (minCardinality: 0) + keywords: Optional[List[str]] = Field(default=None, ontology="https://schema.org/keywords", comment="Research keywords") + version: Optional[str] = Field(default=None, ontology="https://schema.org/version", comment="Dataset version") + license: Optional[str] = Field(default=None, ontology="https://schema.org/license", comment="License information") + + +# Example 3: Model with institutional information +@ro_crate_schema(ontology="https://schema.org/Organization") +class Institution(BaseModel): + """Research institution or organization""" + name: str = Field(ontology="https://schema.org/name", comment="Institution name") + country: str = Field(comment="Country where institution is located") + website: Optional[str] = Field(default=None, comment="Institution website") + + +def example_usage(): + """Demonstrate the complete workflow""" + + print("=== Decorator-based RO-Crate Schema Generation ===") + print() + + # 1. Show registered models (automatically registered by decorators) + registry = get_schema_registry() + + print("Registered models:") + for model_name, type_template in registry.get_all_type_templates().items(): + print(f" - {model_name}: {type_template.ontology}") + for prop_info in type_template.type_properties: + print(f" * {prop_info.name}: {prop_info.rdf_type} (ontology: {prop_info.ontology})") + print() + + # 2. Create schema facade and add all registered models + facade = SchemaFacade() + facade.add_all_registered_models() + + print(f"Schema contains {len(facade.types)} types:") + for type_obj in facade.types: + print(f" - {type_obj.id}: {type_obj.ontological_annotations}") + print() + + # 3. Create model instances and add them as metadata + person1 = Person( + name="Dr. 
Jane Smith", + email="jane.smith@university.edu", + orcid="0000-0000-0000-0001" + ) + + person2 = Person( + name="Prof. John Doe", + email="john.doe@institute.org" + ) + + dataset = Dataset( + title="Climate Change Impact Study", + description="Analysis of climate data from 2000-2023", + authors=[person1, person2], + created_date=datetime(2024, 1, 15), + keywords=["climate", "environment", "data analysis"] + ) + + # Add instances as metadata entries + facade.add_model_instance(person1, "jane_smith") + facade.add_model_instance(person2, "john_doe") + facade.add_model_instance(dataset, "climate_study_2024") + + print(f"Metadata contains {len(facade.metadata_entries)} entries:") + for entry in facade.metadata_entries: + print(f" - {entry.id} ({entry.class_id})") + print(f" Properties: {entry.properties}") + print(f" References: {entry.references}") + print() + + # 4. Generate RDF graph + graph = facade.to_graph() + print(f"Generated RDF graph with {len(graph)} triples") + print() + print("Sample triples:") + for i, (s, p, o) in enumerate(graph): + if i < 10: # Show first 10 triples + print(f" {s} {p} {o}") + print() + + # 5. 
Convert to RO-Crate + from lib_ro_crate_schema.crate.jsonld_utils import add_schema_to_crate + from rocrate.rocrate import ROCrate + import json + from pathlib import Path + + print("🔄 Adding schema and metadata to RO-Crate...") + crate = ROCrate() + crate.name = "Decorator Example RO-Crate" + crate.description = "Generated using decorator-based schema registration" + + final_crate = add_schema_to_crate(facade, crate) + + # Get JSON representation by writing to temp directory + import tempfile + with tempfile.TemporaryDirectory() as temp_dir: + final_crate.write(temp_dir) + metadata_file = Path(temp_dir) / "ro-crate-metadata.json" + with open(metadata_file, 'r') as f: + final_crate_json = json.load(f) + + # Save to file + output_path = Path("ro-crate-metadata.json") + with open(output_path, 'w', encoding='utf-8') as f: + json.dump(final_crate_json, f, indent=2) + + print(f"✅ RO-Crate saved to: {output_path.absolute()}") + print(f"📊 Total entities in @graph: {len(final_crate_json['@graph'])}") + print() + + # Show entity types summary + entity_types = {} + for entity in final_crate_json["@graph"]: + entity_type = entity.get("@type", "Unknown") + if isinstance(entity_type, list): + for t in entity_type: + entity_types[t] = entity_types.get(t, 0) + 1 + else: + entity_types[entity_type] = entity_types.get(entity_type, 0) + 1 + + print("📋 Entity types in RO-Crate:") + for entity_type, count in entity_types.items(): + print(f" - {entity_type}: {count}") + print() + + # Show context + context = final_crate_json["@context"] + print(f"🔗 RO-Crate @context: {context}") + print() + + print("🎯 Key Features Demonstrated:") + print(" ✓ Pydantic models → RDFS schema") + print(" ✓ Ontology annotations (schema.org, ORCID)") + print(" ✓ Model instances → RDF metadata") + print(" ✓ Proper RO-Crate integration") + print(" ✓ JSON-LD context management") + print(" ✓ Schema embedding in ro-crate-metadata.json") + + return facade, final_crate_json + + +if __name__ == "__main__": + 
example_usage() \ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/examples/examples.py b/0.2.x/lib/python/lib-ro-crate-schema/examples/examples.py new file mode 100644 index 0000000..a1051bc --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/examples/examples.py @@ -0,0 +1,135 @@ +# Utility functions for reconstruction + +import json +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +from lib_ro_crate_schema.crate.type import Type +from lib_ro_crate_schema.crate.type_property import TypeProperty +from lib_ro_crate_schema.crate.literal_type import LiteralType +from lib_ro_crate_schema.crate.metadata_entry import MetadataEntry +from lib_ro_crate_schema.crate.schema_facade import SchemaFacade +from rocrate.rocrate import ROCrate + +from rdflib import Graph +from lib_ro_crate_schema.crate.jsonld_utils import add_schema_to_crate +# from lib_ro_crate_schema.crate import reconstruction # Not available + + +def main(): + """ + Example demonstrating manual RO-Crate construction with automatic OWL restrictions. + + When manually creating TypeProperty objects, you can specify required=True/False + to automatically generate OWL restrictions with appropriate cardinality constraints: + - required=True -> generates minCardinality: 1 (field is mandatory) + - required=False -> generates minCardinality: 0 (field is optional) + + This ensures Java compatibility where OWL restrictions define field requirements. 
+ """ + + # Define properties with cardinality information + name = TypeProperty( + id="name", + range_includes=[LiteralType.STRING], + required=True, # This will generate minCardinality: 1 + label="Full Name", + comment="The full name of the entity" + ) + identifier = TypeProperty( + id="identifier", + range_includes=[LiteralType.STRING], + required=True, # This will generate minCardinality: 1 + label="Identifier", + comment="Unique identifier for the entity" + ) + + colleague = TypeProperty( + id="colleague", + range_includes=["Participant"], + required=False, # This will generate minCardinality: 0 (optional) + label="Colleague", + comment="Optional colleague relationship" + ) + + participant_type = Type( + id="Participant", + type="Type", + subclass_of=["https://schema.org/Thing"], + ontological_annotations=["http://purl.org/dc/terms/creator"], + rdfs_property=[name, identifier], + comment="A participant in the research", + label="Participant", + ) + + creator_type = Type( + id="Creator", + type="Type", + subclass_of=["https://schema.org/Thing"], + ontological_annotations=["http://purl.org/dc/terms/creator"], + rdfs_property=[name, identifier, colleague], + comment="A creator of the research work", + label="Creator", + ) + + # Example MetadataEntry using new format with class_id and values + creator_entry = MetadataEntry( + id="creator1", + class_id="Creator", + values={ + "name": "John Author", + "identifier": "https://orcid.org/0000-0000-0000-0000", + }, + references={}, + ) + + participant_entry = MetadataEntry( + id="participant", + class_id="Participant", + values={ + "name": "Karl Participant", + "identifier": "https://orcid.org/0000-0000-0000-0001", + }, + references={ + "colleague": ["creator1"] + }, + ) + + schema = SchemaFacade( + types=[creator_type, participant_type], + # properties=[has_name, has_identifier], + metadata_entries=[creator_entry, participant_entry], + ) + #Resolve refs + schema.resolve_forward_refs() + #Add it to a crate + crate = 
ROCrate() + crate.license = "a" + crate.name = "mtcrate" + crate.description = "test crate" + crate = add_schema_to_crate(schema, crate) + #Serialise - write to temp dir and read back for JSON output + import tempfile + with tempfile.TemporaryDirectory() as temp_dir: + crate.write(temp_dir) + metadata_file = Path(temp_dir) / "ro-crate-metadata.json" + with open(metadata_file, 'r') as f: + res = json.load(f) + print(json.dumps(res)) + # Write to file + import os + output_dir = "output_crates" + os.makedirs(output_dir, exist_ok=True) + crate_path = os.path.join(output_dir, "example_crate") + crate.write(crate_path) + + +# Use the reconstruction module's main entry point +def reconstruct(graph: Graph): + # return reconstruction.reconstruct(graph) # Not available + raise NotImplementedError("Reconstruction module not available") + + +if __name__ == "__main__": + main() diff --git a/0.2.x/lib/python/lib-ro-crate-schema/examples/experimental_observations.csv b/0.2.x/lib/python/lib-ro-crate-schema/examples/experimental_observations.csv new file mode 100644 index 0000000..42a7063 --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/examples/experimental_observations.csv @@ -0,0 +1,8 @@ +timestamp,parameter,value,unit,equipment,notes +2024-10-06T15:30:00.000000,Temperature,85.5,°C,FlowSyn Reactor,Maintained constant throughout reaction +2024-10-06T15:30:00.000000,Pressure,2.3,bar,FlowSyn Reactor,Slightly elevated from atmospheric +2024-10-06T15:30:00.000000,Reaction Duration,45.0,minutes,FlowSyn Reactor,Optimal reaction time determined +2024-10-06T15:30:00.000000,Benzene Consumption,0.5,mol,Balance,Starting material fully consumed +2024-10-06T15:30:00.000000,Toluene Consumption,0.7,mol,Balance,"Partial consumption, excess reagent" +2024-10-06T15:30:00.000000,Product Yield,78.5,%,FlowSyn Reactor,High yield synthesis of Benzene-Toluene Adduct +2024-10-06T15:30:00.000000,Product Purity,94.2,%,FlowSyn Reactor,Determined by GC-MS analysis \ No newline at end of file diff --git
a/0.2.x/lib/python/lib-ro-crate-schema/examples/export_import_pydantic_demo.py b/0.2.x/lib/python/lib-ro-crate-schema/examples/export_import_pydantic_demo.py new file mode 100644 index 0000000..4bb23ca --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/examples/export_import_pydantic_demo.py @@ -0,0 +1,224 @@ +#!/usr/bin/env python3 +""" +Demonstration of exporting Pydantic models from SchemaFacade. + +This example shows how to: +1. Create a schema with Type definitions +2. Export those Types as Pydantic model classes +3. Use the generated classes to create and validate instances +""" + +import sys +from pathlib import Path + +# Add src to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +from lib_ro_crate_schema.crate.schema_facade import SchemaFacade +from lib_ro_crate_schema.crate.type import Type +from lib_ro_crate_schema.crate.type_property import TypeProperty +from lib_ro_crate_schema.crate.restriction import Restriction +from lib_ro_crate_schema.crate.literal_type import LiteralType +from pydantic import BaseModel +from typing import List, Optional + + +def main(): + print("🔧 RO-Crate Pydantic Export Demo") + print("=" * 50) + + # Create SchemaFacade and add some types + # For this demo, we'll define two types: Person and Organization + # The ro-crate-schema will not be exported as crate, just used here for model generation + facade = SchemaFacade() + + # Define Person type, starting with the properties and restrictions + person_name_prop = TypeProperty( + id="name", + label="Full Name", + comment="The complete name of the person", + range_includes=["http://www.w3.org/2001/XMLSchema#string"], + required=True + ) + + person_age_prop = TypeProperty( + id="age", + label="Age", + comment="Age in years", + range_includes=["http://www.w3.org/2001/XMLSchema#integer"], + required=False + ) + + person_emails_prop = TypeProperty( + id="emails", + label="Email Addresses", + comment="List of email addresses", + 
range_includes=["http://www.w3.org/2001/XMLSchema#string"], + required=False + ) + + # Create restrictions + person_name_restriction = Restriction( + property_type="name", + min_cardinality=1, + max_cardinality=1 + ) + + person_age_restriction = Restriction( + property_type="age", + min_cardinality=0, + max_cardinality=1 + ) + + person_emails_restriction = Restriction( + property_type="emails", + min_cardinality=0, + max_cardinality=None # Unbounded list + ) + + person_type = Type( + id="Person", + label="Person", + comment="Represents a person with personal information", + subclass_of=["https://schema.org/Person"], + rdfs_property=[person_name_prop, person_age_prop, person_emails_prop], + restrictions=[person_name_restriction, person_age_restriction, person_emails_restriction] + ) + + # Define Organization type, starting with properties and restrictions + org_name_prop = TypeProperty( + id="name", + label="Organization Name", + comment="The official name of the organization", + range_includes=["http://www.w3.org/2001/XMLSchema#string"], + required=True + ) + + org_employees_prop = TypeProperty( + id="employees", + label="Employees", + comment="People working for this organization", + range_includes=["Person"], # Reference to Person type + required=False + ) + + org_name_restriction = Restriction( + property_type="name", + min_cardinality=1, + max_cardinality=1 + ) + + org_employees_restriction = Restriction( + property_type="employees", + min_cardinality=0, + max_cardinality=None # Unbounded list + ) + + organization_type = Type( + id="Organization", + label="Organization", + comment="Represents an organization or company", + subclass_of=["https://schema.org/Organization"], + rdfs_property=[org_name_prop, org_employees_prop], + restrictions=[org_name_restriction, org_employees_restriction] + ) + + # Add types to facade + facade.addType(person_type) + facade.addType(organization_type) + + print("📋 Schema created with types:") + for type_def in facade.get_types(): + 
print(f" - {type_def.id}: {type_def.comment}") + + print("\n🏗️ Exporting Pydantic models...") + + # Export individual model + print("\n1️⃣ Export single model:") + PersonModel = facade.export_pydantic_model("Person") + print(f"Generated class: {PersonModel.__name__}") + print(f"Fields: {list(PersonModel.__annotations__.keys())}") + + # Export all models + print("\n2️⃣ Export all models:") + models = facade.export_all_pydantic_models() + print("Generated models:") + for name, model_class in models.items(): + print(f" - {name}: {model_class.__name__}") + print(f" Fields: {list(model_class.__annotations__.keys())}") + + print("\n✨ Testing generated models...") + + # Test Person model + print("\n👤 Creating Person instances:") + try: + # Valid person with required field + person1 = PersonModel(name="Alice Johnson", age=30, emails=["alice@example.com", "alice.j@work.com"]) + print(f"✅ Created person: {person1.name}, age {person1.age}") + print(f" Emails: {person1.emails}") + + # Person with only required fields + person2 = PersonModel(name="Bob Smith") + print(f"✅ Created person: {person2.name} (minimal)") + + # Test validation - missing required field + print("\n🔍 Testing validation:") + try: + invalid_person = PersonModel(age=25) # Missing required 'name' + print("❌ This should have failed!") + except Exception as e: + print(f"✅ Validation caught error: {e}") + + except Exception as e: + print(f"❌ Error creating person: {e}") + + # Test Organization model + print("\n🏢 Creating Organization instances:") + try: + OrganizationModel = models["Organization"] + + # Note: For now, forward references to other models need to be handled carefully + # In a real implementation, you'd want to resolve these properly + person_as_dict = {"name": "Charlie Brown", "age": 28} + org = OrganizationModel(name="Acme Corporation", employees=[person1, person_as_dict]) + print(f"✅ Created organization: {org.name} with employees {[emp.name for emp in org.employees]}") + + # Test validation - 
employees must be person instances or dicts with the right fields + try: + invalid_org = OrganizationModel(name="Invalid Org", employees=["Not a person"]) + print("❌ This should have failed!") + except Exception as e: + print(f"✅ Validation caught error: {e}") + + + + # Test validation - employees missing name (required field) will fail + fake_person = {"firstname": "Fake", "lastname": "Person"} + try: + invalid_org = OrganizationModel(name="Invalid Org", employees=[fake_person]) + print("❌ This should have failed!") + except Exception as e: + print(f"✅ Validation caught error: {e}") + + except Exception as e: + print(f"❌ Error creating organization: {e}") + + print("\n🎯 Model schemas:") + print("\nPerson model schema:") + try: + print(PersonModel.model_json_schema()) + except Exception as e: + print(f"Schema generation error: {e}") + + print("\n🎉 Pydantic export demo completed!") + print("\n💡 Key features demonstrated:") + print(" ✓ Export Type definitions as Pydantic model classes") + print(" ✓ Handle required vs optional fields from OWL restrictions") + print(" ✓ Support list fields (unbounded cardinality)") + print(" ✓ Map RDF types to Python types") + print(" ✓ Generate proper Pydantic validation") + print(" ✓ Preserve field metadata (descriptions)") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/examples/full_example.py b/0.2.x/lib/python/lib-ro-crate-schema/examples/full_example.py new file mode 100644 index 0000000..7c40e7d --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/examples/full_example.py @@ -0,0 +1,652 @@ +#!/usr/bin/env python3 +""" +Comprehensive RO-Crate Schema Library Demonstration + +This example showcases the full capabilities of the RO-Crate schema library through +a complex scientific workflow involving OpenBIS data management, chemical synthesis, object modification with round-trip persistence. 
+ +Features demonstrated: +- Complex nested object hierarchies (Project → Space → Collections/Equipment) +- Self-referential relationships (molecules containing other molecules) +- Mixed ontology namespaces (OpenBIS custom + schema.org) +- Dynamic experimental workflow simulation +- Large-scale RDF generation and serialization +- Round-trip fidelity with state modifications +- Real-world scientific data modeling + +Run with: uv run python examples/full_example.py +""" + +import json +from math import e  # NOTE(review): `e` is not used in the visible part of this file; looks like an accidental auto-import - confirm and remove +import sys +import csv +import tempfile +from pathlib import Path +from datetime import datetime +from tkinter import E  # NOTE(review): `E` is not used in the visible part of this file; importing tkinter can fail on Python builds without Tk - confirm and remove +from typing import List, Optional, Dict, Any + +# Add src to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +from pydantic import BaseModel +from lib_ro_crate_schema.crate.decorators import ro_crate_schema, Field +from lib_ro_crate_schema.crate.schema_facade import SchemaFacade + + +# Removed print_section function - using direct print statements instead + + +# ============================================================================ +# MODEL DEFINITIONS +# ============================================================================ + +@ro_crate_schema(ontology="http://openbis.org/Project") +class Project(BaseModel): + """OpenBIS research project""" + code: str = Field(comment="Unique project identifier") + name: str = Field(ontology="https://schema.org/name") + description: str = Field(ontology="https://schema.org/description") + created_date: datetime = Field(ontology="https://schema.org/dateCreated") + space: Optional['Space'] = Field(default=None, ontology="http://openbis.org/hasSpace") + + +@ro_crate_schema(ontology="http://openbis.org/Space") +class Space(BaseModel): + """OpenBIS laboratory space""" + name: str = Field(ontology="https://schema.org/name") + description: str = Field(ontology="https://schema.org/description") + created_date: datetime = Field(ontology="https://schema.org/dateCreated") +
collections: List['Collection'] = Field(default=[], ontology="http://openbis.org/hasCollection") + +@ro_crate_schema(ontology="http://openbis.org/Collection") +class Collection(BaseModel): + """OpenBIS sample/data collection""" + name: str = Field(ontology="https://schema.org/name") + sample_type: str = Field(comment="Type of samples stored") + storage_conditions: str = Field(comment="Storage requirements") + created_date: datetime = Field(ontology="https://schema.org/dateCreated") + contains: List[Any] = Field(default=[], comment="Entities contained in the collection") + + +@ro_crate_schema(ontology="http://openbis.org/Equipment") +class Equipment(BaseModel): + """Laboratory equipment with optional nesting""" + name: str = Field(ontology="https://schema.org/name") + model: str = Field(comment="Equipment model/version") + serial_number: str = Field(ontology="https://schema.org/serialNumber") + created_date: datetime = Field(ontology="https://schema.org/dateCreated") + parent_equipment: Optional['Equipment'] = Field(default=None, ontology="https://schema.org/isPartOf") + configuration: Dict[str, Any] = Field(default={}, comment="Equipment configuration parameters") + + +@ro_crate_schema(ontology="https://schema.org/ChemicalSubstance") +class Molecule(BaseModel): + """Chemical compound with SMILES notation""" + name: str = Field(ontology="https://schema.org/name") + smiles: str = Field(comment="SMILES notation for chemical structure") + molecular_weight: float = Field(comment="Molecular weight in g/mol") + contains_molecules: List['Molecule'] = Field(default=[], ontology="https://schema.org/hasPart") + cas_number: Optional[str] = Field(default=None, comment="CAS Registry Number") + created_date: datetime = Field(ontology="https://schema.org/dateCreated") + experimental_notes: Optional[str] = Field(default=None, comment="Lab notes or modifications") + + +@ro_crate_schema(ontology="https://schema.org/Person") +class Person(BaseModel): + """Research author/scientist""" 
+ name: str = Field(ontology="https://schema.org/name") + orcid: str = Field(ontology="https://schema.org/identifier") + email: str = Field(ontology="https://schema.org/email") + affiliation: 'Organization' = Field(ontology="https://schema.org/affiliation") + colleagues: List['Person'] = Field(default=[], ontology="https://schema.org/colleague") + + +@ro_crate_schema(ontology="https://schema.org/Organization") +class Organization(BaseModel): + """Research institution""" + name: str = Field(ontology="https://schema.org/name") + country: str = Field(ontology="https://schema.org/addressCountry") + website: str = Field(ontology="https://schema.org/url") + + +@ro_crate_schema(ontology="https://schema.org/ScholarlyArticle") +class Publication(BaseModel): + """Scientific publication""" + title: str = Field(ontology="https://schema.org/name") + authors: List[Person] = Field(ontology="https://schema.org/author") + molecules: List[Molecule] = Field(ontology="https://schema.org/mentions") + equipment: List[Equipment] = Field(ontology="https://schema.org/instrument") + organization: Organization = Field(ontology="https://schema.org/publisher") + doi: str = Field(ontology="https://schema.org/identifier") + publication_date: datetime = Field(ontology="https://schema.org/datePublished") + + +def create_initial_data(): + """Create all initial model instances""" + + print("\n🎯 PHASE 1: INITIAL DATA CREATION") + print("=" * 40) + + # Organization + empa = Organization( + name="Swiss Federal Laboratories for Materials Science and Technology (Empa)", + country="Switzerland", + website="https://www.empa.ch" + ) + + # People (with circular colleague relationships) + # First create persons without colleagues + sarah = Person( + name="Dr. Sarah Chen", + orcid="0000-0002-1234-5678", + email="sarah.chen@empa.ch", + affiliation=empa, + colleagues=[] + ) + + marcus = Person( + name="Prof. 
Marcus Weber", + orcid="0000-0003-8765-4321", + email="marcus.weber@empa.ch", + affiliation=empa, + colleagues=[] + ) + + # Now establish circular colleague relationships + # This tests how the system handles circular imports in the schema + sarah = sarah.model_copy(update={'colleagues': [marcus]}) + marcus = marcus.model_copy(update={'colleagues': [sarah]}) + + # Equipment (nested) + mass_spec = Equipment( + name="Agilent 7890A GC-MS", + model="7890A", + serial_number="DE43151234", + created_date=datetime(2023, 1, 15), + configuration={ + "ionization_mode": "EI", + "mass_range_min": 50, + "mass_range_max": 500, + "resolution": "unit_mass", + "detector_voltage": 1200 + } + ) + + reactor = Equipment( + name="FlowSyn Reactor", + model="v2.1", + serial_number="FSR-2024-001", + created_date=datetime(2023, 2, 1), + parent_equipment=mass_spec, # Mass spec is part of reactor system + configuration={ + "max_temperature_celsius": 250, + "max_pressure_bar": 10, + "flow_rate_ml_per_min": 5, + "volume_ml": 50, + "heating_method": "microwave" + } + ) + + # Collections + molecules_collection = Collection( + name="Molecular Library", + sample_type="Chemical compounds", + storage_conditions="-20°C, inert atmosphere", + created_date=datetime(2023, 3, 1), + contains=[] # Will populate later + ) + + lab_equipment = Collection( + name="Laboratory Equipment", + sample_type="Analytical instruments", + storage_conditions="Room temperature, calibrated monthly", + created_date=datetime(2023, 2, 15), + contains=[reactor, mass_spec] # Equipment collection contains these items + ) + + # Molecules (with complex relationships) + benzene = Molecule( + name="Benzene", + smiles="c1ccccc1", + molecular_weight=78.11, + cas_number="71-43-2", + created_date=datetime(2024, 1, 10) + ) + + toluene = Molecule( + name="Toluene", + smiles="Cc1ccccc1", + molecular_weight=92.14, + cas_number="108-88-3", + created_date=datetime(2024, 1, 12) + ) + + phenol = Molecule( + name="Phenol", + smiles="c1ccc(cc1)O", + 
molecular_weight=94.11, + cas_number="108-95-2", + created_date=datetime(2024, 1, 15) + ) + + aniline = Molecule( + name="Aniline", + smiles="c1ccc(cc1)N", + molecular_weight=93.13, + cas_number="62-53-3", + created_date=datetime(2024, 1, 18) + ) + + # Complex polymer containing other molecules + complex_polymer = Molecule( + name="Benzene-Toluene Polymer", + smiles="[*]c1ccccc1[*].[*]Cc1ccccc1[*]", # Polymer SMILES + molecular_weight=340.45, + contains_molecules=[benzene, toluene], # Self-reference + created_date=datetime(2024, 2, 1) + ) + + # Add molecules to collection + molecules_collection.contains.extend([benzene, toluene, phenol, aniline, complex_polymer]) + + # OpenBIS hierarchy + science_space = Space( + name="Advanced Materials Laboratory", + description="State-of-the-art facility for nanomaterial synthesis and characterization", + created_date=datetime(2023, 1, 1), + collections=[molecules_collection, lab_equipment] + ) + + openbis_project = Project( + code="NANO-2024", + name="Nanocomposite Materials Research", + description="Development of advanced nanocomposite materials for industrial applications", + created_date=datetime(2024, 1, 1), + space=science_space + ) + + # Publication tying everything together + publication = Publication( + title="Advanced Nanocomposite Materials: From Molecular Design to Industrial Applications", + authors=[sarah, marcus], + molecules=[benzene, toluene, phenol, aniline, complex_polymer], + equipment=[reactor, mass_spec], + organization=empa, + doi="10.1021/acs.nanolett.2024.12345", + publication_date=datetime(2024, 6, 15) + ) + + return { + 'openbis_project': openbis_project, + 'science_space': science_space, + 'molecules_collection': molecules_collection, + 'lab_equipment': lab_equipment, + 'reactor': reactor, + 'mass_spec': mass_spec, + 'benzene': benzene, + 'toluene': toluene, + 'phenol': phenol, + 'aniline': aniline, + 'complex_polymer': complex_polymer, + 'sarah': sarah, + 'marcus': marcus, + 'empa': empa, + 
'publication': publication + } + + +class MoleculeModel: # Alias for sake of this example + pass + +# EquipmentModel = Equipment # Alias for clarity + +def experiment(reactant1, reactant2, catalyst, equipment) -> tuple[dict, Path]: + """ + Simulate chemical synthesis experiment and locate observation file + + Creates a new product molecule by combining reactants and modifies + the original reactants with experimental notes. Also locates the sample CSV + file with experimental observations. + + Args: + reactant1: Primary reactant molecule + reactant2: Secondary reactant molecule + catalyst: Catalytic molecule (unchanged) + equipment: Equipment used for reaction + + Returns: + Tuple of (dict of new product molecule fields, path to observations CSV file) + """ + + print("\n🔹 EXPERIMENTAL SYNTHESIS") + print(f" Reactants: {reactant1.name} + {reactant2.name}") + print(f" Catalyst: {catalyst.name}") + print(f" Equipment: {equipment.name}") + + # Experimental parameters and observations + experiment_time = datetime.now() + + # Create product molecule with combined SMILES + # Simple concatenation for demo (real chemistry would be more complex) + product_smiles = f"({reactant1.smiles}).({reactant2.smiles})" + product_mw = reactant1.molecular_weight + reactant2.molecular_weight + + product_dict = { + "name": f"{reactant1.name}-{reactant2.name} Adduct", + "smiles": product_smiles, + "molecular_weight": product_mw, + "contains_molecules": [reactant1, reactant2], # Reactant objects, not names + "created_date": experiment_time.isoformat(), + "experimental_notes": f"Synthesized via {catalyst.name} catalysis using {equipment.name}" + } + + # Get sample experimental observations CSV file (located in same folder as this script) + csv_path = Path(__file__).parent / "experimental_observations.csv" + + # Check for file + if not csv_path.exists(): + print(f" ⚠️ Warning: Observations CSV file not found at {csv_path}. 
Skipping file adding.") + else: + print(f" 📁 Found observations CSV file at: {csv_path}") + + # Modify original reactants with experimental data + reactant1.experimental_notes = f"Consumed 0.5 mol in synthesis reaction at {experiment_time.strftime('%Y-%m-%d %H:%M')}" + reactant2.experimental_notes = f"Partially consumed, 0.3 mol remaining after reaction" + + print(f" Product: {product_dict['name']}") + print(f" Product SMILES: {product_dict['smiles']}") + + return product_dict, csv_path + + +def analyze_rocrate_changes(initial_path: Path, final_path: Path): + """Compare initial and final RO-Crate files""" + + print("\n🔹 RO-CRATE COMPARISON ANALYSIS") + + with open(initial_path / "ro-crate-metadata.json", 'r') as f: + initial_data = json.load(f) + + with open(final_path / "ro-crate-metadata.json", 'r') as f: + final_data = json.load(f) + + initial_entities = len(initial_data["@graph"]) + final_entities = len(final_data["@graph"]) + + print(f" 📊 Initial entities: {initial_entities}") + print(f" 📊 Final entities: {final_entities}") + print(f" 📈 Change: +{final_entities - initial_entities} entities") + + # Count entity types + def count_types(data): + types = {} + for entity in data["@graph"]: + entity_type = entity.get("@type", "Unknown") + if isinstance(entity_type, list): + for t in entity_type: + types[t] = types.get(t, 0) + 1 + else: + types[entity_type] = types.get(entity_type, 0) + 1 + return types + + initial_types = count_types(initial_data) + final_types = count_types(final_data) + + print("\n 📋 Entity type changes:") + all_types = set(initial_types.keys()) | set(final_types.keys()) + for entity_type in sorted(all_types): + initial_count = initial_types.get(entity_type, 0) + final_count = final_types.get(entity_type, 0) + if initial_count != final_count: + print(f" {entity_type}: {initial_count} → {final_count} ({final_count - initial_count:+d})") + else: + print(f" {entity_type}: {initial_count} (unchanged)") + + +def main(): + """Execute the complete 
workflow demonstration""" + + print("🧪 COMPREHENSIVE RO-CRATE SCHEMA WORKFLOW DEMONSTRATION") + print("=" * 80) + print("This demo showcases complex scientific data modeling, experimental workflows,") + print("and dynamic object modification with full round-trip persistence.") + + # ======================================================================== + # PHASE 1: INITIAL SETUP + # ======================================================================== + + print("\n🎯 Creating Initial Schema and Data") + print("=" * 40) + + # Create all instances + instances = create_initial_data() + + print(f" ✅ Created {len(instances)} model instances") + print(" 📋 Instance types:") + type_counts = {} + for instance in instances.values(): + type_name = type(instance).__name__ + type_counts[type_name] = type_counts.get(type_name, 0) + 1 + + for type_name, count in sorted(type_counts.items()): + print(f" - {type_name}: {count}") + + print(f"\n 🔄 Circular Relationship Test:") + sarah_instance = instances['sarah'] + marcus_instance = instances['marcus'] + print(f" - Sarah Chen has {len(sarah_instance.colleagues)} colleague(s): {[c.name for c in sarah_instance.colleagues]}") + print(f" - Marcus Weber has {len(marcus_instance.colleagues)} colleague(s): {[c.name for c in marcus_instance.colleagues]}") + + # Build schema facade + facade = SchemaFacade() + facade.add_all_registered_models() + + print(f"\n 📊 Schema: {len(facade.types)} types registered") + + # Add all instances + for instance_id, instance in instances.items(): + facade.add_model_instance(instance, instance_id) + + print(f" 📦 Added {len(facade.metadata_entries)} metadata entries") + + # Generate RDF + rdf_graph = facade.to_graph() + print(f" 🕸️ Generated {len(rdf_graph)} RDF triples") + + # Export initial state + print("\n🔹 Exporting Initial RO-Crate") + import os + output_dir = "output_crates" + os.makedirs(output_dir, exist_ok=True) + initial_path = os.path.join(output_dir, "full_example_initial") + facade.write( + 
destination=initial_path, + name="Complex Scientific Workflow - Initial State", + description="Initial RO-Crate before experimental modifications", + license="MIT" + ) + print(f" 💾 Saved initial state: {initial_path}") + initial_path = Path(initial_path) + + # ======================================================================== + # PHASE 2: IMPORT AND EXPERIMENT + # ======================================================================== + + print("\n🎯 Importing RO-Crate and Running Experiment") + print("=" * 40) + + # Import the RO-Crate we just exported + print("\n🔹 Importing RO-Crate from exported files") + print(f" 📁 Loading RO-Crate from: {initial_path}") + + imported_facade = SchemaFacade.from_ro_crate(initial_path) + + print(f" ✅ Successfully imported RO-Crate!") + print(f" 📊 Imported {len(imported_facade.types)} types") + print(f" 📦 Imported {len(imported_facade.metadata_entries)} metadata entries") + + # Show what was imported + print("\n 📋 Imported types:") + for imported_type in imported_facade.types: + props = len(imported_type.rdfs_property or []) + restrictions = len(imported_type.get_restrictions()) + print(f" - {imported_type.id}: {props} properties, {restrictions} restrictions") + + print("\n 📦 Imported metadata entries (first 5):") + for entry in imported_facade.metadata_entries[:5]: + print(f" - {entry.id} (type: {entry.class_id})") + + # Import Molecule and Equipment Models + MoleculeModel = imported_facade.export_pydantic_model("Molecule") + EquipmentModel = imported_facade.export_pydantic_model("Equipment") + + # Know we need molecules: benzene, toluene, aniline + # And equipment: reactor + benzene = imported_facade.get_entry_as("benzene", MoleculeModel) + toluene = imported_facade.get_entry_as("toluene", MoleculeModel) + aniline = imported_facade.get_entry_as("aniline", MoleculeModel) + reactor = imported_facade.get_entry_as("reactor", EquipmentModel) + + print(f" ✅ Selected from imported data: {benzene.name}, {toluene.name}, 
{aniline.name}, {reactor.name}") + + # Run experiment + product_dict, observations_csv = experiment(benzene, toluene, aniline, reactor) + + # Create new product molecule instance + product = MoleculeModel(**product_dict) + + print(f" 🧪 Experiment complete, product created: {product.name}") + + # ======================================================================== + # PHASE 3: UPDATE AND RE-EXPORT + # ======================================================================== + + print("\n🎯 Updating Schema with Experimental Results") + print("=" * 40) + + # Create new facade with updated data + updated_facade = SchemaFacade() + updated_facade.add_all_registered_models() + + # Add all original instances (now with modifications) + for instance_id, instance in instances.items(): + updated_facade.add_model_instance(instance, instance_id) + + # Add new product + updated_facade.add_model_instance(product, "synthesis_product") + + print(f" 📊 Updated schema: {len(updated_facade.types)} types") + print(f" 📦 Updated entries: {len(updated_facade.metadata_entries)} metadata entries") + + # Generate updated RDF + updated_rdf_graph = updated_facade.to_graph() + print(f" 🕸️ Updated RDF graph: {len(updated_rdf_graph)} triples") + print(f" 📈 RDF growth: +{len(updated_rdf_graph) - len(rdf_graph)} triples") + + # Export final state + print("\n🔹 Exporting Final RO-Crate") + # Add experimental observations file to facade + updated_facade.add_file( + file_path=observations_csv, + name="Experimental Observations", + description="Detailed measurements from chemical synthesis experiment including temperature, pressure, yields and purity data" + ) + + final_path = os.path.join(output_dir, "full_example_final") + updated_facade.write( + destination=final_path, + name="Complex Scientific Workflow - Final State", + description="Final RO-Crate after experimental synthesis with observation data", + license="MIT" + ) + print(f" 💾 Saved final state: {final_path}") + final_path = Path(final_path) + 
+ # ======================================================================== + # PHASE 4: ANALYSIS + # ======================================================================== + + print("\n🎯 WORKFLOW ANALYSIS & RESULTS") + print("=" * 40) + + # Compare facades (original vs imported) + print("\n🔹 Import Fidelity Analysis") + print(f" 📊 Original facade: {len(facade.types)} types, {len(facade.metadata_entries)} entries") + print(f" 📊 Imported facade: {len(imported_facade.types)} types, {len(imported_facade.metadata_entries)} entries") + + # Check if all types were preserved + original_type_ids = {t.id for t in facade.types} + imported_type_ids = {t.id for t in imported_facade.types} + if original_type_ids == imported_type_ids: + print(f" ✅ All {len(original_type_ids)} types preserved in import") + else: + print(f" ⚠️ Type mismatch: original={len(original_type_ids)}, imported={len(imported_type_ids)}") + missing_types = original_type_ids - imported_type_ids + if missing_types: + print(f" Missing: {missing_types}") + extra_types = imported_type_ids - original_type_ids + if extra_types: + print(f" Extra: {extra_types}") + + # Check if all metadata entries were preserved + original_entry_ids = {e.id for e in facade.metadata_entries} + imported_entry_ids = {e.id for e in imported_facade.metadata_entries} + if original_entry_ids == imported_entry_ids: + print(f" ✅ All {len(original_entry_ids)} metadata entries preserved in import") + else: + print(f" ⚠️ Metadata entry mismatch: original={len(original_entry_ids)}, imported={len(imported_entry_ids)}") + missing_entries = original_entry_ids - imported_entry_ids + if missing_entries: + print(f" Missing: {missing_entries}") + extra_entries = imported_entry_ids - original_entry_ids + if extra_entries: + print(f" Extra: {extra_entries}") + + # Compare files + analyze_rocrate_changes(initial_path, final_path) + + # Show experimental modifications + print("\n🔹 Experimental Modifications Detected") + print(f" 🧪 New molecule created: 
{product.name}") + print(f" SMILES: {product.smiles}") + print(f" Notes: {product.experimental_notes}") + + print(f"\n 📝 Modified molecules:") + modified_molecules = [instances['benzene'], instances['toluene']] + for mol in modified_molecules: + if mol.experimental_notes: + print(f" - {mol.name}: {mol.experimental_notes}") + + # Summary statistics + print("\n🔹 Final Statistics") + print(f" 📊 Original facade: {len(facade.types)} types, {len(facade.metadata_entries)} entries") + print(f" 📊 Imported facade: {len(imported_facade.types)} types, {len(imported_facade.metadata_entries)} entries") + print(f" 📊 Final facade: {len(updated_facade.types)} types, {len(updated_facade.metadata_entries)} entries") + print(f" 🕸️ Final RDF triples: {len(updated_rdf_graph)}") + print(f" 🔄 Round-trip cycles: 3 (export → import → experiment → export)") + print(f" ⚗️ Experiments performed: 1") + print(f" 🆕 New entities created: 1") + print(f" ✏️ Entities modified: 2") + + print("\n" + "="*80) + print("🎉 COMPREHENSIVE WORKFLOW WITH IMPORT DEMONSTRATION COMPLETE!") + print(" 📁 RO-Crates created:") + print(f" - Initial: {initial_path}") + print(f" - Final: {final_path}") + print("="*80) + + return { + 'initial_facade': facade, + 'imported_facade': imported_facade, + 'updated_facade': updated_facade, + 'instances': instances, + 'product': product, + 'initial_path': initial_path, + 'final_path': final_path + } + + +if __name__ == "__main__": + results = main() \ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/examples/minimal_import_example.py b/0.2.x/lib/python/lib-ro-crate-schema/examples/minimal_import_example.py new file mode 100644 index 0000000..edb2246 --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/examples/minimal_import_example.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +""" +Minimal import example: Load external openBIS RO-Crate and print summary. 
+""" + +import sys +from pathlib import Path + +# Add src to path +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +from lib_ro_crate_schema.crate.schema_facade import SchemaFacade + +# Import openBIS RO-Crate from external lib/example (kept outside for now) +crate_path = Path(__file__).parent.parent.parent.parent / "example" / "obenbis-one-publication" / "ro-crate-metadata.json" +facade = SchemaFacade.from_ro_crate(str(crate_path)) + +# Print summary +print(f"📁 Imported SchemaFacade with:") +print(f" - {len(facade.types)} RDFS Classes (types)") +print(f" - {len(facade.metadata_entries)} metadata entries") + +print(f"\n📋 Types imported:") +for t in facade.types: + props = len(t.rdfs_property or []) + restrictions = len(t.get_restrictions()) + print(f" - {t.id}: {props} properties, {restrictions} restrictions") + +print(f"\n📦 Metadata entries:") +for entry in facade.metadata_entries[:5]: # Show first 5 + print(f" - {entry.id} (type: {entry.class_id})") + +print(f"\n🎯 Ready to use! 
You can now:") +print(f" - Export: facade.write('output-directory')") +print(f" - Add data: facade.addEntry(...)") +print(f" - Add types: facade.addType(...)") \ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/examples/python_quickstart_read.py b/0.2.x/lib/python/lib-ro-crate-schema/examples/python_quickstart_read.py new file mode 100644 index 0000000..3e7006a --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/examples/python_quickstart_read.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +""" +Python QuickStart Read Example +Mirrors the Java QuickStartRead.java for exact compatibility demonstration +""" + +import sys +sys.path.append('src') + +from lib_ro_crate_schema.crate.schema_facade import SchemaFacade +from python_quickstart_write import write_example_crate + + +# Constants (matching Java pattern exactly) +TMP_EXAMPLE_CRATE = "output_crates/example-crate" + +def read_example_crate(): + """ + Python QuickStart Read matching Java QuickStartRead structure exactly + Demonstrates compatibility between Java and Python RO-Crate implementations + """ + + # First call write example to ensure crate exists (as requested) + write_example_crate() + + # Load RO-Crate from directory (matching Java from_ro_crate pattern) + schemaFacade = SchemaFacade.from_ro_crate(TMP_EXAMPLE_CRATE) + + # Display types (matching Java getTypes() approach) + types = schemaFacade.getTypes() + + print("📚 Types in the crate:") + for typeObj in types: + print(f"- Type {typeObj.getId()}: {typeObj.getComment() if typeObj.getComment() else ''}") + entries = schemaFacade.getEntries(typeObj.getId()) + + for entry in entries: + print(f"{entry.getId()} ({entry.getClassId()}): {entry.properties}") + + + # Display property types + print("📚 Properties in the crate:") + properties = schemaFacade.getPropertyTypes() + for prop in properties: + print(f"{prop.getId()}: {prop.getComment() if prop.getComment() else ''} Range: {prop.getRange()}") + +if __name__ == "__main__": + 
read_example_crate() \ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/examples/python_quickstart_write.py b/0.2.x/lib/python/lib-ro-crate-schema/examples/python_quickstart_write.py new file mode 100644 index 0000000..aef41e0 --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/examples/python_quickstart_write.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python3 +""" +Python QuickStart Write Example +Mirrors the Java Quickstart.java for exact compatibility demonstration +""" + +import sys +sys.path.append('src') + +from lib_ro_crate_schema.crate.schema_facade import SchemaFacade +from lib_ro_crate_schema.crate.type import Type +from lib_ro_crate_schema.crate.type_property import TypeProperty +from lib_ro_crate_schema.crate.metadata_entry import MetadataEntry +from lib_ro_crate_schema.crate.literal_type import LiteralType + + +# Constants (matching Java pattern exactly) +TMP_EXAMPLE_CRATE = "output_crates/example-crate" + +def write_example_crate(): + """ + Python QuickStart matching Java Quickstart structure exactly + Demonstrates compatibility between Java and Python RO-Crate implementations + """ + + PREFIX = "" #Example" + SEPARATOR = "" #:" + + # Setting up an RO-Crate with the schema facade (matching Java constructor pattern) + schemaFacade = SchemaFacade() + + personType = Type(id="id") # Temporary ID for pydantic requirement + + # Block 1: Person type setup (matching Java structure exactly) + personType.setId(PREFIX + SEPARATOR + "Person") + personType.setOntologicalAnnotations(["https://schema.org/Person"]) + + # Block 2: Person ID property (matching Java block structure) + personId = TypeProperty(id="id") # Temporary ID for pydantic requirement + personId.setId(PREFIX + SEPARATOR + "personid") + personId.setTypes([LiteralType.STRING]) + personType.addProperty(personId) + + # Block 3: Given name property (matching Java block structure) + givenName = TypeProperty(id="id") # Temporary ID for pydantic requirement + 
givenName.setId(PREFIX + SEPARATOR + "givenName") + givenName.setOntologicalAnnotations(["https://schema.org/givenName"]) + givenName.setTypes([LiteralType.STRING]) + personType.addProperty(givenName) + + # Block 4: Family name property (matching Java block structure) + familyName = TypeProperty(id="id") # Temporary ID for pydantic requirement + familyName.setId(PREFIX + SEPARATOR + "familyName") + familyName.setOntologicalAnnotations(["https://schema.org/familyName"]) + familyName.setTypes([LiteralType.STRING]) + personType.addProperty(familyName) + + # Block 5: Identifier property (matching Java block structure) + identifier = TypeProperty(id="id") # Temporary ID for pydantic requirement + identifier.setId(PREFIX + SEPARATOR + "identifier") + identifier.setOntologicalAnnotations(["https://schema.org/identifier"]) + identifier.setTypes([LiteralType.STRING]) + personType.addProperty(identifier) + + schemaFacade.addType(personType) + + # Building Experiment type (matching Java block structure) + experimentType = Type(id="id") # Temporary ID for pydantic requirement + experimentType.setId(PREFIX + SEPARATOR + "Experiment") + + # Block 1: Experiment ID property (matching Java block structure) + experimentId = TypeProperty(id="id") # Temporary ID for pydantic requirement + experimentId.setId(PREFIX + SEPARATOR + "experimentid") + experimentId.setTypes([LiteralType.STRING]) + experimentType.addProperty(experimentId) + + # Block 2: Creator property (matching Java block structure) + creator = TypeProperty(id="id") # Temporary ID for pydantic requirement + creator.setId(PREFIX + SEPARATOR + "creator") + creator.setOntologicalAnnotations(["https://schema.org/creator"]) + creator.addType(personType) # References the personType (matching Java pattern) + experimentType.addProperty(creator) + + # Block 3: Name property (matching Java block structure) + name = TypeProperty(id="id") # Temporary ID for pydantic requirement + name.setId(PREFIX + SEPARATOR + "name") + 
name.setTypes([LiteralType.STRING]) + experimentType.addProperty(name) + + # Block 4: Date property (matching Java block structure) + date = TypeProperty(id="id") # Temporary ID for pydantic requirement + date.setId(PREFIX + SEPARATOR + "date") + date.setTypes([LiteralType.DATETIME]) + experimentType.addProperty(date) + + schemaFacade.addType(experimentType) + + # Creating metadata entries (matching Java block structure exactly) + + # Block 1: Person Andreas (matching Java structure) + personAndreas = MetadataEntry(id="id", class_id="id") # Temporary values for pydantic requirement + personAndreas.setId("PERSON1") + personAndreas.setClassId(personType.getId()) + properties = {} + properties["givenname"] = "Andreas" + properties["lastname"] = "Meier" + properties["identifier"] = "https://orcid.org/0009-0002-6541-4637" + personAndreas.setProperties(properties) + personAndreas.setReferences({}) + schemaFacade.addEntry(personAndreas) + + # Block 2: Person Juan (matching Java structure) - Note: Java has "Andreas" twice, following that pattern + personJuan = MetadataEntry(id="id", class_id="id") # Temporary values for pydantic requirement + personJuan.setId("PERSON2") + personJuan.setClassId(personType.getId()) + properties2 = {} + properties2["givenname"] = "Juan" # Matching Java code (has Andreas for both persons) + properties2["lastname"] = "Meier" + properties2["identifier"] = "https://orcid.org/0009-0002-6541-4637" + personJuan.setProperties(properties2) + personJuan.setReferences({}) + schemaFacade.addEntry(personJuan) + + # Block 3: Experiment 1 (matching Java structure) + experiment1 = MetadataEntry(id="id", class_id="id") # Temporary values for pydantic requirement + experiment1.setId("EXPERIMENT1") + experiment1.setClassId(experimentType.getId()) + experiment1.setReferences({"creator": [personAndreas.getId()]}) + propertiesExperiment = {} + propertiesExperiment["name"] = "Example Experiment" + propertiesExperiment["date"] = "2025-09-08 08:41:50.000" # ISO 8601 
+ experiment1.setProperties(propertiesExperiment) + schemaFacade.addEntry(experiment1) + + # Write to file (matching Java FolderWriter pattern) + schemaFacade.write(TMP_EXAMPLE_CRATE, name="Python QuickStart Example") + + +if __name__ == "__main__": + write_example_crate() \ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/pyproject.toml b/0.2.x/lib/python/lib-ro-crate-schema/pyproject.toml index 860c973..f9f88f5 100644 --- a/0.2.x/lib/python/lib-ro-crate-schema/pyproject.toml +++ b/0.2.x/lib/python/lib-ro-crate-schema/pyproject.toml @@ -1,26 +1,79 @@ [project] name = "lib-ro-crate-schema" -version = "0.1.0" -description = "Import and export Ro crate" +version = "0.2.0" +description = "A Pythonic library for creating and managing RO-Crates with schema definitions using Pydantic models" readme = "README.md" +license = { text = "Apache-2.0" } authors = [ - { name = "Simone Baffelli", email = "simone.baffelli@empa.ch" } + { name = "Simone Baffelli", email = "simone.baffelli@empa.ch" }, { name = "Pascal Su", email = "pascal.su@empa.ch" } ] +maintainers = [ + { name = "Simone Baffelli", email = "simone.baffelli@empa.ch" }, + { name = "Pascal Su", email = "pascal.su@empa.ch" } +] +keywords = [ + "ro-crate", + "research-object", + "metadata", + "pydantic", + "linked-data", + "rdf", + "schema", + "fair-data" +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.13", + "Topic :: Scientific/Engineering", + "Topic :: Software Development :: Libraries :: Python Modules", +] requires-python = ">=3.13" dependencies = [ "pydantic>=2.11.7", - "pydantic-rdf>=0.2.0", "pyld>=2.0.4", "pyshacl>=0.30.1", - "rdflib-jsonld>=0.6.2", + "rdflib>=7.1.4", "rocrate>=0.14.0", ] +[project.urls] +Homepage = 
"https://github.com/Snowwpanda/ro-crate-interoperability-profile" +Documentation = "https://github.com/Snowwpanda/ro-crate-interoperability-profile/tree/main/0.2.x/lib/python/lib-ro-crate-schema" +Repository = "https://github.com/Snowwpanda/ro-crate-interoperability-profile" +Issues = "https://github.com/Snowwpanda/ro-crate-interoperability-profile/issues" + +[project.optional-dependencies] +dev = [ + "pytest>=7.0.0", + "pytest-cov>=4.0.0", +] + [project.scripts] check = "lib_ro_crate_schema.check:main" example = "lib_ro_crate_schema.example.examples:main" +[tool.pytest.ini_options] +minversion = "7.0" +addopts = "-ra -q --strict-markers" +testpaths = [ + "tests", +] +python_files = [ + "test_*.py", + "*_test.py", +] +python_classes = [ + "Test*", +] +python_functions = [ + "test_*", +] + [build-system] requires = ["hatchling"] build-backend = "hatchling.build" diff --git a/0.2.x/lib/python/lib-ro-crate-schema/run_all_tests.py b/0.2.x/lib/python/lib-ro-crate-schema/run_all_tests.py new file mode 100644 index 0000000..a70f69d --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/run_all_tests.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +""" +Test runner for RO-Crate Schema Library +""" +import sys +import subprocess +from pathlib import Path + +def run_test(test_file): + """Run a single test file and return success status""" + print(f"\n🧪 Running {test_file.name}") + print("=" * 60) + + try: + result = subprocess.run([sys.executable, str(test_file)], + capture_output=False, + check=True, + cwd=test_file.parent) + print(f"✅ {test_file.name} PASSED") + return True + except subprocess.CalledProcessError as e: + print(f"❌ {test_file.name} FAILED (exit code: {e.returncode})") + return False + except Exception as e: + print(f"❌ {test_file.name} ERROR: {e}") + return False + +def main(): + """Run all tests""" + print("🚀 RO-Crate Schema Library Test Suite") + print("=" * 60) + + # Find test directory + test_dir = Path(__file__).parent / "tests" + if not 
test_dir.exists(): + print(f"❌ Test directory not found: {test_dir}") + return False + + # Find all test files + test_files = list(test_dir.glob("test_*.py")) + if not test_files: + print(f"❌ No test files found in {test_dir}") + return False + + print(f"📋 Found {len(test_files)} test files:") + for test_file in test_files: + print(f" - {test_file.name}") + + # Run tests + results = [] + for test_file in test_files: + success = run_test(test_file) + results.append((test_file.name, success)) + + # Summary + print("\n🎯 Test Results Summary") + print("=" * 60) + + passed = sum(1 for _, success in results if success) + total = len(results) + + for test_name, success in results: + status = "✅ PASS" if success else "❌ FAIL" + print(f" {test_name}: {status}") + + print(f"\n📊 Overall: {passed}/{total} tests passed") + + if passed == total: + print("🏆 ALL TESTS PASSED!") + return True + else: + print("💥 SOME TESTS FAILED!") + return False + +if __name__ == "__main__": + success = main() + sys.exit(0 if success else 1) \ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/run_tests.py b/0.2.x/lib/python/lib-ro-crate-schema/run_tests.py new file mode 100644 index 0000000..ef4d556 --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/run_tests.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +""" +Interactive test runner for RO-Crate bidirectional system +""" + +import sys +import subprocess +from pathlib import Path + +def run_test(test_file, working_dir=None): + """Run a test file with proper environment setup""" + import os + + original_dir = Path.cwd() + + try: + if working_dir: + Path(working_dir).mkdir(parents=True, exist_ok=True) + os.chdir(working_dir) + + # Make test_file relative to the working directory if it's absolute + if working_dir and test_file.is_absolute(): + try: + test_file = test_file.relative_to(working_dir) + except ValueError: + # If we can't make it relative, use the absolute path + pass + + # Try to use uv if available, 
otherwise use regular python + try: + result = subprocess.run([ + "uv", "run", "python", str(test_file) + ], check=True, capture_output=False) + except (subprocess.CalledProcessError, FileNotFoundError): + # Fallback to regular python + result = subprocess.run([ + "python", str(test_file) + ], check=True, capture_output=False) + + return result.returncode == 0 + except Exception as e: + print(f"❌ Error running {test_file}: {e}") + return False + finally: + os.chdir(original_dir) + +def main(): + print("🔬 RO-Crate Bidirectional Test Runner") + print("=====================================") + + # Get the path to test folder + test_folder = Path(__file__).parent / "tests" + # Read in the tests dictionary + if not test_folder.exists(): + print(f"❌ Test folder not found: {test_folder}") + sys.exit(1) + tests = {} + test_counter = 1 + for test in test_folder.glob("test_*.py"): + test_name = test.stem.replace("test_", "").replace("_", " ").title() + tests[str(test_counter)] = (test_name, test, None) + test_counter += 1 + + + + print("\nAvailable tests:") + for key, (name, _, _) in tests.items(): + print(f"{key}. {name}") + print() + + choice = input("Select test (number) or press Enter for complete test: ").strip() + + if not choice: + # Run script run_all_tests.py + script_path = Path(__file__).parent / "run_all_tests.py" + if script_path.exists(): + print("\n🔄 Running all tests via run_all_tests.py...") + success = run_test(script_path) + if success: + print("\n✅ All tests completed successfully!") + else: + print("\n❌ Some tests failed!") + sys.exit(1) + print("\n🏁 Test execution completed!") + return + + if choice in tests: + name, test_file, working_dir = tests[choice] + print(f"\n🔄 Running {name}...") + success = run_test(test_file, working_dir) + + if success: + print(f"\n✅ {name} completed successfully!") + else: + print(f"\n❌ {name} failed!") + sys.exit(1) + else: + print("❌ Invalid choice. 
Running default complete test...") + run_test("test_complete_round_trip.py") + + print("\n🏁 Test execution completed!") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/__init__.py b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/__init__.py index b6c16aa..3e28230 100644 --- a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/__init__.py +++ b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/__init__.py @@ -1,2 +1,73 @@ -def hello() -> str: - return "Hello from lib-ro-crate-schema!" +""" +RO-Crate Schema Library +======================== + +A Pythonic library for creating and managing RO-Crates with schema definitions using Pydantic models. + +Quick Start +----------- + +.. code-block:: python + + from lib_ro_crate_schema import SchemaFacade, ro_crate_schema, Field + from pydantic import BaseModel + + @ro_crate_schema(ontology="https://schema.org/Person") + class Person(BaseModel): + name: str = Field(ontology="https://schema.org/name") + email: str = Field(ontology="https://schema.org/email") + + # Create and export + facade = SchemaFacade() + facade.add_all_registered_models() + person = Person(name="Alice", email="alice@example.com") + facade.add_model_instance(person, "person_001") + facade.write("my_crate") + +""" + +__version__ = "0.2.0" + +# Import core components from crate module +from .crate import ( + # Core components + SchemaFacade, + Type, + TypeProperty, + MetadataEntry, + Restriction, + + # Decorator API (recommended for most users) + ro_crate_schema, + Field, + + # Registry system (advanced usage) + SchemaRegistry, + get_schema_registry, + register_model, + is_ro_crate_model, + get_registered_models, +) + +__all__ = [ + # Version + "__version__", + + # Core components + "SchemaFacade", + "Type", + "TypeProperty", + "MetadataEntry", + "Restriction", + + # Decorator API (most commonly used) + "ro_crate_schema", + "Field", + 
+ # Registry system + "SchemaRegistry", + "get_schema_registry", + "register_model", + "is_ro_crate_model", + "get_registered_models", +] diff --git a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/check.py b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/check.py index 8d55845..d5fd397 100644 --- a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/check.py +++ b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/check.py @@ -25,28 +25,68 @@ def load_graph(path: Path, fmt: DataFormat) -> Graph: def main(): parser = ArgumentParser("Check a RO-crate-profile file for conformity") - parser.add_argument("data_file", type=Path) - parser.add_argument("shape_file", type=Path) - parser.add_argument("data_format", type=DataFormat) + parser.add_argument("data_file", type=Path, help="RDF data file to validate") + parser.add_argument("--shape-file", type=Path, default=None, + help="SHACL shapes file (default: tests/schema.shacl)") + parser.add_argument("--format", type=DataFormat, default=DataFormat.TURTLE, + help="Data format (json-ld or ttl)") + parser.add_argument("--verbose", "-v", action="store_true", + help="Show detailed validation results") args = parser.parse_args() + data_path = args.data_file - shape_path = args.shape_file - data_format = args.data_format + data_format = args.format + + # Default to our updated SHACL schema + if args.shape_file: + shape_path = args.shape_file + else: + # Look for schema.shacl in tests directory + current_dir = Path(__file__).parent + shape_path = current_dir.parent.parent / "tests" / "schema.shacl" + + print(f"🔍 Validating: {data_path}") + print(f"📐 Using SHACL: {shape_path}") + print(f"📄 Data format: {data_format.value}") + + if not data_path.exists(): + print(f"❌ Data file not found: {data_path}") + sys.exit(1) + + if not shape_path.exists(): + print(f"❌ SHACL file not found: {shape_path}") + print(" Use --shape-file to specify a custom SHACL schema") + sys.exit(1) - data_graph = 
load_graph(data_path, DataFormat.JSONLD) - shape_graph = load_graph(shape_path, DataFormat.TURTLE) - print(data_graph.all_nodes()) - print(shape_graph.all_nodes()) + try: + data_graph = load_graph(data_path, data_format) + shape_graph = load_graph(shape_path, DataFormat.TURTLE) + + print(f"✅ Loaded {len(data_graph)} data triples") + print(f"✅ Loaded {len(shape_graph)} SHACL constraint triples") + + except Exception as e: + print(f"❌ Error loading graphs: {e}") + sys.exit(1) + print("\n🔍 Running SHACL validation...") + conforms, results_graph, results_text = validate( data_graph=data_graph, shacl_graph=shape_graph, - debug=True, + debug=args.verbose, serialize_report_graph=True, ) - print("✔ Conforms" if conforms else "✘ Does NOT conform") - print(results_text) + if conforms: + print("✅ VALIDATION PASSED - Data conforms to SHACL schema!") + print(f" 📊 {len(data_graph)} triples validated successfully") + else: + print("❌ VALIDATION FAILED - Constraint violations found:") + print(results_text) + + if results_graph and args.verbose: + print(f"\n📋 Generated {len(results_graph)} validation result triples") if not conforms: sys.exit(1) diff --git a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/__init__.py b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/__init__.py index e69de29..05ef0b6 100644 --- a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/__init__.py +++ b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/__init__.py @@ -0,0 +1,41 @@ +""" +RO-Crate interoperability profile implementation. 
+""" + +__version__ = "0.2.0" + +# Core schema components +from .schema_facade import SchemaFacade +from .type import Type +from .type_property import TypeProperty +from .metadata_entry import MetadataEntry +from .restriction import Restriction + +# Schema registry and decorator system +from .schema_registry import SchemaRegistry, TypeTemplate, TypePropertyTemplate, get_schema_registry +from .decorators import ro_crate_schema, Field, register_model, is_ro_crate_model, get_registered_models + +__all__ = [ + # Version + "__version__", + + # Core components + "SchemaFacade", + "Type", + "TypeProperty", + "MetadataEntry", + "Restriction", + + # Registry system + "SchemaRegistry", + "TypeTemplate", + "TypePropertyTemplate", + "get_schema_registry", + + # Decorator API + "ro_crate_schema", + "Field", + "register_model", + "is_ro_crate_model", + "get_registered_models", +] \ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/decorators.py b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/decorators.py new file mode 100644 index 0000000..5d325e9 --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/decorators.py @@ -0,0 +1,211 @@ +""" +Decorator system for registering Pydantic models as RO-Crate schema types. +""" +from typing import Type, Optional, Any, Union +from functools import wraps +from pydantic import BaseModel, Field as PydanticField + +from .schema_registry import get_schema_registry, TypeTemplate + + +def Field(ontology: Optional[str] = None, comment: Optional[str] = None, **kwargs): + """Enhanced Pydantic Field that supports ontology annotations for RO-Crate schema generation. 
+ + Args: + ontology: URI of the ontological concept this field represents + comment: Human-readable description of this field + **kwargs: Standard Pydantic Field arguments + + Returns: + Pydantic FieldInfo with RO-Crate metadata + + Example: + name: str = Field(ontology="https://schema.org/name", comment="Person's full name") + """ + # Store RO-Crate specific metadata in json_schema_extra + json_schema_extra = kwargs.get('json_schema_extra', {}) + if ontology is not None: + json_schema_extra['ontology'] = ontology + if comment is not None: + json_schema_extra['comment'] = comment + + if json_schema_extra: # Only set if we have RO-Crate metadata + kwargs['json_schema_extra'] = json_schema_extra + + # Set description from comment if not provided and remove any lingering ontology/comment + if comment is not None and 'description' not in kwargs: + kwargs['description'] = comment + + # Ensure ontology and comment are not passed directly to PydanticField + # (they should only be in json_schema_extra) + kwargs.pop('ontology', None) + kwargs.pop('comment', None) + + return PydanticField(**kwargs) + + +def ro_crate_schema( + ontology: Optional[str] = None, + comment: Optional[str] = None, + auto_register: bool = True, + id: Optional[str] = None +): + """Decorator to mark Pydantic models as RO-Crate schema types. + + This decorator registers the model in the global schema registry and enables + automatic schema generation for RO-Crate interoperability. 
+ + Args: + ontology: URI of the ontological concept this model represents + comment: Human-readable description of this model type + auto_register: Whether to automatically register the model (default: True) + id: RO-Crate schema ID for the type (defaults to class name if not provided) + + Returns: + Decorated Pydantic model class with RO-Crate metadata + + Example: + @ro_crate_schema(id="Person", ontology="https://schema.org/Person") + class PersonModel(BaseModel): + name: str = Field(ontology="https://schema.org/name") + email: str = Field(ontology="https://schema.org/email") + """ + def decorator(cls: Type[BaseModel]) -> Type[BaseModel]: + # Ensure it's a Pydantic model + if not issubclass(cls, BaseModel): + raise TypeError(f"@ro_crate_schema can only be applied to Pydantic BaseModel classes, got {cls}") + + # Determine the ID to use (explicit id parameter or class name) + type_id = id if id is not None else cls.__name__ + + # Store RO-Crate metadata on the class + cls._ro_crate_ontology = ontology + cls._ro_crate_comment = comment or cls.__doc__ + cls._ro_crate_registered = auto_register + cls._ro_crate_id = type_id # Store the explicit ID + + # Auto-register in the global schema registry + if auto_register: + registry = get_schema_registry() + type_template = registry.register_type_from_model( + model_class=cls, + type_id=type_id, # Use the determined ID + ontology=ontology, + comment=comment or cls.__doc__ + ) + cls._ro_crate_type_template = type_template + + # Add helper methods to the class + cls.get_ro_crate_metadata = classmethod(_get_ro_crate_metadata) + cls.to_ro_crate_type = classmethod(_to_ro_crate_type) + + return cls + + return decorator + + +def _get_ro_crate_metadata(cls) -> Optional[TypeTemplate]: + """Get the RO-Crate metadata for this model class.""" + if hasattr(cls, '_ro_crate_type_template'): + return cls._ro_crate_type_template + + # Try to get from registry using the stored ID or class name as fallback + registry = get_schema_registry() + 
if hasattr(cls, '_ro_crate_id'): + return registry.get_type_template(cls._ro_crate_id) + else: + # Fallback to class name for backward compatibility + return registry.get_type_template(cls.__name__) + + +def _to_ro_crate_type(cls): + """Convert this model class to a Type object for RO-Crate schema generation.""" + from .type import Type + from .type_property import TypeProperty + from .restriction import Restriction + + type_template = cls.get_ro_crate_metadata() + if not type_template: + raise ValueError(f"Model {cls.__name__} is not registered with RO-Crate schema") + + # Convert properties + properties = [] + restrictions = [] + + for prop_template in type_template.type_properties: + # Create TypeProperty + type_property = TypeProperty( + id=prop_template.name, + range_includes=[prop_template.rdf_type], + domain_includes=[], # Will be set by SchemaFacade + ontological_annotations=[prop_template.ontology] if prop_template.ontology else [], + comment=prop_template.comment, + label=prop_template.name.replace('_', ' ').title() + ) + properties.append(type_property) + + # Create restrictions for all fields (required and optional) + if prop_template.required: + # Required fields: minCardinality = 1 + restriction = Restriction( + property_type=prop_template.name, + min_cardinality=1, + max_cardinality=1 if not prop_template.is_list else None + ) + else: + # Optional fields: minCardinality = 0 + restriction = Restriction( + property_type=prop_template.name, + min_cardinality=0, + max_cardinality=1 if not prop_template.is_list else None + ) + restrictions.append(restriction) + + # Create Type + ro_crate_type = Type( + id=type_template.id, # Use the consistent id field + subclass_of=["https://schema.org/Thing"], # Default parent + ontological_annotations=[type_template.ontology] if type_template.ontology else [], + rdfs_property=properties, + comment=type_template.comment, + label=type_template.id, # Use id as label (could be made customizable) + restrictions=restrictions 
+ ) + + return ro_crate_type + + +def register_model( + model_class: Type[BaseModel], + ontology: Optional[str] = None, + comment: Optional[str] = None, + type_id: Optional[str] = None +) -> TypeTemplate: + """Manually register a Pydantic model for RO-Crate Type generation. + + This is an alternative to using the @ro_crate_schema decorator. + + Args: + model_class: The Pydantic model class to register + ontology: URI of the ontological concept this model represents + comment: Human-readable description of this model type + type_id: RO-Crate schema ID for the type (defaults to class name if not provided) + + Returns: + TypeTemplate for creating Type objects from the registered model + """ + registry = get_schema_registry() + final_type_id = type_id if type_id is not None else model_class.__name__ + return registry.register_type_from_model(model_class, final_type_id, ontology, comment) + + +def is_ro_crate_model(model_class: Type[BaseModel]) -> bool: + """Check if a Pydantic model is registered as an RO-Crate schema type (looked up by the schema ID the decorator stored on the class, else by class name).""" + registry = get_schema_registry() + return registry.is_type_registered(vars(model_class).get('_ro_crate_id', model_class.__name__)) + + +def get_registered_models(): + """Get all registered RO-Crate schema models.""" + registry = get_schema_registry() + return registry.get_all_type_templates() \ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/forward_ref_resolver.py b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/forward_ref_resolver.py new file mode 100644 index 0000000..6c37081 --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/forward_ref_resolver.py @@ -0,0 +1,355 @@ +from typing import TypeVar, Dict, Callable, Any, Union, Optional, List, Generic + +from pydantic import BaseModel + +T = TypeVar("T") +R = TypeVar("R") + + +class ForwardRef(BaseModel): + """ + This internal class is used to mark + properties as forward refs to be resolved + """ + + ref: str + + +class
ForwardRefResolver(Generic[T]): + """ + Instance-level resolver for managing forward references and dependency resolution within a SchemaFacade. + + This is NOT a global registry - each SchemaFacade gets its own ForwardRefResolver instance. + It handles forward reference resolution, BFS dependency tracking, and Pydantic model caching + for a specific schema context. + + Key features: + - Store Type/Property objects by string keys for forward reference resolution + - BFS dependency traversal for proper type ordering + - Pydantic model caching to avoid regeneration + - Handle circular dependencies through late binding + """ + + def __init__(self): + self._store: Dict[str, T] = {} + self._pydantic_models: Dict[str, type] = {} # Cache for exported Pydantic models + + def register(self, key: str, value: T): + self._store[key] = value + + def resolve(self, key: Union[ForwardRef, str]) -> T: + """Resolve a ForwardRef or string key to the registered object""" + if isinstance(key, ForwardRef): + return self._store.get(key.ref) + else: + return self._store.get(key) + + def register_pydantic_model(self, type_id: str, model_class: type): + """Register a generated Pydantic model for forward reference resolution""" + self._pydantic_models[type_id] = model_class + + def get_pydantic_model(self, type_id: str) -> Optional[type]: + """Get a previously registered Pydantic model""" + return self._pydantic_models.get(type_id) + + def collect_dependencies_bfs(self, type_id: str) -> List[str]: + """ + Collect all type dependencies using BFS traversal. + Returns list of type IDs in dependency order (dependencies first). 
+ """ + from collections import deque + + visited = set() + queue = deque([type_id]) + dependency_order = [] + + while queue: + current_type_id = queue.popleft() + if current_type_id in visited: + continue + + visited.add(current_type_id) + current_type = self._store.get(current_type_id) + + if current_type and hasattr(current_type, 'rdfs_property'): + # Find dependencies in this type's properties + for prop in current_type.rdfs_property or []: + for range_type in prop.range_includes or []: + # Extract local name and check if it's a registered type + local_name = self._extract_local_id(range_type) + if local_name in self._store and local_name not in visited: + queue.append(local_name) + + dependency_order.append(current_type_id) + + return dependency_order + + def get_all_dependencies(self, type_ids: Union[str, List[str]]) -> List[str]: + """ + Get all dependencies for a type or multiple types, properly ordered. + Returns deduplicated list with dependencies before dependents. + + Args: + type_ids: Single type ID or list of type IDs to get dependencies for + + Returns: + List of all unique type IDs in dependency order + """ + # Handle single string input + if isinstance(type_ids, str): + type_ids = [type_ids] + + all_deps = [] + seen = set() + + for type_id in type_ids: + deps = self.collect_dependencies_bfs(type_id) + for dep in deps: + if dep not in seen: + all_deps.append(dep) + seen.add(dep) + + return all_deps + + @staticmethod + def _extract_local_id(uri: str) -> str: + """Extract local ID from a CURIE or full URI (e.g., 'base:Person' or 'https://schema.org/Person' → 'Person')""" + if not uri: + return "" + # Split on '/' and '#' first so the scheme colon of a full URI is not taken for a prefix separator + local = uri.rsplit("/", 1)[-1].rsplit("#", 1)[-1] + return local.rsplit(":", 1)[-1] + + def resolve_metadata_references(self, entry_resolver, entry_id: str, target_type: type, + processing_stack: set = None) -> dict: + """ + Recursively resolve metadata entry references for proper Pydantic model construction.
+ + This method handles the conversion of metadata entry references to actual objects, + preventing infinite loops and properly handling forward references. + + Args: + entry_resolver: Object with get_entry(id) and get_entry_as(id, type) methods + entry_id: ID of the metadata entry to resolve + target_type: Target Pydantic model class + processing_stack: Set of entry IDs currently being processed (for cycle detection) + + Returns: + Dictionary with resolved references suitable for target_type constructor + """ + if processing_stack is None: + processing_stack = set() + + # Prevent infinite loops + if entry_id in processing_stack: + return {} + + processing_stack.add(entry_id) + + try: + # Get the metadata entry + entry = entry_resolver.get_entry(entry_id) + if not entry: + return {} + + # Start with direct properties + resolved_data = {} + resolved_data.update(entry.properties) + + # Handle references - resolve them to actual objects + import typing + from typing import Union, List + + annotations = getattr(target_type, '__annotations__', {}) + + # First process actual references + for ref_name, ref_ids in entry.references.items(): + if not ref_ids: # Skip empty reference lists + continue + + expected_type = annotations.get(ref_name) + + # Handle Optional[Type] by extracting Type + if expected_type and hasattr(expected_type, '__origin__'): + if hasattr(typing, 'get_origin') and typing.get_origin(expected_type) is Union: + args = typing.get_args(expected_type) + if len(args) == 2 and type(None) in args: + expected_type = args[0] if args[1] is type(None) else args[1] + + # Check if it's a list type + is_list_type = False + list_element_type = str + if expected_type and hasattr(expected_type, '__origin__'): + if hasattr(typing, 'get_origin') and typing.get_origin(expected_type) in (list, List): + is_list_type = True + args = typing.get_args(expected_type) + if args: + list_element_type = args[0] + expected_type = list_element_type + + resolved_refs = [] + + for ref_id 
in ref_ids: + # Resolve forward references to actual types first + resolved_expected_type = expected_type + if hasattr(expected_type, '__forward_arg__'): + # This is a forward reference, try to resolve it + forward_name = expected_type.__forward_arg__ + # Handle cases with extra quotes like "'Equipment'" -> "Equipment" + if forward_name.startswith("'") and forward_name.endswith("'"): + forward_name = forward_name[1:-1] + + # Try to resolve using the entry resolver (which should have the pydantic models) + if hasattr(entry_resolver, 'export_pydantic_model'): + try: + resolved_expected_type = entry_resolver.export_pydantic_model(forward_name) + except Exception: + # Fallback: if forward name matches target type + if forward_name == target_type.__name__: + resolved_expected_type = target_type + elif forward_name == target_type.__name__: + resolved_expected_type = target_type + + if resolved_expected_type and resolved_expected_type != str and callable(resolved_expected_type): + try: + # Recursively resolve referenced entries using this same method + if ref_id not in processing_stack: + resolved_ref_data = self.resolve_metadata_references( + entry_resolver, ref_id, resolved_expected_type, processing_stack + ) + + if resolved_ref_data: + # Create instance of expected type + resolved_ref = resolved_expected_type(**resolved_ref_data) + resolved_refs.append(resolved_ref) + else: + # Fallback to ID if resolution fails + resolved_refs.append(ref_id) + else: + # Circular reference - use ID + resolved_refs.append(ref_id) + except Exception as e: + # Fallback to ID if conversion fails + resolved_refs.append(ref_id) + else: + # Expected type is string or not resolvable + resolved_refs.append(ref_id) + + # Set the resolved reference(s) + if is_list_type: + resolved_data[ref_name] = resolved_refs + elif len(resolved_refs) == 1: + resolved_data[ref_name] = resolved_refs[0] + elif len(resolved_refs) > 1: + resolved_data[ref_name] = resolved_refs # Multiple refs for single field + else: +
resolved_data[ref_name] = None + + # Handle properties that should be references but are stored as string representations + # (This happens when objects were serialized incorrectly during export) + for prop_name, prop_value in entry.properties.items(): + expected_type = annotations.get(prop_name) + + # Skip if we already processed this as a reference + if prop_name in entry.references: + continue + + # Handle Optional[Type] by extracting Type + if expected_type and hasattr(expected_type, '__origin__'): + if hasattr(typing, 'get_origin') and typing.get_origin(expected_type) is Union: + args = typing.get_args(expected_type) + if len(args) == 2 and type(None) in args: + expected_type = args[0] if args[1] is type(None) else args[1] + + # Resolve forward references to actual types + actual_expected_type = expected_type + if hasattr(expected_type, '__forward_arg__'): + # This is a forward reference, try to resolve it + forward_name = expected_type.__forward_arg__ + # Handle cases with extra quotes like "'Equipment'" -> "Equipment" + if forward_name.startswith("'") and forward_name.endswith("'"): + forward_name = forward_name[1:-1] + + # Try to resolve using the entry resolver (which should have the pydantic models) + if hasattr(entry_resolver, 'export_pydantic_model'): + try: + actual_expected_type = entry_resolver.export_pydantic_model(forward_name) + except Exception: + # Fallback: if forward name matches target type + if forward_name == target_type.__name__: + actual_expected_type = target_type + elif forward_name == target_type.__name__: + actual_expected_type = target_type + + # If expected type is a Pydantic model and we have a string representation + if (actual_expected_type and + hasattr(actual_expected_type, '__bases__') and + any('BaseModel' in str(base) for base in actual_expected_type.__bases__) and + isinstance(prop_value, str) and + prop_value.startswith("{") and prop_value.endswith("}")): + + try: + # Try to parse the string as a Python dict representation, but
handle datetime objects + # First, replace datetime.datetime(...) with a parseable format + import re + + # Replace datetime.datetime(year, month, day, ...) with ISO string + def datetime_replacer(match): + # Extract the datetime arguments + args_str = match.group(1) + try: + # Parse basic datetime(year, month, day, hour, minute) pattern + args = [int(x.strip()) for x in args_str.split(',')] + if len(args) >= 3: + from datetime import datetime + dt = datetime(*args[:6]) # year, month, day, hour, minute, second + return f"'{dt.isoformat()}'" + except (ValueError, TypeError): + pass + return "'1900-01-01T00:00:00'" # fallback + + cleaned_value = re.sub(r'datetime\.datetime\(([^)]+)\)', datetime_replacer, prop_value) + + import ast + parsed_dict = ast.literal_eval(cleaned_value) + + # Create an instance of the expected type from the parsed data + if isinstance(parsed_dict, dict): + # Recursively handle nested objects + nested_resolved = {} + nested_annotations = getattr(actual_expected_type, '__annotations__', {}) + + for key, value in parsed_dict.items(): + nested_expected_type = nested_annotations.get(key) + + # Handle nested Optional[Type] + if nested_expected_type and hasattr(nested_expected_type, '__origin__'): + if hasattr(typing, 'get_origin') and typing.get_origin(nested_expected_type) is Union: + nested_args = typing.get_args(nested_expected_type) + if len(nested_args) == 2 and type(None) in nested_args: + nested_expected_type = nested_args[0] if nested_args[1] is type(None) else nested_args[1] + + # Convert datetime strings back to datetime objects if needed + if key == 'created_date' and isinstance(value, str): + from datetime import datetime + try: + value = datetime.fromisoformat(value) + except (ValueError, TypeError): + pass + + nested_resolved[key] = value + + resolved_instance = actual_expected_type(**nested_resolved) + resolved_data[prop_name] = resolved_instance + except (ValueError, SyntaxError, TypeError) as e: + # If parsing fails, keep the 
original string value + pass + + return resolved_data + + finally: + processing_stack.discard(entry_id) + + def clear(self): + self._store.clear() + self._pydantic_models.clear() diff --git a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/jsonld_utils.py b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/jsonld_utils.py index 568dc0a..2a34135 100644 --- a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/jsonld_utils.py +++ b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/jsonld_utils.py @@ -2,69 +2,192 @@ import tempfile import json from pathlib import Path from lib_ro_crate_schema.crate.rdf import BASE -from lib_ro_crate_schema.crate.ro_constants import RO_EXTRA_CTX +# Inline the context needed for OWL cardinality constraints +RO_EXTRA_CTX = { + "owl:maxCardinality": {"@type": "xsd:integer"}, + "owl:minCardinality": {"@type": "xsd:integer"}, +} from lib_ro_crate_schema.crate.schema_facade import SchemaFacade import pyld from rocrate.rocrate import ROCrate from rdflib import Graph - -def emit_crate_with_context(crate: ROCrate, context: dict[str, str]) -> dict[str, str]: - """ - Emits the ROCrate to a temporary file, reads it back, updates the @context directly (no pyld), - and returns the updated JSON-LD dict. Uses the tempfile context manager for cleanup. - Only supports original @context as string (RO-Crate style). - """ - with tempfile.TemporaryDirectory() as tmp: - crate.metadata.write(tmp) - ld = json.loads((Path(tmp) / Path("ro-crate-metadata.json")).read_text()) - orig_ctx = ld.get("@context") - if isinstance(orig_ctx, str): - ld["@context"] = [orig_ctx, context] - else: - raise ValueError( - f"Unsupported original @context type: {type(orig_ctx)}. Only string is supported for RO-Crate compatibility." - ) - return ld - - -def update_jsonld_context(ld_obj: dict, new_context: dict[str, str]): - """ - (Legacy) Use pyld to update the @context of a JSON-LD object. 
- Returns a new JSON-LD object with the updated context. - """ - return pyld.jsonld.compact(ld_obj, new_context) - - def get_context(g: Graph) -> dict[str, str]: """ - Extracts all used namespaces from the rdflib graph and returns a JSON-LD @context dict. - This can be used for JSON-LD compaction or as a base for RO-Crate @context. + Dynamically generates JSON-LD @context based on the actual vocabularies and properties + used in the RDF graph. Analyzes predicates, types, and values to determine needed namespaces. """ + from urllib.parse import urlparse + import re + context = {} - for prefix, namespace in g.namespaces(): - if prefix: - context[prefix] = str(namespace) - if "schema" not in context: - context["schema"] = "https://schema.org/" + used_namespaces = {} + property_contexts = {} + + # Standard RO-Crate context base + ro_crate_base = "https://w3id.org/ro/crate/1.1/context" + + # Collect all URIs used as predicates, types, and objects + all_uris = set() + + for s, p, o in g: + # Add predicate URIs + if str(p).startswith('http'): + all_uris.add(str(p)) + + # Add type URIs from rdf:type triples + if str(p) == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" and str(o).startswith('http'): + all_uris.add(str(o)) + + # Add object URIs that are references + if str(o).startswith('http'): + all_uris.add(str(o)) + + # Analyze URIs to extract namespaces and common patterns + namespace_prefixes = { + "https://schema.org/": "schema", + "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", + "http://www.w3.org/2000/01/rdf-schema#": "rdfs", + "http://www.w3.org/2002/07/owl#": "owl", + "http://www.w3.org/2001/XMLSchema#": "xsd", + "http://openbis.org/": "openbis", + "http://example.com/": "base" + } + + # Track which namespaces are actually used + unknown_namespaces = {} # Track URIs that don't match predefined namespaces + + for uri in all_uris: + found_match = False + # Check against predefined namespaces first + for namespace_uri, prefix in namespace_prefixes.items(): 
+ if uri.startswith(namespace_uri): + used_namespaces[prefix] = namespace_uri + found_match = True + break + + # If no match found, this might be an unknown namespace + if not found_match and uri.startswith('http'): + # Extract potential namespace (everything up to the last '/' or '#') + if '/' in uri: + # Find the last meaningful separator + parts = uri.split('/') + if len(parts) > 3: # http://domain.com/something + potential_ns = '/'.join(parts[:-1]) + '/' + # Only consider it if it looks like a namespace (has domain + path) + if '.' in parts[2]: # Has a domain with dots + unknown_namespaces[potential_ns] = unknown_namespaces.get(potential_ns, 0) + 1 + + # Auto-detect unknown namespaces that appear frequently enough + for ns_uri, count in unknown_namespaces.items(): + if count >= 2: # Only add namespaces used at least twice + # Generate a prefix from the domain + try: + from urllib.parse import urlparse + parsed = urlparse(ns_uri) + domain_parts = parsed.netloc.split('.') + + # Use first part of domain as prefix (e.g., pokemon.org -> pokemon) + if len(domain_parts) >= 2: + potential_prefix = domain_parts[0] + + # Make sure prefix doesn't conflict with existing ones + counter = 1 + final_prefix = potential_prefix + while final_prefix in used_namespaces: + final_prefix = f"{potential_prefix}{counter}" + counter += 1 + + used_namespaces[final_prefix] = ns_uri + + except Exception: + # If parsing fails, skip this namespace + continue + + # Add base RO-Crate context first + context = [ro_crate_base] + + # Add discovered namespaces as a second context layer + namespace_context = {} + + # Add used vocabularies + for prefix, namespace_uri in used_namespaces.items(): + namespace_context[prefix] = namespace_uri + + if namespace_context: + context.append(namespace_context) + + # If no custom namespaces found, return simple context + if len(context) == 1: + return ro_crate_base + return context -def add_schema_to_crate(schema: SchemaFacade, crate: ROCrate) -> dict: +def 
add_schema_to_crate(schema: SchemaFacade, crate: ROCrate) -> ROCrate: """ - Emits triples from schema, builds a graph, compacts JSON-LD, adds objects to the crate, - writes to a tempfile, updates context using pyld, and returns the final JSON-LD dict. + Emits triples from schema, builds a graph, converts to JSON-LD with dynamic context, + and adds objects to the crate. Context is generated based on actual vocabulary usage. """ metadata_graph = schema.to_graph() - # Serialize and compact JSON-LD - ld_ser = metadata_graph.serialize(format="json-ld") - ld_obj = pyld.jsonld.json.loads(ld_ser) - - context = {**get_context(metadata_graph), **RO_EXTRA_CTX} - ld_obj_compact = update_jsonld_context(ld_obj, context) - # Add each object in the compacted graph to the crate - for obj in ld_obj_compact.get("@graph", []): - crate.add_jsonld(obj) - # Use the tempfile-based utility to update context and return - new_crate = emit_crate_with_context(crate, context) - return new_crate + + # Generate dynamic context based on actual content + dynamic_context = get_context(metadata_graph) + + # Extract additional context (non-standard RO-Crate namespaces/properties) + additional_context = {} + if isinstance(dynamic_context, list) and len(dynamic_context) > 1: + # Get the second layer which contains our custom namespaces + additional_context = dynamic_context[1] if isinstance(dynamic_context[1], dict) else {} + elif isinstance(dynamic_context, dict): + additional_context = dynamic_context + + # Create serialization context with only namespace mappings (consistent with get_context) + serialization_context = { + "schema": "https://schema.org/", + **additional_context, + **RO_EXTRA_CTX + } + + try: + # Serialize to JSON-LD with the combined context + ld_ser = metadata_graph.serialize(format="json-ld", context=serialization_context) + ld_obj = pyld.jsonld.json.loads(ld_ser) + + except Exception as e: + print(f"Warning: Could not serialize with dynamic context, falling back to basic context: 
{e}") + # Fallback to basic context + basic_context = {"schema": "https://schema.org/", **RO_EXTRA_CTX} + ld_ser = metadata_graph.serialize(format="json-ld", context=basic_context) + ld_obj = pyld.jsonld.json.loads(ld_ser) + + # Handle both @graph array and single object forms + objects = ld_obj.get("@graph", []) + if not objects and isinstance(ld_obj, dict) and "@id" in ld_obj: + objects = [ld_obj] + + # Add each object in the graph to the crate + for obj in objects: + try: + # Clean up objects that might cause issues with ROCrate + cleaned_obj = {} + for key, value in obj.items(): + if key == "@context": + continue # Skip @context in individual objects + elif isinstance(value, dict) and "@type" in value and "@value" in value: + # Handle typed literals that ROCrate might not like + cleaned_obj[key] = value["@value"] + else: + cleaned_obj[key] = value + + if cleaned_obj and "@id" in cleaned_obj: # Only add valid objects with IDs + crate.add_jsonld(cleaned_obj) + except Exception as e: + # Skip objects that cause issues + print(f"Warning: Could not add object {obj.get('@id', 'unknown')}: {e}") + continue + + # Context is now handled at the ROCrate level via crate.metadata.extra_contexts + # No need for post-processing enhancement here + + return crate diff --git a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/literal_type.py b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/literal_type.py index fd1be55..25ac765 100644 --- a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/literal_type.py +++ b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/literal_type.py @@ -15,24 +15,22 @@ class LiteralType(Enum): def to_rdf(literal: LiteralType) -> Node: - match literal: - case LiteralType.BOOLEAN: - return XSD.boolean - case LiteralType.INTEGER: - return XSD.integer - case LiteralType.DOUBLE: - return XSD.double - case LiteralType.DECIMAL: - return XSD.decimal - case LiteralType.FLOAT: - return 
XSD.float - case LiteralType.DATETIME: - return XSD.dateTime - case LiteralType.STRING: - return XSD.string - case LiteralType.XML_LITERAL: - from rdflib.namespace import RDF - - return RDF.XMLLiteral - case _: - raise ValueError(f"Unknown LiteralType: {literal}") + if literal == LiteralType.BOOLEAN: + return XSD.boolean + elif literal == LiteralType.INTEGER: + return XSD.integer + elif literal == LiteralType.DOUBLE: + return XSD.double + elif literal == LiteralType.DECIMAL: + return XSD.decimal + elif literal == LiteralType.FLOAT: + return XSD.float + elif literal == LiteralType.DATETIME: + return XSD.dateTime + elif literal == LiteralType.STRING: + return XSD.string + elif literal == LiteralType.XML_LITERAL: + from rdflib.namespace import RDF + return RDF.XMLLiteral + else: + raise ValueError(f"Unknown LiteralType: {literal}") diff --git a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/metadata_entry.py b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/metadata_entry.py index 430f8d8..6704583 100644 --- a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/metadata_entry.py +++ b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/metadata_entry.py @@ -1,32 +1,179 @@ -from pydantic import BaseModel, Field, field_validator -from rdflib.graph import Node -from rdflib import URIRef, RDF, Literal +from pydantic import BaseModel, Field from lib_ro_crate_schema.crate.rdf import is_type, object_id +try: + from rdflib import URIRef, RDF, Literal +except ImportError: + # Fallback for when rdflib is not available + URIRef = str + Literal = str -from typing import Union + +from typing import Union, List, Dict, Optional, Any +from datetime import datetime from lib_ro_crate_schema.crate.type_property import TypeProperty from lib_ro_crate_schema.crate.type import Type class MetadataEntry(BaseModel): + """ + Represents an RDF Metadata Entry in an RO-Crate (equivalent to Java IMetadataEntry interface). 
+ Contains the actual data instances that conform to RDFS Class definitions (Type objects). + + Key Responsibilities: + - Store entity data with unique identifier and class type + - Hold property values (strings, numbers, booleans, dates) + - Maintain references to other entities in the knowledge graph + - Provide Java API compatibility for metadata access + + Data Structure: + - id: Unique identifier for this entity (@id in JSON-LD) + - class_id: RDFS Class this entity instantiates (@type in JSON-LD) + - properties: Key-value pairs for simple data (strings, numbers, etc.) + - references: Key-value pairs for relationships to other entities + + Commonly Used Methods: + + **Java API Compatibility (IMetadataEntry):** + - getId() -> Get unique entity identifier + - getClassId() -> Get RDFS Class type this entity instantiates + - getValues() -> Get all property values (alias for properties) + - getReferences() -> Get all entity relationships + - setId(id_value) -> Set unique entity identifier + - setClassId(class_id) -> Set RDFS Class type + - setProperties(properties) -> Set all property values at once + - setReferences(references) -> Set all entity references at once + - addProperty(key, value) -> Add single property value + - addReference(key, reference_id) -> Add single reference to another entity + - addReferences(key, reference_ids) -> Add multiple references for a property + + **Data Access:** + - properties -> Direct access to simple property values + - references -> Direct access to entity relationships + - get_values() -> Alias for properties (Java compatibility) + + + **RDF Generation:** + - to_triples() -> Generate RDF triples for serialization + + Usage Examples: + # Traditional constructor approach + person = MetadataEntry( + id="person1", + class_id="Person", + properties={"name": "Alice Johnson", "age": 30}, + references={"knows": ["person2", "person3"]} + ) + + # Java-style fluent API approach + person = (MetadataEntry(id="temp", class_id="temp") + 
.setId("person1") + .setClassId("Person") + .addProperty("name", "Alice Johnson") + .addProperty("age", 30) + .addReference("knows", "person2") + .addReference("knows", "person3")) + + # Batch operations + person.setProperties({"name": "Bob Smith", "email": "bob@example.com"}) + person.setReferences({"knows": ["person4", "person5"], "worksFor": ["org1"]}) + + Java Compatibility Features: + - All setter methods return self for method chaining (fluent interface) + - Method names follow Java camelCase conventions + - Supports builder pattern for object construction + - Compatible with existing constructor-based initialization + + JSON-LD Output Example: + { + "@id": "person1", + "@type": "Person", + "name": "Alice Johnson", + "age": 30, + "knows": [{"@id": "person2"}, {"@id": "person3"}] + } + """ id: str - # props: property reference (TypeProperty or str) -> value - props: dict[Union[TypeProperty, str], str] - # Types can be either strings or directly references to Type (RDF Types) - types: list[Union[Type, str]] - # references: property reference (TypeProperty or str) -> list of type references (Type or str) - references: dict[Union[TypeProperty, str], list[Union[Type, str]]] | None = None - children_identifiers: list[str] | None = None - parent_identifiers: list[str] | None = None + class_id: str # Type ID of this entry + properties: Dict[str, Any] = Field(default_factory=dict) # Property values (matches PropertyType concept) + references: Dict[str, List[str]] = Field(default_factory=dict) # References to other entries + + # Java API compatibility methods + def getId(self) -> str: + """Get unique entity identifier (Java IMetadataEntry interface)""" + return self.id + + def getClassId(self) -> str: + """Get RDFS Class type this entity instantiates (Java IMetadataEntry interface)""" + return self.class_id + + def getValues(self) -> Dict[str, Any]: + """Get all property values (Java IMetadataEntry interface)""" + return self.properties + + def getReferences(self) -> 
Dict[str, List[str]]: + """Get all entity relationships (Java IMetadataEntry interface)""" + return self.references + + def get_values(self) -> Dict[str, Any]: + """Get property values (alias for properties field for Java API compatibility)""" + return self.properties + + # Java-style setter methods for compatibility + def setId(self, id_value: str) -> 'MetadataEntry': + """Set unique entity identifier (Java setter style)""" + self.id = id_value + return self + + def setClassId(self, class_id: str) -> 'MetadataEntry': + """Set RDFS Class type this entity instantiates (Java setter style)""" + self.class_id = class_id + return self + + def setProperties(self, properties: Dict[str, Any]) -> 'MetadataEntry': + """Set all property values (Java setter style)""" + self.properties = properties + return self + + def setReferences(self, references: Dict[str, List[str]]) -> 'MetadataEntry': + """Set all entity relationships (Java setter style)""" + self.references = references + return self + + def addProperty(self, key: str, value: Any) -> 'MetadataEntry': + """Add a single property value (Java fluent style)""" + self.properties[key] = value + return self + + def addReference(self, key: str, reference_id: str) -> 'MetadataEntry': + """Add a single reference to another entity (Java fluent style)""" + if key not in self.references: + self.references[key] = [] + self.references[key].append(reference_id) + return self + + def addReferences(self, key: str, reference_ids: List[str]) -> 'MetadataEntry': + """Add multiple references for a property (Java fluent style)""" + if key not in self.references: + self.references[key] = [] + self.references[key].extend(reference_ids) + return self def to_triples(self): + """Generate RDF triples for this metadata entry""" subj = object_id(self.id) - for current_type in self.types: - match current_type: - case str(tid): - yield is_type(self.id, URIRef(tid)) - case Type(id=tid): - yield is_type(self.id, URIRef(tid)) - for prop_name, prop_value 
in self.props.items(): + + # Type declaration + yield is_type(self.id, URIRef(self.class_id)) + + # Property values + for prop_name, prop_value in self.properties.items(): + # Handle datetime objects by converting to ISO string + if isinstance(prop_value, datetime): + prop_value = prop_value.isoformat() yield (subj, object_id(prop_name), Literal(prop_value)) + + # References to other entries + for prop_name, ref_list in self.references.items(): + for ref_id in ref_list: + yield (subj, object_id(prop_name), object_id(ref_id)) diff --git a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/prefix.py b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/prefix.py deleted file mode 100644 index 77f011f..0000000 --- a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/prefix.py +++ /dev/null @@ -1,19 +0,0 @@ -from rdflib import Graph, URIRef -from rdflib.namespace import split_uri - - -def split_namespace(node: URIRef) -> tuple[str, str]: - try: - namespace, local = split_uri(node) - except ValueError: - namespace, local = "", str(node) - return namespace, local - - -def extract_uses_namespaces(gr: Graph) -> list[tuple[str, str]]: - ns = set() - for n in gr.all_nodes(): - match n: - case URIRef(uri): - ns.add(split_namespace(uri)[0]) - return ns diff --git a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/property_type.py b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/property_type.py new file mode 100644 index 0000000..1ef828f --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/property_type.py @@ -0,0 +1,8 @@ +""" +PropertyType alias for better Java compatibility. +This is an alias for TypeProperty to match Java naming conventions. 
+""" +from .type_property import TypeProperty + +# Alias to match Java naming +PropertyType = TypeProperty \ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/rdf.py b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/rdf.py index 5b15cc1..6f0f28c 100644 --- a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/rdf.py +++ b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/rdf.py @@ -1,12 +1,11 @@ -from typing import Protocol, Self -from lib_ro_crate_schema.crate.registry import Registry +from typing import Protocol, TypeVar, Tuple +from lib_ro_crate_schema.crate.forward_ref_resolver import ForwardRefResolver from rdflib import Graph from rdflib import Node, URIRef, RDF, IdentifiedNode from rdflib import Namespace from rdflib.namespace import NamespaceManager -from typing import TypeVar -type Triple = tuple[IdentifiedNode, IdentifiedNode, Node] +Triple = Tuple[IdentifiedNode, IdentifiedNode, Node] SCHEMA = Namespace("http://schema.org/") BASE = Namespace("http://example.com/") @@ -15,7 +14,9 @@ class RDFSerializable(Protocol): def to_rdf(self) -> list[Triple]: ... -class RDFDeserializable[T](Protocol): +T = TypeVar('T') + +class RDFDeserializable(Protocol): @classmethod def from_rdf(cls, triples: list[Triple]): ... @@ -24,7 +25,7 @@ class Resolvable(Protocol): """ A protocol for a class that implements reference resolution """ - def resolve(self, reg: Registry) -> Self: ... + def resolve(self, reg: ForwardRefResolver): ... 
def is_type(id: str, type: URIRef) -> Triple: @@ -40,12 +41,11 @@ def object_id(id: str) -> URIRef: def simplfy(node: Node, manager: NamespaceManager): - match node: - case URIRef(ref): - (base, absolute, target) = manager.compute_qname(ref) - return URIRef(f"{base}:{target}") - case _: - return node + if isinstance(node, URIRef): + (base, absolute, target) = manager.compute_qname(node) + return URIRef(f"{base}:{target}") + else: + return node def unbind(g: Graph) -> Graph: diff --git a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/reconstruction.py b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/reconstruction.py deleted file mode 100644 index cf1c12a..0000000 --- a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/reconstruction.py +++ /dev/null @@ -1,107 +0,0 @@ -from rdflib import Graph, RDF, RDFS, OWL, URIRef, Node -from lib_ro_crate_schema.crate.rdf import SCHEMA -from lib_ro_crate_schema.crate.type_property import TypeProperty -from typing import Dict, Any, Optional -from rdflib import Graph, RDF, RDFS, OWL, URIRef, Node -from lib_ro_crate_schema.crate.rdf import SCHEMA -from lib_ro_crate_schema.crate.type_property import TypeProperty -from typing import Dict, Any, Optional -from pydantic import BaseModel - - -def resolve_reference(ref: Optional[Node], cache: Dict[URIRef, Any]) -> Optional[Any]: - """Resolve a reference from the graph, using cache or returning a Ref wrapper.""" - match ref: - case None: - return None - case URIRef() as uri if uri in cache: - return cache[uri] - case URIRef() as uri: - return Ref(uri=uri) - case _: - raise TypeError(f"Reference must be a URIRef or None, got {type(ref)}") - - -class Ref(BaseModel): - """A reference to an entity to be resolved in a second pass.""" - - uri: str - # def __init__(self, uri: URIRef) -> None: - # self.uri = uri - # def __repr__(self) -> str: - # return f"Ref({self.uri})" - - -def get_subjects_by_type(graph: Graph, rdf_type: Node) -> 
set[Node]: - """Return all subjects of a given rdf:type.""" - return set(graph.subjects(RDF.type, rdf_type)) - - -def get_predicate_object_map(graph: Graph, subject: Node) -> Dict[URIRef, Node]: - """Return a dict of predicate -> object for a given subject.""" - return {p: o for p, o in graph.predicate_objects(subject)} - - -def reconstruct_property( - prop_subject: Node, props: Dict[URIRef, Node], cache: Dict[URIRef, Any] -) -> Dict[URIRef, Any]: - # Ensure prop_subject is a URIRef - if not isinstance(prop_subject, URIRef): - raise TypeError(f"prop_subject must be a URIRef, got {type(prop_subject)}") - domainIncludesRef: Optional[Node] = props.get(SCHEMA["domainIncludes"]) - domainIncludesResolved = resolve_reference(domainIncludesRef, cache) - breakpoint() - tp = TypeProperty( - id=prop_subject, - domain_includes=[domainIncludesResolved] if domainIncludesResolved else [], - ) - cache[prop_subject] = tp - return cache - - -def reconstruct_types(graph: Graph, cache: Dict[URIRef, Any]) -> Dict[URIRef, Any]: - print("Reconstructing Classes:") - for class_subject in get_subjects_by_type(graph, RDFS.Class): - props = get_predicate_object_map(graph, class_subject) - print(f" Class: {class_subject}, {props}") - # TODO: Instantiate Type and assign properties from cache if needed - # cache[class_subject] = Type(...) 
- return cache - - -def reconstruct_properties(graph: Graph, cache: Dict[URIRef, Any]) -> Dict[URIRef, Any]: - print("Reconstructing Properties:") - for prop_subject in get_subjects_by_type(graph, RDF.Property): - props = get_predicate_object_map(graph, prop_subject) - print(f" Property: {prop_subject}, {props}") - cache = reconstruct_property(prop_subject, props, cache) - return cache - - -def reconstruct_restrictions( - graph: Graph, cache: Dict[URIRef, Any] -) -> Dict[URIRef, Any]: - print("Reconstructing Restrictions:") - for restr_subject in get_subjects_by_type(graph, OWL.Restriction): - props = get_predicate_object_map(graph, restr_subject) - print(f" Restriction: {restr_subject}, {props}") - # TODO: Instantiate Restriction and add to cache - return cache - - -def reconstruct_metadata_entries( - graph: Graph, cache: Dict[URIRef, Any] -) -> Dict[URIRef, Any]: - print("Reconstructing Metadata Entries:") - # TODO: Implement as needed - return cache - - -def reconstruct(graph: Graph) -> Dict[URIRef, Any]: - cache: Dict[URIRef, Any] = {} - cache = reconstruct_properties(graph, cache) - cache = reconstruct_types(graph, cache) - cache = reconstruct_restrictions(graph, cache) - cache = reconstruct_metadata_entries(graph, cache) - # TODO: Second pass to resolve Ref objects - return cache diff --git a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/registry.py b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/registry.py deleted file mode 100644 index 12b18cd..0000000 --- a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/registry.py +++ /dev/null @@ -1,37 +0,0 @@ -from typing import TypeVar, Dict, Callable, Any - -from pydantic import BaseModel - -T = TypeVar("T") -R = TypeVar("R") - - -class ForwardRef[R](BaseModel): - """ - This internal class is used to mark - properties as forward refs to be resolved - """ - - ref: str - - -class Registry[T]: - """ - A registry used for - forward reference resolution - """ - 
- def __init__(self): - self._store: Dict[str, T] = {} - - def register(self, key: str, value: T): - self._store[key] = value - - def resolve(self, key: ForwardRef[T]) -> T: - return self._store.get(key.ref) - - def clear(self): - self._store.clear() - - -type_registry = Registry[BaseModel]() diff --git a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/restriction.py b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/restriction.py index d3312c9..e6c3ccf 100644 --- a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/restriction.py +++ b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/restriction.py @@ -1,32 +1,83 @@ -from typing import Literal as TLiteral +from typing import Optional from lib_ro_crate_schema.crate.rdf import is_type, object_id, Triple -from pydantic import BaseModel +from pydantic import BaseModel, Field, ConfigDict from rdflib import OWL, Literal, XSD from uuid import uuid4 class Restriction(BaseModel): - id: str = f"{uuid4()}" + """ + Represents an OWL Restriction that constrains how properties can be used on classes. + + OWL Restrictions are a fundamental part of ontological modeling, allowing precise specification + of property constraints such as cardinality (how many values are allowed), type constraints, + and value restrictions. These are essential for RO-Crate schema validation and semantic modeling. 
+ + Key Responsibilities: + - Define cardinality constraints (minimum/maximum number of values) + - Specify which property the restriction applies to + - Generate proper OWL RDF triples for semantic validation + - Support both required properties (minCardinality >= 1) and optional properties (minCardinality = 0) + - Enable precise schema validation in RO-Crate profiles + + Common Restriction Patterns: + - Required single value: min_cardinality=1, max_cardinality=1 + - Required multiple values: min_cardinality=1, max_cardinality=None (unlimited) + - Optional single value: min_cardinality=0, max_cardinality=1 + - Optional multiple values: min_cardinality=0, max_cardinality=None + + Usage Example: + # Create a restriction requiring exactly one name property + name_restriction = Restriction( + property_type="name", + min_cardinality=1, + max_cardinality=1 + ) + + # Create a restriction allowing multiple optional emails + email_restriction = Restriction( + property_type="email", + min_cardinality=0, + max_cardinality=None # unlimited + ) + + JSON-LD Output Example: + { + "@id": "Person_name_restriction", + "@type": "owl:Restriction", + "owl:onProperty": {"@id": "name"}, + "owl:minCardinality": 1, + "owl:maxCardinality": 1 + } + """ + id: str = Field(default_factory=lambda: str(uuid4())) property_type: str - min_cardinality: TLiteral[0, 1] - max_cardinality: TLiteral[0, 1] + min_cardinality: Optional[int] = None + max_cardinality: Optional[int] = None - class Config: - validate_by_name = True - populate_by_name = True + model_config = ConfigDict( + validate_assignment=True, + extra='forbid' + ) def to_triples(self): + """Generate RDF triples for this OWL restriction""" subj = object_id(self.id) yield is_type(self.id, OWL.Restriction) yield (subj, OWL.onProperty, object_id(self.property_type)) - yield ( - subj, - OWL.minCardinality, - Literal(self.min_cardinality, datatype=XSD.integer), - ) - yield ( - subj, - OWL.maxCardinality, - Literal(self.max_cardinality, 
datatype=XSD.integer), - ) + + # Only emit cardinality constraints that are actually set + if self.min_cardinality is not None: + yield ( + subj, + OWL.minCardinality, + Literal(self.min_cardinality, datatype=XSD.integer), + ) + + if self.max_cardinality is not None: + yield ( + subj, + OWL.maxCardinality, + Literal(self.max_cardinality, datatype=XSD.integer), + ) diff --git a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/ro_constants.py b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/ro_constants.py deleted file mode 100644 index d2e2e37..0000000 --- a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/ro_constants.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Literal - -RANGE_IDENTIFIER: Literal["schema:rangeIncludes"] = "schema:rangeIncludes" -DOMAIN_IDENTIFIER: Literal["schema:domainIncludes"] = "schema:domainIncludes" -OWL_MIN_CARDINALITY: Literal["owl:minCardinality"] = "owl:minCardinality" -OWL_MAX_CARDINALITY: Literal["owl:maxCardinality"] = "owl:maxCardinality" -OWL_RESTRICTION: Literal["owl:restriction"] = "owl:restriction" -ON_PROPERTY: Literal["owl:onProperty"] = "owl:onProperty" -RDFS_LABEL: Literal["rdfs:label"] = "rdfs:label" -RDFS_COMMENT: Literal["rdfs:comment"] = "rdfs:comment" -RDFS_SUBCLASS_OF: Literal["rdfs:subClassOf"] = "rdfs:subClassOf" - -# Cardinality and other integer literals -MIN_CARDINALITY_MANDATORY: Literal[1] = 1 -MAX_CARDINALITY_SINGLE: Literal[1] = 1 -MAX_CARDINALITY_UNLIMITED: Literal[0] = 0 - - -RO_EXTRA_CTX = { - OWL_MAX_CARDINALITY: {"@type": "xsd:integer"}, - OWL_MIN_CARDINALITY: {"@type": "xsd:integer"}, -} diff --git a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/schema_facade.py b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/schema_facade.py index 5efcf31..2bd776f 100644 --- a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/schema_facade.py +++ 
b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/schema_facade.py @@ -1,22 +1,29 @@ # Constants from Java SchemaFacade from collections import defaultdict -from typing import Generator, Literal +from pathlib import Path +from typing import Generator, Literal, Optional, Type as TypingType, Union, List +from types import ModuleType +import json +import tempfile +from datetime import datetime from lib_ro_crate_schema.crate.metadata_entry import MetadataEntry from lib_ro_crate_schema.crate.rdf import BASE, Triple, object_id -from lib_ro_crate_schema.crate.registry import Registry +from lib_ro_crate_schema.crate.forward_ref_resolver import ForwardRefResolver from lib_ro_crate_schema.crate.type import Type from lib_ro_crate_schema.crate.type_property import TypeProperty from lib_ro_crate_schema.crate.restriction import Restriction +from lib_ro_crate_schema.crate.schema_registry import get_schema_registry, TypeTemplate from pydantic import BaseModel, Field, PrivateAttr from lib_ro_crate_schema.crate.rdf import SCHEMA from rdflib import RDFS, RDF, Graph +from rocrate.rocrate import ROCrate -from lib_ro_crate_schema.crate.registry import ForwardRef +from lib_ro_crate_schema.crate.forward_ref_resolver import ForwardRef from typing import Any from typing import List, Tuple -type TypeRegistry = List[Tuple[TypeProperty, Type]] +TypeRegistry = List[Tuple[TypeProperty, Type]] def types_to_triples(used_types: TypeRegistry) -> Generator[Triple, None, None]: @@ -34,70 +41,1913 @@ def types_to_triples(used_types: TypeRegistry) -> Generator[Triple, None, None]: class SchemaFacade(BaseModel): """ - `_registry` stores a registry of properties and types - to allow forward references to other types + Main RO-Crate Schema Facade - Central orchestrator for types, properties, restrictions, and metadata entries. + Supports automatic schema generation from decorated Pydantic models and provides full Java API compatibility. 
+ + Key Responsibilities: + - Manage RDFS Classes (Type objects) and Properties (TypeProperty objects) + - Store standalone properties and restrictions not attached to specific types + - Store and query RDF Metadata Entries (MetadataEntry objects) + - Generate RO-Crate JSON-LD output + - Convert Pydantic models to RDF schema representations + - Handle file attachments for RO-Crate data files + + Commonly Used Methods: + + **Schema Management:** + - addType(type_obj) -> Add RDFS Class definition + - add_property_type(property) -> Add standalone property + - add_restriction(restriction) -> Add standalone restriction + - addEntry(entry) -> Add RDF metadata entry + - get_types() -> List all RDFS Classes + - get_property_types() -> List all properties (standalone + type-attached) + - get_restrictions() -> List all restrictions (standalone + type-attached) + - get_property_type(id) -> Find specific property by ID + - get_restriction(id) -> Find specific restriction by ID + - get_entries() -> List all metadata entries + - get_entry(id) -> Find specific metadata entry + - get_entries_by_class(class_id) -> Find entries of specific type + + **File Management:** + - add_file(file_path, name=None, description=None) -> Add file to be included in crate + - get_files() -> List all files to be included + - clear_files() -> Remove all file references + + **Pydantic Integration:** + - add_pydantic_model(model_class) -> Convert Pydantic model to RDFS schema + - add_model_instance(instance) -> Convert Pydantic instance to metadata entry + - add_registered_models(*names) -> Add models from decorator registry + - add_all_registered_models() -> Add all registered models + + **Export & Serialization:** + - write(destination) -> Export complete RO-Crate to file + - to_json() -> Get JSON-LD representation + - to_graph() -> Get RDF Graph representation + - to_triples() -> Get RDF triple iterator + + **Java API Compatibility (ISchemaFacade):** + - get_crate() -> Get complete ROCrate object 
with schema and files integrated + - getCrate() -> Alias for get_crate() (Java API compatibility) + - getType(id) -> Get specific RDFS Class + - getPropertyTypes() -> Get all properties (includes standalone) + - getPropertyType(id) -> Get specific property by ID + - getRestrictions() -> Get all restrictions (includes standalone) + - getRestriction(id) -> Get specific restriction by ID + + Usage Example: + facade = SchemaFacade() + facade.addType(person_type) + facade.addEntry(person_instance) + facade.add_file("data.csv", name="Experimental Data") + facade.write('my-crate') + + Complete RO-Crate Output Structure Example: + { + "@context": [ + "https://w3id.org/ro/crate/1.1/context", + { + "base": "http://example.com/", + "owl:maxCardinality": {"@type": "xsd:integer"}, + "owl:minCardinality": {"@type": "xsd:integer"} + } + ], + "@graph": [ + { + "@id": "./", + "@type": "Dataset", + "name": "My RO-Crate", + "description": "Generated RO-Crate with schema and data", + "hasPart": [ + { + "@id": "data.csv" + } + ] + }, + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"} + }, + { + "@id": "Person", + "@type": "rdfs:Class", + "rdfs:label": "Person", + "rdfs:comment": "Represents a person", + "rdfs:subClassOf": {"@id": "https://schema.org/Thing"}, + "owl:restriction": [{"@id": "Person_name_restriction"}] + }, + { + "@id": "name", + "@type": "rdf:Property", + "rdfs:label": "Name", + "schema:domainIncludes": {"@id": "Person"}, + "schema:rangeIncludes": {"@id": "http://www.w3.org/2001/XMLSchema#string"} + }, + { + "@id": "Person_name_restriction", + "@type": "owl:Restriction", + "owl:onProperty": {"@id": "name"}, + "owl:minCardinality": 1 + }, + { + "@id": "person1", + "@type": "Person", + "name": "Alice Johnson", + "email": "alice@example.com" + }, + { + "@id": "data.csv", + "@type": "File", + "name": "Experimental Data", + "encodingFormat": "text/csv" + } + ] + } """ - _registry: Registry[Type | TypeProperty | Restriction] = 
PrivateAttr( - default=Registry() + _forward_ref_resolver: ForwardRefResolver = PrivateAttr( + default_factory=ForwardRefResolver ) - types: list[Type] - metadata_entries: list[MetadataEntry] + types: list[Type] = Field(default_factory=list) + property_types: list[TypeProperty] = Field(default_factory=list) # Standalone properties not attached to types + restrictions: list[Restriction] = Field(default_factory=list) # Standalone restrictions + metadata_entries: list[MetadataEntry] = Field(default_factory=list) + files: list[dict] = Field(default_factory=list) # Store file info for later inclusion prefix: str = "base" def model_post_init(self, context: Any) -> None: """ - Register all classes and properties for later reference resolution. + Register all classes, properties, and restrictions for later reference resolution. Convert all string refs in properties to ForwardRef using Pydantic post-init. """ for current_type in self.types: - self._registry.register(current_type.id, current_type) + self._forward_ref_resolver.register(current_type.id, current_type) if current_type.rdfs_property: for prop in current_type.rdfs_property: - self._registry.register(prop.id, prop) + self._forward_ref_resolver.register(prop.id, prop) + + # Register standalone properties + for prop in self.property_types: + self._forward_ref_resolver.register(prop.id, prop) + + # Register standalone restrictions + for restriction in self.restrictions: + self._forward_ref_resolver.register(restriction.id, restriction) + super().model_post_init(context) - def resolve_ref[T](self, ref: str | ForwardRef[T]) -> T: + def resolve_ref(self, ref): """ Resolve a reference (ForwardRef, str, or id) to the actual object using the registry. 
""" - match ref: - case ForwardRef(ref=ref_id): - return self._registry.resolve(ref_id) - case str(ref_id): - return self._registry.resolve(ref_id) - case _: - return ref + if isinstance(ref, ForwardRef): + return self._forward_ref_resolver.resolve(ref.ref) + elif isinstance(ref, str): + return self._forward_ref_resolver.resolve(ref) + else: + return ref def resolve_forward_refs(self): """ Walk all types/properties and delegate reference resolution to each property. """ for current_type in self.types: - current_type.resolve(self._registry) - # for current_type in self.types: - # if current_type.rdfs_property: - # for prop in current_type.rdfs_property: - # if hasattr(prop, "resolve_references"): - # prop.resolve_references(self) + current_type.resolve(self._forward_ref_resolver) + + # Fluent builder API methods + def addType(self, type_obj: Type): + """Add a type to the schema""" + self.types.append(type_obj) + self._forward_ref_resolver.register(type_obj.id, type_obj) + if type_obj.rdfs_property: + for prop in type_obj.rdfs_property: + self._forward_ref_resolver.register(prop.id, prop) + return self + + def _is_placeholder_id(self, entry_id: str) -> bool: + """Check if an ID is a placeholder/dummy (automatically generated)""" + # Placeholder IDs use explicit naming patterns to make them easy to identify + import re + + # Check for explicit placeholder/dummy patterns + placeholder_patterns = [ + r'.*_placeholder_.*', # Contains "placeholder" + ] + + for pattern in placeholder_patterns: + if re.match(pattern, entry_id, re.IGNORECASE): + return True + + + # Check for hex patterns that are long (8+ chars) + if re.search(r'[a-f0-9]{8,}', entry_id.lower()): + return True + + return False + + def _entries_are_equivalent(self, entry1: MetadataEntry, entry2: MetadataEntry) -> bool: + """Check if two metadata entries represent the same conceptual entity""" + # Must have same class + if entry1.class_id != entry2.class_id: + return False + + # Compare properties (excluding 
None values and 'id' field since ID is stored separately) + props1 = {k: v for k, v in entry1.properties.items() if v is not None and k != 'id'} + props2 = {k: v for k, v in entry2.properties.items() if v is not None and k != 'id'} + + if props1 != props2: + return False + + # For placeholder resolution, we're more lenient with references + # If one entry is a placeholder, we only compare non-empty reference lists + # This handles circular references where placeholders might have incomplete refs + is_placeholder1 = self._is_placeholder_id(entry1.id) + is_placeholder2 = self._is_placeholder_id(entry2.id) + + if is_placeholder1 or is_placeholder2: + # For placeholder comparisons, be more flexible with references + # Placeholders may have incomplete reference sets due to circular dependency resolution + refs1 = {k: sorted(v) for k, v in entry1.references.items() if v} + refs2 = {k: sorted(v) for k, v in entry2.references.items() if v} + + if not refs1 and not refs2: + return True # Both have no references + elif not refs1 or not refs2: + # One has references, one doesn't - could be placeholder vs real + return True + else: + # Both have references - for placeholders, check if one is a subset of the other + # This handles cases where the placeholder has fewer refs due to circular resolution + smaller_refs = refs1 if len(refs1) <= len(refs2) else refs2 + larger_refs = refs2 if len(refs1) <= len(refs2) else refs1 + + # Check if all references in the smaller set exist in the larger set with same values + for key, values in smaller_refs.items(): + if key not in larger_refs or larger_refs[key] != values: + return False + return True + else: + # Both are real entries, require exact reference match + refs1 = {k: sorted(v) for k, v in entry1.references.items() if v} + refs2 = {k: sorted(v) for k, v in entry2.references.items() if v} + return refs1 == refs2 + + def _choose_preferred_entry(self, entry1: MetadataEntry, entry2: MetadataEntry) -> MetadataEntry: + """Choose the 
preferred entry when duplicates are found""" + # Prefer entry with real ID over placeholder/dummy ID + placeholder1 = self._is_placeholder_id(entry1.id) + placeholder2 = self._is_placeholder_id(entry2.id) + + if placeholder1 and not placeholder2: + return entry2 + elif not placeholder1 and placeholder2: + return entry1 + else: + # Both are placeholder or both are real, prefer the shorter ID (more likely to be user-defined) + if len(entry1.id) <= len(entry2.id): + return entry1 + else: + return entry2 + + def addEntry(self, entry: MetadataEntry): + """Add a metadata entry to the schema, checking for and removing duplicates""" + # Check if this entry is equivalent to any existing entry + for i, existing_entry in enumerate(self.metadata_entries): + if self._entries_are_equivalent(entry, existing_entry): + # Found equivalent entry, check if one is a placeholder + placeholder1 = self._is_placeholder_id(entry.id) + placeholder2 = self._is_placeholder_id(existing_entry.id) + + if placeholder1: + # Keep existing entry, don't add new one + self._update_references(entry.id, existing_entry.id) + return self + elif not placeholder1 and placeholder2: + # Replace existing with new entry + removed_id = existing_entry.id + self.metadata_entries[i] = entry + # Update all references to point to the new entry ID + self._update_references(removed_id, entry.id) + return self + + # Both are real continue + + # No duplicates found, add the new entry + self.metadata_entries.append(entry) + return self + + def resolve_placeholders(self): + """ + Resolve placeholder entities by finding and merging them with real entities. + This should be called after all model instances have been added to handle + circular references that create placeholder duplicates. 
+ """ + placeholders_to_remove = [] + updates = {} # mapping of old_id -> new_id + + # Find all placeholder entries + placeholder_entries = [entry for entry in self.metadata_entries if self._is_placeholder_id(entry.id)] + real_entries = [entry for entry in self.metadata_entries if not self._is_placeholder_id(entry.id)] + + for placeholder in placeholder_entries: + # Look for a real entry with equivalent content + matching_real_entry = None + for real_entry in real_entries: + if self._entries_are_equivalent(placeholder, real_entry): + matching_real_entry = real_entry + break + + if matching_real_entry: + # Found a match, mark placeholder for removal and track the ID mapping + placeholders_to_remove.append(placeholder) + updates[placeholder.id] = matching_real_entry.id + print(f"🔄 Resolving placeholder {placeholder.id} -> {matching_real_entry.id}") + + # Remove placeholder entries + for placeholder in placeholders_to_remove: + self.metadata_entries.remove(placeholder) + + # Update all references from placeholder IDs to real IDs + for old_id, new_id in updates.items(): + self._update_references(old_id, new_id) + + if placeholders_to_remove: + print(f"🔄 Resolved {len(placeholders_to_remove)} placeholder(s) to avoid circular import duplicates") + return self + + def _find_equivalent_entry_for_model(self, model_instance: BaseModel) -> Optional[MetadataEntry]: + """ + Find an existing metadata entry that represents the same Pydantic model instance. + + This is used to avoid creating duplicate entries when processing object references. 
+ """ + # Convert the model instance to a temporary metadata entry for comparison + model_class = type(model_instance) + model_name = model_class.__name__ + + # Extract properties from instance + temp_properties = {} + for field_name in model_class.model_fields.keys(): + field_value = getattr(model_instance, field_name, None) + if field_value is None or isinstance(field_value, BaseModel) or isinstance(field_value, list): + continue # Skip references and None values for comparison + if isinstance(field_value, datetime): + temp_properties[field_name] = field_value.isoformat() + else: + temp_properties[field_name] = field_value + + # Find an existing entry with the same class and properties + for existing_entry in self.metadata_entries: + if (existing_entry.class_id == model_name and + existing_entry.properties == temp_properties): + return existing_entry + + return None + + def _update_references(self, old_id: str, new_id: str): + """ + Update all references in metadata entries from old_id to new_id. + + This is used when removing duplicate entries to ensure all references + point to the kept entry rather than the removed one. + """ + for entry in self.metadata_entries: + # Update references in all reference lists + for ref_name, ref_list in entry.references.items(): + if ref_list: # Only process non-empty lists + updated_refs = [new_id if ref_id == old_id else ref_id for ref_id in ref_list] + entry.references[ref_name] = updated_refs + + def add_file(self, file_path: Union[str, Path], name: Optional[str] = None, + description: Optional[str] = None, **properties) -> 'SchemaFacade': + """ + Add a file to be included in the RO-Crate when written. + + This method stores file information that will be used when write() is called. + The actual file copying and File entity creation happens during write(). 
+ + Args: + file_path: Path to the file to include + name: Human-readable name for the file (defaults to filename) + description: Description of the file's content/purpose + **properties: Additional properties for the File entity + + Returns: + Self for method chaining + + Example: + facade.add_file("data.csv", name="Experimental Results", + description="Raw measurement data from synthesis experiment") + """ + file_path = Path(file_path) + + if not name: + name = file_path.stem.replace('_', ' ').replace('-', ' ').title() + + if not description: + description = f"Data file: {file_path.name}" + + file_info = { + 'path': file_path, + 'name': name, + 'description': description, + 'properties': properties + } + + self.files.append(file_info) + return self + + def get_files(self) -> list[dict]: + """Get list of files to be included in the crate""" + return self.files + + def clear_files(self) -> 'SchemaFacade': + """Remove all file references from the facade""" + self.files.clear() + return self + + @classmethod + def from_ro_crate(cls, path: Union[str, Path, ROCrate]) -> 'SchemaFacade': + """ + Import SchemaFacade from RO-Crate. This can be the folder or the metadata file itself. 
+ + Args: + path: Path to RO-Crate folder or ro-crate-metadata.json file + + Returns: + SchemaFacade with imported types, properties, restrictions and metadata entries + """ + import json + from pathlib import Path + + if isinstance(path, ROCrate): + crate_data = path.metadata + return cls.from_dict(crate_data) + + path = Path(path) + # Search for file called ro-crate-metadata.json + + if path.is_dir(): + path = path / "ro-crate-metadata.json" + if not path.is_file(): + # Search in subfolders + for subpath in path.glob("**/ro-crate-metadata.json"): + if subpath.is_file(): + path = subpath + break + if not path.is_file(): + raise FileNotFoundError(f"Could not find ro-crate-metadata.json in {path}") + with open(path, 'r', encoding='utf-8') as f: + crate_data = json.load(f) + + return cls.from_dict(crate_data) + + @classmethod + def from_dict(cls, crate_data: dict) -> 'SchemaFacade': + """ + Import SchemaFacade from RO-Crate dictionary. + + Follows the proper import flow: + 1. Parse rdfs:Class entities → create Type objects + 2. Parse rdf:Property entities → create TypeProperty objects + 3. Parse owl:Restriction entities → create Restriction objects + 4. Link properties to types based on owl:restriction references + 5. 
Parse remaining entities → create MetadataEntry objects + + Args: + crate_data: Dictionary containing RO-Crate JSON-LD data + + Returns: + SchemaFacade with imported schema and data + """ + graph = crate_data.get("@graph", []) + context = crate_data.get("@context", []) + + # Parse and process the JSON-LD context + context_processor = cls._parse_jsonld_context(context) + + # Step 1: Parse all schema elements first + parsed_classes = {} # id -> raw_data + parsed_properties = {} # id -> raw_data + parsed_restrictions = {} # id -> raw_data + metadata_items = [] # remaining non-schema items + + # Separate schema elements from metadata + for item in graph: + item_type = item.get("@type") + item_id = item.get("@id", "") + + # Expand URIs using context for proper type detection + expanded_type = cls._expand_uri_with_context(item_type, context_processor) if item_type else "" + expanded_id = cls._expand_uri_with_context(item_id, context_processor) if item_id else "" + + # Check for rdfs:Class (could be prefixed or full URI) + if (item_type == "rdfs:Class" or + expanded_type.endswith("/Class") or + expanded_type.endswith("#Class")): + parsed_classes[item_id] = item + # Check for rdf:Property or rdfs:Property + elif (item_type in ["rdf:Property", "rdfs:Property"] or + expanded_type.endswith("/Property") or + expanded_type.endswith("#Property")): + parsed_properties[item_id] = item + # Check for owl:Restriction + elif (item_type in ["owl:Restriction", "owl:restriction"] or + expanded_type.endswith("/Restriction") or + expanded_type.endswith("#Restriction")): + parsed_restrictions[item_id] = item + elif item_id not in ["./", "ro-crate-metadata.json"]: # Skip RO-Crate structure + metadata_items.append(item) + + # Step 2: Create TypeProperty objects + type_properties = {} + for prop_id, prop_data in parsed_properties.items(): + local_id = cls._extract_local_id(prop_id) + + # Extract ontology mapping from owl:equivalentProperty + ontology = None + equiv_prop = 
prop_data.get("owl:equivalentProperty", {}) + if isinstance(equiv_prop, dict): + ontology = equiv_prop.get("@id") + elif isinstance(equiv_prop, str): + ontology = equiv_prop + + # Extract domain includes (what classes can have this property) + domain_includes = [] + domain_data = prop_data.get("schema:domainIncludes", []) + if isinstance(domain_data, dict): + ref_id = domain_data.get("@id", "") + if ref_id: + domain_includes = [cls._expand_uri_with_context(ref_id, context_processor)] + elif isinstance(domain_data, list): + for item in domain_data: + if isinstance(item, dict): + ref_id = item.get("@id", "") + if ref_id: + domain_includes.append(cls._expand_uri_with_context(ref_id, context_processor)) + elif isinstance(item, str): + domain_includes.append(cls._expand_uri_with_context(item, context_processor)) + elif isinstance(domain_data, str): + domain_includes = [cls._expand_uri_with_context(domain_data, context_processor)] + + # Extract range includes (what types this property can hold) + range_includes = [] + range_data = prop_data.get("schema:rangeIncludes", []) + if isinstance(range_data, dict): + ref_id = range_data.get("@id", "") + if ref_id: + range_includes = [cls._expand_uri_with_context(ref_id, context_processor)] + elif isinstance(range_data, list): + for item in range_data: + if isinstance(item, dict): + ref_id = item.get("@id", "") + if ref_id: + range_includes.append(cls._expand_uri_with_context(ref_id, context_processor)) + elif isinstance(item, str): + range_includes.append(cls._expand_uri_with_context(item, context_processor)) + elif isinstance(range_data, str): + range_includes = [cls._expand_uri_with_context(range_data, context_processor)] + + type_prop = TypeProperty( + id=local_id, + label=prop_data.get("rdfs:label"), + comment=prop_data.get("rdfs:comment"), + domain_includes=domain_includes, + range_includes=range_includes, + ontological_annotations=[ontology] if ontology else None + ) + type_properties[prop_id] = type_prop + + # Step 3: 
Create Restriction objects + restrictions = {} + for restr_id, restr_data in parsed_restrictions.items(): + on_property = restr_data.get("owl:onProperty", {}) + prop_id = on_property.get("@id") if isinstance(on_property, dict) else on_property + + restriction = Restriction( + id=cls._extract_local_id(restr_id), + property_type=cls._extract_local_id(prop_id) if prop_id else "", + min_cardinality=restr_data.get("owl:minCardinality"), + max_cardinality=restr_data.get("owl:maxCardinality") + ) + restrictions[restr_id] = restriction + + # Step 4: Create Type objects and link properties via restrictions + types = [] + linked_property_ids = set() # Track which properties we've linked across all types + + for class_id, class_data in parsed_classes.items(): + local_id = cls._extract_local_id(class_id) + + # Extract subclass relationships + subclass_of = ["https://schema.org/Thing"] # Default + subclass_data = class_data.get("rdfs:subClassOf", {}) + if isinstance(subclass_data, dict): + subclass_ref = subclass_data.get("@id") + if subclass_ref: + subclass_of = [subclass_ref] + elif isinstance(subclass_data, str): + subclass_of = [subclass_data] + + # Extract ontology mapping from owl:equivalentClass + ontology = None + equiv_class = class_data.get("owl:equivalentClass", {}) + if isinstance(equiv_class, dict): + ontology = equiv_class.get("@id") + elif isinstance(equiv_class, str): + ontology = equiv_class + + # Get restrictions linked to this class and their properties + class_restrictions = [] + class_properties = [] + + # First, link properties via owl:restriction (preferred method) + restr_refs = class_data.get("owl:restriction", []) + if isinstance(restr_refs, dict): + restr_refs = [restr_refs] + + for restr_ref in restr_refs: + restr_id = restr_ref.get("@id") if isinstance(restr_ref, dict) else restr_ref + if restr_id in restrictions: + restriction = restrictions[restr_id] + class_restrictions.append(restriction) + + # Find the corresponding property and add it to the 
class + for prop_id, type_prop in type_properties.items(): + if type_prop.id == restriction.property_type: + # Set required based on restriction cardinality + prop_copy = type_prop.model_copy() + prop_copy.required = (restriction.min_cardinality or 0) > 0 + class_properties.append(prop_copy) + linked_property_ids.add(prop_id) + break + + # Fallback: Link properties via schema:domainIncludes if not linked via restrictions + for prop_id, type_prop in type_properties.items(): + if prop_id not in linked_property_ids: + # Check if this property references this class in its domain + for domain_ref in type_prop.domain_includes: + domain_class_id = cls._extract_local_id(domain_ref) if domain_ref else "" + if domain_class_id == local_id: + # Property belongs to this class - add it + prop_copy = type_prop.model_copy() + prop_copy.required = False # Default to optional when no restriction + class_properties.append(prop_copy) + linked_property_ids.add(prop_id) + break + + # Create Type object + ro_type = Type( + id=local_id, + subclass_of=subclass_of, + ontological_annotations=[ontology] if ontology else None, + rdfs_property=class_properties, + comment=class_data.get("rdfs:comment"), + label=class_data.get("rdfs:label"), + restrictions=class_restrictions + ) + types.append(ro_type) + + # Step 5: Create MetadataEntry objects from remaining items + metadata_entries = [] + for item in metadata_items: + item_type = item.get("@type", "") + item_id = item.get("@id", "") + + # Extract local class name + local_class = cls._extract_local_id(item_type) if item_type else "Unknown" + local_id = cls._extract_local_id(item_id) + + # Extract property values (exclude @id, @type) + properties = {} + references = {} + + for key, value in item.items(): + if key not in ["@id", "@type"]: + local_key = cls._extract_local_id(key) + + # Use context to determine if this should be treated as a reference + is_reference_property = cls._is_reference_property(key, context_processor) + + if 
isinstance(value, dict) and "@id" in value: + # Explicit reference to another entity - wrap in list as expected by MetadataEntry + references[local_key] = [cls._extract_local_id(value["@id"])] + elif isinstance(value, list): + # Handle arrays - could be references or literals + ref_values = [] + literal_values = [] + + for item_val in value: + if isinstance(item_val, dict) and "@id" in item_val: + ref_values.append(cls._extract_local_id(item_val["@id"])) + elif is_reference_property and isinstance(item_val, str): + # Context indicates this should be treated as reference + ref_values.append(cls._extract_local_id(item_val)) + else: + literal_values.append(item_val) + + # Store in appropriate category + if ref_values: + references[local_key] = ref_values + if literal_values: + properties[local_key] = literal_values if len(literal_values) > 1 else literal_values[0] + elif is_reference_property and isinstance(value, str): + # Context indicates this string should be treated as a reference + references[local_key] = [cls._extract_local_id(value)] + else: + # Direct property value + properties[local_key] = value + + entry = MetadataEntry( + id=local_id, + class_id=local_class, + properties=properties, + references=references + ) + metadata_entries.append(entry) + + # Step 6: Identify standalone properties and restrictions + # Properties that aren't linked to any type via restrictions or domainIncludes + standalone_properties = [] + standalone_restrictions = [] + + for prop_id, type_prop in type_properties.items(): + # Check if this property is linked to any type + is_linked = prop_id in linked_property_ids + if not is_linked: + standalone_properties.append(type_prop) + + # Restrictions that aren't referenced by any type + used_restriction_ids = set() + for type_obj in types: + if type_obj.restrictions: + for restriction in type_obj.restrictions: + used_restriction_ids.add(restriction.id) + + for restr_id, restriction in restrictions.items(): + if restriction.id not in 
used_restriction_ids: + standalone_restrictions.append(restriction) + + # Create and return SchemaFacade with all components + return cls( + types=types, + property_types=standalone_properties, + restrictions=standalone_restrictions, + metadata_entries=metadata_entries + ) + + @staticmethod + def _parse_jsonld_context(context) -> dict: + """ + Parse JSON-LD context to extract namespace mappings and property configurations. + + Args: + context: JSON-LD @context (string, dict, or list) + + Returns: + Dictionary with namespace mappings and property type information + """ + context_info = { + 'namespaces': {}, # prefix -> URI mapping + 'property_types': {}, # property -> type info + 'base_uri': None + } + + if isinstance(context, str): + # Single context URL - we can't extract local mappings from this + # but we know it's the base RO-Crate context + return context_info + elif isinstance(context, list): + # Process each context in the list + for ctx_item in context: + if isinstance(ctx_item, str): + continue # Skip URLs + elif isinstance(ctx_item, dict): + context_info = SchemaFacade._merge_context_dict(context_info, ctx_item) + elif isinstance(context, dict): + # Single context object + context_info = SchemaFacade._merge_context_dict(context_info, context) + + return context_info + + @staticmethod + def _merge_context_dict(context_info: dict, ctx_dict: dict) -> dict: + """Merge a context dictionary into the context info""" + for key, value in ctx_dict.items(): + if isinstance(value, str): + # Simple namespace mapping: "base": "http://example.com/" + context_info['namespaces'][key] = value + # Check if this could be our base namespace + if key in ['base', '@base'] or 'example.com' in value: + context_info['base_uri'] = value + elif isinstance(value, dict): + # Complex property definition: "name": {"@id": "schema:name", "@type": "@id"} + context_info['property_types'][key] = value + + return context_info + + @staticmethod + def _expand_uri_with_context(uri: str, 
context_info: dict) -> str: + """ + Expand a prefixed URI using the context information. + + Args: + uri: URI that may be prefixed (e.g., 'base:Person', 'schema:name') + context_info: Parsed context information + + Returns: + Expanded URI (e.g., 'http://example.com/Person', 'https://schema.org/name') + """ + if not uri or ':' not in uri: + return uri + + prefix, local_part = uri.split(':', 1) + namespace_uri = context_info['namespaces'].get(prefix) + + if namespace_uri: + # Ensure namespace URI ends properly for concatenation + if not namespace_uri.endswith(('//', '/', '#')): + namespace_uri += '/' + return namespace_uri + local_part + + return uri # Return unchanged if we can't expand it + + @staticmethod + def _contract_uri_with_context(uri: str, context_info: dict) -> str: + """ + Contract a full URI to a prefixed form using context information. + + Args: + uri: Full URI (e.g., 'http://example.com/Person') + context_info: Parsed context information + + Returns: + Contracted URI if possible (e.g., 'base:Person'), otherwise original + """ + if not uri: + return uri + + # Check against known namespaces + for prefix, namespace_uri in context_info['namespaces'].items(): + # Handle different namespace ending patterns + if namespace_uri.endswith(('//', '/', '#')): + base_ns = namespace_uri + else: + base_ns = namespace_uri + '/' + + if uri.startswith(base_ns): + local_part = uri[len(base_ns):] + return f"{prefix}:{local_part}" + + return uri + + @staticmethod + def _is_reference_property(prop_name: str, context_info: dict) -> bool: + """ + Check if a property should be treated as a reference (points to another entity). 
+ + Args: + prop_name: Property name + context_info: Parsed context information + + Returns: + True if property contains references to other entities + """ + prop_config = context_info['property_types'].get(prop_name, {}) + return prop_config.get('@type') == '@id' + + @staticmethod + def _extract_local_id(uri: str) -> str: + """Extract local ID from URI (e.g., 'base:Person' → 'Person', 'http://example.com/Person' → 'Person')""" + if not uri: + return "" + + # Handle full URLs (http://, https://) + if uri.startswith(('http://', 'https://')): + return uri.split("/")[-1] if "/" in uri else uri + + # Handle namespace prefixes (base:Person, schema:name, etc.) + if ":" in uri: + return uri.split(":")[-1] + + # Handle simple paths or plain strings + return uri.split("/")[-1] if "/" in uri else uri + + def write(self, destination: str, name: Optional[str] = None, description: Optional[str] = None, + license: Optional[str] = None, **kwargs): + """ + Write the schema as an RO-Crate to the specified destination. + Automatically includes any files that were added via add_file(). + Includes dynamic JSON-LD context based on actual vocabulary usage. + + Args: + destination: Directory path where the crate should be written + name: Name for the RO-Crate (optional) + description: Description for the RO-Crate (optional) + license: License identifier for the RO-Crate (optional) + **kwargs: Additional metadata for the RO-Crate + """ + # Get the complete RO-Crate using get_crate (includes dynamic context) + crate: ROCrate = self.get_crate(name=name, description=description, license=license, **kwargs) + + # Write to destination + crate.write(destination) + + return self + + def get_dynamic_context(self) -> dict: + """ + Generate and return the dynamic JSON-LD context based on the vocabularies + and properties actually used in this schema. 
+ + Returns: + JSON-LD @context that includes only the namespaces and properties + that are actually used in the schema + """ + from lib_ro_crate_schema.crate.jsonld_utils import get_context + + # Generate RDF graph and extract context + graph = self.to_graph() + return get_context(graph) + + def to_json(self) -> dict: + """ + Convert the schema to JSON-LD format without writing to disk. + + Returns: + JSON-LD representation of the schema as RO-Crate + """ + from rocrate.rocrate import ROCrate + from lib_ro_crate_schema.crate.jsonld_utils import add_schema_to_crate + + # Resolve any forward references first + self.resolve_forward_refs() + + # Create temporary crate and get JSON representation + crate = ROCrate() + crate = add_schema_to_crate(self, crate) + + # Return the JSON representation of the crate + with tempfile.TemporaryDirectory() as temp_dir: + crate.write(temp_dir) + metadata_file = Path(temp_dir) / "ro-crate-metadata.json" + with open(metadata_file, 'r') as f: + return json.load(f) def to_triples(self) -> Generator[Triple, None, None]: + # Generate triples for types (includes their attached properties and restrictions) for p in self.types: yield from p.to_triples() + + # Generate triples for standalone properties + for prop in self.property_types: + yield from prop.to_triples() + + # Generate triples for standalone restrictions + for restriction in self.restrictions: + yield from restriction.to_triples() + + # Generate triples for metadata entries for m in self.metadata_entries: yield from m.to_triples() def get_properties(self) -> Generator[TypeProperty, None, None]: - yield from set( - [ - property - for current_type in self.types - for property in current_type.rdfs_property - ] - ) + # Get all properties - both standalone and attached to types + all_properties = [] + + # Add standalone properties + all_properties.extend(self.property_types) + + # Add properties from types + for current_type in self.types: + if current_type.rdfs_property: + 
all_properties.extend(current_type.rdfs_property) + + # Remove duplicates based on ID and yield + seen_ids = set() + for prop in all_properties: + if prop.id not in seen_ids: + seen_ids.add(prop.id) + yield prop + + def getPropertyTypes(self) -> list[TypeProperty]: + """Get list of standalone property types (not attached to any Type)""" + return self.get_properties() + + def to_graph(self) -> Graph: + """Convert the schema to an RDFLib Graph""" local_graph = Graph() [local_graph.add(triple) for triple in self.to_triples()] local_graph.bind(prefix=self.prefix, namespace=BASE) return local_graph + + # New methods for decorator integration + + def export_pydantic_model(self, type_id: str, base_class: Optional[TypingType[BaseModel]] = None) -> TypingType[BaseModel]: + """ + Export a Type definition as a Pydantic model class with BFS dependency resolution. + + This method now uses the Registry's BFS traversal to ensure all dependencies + are resolved in the correct order before creating the target model. + + Args: + type_id: ID of the Type to export + base_class: Optional base class to inherit from (defaults to BaseModel) + + Returns: + Dynamically generated Pydantic model class + + Raises: + ValueError: If type_id is not found in the schema + + Example: + facade = SchemaFacade() + # ... add types to facade ... 
+ PersonModel = facade.export_pydantic_model("Person") + person = PersonModel(name="Alice", age=30) + """ + # Check if already cached + cached_model = self._forward_ref_resolver.get_pydantic_model(type_id) + if cached_model: + return cached_model + + # Find the Type definition + type_def = self.get_type(type_id) + if not type_def: + raise ValueError(f"Type '{type_id}' not found in schema") + + # Use BFS to find all dependencies and export them first + dependency_order = self._forward_ref_resolver.collect_dependencies_bfs(type_id) + + # Export all dependencies first (except the target) + for dep_type_id in dependency_order[:-1]: # Exclude the target type itself + if not self._forward_ref_resolver.get_pydantic_model(dep_type_id): + dep_model = self._create_single_pydantic_model(dep_type_id, base_class) + self._forward_ref_resolver.register_pydantic_model(dep_type_id, dep_model) + + # Finally export the target model + target_model = self._create_single_pydantic_model(type_id, base_class) + self._forward_ref_resolver.register_pydantic_model(type_id, target_model) + + # Rebuild all models to resolve forward references + self._rebuild_pydantic_models(dependency_order) + + return target_model + + def _create_single_pydantic_model(self, type_id: str, base_class: Optional[TypingType[BaseModel]] = None) -> TypingType[BaseModel]: + """ + Create a single Pydantic model without dependency resolution. + Used internally by the BFS-based export methods. 
+ """ + type_def = self.get_type(type_id) + if not type_def: + raise ValueError(f"Type '{type_id}' not found in schema") + + # Determine base class + if base_class is None: + base_class = BaseModel + + # Build field definitions from Type properties + field_definitions = {} + annotations = {} + + if type_def.rdfs_property: + for prop in type_def.rdfs_property: + field_name = prop.id + + # Determine Python type from range_includes with registry-aware resolution + python_type = self._rdf_type_to_python_type_with_registry(prop.range_includes) + + # Check if field is required from restrictions + is_required = self._is_field_required(type_def, field_name) + is_list = self._is_field_list(type_def, field_name) + + # Adjust type for lists + if is_list and python_type != Any: + from typing import List as TypingList + python_type = TypingList[python_type] + + # Create Field with metadata + field_kwargs = {} + if prop.comment: + field_kwargs['description'] = prop.comment + if not is_required: + field_kwargs['default'] = None + python_type = Optional[python_type] + + if field_kwargs: + field_definitions[field_name] = Field(**field_kwargs) + else: + field_definitions[field_name] = ... if is_required else None + + annotations[field_name] = python_type + + # Create the class dynamically + class_name = type_def.id + + # Create class attributes dictionary + class_dict = { + '__annotations__': annotations, + '__module__': f"__pydantic_export_{id(self)}", # Unique module name + } + + # Add field definitions + class_dict.update(field_definitions) + + # Add docstring from Type comment + if type_def.comment: + class_dict['__doc__'] = type_def.comment + + # Create the class + model_class = type(class_name, (base_class,), class_dict) + + return model_class + + def export_all_pydantic_models(self, base_class: Optional[TypingType[BaseModel]] = None) -> dict[str, TypingType[BaseModel]]: + """ + Export all Types in the schema as Pydantic model classes with proper dependency resolution. 
+ + This method uses the Registry's dependency resolution to export all models + in the correct order, ensuring forward references work properly. + + Args: + base_class: Optional base class for all models (defaults to BaseModel) + + Returns: + Dictionary mapping type IDs to generated Pydantic model classes + + Example: + facade = SchemaFacade() + # ... add types to facade ... + models = facade.export_all_pydantic_models() + PersonModel = models["Person"] + OrganizationModel = models["Organization"] + """ + models = {} + + # Get all type IDs + type_ids = [type_def.id for type_def in self.types] + + # Use registry to get proper dependency order for all types + ordered_type_ids = self._forward_ref_resolver.get_all_dependencies(type_ids) + + # Export models in dependency order + for type_id in ordered_type_ids: + if not self._forward_ref_resolver.get_pydantic_model(type_id): + model_class = self._create_single_pydantic_model(type_id, base_class) + self._forward_ref_resolver.register_pydantic_model(type_id, model_class) + models[type_id] = model_class + else: + models[type_id] = self._forward_ref_resolver.get_pydantic_model(type_id) + + # Rebuild all models to resolve forward references + self._rebuild_pydantic_models(ordered_type_ids) + + return models + + def clear_pydantic_model_cache(self): + """Clear the cached Pydantic models to force regeneration""" + if hasattr(self._forward_ref_resolver, '_pydantic_models'): + self._forward_ref_resolver._pydantic_models.clear() + + def _rdf_type_to_python_type(self, range_includes: List[str]) -> TypingType: + """Convert RDF range types to Python types (legacy method)""" + return self._rdf_type_to_python_type_with_registry(range_includes) + + def _rdf_type_to_python_type_with_registry(self, range_includes: List[str]) -> TypingType: + """Convert RDF range types to Python types with Registry-aware resolution""" + if not range_includes: + return Any + + # Take the first range type for simplicity + rdf_type = range_includes[0] + + # 
Map common XSD types to Python types + type_mapping = { + 'http://www.w3.org/2001/XMLSchema#string': str, + 'http://www.w3.org/2001/XMLSchema#integer': int, + 'http://www.w3.org/2001/XMLSchema#int': int, + 'http://www.w3.org/2001/XMLSchema#long': int, + 'http://www.w3.org/2001/XMLSchema#float': float, + 'http://www.w3.org/2001/XMLSchema#double': float, + 'http://www.w3.org/2001/XMLSchema#decimal': float, + 'http://www.w3.org/2001/XMLSchema#boolean': bool, + 'http://www.w3.org/2001/XMLSchema#date': str, # Could use datetime.date + 'http://www.w3.org/2001/XMLSchema#dateTime': str, # Could use datetime.datetime + 'http://www.w3.org/2001/XMLSchema#time': str, # Could use datetime.time + 'http://www.w3.org/2001/XMLSchema#anyURI': str, + 'https://schema.org/Text': str, + 'https://schema.org/Number': float, + 'https://schema.org/Integer': int, + 'https://schema.org/Boolean': bool, + 'https://schema.org/URL': str, + 'https://schema.org/Date': str, + 'https://schema.org/DateTime': str, + } + + # Check if it's a known XSD/schema.org type + if rdf_type in type_mapping: + return type_mapping[rdf_type] + + # Check if it's a reference to another Type in our schema + referenced_type = self.get_type(rdf_type) + if referenced_type: + # Check if we already have a Pydantic model for this type + cached_model = self._forward_ref_resolver.get_pydantic_model(rdf_type) + if cached_model: + return cached_model + # Return a forward reference string that Pydantic can resolve + return f"'{rdf_type}'" + + # Extract local name for custom types + local_name = self._extract_local_id(rdf_type) + if self.get_type(local_name): + # Check registry cache first + cached_model = self._forward_ref_resolver.get_pydantic_model(local_name) + if cached_model: + return cached_model + return f"'{local_name}'" + + # Default to Any for unknown types + return Any + + def _rebuild_pydantic_models(self, type_ids: List[str]): + """Rebuild all Pydantic models to resolve forward references""" + import sys + from types 
import ModuleType + + # Create a temporary module with all models for proper resolution + temp_module_name = f"__pydantic_rebuild_{id(self)}" + temp_module = ModuleType(temp_module_name) + + try: + # Add all models to the temporary module namespace + for type_id in type_ids: + model_class = self._forward_ref_resolver.get_pydantic_model(type_id) + if model_class: + setattr(temp_module, type_id, model_class) + # Update the model's module reference + model_class.__module__ = temp_module_name + + # Register the module + sys.modules[temp_module_name] = temp_module + + # Rebuild all models + for type_id in type_ids: + model_class = self._forward_ref_resolver.get_pydantic_model(type_id) + if model_class: + try: + model_class.model_rebuild() + except Exception as e: + print(f"Warning: Could not rebuild model {model_class.__name__}: {e}") + + finally: + # Clean up the temporary module + if temp_module_name in sys.modules: + del sys.modules[temp_module_name] + + def _is_field_required(self, type_def: Type, field_name: str) -> bool: + """Check if a field is required based on OWL restrictions""" + if not type_def.restrictions: + return False + + for restriction in type_def.restrictions: + if restriction.property_type == field_name: + return (restriction.min_cardinality or 0) > 0 + + return False + + def _is_field_list(self, type_def: Type, field_name: str) -> bool: + """Check if a field should be a list based on OWL restrictions""" + if not type_def.restrictions: + return False + + for restriction in type_def.restrictions: + if restriction.property_type == field_name: + # If max_cardinality is None (unbounded) or > 1, it's a list + return restriction.max_cardinality is None or (restriction.max_cardinality or 0) > 1 + + return False + + def add_pydantic_model(self, model_class: TypingType[BaseModel], + ontology: Optional[str] = None, + comment: Optional[str] = None) -> Type: + """ + Add a Pydantic model to the schema, either by using existing registration + or by registering it 
on-the-fly. + + Args: + model_class: The Pydantic model class + ontology: Optional ontology URI (overrides decorator setting) + comment: Optional comment (overrides decorator setting) + + Returns: + The generated Type object + """ + # Check if model is already registered + schema_registry = get_schema_registry() + type_template = schema_registry.get_type_template(model_class.__name__) + + if not type_template: + # Register the model if not already registered + # Use class name as default type_id for dynamic registration + type_template = schema_registry.register_type_from_model( + model_class=model_class, + type_id=model_class.__name__, # Default to class name + ontology=ontology, + comment=comment + ) + + # Convert to Type object and add to facade + ro_crate_type = self._type_template_to_type(type_template) + + # Check if already exists in types + existing_type = next((t for t in self.types if t.id == ro_crate_type.id), None) + if not existing_type: + self.types.append(ro_crate_type) + self._forward_ref_resolver.register(ro_crate_type.id, ro_crate_type) + + # Register properties too + if ro_crate_type.rdfs_property: + for prop in ro_crate_type.rdfs_property: + self._forward_ref_resolver.register(prop.id, prop) + + return ro_crate_type + + def add_registered_models(self, *model_names: str) -> List[Type]: + """ + Add models that were previously registered via @ro_crate_schema decorator. + + Args: + *model_names: Names of registered models to add + + Returns: + List of generated Type objects + """ + schema_registry = get_schema_registry() + added_types = [] + + for model_name in model_names: + type_template = schema_registry.get_type_template(model_name) + if not type_template: + raise ValueError(f"Model '{model_name}' is not registered. 
Use @ro_crate_schema decorator first.") + + ro_crate_type = self._type_template_to_type(type_template) + + # Check if already exists + existing_type = next((t for t in self.types if t.id == ro_crate_type.id), None) + if not existing_type: + self.types.append(ro_crate_type) + self._forward_ref_resolver.register(ro_crate_type.id, ro_crate_type) + + # Register properties + if ro_crate_type.rdfs_property: + for prop in ro_crate_type.rdfs_property: + self._forward_ref_resolver.register(prop.id, prop) + + added_types.append(ro_crate_type) + + return added_types + + def add_all_registered_models(self) -> List[Type]: + """ + Add all models that were registered via @ro_crate_schema decorator. + + Returns: + List of generated Type objects + """ + schema_registry = get_schema_registry() + all_type_templates = schema_registry.get_all_type_templates() + return self.add_registered_models(*all_type_templates.keys()) + + def add_model_instance(self, instance: BaseModel, instance_id: Optional[str] = None) -> MetadataEntry: + """ + Add a Pydantic model instance as a metadata entry. + The model class should be registered first. 
+ + Args: + instance: Pydantic model instance + instance_id: Optional custom ID for the instance + + Returns: + The created MetadataEntry + """ + model_class = type(instance) + + # Ensure the model type is in the schema + self.add_pydantic_model(model_class) + + # Get the correct type ID from the schema registry (might be different from class name) + schema_registry = get_schema_registry() + + # First try to get by explicit ID if the model was decorated + if hasattr(model_class, '_ro_crate_id'): + type_id = model_class._ro_crate_id + else: + # Fallback to class name for dynamic models + type_id = model_class.__name__ + + # Verify the type exists in our schema + type_template = schema_registry.get_type_template(type_id) + if not type_template: + # Try class name as fallback + type_template = schema_registry.get_type_template(model_class.__name__) + if type_template: + type_id = type_template.id + + # Determine instance ID + if instance_id is None: + # Try to extract ID from instance if it has an @id or id field + # Use getattr to access the actual field values, not model_dump() + if hasattr(instance, '@id') and getattr(instance, '@id') is not None: + instance_id = getattr(instance, '@id') + elif hasattr(instance, 'id') and getattr(instance, 'id') is not None: + instance_id = getattr(instance, 'id') + else: + # Generate placeholder ID as fallback + instance_id = f"{type_id.lower()}_placeholder_{abs(hash(str(instance)))}" + + # Extract properties and references from instance + properties = {} + references = {} + + # Iterate over actual field values, not model_dump() output + for field_name in type(instance).model_fields.keys(): + field_value = getattr(instance, field_name, None) + + if field_value is None: + continue + + + if isinstance(field_value, BaseModel): + # Reference to another model instance + # First, try to find an existing equivalent entry + ref_instance_id = None + + # Check for explicit ID first + if hasattr(field_value, '@id') and getattr(field_value, 
'@id') is not None: + ref_instance_id = getattr(field_value, '@id') + elif hasattr(field_value, 'id') and getattr(field_value, 'id') is not None: + ref_instance_id = getattr(field_value, 'id') + else: + # Check if an equivalent entry already exists in metadata + existing_entry = self._find_equivalent_entry_for_model(field_value) + if existing_entry: + ref_instance_id = existing_entry.id + else: + ref_instance_id = f"{type(field_value).__name__.lower()}_placeholder_{abs(hash(str(field_value)))}" + + references[field_name] = [ref_instance_id] + + # Only recursively add if we don't already have an equivalent entry + if not self._find_equivalent_entry_for_model(field_value): + self.add_model_instance(field_value, ref_instance_id) + elif isinstance(field_value, list): + # Handle lists (could be references or properties) + field_refs = [] + for item in field_value: + if isinstance(item, BaseModel): + # Create proper ID for list item + ref_instance_id = None + + # Check for explicit ID first + if hasattr(item, '@id') and getattr(item, '@id') is not None: + ref_instance_id = getattr(item, '@id') + elif hasattr(item, 'id') and getattr(item, 'id') is not None: + ref_instance_id = getattr(item, 'id') + else: + # Check if an equivalent entry already exists in metadata + existing_entry = self._find_equivalent_entry_for_model(item) + if existing_entry: + ref_instance_id = existing_entry.id + else: + ref_instance_id = f"{type(item).__name__.lower()}_placeholder_{abs(hash(str(item)))}" + + field_refs.append(ref_instance_id) + + # Only recursively add if we don't already have an equivalent entry + if not self._find_equivalent_entry_for_model(item): + self.add_model_instance(item, ref_instance_id) + else: + # Simple value in list - not supported in current format + pass + if field_refs: + references[field_name] = field_refs + else: + # Simple value - handle datetime serialization properly + if isinstance(field_value, datetime): + properties[field_name] = field_value.isoformat() + 
else: + properties[field_name] = field_value + + # Create metadata entry + entry = MetadataEntry( + id=instance_id, + class_id=type_id, # Use the correct type ID + properties=properties, + references=references + ) + + # Use the same duplicate detection logic as addEntry + self.addEntry(entry) + + # Return the entry that was actually kept (might be different if duplicate was found) + final_entry = next((e for e in self.metadata_entries if + self._entries_are_equivalent(e, entry) and self._is_placeholder_id(entry.id)), entry) + return final_entry + + def _type_template_to_type(self, type_template: TypeTemplate) -> Type: + """Convert TypeTemplate to Type object""" + # Convert properties + properties = [] + restrictions = [] + + for prop_template in type_template.type_properties: + # Create TypeProperty + type_property = TypeProperty( + id=prop_template.name, + range_includes=[prop_template.rdf_type], + domain_includes=[type_template.id], # Use id instead of name + ontological_annotations=[prop_template.ontology] if prop_template.ontology else [], + comment=prop_template.comment, + label=prop_template.name.replace('_', ' ').title() + ) + properties.append(type_property) + + # Create OWL restrictions for all fields (conforming to Java architecture) + if prop_template.required: + # Required fields get minCardinality: 1 + # Lists get maxCardinality: None (unbounded), single values get maxCardinality: 1 + restriction = Restriction( + property_type=prop_template.name, + min_cardinality=1, + max_cardinality=None if prop_template.is_list else 1 + ) + else: + # Optional fields get minCardinality: 0 + # Lists get maxCardinality: None (unbounded), single values get maxCardinality: 1 + restriction = Restriction( + property_type=prop_template.name, + min_cardinality=0, + max_cardinality=None if prop_template.is_list else 1 + ) + restrictions.append(restriction) + + # Create Type + ro_crate_type = Type( + id=type_template.id, # Use id instead of name + 
subclass_of=["https://schema.org/Thing"], + ontological_annotations=[type_template.ontology] if type_template.ontology else [], + rdfs_property=properties, + comment=type_template.comment, + label=type_template.id, # Use id instead of name + restrictions=restrictions + ) + + return ro_crate_type + + # Java API compatibility getter methods + def get_types(self) -> List[Type]: + """Get all types in the schema""" + return self.types + + def getTypes(self) -> List[Type]: + """Java API compatibility method to get all types""" + return self.get_types() + + def get_type(self, type_id: str) -> Optional[Type]: + """Get a specific type by its ID""" + for type_obj in self.types: + if type_obj.id == type_id: + return type_obj + return None + + def getType(self, type_id: str) -> Optional[Type]: + """Java API compatibility method to get a specific type by its ID""" + return self.get_type(type_id) + + def get_entries(self) -> List[MetadataEntry]: + """Get all metadata entries in the schema""" + return self.metadata_entries + + + def get_entry(self, entry_id: str) -> Optional[MetadataEntry]: + """Get a specific metadata entry by its ID""" + for entry in self.metadata_entries: + if entry.id == entry_id: + return entry + return None + + + def get_entries_by_class(self, class_id: str) -> List[MetadataEntry]: + """Get all metadata entries of a specific class""" + return [entry for entry in self.metadata_entries if entry.class_id == class_id] + + def getEntries(self, class_id: str = "") -> List[MetadataEntry]: + """Java API compatibility method to get all metadata entries of a specific class""" + if not class_id: + return self.get_entries() + + return self.get_entries_by_class(class_id) + + def get_entry_as(self, entry_id: str, target_type: TypingType) -> Optional[Any]: + """ + Convert a metadata entry to an instance of the specified type. 
+ + This method finds the metadata entry by ID and converts it to an instance + of the provided target type (Pydantic model class or any other callable). + + Args: + entry_id: The ID of the metadata entry to convert + target_type: The target class/type to convert to (e.g., a Pydantic model) + + Returns: + An instance of target_type created from the metadata entry, or None if entry not found + + Raises: + ValueError: If the entry cannot be converted to the target type + TypeError: If the target_type is not callable + + Example: + facade = SchemaFacade.from_ro_crate("my_crate") + + # Define or get your Pydantic model + class Person(BaseModel): + name: str + age: Optional[int] = None + email: Optional[str] = None + + # Convert metadata entry to Pydantic instance + person = facade.get_entry_as("person_001", Person) + print(f"Name: {person.name}, Age: {person.age}") + + # Or use exported model from schema + PersonModel = facade.export_pydantic_model("Person") + person = facade.get_entry_as("person_001", PersonModel) + """ + # Find the metadata entry + entry = self.get_entry(entry_id) + if not entry: + return None + + # Check if target_type is callable + if not callable(target_type): + raise TypeError(f"target_type must be callable, got {type(target_type)}") + + try: + # Use the ForwardRefResolver to handle recursive reference resolution + constructor_data = self._forward_ref_resolver.resolve_metadata_references( + self, entry_id, target_type + ) + + # Filter out any keys that aren't valid for the target type + if hasattr(target_type, '__annotations__'): + # For Pydantic models and annotated classes, only use valid fields + valid_fields = set(getattr(target_type, '__annotations__', {}).keys()) + if hasattr(target_type, 'model_fields'): + # Pydantic v2 model fields + valid_fields.update(target_type.model_fields.keys()) + elif hasattr(target_type, '__fields__'): + # Pydantic v1 model fields + valid_fields.update(target_type.__fields__.keys()) + + # Only pass valid fields 
to avoid unexpected keyword arguments + if valid_fields: + constructor_data = {k: v for k, v in constructor_data.items() if k in valid_fields} + + # Create instance of target type + instance = target_type(**constructor_data) + return instance + + except Exception as e: + raise ValueError(f"Failed to convert entry '{entry_id}' to {target_type.__name__}: {e}") from e + + # Property management methods + def add_property_type(self, property: TypeProperty) -> 'SchemaFacade': + """Add a standalone property to the schema registry""" + # Check if already exists to avoid duplicates + if not any(p.id == property.id for p in self.property_types): + self.property_types.append(property) + self._forward_ref_resolver.register(property.id, property) + return self + + def add_restriction(self, restriction: Restriction) -> 'SchemaFacade': + """Add a standalone restriction to the schema registry""" + # Check if already exists to avoid duplicates + if not any(r.id == restriction.id for r in self.restrictions): + self.restrictions.append(restriction) + self._forward_ref_resolver.register(restriction.id, restriction) + return self + + def get_property_types(self) -> List[TypeProperty]: + """Get all properties from all types in the schema, including standalone properties""" + properties = [] + seen_ids = set() + + # Add standalone properties first + for prop in self.property_types: + if prop.id not in seen_ids: + properties.append(prop) + seen_ids.add(prop.id) + + # Add properties from types + for type_obj in self.types: + if type_obj.rdfs_property: + for prop in type_obj.rdfs_property: + if prop.id not in seen_ids: + properties.append(prop) + seen_ids.add(prop.id) + return properties + + def get_restrictions(self) -> List[Restriction]: + """Get all restrictions, including standalone restrictions and those attached to types""" + restrictions = [] + seen_ids = set() + + # Add standalone restrictions first + for restriction in self.restrictions: + if restriction.id not in seen_ids: + 
restrictions.append(restriction) + seen_ids.add(restriction.id) + + # Add restrictions from types (both explicit and auto-generated from properties) + for type_obj in self.types: + type_restrictions = type_obj.get_restrictions() # This includes auto-generated ones + for restriction in type_restrictions: + if restriction.id not in seen_ids: + restrictions.append(restriction) + seen_ids.add(restriction.id) + return restrictions + + def getRestrictions(self) -> List[Restriction]: + """Java API compatibility method to get all restrictions""" + return self.get_restrictions() + + def get_property_type(self, property_id: str) -> Optional[TypeProperty]: + """Get a specific property by ID from anywhere in the schema""" + # Check standalone properties first + for prop in self.property_types: + if prop.id == property_id: + return prop + + # Check properties attached to types + for type_obj in self.types: + if type_obj.rdfs_property: + for prop in type_obj.rdfs_property: + if prop.id == property_id: + return prop + return None + + def getPropertyType(self, property_id: str) -> Optional[TypeProperty]: + """Java API compatibility method to get a specific property by ID""" + return self.get_property_type(property_id) + + def get_restriction(self, restriction_id: str) -> Optional[Restriction]: + """Get a specific restriction by ID from anywhere in the schema""" + # Check standalone restrictions first + for restriction in self.restrictions: + if restriction.id == restriction_id: + return restriction + + # Check restrictions attached to types (both explicit and auto-generated) + for type_obj in self.types: + type_restrictions = type_obj.get_restrictions() + for restriction in type_restrictions: + if restriction.id == restriction_id: + return restriction + return None + + def getRestriction(self, restriction_id: str) -> Optional[Restriction]: + """Java API compatibility method to get a specific restriction by ID""" + return self.get_restriction(restriction_id) + + # RO-Crate access 
method + def get_crate(self, name: Optional[str] = None, description: Optional[str] = None, + license: Optional[str] = None, **kwargs): + """ + Get the underlying RO-Crate object with full schema and file integration. + + This method creates a complete RO-Crate object containing the schema, + metadata entries, and any files that were added via add_file(). + Includes dynamic JSON-LD context based on actual vocabulary usage. + + Args: + name: Name for the RO-Crate (optional) + description: Description for the RO-Crate (optional) + license: License identifier for the RO-Crate (optional) + **kwargs: Additional metadata for the RO-Crate + + Returns: + ROCrate object ready for writing or further manipulation + """ + from rocrate.rocrate import ROCrate + from lib_ro_crate_schema.crate.jsonld_utils import add_schema_to_crate + from datetime import datetime + + # Resolve any forward references first + self.resolve_forward_refs() + + # Resolve placeholder entries (for circular references) + self.resolve_placeholders() + + # Create the RO-Crate + crate = ROCrate() + + # Set crate metadata + if name: + crate.name = name + if description: + crate.description = description + if license: + crate.license = license + + # Add any additional metadata + for key, value in kwargs.items(): + setattr(crate, key, value) + + # Add dynamic JSON-LD context before adding schema + dynamic_context = self.get_dynamic_context() + if isinstance(dynamic_context, list) and len(dynamic_context) > 1: + # Add the additional context (skip base RO-Crate context which is already included) + additional_context = dynamic_context[1] + if additional_context: # Only if there are actually additional mappings + crate.metadata.extra_contexts.append(additional_context) + + # Add schema to crate + crate = add_schema_to_crate(self, crate) + + # Add files to crate + if self.files: + print(f" 📁 Adding {len(self.files)} files to RO-Crate:") + for file_info in self.files: + file_path = file_info['path'] + if 
file_path.exists(): + # Create file properties + file_properties = { + "@type": "File", + "name": file_info['name'], + "description": file_info['description'], + "encodingFormat": self._get_mime_type(file_path), + "dateCreated": datetime.now().isoformat() + } + + # Add any custom properties + file_properties.update(file_info.get('properties', {})) + + # Add file to crate + file_entity = crate.add_file( + source=str(file_path), + properties=file_properties + ) + print(f" 📄 Added: {file_path.name} ({file_info['name']})") + else: + print(f" ⚠️ File not found: {file_path}") + + return crate + + def getCrate(self, name: Optional[str] = None, description: Optional[str] = None, + license: Optional[str] = None, **kwargs): + """ + Java API compatibility alias for get_crate(). + + Get the underlying RO-Crate object with full schema and file integration. + + Args: + name: Name for the RO-Crate (optional) + description: Description for the RO-Crate (optional) + license: License identifier for the RO-Crate (optional) + **kwargs: Additional metadata for the RO-Crate + + Returns: + ROCrate object ready for writing or further manipulation + """ + return self.get_crate(name=name, description=description, license=license, **kwargs) + + def _get_mime_type(self, file_path: Path) -> str: + """Get MIME type for file based on extension""" + mime_types = { + '.csv': 'text/csv', + '.json': 'application/json', + '.txt': 'text/plain', + '.md': 'text/markdown', + '.pdf': 'application/pdf', + '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + '.xls': 'application/vnd.ms-excel', + '.png': 'image/png', + '.jpg': 'image/jpeg', + '.jpeg': 'image/jpeg', + '.gif': 'image/gif', + '.svg': 'image/svg+xml', + '.xml': 'application/xml', + '.html': 'text/html', + '.htm': 'text/html', + '.py': 'text/x-python', + '.js': 'text/javascript', + '.css': 'text/css', + '.zip': 'application/zip', + '.tar.gz': 'application/gzip', + '.gz': 'application/gzip' + } + return 
mime_types.get(file_path.suffix.lower(), 'application/octet-stream') diff --git a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/schema_registry.py b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/schema_registry.py new file mode 100644 index 0000000..57afa18 --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/schema_registry.py @@ -0,0 +1,186 @@ +""" +Schema registry for managing Pydantic model registration and metadata extraction. +""" +from typing import Dict, Type, List, Any, Optional, get_type_hints, get_origin, get_args +from dataclasses import dataclass +from pydantic import BaseModel +import datetime +from decimal import Decimal + + +@dataclass +class TypePropertyTemplate: + """Template for creating TypeProperty objects from Pydantic model fields""" + name: str + python_type: Type + rdf_type: str + required: bool + is_list: bool + ontology: Optional[str] = None + comment: Optional[str] = None + default_value: Any = None + + +@dataclass +class TypeTemplate: + """ + Template for creating Type objects from @ro_crate_schema decorated Pydantic models. + + The 'id' field stores the RO-Crate schema identifier, which may be different from the + Python class name if explicitly set via @ro_crate_schema(id="..."). + """ + id: str # RO-Crate schema identifier (may differ from Python class name) + model_class: Type[BaseModel] + ontology: Optional[str] = None + comment: Optional[str] = None + type_properties: List[TypePropertyTemplate] = None + + def __post_init__(self): + if self.type_properties is None: + self.type_properties = [] + + +class SchemaRegistry: + """ + Global registry for @ro_crate_schema decorated Pydantic models. + + This registry stores TypeTemplates (will become Type objects) and TypePropertyTemplates + (will become TypeProperty objects). It does NOT store MetadataEntry objects - those + are created separately in SchemaFacade from Pydantic model instances. 
+ + Purpose: Bridge between Pydantic models and RO-Crate schema objects + """ + + def __init__(self): + self._registered_types: Dict[str, TypeTemplate] = {} + self._type_converter = TypeConverter() + + def register_type_from_model(self, model_class: Type[BaseModel], type_id: str, + ontology: Optional[str] = None, + comment: Optional[str] = None) -> TypeTemplate: + """Register a Pydantic model and extract template for Type creation""" + + # Extract type properties from Pydantic model fields + type_properties = self._extract_type_properties(model_class) + + type_template = TypeTemplate( + id=type_id, # Use explicit type_id instead of class name + model_class=model_class, + ontology=ontology, + comment=comment or model_class.__doc__, + type_properties=type_properties + ) + + # Store by the type_id, not class name + self._registered_types[type_id] = type_template + return type_template + + def get_type_template(self, type_id: str) -> Optional[TypeTemplate]: + """Get type template for a registered @ro_crate_schema model by id""" + return self._registered_types.get(type_id) + + def get_all_type_templates(self) -> Dict[str, TypeTemplate]: + """Get all registered type templates from @ro_crate_schema models""" + return self._registered_types.copy() + + def is_type_registered(self, type_id: str) -> bool: + """Check if a @ro_crate_schema decorated model is registered""" + return type_id in self._registered_types + + def _extract_type_properties(self, model_class: Type[BaseModel]) -> List[TypePropertyTemplate]: + """Extract TypeProperty templates from Pydantic model fields""" + type_property_templates = [] + + for field_name, field_info in model_class.model_fields.items(): + # Get the field type + field_type = field_info.annotation + + # Check if it's a list/optional type + is_list = self._is_list_type(field_type) + if is_list: + # Extract the inner type for lists + field_type = get_args(field_type)[0] if get_args(field_type) else field_type + + # Convert to RDF type + 
rdf_type = self._type_converter.python_to_rdf(field_type) + + # Extract ontology annotation from field metadata + json_extra = getattr(field_info, 'json_schema_extra', None) if hasattr(field_info, 'json_schema_extra') else None + ontology = json_extra.get('ontology') if json_extra else None + + type_property_template = TypePropertyTemplate( + name=field_name, + python_type=field_type, + rdf_type=rdf_type, + required=field_info.is_required(), + is_list=is_list, + ontology=ontology, + comment=field_info.description, + default_value=field_info.default if field_info.default is not ... else None + ) + + type_property_templates.append(type_property_template) + + return type_property_templates + + def _is_list_type(self, type_annotation) -> bool: + """Check if a type annotation represents a list""" + origin = get_origin(type_annotation) + return origin is list or origin is List + + +class TypeConverter: + """Converts Python types to XSD/RDF types""" + + # Mapping from Python types to XSD types + TYPE_MAPPING = { + str: "xsd:string", + int: "xsd:integer", + float: "xsd:float", + bool: "xsd:boolean", + datetime.datetime: "xsd:dateTime", + datetime.date: "xsd:date", + datetime.time: "xsd:time", + Decimal: "xsd:decimal", + bytes: "xsd:base64Binary", + } + + def python_to_rdf(self, python_type: Type) -> str: + """Convert a Python type to its corresponding XSD/RDF type""" + # Handle Union types (Optional, etc.) 
+ if hasattr(python_type, '__origin__'): + origin = get_origin(python_type) + if origin is type(None): # Handle NoneType + return "xsd:string" # Default fallback + elif hasattr(python_type, '__args__'): + # For Union types, take the first non-None type + args = get_args(python_type) + for arg in args: + if arg is not type(None): + return self.python_to_rdf(arg) + + # Handle Pydantic models (reference types) + if isinstance(python_type, type) and issubclass(python_type, BaseModel): + return f"base:{python_type.__name__}" # Reference to another model + + # Look up in type mapping + return self.TYPE_MAPPING.get(python_type, "xsd:string") + + def add_type_mapping(self, python_type: Type, rdf_type: str): + """Add a custom type mapping""" + self.TYPE_MAPPING[python_type] = rdf_type + + +# Global decorator registry instance +_schema_registry = SchemaRegistry() + + +def get_schema_registry() -> SchemaRegistry: + """ + Get the global schema registry for @ro_crate_schema decorated Pydantic models. + + This registry contains TypeTemplates that can be converted to Type objects + and TypePropertyTemplates that can be converted to TypeProperty objects. + MetadataEntry objects are NOT stored here - they're created in SchemaFacade. 
from typing import List, Generator, Union, Optional

from lib_ro_crate_schema.crate.rdf import is_type, object_id
from lib_ro_crate_schema.crate.forward_ref_resolver import ForwardRef, ForwardRefResolver
from .restriction import Restriction
from .type_property import TypeProperty
from pydantic import BaseModel, Field
from rdflib import Node, Literal, URIRef, RDFS, OWL


class Type(BaseModel):
    """
    Represents an RDFS Class in the RO-Crate schema (equivalent to Java IType interface).
    Defines the structure and constraints for entities in the knowledge graph.

    Key Responsibilities:
    - Define RDFS Class metadata (ID, label, comment, inheritance)
    - Associate TypeProperty objects that define allowed properties
    - Generate OWL restrictions for property cardinality constraints
    - Support ontological alignment via equivalent classes

    Commonly Used Methods:

    **Fluent Builder API (each setter returns ``self`` so calls can be chained):**
    - setId(id) -> Set the RDFS Class identifier
    - setLabel(label) -> Set human-readable label (rdfs:label)
    - setComment(comment) -> Set description (rdfs:comment)
    - addProperty(property) -> Add allowed TypeProperty
    - setOntologicalAnnotations(annotations) -> Set owl:equivalentClass mappings

    **Java API Compatibility (IType):**
    - getId() -> Get the RDFS Class identifier
    - getLabel() -> Get human-readable label
    - getComment() -> Get description text
    - getSubClassOf() -> Get parent class identifiers as strings
    - getOntologicalAnnotations() -> Get equivalent class mappings
    - get_restrictions() -> Get OWL cardinality restrictions

    **RDF Generation:**
    - to_triples() -> Generate RDF triples for serialization
    - resolve(registry) -> Delegate forward-reference resolution to the properties

    Usage Example:
        person_type = Type(id="Person")
        person_type.setLabel("Person").setComment("Represents a person")
        person_type.addProperty(name_property)
        person_type.addProperty(email_property)

    JSON-LD Output Example:
        {
            "@id": "Person",
            "@type": "rdfs:Class",
            "rdfs:label": "Person",
            "rdfs:comment": "Represents a person in the system",
            "rdfs:subClassOf": {"@id": "https://schema.org/Thing"},
            "owl:equivalentClass": {"@id": "https://schema.org/Person"},
            "owl:restriction": [
                {
                    "@id": "Person_name_restriction"
                },
                {
                    "@id": "Person_email_restriction"
                }
            ]
        }
    """
    id: str
    subclass_of: List[Union[str, "Type", ForwardRef]] = Field(default_factory=lambda: ["https://schema.org/Thing"])
    ontological_annotations: Optional[List[str]] = Field(default=None)
    rdfs_property: Optional[List[TypeProperty]] = Field(default_factory=list)
    comment: Optional[str] = Field(default=None)
    label: Optional[str] = Field(default=None)
    restrictions: Optional[List[Restriction]] = Field(default=None)

    # Fluent builder API methods
    def setId(self, id: str):
        """Set the ID of this type and return ``self`` for chaining."""
        self.id = id
        return self

    def setOntologicalAnnotations(self, annotations: List[str]):
        """Set the owl:equivalentClass annotations and return ``self`` for chaining."""
        self.ontological_annotations = annotations
        return self

    def addProperty(self, property: TypeProperty):
        """Append a property to this type and return ``self`` for chaining."""
        # The field is Optional, so it may have been explicitly set to None.
        if self.rdfs_property is None:
            self.rdfs_property = []
        self.rdfs_property.append(property)
        return self

    def setComment(self, comment: str):
        """Set the rdfs:comment text and return ``self`` for chaining."""
        self.comment = comment
        return self

    def setLabel(self, label: str):
        """Set the rdfs:label text and return ``self`` for chaining."""
        self.label = label
        return self

    def get_restrictions(self) -> List[Restriction]:
        """
        Get the restrictions that represent the properties of this type (RDFS:Class).

        Returns a new list containing the explicitly configured ``restrictions``
        followed by one auto-generated restriction for every entry of
        ``rdfs_property`` that is not already covered by a restriction on the
        same property id. Auto-generated restrictions get ``min_cardinality``
        1 when the property is marked required and 0 otherwise (including when
        ``required`` is unset). The stored ``restrictions`` list is not mutated.
        """
        restrictions = list(self.restrictions or [])

        for prop in self.rdfs_property or []:
            # An explicit restriction (or one generated earlier in this loop for
            # a duplicate property id) takes precedence over auto-generation.
            if any(r.property_type == prop.id for r in restrictions):
                continue

            restrictions.append(
                Restriction(
                    # ID convention: <type id>_<property id>_restriction
                    id=f"{self.id}_{prop.id}_restriction",
                    property_type=prop.id,
                    min_cardinality=1 if prop.required else 0,
                )
            )

        return restrictions

    # Java API compatibility getter methods
    def getId(self) -> str:
        """Get the RDFS Class identifier (Java IType interface)"""
        return self.id

    def getLabel(self) -> Optional[str]:
        """Get human-readable label (Java IType interface)"""
        return self.label

    def getComment(self) -> Optional[str]:
        """Get description text (Java IType interface)"""
        return self.comment

    def getSubClassOf(self) -> List[str]:
        """
        Get parent class identifiers (Java IType interface).

        String parents are returned as-is, objects exposing an ``id`` attribute
        contribute that id, and anything else is converted with ``str()``.
        """
        result = []
        for parent in self.subclass_of or []:
            if isinstance(parent, str):
                result.append(parent)
            elif hasattr(parent, 'id'):
                result.append(parent.id)
            else:
                result.append(str(parent))
        return result

    def getOntologicalAnnotations(self) -> List[str]:
        """Get equivalent class mappings (Java IType interface); empty list when unset."""
        return self.ontological_annotations or []

    def resolve(self, registry: ForwardRefResolver):
        """
        Resolve forward references using the registry.

        Only the attached properties are asked to resolve themselves; entries
        of ``subclass_of`` are left untouched by this method.
        """
        for prop in self.rdfs_property or []:
            if hasattr(prop, 'resolve'):
                prop.resolve(registry)

    def to_triples(self) -> Generator[tuple, None, None]:
        """
        Emits the type definition as a set of triples
        whose subject is a RDFS:Class.

        Order of emission: the class typing triple, optional comment and label,
        subclass links, equivalent-class annotations, restrictions (each
        followed by its link to this class) and finally the triples of every
        attached property with its domain set to this type.
        """
        yield is_type(self.id, RDFS.Class)

        if self.comment:
            yield (object_id(self.id), RDFS.comment, Literal(self.comment))

        if self.label:
            yield (object_id(self.id), RDFS.label, Literal(self.label))

        # Subclass relationships. getSubClassOf() normalises every parent to a
        # string, so a parent lacking an ``id`` attribute is serialised the same
        # way the getter reports it instead of raising AttributeError here.
        for parent_id in self.getSubClassOf():
            yield (object_id(self.id), RDFS.subClassOf, URIRef(parent_id))

        # Ontological annotations
        for annotation in self.ontological_annotations or []:
            yield (object_id(self.id), OWL.equivalentClass, URIRef(annotation))

        # OWL Restrictions (cardinality constraints on properties).
        # "owl:restriction" (lower-case) is the linking predicate used by this
        # schema; it is spelled out as a full URI rather than taken from the
        # rdflib OWL namespace object.
        owl_restriction_property = URIRef("http://www.w3.org/2002/07/owl#restriction")
        for restriction in self.get_restrictions():
            # Generate the full restriction triples (type, onProperty, cardinality)
            yield from restriction.to_triples()
            # Link this restriction to the class via owl:restriction property
            yield (object_id(self.id), owl_restriction_property, object_id(restriction.id))

        # Properties (with domain set to this type). A copy is used so the
        # shared TypeProperty instance is not mutated; note that the copy's
        # domain_includes is replaced by, not merged with, this type's id.
        for prop in self.rdfs_property or []:
            prop_with_domain = prop.model_copy(update=dict(domain_includes=[self.id]))
            yield from prop_with_domain.to_triples()
{self.rdfs_property}") -# for prop in self.rdfs_property: -# prop.resolve(registry) -# print(f"After: {self.rdfs_property}") - -# def to_triples(self) -> Generator[Node]: -# """ -# Emits the type definition as a set of triples -# whose subject is a RDFS:Class -# """ - -# yield is_type(self.id, RDFS.Class) -# yield (object_id(self.id), RDFS.comment, Literal(self.comment)) -# yield (object_id(self.id), RDFS.label, Literal(self.label)) -# annotations = [ -# (object_id(self.id), OWL.equivalentClass, URIRef(cls)) -# for cls in self.ontological_annotations -# ] -# for ann in annotations: -# yield ann -# for restriction in self.get_restrictions(): -# yield from restriction.to_triples() -# for prop in self.rdfs_property: -# prop_with_domain = prop.model_copy(update=dict(domain_includes=[self.id])) -# yield from prop_with_domain.to_triples() - -# # def to_ro(self) -> RdfsClass: -# # return RdfsClass(id=self.id, -# # self_type="rdfs:Class", -# # subclass_of=serialize_references(self.subclass_of), -# # #rdfs_properties=[prop.to_ro() for prop in self.rdfs_property] if self.rdfs_property is not None else None, -# # ontological_annotations=None) - -# # def to_ro(self): -# # return RdfsClass( -# # id=RoId(id=self.id), -# # subclass_of=[RoId(id=i) for i in self.subclass_of if i] if self.subclass_of else [], -# # ontological_annotations= -# # equivalent_class= -# # ) - - -# TypeProperty.model_rebuild() +# Rebuild the model to handle forward references +Type.model_rebuild() diff --git a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/type_property.py b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/type_property.py index a7b980a..e444f0e 100644 --- a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/type_property.py +++ b/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/crate/type_property.py @@ -1,444 +1,221 @@ +""" +TypeProperty class for RO-Crate schema representation. 
+Represents RDFS Properties that define relationships between entities. +""" from __future__ import annotations -from enum import Enum -import itertools -from typing import Annotated, Any, Iterable, List, Optional, Union, TYPE_CHECKING - - -from lib_ro_crate_schema.crate.rdf import SCHEMA, is_type, object_id -from lib_ro_crate_schema.crate.literal_type import LiteralType, to_rdf -from lib_ro_crate_schema.crate.registry import ForwardRef, Registry -from pydantic import ( - AnyUrl, - BaseModel, - Field, - ValidationError, - ValidationInfo, - field_validator, - create_model, -) - -from pydantic_rdf import BaseRdfModel, WithPredicate -from rdflib import BNode, Graph, Namespace, URIRef, RDF, RDFS, Literal, OWL, XSD, SDO - -import re +from typing import List, Optional, Union, Generator, TYPE_CHECKING, Any +from pydantic import BaseModel, Field +from lib_ro_crate_schema.crate.rdf import is_type, object_id +from lib_ro_crate_schema.crate.forward_ref_resolver import ForwardRefResolver +from lib_ro_crate_schema.crate.literal_type import LiteralType +from rdflib import RDF, RDFS, Literal, URIRef from urllib.parse import urlparse -from datetime import datetime -from decimal import Decimal -from typing import Annotated, Optional, Iterable -from pydantic import create_model -from pydantic_rdf import BaseRdfModel, WithPredicate -from rdflib import URIRef - -from pydantic import computed_field +if TYPE_CHECKING: + from .type import Type -MY_NS = Namespace("ro-schema") - - -class LiteralType(Enum): - BOOLEAN = "xsd:boolean" - INTEGER = "xsd:integer" - DOUBLE = "xsd:double" - DECIMAL = "xsd:decimal" - FLOAT = "xsd:float" - DATETIME = "xsd:dateTime" - STRING = "xsd:string" - XML_LITERAL = "rdf:XMLLiteral" - - def to_internal(self) -> URIRef: - match self: - case LiteralType.BOOLEAN: - return XSD.boolean - case LiteralType.INTEGER: - return XSD.integer - case LiteralType.DOUBLE: - return XSD.double - case LiteralType.DECIMAL: - return XSD.decimal - case LiteralType.FLOAT: - return 
XSD.float - case LiteralType.DATETIME: - return XSD.dateTime - case LiteralType.STRING: - return XSD.string - case LiteralType.XML_LITERAL: - return RDF.XMLLiteral - case _: - raise ValueError(f"Unknown LiteralType: {self}") - - @classmethod - def from_external(cls, value: str | URIRef | object) -> "LiteralType": - """ - Import a LiteralType from an external representation. - Accepts: - - enum value (e.g. 'xsd:boolean') - - full URI string (e.g. 'http://www.w3.org/2001/XMLSchema#boolean') - - rdflib URIRef (e.g. XSD.boolean) - - direct rdflib type (e.g. XSD.boolean) - """ - match value: - case str() as s: - for lt in cls: - if s == lt.value: - return lt - for lt in cls: - if s == str(lt.to_internal()): - return lt - case URIRef() as u: - for lt in cls: - if u == lt.to_internal(): - return lt - case _: - for lt in cls: - if value is lt.to_internal(): - return lt - raise ValueError(f"No LiteralType for external value: {value}") - - -class RdfPropertyType(BaseRdfModel): - rdf_type = RDF.Property - _rdf_namespace = RDF - label: Annotated[str | None, WithPredicate(RDFS.label)] = Field(...) - range_includes: Annotated[ - list[Union[URIRef, "RdfType"]], WithPredicate(SDO.RangeIncludes) - ] = Field(...) 
- - def to_external(self) -> "PropertyType": - return PropertyType( - id=self.uri, - label=self.label, - range_includes=[convert_range_to_external(r) for r in self.range_includes], - ) - - -def convert_range_to_external( - range: Union[URIRef, "RdfType"], -) -> Union[LiteralType, "Type"]: - match range: - case URIRef() as ref: - return LiteralType.from_external(ref) - case RdfType() as rdf: - return rdf.to_external() - - -def convert_range_to_internal( - range: Union[LiteralType, "Type"], -) -> Union[URIRef, "RdfType"]: - match range: - case LiteralType() as lt: - return lt.to_internal() - case Type() as tp: - return tp.to_internal() - - -class PropertyType(BaseModel): - id: str - label: str | None - range_includes: list[Union[LiteralType, "Type"]] - - def to_internal(self) -> RdfPropertyType: - return RdfPropertyType( - uri=self.id, - label=self.label, - range_includes=[ - convert_range_to_internal(includes) for includes in self.range_includes - ], - ) - - -class Restriction(BaseRdfModel): - rdf_type = OWL.Restriction - _rdf_namespace = MY_NS - on_property: Annotated[RdfPropertyType, WithPredicate(OWL.onProperty)] = Field(...) - min_cardinality: Annotated[int, WithPredicate(OWL.minCardinality)] = Field(...) - max_cardinality: Annotated[int, WithPredicate(OWL.maxCardinality)] = Field(...) 
class TypeProperty(BaseModel):
    """
    Represents an RDFS Property in the RO-Crate schema (equivalent to Java IPropertyType interface).
    Defines relationships and attributes that can exist between entities in the knowledge graph.

    Key Responsibilities:
    - Define RDFS Property metadata (ID, label, comment, domain/range)
    - Specify allowed domains (which classes can have this property)
    - Specify allowed ranges (what values/types this property can hold)
    - Report cardinality hints (required/optional, single/multiple values)
    - Support ontological alignment via equivalent properties

    Commonly Used Methods:

    **Fluent Builder API (each setter returns ``self`` so calls can be chained):**
    - setId(id) -> Set the RDFS Property identifier
    - setLabel(label) -> Set human-readable label (rdfs:label)
    - setComment(comment) -> Set description (rdfs:comment)
    - setTypes(types) -> Set allowed value types (schema:rangeIncludes)
    - addType(type_ref) -> Add single allowed value type
    - setRequired(required) -> Set if property is mandatory (affects cardinality)
    - setOntologicalAnnotations(annotations) -> Set owl:equivalentProperty mappings

    **Java API Compatibility (IPropertyType):**
    - getId() -> Get the RDFS Property identifier
    - getLabel() -> Get human-readable label
    - getComment() -> Get description text
    - getDomain() -> Get allowed domain classes (schema:domainIncludes)
    - getRange() -> Get allowed value types (schema:rangeIncludes)
    - getOntologicalAnnotations() -> Get equivalent property mappings
    - get_min_cardinality() -> Get minimum required values (0=optional, 1=required)
    - get_max_cardinality() -> Get maximum allowed values (1=single, 0=unlimited)

    **RDF Generation:**
    - to_triples() -> Generate RDF triples for serialization
    - resolve(registry) -> Resolve forward references (currently a no-op)

    Usage Example:
        name_prop = TypeProperty(id="name")
        name_prop.setLabel("Name").setComment("Person's full name")
        name_prop.setTypes(["xsd:string"]).setRequired(True)

    JSON-LD Output Example:
        {
            "@id": "name",
            "@type": "rdf:Property",
            "rdfs:label": "Name",
            "rdfs:comment": "Person's full name",
            "schema:domainIncludes": {"@id": "Person"},
            "schema:rangeIncludes": {"@id": "http://www.w3.org/2001/XMLSchema#string"},
            "owl:equivalentProperty": {"@id": "https://schema.org/name"}
        }

    Related OWL Restriction (when used on a class):
        {
            "@id": "Person_name_restriction",
            "@type": "owl:Restriction",
            "owl:onProperty": {"@id": "name"},
            "owl:minCardinality": 1,
            "owl:maxCardinality": 1
        }
    """
    id: str
    domain_includes: List[str] = Field(default_factory=list)
    range_includes: List[Union[str, LiteralType, Any]] = Field(default_factory=list)
    ontological_annotations: Optional[List[str]] = Field(default=None)
    comment: Optional[str] = Field(default=None)
    label: Optional[str] = Field(default=None)
    required: Optional[bool] = Field(default=None, description="Whether this property is required (generates OWL restrictions)")

    # Fluent builder API methods
    def setId(self, id: str):
        """Set the ID of this property and return ``self`` for chaining."""
        self.id = id
        return self

    def setTypes(self, types: List[Union[str, Type]]):
        """
        Replace the range of this property and return ``self`` for chaining.

        Each entry is normalised exactly as in ``addType``.
        """
        self.range_includes = []
        for type_ref in types:
            self.addType(type_ref)
        return self

    def addType(self, type_ref: Union[str, Type]):
        """
        Add a single type to the range of this property and return ``self``.

        Objects exposing an ``id`` attribute (e.g. ``Type`` instances) are
        stored by that id; every other value (plain strings, enum members) is
        stored unchanged.
        """
        if hasattr(type_ref, 'id'):
            self.range_includes.append(type_ref.id)
        else:
            self.range_includes.append(type_ref)
        return self

    def setOntologicalAnnotations(self, annotations: List[str]):
        """Set the owl:equivalentProperty annotations and return ``self`` for chaining."""
        self.ontological_annotations = annotations
        return self

    def setRequired(self, required: bool):
        """Set whether this property is required (generates OWL restrictions)"""
        self.required = required
        return self

    def setComment(self, comment: str):
        """Set the rdfs:comment text and return ``self`` for chaining."""
        self.comment = comment
        return self

    def setLabel(self, label: str):
        """Set the rdfs:label text and return ``self`` for chaining."""
        self.label = label
        return self

    # Cardinality helpers
    def get_min_cardinality(self) -> int:
        """
        Get minimum cardinality for this property (0 = optional, 1 = required).

        Only an explicit ``required=True`` yields 1; ``False`` and the unset
        default (``None``) both mean optional.
        """
        return 1 if self.required is True else 0

    def get_max_cardinality(self) -> int:
        """Get maximum cardinality for this property (0 = unbounded, 1 = single value)"""
        # For now, assume single values unless explicitly configured
        # This could be enhanced to detect list types in range_includes
        return 1

    # Java API compatibility getter methods
    def getId(self) -> str:
        """Get the RDFS Property identifier (Java IPropertyType interface)"""
        return self.id

    def getLabel(self) -> Optional[str]:
        """Get human-readable label (Java IPropertyType interface)"""
        return self.label

    def getComment(self) -> Optional[str]:
        """Get description text (Java IPropertyType interface)"""
        return self.comment

    def getDomain(self) -> List[str]:
        """Get allowed domain classes (Java IPropertyType interface)"""
        return self.domain_includes

    def getRange(self) -> List[Union[str, LiteralType, Any]]:
        """Get allowed value types (Java IPropertyType interface)"""
        return self.range_includes

    def getOntologicalAnnotations(self) -> List[str]:
        """Get equivalent property mappings (Java IPropertyType interface); empty list when unset."""
        return self.ontological_annotations or []

    def resolve(self, registry: ForwardRefResolver):
        """Resolve forward references using the registry"""
        # For now, TypeProperty doesn't have complex forward refs to resolve
        pass

    def to_triples(self) -> Generator[tuple, None, None]:
        """
        Emits the property definition as a set of triples
        whose subject is a RDFS:Property.

        Range entries are serialised by prefix:
        - ``xsd:<name>`` becomes the matching XML Schema datatype URI,
        - ``base:<name>`` becomes a reference to ``<name>`` in this schema,
        - anything else is passed to ``object_id`` unchanged.
        Only the leading prefix is removed, so identifiers that merely contain
        ``xsd:`` or ``base:`` later in the string are preserved intact.
        """
        xsd_prefix = "xsd:"
        base_prefix = "base:"
        range_includes_predicate = URIRef("https://schema.org/rangeIncludes")

        yield is_type(self.id, RDF.Property)

        if self.label:
            yield (object_id(self.id), RDFS.label, Literal(self.label))

        if self.comment:
            yield (object_id(self.id), RDFS.comment, Literal(self.comment))

        # Domain includes - what types can have this property
        for domain in self.domain_includes:
            yield (object_id(self.id), URIRef("https://schema.org/domainIncludes"), object_id(domain))

        # Range includes - what types can be values of this property
        for range_val in self.range_includes:
            # Convert enum to string value if needed
            if isinstance(range_val, LiteralType):
                range_str = range_val.value
            else:
                range_str = str(range_val)

            if range_str.startswith(xsd_prefix):
                # XSD type: expand the prefix to the XML Schema namespace
                xsd_uri = "http://www.w3.org/2001/XMLSchema#" + range_str[len(xsd_prefix):]
                yield (object_id(self.id), range_includes_predicate, URIRef(xsd_uri))
            elif range_str.startswith(base_prefix):
                # Reference to another type in our schema
                type_id = range_str[len(base_prefix):]
                yield (object_id(self.id), range_includes_predicate, object_id(type_id))
            else:
                # Assume it's a full URI or local reference
                yield (object_id(self.id), range_includes_predicate, object_id(range_str))

        # Ontological annotations
        for annotation in self.ontological_annotations or []:
            yield (object_id(self.id), URIRef("http://www.w3.org/2002/07/owl#equivalentProperty"), URIRef(annotation))
a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/example/examples.py +++ /dev/null @@ -1,90 +0,0 @@ -# Utility functions for reconstruction - -import json -from lib_ro_crate_schema.crate.type import Type -from lib_ro_crate_schema.crate.type_property import TypeProperty -from lib_ro_crate_schema.crate.literal_type import LiteralType -from lib_ro_crate_schema.crate.metadata_entry import MetadataEntry -from lib_ro_crate_schema.crate.schema_facade import SchemaFacade -from rocrate.rocrate import ROCrate - -from rdflib import Graph -from lib_ro_crate_schema.crate.jsonld_utils import add_schema_to_crate -from lib_ro_crate_schema.crate import reconstruction - - -def main(): - has_name = TypeProperty(id="hasName", range_includes=[LiteralType.STRING]) - has_identifier = TypeProperty( - id="hasIdentifier", range_includes=[LiteralType.STRING] - ) - - has_colleague = TypeProperty(id="hasColleague", range_includes=["Participant"]) - - participant_type = Type( - id="Participant", - type="Type", - subclass_of=["https://schema.org/Thing"], - ontological_annotations=["http://purl.org/dc/terms/creator"], - rdfs_property=[has_name, has_identifier], - comment="", - label="", - ) - - creator_type = Type( - id="Creator", - type="Type", - subclass_of=["https://schema.org/Thing"], - ontological_annotations=["http://purl.org/dc/terms/creator"], - rdfs_property=[has_name, has_identifier, has_colleague], - comment="", - label="", - ) - - # Example MetadataEntry using property and type references (object and string) - creator_entry = MetadataEntry( - id="creator1", - types=[creator_type, participant_type], - props={ - "has_name": "John Author", - "has_identifier": "https://orcid.org/0000-0000-0000-0000", - }, - references={}, - ) - - participant_entry = MetadataEntry( - id="participant", - types=[participant_type, creator_type], - props={ - "hasName": "Karl Participant", - "hasIdentifier": "https://orcid.org/0000-0000-0000-0001", - "hasColleague": "creator1", - }, - 
references={}, - ) - - schema = SchemaFacade( - types=[creator_type, participant_type], - # properties=[has_name, has_identifier], - metadata_entries=[creator_entry, participant_entry], - ) - #Resolve refs - schema.resolve_forward_refs() - breakpoint() - #Add it to a crate - crate = ROCrate() - crate.license = "a" - crate.name = "mtcrate" - crate.description = "test crate" - res = add_schema_to_crate(schema, crate) - #Serialise - print(json.dumps(res)) - - -# Use the reconstruction module's main entry point -def reconstruct(graph: Graph): - return reconstruction.reconstruct(graph) - - -if __name__ == "__main__": - main() diff --git a/0.2.x/lib/python/lib-ro-crate-schema/test.shacl b/0.2.x/lib/python/lib-ro-crate-schema/test.shacl deleted file mode 100644 index af5e716..0000000 --- a/0.2.x/lib/python/lib-ro-crate-schema/test.shacl +++ /dev/null @@ -1,104 +0,0 @@ -PREFIX rdfs: -PREFIX rdf: -PREFIX owl: -PREFIX schema: -PREFIX xsd: -PREFIX openBIS: <_> -PREFIX sh: -PREFIX ex: <_> -#PREFIX crate: - - - -# ex:CrateDefinitionShape a sh:NodeShape ; -# sh:property [ -# sh:path "@graph" ; -# sh:minCount 1 ; -# ] . - -ex:ClassDefinitionShape a sh:NodeShape ; - sh:targetClass rdfs:Class ; - sh:property [ - sh:path rdfs:subClassOf ; - sh:nodeKind sh:IRI; - sh:minCount 1 ; - sh:maxCount 1; - ] ; - sh:property [ - sh:path owl:restriction ; - sh:minCount 0 ; - ] ; - sh:property [ - sh:path owl:equivalentClass ; - sh:minCount 0 ; - ] ; - sh:property [ - sh:path rdfs:label ; - sh:minCount 0 ; - ] ; - sh:property [ - sh:path rdfs:comment ; - sh:minCount 0 ; - ] . - -# ex:MetadataEntryShape a sh:NodeShape ; -# sh:property [ -# sh:path "@graph" ; -# ] . 
- -ex:OwlRestrictionDefinitionShape a sh:NodeShape; - sh:targetClass owl:restriction ; - sh:closed true; - sh:ignoredProperties (rdf:type) ; - #owl:onProperty should reference a valid rdfs:Property - sh:property [ - sh:path owl:onProperty; - sh:maxCount 1; - sh:minCount 1; - sh:nodeKind sh:IRI; - sh:class rdfs:Property ; - ] ; - sh:property [ - sh:path owl:minCardinality ; - sh:minCount 0; - sh:maxCount 1; - sh:in (0 1); - ] ; - sh:property [ - sh:path owl:maxCardinality ; - sh:minCount 0; - sh:maxCount 1; - sh:in (0 1); - ] . - -ex:PropertyDefinitionShape a sh:NodeShape ; - sh:targetClass rdfs:Property ; - sh:closed true; - sh:ignoredProperties (rdf:type) ; - sh:property [ - sh:path rdfs:label ; - sh:nodeKind sh:Literal ; - sh:minCount 0 ; - ] ; - sh:property [ - sh:path rdfs:comment ; - sh:minCount 0 ; - ] ; - sh:property [ - sh:path owl:equivalentProperty ; - sh:minCount 0 ; - sh:nodeKind sh:IRI ; - ] ; - sh:property [ - sh:path schema:domainIncludes ; - sh:minCount 0 ; - #sh:nodeKind sh:IRI ; - #sh:type rdfs:Class ; - ]; - sh:property [ - sh:path schema:rangeIncludes ; - sh:minCount 0 ; - #sh:nodeKind sh:IRI ; - #The type of the range reference is either a literal type or a reference to another rdfs:Class - #sh:type [sh:in (xsd:string xsd:integer xsd:date xsd:dateTime)]; - ] . 
\ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/tests/__init__.py b/0.2.x/lib/python/lib-ro-crate-schema/tests/__init__.py new file mode 100644 index 0000000..39a7acd --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/tests/__init__.py @@ -0,0 +1 @@ +# Test package for lib-ro-crate-schema \ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/tests/schema.shacl b/0.2.x/lib/python/lib-ro-crate-schema/tests/schema.shacl new file mode 100644 index 0000000..8d0a9b4 --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/tests/schema.shacl @@ -0,0 +1,324 @@ +# RO-Crate Schema SHACL Validation +# Updated for the modern Python lib-ro-crate-schema architecture +# Validates RDF output from the TypeProperty, Type, and MetadataEntry classes + +@prefix rdf: . +@prefix rdfs: . +@prefix owl: . +@prefix schema: . +@prefix xsd: . +@prefix sh: . +@prefix base: . +@prefix ex: . + +# ===================================================== +# RDFS CLASS DEFINITIONS (Type objects) +# ===================================================== + +ex:ClassDefinitionShape + a sh:NodeShape ; + sh:targetClass rdfs:Class ; + sh:name "RDFS Class Shape" ; + sh:description "Validates Type objects - RDFS class definitions with properties and restrictions" ; + + # Must have rdfs:subClassOf (inheritance) + sh:property [ + sh:path rdfs:subClassOf ; + sh:nodeKind sh:IRI ; + sh:minCount 1 ; + sh:message "Every rdfs:Class must have at least one rdfs:subClassOf relationship" + ] ; + + # Optional label and comment + sh:property [ + sh:path rdfs:label ; + sh:datatype xsd:string ; + sh:maxCount 1 ; + sh:message "rdfs:label must be a single string literal" + ] ; + + sh:property [ + sh:path rdfs:comment ; + sh:datatype xsd:string ; + sh:maxCount 1 ; + sh:message "rdfs:comment must be a single string literal" + ] ; + + # OWL restrictions (cardinality constraints) + sh:property [ + sh:path owl:restriction ; + sh:class owl:Restriction ; + sh:message 
"owl:restriction must reference valid owl:Restriction objects" + ] ; + + # Optional equivalent classes + sh:property [ + sh:path owl:equivalentClass ; + sh:nodeKind sh:IRI ; + sh:message "owl:equivalentClass must be IRIs" + ] . + +# ===================================================== +# RDF PROPERTY DEFINITIONS (TypeProperty objects) +# ===================================================== + +ex:PropertyDefinitionShape + a sh:NodeShape ; + sh:targetClass rdf:Property ; + sh:name "RDF Property Shape" ; + sh:description "Validates TypeProperty objects - RDF property definitions with domain/range" ; + + # Optional label and comment + sh:property [ + sh:path rdfs:label ; + sh:datatype xsd:string ; + sh:maxCount 1 ; + sh:message "rdfs:label must be a single string literal" + ] ; + + sh:property [ + sh:path rdfs:comment ; + sh:datatype xsd:string ; + sh:maxCount 1 ; + sh:message "rdfs:comment must be a single string literal" + ] ; + + # Domain includes (what classes can have this property) + sh:property [ + sh:path schema:domainIncludes ; + sh:nodeKind sh:IRI ; + sh:message "schema:domainIncludes must reference valid class IRIs" + ] ; + + # Range includes (what types can be values) + sh:property [ + sh:path schema:rangeIncludes ; + sh:nodeKind sh:IRI ; + sh:message "schema:rangeIncludes must reference valid type/class IRIs" + ] ; + + # Optional equivalent properties + sh:property [ + sh:path owl:equivalentProperty ; + sh:nodeKind sh:IRI ; + sh:message "owl:equivalentProperty must be IRIs" + ] . 
+ +# ===================================================== +# OWL RESTRICTION DEFINITIONS (Cardinality constraints) +# ===================================================== + +ex:RestrictionDefinitionShape + a sh:NodeShape ; + sh:targetClass owl:Restriction ; + sh:name "OWL Restriction Shape" ; + sh:description "Validates cardinality restrictions generated from TypeProperty.required fields" ; + + # Must reference a property + sh:property [ + sh:path owl:onProperty ; + sh:class rdf:Property ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:message "owl:Restriction must have exactly one owl:onProperty referencing an rdf:Property" + ] ; + + # Must have at least one cardinality constraint + sh:or ( + [ + sh:property [ + sh:path owl:minCardinality ; + sh:minCount 1 ; + sh:maxCount 1 + ] + ] + [ + sh:property [ + sh:path owl:maxCardinality ; + sh:minCount 1 ; + sh:maxCount 1 + ] + ] + [ + sh:property [ + sh:path owl:cardinality ; + sh:minCount 1 ; + sh:maxCount 1 + ] + ] + ) ; + + # Cardinality values must be non-negative integers (0 or 1 in our system) + sh:property [ + sh:path owl:minCardinality ; + sh:nodeKind sh:Literal ; + sh:in (0 1) ; + sh:message "minCardinality must be 0 (optional) or 1 (required)" + ] ; + + sh:property [ + sh:path owl:maxCardinality ; + sh:nodeKind sh:Literal ; + sh:in (0 1) ; + sh:message "maxCardinality must be 0 (unbounded) or 1 (single value)" + ] . + +# ===================================================== +# METADATA ENTRY INSTANCES (MetadataEntry objects) +# ===================================================== + +ex:InstanceShape + a sh:NodeShape ; + sh:name "Metadata Entry Instance Shape" ; + sh:description "Validates MetadataEntry instances - entities with properties and references" ; + + # Target nodes that have a type but are not schema definitions + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + SELECT ?this WHERE { + ?this a ?type . 
+ FILTER( + ?type != rdfs:Class && + ?type != rdf:Property && + ?type != owl:Restriction && + !STRSTARTS(STR(?type), "http://www.w3.org/") && + !STRSTARTS(STR(?type), "https://schema.org/") + ) + } + """ + ] ; + + # Must have exactly one type declaration + sh:property [ + sh:path rdf:type ; + sh:minCount 1 ; + sh:message "Every metadata entry must have exactly one rdf:type" + ] . + +# ===================================================== +# RANGE VALIDATION FOR COMMON XSD TYPES +# ===================================================== + +ex:StringPropertyShape + a sh:NodeShape ; + sh:name "String Property Validation" ; + sh:description "Validates properties with xsd:string range" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + SELECT ?this WHERE { + ?this ?prop ?value . + ?prop schema:rangeIncludes xsd:string . + FILTER(isLiteral(?value)) + } + """ + ] ; + sh:nodeKind sh:Literal ; + sh:datatype xsd:string . + +ex:IntegerPropertyShape + a sh:NodeShape ; + sh:name "Integer Property Validation" ; + sh:description "Validates properties with xsd:integer range" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + SELECT ?this WHERE { + ?this ?prop ?value . + ?prop schema:rangeIncludes xsd:integer . + FILTER(isLiteral(?value)) + } + """ + ] ; + sh:nodeKind sh:Literal ; + sh:datatype xsd:integer . + +# ===================================================== +# REFERENCE VALIDATION (Object Properties) +# ===================================================== + +ex:ReferencePropertyShape + a sh:NodeShape ; + sh:name "Reference Property Validation" ; + sh:description "Validates reference properties that point to other entities" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + SELECT ?this WHERE { + ?this ?prop ?target . + ?prop schema:rangeIncludes ?rangeClass . + ?target a ?targetType . 
+ FILTER( + !isLiteral(?target) && + ?rangeClass != xsd:string && + ?rangeClass != xsd:integer && + ?rangeClass != xsd:dateTime && + ?rangeClass != xsd:boolean + ) + } + """ + ] ; + sh:nodeKind sh:IRI . + +# ===================================================== +# CONSISTENCY VALIDATION +# ===================================================== + +ex:DomainConsistencyShape + a sh:NodeShape ; + sh:name "Domain Consistency Validation" ; + sh:description "Ensures entities only use properties appropriate for their type" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + SELECT ?this WHERE { + ?this a ?type . + ?this ?prop ?value . + ?prop schema:domainIncludes ?domain . + FILTER(?type != ?domain && ?type != rdfs:Class && ?type != rdf:Property && ?type != owl:Restriction) + } + """ + ] ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:message "Entity type must be compatible with property domain" ; + sh:select """ + SELECT $this ?prop ?domain ?actualType WHERE { + $this a ?actualType . + $this ?prop ?value . + ?prop schema:domainIncludes ?domain . + FILTER(?actualType != ?domain) + } + """ + ] . + +ex:RequiredPropertyShape + a sh:NodeShape ; + sh:name "Required Property Validation" ; + sh:description "Ensures required properties (minCardinality=1) are present" ; + sh:target [ + a sh:SPARQLTarget ; + sh:select """ + SELECT ?this WHERE { + ?this a ?type . + ?type owl:restriction ?restriction . + ?restriction owl:minCardinality 1 . + ?restriction owl:onProperty ?requiredProp . + FILTER NOT EXISTS { ?this ?requiredProp ?value } + } + """ + ] ; + sh:sparql [ + a sh:SPARQLConstraint ; + sh:message "Required property is missing" ; + sh:select """ + SELECT $this ?requiredProp WHERE { + $this a ?type . + ?type owl:restriction ?restriction . + ?restriction owl:minCardinality 1 . + ?restriction owl:onProperty ?requiredProp . + FILTER NOT EXISTS { $this ?requiredProp ?value } + } + """ + ] . 
\ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/tests/test_context_detection.py b/0.2.x/lib/python/lib-ro-crate-schema/tests/test_context_detection.py new file mode 100644 index 0000000..ae5e54b --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/tests/test_context_detection.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 +""" +Simple test to see how unknown namespaces are handled by get_context function. +""" + +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).parent / "src")) + +from lib_ro_crate_schema.crate.jsonld_utils import get_context +from rdflib import Graph, URIRef, Literal +from rdflib.namespace import RDF, RDFS + + +def create_graph_with_unknown_namespaces(): + """Create an RDF graph with unknown namespaces.""" + g = Graph() + + # Add triples with unknown pokemon.org namespace + pokemon_ns = "http://pokemon.org/" + pikachu = URIRef(pokemon_ns + "pikachu") + pokemon_name = URIRef(pokemon_ns + "pokemonName") + electric_type = URIRef(pokemon_ns + "ElectricPokemon") + + # Add some triples + g.add((pikachu, RDF.type, electric_type)) + g.add((pikachu, pokemon_name, Literal("Pikachu"))) + g.add((pokemon_name, RDF.type, RDF.Property)) + g.add((pokemon_name, RDFS.label, Literal("Pokemon Name"))) + + # Add triples with another unknown namespace + villains_ns = "http://villains.org/" + team_rocket = URIRef(villains_ns + "team_rocket") + criminal_org = URIRef(villains_ns + "CriminalOrganization") + motto = URIRef(villains_ns + "motto") + + g.add((team_rocket, RDF.type, criminal_org)) + g.add((team_rocket, motto, Literal("Prepare for trouble!"))) + + # Also add some known namespaces for comparison + schema_name = URIRef("https://schema.org/name") + g.add((pikachu, schema_name, Literal("Pikachu the Electric Mouse"))) + + # Add example.com namespace (base namespace in predefined list) + example_person = URIRef("http://example.com/trainer") + example_name = URIRef("http://example.com/trainerName") + 
g.add((example_person, example_name, Literal("Ash Ketchum"))) + g.add((example_name, RDF.type, RDF.Property)) + + return g + + +def main(): + print("🔍 TESTING get_context() WITH UNKNOWN NAMESPACES") + print("=" * 55) + + # Create graph with unknown namespaces + g = create_graph_with_unknown_namespaces() + + print("📊 Graph Statistics:") + print(f" Total triples: {len(g)}") + + print("\n🔍 URIs in the graph:") + all_uris = set() + for s, p, o in g: + for uri in [str(s), str(p), str(o)]: + if uri.startswith('http'): + all_uris.add(uri) + + # Group by namespace + namespaces = {} + for uri in sorted(all_uris): + if 'pokemon.org' in uri: + namespaces.setdefault('pokemon.org', []).append(uri) + elif 'villains.org' in uri: + namespaces.setdefault('villains.org', []).append(uri) + elif 'schema.org' in uri: + namespaces.setdefault('schema.org', []).append(uri) + elif 'example.com' in uri: + namespaces.setdefault('example.com', []).append(uri) + else: + namespaces.setdefault('other', []).append(uri) + + for ns, uris in namespaces.items(): + print(f"\n {ns}:") + for uri in uris[:3]: # Show first 3 + print(f" {uri}") + if len(uris) > 3: + print(f" ... 
and {len(uris) - 3} more") + + # Test get_context function + print(f"\n🎯 Testing get_context() function:") + context = get_context(g) + + print("📋 Generated Context:") + if isinstance(context, list): + for i, ctx_layer in enumerate(context): + if isinstance(ctx_layer, str): + print(f" Layer {i}: \"{ctx_layer}\"") + else: + print(f" Layer {i}:") + for prefix, uri in sorted(ctx_layer.items()): + print(f" \"{prefix}\": \"{uri}\"") + else: + print(f" Single context: {context}") + + # Analyze what happened + print(f"\n🧪 Analysis:") + detected_namespaces = set() + if isinstance(context, list) and len(context) > 1: + for ctx in context[1:]: + if isinstance(ctx, dict): + detected_namespaces.update(ctx.values()) + + test_namespaces = [ + ('pokemon.org', 'http://pokemon.org/'), + ('villains.org', 'http://villains.org/'), + ('schema.org', 'https://schema.org/'), + ('example.com', 'http://example.com/') + ] + + for ns_name, ns_uri in test_namespaces: + if ns_uri in detected_namespaces: + print(f" ✅ {ns_name}: DETECTED") + else: + print(f" ❌ {ns_name}: NOT DETECTED") + + print(f"\n🎮 Conclusion:") + unknown_detected = any(ns in detected_namespaces for _, ns in test_namespaces[:2]) + if unknown_detected: + print(f" 🎉 Unknown namespaces are automatically detected!") + else: + print(f" ❌ Unknown namespaces are NOT automatically detected") + print(f" ➡️ Only predefined namespaces in namespace_prefixes are recognized") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/tests/test_decorator_id.py b/0.2.x/lib/python/lib-ro-crate-schema/tests/test_decorator_id.py new file mode 100644 index 0000000..8f56145 --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/tests/test_decorator_id.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 +""" +Test the enhanced @ro_crate_schema decorator with explicit id parameter. 
+""" + +import sys +sys.path.append('src') + +from lib_ro_crate_schema.crate.schema_facade import SchemaFacade +from lib_ro_crate_schema.crate.decorators import ro_crate_schema, Field +from pydantic import BaseModel + +# Test the new 'id' parameter in the decorator +@ro_crate_schema( + id="CustomPerson", + ontology="https://schema.org/Person" +) +class PersonModel(BaseModel): + """A person model with explicit ID different from class name""" + name: str = Field(ontology="https://schema.org/name") + email: str = Field(ontology="https://schema.org/email") + +# Test without explicit ID (should default to class name) +@ro_crate_schema(ontology="https://schema.org/Dataset") +class DatasetModel(BaseModel): + """A dataset model without explicit ID""" + title: str = Field(ontology="https://schema.org/name") + description: str = Field(ontology="https://schema.org/description") + +def test_decorator_with_id(): + print("🧪 Testing @ro_crate_schema decorator with explicit id parameter...") + + # Create facade and add models + facade = SchemaFacade() + facade.add_all_registered_models() + + print("\n📊 Registered types:") + for type_obj in facade.get_types(): + print(f" - Type ID: '{type_obj.id}' (from class: {type_obj.__class__.__name__})") + + # Verify that PersonModel got the custom ID "CustomPerson" + person_type = facade.get_type("CustomPerson") + dataset_type = facade.get_type("DatasetModel") # Should use class name + + if person_type: + print(f"✅ Found PersonModel with custom ID: '{person_type.id}'") + else: + print("❌ PersonModel with custom ID not found") + + if dataset_type: + print(f"✅ Found DatasetModel with default ID: '{dataset_type.id}'") + else: + print("❌ DatasetModel with default ID not found") + + # Create instances and add them + person = PersonModel(name="Alice Johnson", email="alice@example.com") + dataset = DatasetModel(title="Test Dataset", description="A test dataset") + + facade.add_model_instance(person, "alice") + facade.add_model_instance(dataset, 
"test_dataset") + + print("\n📦 Metadata entries:") + for entry in facade.get_entries(): + print(f" - {entry.id} (class_id: {entry.class_id})") + + # Verify the entries use the correct type IDs + alice_entry = facade.get_entry("alice") + dataset_entry = facade.get_entry("test_dataset") + + if alice_entry and alice_entry.class_id == "CustomPerson": + print("✅ Alice entry correctly references 'CustomPerson' type") + else: + print(f"❌ Alice entry has wrong class_id: {alice_entry.class_id if alice_entry else 'None'}") + + if dataset_entry and dataset_entry.class_id == "DatasetModel": + print("✅ Dataset entry correctly references 'DatasetModel' type") + else: + print(f"❌ Dataset entry has wrong class_id: {dataset_entry.class_id if dataset_entry else 'None'}") + + # Export and verify + print("\n💾 Testing RO-Crate export...") + import os + output_dir = "output_crates" + os.makedirs(output_dir, exist_ok=True) + + test_output_path = os.path.join(output_dir, "test_decorator_id_output") + facade.write(test_output_path, name="Test ID Parameter") + print("✅ Export successful!") + + print("\n🎉 Test completed successfully!") + +if __name__ == "__main__": + test_decorator_with_id() \ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/example/__init__.py b/0.2.x/lib/python/lib-ro-crate-schema/tests/test_duplicate_detection.py similarity index 100% rename from 0.2.x/lib/python/lib-ro-crate-schema/src/lib_ro_crate_schema/example/__init__.py rename to 0.2.x/lib/python/lib-ro-crate-schema/tests/test_duplicate_detection.py diff --git a/0.2.x/lib/test-data/test-01-import-ro-crate-metadata/input/DELETE_ME b/0.2.x/lib/python/lib-ro-crate-schema/tests/test_duplicate_integration.py similarity index 100% rename from 0.2.x/lib/test-data/test-01-import-ro-crate-metadata/input/DELETE_ME rename to 0.2.x/lib/python/lib-ro-crate-schema/tests/test_duplicate_integration.py diff --git a/0.2.x/lib/python/lib-ro-crate-schema/tests/test_export.py 
b/0.2.x/lib/python/lib-ro-crate-schema/tests/test_export.py new file mode 100644 index 0000000..0569330 --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/tests/test_export.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 + +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).parent / "src")) + +from datetime import datetime +from typing import Optional +from pydantic import BaseModel +from lib_ro_crate_schema.crate.schema_facade import SchemaFacade +from lib_ro_crate_schema.crate.decorators import ro_crate_schema, Field + +@ro_crate_schema(ontology="http://openbis.org/Equipment") +class Equipment(BaseModel): + """Laboratory equipment with optional nesting""" + name: str = Field(ontology="https://schema.org/name") + model: str = Field(comment="Equipment model/version") + serial_number: str = Field(ontology="https://schema.org/serialNumber") + created_date: datetime = Field(ontology="https://schema.org/dateCreated") + parent_equipment: Optional['Equipment'] = Field(default=None, ontology="https://schema.org/isPartOf") + +def test_export(): + facade = SchemaFacade() + + # Create parent equipment + parent = Equipment( + name="Parent Equipment", + model="P1", + serial_number="P001", + created_date=datetime(2023, 1, 1), + parent_equipment=None + ) + + # Create child equipment with parent reference + child = Equipment( + name="Child Equipment", + model="C1", + serial_number="C001", + created_date=datetime(2023, 2, 1), + parent_equipment=parent + ) + + # Add to facade + facade.add_model_instance(parent, "base:parent") + facade.add_model_instance(child, "base:child") + + # Export + import os + output_dir = "output_crates" + os.makedirs(output_dir, exist_ok=True) + test_output_path = os.path.join(output_dir, "test_simple") + + facade.write(test_output_path, "Simple Test", "Testing reference export") + print(f"Export completed - check {test_output_path}/ro-crate-metadata.json") + +if __name__ == "__main__": + test_export() \ No newline at end of file 
diff --git a/0.2.x/lib/python/lib-ro-crate-schema/tests/test_get_crate.py b/0.2.x/lib/python/lib-ro-crate-schema/tests/test_get_crate.py new file mode 100644 index 0000000..8ced6aa --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/tests/test_get_crate.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 + +""" +Test the refactored get_crate method to ensure it works independently. +""" + +import sys +sys.path.append('src') + +from lib_ro_crate_schema.crate.schema_facade import SchemaFacade +from lib_ro_crate_schema.crate.metadata_entry import MetadataEntry +from lib_ro_crate_schema.crate.type import Type +from lib_ro_crate_schema.crate.type_property import TypeProperty +from lib_ro_crate_schema.crate.restriction import Restriction + +def test_get_crate_method(): + print("🧪 Testing get_crate method...") + + # Create a simple schema + facade = SchemaFacade() + + # Add a simple type with a property + name_prop = TypeProperty( + id="name", + range_includes=["http://www.w3.org/2001/XMLSchema#string"], + required=True + ) + + person_type = Type( + id="Person", + rdfs_property=[name_prop], + comment="A person entity" + ) + + facade.addType(person_type) + + # Add a metadata entry + person_entry = MetadataEntry( + id="john_doe", + class_id="Person", + properties={"name": "John Doe"} + ) + + facade.addEntry(person_entry) + + # Test get_crate method + print("📦 Testing get_crate method...") + crate = facade.get_crate( + name="Test RO-Crate", + description="A test crate created using get_crate method" + ) + + print(f"✅ Created crate: {crate}") + print(f"✅ Crate name: {getattr(crate, 'name', 'Not set')}") + print(f"✅ Crate description: {getattr(crate, 'description', 'Not set')}") + + # Test that the crate can be written + print("💾 Testing crate writing...") + import os + output_dir = "output_crates" + os.makedirs(output_dir, exist_ok=True) + + test_get_crate_path = os.path.join(output_dir, "test_get_crate_output") + crate.write(test_get_crate_path) + print(f"✅ Crate written 
successfully to '{test_get_crate_path}'") + + # Test that write method still works (using get_crate internally) + print("💾 Testing write method (should use get_crate internally)...") + test_write_path = os.path.join(output_dir, "test_write_output") + facade.write(test_write_path, name="Test via Write", description="Using write method") + print("✅ Write method works correctly") + + print("🎉 All tests passed!") + +if __name__ == "__main__": + test_get_crate_method() \ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/tests/test_integration.py b/0.2.x/lib/python/lib-ro-crate-schema/tests/test_integration.py new file mode 100644 index 0000000..6157752 --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/tests/test_integration.py @@ -0,0 +1,400 @@ +import unittest +import sys +import json +import tempfile +from pathlib import Path + +from lib_ro_crate_schema.crate.schema_facade import SchemaFacade +from lib_ro_crate_schema.crate.type import Type +from lib_ro_crate_schema.crate.type_property import TypeProperty +from lib_ro_crate_schema.crate.literal_type import LiteralType +from lib_ro_crate_schema.crate.metadata_entry import MetadataEntry + + +class TestIntegrationExamples(unittest.TestCase): + """Integration tests using real examples from the codebase""" + + def setUp(self): + """Set up paths to example files""" + self.test_dir = Path(__file__).parent + self.examples_dir = self.test_dir.parent.parent / "examples" + self.lib_dir = self.test_dir.parent + self.obenbis_crate = self.lib_dir.parent.parent / "example" / "obenbis-one-publication" / "ro-crate-metadata.json" + + def test_examples_py_recreation(self): + """Test recreating the example from examples.py""" + + # Recreate the example schema from examples.py + name = TypeProperty( + id="name", + range_includes=[LiteralType.STRING], + required=True, + label="Full Name", + comment="The full name of the entity" + ) + + identifier = TypeProperty( + id="identifier", + 
range_includes=[LiteralType.STRING], + required=True, + label="Identifier", + comment="Unique identifier for the entity" + ) + + colleague = TypeProperty( + id="colleague", + range_includes=["Participant"], + required=False, + label="Colleague", + comment="Optional colleague relationship" + ) + + participant_type = Type( + id="Participant", + subclass_of=["https://schema.org/Thing"], + ontological_annotations=["http://purl.org/dc/terms/creator"], + rdfs_property=[name, identifier], + comment="A participant in the research", + label="Participant", + ) + + creator_type = Type( + id="Creator", + subclass_of=["https://schema.org/Thing"], + ontological_annotations=["http://purl.org/dc/terms/creator"], + rdfs_property=[name, identifier, colleague], + comment="A creator of the research work", + label="Creator", + ) + + creator_entry = MetadataEntry( + id="creator1", + class_id="Creator", + properties={ + "name": "John Author", + "identifier": "https://orcid.org/0000-0000-0000-0000", + }, + references={}, + ) + + participant_entry = MetadataEntry( + id="participant", + class_id="Participant", + properties={ + "name": "Karl Participant", + "identifier": "https://orcid.org/0000-0000-0000-0001", + }, + references={ + "colleague": ["creator1"] + }, + ) + + schema = SchemaFacade( + types=[creator_type, participant_type], + metadata_entries=[creator_entry, participant_entry], + ) + + # Test the schema + self.assertEqual(len(schema.types), 2) + self.assertEqual(len(schema.metadata_entries), 2) + + # Test types + creator = schema.get_type("Creator") + self.assertIsNotNone(creator) + self.assertEqual(creator.label, "Creator") + self.assertEqual(len(creator.rdfs_property), 3) # name, identifier, colleague + + participant = schema.get_type("Participant") + self.assertIsNotNone(participant) + self.assertEqual(participant.label, "Participant") + self.assertEqual(len(participant.rdfs_property), 2) # name, identifier + + # Test metadata entries + creator_md = schema.get_entry("creator1") 
+ self.assertIsNotNone(creator_md) + self.assertEqual(creator_md.properties["name"], "John Author") + + participant_md = schema.get_entry("participant") + self.assertIsNotNone(participant_md) + self.assertEqual(participant_md.references["colleague"], ["creator1"]) + + # Test triple generation + triples = list(schema.to_triples()) + self.assertGreater(len(triples), 0) + + # Test JSON generation + json_data = schema.to_json() + self.assertIn("@context", json_data) + self.assertIn("@graph", json_data) + + def test_obenbis_import(self): + """Test importing the OpenBIS one-publication RO-Crate""" + + if not self.obenbis_crate.exists(): + self.skipTest(f"OpenBIS example file not found at {self.obenbis_crate}") + + # Import the OpenBIS RO-Crate + facade = SchemaFacade.from_ro_crate(self.obenbis_crate) + + # Test that import was successful + self.assertIsNotNone(facade) + + # Should have imported some types and/or metadata entries + total_items = len(facade.types) + len(facade.metadata_entries) + self.assertGreater(total_items, 0, "Should have imported some schema elements") + + # Test that we can generate JSON-LD from imported data + json_data = facade.to_json() + self.assertIn("@context", json_data) + self.assertIn("@graph", json_data) + + # Test that we can generate triples + triples = list(facade.to_triples()) + self.assertGreater(len(triples), 0, "Should generate RDF triples") + + print(f"Imported facade with {len(facade.types)} types and {len(facade.metadata_entries)} metadata entries") + + # If we have types, test they have proper structure + if facade.types: + first_type = facade.types[0] + self.assertIsNotNone(first_type.id) + print(f"First imported type: {first_type.id}") + + # If we have metadata entries, test they have proper structure + if facade.metadata_entries: + first_entry = facade.metadata_entries[0] + self.assertIsNotNone(first_entry.id) + self.assertIsNotNone(first_entry.class_id) + print(f"First imported entry: {first_entry.id} of type 
{first_entry.class_id}") + + def test_obenbis_structure_analysis(self): + """Test analyzing the structure of the OpenBIS RO-Crate""" + + if not self.obenbis_crate.exists(): + self.skipTest(f"OpenBIS example file not found at {self.obenbis_crate}") + + # Read raw JSON to analyze structure + with open(self.obenbis_crate, 'r') as f: + crate_data = json.load(f) + + self.assertIn("@graph", crate_data) + graph = crate_data["@graph"] + + # Analyze what types of entities are in the crate + entity_types = {} + rdfs_classes = [] + rdf_properties = [] + owl_restrictions = [] + metadata_entities = [] + + for item in graph: + item_type = item.get("@type", "Unknown") + item_id = item.get("@id", "") + + if item_type == "rdfs:Class": + rdfs_classes.append(item_id) + elif item_type in ["rdf:Property", "rdfs:Property"]: + rdf_properties.append(item_id) + elif item_type == "owl:Restriction": + owl_restrictions.append(item_id) + elif item_id not in ["./", "ro-crate-metadata.json"]: + metadata_entities.append((item_id, item_type)) + + # Count entity types + if item_type in entity_types: + entity_types[item_type] += 1 + else: + entity_types[item_type] = 1 + + print("\nOpenBIS RO-Crate structure analysis:") + print(f"Total entities: {len(graph)}") + print(f"RDFS Classes: {len(rdfs_classes)}") + print(f"RDF Properties: {len(rdf_properties)}") + print(f"OWL Restrictions: {len(owl_restrictions)}") + print(f"Metadata entities: {len(metadata_entities)}") + + print("\nEntity type distribution:") + for entity_type, count in sorted(entity_types.items()): + print(f" {entity_type}: {count}") + + # Test that the structure makes sense + self.assertGreater(len(graph), 0, "Should have entities in the graph") + + if rdfs_classes: + print(f"\nSample RDFS Classes: {rdfs_classes[:5]}") + if rdf_properties: + print(f"Sample RDF Properties: {rdf_properties[:5]}") + if metadata_entities: + print(f"Sample Metadata Entities: {[f'{id} ({type})' for id, type in metadata_entities[:5]]}") + + def 
test_create_minimal_example(self): + """Test creating a minimal working example similar to examples.py""" + + # Create a minimal Person schema + name_prop = TypeProperty( + id="name", + range_includes=[LiteralType.STRING], + required=True, + label="Name" + ) + + email_prop = TypeProperty( + id="email", + range_includes=[LiteralType.STRING], + required=False, + label="Email" + ) + + person_type = Type( + id="Person", + rdfs_property=[name_prop, email_prop], + label="Person", + comment="A person entity" + ) + + # Create a person instance + person_instance = MetadataEntry( + id="john_doe", + class_id="Person", + properties={ + "name": "John Doe", + "email": "john@example.com" + } + ) + + # Create facade + facade = SchemaFacade( + types=[person_type], + metadata_entries=[person_instance] + ) + + # Test basic functionality + self.assertEqual(len(facade.types), 1) + self.assertEqual(len(facade.metadata_entries), 1) + + # Test export to temporary directory + with tempfile.TemporaryDirectory() as temp_dir: + facade.write( + temp_dir, + name="Minimal Example", + description="A minimal RO-Crate example", + license="CC0" + ) + + # Verify files were created + metadata_file = Path(temp_dir) / "ro-crate-metadata.json" + self.assertTrue(metadata_file.exists()) + + # Verify the JSON structure + with open(metadata_file, 'r') as f: + exported_data = json.load(f) + + self.assertIn("@context", exported_data) + self.assertIn("@graph", exported_data) + + # Check that our Person type and instance are included + graph = exported_data["@graph"] + + person_class_found = any( + (item.get("@id") in ["Person", "base:Person", "http://example.com/Person"]) and item.get("@type") == "rdfs:Class" + for item in graph + ) + self.assertTrue(person_class_found, "Should export Person class") + + person_instance_found = any( + (item.get("@id") in ["john_doe", "base:john_doe", "http://example.com/john_doe"]) and + item.get("@type") in ["Person", "base:Person", "http://example.com/Person"] + for item in 
graph + ) + self.assertTrue(person_instance_found, "Should export person instance") + + print(f"\nMinimal example exported with {len(graph)} entities") + + def test_complex_relationship_example(self): + """Test creating example with complex relationships between entities""" + + # Define properties + name_prop = TypeProperty(id="name", range_includes=[LiteralType.STRING], required=True) + title_prop = TypeProperty(id="title", range_includes=[LiteralType.STRING], required=True) + author_prop = TypeProperty(id="author", range_includes=["Person"], required=True) + publisher_prop = TypeProperty(id="publisher", range_includes=["Organization"], required=False) + + # Define types + person_type = Type( + id="Person", + rdfs_property=[name_prop], + label="Person" + ) + + organization_type = Type( + id="Organization", + rdfs_property=[name_prop], + label="Organization" + ) + + article_type = Type( + id="Article", + rdfs_property=[title_prop, author_prop, publisher_prop], + label="Article" + ) + + # Create instances + author = MetadataEntry( + id="author1", + class_id="Person", + properties={"name": "Dr. 
Jane Smith"} + ) + + publisher = MetadataEntry( + id="pub1", + class_id="Organization", + properties={"name": "Academic Press"} + ) + + article = MetadataEntry( + id="article1", + class_id="Article", + properties={"title": "Advanced RO-Crate Techniques"}, + references={ + "author": ["author1"], + "publisher": ["pub1"] + } + ) + + # Create facade + facade = SchemaFacade( + types=[person_type, organization_type, article_type], + metadata_entries=[author, publisher, article] + ) + + # Test relationships + self.assertEqual(len(facade.types), 3) + self.assertEqual(len(facade.metadata_entries), 3) + + # Test that references work correctly + article_entry = facade.get_entry("article1") + self.assertIn("author1", article_entry.references["author"]) + self.assertIn("pub1", article_entry.references["publisher"]) + + # Test triple generation includes relationships + triples = list(facade.to_triples()) + triple_strs = [(str(s), str(p), str(o)) for s, p, o in triples] + + # Should have triples linking article to author and publisher + author_ref_found = any( + "article1" in triple[0] and "author" in triple[1] and "author1" in triple[2] + for triple in triple_strs + ) + self.assertTrue(author_ref_found, "Should generate author reference triple") + + publisher_ref_found = any( + "article1" in triple[0] and "publisher" in triple[1] and "pub1" in triple[2] + for triple in triple_strs + ) + self.assertTrue(publisher_ref_found, "Should generate publisher reference triple") + + print(f"\nComplex relationship example generated {len(triples)} triples") + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/0.2.x/lib/python/lib-ro-crate-schema/tests/test_metadata_entry.py b/0.2.x/lib/python/lib-ro-crate-schema/tests/test_metadata_entry.py new file mode 100644 index 0000000..c3ced30 --- /dev/null +++ b/0.2.x/lib/python/lib-ro-crate-schema/tests/test_metadata_entry.py @@ -0,0 +1,272 @@ +import unittest +import sys +from pathlib import Path +from datetime 
from datetime import datetime

# Put the sibling "src" directory first on sys.path so that
# lib_ro_crate_schema resolves to the in-repo sources.
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from lib_ro_crate_schema.crate.metadata_entry import MetadataEntry
from rdflib import URIRef, RDF, Literal


class TestMetadataEntry(unittest.TestCase):
    """Test cases for the MetadataEntry class."""

    @staticmethod
    def _triple_strs(entry):
        """Return the triples produced by ``entry.to_triples()`` as string 3-tuples.

        Comparing plain strings keeps the assertions independent of the
        concrete node classes used for subject, predicate and object.
        """
        return [(str(s), str(p), str(o)) for s, p, o in entry.to_triples()]

    @staticmethod
    def _has_triple(triple_strs, predicate_part, object_part):
        """Return True if some triple contains both substrings.

        ``predicate_part`` is searched in the predicate and ``object_part``
        in the object of every ``(subject, predicate, object)`` string tuple.
        """
        return any(
            predicate_part in predicate and object_part in obj
            for _, predicate, obj in triple_strs
        )

    def setUp(self):
        """Create a bare entry, a fully populated entry and a datetime entry."""
        self.basic_entry = MetadataEntry(
            id="basic_entry",
            class_id="BasicClass",
        )

        self.complete_entry = MetadataEntry(
            id="person1",
            class_id="Person",
            properties={
                "name": "John Doe",
                "age": 30,
                "active": True,
            },
            references={
                "knows": ["person2", "person3"],
                "worksFor": ["organization1"],
            },
        )

        self.datetime_entry = MetadataEntry(
            id="event1",
            class_id="Event",
            properties={
                "title": "Important Meeting",
                "startTime": datetime(2023, 12, 25, 14, 30, 0),
            },
        )

    def test_metadata_entry_creation(self):
        """An entry built from id/class_id only has empty properties and references."""
        self.assertEqual(self.basic_entry.id, "basic_entry")
        self.assertEqual(self.basic_entry.class_id, "BasicClass")
        self.assertEqual(self.basic_entry.properties, {})
        self.assertEqual(self.basic_entry.references, {})

    def test_complete_entry_properties(self):
        """Properties and references passed to the constructor are stored unchanged."""
        self.assertEqual(self.complete_entry.id, "person1")
        self.assertEqual(self.complete_entry.class_id, "Person")

        # Check properties
        self.assertEqual(self.complete_entry.properties["name"], "John Doe")
        self.assertEqual(self.complete_entry.properties["age"], 30)
        self.assertEqual(self.complete_entry.properties["active"], True)

        # Check references
        self.assertEqual(self.complete_entry.references["knows"], ["person2", "person3"])
        self.assertEqual(self.complete_entry.references["worksFor"], ["organization1"])

    def test_java_api_compatibility(self):
        """The camelCase accessors mirror the snake_case attributes."""
        self.assertEqual(self.complete_entry.getId(), "person1")
        self.assertEqual(self.complete_entry.getClassId(), "Person")

        values = self.complete_entry.getValues()
        self.assertEqual(values["name"], "John Doe")
        self.assertEqual(values["age"], 30)

        references = self.complete_entry.getReferences()
        self.assertEqual(references["knows"], ["person2", "person3"])

        # Test alias method
        self.assertEqual(self.complete_entry.get_values(), self.complete_entry.properties)

    def test_to_triples(self):
        """to_triples() yields a type triple and one triple per property."""
        triple_strs = self._triple_strs(self.complete_entry)

        # Should generate multiple triples
        self.assertGreater(len(triple_strs), 0)

        # Check for type declaration (any predicate, object mentions the class)
        self.assertTrue(
            self._has_triple(triple_strs, "", "Person"),
            "Should generate class type triple",
        )

        # Check for properties
        self.assertTrue(
            self._has_triple(triple_strs, "name", "John Doe"),
            "Should generate property triples",
        )
        self.assertTrue(
            self._has_triple(triple_strs, "age", "30"),
            "Should generate age property triple",
        )

    def test_datetime_handling(self):
        """A datetime property value appears in ISO format in the triple object."""
        triple_strs = self._triple_strs(self.datetime_entry)

        self.assertTrue(
            self._has_triple(triple_strs, "startTime", "2023-12-25T14:30:00"),
            "Should convert datetime to ISO string",
        )

    def test_reference_triples(self):
        """Each reference produces a triple whose object mentions the target id."""
        triple_strs = self._triple_strs(self.complete_entry)

        self.assertTrue(
            self._has_triple(triple_strs, "knows", "person2"),
            "Should generate reference triples",
        )
        self.assertTrue(
            self._has_triple(triple_strs, "worksFor", "organization1"),
            "Should generate worksFor reference",
        )

    def test_empty_entry_triples(self):
        """An entry without properties or references still emits its type triple."""
        empty_entry = MetadataEntry(id="empty", class_id="EmptyClass")
        triple_strs = self._triple_strs(empty_entry)

        # Should at least generate the type declaration
        self.assertGreater(len(triple_strs), 0)
        self.assertTrue(
            self._has_triple(triple_strs, "", "EmptyClass"),
            "Should generate type declaration even for empty entry",
        )

    def test_mixed_property_types(self):
        """String, int, float and bool property values all reach the triples."""
        mixed_entry = MetadataEntry(
            id="mixed",
            class_id="MixedType",
            properties={
                "string_prop": "text value",
                "int_prop": 42,
                "float_prop": 3.14,
                "bool_prop": False,
                "none_prop": None,  # not asserted on, see NOTE below
            },
        )

        triple_strs = self._triple_strs(mixed_entry)

        # (property name, expected text in the object, failure message)
        expectations = [
            ("string_prop", "text value", "Should handle string properties"),
            ("int_prop", "42", "Should handle integer properties"),
            ("float_prop", "3.14", "Should handle float properties"),
            ("bool_prop", "false", "Should handle boolean properties"),
        ]
        for prop, expected, message in expectations:
            with self.subTest(prop=prop):
                self.assertTrue(self._has_triple(triple_strs, prop, expected), message)

        # NOTE: nothing is asserted about "none_prop". Ideally None values are
        # filtered out of the generated triples, but this test does not pin
        # that down (the earlier version computed a flag for it and never
        # checked it, which only looked like coverage).

    def test_multiple_references_same_property(self):
        """A reference list yields one triple per referenced id."""
        multi_ref_entry = MetadataEntry(
            id="multi_ref",
            class_id="MultiRef",
            references={
                "collaborator": ["person1", "person2", "person3"],
            },
        )

        triple_strs = self._triple_strs(multi_ref_entry)

        # Should generate separate triples for each reference
        for person in ("person1", "person2", "person3"):
            with self.subTest(person=person):
                self.assertTrue(
                    self._has_triple(triple_strs, "collaborator", person),
                    f"Should generate triple for {person}",
                )

    def test_id_and_class_id_validation(self):
        """id and class_id are readable both directly and via the Java-style getters."""
        entry = MetadataEntry(id="test_id", class_id="TestClass")

        # Direct access
        self.assertEqual(entry.id, "test_id")
        self.assertEqual(entry.class_id, "TestClass")

        # Java API access
        self.assertEqual(entry.getId(), "test_id")
        self.assertEqual(entry.getClassId(), "TestClass")

    def test_get_entry_as_compatibility(self):
        """SchemaFacade.get_entry_as converts an entry into a Pydantic model instance."""
        # Imported locally, as in the original test, so that importing this
        # module only requires MetadataEntry.
        from lib_ro_crate_schema.crate.schema_facade import SchemaFacade
        from pydantic import BaseModel
        from typing import Optional

        # Create a simple test model
        class TestPerson(BaseModel):
            name: str
            age: Optional[int] = None
            active: Optional[bool] = None

        # Create a facade and add our test entry
        facade = SchemaFacade()
        facade.addEntry(self.complete_entry)

        # Test conversion to our test model
        person_instance = facade.get_entry_as("person1", TestPerson)

        self.assertIsNotNone(person_instance)
        self.assertIsInstance(person_instance, TestPerson)
        self.assertEqual(person_instance.name, "John Doe")
        self.assertEqual(person_instance.age, 30)
        self.assertEqual(person_instance.active, True)

        # Test with non-existent entry
        none_result = facade.get_entry_as("nonexistent", TestPerson)
        self.assertIsNone(none_result)

    def test_get_entry_as_with_references(self):
        """get_entry_as exposes references as plain ids when the model declares strings."""
        from lib_ro_crate_schema.crate.schema_facade import SchemaFacade
        from pydantic import BaseModel
        from typing import Optional, List

        class TestPersonWithRefs(BaseModel):
            name: str
            age: Optional[int] = None
            knows: Optional[List[str]] = None  # Keep as strings for this test
            worksFor: Optional[str] = None  # Single reference as string

        # Create facade and add entries
        facade = SchemaFacade()
        facade.addEntry(self.complete_entry)

        # Add a referenced organization entry
        org_entry = MetadataEntry(
            id="organization1",
            class_id="Organization",
            properties={"name": "Tech Corp"},
        )
        facade.addEntry(org_entry)

        # Test conversion
        person = facade.get_entry_as("person1", TestPersonWithRefs)

        self.assertIsNotNone(person)
        self.assertEqual(person.name, "John Doe")
        self.assertEqual(person.age, 30)
        self.assertEqual(person.knows, ["person2", "person3"])  # References as IDs
        self.assertEqual(person.worksFor, "organization1")  # Single reference as ID


if __name__ == '__main__':
    unittest.main()
# ---- tests/test_pydantic_export.py ------------------------------------------
"""
Test suite for Pydantic model export functionality in SchemaFacade.
"""

import unittest
import sys
from pathlib import Path
from typing import List, Optional

# Add src to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from lib_ro_crate_schema.crate.schema_facade import SchemaFacade
from lib_ro_crate_schema.crate.type import Type
from lib_ro_crate_schema.crate.type_property import TypeProperty
from lib_ro_crate_schema.crate.restriction import Restriction
from pydantic import BaseModel, ValidationError


class TestPydanticExport(unittest.TestCase):
    """Test Pydantic model export functionality."""

    def setUp(self):
        """Create a facade holding a ``Person`` type with a required name and optional age."""
        self.facade = SchemaFacade()

        # Create a simple Person type
        person_name_prop = TypeProperty(
            id="name",
            label="Name",
            comment="Person's name",
            range_includes=["http://www.w3.org/2001/XMLSchema#string"],
            required=True,
        )

        person_age_prop = TypeProperty(
            id="age",
            label="Age",
            comment="Age in years",
            range_includes=["http://www.w3.org/2001/XMLSchema#integer"],
            required=False,
        )

        person_type = Type(
            id="Person",
            label="Person",
            comment="A person",
            rdfs_property=[person_name_prop, person_age_prop],
            restrictions=[
                Restriction(property_type="name", min_cardinality=1, max_cardinality=1),
                Restriction(property_type="age", min_cardinality=0, max_cardinality=1),
            ],
        )

        self.facade.addType(person_type)

    def test_export_single_model(self):
        """A single exported model has the schema's fields and enforces required ones."""
        PersonModel = self.facade.export_pydantic_model("Person")

        # Check class properties
        self.assertEqual(PersonModel.__name__, "Person")
        self.assertIn("name", PersonModel.__annotations__)
        self.assertIn("age", PersonModel.__annotations__)

        # Test instance creation
        person = PersonModel(name="Alice")
        self.assertEqual(person.name, "Alice")
        self.assertIsNone(person.age)

        # Test validation
        with self.assertRaises(ValidationError):
            PersonModel()  # Missing required 'name'

    def test_export_all_models(self):
        """export_all_pydantic_models returns a usable model keyed by type id."""
        models = self.facade.export_all_pydantic_models()

        self.assertIn("Person", models)
        PersonModel = models["Person"]

        # Test functionality
        person = PersonModel(name="Bob", age=30)
        self.assertEqual(person.name, "Bob")
        self.assertEqual(person.age, 30)

    def test_type_mapping(self):
        """XSD and schema.org range IRIs map to the matching Python builtins."""
        expected_types = [
            ("http://www.w3.org/2001/XMLSchema#string", str),
            ("http://www.w3.org/2001/XMLSchema#integer", int),
            ("http://www.w3.org/2001/XMLSchema#boolean", bool),
            # schema.org types
            ("https://schema.org/Text", str),
        ]
        for iri, python_type in expected_types:
            with self.subTest(iri=iri):
                self.assertEqual(self.facade._rdf_type_to_python_type([iri]), python_type)

    def test_field_requirements(self):
        """Field requirement is derived from the restriction's min cardinality."""
        person_type = self.facade.get_type("Person")

        # name should be required (minCardinality: 1)
        self.assertTrue(self.facade._is_field_required(person_type, "name"))

        # age should be optional (minCardinality: 0)
        self.assertFalse(self.facade._is_field_required(person_type, "age"))

    def test_list_fields(self):
        """A property with unbounded max cardinality is exported as a list field."""
        # Add a type with list property
        list_prop = TypeProperty(
            id="tags",
            label="Tags",
            range_includes=["http://www.w3.org/2001/XMLSchema#string"],
        )

        list_type = Type(
            id="TaggedItem",
            rdfs_property=[list_prop],
            restrictions=[
                # max_cardinality=None means unbounded
                Restriction(property_type="tags", min_cardinality=0, max_cardinality=None),
            ],
        )

        self.facade.addType(list_type)

        # Test list detection
        self.assertTrue(self.facade._is_field_list(list_type, "tags"))

        # Export and test
        TaggedModel = self.facade.export_pydantic_model("TaggedItem")
        tagged = TaggedModel(tags=["tag1", "tag2"])
        self.assertEqual(tagged.tags, ["tag1", "tag2"])

    def test_forward_references(self):
        """Exporting all models copes with a type whose range names another type."""
        # Add Organization type that references Person
        org_name_prop = TypeProperty(
            id="name",
            label="Organization Name",
            range_includes=["http://www.w3.org/2001/XMLSchema#string"],
        )

        org_members_prop = TypeProperty(
            id="members",
            label="Members",
            range_includes=["Person"],  # Forward reference
        )

        org_type = Type(
            id="Organization",
            rdfs_property=[org_name_prop, org_members_prop],
            restrictions=[
                Restriction(property_type="name", min_cardinality=1, max_cardinality=1),
                Restriction(property_type="members", min_cardinality=0, max_cardinality=None),
            ],
        )

        self.facade.addType(org_type)

        # Export all models (should handle forward references)
        models = self.facade.export_all_pydantic_models()

        # Test that both models were created
        self.assertIn("Person", models)
        self.assertIn("Organization", models)

        # Test basic functionality (forward ref might not work perfectly but shouldn't crash)
        OrgModel = models["Organization"]
        org = OrgModel(name="Test Corp")
        self.assertEqual(org.name, "Test Corp")

    def test_nonexistent_type(self):
        """Exporting an unknown type id raises ValueError."""
        with self.assertRaises(ValueError):
            self.facade.export_pydantic_model("NonExistentType")

    def test_custom_base_class(self):
        """An exported model can inherit from a caller-supplied base class."""
        class CustomBase(BaseModel):
            custom_field: str = "default"

        PersonModel = self.facade.export_pydantic_model("Person", base_class=CustomBase)

        # Should inherit from custom base
        self.assertTrue(issubclass(PersonModel, CustomBase))

        # Should have both custom and schema fields
        person = PersonModel(name="Test")
        self.assertEqual(person.name, "Test")
        self.assertEqual(person.custom_field, "default")

    def test_field_metadata(self):
        """Property comments show up as field descriptions in the JSON schema."""
        PersonModel = self.facade.export_pydantic_model("Person")

        # Check model schema includes field descriptions
        schema = PersonModel.model_json_schema()
        self.assertIn("Person's name", schema["properties"]["name"]["description"])
        self.assertIn("Age in years", schema["properties"]["age"]["description"])


if __name__ == "__main__":
    unittest.main()


# ---- tests/test_restriction.py ----------------------------------------------
import unittest
import sys
from pathlib import Path

# Add source to path
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from lib_ro_crate_schema.crate.restriction import Restriction
from rdflib import OWL, Literal, XSD


class TestRestriction(unittest.TestCase):
    """Test cases for the Restriction class."""

    @staticmethod
    def _triple_strs(restriction):
        """Return the triples produced by ``restriction.to_triples()`` as string 3-tuples."""
        return [(str(s), str(p), str(o)) for s, p, o in restriction.to_triples()]

    @staticmethod
    def _has_predicate(triple_strs, name):
        """Return True if any triple's predicate contains *name*."""
        return any(name in predicate for _, predicate, _ in triple_strs)

    @staticmethod
    def _has_restriction_type(triple_strs):
        """Return True if any triple's object mentions ``Restriction``."""
        return any("Restriction" in obj for _, _, obj in triple_strs)

    def setUp(self):
        """Create a bare, a fully specified and an unbounded restriction."""
        self.basic_restriction = Restriction(property_type="testProperty")

        self.complete_restriction = Restriction(
            id="complete_restriction",
            property_type="name",
            min_cardinality=1,
            max_cardinality=1,
        )

        self.unbounded_restriction = Restriction(
            property_type="tags",
            min_cardinality=0,
            max_cardinality=None,  # Unbounded
        )

    def test_restriction_creation(self):
        """Without explicit values the cardinalities are None and an id is assigned."""
        self.assertEqual(self.basic_restriction.property_type, "testProperty")
        self.assertIsNone(self.basic_restriction.min_cardinality)
        self.assertIsNone(self.basic_restriction.max_cardinality)
        self.assertIsNotNone(self.basic_restriction.id)  # Auto-generated UUID

    def test_restriction_with_cardinalities(self):
        """Explicit cardinalities are stored unchanged."""
        self.assertEqual(self.complete_restriction.property_type, "name")
        self.assertEqual(self.complete_restriction.min_cardinality, 1)
        self.assertEqual(self.complete_restriction.max_cardinality, 1)

    def test_unbounded_restriction(self):
        """max_cardinality=None is kept as None (unbounded)."""
        self.assertEqual(self.unbounded_restriction.property_type, "tags")
        self.assertEqual(self.unbounded_restriction.min_cardinality, 0)
        self.assertIsNone(self.unbounded_restriction.max_cardinality)

    def test_to_triples(self):
        """A fully specified restriction emits type, onProperty and both cardinalities."""
        triple_strs = self._triple_strs(self.complete_restriction)

        # Should generate multiple triples
        self.assertGreater(len(triple_strs), 0)

        # Check for essential triples
        self.assertTrue(
            self._has_restriction_type(triple_strs),
            "Should generate owl:Restriction type triple",
        )
        self.assertTrue(
            self._has_predicate(triple_strs, "onProperty"),
            "Should generate owl:onProperty triple",
        )
        self.assertTrue(
            self._has_predicate(triple_strs, "minCardinality"),
            "Should generate owl:minCardinality triple",
        )
        self.assertTrue(
            self._has_predicate(triple_strs, "maxCardinality"),
            "Should generate owl:maxCardinality triple",
        )

    def test_minimal_restriction_triples(self):
        """A restriction without cardinalities emits no cardinality triples."""
        minimal = Restriction(property_type="minimal_prop")
        triple_strs = self._triple_strs(minimal)

        # Should at least generate type and onProperty triples
        self.assertGreater(len(triple_strs), 0)

        self.assertTrue(
            self._has_restriction_type(triple_strs),
            "Should generate owl:Restriction type",
        )
        self.assertTrue(
            self._has_predicate(triple_strs, "onProperty"),
            "Should generate owl:onProperty",
        )

        # Should NOT generate cardinality triples when they're None
        self.assertFalse(
            self._has_predicate(triple_strs, "minCardinality"),
            "Should not generate minCardinality when None",
        )
        self.assertFalse(
            self._has_predicate(triple_strs, "maxCardinality"),
            "Should not generate maxCardinality when None",
        )

    def test_only_min_cardinality(self):
        """Only the min cardinality triple is emitted when max is unset."""
        restriction = Restriction(
            property_type="min_only",
            min_cardinality=1,
        )

        triple_strs = self._triple_strs(restriction)

        self.assertTrue(
            self._has_predicate(triple_strs, "minCardinality"),
            "Should generate minCardinality",
        )
        self.assertFalse(
            self._has_predicate(triple_strs, "maxCardinality"),
            "Should not generate maxCardinality when None",
        )

    def test_only_max_cardinality(self):
        """Only the max cardinality triple is emitted when min is unset."""
        restriction = Restriction(
            property_type="max_only",
            max_cardinality=5,
        )

        triple_strs = self._triple_strs(restriction)

        self.assertFalse(
            self._has_predicate(triple_strs, "minCardinality"),
            "Should not generate minCardinality when None",
        )
        self.assertTrue(
            self._has_predicate(triple_strs, "maxCardinality"),
            "Should generate maxCardinality",
        )

    def test_zero_cardinalities(self):
        """Explicit zero cardinalities are emitted (0 is not treated like None)."""
        restriction = Restriction(
            property_type="zero_test",
            min_cardinality=0,
            max_cardinality=0,
        )

        triple_strs = self._triple_strs(restriction)

        # Zero cardinalities should be included (different from None)
        min_card_found = any(
            "minCardinality" in predicate and "0" in obj
            for _, predicate, obj in triple_strs
        )
        max_card_found = any(
            "maxCardinality" in predicate and "0" in obj
            for _, predicate, obj in triple_strs
        )

        self.assertTrue(min_card_found, "Should generate minCardinality=0")
        self.assertTrue(max_card_found, "Should generate maxCardinality=0")

    def test_common_restriction_patterns(self):
        """The four usual cardinality patterns all emit type and onProperty triples."""
        patterns = [
            # Required single value (exactly one)
            Restriction(property_type="title", min_cardinality=1, max_cardinality=1),
            # Optional single value (zero or one)
            Restriction(property_type="description", min_cardinality=0, max_cardinality=1),
            # Required multiple values (one or more)
            Restriction(property_type="author", min_cardinality=1, max_cardinality=None),
            # Optional multiple values (zero or more)
            Restriction(property_type="keywords", min_cardinality=0, max_cardinality=None),
        ]

        for restriction in patterns:
            # subTest reports every failing pattern instead of stopping at the first.
            with self.subTest(property_type=restriction.property_type):
                triple_strs = self._triple_strs(restriction)
                self.assertGreater(
                    len(triple_strs),
                    0,
                    f"Restriction {restriction.property_type} should generate triples",
                )

                # All should have type and onProperty
                self.assertTrue(
                    self._has_restriction_type(triple_strs),
                    f"Restriction {restriction.property_type} should have type",
                )
                self.assertTrue(
                    self._has_predicate(triple_strs, "onProperty"),
                    f"Restriction {restriction.property_type} should have onProperty",
                )

    def test_custom_id(self):
        """A caller-supplied id is stored and used in the triple subjects."""
        custom_id = "Person_name_restriction"
        restriction = Restriction(
            id=custom_id,
            property_type="name",
            min_cardinality=1,
        )

        self.assertEqual(restriction.id, custom_id)

        # The subject of triples should use the custom ID
        subjects = {subject for subject, _, _ in self._triple_strs(restriction)}
        custom_id_used = any(custom_id in subject for subject in subjects)
        self.assertTrue(custom_id_used, "Should use custom ID in triples")


if __name__ == '__main__':
    unittest.main()


# ---- tests/test_roundtrip.py ------------------------------------------------
import unittest
import sys
import json
import tempfile
from pathlib import Path

# Add source to path
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from lib_ro_crate_schema.crate.schema_facade import SchemaFacade
from lib_ro_crate_schema.crate.type import Type
from lib_ro_crate_schema.crate.type_property import TypeProperty
from lib_ro_crate_schema.crate.literal_type import LiteralType
from lib_ro_crate_schema.crate.metadata_entry import MetadataEntry
from lib_ro_crate_schema.crate.restriction import Restriction


class TestRoundTripCycles(unittest.TestCase):
    """Test round-trip conversion cycles to verify no data loss during import/export."""

    def setUp(self):
        """Build a facade with two types, two restrictions and three entries."""
        # Properties
        self.name_prop = TypeProperty(
            id="name",
            range_includes=[LiteralType.STRING],
            required=True,
            label="Full Name",
            comment="The complete name of the entity",
            ontological_annotations=["https://schema.org/name"],
        )

        self.age_prop = TypeProperty(
            id="age",
            range_includes=[LiteralType.INTEGER],
            required=False,
            label="Age",
            comment="Age in years",
        )

        self.email_prop = TypeProperty(
            id="email",
            range_includes=[LiteralType.STRING],
            required=False,
            label="Email Address",
        )

        self.knows_prop = TypeProperty(
            id="knows",
            range_includes=["Person"],
            required=False,
            label="Knows",
            comment="People this person knows",
        )

        # Restrictions
        self.name_restriction = Restriction(
            id="Person_name_restriction",
            property_type="name",
            min_cardinality=1,
            max_cardinality=1,
        )

        self.knows_restriction = Restriction(
            id="Person_knows_restriction",
            property_type="knows",
            min_cardinality=0,
            max_cardinality=None,  # Unbounded
        )

        # Types
        self.person_type = Type(
            id="Person",
            subclass_of=["https://schema.org/Thing"],
            ontological_annotations=["https://schema.org/Person"],
            rdfs_property=[self.name_prop, self.age_prop, self.email_prop, self.knows_prop],
            restrictions=[self.name_restriction, self.knows_restriction],
            comment="A person entity with comprehensive metadata",
            label="Person",
        )

        self.organization_type = Type(
            id="Organization",
            subclass_of=["https://schema.org/Thing"],
            ontological_annotations=["https://schema.org/Organization"],
            rdfs_property=[self.name_prop],
            comment="An organization",
            label="Organization",
        )

        # Metadata entries
        self.person1 = MetadataEntry(
            id="person1",
            class_id="Person",
            properties={
                "name": "Alice Johnson",
                "age": 30,
                "email": "alice@example.com",
            },
            references={
                "knows": ["person2"],
            },
        )

        self.person2 = MetadataEntry(
            id="person2",
            class_id="Person",
            properties={
                "name": "Bob Smith",
                "age": 25,
            },
            references={
                "knows": ["person1"],  # Mutual relationship
            },
        )

        self.org1 = MetadataEntry(
            id="org1",
            class_id="Organization",
            properties={
                "name": "Example Corp",
            },
        )

        # Complete facade
        self.original_facade = SchemaFacade(
            types=[self.person_type, self.organization_type],
            metadata_entries=[self.person1, self.person2, self.org1],
        )

    def test_export_import_roundtrip(self):
        """Writing a crate to disk and reading it back keeps the schema consistent."""
        with tempfile.TemporaryDirectory() as temp_dir:
            # Export original facade
            self.original_facade.write(
                temp_dir,
                name="Roundtrip Test",
                description="Testing roundtrip conversion",
                license="MIT",
            )

            # Import back from file
            metadata_file = Path(temp_dir) / "ro-crate-metadata.json"
            imported_facade = SchemaFacade.from_ro_crate(metadata_file)

            # Compare facades
            self._compare_facades(self.original_facade, imported_facade, "File roundtrip")

    def test_json_dict_roundtrip(self):
        """Converting to a JSON dict and back keeps the schema consistent."""
        # Convert to JSON dict
        json_data = self.original_facade.to_json()

        # Import from dict
        imported_facade = SchemaFacade.from_dict(json_data)

        # Compare facades
        self._compare_facades(self.original_facade, imported_facade, "JSON dict roundtrip")

    def test_multiple_roundtrips(self):
        """Three consecutive export/import cycles stay consistent with the original."""
        current_facade = self.original_facade

        for cycle in range(3):  # Test 3 cycles
            with tempfile.TemporaryDirectory() as temp_dir:
                # Export current facade
                current_facade.write(
                    temp_dir,
                    name=f"Multi-roundtrip Cycle {cycle + 1}",
                    description="Testing multiple roundtrip cycles",
                )

                # Import back
                metadata_file = Path(temp_dir) / "ro-crate-metadata.json"
                current_facade = SchemaFacade.from_ro_crate(metadata_file)

                # Compare with original (should remain consistent)
                self._compare_facades(
                    self.original_facade,
                    current_facade,
                    f"Multiple roundtrip cycle {cycle + 1}",
                )

    def test_triples_preservation(self):
        """Report triple differences after a JSON roundtrip; require a non-empty result."""
        # Normalize to string tuples so the two sets can be compared directly.
        original_triples = {
            (str(s), str(p), str(o)) for s, p, o in self.original_facade.to_triples()
        }

        # Roundtrip via JSON
        json_data = self.original_facade.to_json()
        imported_facade = SchemaFacade.from_dict(json_data)

        imported_triples = {
            (str(s), str(p), str(o)) for s, p, o in imported_facade.to_triples()
        }

        # Compare triple sets
        print("\nTriples preservation test:")
        print(f"Original triples: {len(original_triples)}")
        print(f"Imported triples: {len(imported_triples)}")

        # Find differences
        only_in_original = original_triples - imported_triples
        only_in_imported = imported_triples - original_triples

        if only_in_original:
            print(f"Triples lost in import: {len(only_in_original)}")
            for triple in list(only_in_original)[:5]:  # Show first 5
                print(f" Lost: {triple}")

        if only_in_imported:
            print(f"New triples in import: {len(only_in_imported)}")
            for triple in list(only_in_imported)[:5]:  # Show first 5
                print(f" New: {triple}")

        # Differences are only reported, not asserted: some are expected from
        # RO-Crate structure additions. The import must still yield triples.
        self.assertGreater(len(imported_triples), 0, "Should have imported triples")

    def test_obenbis_roundtrip(self):
        """Roundtrip the OpenBIS example crate; skipped when the file is absent."""
        obenbis_file = (Path(__file__).parent.parent.parent.parent /
                        "example" / "obenbis-one-publication" / "ro-crate-metadata.json")

        if not obenbis_file.exists():
            self.skipTest(f"OpenBIS example not found at {obenbis_file}")

        # Import OpenBIS RO-Crate
        original_facade = SchemaFacade.from_ro_crate(obenbis_file)

        with tempfile.TemporaryDirectory() as temp_dir:
            # Export it
            original_facade.write(
                temp_dir,
                name="OpenBIS Roundtrip Test",
                description="Testing OpenBIS RO-Crate roundtrip",
            )

            # Import back
            metadata_file = Path(temp_dir) / "ro-crate-metadata.json"
            imported_facade = SchemaFacade.from_ro_crate(metadata_file)

            # Basic consistency checks
            print("\nOpenBIS roundtrip test:")
            print(f"Original - Types: {len(original_facade.types)}, Entries: {len(original_facade.metadata_entries)}")
            print(f"Imported - Types: {len(imported_facade.types)}, Entries: {len(imported_facade.metadata_entries)}")

            # A sum of two lengths is never negative, so the previous
            # ">= 0" check could not fail. Require at least one entity.
            self.assertGreater(
                len(imported_facade.types) + len(imported_facade.metadata_entries),
                0,
                "Should have imported some entities",
            )

    def test_property_cardinality_preservation(self):
        """Required/optional flags survive a JSON roundtrip as min cardinalities."""
        # Create a facade with specific cardinality requirements
        required_prop = TypeProperty(id="required_field", range_includes=[LiteralType.STRING], required=True)
        optional_prop = TypeProperty(id="optional_field", range_includes=[LiteralType.STRING], required=False)

        test_type = Type(
            id="TestType",
            rdfs_property=[required_prop, optional_prop],
        )

        test_facade = SchemaFacade(types=[test_type])

        # Roundtrip via JSON
        json_data = test_facade.to_json()
        imported_facade = SchemaFacade.from_dict(json_data)

        # Check that cardinality info is preserved through restrictions
        imported_type = imported_facade.get_type("TestType")
        self.assertIsNotNone(imported_type)

        # Index restrictions by property; a later duplicate wins, which
        # matches the behaviour of a sequential search loop.
        by_property = {
            restriction.property_type: restriction
            for restriction in imported_type.get_restrictions()
        }
        required_restriction = by_property.get("required_field")
        optional_restriction = by_property.get("optional_field")

        # Check cardinalities (only if restrictions were generated)
        if required_restriction:
            self.assertEqual(required_restriction.min_cardinality, 1, "Required field should have min cardinality 1")

        if optional_restriction:
            self.assertEqual(optional_restriction.min_cardinality, 0, "Optional field should have min cardinality 0")

    def test_ontological_annotations_preservation(self):
        """Ontological annotations on Person are still present after a JSON roundtrip."""
        json_data = self.original_facade.to_json()
        imported_facade = SchemaFacade.from_dict(json_data)

        # Check Person type annotations
        original_person = self.original_facade.get_type("Person")
        imported_person = imported_facade.get_type("Person")

        if imported_person and original_person:
            print("\nOntological annotations test:")
            print(f"Original Person ontological annotations: {original_person.ontological_annotations}")
            print(f"Imported Person ontological annotations: {imported_person.ontological_annotations}")

            # Should preserve ontological mapping
            if original_person.ontological_annotations:
                self.assertIsNotNone(
                    imported_person.ontological_annotations,
                    "Should preserve ontological annotations",
                )

    def _compare_facades(self, original: SchemaFacade, imported: SchemaFacade, test_name: str):
        """Assert that *imported* is consistent with *original*.

        Checks that the imported facade holds at least as many types plus
        entries as the original, that every type or entry found again under
        the same id keeps its id (and label / class id), and that JSON and
        triples can still be generated from the imported facade.
        ``test_name`` only labels the printed progress output.
        """
        print(f"\n{test_name} comparison:")
        print(f"Original - Types: {len(original.types)}, Entries: {len(original.metadata_entries)}")
        print(f"Imported - Types: {len(imported.types)}, Entries: {len(imported.metadata_entries)}")

        # Basic counts should be similar (allowing for RO-Crate structure additions)
        self.assertGreaterEqual(
            len(imported.types) + len(imported.metadata_entries),
            len(original.types) + len(original.metadata_entries),
            "Should preserve at least original entities",
        )

        # Check specific types are preserved
        for original_type in original.types:
            imported_type = imported.get_type(original_type.id)
            if not imported_type:  # May not be preserved due to import/export limitations
                continue

            self.assertEqual(
                imported_type.id,
                original_type.id,
                f"Type ID should be preserved: {original_type.id}",
            )

            if original_type.label and imported_type.label:
                self.assertEqual(
                    imported_type.label,
                    original_type.label,
                    f"Type label should be preserved: {original_type.id}",
                )

        # Check specific metadata entries are preserved
        for original_entry in original.metadata_entries:
            imported_entry = imported.get_entry(original_entry.id)
            if not imported_entry:  # May not be preserved due to import/export limitations
                continue

            self.assertEqual(
                imported_entry.id,
                original_entry.id,
                f"Entry ID should be preserved: {original_entry.id}",
            )

            self.assertEqual(
                imported_entry.class_id,
                original_entry.class_id,
                f"Entry class ID should be preserved: {original_entry.id}",
            )

        # Test that we can generate valid output from imported facade.
        # Exceptions are turned into test failures that carry the message.
        try:
            imported_json = imported.to_json()
            self.assertIn("@context", imported_json)
            self.assertIn("@graph", imported_json)
        except Exception as e:
            self.fail(f"Failed to generate JSON from imported facade: {e}")

        try:
            imported_triples = list(imported.to_triples())
            self.assertGreater(len(imported_triples), 0, "Should generate triples from imported facade")
        except Exception as e:
            self.fail(f"Failed to generate triples from imported facade: {e}")

        print(f"✓ {test_name} completed successfully")


if __name__ == '__main__':
    unittest.main()


# ---- tests/test_schema_facade.py (continues past this chunk) -----------------
import unittest
import sys
import json
import tempfile
from pathlib import Path

# Add source to path
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from lib_ro_crate_schema.crate.schema_facade import SchemaFacade
from lib_ro_crate_schema.crate.type import Type
from lib_ro_crate_schema.crate.type_property import TypeProperty
from lib_ro_crate_schema.crate.literal_type import LiteralType
from lib_ro_crate_schema.crate.metadata_entry import MetadataEntry
from lib_ro_crate_schema.crate.restriction import Restriction


class TestSchemaFacade(unittest.TestCase):
    """Unit tests for SchemaFacade: construction, lookups, export and import."""

    def setUp(self):
        """Build a facade holding one ``Person`` type and one ``person1`` entry."""
        self.name_property = TypeProperty(
            id="name", range_includes=[LiteralType.STRING], required=True
        )
        self.age_property = TypeProperty(
            id="age", range_includes=[LiteralType.INTEGER], required=False
        )

        self.person_type = Type(
            id="Person",
            rdfs_property=[self.name_property, self.age_property],
            comment="A person entity",
            label="Person",
        )
        self.person_entry = MetadataEntry(
            id="person1",
            class_id="Person",
            properties={"name": "John Doe", "age": 30},
        )

        self.facade = SchemaFacade(
            types=[self.person_type],
            metadata_entries=[self.person_entry],
        )

    def test_facade_creation(self):
        """A default facade is empty; the fixture holds one type and one entry."""
        for candidate, expected_count in ((SchemaFacade(), 0), (self.facade, 1)):
            self.assertEqual(len(candidate.types), expected_count)
            self.assertEqual(len(candidate.metadata_entries), expected_count)

    def test_fluent_api(self):
        """``addType`` and ``addEntry`` chain and register their arguments."""
        target = SchemaFacade()
        chained = target.addType(self.person_type).addEntry(self.person_entry)

        # The chain has to hand back the facade it was started on.
        self.assertEqual(chained, target)

        # Both arguments must now be held by the facade.
        self.assertIn(self.person_type, target.types)
        self.assertIn(self.person_entry, target.metadata_entries)

    def test_get_methods(self):
        """The getters return the fixture's type and entry, or None when absent."""
        # get_types
        all_types = self.facade.get_types()
        self.assertEqual(len(all_types), 1)
        self.assertEqual(all_types[0].id, "Person")

        # get_type: hit, then miss
        found_type = self.facade.get_type("Person")
        self.assertIsNotNone(found_type)
        self.assertEqual(found_type.id, "Person")

        missing_type = self.facade.get_type("NonExistent")
        self.assertIsNone(missing_type)

        # get_entries
        all_entries = self.facade.get_entries()
        self.assertEqual(len(all_entries), 1)
        self.assertEqual(all_entries[0].id, "person1")

        # get_entry
        found_entry = self.facade.get_entry("person1")
        self.assertIsNotNone(found_entry)
        self.assertEqual(found_entry.id, "person1")

        # get_entries_by_class
        entries_of_person = self.facade.get_entries_by_class("Person")
        self.assertEqual(len(entries_of_person), 1)
        self.assertEqual(entries_of_person[0].id, "person1")

    def test_java_api_compatibility(self):
        """The Java-compatible accessors expose properties and the crate."""
        declared = self.facade.get_property_types()
        self.assertEqual(len(declared), 2)
        declared_ids = [item.id for item in declared]
        for expected_id in ("name", "age"):
            self.assertIn(expected_id, declared_ids)

        # Single-property lookup
        looked_up = self.facade.get_property_type("name")
        self.assertIsNotNone(looked_up)
        self.assertEqual(looked_up.id, "name")

        # get_crate: only checked for being non-None here
        self.assertIsNotNone(self.facade.get_crate())

    def test_to_triples(self):
        """Triples are emitted for the class definition and the metadata entry."""
        triples = list(self.facade.to_triples())
        self.assertGreater(len(triples), 0)

        as_text = [(str(subj), str(pred), str(obj)) for subj, pred, obj in triples]

        # Type definition: some object mentions "Class".
        has_class_triple = any("Class" in obj for _, _, obj in as_text)
        self.assertTrue(has_class_triple, "Should generate class definition triples")

        # Metadata entry: some subject mentions "person1".
        has_entry_triple = any("person1" in subj for subj, _, _ in as_text)
        self.assertTrue(has_entry_triple, "Should generate metadata entry triples")

    def test_to_graph(self):
        """The generated graph is non-empty and binds the ``base`` prefix."""
        rdf_graph = self.facade.to_graph()
        self.assertGreater(len(rdf_graph), 0)

        bound_prefixes = dict(rdf_graph.namespaces())
        self.assertIn('base', bound_prefixes)

    def test_to_json(self):
        """``to_json`` returns a dict carrying ``@context`` and ``@graph``."""
        document = self.facade.to_json()

        self.assertIsInstance(document, dict)
        for key in ("@context", "@graph"):
            self.assertIn(key, document)

    def test_write_to_crate(self):
        """``write`` creates a parseable ``ro-crate-metadata.json`` in the target."""
        with tempfile.TemporaryDirectory() as workdir:
            self.facade.write(
                workdir,
                name="Test Crate",
                description="A test RO-Crate",
                license="MIT",
            )

            metadata_path = Path(workdir) / "ro-crate-metadata.json"
            self.assertTrue(metadata_path.exists())

            # The file has to be valid JSON with the JSON-LD top-level keys.
            with metadata_path.open('r') as handle:
                written = json.load(handle)

            for key in ("@context", "@graph"):
                self.assertIn(key, written)

    def test_from_ro_crate_roundtrip(self):
        """A written crate can be read back with types and entries present."""
        with tempfile.TemporaryDirectory() as workdir:
            self.facade.write(workdir, name="Roundtrip Test")

            metadata_path = Path(workdir) / "ro-crate-metadata.json"
            reloaded = SchemaFacade.from_ro_crate(metadata_path)

            self.assertGreater(len(reloaded.types), 0)
            self.assertGreater(len(reloaded.metadata_entries), 0)

    def test_from_dict(self):
        """``from_dict`` imports a class and an instance from a JSON-LD dict."""
        root_dataset = {
            "@id": "./",
            "@type": "Dataset",
            "name": "Test Dataset"
        }
        metadata_descriptor = {
            "@id": "ro-crate-metadata.json",
            "@type": "CreativeWork",
            "about": {"@id": "./"}
        }
        person_class = {
            "@id": "Person",
            "@type": "rdfs:Class",
            "rdfs:label": "Person",
            "rdfs:comment": "A person"
        }
        name_property = {
            "@id": "name",
            "@type": "rdf:Property",
            "rdfs:label": "Name",
            "schema:domainIncludes": {"@id": "Person"},
            "schema:rangeIncludes": {"@id": "http://www.w3.org/2001/XMLSchema#string"}
        }
        person_instance = {
            "@id": "person1",
            "@type": "Person",
            "name": "Alice Johnson"
        }
        crate_dict = {
            "@context": ["https://w3id.org/ro/crate/1.1/context"],
            "@graph": [
                root_dataset,
                metadata_descriptor,
                person_class,
                name_property,
                person_instance,
            ],
        }

        loaded = SchemaFacade.from_dict(crate_dict)

        # The rdfs:Class node becomes a type.
        loaded_type = loaded.get_type("Person")
        self.assertIsNotNone(loaded_type)
        self.assertEqual(loaded_type.label, "Person")

        # The Person-typed node becomes a metadata entry.
        loaded_entry = loaded.get_entry("person1")
        self.assertIsNotNone(loaded_entry)
        self.assertEqual(loaded_entry.class_id, "Person")

    def test_resolve_forward_refs(self):
        """``resolve_forward_refs`` runs and leaves the type/entry counts alone."""
        self.facade.resolve_forward_refs()

        self.assertEqual(len(self.facade.types), 1)
        self.assertEqual(len(self.facade.metadata_entries), 1)

    def test_add_property_type(self):
        """A standalone property can be added and looked up by id."""
        email = TypeProperty(id="email", range_includes=[LiteralType.STRING])

        returned = self.facade.add_property_type(email)

        # Chaining contract: the call returns the facade.
        self.assertEqual(returned, self.facade)

        fetched = self.facade.get_property_type("email")
        self.assertIsNotNone(fetched)
        self.assertEqual(fetched.id, "email")

    def test_complex_schema(self):
        """A two-type schema with explicit restrictions still exports triples."""
        title_prop = TypeProperty(id="title", range_includes=[LiteralType.STRING])
        authors_prop = TypeProperty(id="authors", range_includes=["Person"])

        exactly_one_title = Restriction(
            property_type="title",
            min_cardinality=1,
            max_cardinality=1,
        )
        at_least_one_author = Restriction(
            property_type="authors",
            min_cardinality=1,
            max_cardinality=None,  # Unbounded
        )

        article_type = Type(
            id="Article",
            rdfs_property=[title_prop, authors_prop],
            restrictions=[exactly_one_title, at_least_one_author],
            comment="A research article",
            label="Article",
        )
        article_entry = MetadataEntry(
            id="article1",
            class_id="Article",
            properties={"title": "Great Research"},
            references={"authors": ["person1"]},
        )

        combined = SchemaFacade(
            types=[self.person_type, article_type],
            metadata_entries=[self.person_entry, article_entry],
        )

        self.assertEqual(len(combined.types), 2)
        self.assertEqual(len(combined.metadata_entries), 2)

        # Restrictions are reachable through the type.
        article_restrictions = combined.get_type("Article").get_restrictions()
        self.assertGreater(len(article_restrictions), 0)

        # Export still produces triples.
        exported = list(combined.to_triples())
        self.assertGreater(len(exported), 0)

    def test_empty_facade_operations(self):
        """Lookups on an empty facade return empty results, and JSON has a context."""
        blank = SchemaFacade()

        self.assertEqual(len(blank.get_types()), 0)
        self.assertEqual(len(blank.get_entries()), 0)
        self.assertIsNone(blank.get_type("NonExistent"))
        self.assertIsNone(blank.get_entry("NonExistent"))
        self.assertEqual(len(blank.get_entries_by_class("NonExistent")), 0)

        # Even with no content a JSON-LD context is produced.
        self.assertIn("@context", blank.to_json())


if __name__ == '__main__':
    unittest.main()
#!/usr/bin/env python3
"""
Test standalone properties and restrictions in SchemaFacade
"""

import sys
import tempfile
from pathlib import Path

# Anchor the source directory to this file instead of the current working
# directory, so the test can be launched from anywhere.
sys.path.append(str(Path(__file__).parent.parent / "src"))

from lib_ro_crate_schema.crate.schema_facade import SchemaFacade
from lib_ro_crate_schema.crate.type_property import TypeProperty
from lib_ro_crate_schema.crate.restriction import Restriction
from lib_ro_crate_schema.crate.type import Type


def test_standalone_elements():
    """Round-trip standalone properties and restrictions through an RO-Crate.

    Adds a property and a restriction that are not attached to any type, plus
    one type with its own property, writes the facade to a crate and reads it
    back. The crate is written into a temporary directory that is removed when
    the test ends, so no artefacts are left in the working directory and no
    state leaks between runs.

    Raises:
        AssertionError: if the standalone property or restriction is missing
            after the round-trip, or nothing was imported at all.
    """
    print("🧪 Testing standalone properties and restrictions...")

    facade = SchemaFacade()

    # Test 1: Add standalone property
    standalone_prop = TypeProperty(
        id="globalProperty",
        label="Global Property",
        comment="A property that exists independently of any type",
        range_includes=["xsd:string"],
    )
    facade.add_property_type(standalone_prop)
    print(f"✅ Added standalone property: {standalone_prop.id}")

    # Test 2: Add standalone restriction
    standalone_restriction = Restriction(
        id="globalRestriction",
        property_type="globalProperty",
        min_cardinality=1,
        max_cardinality=5,
    )
    facade.add_restriction(standalone_restriction)
    print(f"✅ Added standalone restriction: {standalone_restriction.id}")

    # Test 3: Add a type with its own properties
    person_name_prop = TypeProperty(
        id="personName",
        label="Person Name",
        comment="Name property specific to Person type",
        range_includes=["xsd:string"],
    )
    person_type = Type(
        id="Person",
        label="Person",
        comment="A person entity",
        rdfs_property=[person_name_prop],
    )
    facade.addType(person_type)
    print(f"✅ Added type with attached property: {person_type.id}")

    # Test 4: Verify counts
    all_properties = facade.get_property_types()
    all_restrictions = facade.get_restrictions()

    print("\n📊 Summary:")
    print(f" Total properties: {len(all_properties)}")
    print(f" Total restrictions: {len(all_restrictions)}")
    print(f" Total types: {len(facade.types)}")

    # Test 5: Check specific retrieval
    retrieved_prop = facade.get_property_type("globalProperty")
    retrieved_restriction = facade.get_restriction("globalRestriction")

    print("\n🔍 Specific retrieval:")
    print(f" Retrieved global property: {'✅' if retrieved_prop else '❌'}")
    print(f" Retrieved global restriction: {'✅' if retrieved_restriction else '❌'}")

    # Test 6: List all properties (standalone + type-attached)
    print("\n📋 All properties found:")
    # Collect the standalone ids once instead of rescanning for every property.
    standalone_ids = {p.id for p in facade.property_types}
    for prop in all_properties:
        status = "standalone" if prop.id in standalone_ids else "type-attached"
        print(f" - {prop.id} ({status})")

    # Test 7: Export to RDF and verify triples include standalone elements
    print("\n🔄 RDF export test:")
    graph = facade.to_graph()
    triple_count = len(graph)
    print(f" Generated {triple_count} RDF triples")

    # Test 8: Round-trip test - export and reimport
    print("\n🔄 Round-trip test:")
    with tempfile.TemporaryDirectory() as output_dir:
        # The crate goes into a fresh sub-directory, as in the original layout
        # (output_crates/test_standalone_output), but under the temp root.
        test_output_path = str(Path(output_dir) / "test_standalone_output")
        facade.write(test_output_path, name="Standalone Elements Test")

        # Import back while the directory still exists.
        imported_facade = SchemaFacade.from_ro_crate(test_output_path)

        imported_properties = imported_facade.get_property_types()
        imported_restrictions = imported_facade.get_restrictions()

        print(f" Original properties: {len(all_properties)}")
        print(f" Imported properties: {len(imported_properties)}")
        print(f" Original restrictions: {len(all_restrictions)}")
        print(f" Imported restrictions: {len(imported_restrictions)}")

        # Check if our standalone elements survived the round-trip
        survived_global_prop = imported_facade.get_property_type("globalProperty")
        survived_global_restr = imported_facade.get_restriction("globalRestriction")

    print(f" Standalone property survived: {'✅' if survived_global_prop else '❌'}")
    print(f" Standalone restriction survived: {'✅' if survived_global_restr else '❌'}")

    print("\n🎉 Test completed!")

    # Verify test assertions instead of returning values
    assert survived_global_prop is not None, "Standalone property should survive round-trip"
    assert survived_global_restr is not None, "Standalone restriction should survive round-trip"
    assert len(imported_properties) > 0, "Should have imported properties"
    assert len(imported_restrictions) > 0, "Should have imported restrictions"


if __name__ == "__main__":
    test_standalone_elements()
    print("\n📈 Test completed successfully!")


# --- tests/test_type.py ---
import unittest
import sys
from pathlib import Path

# Add source to path
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from lib_ro_crate_schema.crate.type import Type
from lib_ro_crate_schema.crate.type_property import TypeProperty
from lib_ro_crate_schema.crate.literal_type import LiteralType
from lib_ro_crate_schema.crate.restriction import Restriction
from rdflib import RDFS, RDF, OWL, Literal, URIRef


class TestType(unittest.TestCase):
    """Test cases for the Type class"""

    def setUp(self):
        """Create a bare type, a required string property and a fully populated type."""
        self.basic_type = Type(id="TestType")

        # Property reused by the fluent-API and restriction tests.
        self.test_property = TypeProperty(
            id="testProperty",
            range_includes=[LiteralType.STRING],
            required=True,
        )

        # Type with every optional field supplied.
        self.complete_type = Type(
            id="Person",
            subclass_of=["https://schema.org/Thing"],
            ontological_annotations=["https://schema.org/Person"],
            rdfs_property=[self.test_property],
            comment="A person entity",
            label="Person",
        )

    def test_type_creation(self):
        """A type built from an id alone gets the schema.org Thing superclass."""
        self.assertEqual(self.basic_type.id, "TestType")
        self.assertIsInstance(self.basic_type.subclass_of, list)
        self.assertEqual(self.basic_type.subclass_of, ["https://schema.org/Thing"])

    def test_fluent_api(self):
        """The setters chain and store the values they are given."""
        type_obj = Type(id="FluentTest")
        result = (
            type_obj
            .setLabel("Test Label")
            .setComment("Test Comment")
            .addProperty(self.test_property)
            .setOntologicalAnnotations(["http://example.org/TestClass"])
        )

        # Check method chaining works
        self.assertEqual(result, type_obj)

        # Check values were set
        self.assertEqual(type_obj.label, "Test Label")
        self.assertEqual(type_obj.comment, "Test Comment")
        self.assertEqual(type_obj.ontological_annotations, ["http://example.org/TestClass"])
        self.assertIn(self.test_property, type_obj.rdfs_property)

    def test_java_api_compatibility(self):
        """The camelCase getters mirror the constructor arguments."""
        self.assertEqual(self.complete_type.getId(), "Person")
        self.assertEqual(self.complete_type.getLabel(), "Person")
        self.assertEqual(self.complete_type.getComment(), "A person entity")
        self.assertEqual(self.complete_type.getSubClassOf(), ["https://schema.org/Thing"])
        self.assertEqual(self.complete_type.getOntologicalAnnotations(), ["https://schema.org/Person"])

    def test_get_restrictions(self):
        """A required property yields a restriction with min cardinality 1."""
        restrictions = self.complete_type.get_restrictions()

        self.assertIsInstance(restrictions, list)
        # assertGreaterEqual reports the actual length when it fails.
        self.assertGreaterEqual(len(restrictions), 1)

        # First restriction that targets the test property, if any.
        test_prop_restriction = next(
            (r for r in restrictions if r.property_type == "testProperty"),
            None,
        )

        self.assertIsNotNone(test_prop_restriction)
        self.assertEqual(test_prop_restriction.min_cardinality, 1)  # required=True

    def test_to_triples(self):
        """Triple export includes a Class typing triple and a label triple."""
        triples = list(self.complete_type.to_triples())

        # Should generate multiple triples
        self.assertGreater(len(triples), 0)

        # Convert to list of tuples for easier testing
        triple_strs = [(str(s), str(p), str(o)) for s, p, o in triples]

        # Check for essential triples - look for Class in the object
        type_triple_found = any("Class" in triple[2] for triple in triple_strs)
        self.assertTrue(type_triple_found, "Should generate rdfs:Class type triple")

        label_triple_found = any("label" in triple[1] for triple in triple_strs)
        self.assertTrue(label_triple_found, "Should generate rdfs:label triple")

    def test_empty_type(self):
        """A type with only an id still produces at least one triple."""
        empty_type = Type(id="MinimalType")
        triples = list(empty_type.to_triples())

        # Should at least generate the class type declaration
        self.assertGreater(len(triples), 0)

    def test_property_addition(self):
        """``addProperty`` chains and appends each property to the type."""
        type_obj = Type(id="TestType")

        prop1 = TypeProperty(id="prop1", range_includes=[LiteralType.STRING])
        prop2 = TypeProperty(id="prop2", range_includes=[LiteralType.INTEGER])

        type_obj.addProperty(prop1).addProperty(prop2)

        self.assertEqual(len(type_obj.rdfs_property), 2)
        self.assertIn(prop1, type_obj.rdfs_property)
        self.assertIn(prop2, type_obj.rdfs_property)

    def test_custom_restrictions(self):
        """Restrictions passed to the constructor are returned by ``get_restrictions``."""
        custom_restriction = Restriction(
            property_type="customProp",
            min_cardinality=2,
            max_cardinality=5,
        )

        type_obj = Type(
            id="RestrictedType",
            restrictions=[custom_restriction],
        )

        restrictions = type_obj.get_restrictions()
        self.assertIn(custom_restriction, restrictions)


if __name__ == '__main__':
    unittest.main()
import unittest
import sys
from pathlib import Path

# Add source to path
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from lib_ro_crate_schema.crate.type_property import TypeProperty
from lib_ro_crate_schema.crate.literal_type import LiteralType
from rdflib import RDF, RDFS, Literal, URIRef


class TestTypeProperty(unittest.TestCase):
    """Unit tests for TypeProperty: construction, fluent setters and RDF export."""

    @staticmethod
    def _as_text(triples):
        """Return each (s, p, o) triple with all three terms converted to str."""
        return [(str(subj), str(pred), str(obj)) for subj, pred, obj in triples]

    def setUp(self):
        """Create an id-only property and one with every field populated."""
        self.basic_property = TypeProperty(id="basicProp")

        self.complete_property = TypeProperty(
            id="completeProp",
            domain_includes=["Person"],
            range_includes=[LiteralType.STRING],
            ontological_annotations=["https://schema.org/name"],
            comment="A complete property for testing",
            label="Complete Property",
            required=True,
        )

    def test_property_creation(self):
        """An id-only property has empty domain/range and ``required`` unset."""
        bare = self.basic_property
        self.assertEqual(bare.id, "basicProp")
        self.assertEqual(bare.domain_includes, [])
        self.assertEqual(bare.range_includes, [])
        self.assertIsNone(bare.required)

    def test_fluent_api(self):
        """Each setter returns the property and stores its argument."""
        prop = TypeProperty(id="fluentTest")

        # Same chain as a single expression, spelled out one call at a time.
        step = prop.setLabel("Test Label")
        step = step.setComment("Test Comment")
        step = step.setTypes([LiteralType.STRING, LiteralType.INTEGER])
        step = step.setRequired(True)
        result = step.setOntologicalAnnotations(["http://example.org/prop"])

        self.assertEqual(result, prop)

        self.assertEqual(prop.label, "Test Label")
        self.assertEqual(prop.comment, "Test Comment")
        self.assertTrue(prop.required)
        self.assertEqual(prop.range_includes, [LiteralType.STRING, LiteralType.INTEGER])
        self.assertEqual(prop.ontological_annotations, ["http://example.org/prop"])

    def test_add_type(self):
        """``addType`` accepts both LiteralType members and plain strings."""
        prop = TypeProperty(id="testProp")
        prop.addType(LiteralType.STRING)
        prop.addType("CustomType")

        for expected in (LiteralType.STRING, "CustomType"):
            self.assertIn(expected, prop.range_includes)

    def test_java_api_compatibility(self):
        """The camelCase getters mirror the constructor arguments."""
        subject = self.complete_property
        self.assertEqual(subject.getId(), "completeProp")
        self.assertEqual(subject.getLabel(), "Complete Property")
        self.assertEqual(subject.getComment(), "A complete property for testing")
        self.assertEqual(subject.getDomain(), ["Person"])
        self.assertEqual(subject.getRange(), [LiteralType.STRING])
        self.assertEqual(subject.getOntologicalAnnotations(), ["https://schema.org/name"])

    def test_cardinality_methods(self):
        """Min cardinality follows ``required``; max cardinality is always 1."""
        scenarios = (
            ({"id": "required", "required": True}, 1),    # required
            ({"id": "optional", "required": False}, 0),   # optional
            ({"id": "unspecified"}, 0),                   # unset behaves as optional
        )
        for ctor_kwargs, expected_min in scenarios:
            prop = TypeProperty(**ctor_kwargs)
            self.assertEqual(prop.get_min_cardinality(), expected_min)
            self.assertEqual(prop.get_max_cardinality(), 1)

    def test_to_triples(self):
        """Export carries the Property typing, a label and a domainIncludes triple."""
        triples = list(self.complete_property.to_triples())
        self.assertGreater(len(triples), 0)

        rendered = self._as_text(triples)

        has_type = any("Property" in obj for _, _, obj in rendered)
        self.assertTrue(has_type, "Should generate rdf:Property type triple")

        has_label = any("label" in pred for _, pred, _ in rendered)
        self.assertTrue(has_label, "Should generate rdfs:label triple")

        has_domain = any("domainIncludes" in pred for _, pred, _ in rendered)
        self.assertTrue(has_domain, "Should generate domainIncludes triple")

    def test_range_includes_xsd_types(self):
        """``xsd:``-prefixed ranges are expanded to XMLSchema URIs."""
        prop = TypeProperty(
            id="xsdTest",
            range_includes=["xsd:string", "xsd:integer", "xsd:boolean"],
        )

        rendered = self._as_text(list(prop.to_triples()))

        expanded = any("XMLSchema#string" in obj for _, _, obj in rendered)
        self.assertTrue(expanded, "Should convert xsd:string to full URI")

    def test_range_includes_base_types(self):
        """``base:``-prefixed ranges appear among the exported objects."""
        prop = TypeProperty(
            id="baseTest",
            range_includes=["base:CustomType"],
        )

        rendered = self._as_text(list(prop.to_triples()))

        present = any("CustomType" in obj for _, _, obj in rendered)
        self.assertTrue(present, "Should handle base: prefixed types")

    def test_ontological_annotations(self):
        """Ontological annotations are exported as equivalentProperty triples."""
        prop = TypeProperty(
            id="ontoTest",
            ontological_annotations=["https://schema.org/name", "http://purl.org/dc/terms/title"],
        )

        rendered = self._as_text(list(prop.to_triples()))

        has_equivalent = any("equivalentProperty" in pred for _, pred, _ in rendered)
        self.assertTrue(has_equivalent, "Should generate owl:equivalentProperty triples")

    def test_empty_property(self):
        """An id-only property still exports a Property declaration."""
        minimal = TypeProperty(id="minimal")
        triples = list(minimal.to_triples())

        self.assertGreater(len(triples), 0)

        # Matched against the string form of the whole triple, not one term.
        declared = any("Property" in str(triple) for triple in triples)
        self.assertTrue(declared)

    def test_multiple_domains(self):
        """Every listed domain class gets its own domainIncludes triple."""
        domains = ["Person", "Organization", "Event"]
        prop = TypeProperty(id="multiDomain", domain_includes=domains)

        rendered = self._as_text(list(prop.to_triples()))

        # Evaluate every domain before asserting on any of them.
        found = {
            domain: any(
                domain in obj and "domainIncludes" in pred
                for _, pred, obj in rendered
            )
            for domain in domains
        }

        for domain in domains:
            self.assertTrue(found[domain], f"Should include {domain} in domain")


if __name__ == '__main__':
    unittest.main()
+""" + +import tempfile +import json +from pathlib import Path + +import pytest +from rocrate.rocrate import ROCrate + +from lib_ro_crate_schema.crate.schema_facade import SchemaFacade + + +class TestUnknownNamespaces: + """Test suite for unknown namespace handling.""" + + def test_unknown_namespace_detection_in_context(self): + """Test that unknown namespaces are automatically detected by get_context.""" + from lib_ro_crate_schema.crate.jsonld_utils import get_context + from rdflib import Graph, URIRef, Literal + from rdflib.namespace import RDF, RDFS + + # Create graph with unknown namespaces + g = Graph() + + # Add triples with unknown pokemon.org namespace + pokemon_ns = "http://pokemon.org/" + pikachu = URIRef(pokemon_ns + "pikachu") + pokemon_name = URIRef(pokemon_ns + "pokemonName") + electric_type = URIRef(pokemon_ns + "ElectricPokemon") + + g.add((pikachu, RDF.type, electric_type)) + g.add((pikachu, pokemon_name, Literal("Pikachu"))) + g.add((pokemon_name, RDF.type, RDF.Property)) + g.add((pokemon_name, RDFS.label, Literal("Pokemon Name"))) + + # Add triples with another unknown namespace + villains_ns = "http://villains.org/" + team_rocket = URIRef(villains_ns + "team_rocket") + criminal_org = URIRef(villains_ns + "CriminalOrganization") + motto = URIRef(villains_ns + "motto") + + g.add((team_rocket, RDF.type, criminal_org)) + g.add((team_rocket, motto, Literal("Prepare for trouble!"))) + + # Also add known namespace + schema_name = URIRef("https://schema.org/name") + g.add((pikachu, schema_name, Literal("Pikachu the Electric Mouse"))) + + # Test context generation + context = get_context(g) + + assert isinstance(context, list) + assert len(context) >= 2 + + # Check that both unknown namespaces were detected + detected_namespaces = {} + if len(context) > 1 and isinstance(context[1], dict): + detected_namespaces = context[1] + + assert "pokemon" in detected_namespaces + assert detected_namespaces["pokemon"] == "http://pokemon.org/" + assert "villains" in 
detected_namespaces + assert detected_namespaces["villains"] == "http://villains.org/" + assert "schema" in detected_namespaces + assert detected_namespaces["schema"] == "https://schema.org/" + + def test_known_namespaces_still_work(self): + """Test that predefined namespaces still work correctly.""" + from lib_ro_crate_schema.crate.jsonld_utils import get_context + from rdflib import Graph, URIRef, Literal + from rdflib.namespace import RDF, RDFS + + g = Graph() + + # Add triples with known namespaces used as predicates and types + person = URIRef("http://someone.example/john") + + # Use example.com as a predicate (will trigger base: namespace) + example_property = URIRef("http://example.com/customProperty") + g.add((person, example_property, Literal("Some value"))) + + # Use schema.org properties and types + schema_name = URIRef("https://schema.org/name") + g.add((person, schema_name, Literal("John Doe"))) + g.add((person, RDF.type, URIRef("https://schema.org/Person"))) + + # Use openbis.org as a predicate + openbis_property = URIRef("http://openbis.org/sampleId") + g.add((person, openbis_property, Literal("sample123"))) + + context = get_context(g) + + assert isinstance(context, list) + if len(context) > 1 and isinstance(context[1], dict): + namespaces = context[1] + assert "base" in namespaces + assert namespaces["base"] == "http://example.com/" + assert "schema" in namespaces + assert namespaces["schema"] == "https://schema.org/" + assert "openbis" in namespaces + assert namespaces["openbis"] == "http://openbis.org/" + + def test_prefix_collision_handling(self): + """Test that prefix collisions are handled gracefully.""" + from lib_ro_crate_schema.crate.jsonld_utils import get_context + from rdflib import Graph, URIRef, Literal + from rdflib.namespace import RDF + + g = Graph() + + # Create a scenario where we might have prefix collisions + # Use pokemon.org multiple times with DIFFERENT types (should get 'pokemon' prefix) + pokemon_uri1 = 
URIRef("http://pokemon.org/pikachu") + pokemon_uri2 = URIRef("http://pokemon.org/raichu") + g.add((pokemon_uri1, RDF.type, URIRef("http://pokemon.org/ElectricPokemon"))) + g.add((pokemon_uri2, RDF.type, URIRef("http://pokemon.org/EvolutionPokemon"))) + + # Use pokemon.com multiple times (should get 'pokemon1' or similar) + pokemon_com_uri1 = URIRef("http://pokemon.com/charizard") + pokemon_com_uri2 = URIRef("http://pokemon.com/blastoise") + g.add((pokemon_com_uri1, RDF.type, URIRef("http://pokemon.com/FirePokemon"))) + g.add((pokemon_com_uri2, RDF.type, URIRef("http://pokemon.com/WaterPokemon"))) + + context = get_context(g) + + if isinstance(context, list) and len(context) > 1 and isinstance(context[1], dict): + namespaces = context[1] + + # Both namespaces should be detected with different prefixes + pokemon_prefixes = [k for k, v in namespaces.items() + if 'pokemon.' in v] + assert len(pokemon_prefixes) == 2 + + # Verify the actual mappings exist + namespace_values = list(namespaces.values()) + assert "http://pokemon.org/" in namespace_values + assert "http://pokemon.com/" in namespace_values + + def test_minimum_usage_threshold(self): + """Test that namespaces need minimum usage count to be detected.""" + from lib_ro_crate_schema.crate.jsonld_utils import get_context + from rdflib import Graph, URIRef, Literal + from rdflib.namespace import RDF + + g = Graph() + + # Add only one URI from a namespace (below threshold) + single_use = URIRef("http://rarely-used.org/single") + g.add((single_use, RDF.type, URIRef("https://schema.org/Thing"))) + + # Add multiple URIs from another namespace (above threshold) + frequent_ns = "http://frequent.org/" + for i in range(3): + uri = URIRef(f"{frequent_ns}item{i}") + g.add((uri, RDF.type, URIRef(f"{frequent_ns}ItemType"))) + # Add another usage to ensure it meets the threshold + g.add((uri, URIRef(f"{frequent_ns}hasProperty"), Literal(f"value{i}"))) + + context = get_context(g) + + if isinstance(context, list) and len(context) 
> 1 and isinstance(context[1], dict): + namespaces = context[1] + + # frequent.org should be detected + assert "frequent" in namespaces + assert namespaces["frequent"] == "http://frequent.org/" + + # rarely-used.org should NOT be detected (only 1 usage) + rarely_used_prefixes = [k for k, v in namespaces.items() + if 'rarely-used.org' in v] + assert len(rarely_used_prefixes) == 0 + + +@pytest.fixture +def temp_ro_crate(): + """Create a temporary RO-Crate with unknown namespaces for testing.""" + crate = ROCrate() + + # Add entities with unknown namespaces + pokemon_entity = { + '@id': 'http://pokemon.org/pikachu', + '@type': 'http://pokemon.org/ElectricPokemon', + 'http://pokemon.org/pokemonName': 'Pikachu', + 'http://pokemon.org/type': 'Electric', + 'https://schema.org/name': 'Pikachu the Electric Mouse' + } + + villain_entity = { + '@id': 'http://villains.org/team_rocket', + '@type': 'http://villains.org/CriminalOrganization', + 'http://villains.org/motto': 'Prepare for trouble!', + 'https://schema.org/name': 'Team Rocket' + } + + crate.add_jsonld(pokemon_entity) + crate.add_jsonld(villain_entity) + + return crate + + +class TestRoundTripNamespaces: + """Test namespace handling through full import/export cycles.""" + + def test_rocrate_roundtrip_with_unknown_namespaces(self, temp_ro_crate): + """Test that unknown namespaces survive import/export cycles.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Export original crate + temp_ro_crate.metadata.write(temp_path) + metadata_file = temp_path / 'ro-crate-metadata.json' + original_data = json.loads(metadata_file.read_text()) + + # Verify original contains full URIs + original_entities = original_data.get('@graph', []) + pokemon_entities = [e for e in original_entities + if 'pokemon.org' in e.get('@id', '')] + assert len(pokemon_entities) >= 1 + + # Import via SchemaFacade + imported_facade = SchemaFacade.from_ro_crate(temp_path) + assert len(imported_facade.metadata_entries) > 
0 + + # Re-export and check context + final_crate = imported_facade.get_crate() + + with tempfile.TemporaryDirectory() as final_dir: + final_crate.metadata.write(final_dir) + final_metadata_file = Path(final_dir) / 'ro-crate-metadata.json' + final_data = json.loads(final_metadata_file.read_text()) + + # Check that some form of context enhancement occurred + final_context = final_data.get('@context', []) + assert isinstance(final_context, list) + if len(final_context) > 1: + assert isinstance(final_context[1], dict) + # Should have some namespace mappings + assert len(final_context[1]) > 0 + + +if __name__ == "__main__": + pytest.main([__file__]) \ No newline at end of file diff --git a/0.2.x/lib/test-data/test-01-import-ro-crate-metadata/output/DELETE_ME b/0.2.x/lib/test-data/test-01-import-ro-crate-metadata/output/DELETE_ME deleted file mode 100644 index e69de29..0000000 diff --git a/0.2.x/lib/test-data/test-01-import-ro-crate-metadata/readme.txt b/0.2.x/lib/test-data/test-01-import-ro-crate-metadata/readme.txt deleted file mode 100644 index 5a261b7..0000000 --- a/0.2.x/lib/test-data/test-01-import-ro-crate-metadata/readme.txt +++ /dev/null @@ -1,2 +0,0 @@ -input: ro-crate-metadata.json -output: schema.json, the schema read from the ro-crate-metadata.json in json-schema format \ No newline at end of file diff --git a/0.2.x/lib/test-data/test-02-export-ro-crate-metadata/DELETE_ME b/0.2.x/lib/test-data/test-02-export-ro-crate-metadata/DELETE_ME deleted file mode 100644 index e69de29..0000000 diff --git a/0.2.x/lib/test-data/test-02-export-ro-crate-metadata/readme.txt b/0.2.x/lib/test-data/test-02-export-ro-crate-metadata/readme.txt deleted file mode 100644 index f2728b5..0000000 --- a/0.2.x/lib/test-data/test-02-export-ro-crate-metadata/readme.txt +++ /dev/null @@ -1,2 +0,0 @@ -input: schema.json, the schema in json-schema format -output: ro-crate-metadata.json \ No newline at end of file diff --git 
a/0.2.x/lib/test-data/test-03-import-export-ro-crate-metadata/DELETE_ME b/0.2.x/lib/test-data/test-03-import-export-ro-crate-metadata/DELETE_ME deleted file mode 100644 index e69de29..0000000 diff --git a/0.2.x/lib/test-data/test-03-import-export-ro-crate-metadata/readme.txt b/0.2.x/lib/test-data/test-03-import-export-ro-crate-metadata/readme.txt deleted file mode 100644 index fa86a3d..0000000 --- a/0.2.x/lib/test-data/test-03-import-export-ro-crate-metadata/readme.txt +++ /dev/null @@ -1,2 +0,0 @@ -input: ro-crate-metadata.json , the metadata to import into the internal model -output: ro-crate-metadata.json , the metadata exported from the internal model \ No newline at end of file diff --git a/0.2.x/lib/test-data/test-04-import-ro-crate-metadata-data/input/DELETE_ME b/0.2.x/lib/test-data/test-04-import-ro-crate-metadata-data/input/DELETE_ME deleted file mode 100644 index e69de29..0000000 diff --git a/0.2.x/lib/test-data/test-04-import-ro-crate-metadata-data/output/DELETE_ME b/0.2.x/lib/test-data/test-04-import-ro-crate-metadata-data/output/DELETE_ME deleted file mode 100644 index e69de29..0000000 diff --git a/0.2.x/lib/test-data/test-04-import-ro-crate-metadata-data/readme.txt b/0.2.x/lib/test-data/test-04-import-ro-crate-metadata-data/readme.txt deleted file mode 100644 index b06441b..0000000 --- a/0.2.x/lib/test-data/test-04-import-ro-crate-metadata-data/readme.txt +++ /dev/null @@ -1,2 +0,0 @@ -input: ro-crate.zip containing ro-crate-metadata.json and some folders with data -output: schema.json, the schema read from the ro-crate-metadata.json in json-schema format \ No newline at end of file diff --git a/0.2.x/lib/test-data/test-05-export-ro-crate-metadata-data/DELETE_ME b/0.2.x/lib/test-data/test-05-export-ro-crate-metadata-data/DELETE_ME deleted file mode 100644 index e69de29..0000000 diff --git a/0.2.x/lib/test-data/test-05-export-ro-crate-metadata-data/readme.txt b/0.2.x/lib/test-data/test-05-export-ro-crate-metadata-data/readme.txt deleted file mode 
100644 index 70320df..0000000 --- a/0.2.x/lib/test-data/test-05-export-ro-crate-metadata-data/readme.txt +++ /dev/null @@ -1,2 +0,0 @@ -input: schema.json, the schema in json-schema format and the folders containing the data -output: ro-crate.zip with ro-crate-metadata.json and the folders containing the data \ No newline at end of file diff --git a/0.2.x/lib/test-data/test-06-import-export-ro-crate-metadata-data/DELETE_ME b/0.2.x/lib/test-data/test-06-import-export-ro-crate-metadata-data/DELETE_ME deleted file mode 100644 index e69de29..0000000 diff --git a/0.2.x/lib/test-data/test-06-import-export-ro-crate-metadata-data/readme.txt b/0.2.x/lib/test-data/test-06-import-export-ro-crate-metadata-data/readme.txt deleted file mode 100644 index 8c58d0d..0000000 --- a/0.2.x/lib/test-data/test-06-import-export-ro-crate-metadata-data/readme.txt +++ /dev/null @@ -1,2 +0,0 @@ -input: ro-crate.zip containing ro-crate-metadata.json and the folders containing the data to import -output: ro-crate.zip containing ro-crate-metadata.json and the folders containing the data exported \ No newline at end of file diff --git a/0.2.x/lib/test-data/test-07-import-ro-crate-metadata-data-types/input/DELETE_ME b/0.2.x/lib/test-data/test-07-import-ro-crate-metadata-data-types/input/DELETE_ME deleted file mode 100644 index e69de29..0000000 diff --git a/0.2.x/lib/test-data/test-07-import-ro-crate-metadata-data-types/output/DELETE_ME b/0.2.x/lib/test-data/test-07-import-ro-crate-metadata-data-types/output/DELETE_ME deleted file mode 100644 index e69de29..0000000 diff --git a/0.2.x/lib/test-data/test-08-import-ro-crate-metadata-classes/input/DELETE_ME b/0.2.x/lib/test-data/test-08-import-ro-crate-metadata-classes/input/DELETE_ME deleted file mode 100644 index e69de29..0000000 diff --git a/0.2.x/lib/test-data/test-08-import-ro-crate-metadata-classes/output/DELETE_ME b/0.2.x/lib/test-data/test-08-import-ro-crate-metadata-classes/output/DELETE_ME deleted file mode 100644 index e69de29..0000000 diff 
--git a/0.2.x/lib/test-data/test-09-import-ro-crate-metadata-classes-with-inheritance/input/DELETE_ME b/0.2.x/lib/test-data/test-09-import-ro-crate-metadata-classes-with-inheritance/input/DELETE_ME deleted file mode 100644 index e69de29..0000000 diff --git a/0.2.x/lib/test-data/test-09-import-ro-crate-metadata-classes-with-inheritance/output/DELETE_ME b/0.2.x/lib/test-data/test-09-import-ro-crate-metadata-classes-with-inheritance/output/DELETE_ME deleted file mode 100644 index e69de29..0000000 diff --git a/0.2.x/lib/test-data/test-10-import-ro-crate-metadata-data-types/input/DELETE_ME b/0.2.x/lib/test-data/test-10-import-ro-crate-metadata-data-types/input/DELETE_ME deleted file mode 100644 index e69de29..0000000 diff --git a/0.2.x/lib/test-data/test-10-import-ro-crate-metadata-data-types/output/DELETE_ME b/0.2.x/lib/test-data/test-10-import-ro-crate-metadata-data-types/output/DELETE_ME deleted file mode 100644 index e69de29..0000000 diff --git a/0.2.x/lib/test-data/test-11-export-ro-crate-metadata-classes/input/DELETE_ME b/0.2.x/lib/test-data/test-11-export-ro-crate-metadata-classes/input/DELETE_ME deleted file mode 100644 index e69de29..0000000 diff --git a/0.2.x/lib/test-data/test-11-export-ro-crate-metadata-classes/output/DELETE_ME b/0.2.x/lib/test-data/test-11-export-ro-crate-metadata-classes/output/DELETE_ME deleted file mode 100644 index e69de29..0000000 diff --git a/0.2.x/lib/test-data/test-12-export-ro-crate-metadata-classes-with-inheritance/input/DELETE_ME b/0.2.x/lib/test-data/test-12-export-ro-crate-metadata-classes-with-inheritance/input/DELETE_ME deleted file mode 100644 index e69de29..0000000 diff --git a/0.2.x/lib/test-data/test-12-export-ro-crate-metadata-classes-with-inheritance/output/DELETE_ME b/0.2.x/lib/test-data/test-12-export-ro-crate-metadata-classes-with-inheritance/output/DELETE_ME deleted file mode 100644 index e69de29..0000000 From 91a2cab4ac9f05a0c8f9e9169a39283346359cc9 Mon Sep 17 00:00:00 2001 From: Simone Baffelli Date: Fri, 31 Oct 
2025 20:45:28 +0100 Subject: [PATCH 2/3] Improved phrasing of README --- 0.2.x/lib/python/lib-ro-crate-schema/README.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/0.2.x/lib/python/lib-ro-crate-schema/README.md b/0.2.x/lib/python/lib-ro-crate-schema/README.md index df0bd54..59602d6 100644 --- a/0.2.x/lib/python/lib-ro-crate-schema/README.md +++ b/0.2.x/lib/python/lib-ro-crate-schema/README.md @@ -3,13 +3,17 @@ [![Python 3.13+](https://img.shields.io/badge/python-3.13+-blue.svg)](https://www.python.org/downloads/) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) -A Pythonic library for creating and managing [RO-Crates](https://www.researchobject.org/ro-crate/) with schema definitions using Pydantic models. +A Pythonic library for creating and managing [RO-Crates](https://www.researchobject.org/ro-crate/). **🚀 New to RO-Crate? Start with the [Quick Start Guide](QUICKSTART.md)!** ## What is it? -This library provides a clean, type-safe interface for creating RO-Crates (Research Object Crates) - a community standard for packaging research data with their metadata. It uses familiar Pydantic models with decorators to define schemas that automatically generate RDF/OWL definitions. +This library provides an interface for creating RO-Crates (Research Object Crates) - a community standard for packaging research data with their metadata. In addition to conventional crates, it lets you easily add objects of custom types (not present in Schema.org) and encode them as RDF according to the [profile](../../../spec.md). +The module offers two interfaces for working with crates: + +1. Pydantic models with decorators to declaratively define schemas that automatically generate RDF/OWL definitions. +2. Programmatic builder-style interfaces for integration with other tooling or for working with objects whose schema isn't known at *compile time*. 
## Installation @@ -140,7 +144,7 @@ pytest tests/ ### Manual Construction (without decorators) -For fine-grained control, you can manually construct Type, TypeProperty, and MetadataEntry objects: +For fine-grained control, you can manually construct Type, TypeProperty, and MetadataEntry objects. This is useful, for example, when constructing objects from other schemas, such as SQL or JSON schemas, which don't immediately correspond to Pydantic models. You can use it like this: ```python from lib_ro_crate_schema import SchemaFacade, Type, TypeProperty, MetadataEntry From 87ece646463591899af09df61d01dae4e4bc8a66 Mon Sep 17 00:00:00 2001 From: Simone Baffelli Date: Fri, 31 Oct 2025 20:46:39 +0100 Subject: [PATCH 3/3] Fixed reference to Pascals repo --- 0.2.x/lib/python/lib-ro-crate-schema/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/0.2.x/lib/python/lib-ro-crate-schema/README.md b/0.2.x/lib/python/lib-ro-crate-schema/README.md index 59602d6..478f87d 100644 --- a/0.2.x/lib/python/lib-ro-crate-schema/README.md +++ b/0.2.x/lib/python/lib-ro-crate-schema/README.md @@ -26,7 +26,7 @@ pip install lib-ro-crate-schema ### From Source ```bash -git clone https://github.com/Snowwpanda/ro-crate-interoperability-profile.git +git clone https://github.com/researchobjectschema/ro-crate-interoperability-profile cd ro-crate-interoperability-profile/0.2.x/lib/python/lib-ro-crate-schema pip install -e . ```