Working on reconstruction

This commit is contained in:
Simone Baffelli
2025-08-19 10:30:37 +02:00
parent 0a9b97d090
commit 163033539d
6 changed files with 92 additions and 41 deletions

View File

@@ -55,20 +55,16 @@ def add_schema_to_crate(schema: SchemaFacade, crate: ROCrate) -> dict:
Emits triples from schema, builds a graph, compacts JSON-LD, adds objects to the crate,
writes to a tempfile, updates context using pyld, and returns the final JSON-LD dict.
"""
triples = schema.to_triples()
metadata_graph = Graph()
metadata_graph.bind("base", BASE)
for t in triples:
metadata_graph.add(t)
metadata_graph = schema.to_graph()
# Serialize and compact JSON-LD
ld_ser = metadata_graph.serialize(format="json-ld")
ld_obj = pyld.jsonld.json.loads(ld_ser)
context = {**get_context(metadata_graph), **RO_EXTRA_CTX}
ld_obj_compact = update_jsonld_context(ld_obj, context)
breakpoint()
# Add each object in the compacted graph to the crate
for obj in ld_obj_compact.get("@graph", []):
crate.add_jsonld(obj)
# Use the tempfile-based utility to update context and return
new_crate = emit_crate_with_context(crate, context)
return new_crate

View File

@@ -3,7 +3,7 @@ from rdflib import Graph
from rdflib import Node, URIRef, RDF, IdentifiedNode
from rdflib import Namespace
from rdflib.namespace import NamespaceManager
from typing import TypeVar
type Triple = tuple[IdentifiedNode, IdentifiedNode, Node]
SCHEMA = Namespace("http://schema.org/")
@@ -14,6 +14,11 @@ class RDFSerializable(Protocol):
def to_rdf(self) -> list[Triple]: ...
class RDFDeserializable[T](Protocol):
@classmethod
def from_rdf(cls, triples: list[Triple]): ...
def is_type(id: str, type: URIRef) -> Triple:
"""
Prepare a triple that asserts that something

View File

@@ -1,5 +1,5 @@
from typing import Literal as TLiteral
from lib_ro_crate_schema.crate.rdf import is_type, object_id
from lib_ro_crate_schema.crate.rdf import is_type, object_id, Triple
from pydantic import BaseModel
from rdflib import OWL, Literal, XSD

View File

@@ -3,12 +3,12 @@ from collections import defaultdict
from typing import Generator, Literal
from lib_ro_crate_schema.crate.metadata_entry import MetadataEntry
from lib_ro_crate_schema.crate.rdf import Triple, object_id
from lib_ro_crate_schema.crate.rdf import BASE, Triple, object_id
from lib_ro_crate_schema.crate.type import Type
from lib_ro_crate_schema.crate.type_property import TypeProperty
from pydantic import BaseModel
from lib_ro_crate_schema.crate.rdf import SCHEMA
from rdflib import RDFS, RDF
from rdflib import RDFS, RDF, Graph
type TypeRegistry = dict[TypeProperty, list[Type]]
@@ -32,26 +32,26 @@ def types_to_triples(used_types: TypeRegistry) -> Generator[Triple, None, None]:
class SchemaFacade(BaseModel):
types: list[Type]
properties: list[TypeProperty]
metadata_entries: list[MetadataEntry]
ro = ROC
def collect_properties(self) -> TypeRegistry:
    """
    Creates a registry of RDFS properties used in all RDFS classes.
    Maps TypeProperty objects to the list of Type objects using them.

    Types are listed in the order they appear in ``self.types``; a type
    without an ``rdfs_property`` attribute (or with an empty/None one)
    contributes nothing.
    """
    # Build the mapping promised by the return annotation; the previous
    # implementation returned a flat list of (property, type) tuples.
    registry: TypeRegistry = {}
    for current_type in self.types:
        for prop in getattr(current_type, "rdfs_property", None) or []:
            registry.setdefault(prop, []).append(current_type)
    return registry
prefix: str = "base"
def to_triples(self) -> Generator[Triple, None, None]:
    """
    Yield every triple of the schema.

    Triples derived from the property registry come first, followed by the
    triples of each type and finally those of each metadata entry.
    """
    yield from types_to_triples(self.collect_properties())
    for current_type in self.types:
        yield from current_type.to_triples()
    for entry in self.metadata_entries:
        yield from entry.to_triples()
def get_properties(self) -> Generator[TypeProperty, None, None]:
    """
    Yield each distinct property declared by the types of this schema.

    A property shared by several types is yielded once. Properties come out
    in first-seen order, so repeated runs produce the same sequence.
    """
    # dict.fromkeys de-duplicates like a set but keeps insertion order.
    unique_properties = dict.fromkeys(
        prop
        for current_type in self.types
        for prop in current_type.rdfs_property
    )
    yield from unique_properties
def to_graph(self) -> Graph:
    """
    Build a new graph holding every triple produced by ``to_triples``.

    The ``BASE`` namespace is bound in the returned graph under the prefix
    stored in ``self.prefix``.
    """
    local_graph = Graph()
    # Plain loop: the additions are side effects, no result list is needed.
    for triple in self.to_triples():
        local_graph.add(triple)
    local_graph.bind(prefix=self.prefix, namespace=BASE)
    return local_graph

View File

@@ -42,9 +42,11 @@ class Type(BaseModel):
]
for ann in annotations:
yield ann
for prop in self.get_restrictions():
print(prop)
yield from prop.to_triples()
for restriction in self.get_restrictions():
yield from restriction.to_triples()
for prop in self.rdfs_property:
prop_with_domain = prop.model_copy(update=dict(domain_includes=[self.id]))
yield from prop_with_domain.to_triples()
# def to_ro(self) -> RdfsClass:
# return RdfsClass(id=self.id,

View File

@@ -1,3 +1,4 @@
from collections import defaultdict
import json
from lib_ro_crate_schema.crate.rdf import BASE
from lib_ro_crate_schema.crate.type import Type
@@ -6,12 +7,9 @@ from lib_ro_crate_schema.crate.literal_type import LiteralType
from lib_ro_crate_schema.crate.metadata_entry import MetadataEntry
from lib_ro_crate_schema.crate.schema_facade import SchemaFacade
from rocrate.rocrate import ROCrate
from rdflib import Graph
from rdflib import Graph, RDF, RDFS, OWL, URIRef, Node
from lib_ro_crate_schema.crate.jsonld_utils import (
add_schema_to_crate,
emit_crate_with_context,
update_jsonld_context,
get_context,
)
@@ -31,6 +29,16 @@ def main():
label="",
)
participant_type = Type(
id="Participant",
type="Type",
subclass_of=["https://schema.org/Thing"],
ontological_annotations=["http://purl.org/dc/terms/creator"],
rdfs_property=[has_name, has_identifier],
comment="",
label="",
)
# Example MetadataEntry using property and type references (object and string)
creator_entry = MetadataEntry(
id="creator1",
@@ -42,21 +50,20 @@ def main():
references={},
)
# Example with string property references (for flexibility)
creator_entry_str = MetadataEntry(
id="creator2",
types=[creator_type],
participant_entry = MetadataEntry(
id="participant",
types=[participant_type],
props={
"hasName": "Jane Author",
"hasName": "Karl Participant",
"hasIdentifier": "https://orcid.org/0000-0000-0000-0001",
},
references={},
)
schema = SchemaFacade(
types=[creator_type],
properties=[has_name, has_identifier],
metadata_entries=[creator_entry, creator_entry_str],
types=[creator_type, participant_type],
# properties=[has_name, has_identifier],
metadata_entries=[creator_entry, participant_entry],
)
crate = ROCrate()
@@ -64,8 +71,49 @@ def main():
crate.name = "mtcrate"
crate.description = "test crate"
res = add_schema_to_crate(schema, crate)
schema_graph = schema.to_graph()
reconstruct(schema_graph)
print(json.dumps(res))
def reconstruct(graph: Graph) -> SchemaFacade | None:
    """
    Walk an RDF graph and report the schema building blocks found in it.

    Subjects typed as ``rdfs:Class``, ``rdf:Property`` and
    ``owl:Restriction`` are looked up in that order, and each one is printed
    together with its predicate -> objects mapping.

    The actual rebuilding of model objects is not implemented yet, so the
    function currently always returns ``None``.
    """

    # Utility functions for reconstruction
    def get_subjects_by_type(graph: Graph, rdf_type: Node) -> set[Node]:
        """Return all subjects of a given rdf:type."""
        return set(graph.subjects(RDF.type, rdf_type))

    def get_predicate_object_map(
        graph: Graph, subject: Node
    ) -> dict[URIRef, list[Node]]:
        """Return a dict of predicate -> list of objects for a given subject."""
        # A predicate may occur several times on one subject; keep every
        # object instead of letting later ones overwrite earlier ones.
        grouped: dict[URIRef, list[Node]] = {}
        for predicate, obj in graph.predicate_objects(subject):
            grouped.setdefault(predicate, []).append(obj)
        return grouped

    # Reconstruct in correct order: Classes, Properties, Restrictions, Metadata Entries
    print("Reconstructing Classes:")
    for class_subject in get_subjects_by_type(graph, RDFS.Class):
        props = get_predicate_object_map(graph, class_subject)
        print(f" Class: {class_subject}, {props}")
        # TODO: instantiate Type(...) here

    print("Reconstructing Properties:")
    for prop_subject in get_subjects_by_type(graph, RDF.Property):
        props = get_predicate_object_map(graph, prop_subject)
        print(f" Property: {prop_subject}, {props}")
        # TODO: instantiate TypeProperty(...) here

    print("Reconstructing Restrictions:")
    for restr_subject in get_subjects_by_type(graph, OWL.Restriction):
        props = get_predicate_object_map(graph, restr_subject)
        print(f" Restriction: {restr_subject}, {props}")
        # TODO: handle restrictions here

    # TODO: reconstruct metadata entries once they carry a dedicated
    # rdf:type (e.g. a PROFILE.MetadataEntry class) to select them by,
    # using the same subject lookup / predicate map / print pattern as above.

    # No debugger hook here: a stray breakpoint() would block every
    # non-interactive run of the script.
    return None
if __name__ == "__main__":
main()