diff --git a/scientific-image-metadata-provenance-guard/README.md b/scientific-image-metadata-provenance-guard/README.md
new file mode 100644
index 00000000..5680a9bb
--- /dev/null
+++ b/scientific-image-metadata-provenance-guard/README.md
@@ -0,0 +1,42 @@
+# Scientific Image Metadata Provenance Guard
+
+This module adds a dependency-free provenance guard for scientific image
+records before they are published into the Scientific Knowledge Graph. It is
+focused on image nodes and safe graph edges, not duplicate-panel detection or
+manuscript image-integrity review.
+
+## What It Checks
+
+- Source artifact, dataset, and protocol linkage for each image node.
+- SHA-256 checksum format before KG publication.
+- Acquisition date versus publication date chronology.
+- Channel and pixel-size metadata for microscopy images, including confocal, fluorescence, widefield, and electron modalities.
+- License and access state before public graph release.
+- Private identifier leakage in filenames, metadata, and privacy tags.
+- Derived figure provenance back to source images or datasets.
+
+## Outputs
+
+The guard emits:
+
+- A JSON report with per-record `publish`, `review`, or `block` decisions.
+- Safe graph edges suitable for KG ingestion or reviewer inspection.
+- Redaction actions for private or high-risk fields.
+- A Markdown reviewer summary.
+- A compact SVG decision chart for PR review and demos.
+
+## Run
+
+```bash
+python3 scientific-image-metadata-provenance-guard/image_metadata_provenance_guard.py \
+ --sample \
+ --json scientific-image-metadata-provenance-guard/demo/report.json \
+ --markdown scientific-image-metadata-provenance-guard/demo/summary.md \
+ --svg scientific-image-metadata-provenance-guard/demo/graph.svg
+```
+
+## Test
+
+```bash
+python3 -m unittest scientific-image-metadata-provenance-guard/test_image_metadata_provenance_guard.py
+```
diff --git a/scientific-image-metadata-provenance-guard/demo/graph.svg b/scientific-image-metadata-provenance-guard/demo/graph.svg
new file mode 100644
index 00000000..04c353d8
--- /dev/null
+++ b/scientific-image-metadata-provenance-guard/demo/graph.svg
@@ -0,0 +1,7 @@
+
diff --git a/scientific-image-metadata-provenance-guard/demo/report.json b/scientific-image-metadata-provenance-guard/demo/report.json
new file mode 100644
index 00000000..a2bfe3cb
--- /dev/null
+++ b/scientific-image-metadata-provenance-guard/demo/report.json
@@ -0,0 +1,316 @@
+{
+ "guard": "scientific-image-metadata-provenance-guard",
+ "records": [
+ {
+ "decision": "publish",
+ "findings": [],
+ "image_id": "IMG-CELL-001",
+ "redactions": [],
+ "release_scope": "public_kg",
+ "safe_edges": [
+ {
+ "evidence": "metadata",
+ "object": "dataset:DS-ASTROCYTE-ATLAS-2026",
+ "predicate": "member_of_dataset",
+ "subject": "image:IMG-CELL-001",
+ "visibility": "public"
+ },
+ {
+ "evidence": "metadata",
+ "object": "protocol:PR-IMMUNO-STAIN-V2",
+ "predicate": "uses_protocol",
+ "subject": "image:IMG-CELL-001",
+ "visibility": "public"
+ },
+ {
+ "evidence": "metadata",
+ "object": "instrument:INST-CONFOCAL-A1",
+ "predicate": "captured_by",
+ "subject": "image:IMG-CELL-001",
+ "visibility": "public"
+ },
+ {
+ "evidence": "metadata",
+ "object": "artifact:s3://scibase-lab/raw/astrocyte/IMG-CELL-001.ome.tiff",
+ "predicate": "has_source_artifact",
+ "subject": "image:IMG-CELL-001",
+ "visibility": "public"
+ },
+ {
+ "evidence": "metadata",
+ "object": "license:CC-BY-4.0",
+ "predicate": "released_under",
+ "subject": "image:IMG-CELL-001",
+ "visibility": "public"
+ }
+ ],
+ "title": "Confocal astrocyte culture panel"
+ },
+ {
+ "decision": "review",
+ "findings": [
+ {
+ "code": "MISSING_CHANNELS",
+ "field": "channels",
+ "message": "Microscopy image should include channel metadata before KG recommendation use.",
+ "severity": "review"
+ },
+ {
+ "code": "MISSING_PIXEL_SIZE",
+ "field": "pixel_size_um",
+ "message": "Microscopy image should include pixel_size_um for scale-aware graph navigation.",
+ "severity": "review"
+ }
+ ],
+ "image_id": "IMG-SLIDE-002",
+ "redactions": [],
+ "release_scope": "review_queue",
+ "safe_edges": [
+ {
+ "evidence": "metadata",
+ "object": "dataset:DS-EMBRYO-STAIN-2026",
+ "predicate": "member_of_dataset",
+ "subject": "image:IMG-SLIDE-002",
+ "visibility": "internal"
+ },
+ {
+ "evidence": "metadata",
+ "object": "protocol:PR-EMBRYO-STAIN-V1",
+ "predicate": "uses_protocol",
+ "subject": "image:IMG-SLIDE-002",
+ "visibility": "internal"
+ },
+ {
+ "evidence": "metadata",
+ "object": "instrument:INST-WIDEFIELD-7",
+ "predicate": "captured_by",
+ "subject": "image:IMG-SLIDE-002",
+ "visibility": "internal"
+ },
+ {
+ "evidence": "metadata",
+ "object": "artifact:s3://scibase-lab/raw/embryo/IMG-SLIDE-002.tiff",
+ "predicate": "has_source_artifact",
+ "subject": "image:IMG-SLIDE-002",
+ "visibility": "internal"
+ },
+ {
+ "evidence": "metadata",
+ "object": "license:CC-BY-4.0",
+ "predicate": "released_under",
+ "subject": "image:IMG-SLIDE-002",
+ "visibility": "internal"
+ }
+ ],
+ "title": "Widefield embryo slide"
+ },
+ {
+ "decision": "block",
+ "findings": [
+ {
+ "code": "ACQUIRED_AFTER_PUBLICATION",
+ "field": "acquired_at",
+ "message": "Image acquisition date is later than the publication date.",
+ "severity": "block"
+ },
+ {
+ "code": "NON_PUBLIC_ACCESS",
+ "field": "access",
+ "message": "Image is not public and should be kept out of the public KG release.",
+ "severity": "review"
+ },
+ {
+ "code": "LICENSE_REVIEW_REQUIRED",
+ "field": "license",
+ "message": "Image license is missing or not clearly open for graph publication.",
+ "severity": "review"
+ },
+ {
+ "code": "PRIVATE_TAG_PRESENT",
+ "field": "privacy_tags",
+ "message": "Privacy tags indicate private identifiers that must not enter public KG nodes.",
+ "severity": "block"
+ },
+ {
+ "code": "PRIVATE_VALUE_IN_FILENAME",
+ "field": "filename",
+ "message": "Filename appears to contain private identifiers.",
+ "severity": "block"
+ },
+ {
+ "code": "PRIVATE_METADATA_FIELD",
+ "field": "metadata.mrn",
+ "message": "Metadata contains a private field name or value.",
+ "severity": "block"
+ },
+ {
+ "code": "PRIVATE_METADATA_FIELD",
+ "field": "metadata.patient_name",
+ "message": "Metadata contains a private field name or value.",
+ "severity": "block"
+ }
+ ],
+ "image_id": "IMG-PATH-003",
+ "redactions": [
+ {
+ "action": "remove from public graph payload",
+ "field": "privacy_tags"
+ },
+ {
+ "action": "replace with stable image_id",
+ "field": "filename"
+ },
+ {
+ "action": "drop before KG release",
+ "field": "metadata.mrn"
+ },
+ {
+ "action": "drop before KG release",
+ "field": "metadata.patient_name"
+ }
+ ],
+ "release_scope": "do_not_publish",
+ "safe_edges": [
+ {
+ "evidence": "metadata",
+ "object": "dataset:DS-PATHOLOGY-RESTRICTED",
+ "predicate": "member_of_dataset",
+ "subject": "image:IMG-PATH-003",
+ "visibility": "internal"
+ },
+ {
+ "evidence": "metadata",
+ "object": "protocol:PR-HISTOLOGY-REVIEW",
+ "predicate": "uses_protocol",
+ "subject": "image:IMG-PATH-003",
+ "visibility": "internal"
+ },
+ {
+ "evidence": "metadata",
+ "object": "instrument:INST-SLIDE-SCANNER-2",
+ "predicate": "captured_by",
+ "subject": "image:IMG-PATH-003",
+ "visibility": "internal"
+ },
+ {
+ "evidence": "metadata",
+ "object": "artifact:s3://restricted-lab/raw/pathology/IMG-PATH-003.svs",
+ "predicate": "has_source_artifact",
+ "subject": "image:IMG-PATH-003",
+ "visibility": "internal"
+ },
+ {
+ "evidence": "metadata",
+ "object": "license:restricted",
+ "predicate": "released_under",
+ "subject": "image:IMG-PATH-003",
+ "visibility": "internal"
+ }
+ ],
+ "title": "Clinical pathology field with leaked identifiers"
+ },
+ {
+ "decision": "block",
+ "findings": [
+ {
+ "code": "MISSING_DERIVED_SOURCE",
+ "field": "derived_from",
+ "message": "Derived figure cannot enter the KG without source image or artifact lineage.",
+ "severity": "block"
+ }
+ ],
+ "image_id": "FIG-DERIVED-004",
+ "redactions": [],
+ "release_scope": "do_not_publish",
+ "safe_edges": [
+ {
+ "evidence": "metadata",
+ "object": "dataset:DS-CELL-MIGRATION-2026",
+ "predicate": "member_of_dataset",
+ "subject": "image:FIG-DERIVED-004",
+ "visibility": "internal"
+ },
+ {
+ "evidence": "metadata",
+ "object": "protocol:PR-FIGURE-ASSEMBLY",
+ "predicate": "uses_protocol",
+ "subject": "image:FIG-DERIVED-004",
+ "visibility": "internal"
+ },
+ {
+ "evidence": "metadata",
+ "object": "license:CC-BY-4.0",
+ "predicate": "released_under",
+ "subject": "image:FIG-DERIVED-004",
+ "visibility": "internal"
+ }
+ ],
+ "title": "Composite figure without source chain"
+ },
+ {
+ "decision": "review",
+ "findings": [
+ {
+ "code": "MISSING_CHANNELS",
+ "field": "channels",
+ "message": "Microscopy image should include channel metadata before KG recommendation use.",
+ "severity": "review"
+ },
+ {
+ "code": "NON_PUBLIC_ACCESS",
+ "field": "access",
+ "message": "Image is not public and should be kept out of the public KG release.",
+ "severity": "review"
+ },
+ {
+ "code": "LICENSE_REVIEW_REQUIRED",
+ "field": "license",
+ "message": "Image license is missing or not clearly open for graph publication.",
+ "severity": "review"
+ }
+ ],
+ "image_id": "IMG-EMBARGO-005",
+ "redactions": [],
+ "release_scope": "internal_review_only",
+ "safe_edges": [
+ {
+ "evidence": "metadata",
+ "object": "dataset:DS-CRYOEM-EMBARGO-2026",
+ "predicate": "member_of_dataset",
+ "subject": "image:IMG-EMBARGO-005",
+ "visibility": "internal"
+ },
+ {
+ "evidence": "metadata",
+ "object": "protocol:PR-CRYOEM-CAPTURE-V4",
+ "predicate": "uses_protocol",
+ "subject": "image:IMG-EMBARGO-005",
+ "visibility": "internal"
+ },
+ {
+ "evidence": "metadata",
+ "object": "instrument:INST-CRYOEM-4",
+ "predicate": "captured_by",
+ "subject": "image:IMG-EMBARGO-005",
+ "visibility": "internal"
+ },
+ {
+ "evidence": "metadata",
+ "object": "artifact:s3://scibase-lab/embargo/cryo/IMG-EMBARGO-005.mrc",
+ "predicate": "has_source_artifact",
+ "subject": "image:IMG-EMBARGO-005",
+ "visibility": "internal"
+ }
+ ],
+ "title": "Embargoed cryo-EM map preview"
+ }
+ ],
+ "summary": {
+ "block": 2,
+ "publish": 1,
+ "redaction_count": 4,
+ "review": 2,
+ "safe_edge_count": 22,
+ "total": 5
+ }
+}
diff --git a/scientific-image-metadata-provenance-guard/demo/summary.md b/scientific-image-metadata-provenance-guard/demo/summary.md
new file mode 100644
index 00000000..3d568a8c
--- /dev/null
+++ b/scientific-image-metadata-provenance-guard/demo/summary.md
@@ -0,0 +1,43 @@
+# Scientific Image Metadata Provenance Guard Summary
+
+- Total records: 5
+- Publish: 1
+- Review: 2
+- Block: 2
+- Safe graph edges: 22
+- Redactions: 4
+
+| Image | Decision | Release scope | Findings |
+| --- | --- | --- | --- |
+| IMG-CELL-001 | publish | public_kg | none |
+| IMG-SLIDE-002 | review | review_queue | MISSING_CHANNELS, MISSING_PIXEL_SIZE |
+| IMG-PATH-003 | block | do_not_publish | ACQUIRED_AFTER_PUBLICATION, NON_PUBLIC_ACCESS, LICENSE_REVIEW_REQUIRED, PRIVATE_TAG_PRESENT, PRIVATE_VALUE_IN_FILENAME, PRIVATE_METADATA_FIELD, PRIVATE_METADATA_FIELD |
+| FIG-DERIVED-004 | block | do_not_publish | MISSING_DERIVED_SOURCE |
+| IMG-EMBARGO-005 | review | internal_review_only | MISSING_CHANNELS, NON_PUBLIC_ACCESS, LICENSE_REVIEW_REQUIRED |
+
+## Reviewer Actions
+
+### IMG-SLIDE-002
+- REVIEW MISSING_CHANNELS: Microscopy image should include channel metadata before KG recommendation use.
+- REVIEW MISSING_PIXEL_SIZE: Microscopy image should include pixel_size_um for scale-aware graph navigation.
+
+### IMG-PATH-003
+- BLOCK ACQUIRED_AFTER_PUBLICATION: Image acquisition date is later than the publication date.
+- REVIEW NON_PUBLIC_ACCESS: Image is not public and should be kept out of the public KG release.
+- REVIEW LICENSE_REVIEW_REQUIRED: Image license is missing or not clearly open for graph publication.
+- BLOCK PRIVATE_TAG_PRESENT: Privacy tags indicate private identifiers that must not enter public KG nodes.
+- BLOCK PRIVATE_VALUE_IN_FILENAME: Filename appears to contain private identifiers.
+- BLOCK PRIVATE_METADATA_FIELD: Metadata contains a private field name or value.
+- BLOCK PRIVATE_METADATA_FIELD: Metadata contains a private field name or value.
+- Redact `privacy_tags`: remove from public graph payload
+- Redact `filename`: replace with stable image_id
+- Redact `metadata.mrn`: drop before KG release
+- Redact `metadata.patient_name`: drop before KG release
+
+### FIG-DERIVED-004
+- BLOCK MISSING_DERIVED_SOURCE: Derived figure cannot enter the KG without source image or artifact lineage.
+
+### IMG-EMBARGO-005
+- REVIEW MISSING_CHANNELS: Microscopy image should include channel metadata before KG recommendation use.
+- REVIEW NON_PUBLIC_ACCESS: Image is not public and should be kept out of the public KG release.
+- REVIEW LICENSE_REVIEW_REQUIRED: Image license is missing or not clearly open for graph publication.
diff --git a/scientific-image-metadata-provenance-guard/image_metadata_provenance_guard.py b/scientific-image-metadata-provenance-guard/image_metadata_provenance_guard.py
new file mode 100644
index 00000000..6d652b5d
--- /dev/null
+++ b/scientific-image-metadata-provenance-guard/image_metadata_provenance_guard.py
@@ -0,0 +1,515 @@
+#!/usr/bin/env python3
+"""Guard scientific image metadata before Knowledge Graph publication."""
+
+from __future__ import annotations
+
+import argparse
+import datetime as dt
+import html
+import json
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Iterable
+
+
# Exactly 64 hexadecimal characters: the textual form of a SHA-256 digest.
CHECKSUM_RE = re.compile(r"^[0-9a-fA-F]{64}$")
# Substrings of a metadata key name that mark the field as private.
PRIVATE_FIELD_RE = re.compile(
    r"(patient|mrn|medical_record|dob|date_of_birth|email|phone|ssn|address)",
    re.IGNORECASE,
)
# Value shapes treated as private: MRN-style tokens, NNN-NN-NNNN groups and
# e-mail addresses.  The NNN-NN-NNNN branch is delimited with digit
# lookarounds instead of \b because "_" is a word character: with \b a
# filename such as "scan_123-45-6789_left.tiff" would not be detected.
PRIVATE_VALUE_RE = re.compile(
    r"(MRN[_ -]?\d{4,}|(?<!\d)\d{3}-\d{2}-\d{4}(?!\d)|[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,})",
    re.IGNORECASE,
)
# Modalities (compared lower-cased) for which channel and pixel-size metadata
# are checked.
MICROSCOPY_MODALITIES = {
    "confocal_microscopy",
    "fluorescence_microscopy",
    "widefield_microscopy",
    "electron_microscopy",
    "microscopy",
}
# Access states (compared lower-cased) that keep a record out of the public
# release.
NON_PUBLIC_ACCESS = {"private", "restricted", "embargoed", "confidential"}
# A license string must start with one of these prefixes to count as open.
OPEN_LICENSE_PREFIXES = ("CC-", "MIT", "BSD", "Apache", "ODC-")
+
+
@dataclass(frozen=True)
class Finding:
    """A single issue raised against an image record.

    The severities used in this module are ``"block"`` and ``"review"``.
    """

    code: str  # machine-readable identifier, e.g. "INVALID_CHECKSUM"
    severity: str
    message: str  # human-readable explanation
    field: str | None = None  # record field the finding refers to, if any

    def to_dict(self) -> dict[str, Any]:
        """Return a plain mapping; ``field`` is omitted when it is falsy."""
        payload: dict[str, Any] = dict(
            code=self.code,
            severity=self.severity,
            message=self.message,
        )
        if not self.field:
            return payload
        payload["field"] = self.field
        return payload
+
+
@dataclass(frozen=True)
class GraphEdge:
    """A subject/predicate/object triple proposed for the knowledge graph."""

    subject: str
    predicate: str
    object: str
    visibility: str = "public"
    evidence: str = "metadata"

    def to_dict(self) -> dict[str, str]:
        """Return the edge as a plain mapping of its five attributes."""
        names = ("subject", "predicate", "object", "visibility", "evidence")
        return {name: getattr(self, name) for name in names}
+
+
@dataclass
class GuardResult:
    """Outcome of evaluating one image record."""

    image_id: str
    title: str
    decision: str
    release_scope: str
    findings: list[Finding] = field(default_factory=list)
    redactions: list[dict[str, str]] = field(default_factory=list)
    safe_edges: list[GraphEdge] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain mapping with findings and edges serialised via ``to_dict``."""
        finding_payloads = [item.to_dict() for item in self.findings]
        edge_payloads = [item.to_dict() for item in self.safe_edges]
        return dict(
            image_id=self.image_id,
            title=self.title,
            decision=self.decision,
            release_scope=self.release_scope,
            findings=finding_payloads,
            redactions=self.redactions,
            safe_edges=edge_payloads,
        )
+
+
def _parse_date(value: Any) -> dt.date | None:
    """Coerce *value* to a calendar date, or return ``None`` when that fails.

    Args:
        value: A ``date``/``datetime`` instance, or anything whose string
            form starts with an ISO ``YYYY-MM-DD`` date.

    Returns:
        A plain ``datetime.date``, or ``None`` for falsy or unparseable input.
    """
    if not value:
        return None
    # datetime is a subclass of date; reduce it to a plain date because
    # ordering a datetime against a date raises TypeError.
    if isinstance(value, dt.datetime):
        return value.date()
    if isinstance(value, dt.date):
        return value
    try:
        # Only the first ten characters are parsed, so timestamps such as
        # "2026-02-11T09:30:00Z" are accepted.
        return dt.date.fromisoformat(str(value)[:10])
    except ValueError:
        return None
+
+
def _as_text(value: Any) -> str:
    """Render *value* as text so it can be scanned with regular expressions.

    ``None`` becomes the empty string, dicts and lists are serialised as JSON
    with sorted keys, and everything else goes through ``str``.
    """
    if value is None:
        return ""
    if isinstance(value, (dict, list)):
        # default=str keeps non-JSON leaves (dates, paths, ...) scannable
        # instead of letting json.dumps raise TypeError.
        return json.dumps(value, sort_keys=True, default=str)
    return str(value)
+
+
def _contains_private_value(value: Any) -> bool:
    """Report whether the text form of *value* matches ``PRIVATE_VALUE_RE``."""
    return PRIVATE_VALUE_RE.search(_as_text(value)) is not None
+
+
def _field_has_private_name(field_name: str) -> bool:
    """Report whether *field_name* contains a term from ``PRIVATE_FIELD_RE``."""
    hit = PRIVATE_FIELD_RE.search(field_name)
    return hit is not None
+
+
def _open_license(license_value: str) -> bool:
    """Report whether the stripped license starts with an open-license prefix.

    The comparison against ``OPEN_LICENSE_PREFIXES`` is case-sensitive; a
    blank license is never open.
    """
    stripped = license_value.strip()
    if not stripped:
        return False
    return stripped.startswith(OPEN_LICENSE_PREFIXES)
+
+
def _as_string_list(value: Any) -> list[str]:
    """Return *value* as a list of stripped, non-blank strings.

    A list or tuple is cleaned element by element; any other truthy value is
    treated as one entry, so a bare string such as ``"phi"`` is not iterated
    character by character.
    """
    if not value:
        return []
    items = value if isinstance(value, (list, tuple)) else [value]
    cleaned = (str(item).strip() for item in items if item is not None)
    return [text for text in cleaned if text]


def evaluate_record(record: dict[str, Any]) -> GuardResult:
    """Evaluate one image metadata record and return a KG publication decision.

    Checks run in a fixed order (identity, checksum, linkage, chronology,
    microscopy metadata, access and license, privacy).  Each problem becomes
    a ``Finding`` with severity ``"block"`` or ``"review"``; the decision,
    release scope and graph edges are then derived from those findings.

    Args:
        record: Raw metadata for one image.  Every key is optional; missing
            or falsy values fall back to the defaults visible below.

    Returns:
        A ``GuardResult`` with the decision, release scope, findings,
        redaction actions and candidate graph edges.
    """

    image_id = str(record.get("image_id") or "UNKNOWN")
    title = str(record.get("title") or image_id)
    modality = str(record.get("modality") or "").strip().lower()
    image_kind = str(record.get("image_kind") or "raw_image").strip().lower()
    source_artifact = str(record.get("source_artifact") or "").strip()
    dataset_id = str(record.get("dataset_id") or "").strip()
    protocol_id = str(record.get("protocol_id") or "").strip()
    instrument_id = str(record.get("instrument_id") or "").strip()
    # NOTE(review): a missing access value is treated as "public" -- confirm
    # that this fail-open default is intended.
    access = str(record.get("access") or "public").strip().lower()
    license_value = str(record.get("license") or "").strip()
    filename = str(record.get("filename") or "").strip()
    channels = record.get("channels") or []
    pixel_size = record.get("pixel_size_um")
    # Normalised so that a bare string is one entry and blank entries do not
    # count as lineage.
    derived_from = _as_string_list(record.get("derived_from"))
    metadata = record.get("metadata") or {}
    privacy_tags = [tag.lower() for tag in _as_string_list(record.get("privacy_tags"))]
    findings: list[Finding] = []
    redactions: list[dict[str, str]] = []

    def flag(code: str, severity: str, message: str, field_name: str) -> None:
        findings.append(Finding(code, severity, message, field_name))

    if not image_id or image_id == "UNKNOWN":
        flag("MISSING_IMAGE_ID", "block", "Image node is missing a stable image_id.", "image_id")

    checksum = str(record.get("sha256") or "").strip()
    if not CHECKSUM_RE.match(checksum):
        flag(
            "INVALID_CHECKSUM",
            "block",
            "Image checksum must be a 64-character SHA-256 hex digest.",
            "sha256",
        )

    if not dataset_id:
        flag("MISSING_DATASET", "review", "Image has no dataset node for KG membership.", "dataset_id")

    if not protocol_id:
        flag("MISSING_PROTOCOL", "review", "Image has no protocol node for provenance.", "protocol_id")

    if image_kind == "derived_figure":
        if not derived_from and not source_artifact:
            flag(
                "MISSING_DERIVED_SOURCE",
                "block",
                "Derived figure cannot enter the KG without source image or artifact lineage.",
                "derived_from",
            )
    elif not source_artifact:
        flag(
            "MISSING_SOURCE_ARTIFACT",
            "review",
            "Raw image has no source artifact location for provenance audit.",
            "source_artifact",
        )

    acquired_at = _parse_date(record.get("acquired_at"))
    publication_date = _parse_date(record.get("publication_date"))
    if acquired_at is None:
        flag("MISSING_ACQUISITION_DATE", "review", "Image has no parseable acquisition date.", "acquired_at")
    if publication_date is None:
        flag(
            "MISSING_PUBLICATION_DATE",
            "review",
            "Image has no parseable publication date.",
            "publication_date",
        )
    if acquired_at and publication_date and acquired_at > publication_date:
        flag(
            "ACQUIRED_AFTER_PUBLICATION",
            "block",
            "Image acquisition date is later than the publication date.",
            "acquired_at",
        )

    if modality in MICROSCOPY_MODALITIES:
        if not isinstance(channels, list) or not channels:
            flag(
                "MISSING_CHANNELS",
                "review",
                "Microscopy image should include channel metadata before KG recommendation use.",
                "channels",
            )
        if pixel_size in (None, ""):
            flag(
                "MISSING_PIXEL_SIZE",
                "review",
                "Microscopy image should include pixel_size_um for scale-aware graph navigation.",
                "pixel_size_um",
            )
        elif (
            # bool is an int subclass, and NaN/inf are floats; none of them
            # is a usable pixel size.  The chained comparison is False for NaN.
            isinstance(pixel_size, bool)
            or not isinstance(pixel_size, (int, float))
            or not 0 < pixel_size < float("inf")
        ):
            flag(
                "INVALID_PIXEL_SIZE",
                "review",
                "pixel_size_um must be a positive number.",
                "pixel_size_um",
            )

    if access in NON_PUBLIC_ACCESS:
        flag(
            "NON_PUBLIC_ACCESS",
            "review",
            "Image is not public and should be kept out of the public KG release.",
            "access",
        )

    if not _open_license(license_value):
        flag(
            "LICENSE_REVIEW_REQUIRED",
            "review",
            "Image license is missing or not clearly open for graph publication.",
            "license",
        )

    private_tags = {"phi", "pii", "private_identifier", "patient_identifier"}
    if any(tag in private_tags for tag in privacy_tags):
        flag(
            "PRIVATE_TAG_PRESENT",
            "block",
            "Privacy tags indicate private identifiers that must not enter public KG nodes.",
            "privacy_tags",
        )
        redactions.append({"field": "privacy_tags", "action": "remove from public graph payload"})

    if _contains_private_value(filename):
        flag(
            "PRIVATE_VALUE_IN_FILENAME",
            "block",
            "Filename appears to contain private identifiers.",
            "filename",
        )
        redactions.append({"field": "filename", "action": "replace with stable image_id"})

    if isinstance(metadata, dict):
        # Sort on the key text only, so values are never compared.
        for key, value in sorted(metadata.items(), key=lambda pair: str(pair[0])):
            if _field_has_private_name(str(key)) or _contains_private_value(value):
                flag(
                    "PRIVATE_METADATA_FIELD",
                    "block",
                    "Metadata contains a private field name or value.",
                    f"metadata.{key}",
                )
                redactions.append({"field": f"metadata.{key}", "action": "drop before KG release"})
    elif _contains_private_value(metadata):
        # Metadata supplied as a list or scalar is scanned as a single value
        # rather than silently skipped.
        flag(
            "PRIVATE_METADATA_FIELD",
            "block",
            "Metadata contains a private field name or value.",
            "metadata",
        )
        redactions.append({"field": "metadata", "action": "drop before KG release"})

    decision = _decision_from_findings(findings)
    release_scope = _release_scope(decision, access)
    safe_edges = _build_safe_edges(
        image_id=image_id,
        dataset_id=dataset_id,
        protocol_id=protocol_id,
        instrument_id=instrument_id,
        source_artifact=source_artifact,
        license_value=license_value,
        derived_from=derived_from,
        release_scope=release_scope,
        allow_public=decision == "publish",
    )

    return GuardResult(
        image_id=image_id,
        title=title,
        decision=decision,
        release_scope=release_scope,
        findings=findings,
        redactions=redactions,
        safe_edges=safe_edges,
    )
+
+
def _decision_from_findings(findings: Iterable[Finding]) -> str:
    """Fold finding severities into one decision.

    ``"block"`` outranks ``"review"``; with neither present the result is
    ``"publish"``.
    """
    seen = set()
    for item in findings:
        seen.add(item.severity)
    for level in ("block", "review"):
        if level in seen:
            return level
    return "publish"
+
+
def _release_scope(decision: str, access: str) -> str:
    """Map a decision and access state to a release scope label.

    A block always wins, then a non-public access state, and only after
    that the review/publish distinction.
    """
    if decision == "block":
        return "do_not_publish"
    if access in NON_PUBLIC_ACCESS:
        return "internal_review_only"
    return "review_queue" if decision == "review" else "public_kg"
+
+
def _build_safe_edges(
    *,
    image_id: str,
    dataset_id: str,
    protocol_id: str,
    instrument_id: str,
    source_artifact: str,
    license_value: str,
    derived_from: list[Any],
    release_scope: str,
    allow_public: bool,
) -> list[GraphEdge]:
    """Build the graph edges for one image node.

    An edge is emitted for every non-empty target, in the order dataset,
    protocol, instrument, source artifact, license, then one ``derived_from``
    edge per non-blank lineage entry.  All edges share one visibility, which
    is ``"public"`` only when *allow_public* is true and *release_scope* is
    ``"public_kg"``.
    """
    scope_is_public = release_scope == "public_kg"
    visibility = "public" if (allow_public and scope_is_public) else "internal"
    node = f"image:{image_id}"

    # (predicate, object prefix, raw target) in emission order.
    candidates = (
        ("member_of_dataset", "dataset", dataset_id),
        ("uses_protocol", "protocol", protocol_id),
        ("captured_by", "instrument", instrument_id),
        ("has_source_artifact", "artifact", source_artifact),
        ("released_under", "license", license_value),
    )
    edges: list[GraphEdge] = [
        GraphEdge(node, predicate, f"{prefix}:{target}", visibility)
        for predicate, prefix, target in candidates
        if target
    ]

    lineage = (str(entry).strip() for entry in derived_from)
    edges.extend(
        GraphEdge(node, "derived_from", f"image:{origin}", visibility)
        for origin in lineage
        if origin
    )
    return edges
+
+
def evaluate_records(records: Iterable[dict[str, Any]]) -> dict[str, Any]:
    """Evaluate every record and return a report with summary counts.

    The report holds the guard name, a ``summary`` of decision, edge and
    redaction totals, and the per-record results as plain dicts.
    """
    outcomes = list(map(evaluate_record, records))

    def tally(decision: str) -> int:
        return sum(1 for outcome in outcomes if outcome.decision == decision)

    summary = {"total": len(outcomes)}
    for decision in ("publish", "review", "block"):
        summary[decision] = tally(decision)
    summary["safe_edge_count"] = sum(len(outcome.safe_edges) for outcome in outcomes)
    summary["redaction_count"] = sum(len(outcome.redactions) for outcome in outcomes)

    return {
        "guard": "scientific-image-metadata-provenance-guard",
        "summary": summary,
        "records": [outcome.to_dict() for outcome in outcomes],
    }
+
+
def load_records(path: Path) -> list[dict[str, Any]]:
    """Read a UTF-8 JSON file holding a list of image metadata records.

    Args:
        path: Location of the JSON document.

    Returns:
        The decoded list of record objects.

    Raises:
        ValueError: If the document is not a list, or an entry is not a JSON
            object.  Checking here gives a clear message instead of an
            ``AttributeError`` when the entry is evaluated later.
    """
    with path.open("r", encoding="utf-8") as handle:
        records = json.load(handle)
    if not isinstance(records, list):
        raise ValueError("Input JSON must contain a list of image metadata records.")
    for position, entry in enumerate(records):
        if not isinstance(entry, dict):
            raise ValueError(
                f"Record at index {position} must be a JSON object, got {type(entry).__name__}."
            )
    return records
+
+
def sample_path() -> Path:
    """Return the path of ``sample_image_records.json`` next to this module."""
    module_dir = Path(__file__).parent
    return module_dir / "sample_image_records.json"
+
+
def write_json(report: dict[str, Any], path: Path) -> None:
    """Write *report* to *path* as indented, key-sorted JSON plus a newline.

    Missing parent directories are created first.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    serialised = json.dumps(report, sort_keys=True, indent=2)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(serialised + "\n")
+
+
def write_markdown(report: dict[str, Any], path: Path) -> None:
    """Write a Markdown reviewer summary of *report* to *path*.

    The document has a bullet list of summary counts, a table with one row
    per record, and a "Reviewer Actions" section that lists findings and
    redactions for every record whose decision is not ``"publish"``.
    Missing parent directories are created first.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    counts = report["summary"]
    records = report["records"]

    out = ["# Scientific Image Metadata Provenance Guard Summary", ""]
    out += [
        f"- Total records: {counts['total']}",
        f"- Publish: {counts['publish']}",
        f"- Review: {counts['review']}",
        f"- Block: {counts['block']}",
        f"- Safe graph edges: {counts['safe_edge_count']}",
        f"- Redactions: {counts['redaction_count']}",
    ]
    out += ["", "| Image | Decision | Release scope | Findings |", "| --- | --- | --- | --- |"]

    for entry in records:
        codes = ", ".join(item["code"] for item in entry["findings"]) or "none"
        out.append(
            f"| {entry['image_id']} | {entry['decision']} | {entry['release_scope']} | {codes} |"
        )

    out += ["", "## Reviewer Actions", ""]
    for entry in records:
        if entry["decision"] == "publish":
            continue
        out.append(f"### {entry['image_id']}")
        out.extend(
            f"- {item['severity'].upper()} {item['code']}: {item['message']}"
            for item in entry["findings"]
        )
        out.extend(
            f"- Redact `{item['field']}`: {item['action']}" for item in entry["redactions"]
        )
        out.append("")

    # Trailing blank lines are collapsed to a single final newline.
    path.write_text("\n".join(out).rstrip() + "\n", encoding="utf-8")
+
+
def write_svg(report: dict[str, Any], path: Path) -> None:
    """Write a small horizontal bar chart of decision counts as SVG.

    One bar is drawn for each of the ``publish``, ``review`` and ``block``
    counts in ``report["summary"]``, scaled so the largest count is 300
    units wide.  Missing parent directories are created first.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    summary = report["summary"]
    bars = [
        ("publish", "#2f855a", summary["publish"]),
        ("review", "#b7791f", summary["review"]),
        ("block", "#c53030", summary["block"]),
    ]
    # The trailing 1 keeps the divisor non-zero when every count is 0.
    max_count = max([count for _, _, count in bars] + [1])
    bar_markup = []
    for index, (label, color, count) in enumerate(bars):
        y = 80 + index * 58
        width = int(300 * count / max_count)
        bar_markup.append(
            f'<text x="24" y="{y + 21}" font-size="14" fill="#1a202c">{html.escape(label)}</text>'
        )
        bar_markup.append(
            f'<rect x="110" y="{y}" width="{width}" height="32" rx="4" fill="{color}"/>'
        )
        bar_markup.append(
            f'<text x="{122 + width}" y="{y + 21}" font-size="14" fill="#1a202c">{count}</text>'
        )
    chart_rows = "\n".join(bar_markup)
    svg = f"""<svg xmlns="http://www.w3.org/2000/svg" width="480" height="270" viewBox="0 0 480 270" role="img" aria-label="Image provenance guard decisions">
<rect width="480" height="270" fill="#f7fafc"/>
<text x="24" y="42" font-size="18" font-weight="bold" fill="#1a202c">Image provenance guard decisions</text>
{chart_rows}
</svg>
"""
    path.write_text(svg, encoding="utf-8")
+
+
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser.

    Exactly one of ``--input`` or ``--sample`` is required; ``--json``,
    ``--markdown`` and ``--svg`` are optional output paths.
    """
    parser = argparse.ArgumentParser(description=__doc__)

    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("--input", type=Path, help="Path to image metadata JSON records.")
    source.add_argument("--sample", action="store_true", help="Use the bundled sample image records.")

    output_flags = (
        ("--json", "Write JSON report to this path."),
        ("--markdown", "Write Markdown summary to this path."),
        ("--svg", "Write SVG summary chart to this path."),
    )
    for flag, help_text in output_flags:
        parser.add_argument(flag, type=Path, help=help_text)
    return parser
+
+
def main(argv: list[str] | None = None) -> int:
    """Run the guard from the command line and return the exit status.

    Loads the sample or ``--input`` records, evaluates them, writes each
    requested output file, prints a one-line summary and returns ``0``.
    """
    args = build_parser().parse_args(argv)
    source = sample_path() if args.sample else args.input
    report = evaluate_records(load_records(source))

    # Each output is written only when its path option was given.
    writers = (
        (args.json, write_json),
        (args.markdown, write_markdown),
        (args.svg, write_svg),
    )
    for target, writer in writers:
        if target:
            writer(report, target)

    counts = report["summary"]
    message = (
        "Image metadata KG guard: "
        f"{counts['publish']} publish, {counts['review']} review, "
        f"{counts['block']} block, {counts['safe_edge_count']} safe edges."
    )
    print(message)
    return 0
+
+
# When executed as a script, run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
diff --git a/scientific-image-metadata-provenance-guard/sample_image_records.json b/scientific-image-metadata-provenance-guard/sample_image_records.json
new file mode 100644
index 00000000..216e84b5
--- /dev/null
+++ b/scientific-image-metadata-provenance-guard/sample_image_records.json
@@ -0,0 +1,128 @@
+[
+ {
+ "image_id": "IMG-CELL-001",
+ "title": "Confocal astrocyte culture panel",
+ "image_kind": "raw_image",
+ "modality": "confocal_microscopy",
+ "source_artifact": "s3://scibase-lab/raw/astrocyte/IMG-CELL-001.ome.tiff",
+ "dataset_id": "DS-ASTROCYTE-ATLAS-2026",
+ "protocol_id": "PR-IMMUNO-STAIN-V2",
+ "instrument_id": "INST-CONFOCAL-A1",
+ "sha256": "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
+ "acquired_at": "2026-02-11",
+ "publication_date": "2026-03-08",
+ "license": "CC-BY-4.0",
+ "access": "public",
+ "filename": "astrocyte_culture_a1_ome.tiff",
+ "privacy_tags": [],
+ "metadata": {
+ "operator": "lab-tech-17",
+ "objective": "63x oil",
+ "ome_model": "2016-06"
+ },
+ "channels": [
+ {
+ "name": "DAPI",
+ "wavelength_nm": 405
+ },
+ {
+ "name": "GFAP",
+ "wavelength_nm": 488
+ }
+ ],
+ "pixel_size_um": 0.108
+ },
+ {
+ "image_id": "IMG-SLIDE-002",
+ "title": "Widefield embryo slide",
+ "image_kind": "raw_image",
+ "modality": "fluorescence_microscopy",
+ "source_artifact": "s3://scibase-lab/raw/embryo/IMG-SLIDE-002.tiff",
+ "dataset_id": "DS-EMBRYO-STAIN-2026",
+ "protocol_id": "PR-EMBRYO-STAIN-V1",
+ "instrument_id": "INST-WIDEFIELD-7",
+ "sha256": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+ "acquired_at": "2026-01-15",
+ "publication_date": "2026-03-01",
+ "license": "CC-BY-4.0",
+ "access": "public",
+ "filename": "embryo_slide_002.tiff",
+ "privacy_tags": [],
+ "metadata": {
+ "operator": "lab-tech-22"
+ },
+ "channels": [],
+ "pixel_size_um": null
+ },
+ {
+ "image_id": "IMG-PATH-003",
+ "title": "Clinical pathology field with leaked identifiers",
+ "image_kind": "raw_image",
+ "modality": "pathology_slide",
+ "source_artifact": "s3://restricted-lab/raw/pathology/IMG-PATH-003.svs",
+ "dataset_id": "DS-PATHOLOGY-RESTRICTED",
+ "protocol_id": "PR-HISTOLOGY-REVIEW",
+ "instrument_id": "INST-SLIDE-SCANNER-2",
+ "sha256": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
+ "acquired_at": "2026-06-12",
+ "publication_date": "2026-05-01",
+ "license": "restricted",
+ "access": "private",
+ "filename": "Jane_Doe_MRN_1938472_biopsy.svs",
+ "privacy_tags": [
+ "phi"
+ ],
+ "metadata": {
+ "patient_name": "Jane Doe",
+ "mrn": "1938472",
+ "operator": "clinical-tech-3"
+ },
+ "channels": [],
+ "pixel_size_um": 0.24
+ },
+ {
+ "image_id": "FIG-DERIVED-004",
+ "title": "Composite figure without source chain",
+ "image_kind": "derived_figure",
+ "modality": "composite_figure",
+ "source_artifact": "",
+ "dataset_id": "DS-CELL-MIGRATION-2026",
+ "protocol_id": "PR-FIGURE-ASSEMBLY",
+ "instrument_id": "",
+ "sha256": "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc",
+ "acquired_at": "2026-02-05",
+ "publication_date": "2026-04-01",
+ "license": "CC-BY-4.0",
+ "access": "public",
+ "filename": "figure_2_panel_c.png",
+ "privacy_tags": [],
+ "metadata": {
+ "assembly_tool": "imagej"
+ },
+ "channels": [],
+ "pixel_size_um": null,
+ "derived_from": []
+ },
+ {
+ "image_id": "IMG-EMBARGO-005",
+ "title": "Embargoed cryo-EM map preview",
+ "image_kind": "raw_image",
+ "modality": "electron_microscopy",
+ "source_artifact": "s3://scibase-lab/embargo/cryo/IMG-EMBARGO-005.mrc",
+ "dataset_id": "DS-CRYOEM-EMBARGO-2026",
+ "protocol_id": "PR-CRYOEM-CAPTURE-V4",
+ "instrument_id": "INST-CRYOEM-4",
+ "sha256": "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd",
+ "acquired_at": "2026-03-04",
+ "publication_date": "2026-07-01",
+ "license": "",
+ "access": "embargoed",
+ "filename": "cryoem_preview_005.mrc",
+ "privacy_tags": [],
+ "metadata": {
+ "magnification": "105000x"
+ },
+ "channels": [],
+ "pixel_size_um": 1.08
+ }
+]
diff --git a/scientific-image-metadata-provenance-guard/test_image_metadata_provenance_guard.py b/scientific-image-metadata-provenance-guard/test_image_metadata_provenance_guard.py
new file mode 100644
index 00000000..0568a27c
--- /dev/null
+++ b/scientific-image-metadata-provenance-guard/test_image_metadata_provenance_guard.py
@@ -0,0 +1,114 @@
+import importlib.util
+import json
+import subprocess
+import sys
+import tempfile
+import unittest
+from pathlib import Path
+
+
+MODULE_PATH = Path(__file__).with_name("image_metadata_provenance_guard.py")  # guard script in the same directory as this test
+SPEC = importlib.util.spec_from_file_location("image_metadata_provenance_guard", MODULE_PATH)  # import spec built from the file path
+guard = importlib.util.module_from_spec(SPEC)  # module object created from the spec; its code has not run yet
+sys.modules[SPEC.name] = guard  # registered under the spec name before the module body executes
+SPEC.loader.exec_module(guard)  # executes the guard script; the tests below call it through `guard`
+
+
+class ImageMetadataProvenanceGuardTest(unittest.TestCase):
+ def test_sample_summary_counts(self):
+ report = guard.evaluate_records(guard.load_records(guard.sample_path()))
+
+ self.assertEqual(report["summary"]["total"], 5)
+ self.assertEqual(report["summary"]["publish"], 1)
+ self.assertEqual(report["summary"]["review"], 2)
+ self.assertEqual(report["summary"]["block"], 2)
+ self.assertGreaterEqual(report["summary"]["safe_edge_count"], 12)
+
+ def test_publish_record_emits_public_graph_edges(self):
+ report = guard.evaluate_records(guard.load_records(guard.sample_path()))
+ published = next(record for record in report["records"] if record["image_id"] == "IMG-CELL-001")
+
+ self.assertEqual(published["decision"], "publish")
+ self.assertEqual(published["release_scope"], "public_kg")
+ predicates = {edge["predicate"] for edge in published["safe_edges"]}
+ self.assertIn("member_of_dataset", predicates)
+ self.assertIn("uses_protocol", predicates)
+ self.assertIn("captured_by", predicates)
+ self.assertTrue(all(edge["visibility"] == "public" for edge in published["safe_edges"]))
+
+ def test_phi_and_chronology_block_public_release(self):
+ report = guard.evaluate_records(guard.load_records(guard.sample_path()))
+ blocked = next(record for record in report["records"] if record["image_id"] == "IMG-PATH-003")
+ codes = {finding["code"] for finding in blocked["findings"]}
+
+ self.assertEqual(blocked["decision"], "block")
+ self.assertEqual(blocked["release_scope"], "do_not_publish")
+ self.assertIn("ACQUIRED_AFTER_PUBLICATION", codes)
+ self.assertIn("PRIVATE_TAG_PRESENT", codes)
+ self.assertIn("PRIVATE_VALUE_IN_FILENAME", codes)
+ self.assertIn("PRIVATE_METADATA_FIELD", codes)
+ self.assertGreaterEqual(len(blocked["redactions"]), 3)
+
+ def test_microscopy_without_channel_metadata_requires_review(self):
+ report = guard.evaluate_records(guard.load_records(guard.sample_path()))
+ reviewed = next(record for record in report["records"] if record["image_id"] == "IMG-SLIDE-002")
+ codes = {finding["code"] for finding in reviewed["findings"]}
+
+ self.assertEqual(reviewed["decision"], "review")
+ self.assertIn("MISSING_CHANNELS", codes)
+ self.assertIn("MISSING_PIXEL_SIZE", codes)
+ self.assertTrue(all(edge["visibility"] == "internal" for edge in reviewed["safe_edges"]))
+
+ def test_derived_figure_without_source_is_blocked(self):
+ report = guard.evaluate_records(guard.load_records(guard.sample_path()))
+ blocked = next(record for record in report["records"] if record["image_id"] == "FIG-DERIVED-004")
+ codes = {finding["code"] for finding in blocked["findings"]}
+
+ self.assertEqual(blocked["decision"], "block")
+ self.assertIn("MISSING_DERIVED_SOURCE", codes)
+
+ def test_report_writers_create_reviewer_artifacts(self):
+ report = guard.evaluate_records(guard.load_records(guard.sample_path()))
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ tmp_path = Path(tmpdir)
+ json_path = tmp_path / "report.json"
+ markdown_path = tmp_path / "summary.md"
+ svg_path = tmp_path / "graph.svg"
+
+ guard.write_json(report, json_path)
+ guard.write_markdown(report, markdown_path)
+ guard.write_svg(report, svg_path)
+
+ self.assertEqual(json.loads(json_path.read_text())["summary"]["total"], 5)
+ self.assertIn("IMG-PATH-003", markdown_path.read_text())
+ self.assertIn("