75 lines
2.0 KiB
Python
75 lines
2.0 KiB
Python
#!/usr/bin/env python3
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
from urllib.parse import unquote
|
|
|
|
# Matches inline markdown links and captures the link target in group 1.
# The target runs up to the first ")", so a target that itself contains
# parentheses is captured only partially. Image links ("![alt](src)") match
# too, because they contain the same "[...](...)" shape.
LINK_RE = re.compile(r"\[[^\]]*\]\(([^)]+)\)") # [text](target)
|
|
|
|
|
|
def is_external(target: str) -> bool:
    """Tell whether *target* starts with a URL scheme followed by ``://``.

    The scheme must begin with an ASCII letter and may continue with letters,
    digits, ``+``, ``.`` or ``-``. Scheme-only forms without ``//`` (for
    example ``mailto:``) do not count as external here.
    """
    scheme_prefix = re.match(r"^[a-zA-Z][a-zA-Z0-9+.-]*://", target)
    return scheme_prefix is not None
|
|
|
|
|
|
# Optional link title after the destination: whitespace followed by a
# single- or double-quoted string that runs to the end of the target.
_LINK_TITLE_RE = re.compile(r"""\s+(?:"[^"]*"|'[^']*')\Z""")


def normalize_target(target: str) -> str:
    """Reduce a raw markdown link target to a plain, decoded path.

    Steps, in order: trim whitespace, drop a trailing quoted link title
    (``path "Title"``), unwrap a surrounding ``<...>``, cut off the
    ``#fragment`` and the ``?query``, then percent-decode what remains.

    Args:
        target: Text captured from between the parentheses of a link.

    Returns:
        The decoded path portion. Empty for anchor-only targets such as
        ``#top``.
    """
    target = target.strip()
    # The title is not part of the destination; left in place it would be
    # treated as part of the file name and the link reported as missing.
    target = _LINK_TITLE_RE.sub("", target)
    # strip surrounding <>
    if target.startswith("<") and target.endswith(">"):
        target = target[1:-1].strip()
    # Cut anchor and query before decoding, so an encoded "%23" or "%3F"
    # stays part of the file name instead of being treated as a delimiter.
    target = target.split("#", 1)[0]
    target = target.split("?", 1)[0]
    return unquote(target)
|
|
|
|
|
|
def main() -> int:
    """Report markdown links whose local targets do not exist.

    The root directory is ``sys.argv[1]`` when given, otherwise the current
    directory. Every ``*.md`` file below it is scanned with ``LINK_RE``.
    Links that are external, anchor-only, scheme-like (contain ``:`` without
    a ``./`` or ``../`` prefix), or that resolve outside the root are skipped.
    The remaining targets are resolved relative to the linking file and
    checked for existence.

    Returns:
        0 when no missing targets were found, 1 when at least one was found,
        2 when the root is not a directory.
    """
    root = Path(sys.argv[1]) if len(sys.argv) > 1 else Path(".")
    root = root.resolve()
    if not root.is_dir():
        # Without this check a mistyped path finds no files and is
        # reported as "OK".
        print(f"error: not a directory: {root}", file=sys.stderr)
        return 2

    missing = []
    # Sorted so the report order is stable between runs and machines.
    for md in sorted(root.rglob("*.md")):
        # A directory or dangling symlink may also be named "*.md";
        # reading it would raise.
        if not md.is_file():
            continue
        text = md.read_text(encoding="utf-8", errors="ignore")
        for m in LINK_RE.finditer(text):
            raw = m.group(1)
            if is_external(raw):
                continue
            target = normalize_target(raw)
            if not target:
                continue
            # ignore mailto:, etc.
            if ":" in target and not target.startswith(("./", "../")):
                continue

            resolved = (md.parent / target).resolve()
            try:
                resolved_rel = resolved.relative_to(root)
            except ValueError:
                # points outside repo; treat as external-ish
                continue

            if not resolved.exists():
                missing.append((md, raw, str(resolved_rel)))

    if not missing:
        print("OK: no missing internal link targets found.")
        return 0

    print("Missing internal link targets:\n")
    for src, raw, resolved_rel in missing:
        print(f"- {src.relative_to(root)}: ({raw}) -> {resolved_rel}")

    return 1
|
|
|
|
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
|