Add python script to replace symlinks in CryoSPARC projects

This commit is contained in:
2025-07-30 17:25:25 +02:00
parent 3f4b1e18a9
commit bd15c88b7f
2 changed files with 194 additions and 0 deletions

View File

@@ -4,3 +4,40 @@ This repository contains the necessary scripts to install CryoSPARC on the Merli
For the complete installation guide, please refer to the following page:
https://intranet.psi.ch/en/cls/cryosparc-merlin-7-installation
# Fixing broken symlinks
When raw data is imported into a CryoSPARC project, the data is not copied into the project directory; instead, symlinks pointing to the original data files are created inside the import job directories.
If you move data that was brought into CryoSPARC with an "Import Particles", "Import Micrographs" or "Import Movies" job, you will need to repair these jobs. When CryoSPARC imports these three types of data, it creates a symlink to each file inside the job's `imported` directory. These symlinks become broken if the original path to the file no longer exists. You can check the status of the symlinks by running `ls -l` inside the `imported` directory of the job. Note: The "Import Templates" and "Import Volumes" jobs copy the specified files directly into the job directory.
The script `replace_cryosparc_symlinks.py` automates the repair of broken symlinks in CryoSPARC jobs by replacing
outdated path prefixes with new ones.
It works as follows:
1. Lists available CryoSPARC projects and lets the user select one or all projects.
2. Lists jobs within the selected project(s) and allows the user to select specific jobs or all jobs.
3. Analyzes current symlink targets in the selected jobs and displays their path prefixes.
4. Prompts the user for the old prefix (to be removed) and the new prefix (to be inserted).
5. Confirms changes with the user before applying any modifications.
6. Uses `cli.job_import_replace_symlinks` to update symlinks in the selected jobs.
7. Reports modified links and any jobs that failed to repair.
This tool is interactive and requires confirmation before making any changes.
Before running the script, you first need to start the CryoSPARC interactive Python session:
```bash
/data/user/$USER/cryosparc/cryosparc_master/bin/cryosparcm icli
```
Once inside the interactive Python shell (indicated by `>>>`), you can execute the script by running
```python
exec(open("/path/to/script/replace_cryosparc_symlinks.py").read())
```
> ⚠️ Important:
Replace `/path/to/script/replace_cryosparc_symlinks.py` with the actual path to the script file.
This runs the script in the context of the CryoSPARC interactive CLI session, where the `cli` object is already available.

View File

@@ -0,0 +1,157 @@
"""
This script automates the repair of broken symlinks in CryoSPARC jobs by replacing
outdated path prefixes with new ones.
It works as follows:
1. Lists available CryoSPARC projects and lets the user select one or all projects.
2. Lists jobs within the selected project(s) and allows the user to select specific jobs or all jobs.
3. Analyzes current symlink targets in the selected jobs and displays their path prefixes.
4. Prompts the user for the old prefix (to be removed) and the new prefix (to be inserted).
5. Confirms changes with the user before applying any modifications.
6. Uses `cli.job_import_replace_symlinks` to update symlinks in the selected jobs.
7. Reports modified links and any jobs that failed to repair.
This tool is interactive and requires confirmation before making any changes.
"""
# ANSI escape sequences used to highlight important output lines.
_BOLD = '\033[1m'
_RESET = '\033[0m'


def _select_jobs(client, prompt):
    """Interactively choose the (project_uid, job_uid) pairs to process.

    Returns a list of pairs (possibly empty), or ``None`` when the run must
    be aborted: the user entered an unknown UID, or the job list of the
    single selected project could not be fetched.
    """
    projects = client.list_projects()
    project_uids = [proj['uid'] for proj in projects]

    print("Available projects:")
    for uid in project_uids:
        print(f" - {uid}")

    project_choice = prompt(
        "\nEnter a project UID to process, or 'all' to process all projects: "
    ).strip()
    if project_choice != 'all' and project_choice not in project_uids:
        print("❌ Invalid project UID. Exiting.")
        return None

    selected_projects = project_uids if project_choice == 'all' else [project_choice]

    # Exactly one project (also when 'all' resolves to a single project):
    # let the user narrow the run down to one job.
    if len(selected_projects) == 1:
        project_uid = selected_projects[0]
        try:
            jobs = client.list_jobs(project_uid=project_uid)
        except Exception as e:
            print(f"❌ Could not get jobs for project {project_uid}: {e}")
            return None

        job_uids = [job['uid'] for job in jobs]
        print(f"\nJobs for project {project_uid}:")
        for uid in job_uids:
            print(f" - {uid}")

        job_choice = prompt(
            "\nEnter a job UID to process, or 'all' to process all jobs: "
        ).strip()
        if job_choice == 'all':
            return [(project_uid, uid) for uid in job_uids]
        if job_choice not in job_uids:
            print("❌ Invalid job UID. Exiting.")
            return None
        return [(project_uid, job_choice)]

    # Several (or zero) projects: take every job of every project. A project
    # whose jobs cannot be listed is reported and skipped (best effort).
    selected_jobs = []
    for proj_uid in selected_projects:
        try:
            uids = [job['uid'] for job in client.list_jobs(project_uid=proj_uid)]
        except Exception as e:
            print(f"⚠️ Could not get jobs for project {proj_uid}: {e}")
            continue
        selected_jobs.extend((proj_uid, uid) for uid in uids)
    return selected_jobs


def _print_symlink_roots(client, selected_jobs):
    """Print the parent directories of current symlink targets, grouped by job."""
    from collections import defaultdict

    print("\n🔍 Analyzing current symlink targets...")
    symlink_roots = defaultdict(set)
    for project_uid, job_uid in selected_jobs:
        try:
            symlinks = client.get_job_symlinks(project_uid, job_uid)
        except Exception as e:
            print(f"⚠️ Could not get symlinks for {project_uid} {job_uid}: {e}")
            continue
        for item in symlinks:
            target = item.get("link_target")
            if not target:
                continue
            # Everything before the last '/' is the directory of the target.
            root = target.rsplit('/', 1)[0]
            symlink_roots[root].add((project_uid, job_uid))

    if not symlink_roots:
        print("⚠️ No symlinks found or accessible in the selected jobs.")
        return

    print("\n🔗 Current symlink target prefixes and associated jobs:")
    # Sorted so the listing is stable between runs (sets have no fixed order).
    for root, jobs in sorted(symlink_roots.items()):
        print(f"\n{_BOLD}{root}{_RESET}")
        for pj, jb in sorted(jobs):
            print(f" - {pj} {jb}")


def _repair_jobs(client, selected_jobs, prefix_cut, prefix_new):
    """Replace the symlink prefix in every selected job; return the failed pairs."""
    failed_jobs = []
    for project_uid, job_uid in selected_jobs:
        print(f"🔧 Repairing {project_uid} {job_uid}")
        try:
            modified_count = client.job_import_replace_symlinks(
                project_uid,
                job_uid,
                prefix_cut,
                prefix_new,
            )
            highlight = modified_count > 0
        except Exception as e:
            # Best effort: remember the failure and carry on with the next job.
            failed_jobs.append((project_uid, job_uid))
            print(f"❌ Failed to repair {project_uid} {job_uid}: {e}\n")
            continue

        message = f"✅ Finished. Modified {modified_count} links."
        if highlight:
            print(f"{_BOLD}{message}{_RESET}\n")
        else:
            print(f"{message}\n")
    return failed_jobs


def main(client=None, prompt=input):
    """Interactively repair symlinks of CryoSPARC import jobs.

    Walks the user through project/job selection, shows the directories the
    current symlinks point to, asks for the old and new path prefix, and --
    after explicit confirmation -- calls ``job_import_replace_symlinks`` for
    every selected job.

    Args:
        client: Object exposing ``list_projects``, ``list_jobs``,
            ``get_job_symlinks`` and ``job_import_replace_symlinks``.
            Defaults to the global ``cli`` object that the
            ``cryosparcm icli`` session provides.
        prompt: Callable used to ask the user a question; takes the prompt
            text and returns the answer as a string. Defaults to ``input``.

    Returns:
        None. All results are reported on standard output.
    """
    print("🧩 This script will loop over CryoSPARC jobs and attempt to repair symlinks.")
    print("🔁 It replaces path prefixes in imported file links using `cli.job_import_replace_symlinks`.")
    print("🛑 You will be asked to confirm before any changes are made.\n")

    if client is None:
        try:
            client = cli  # injected into the namespace by `cryosparcm icli`
        except NameError:
            print("❌ The CryoSPARC `cli` object is not available. "
                  "Run this script inside a `cryosparcm icli` session.")
            return

    # Steps 1 + 2: project and job selection
    selected_jobs = _select_jobs(client, prompt)
    if selected_jobs is None:
        return
    if not selected_jobs:
        # Nothing to repair: do not ask for prefixes or confirmation.
        print("⚠️ No jobs selected. Nothing to do.")
        return

    # Step 3: symlink summary
    _print_symlink_roots(client, selected_jobs)

    # Step 4: prefixes
    print()
    prefix_cut = prompt("Enter old prefix to remove (prefix_cut): ").strip()
    prefix_new = prompt("Enter new prefix to insert (prefix_new): ").strip()
    if not prefix_cut or not prefix_new:
        print("❌ Both prefix_cut and prefix_new must be provided. Exiting.")
        return

    cut_slash = prefix_cut.endswith('/')
    new_slash = prefix_new.endswith('/')
    if cut_slash != new_slash:
        # Only a warning: the replacement may still be what the user intends.
        print("⚠️ Warning: The trailing slash on prefix_cut and prefix_new differs.")
        print(f" prefix_cut ends with '/'? {cut_slash}")
        print(f" prefix_new ends with '/'? {new_slash}")
        print(" This might cause unexpected mismatches during replacement.\n")

    # Step 5: confirmation (anything other than 'y' aborts)
    print(f"\n📂 prefix_cut: {prefix_cut}")
    print(f"📂 prefix_new: {prefix_new}")
    print(f"\nYou are about to process {len(selected_jobs)} job(s).")
    confirm = prompt("Are you sure you want to apply these changes? [y/N]: ").strip().lower()
    if confirm != 'y':
        print("❌ Aborted by user.")
        return

    # Steps 6 + 7: repair and report
    failed_jobs = _repair_jobs(client, selected_jobs, prefix_cut, prefix_new)

    print(f"🎯 Completed. {len(failed_jobs)} jobs failed to repair.")
    if failed_jobs:
        print("🔻 Failed jobs:")
        for pj, j in failed_jobs:
            print(f" - {pj} {j}")
# Run immediately and unconditionally: per the README, this script is meant to
# be exec()'d inside a `cryosparcm icli` session, where `cli` is already defined.
main()