From bd15c88b7f17b7640ef2e4f678fb39a7cdfdc649 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Pedro=20Agostinho=20de=20Sousa?= Date: Wed, 30 Jul 2025 17:25:25 +0200 Subject: [PATCH] Add python script to replace symlinks in CryoSPARC projects --- README.md | 37 ++++++ scripts/replace_cryosparc_symlinks.py | 157 ++++++++++++++++++++++++++ 2 files changed, 194 insertions(+) create mode 100755 scripts/replace_cryosparc_symlinks.py diff --git a/README.md b/README.md index bfed65e..29ed217 100644 --- a/README.md +++ b/README.md @@ -4,3 +4,40 @@ This repository contains the necessary scripts to install CryoSPARC on the Merli For the complete installation guide, please refer to the following page: https://intranet.psi.ch/en/cls/cryosparc-merlin-7-installation + +# Fixing broken symlinks + +When raw data is imported into a CryoSPARC project, rather than copy the data into the project directory, symlinks are created inside the import job directories pointing to the original data files. + +If you're moving data that you used an "Import Particles", "Import Micrographs" or "Import Movies" job to bring into CryoSPARC, you will need to repair these jobs. When CryoSPARC imports these three types of data, it creates symlinks to each file inside the job's imported directory. These symlinks may become broken if the original path to the file no longer exists. You can check the status of the symlinks by running `ls -l` inside the imported directory of the job. Note: The "Import Templates" and "Import Volumes" jobs copy the specified files directly into the job directory. + +The script `replace_cryosparc_symlinks.py` automates the repair of broken symlinks in CryoSPARC jobs by replacing +outdated path prefixes with new ones. + +It works as follows: +1. Lists available CryoSPARC projects and lets the user select one or all projects. +2. Lists jobs within the selected project(s) and allows the user to select specific jobs or all jobs. +3. 
Analyzes current symlink targets in the selected jobs and displays their path prefixes. +4. Prompts the user for the old prefix (to be removed) and the new prefix (to be inserted). +5. Confirms changes with the user before applying any modifications. +6. Uses `cli.job_import_replace_symlinks` to update symlinks in the selected jobs. +7. Reports modified links and any jobs that failed to repair. + +This tool is interactive and requires confirmation before making any changes. + +Before running the script, you first need to start the CryoSPARC interactive Python session: + +```bash +/data/user/$USER/cryosparc/cryosparc_master/bin/cryosparcm icli +``` + +Once inside the interactive Python shell (indicated by `>>>`), you can execute the script by running: + +```python +exec(open("/path/to/script/replace_cryosparc_symlinks.py").read()) +``` + +> ⚠️ Important: +Replace `/path/to/script/replace_cryosparc_symlinks.py` with the actual path to the script file. +This will run the script in the context of the CryoSPARC CLI environment, so the `cli` object will be available. + diff --git a/scripts/replace_cryosparc_symlinks.py b/scripts/replace_cryosparc_symlinks.py new file mode 100755 index 0000000..6249dd0 --- /dev/null +++ b/scripts/replace_cryosparc_symlinks.py @@ -0,0 +1,157 @@ +""" +This script automates the repair of broken symlinks in CryoSPARC jobs by replacing +outdated path prefixes with new ones. + +It works as follows: +1. Lists available CryoSPARC projects and lets the user select one or all projects. +2. Lists jobs within the selected project(s) and allows the user to select specific jobs or all jobs. +3. Analyzes current symlink targets in the selected jobs and displays their path prefixes. +4. Prompts the user for the old prefix (to be removed) and the new prefix (to be inserted). +5. Confirms changes with the user before applying any modifications. +6. Uses `cli.job_import_replace_symlinks` to update symlinks in the selected jobs. +7. 
def main():
    """Interactively repair symlinks of CryoSPARC import jobs.

    The function walks the user through the following steps:

    1. Choose one project UID (or ``all``) from ``cli.list_projects()``.
    2. For a single project, choose one job UID (or ``all``) from
       ``cli.list_jobs()``; for several projects every job is selected.
    3. Show the directory part of each current symlink target, grouped by
       the jobs that use it (``cli.get_job_symlinks``).
    4. Ask for the old prefix to remove and the new prefix to insert.
    5. Ask for confirmation; nothing is modified before the user types ``y``.
    6. Call ``cli.job_import_replace_symlinks`` for every selected job.
    7. Report the number of modified links and any jobs that failed to repair.

    This tool is interactive and requires confirmation before making any
    changes. It relies on a global ``cli`` object, which the README says is
    available when the script is run with ``exec`` inside the
    ``cryosparcm icli`` shell; it is not imported here.

    Returns:
        None. All results are reported on standard output.
    """
    from collections import defaultdict

    # ANSI formatting
    BOLD = '\033[1m'
    RESET = '\033[0m'

    print("🧩 This script will loop over CryoSPARC jobs and attempt to repair symlinks.")
    print("🔁 It replaces path prefixes in imported file links using `cli.job_import_replace_symlinks`.")
    print("🛑 You will be asked to confirm before any changes are made.\n")

    # Step 1: Project selection
    project_uids = [proj['uid'] for proj in cli.list_projects()]

    print("Available projects:")
    for uid in project_uids:
        print(f"  - {uid}")

    project_choice = input("\nEnter a project UID to process, or 'all' to process all projects: ").strip()

    if project_choice != 'all' and project_choice not in project_uids:
        print("❌ Invalid project UID. Exiting.")
        return

    selected_projects = project_uids if project_choice == 'all' else [project_choice]
    selected_jobs = []

    # Step 2: Job selection
    if len(selected_projects) == 1:
        # A single project: let the user narrow the run down to one job.
        project_uid = selected_projects[0]
        try:
            jobs = cli.list_jobs(project_uid=project_uid)
        except Exception as e:
            print(f"❌ Could not get jobs for project {project_uid}: {e}")
            return

        job_uids = [job['uid'] for job in jobs]
        print(f"\nJobs for project {project_uid}:")
        for uid in job_uids:
            print(f"  - {uid}")

        job_choice = input("\nEnter a job UID to process, or 'all' to process all jobs: ").strip()

        if job_choice != 'all' and job_choice not in job_uids:
            print("❌ Invalid job UID. Exiting.")
            return

        if job_choice == 'all':
            selected_jobs = [(project_uid, job_uid) for job_uid in job_uids]
        else:
            selected_jobs = [(project_uid, job_choice)]
    else:
        # Several projects: take every job; a project whose jobs cannot be
        # listed is reported and skipped so the others are still processed.
        for proj_uid in selected_projects:
            try:
                jobs = cli.list_jobs(project_uid=proj_uid)
            except Exception as e:
                print(f"⚠️ Could not get jobs for project {proj_uid}: {e}")
                continue
            selected_jobs.extend((proj_uid, job['uid']) for job in jobs)

    # Without any job there is nothing to analyze or repair, so stop before
    # asking the user for prefixes and a confirmation that would do nothing.
    if not selected_jobs:
        print("❌ No jobs selected. Exiting.")
        return

    # Step 3: Symlink summary
    print("\n🔍 Analyzing current symlink targets...")
    symlink_roots = defaultdict(set)

    for project_uid, job_uid in selected_jobs:
        try:
            symlinks = cli.get_job_symlinks(project_uid, job_uid)
        except Exception as e:
            print(f"⚠️ Could not get symlinks for {project_uid} {job_uid}: {e}")
            continue

        for item in symlinks:
            target = item.get("link_target")
            if not target:
                continue
            # Keep everything before the last '/', i.e. the directory part.
            root = target.rsplit('/', 1)[0]
            symlink_roots[root].add((project_uid, job_uid))

    if symlink_roots:
        print("\n🔗 Current symlink target prefixes and associated jobs:")
        # Sorted so that repeated runs list prefixes and jobs in the same order.
        for root in sorted(symlink_roots):
            print(f"\n{BOLD}{root}{RESET}")
            for pj, jb in sorted(symlink_roots[root]):
                print(f"  - {pj} {jb}")
    else:
        print("⚠️ No symlinks found or accessible in the selected jobs.")

    # Step 4: Prefixes
    print()
    prefix_cut = input("Enter old prefix to remove (prefix_cut): ").strip()
    prefix_new = input("Enter new prefix to insert (prefix_new): ").strip()

    if not prefix_cut or not prefix_new:
        print("❌ Both prefix_cut and prefix_new must be provided. Exiting.")
        return

    cut_slash = prefix_cut.endswith('/')
    new_slash = prefix_new.endswith('/')
    if cut_slash != new_slash:
        print("⚠️ Warning: The trailing slash on prefix_cut and prefix_new differs.")
        print(f"   prefix_cut ends with '/'? {cut_slash}")
        print(f"   prefix_new ends with '/'? {new_slash}")
        print("   This might cause unexpected mismatches during replacement.\n")

    # Step 5: Confirmation
    print(f"\n📂 prefix_cut: {prefix_cut}")
    print(f"📂 prefix_new: {prefix_new}")
    print(f"\nYou are about to process {len(selected_jobs)} job(s).")
    confirm = input("Are you sure you want to apply these changes? [y/N]: ").strip().lower()
    if confirm != 'y':
        print("❌ Aborted by user.")
        return

    # Step 6: Repair; a failing job is recorded and the loop carries on.
    failed_jobs = []
    for project_uid, job_uid in selected_jobs:
        try:
            print(f"🔧 Repairing {project_uid} {job_uid}")
            modified_count = cli.job_import_replace_symlinks(
                project_uid,
                job_uid,
                prefix_cut,
                prefix_new,
            )
            if modified_count > 0:
                # Highlight jobs where links were actually rewritten.
                print(f"{BOLD}✅ Finished. Modified {modified_count} links.{RESET}\n")
            else:
                print(f"✅ Finished. Modified {modified_count} links.\n")
        except Exception as e:
            failed_jobs.append((project_uid, job_uid))
            print(f"❌ Failed to repair {project_uid} {job_uid}: {e}\n")

    # Step 7: Report
    print(f"🎯 Completed. {len(failed_jobs)} jobs failed to repair.")
    if failed_jobs:
        print("🔻 Failed jobs:")
        for pj, j in failed_jobs:
            print(f"  - {pj} {j}")


# Run
main()