From f7283512daf08cf47c96ab67b31ea93ca7af790b Mon Sep 17 00:00:00 2001 From: caubet_m Date: Thu, 20 Jun 2019 20:18:13 +0200 Subject: [PATCH] Added Known Problems and Troubleshooting (splitted) --- _data/sidebars/merlin6_sidebar.yml | 10 ++- .../known-problems-and-troubleshooting.md | 83 ------------------- pages/merlin6/known-problems.md | 36 ++++++++ pages/merlin6/troubleshooting.md | 43 ++++++++++ 4 files changed, 85 insertions(+), 87 deletions(-) delete mode 100644 pages/merlin6/known-problems-and-troubleshooting.md create mode 100644 pages/merlin6/known-problems.md create mode 100644 pages/merlin6/troubleshooting.md diff --git a/_data/sidebars/merlin6_sidebar.yml b/_data/sidebars/merlin6_sidebar.yml index b3fc49c..0faa1bf 100644 --- a/_data/sidebars/merlin6_sidebar.yml +++ b/_data/sidebars/merlin6_sidebar.yml @@ -15,8 +15,6 @@ entries: url: /merlin6/code-of-conduct.html - title: Hardware And Software Description url: /merlin6/hardware-and-software.html - - title: Migrating From Merlin5 - url: /merlin6/migrating.html - title: Accessing Merlin folderitems: - title: Requesting Accounts @@ -39,7 +37,11 @@ entries: url: /merlin6/running-jobs.html - title: Support folderitems: + - title: Migrating From Merlin5 + url: /merlin6/migrating.html + - title: Known Problems + url: /merlin6/known-problems.html + - title: Troubleshooting + url: /merlin6/troubleshooting.html - title: Contact url: /merlin6/contact.html - - title: Known Problems and Troubleshooting - url: /merlin6/troubleshooting.html diff --git a/pages/merlin6/known-problems-and-troubleshooting.md b/pages/merlin6/known-problems-and-troubleshooting.md deleted file mode 100644 index 80b3ddd..0000000 --- a/pages/merlin6/known-problems-and-troubleshooting.md +++ /dev/null @@ -1,83 +0,0 @@ ---- -title: Known Problems and Troubleshooting -#tags: -#keywords: -last_updated: 13 June 2019 -#summary: "" -sidebar: merlin6_sidebar -permalink: /merlin6/troubleshooting.html ---- - -## Known Problems - -### Paraview, ANSYS and 
OpenGL - -Try to use X11(mesa) driver for Paraview and ANSYS instead of OpenGL: - -```bash -# ANSYS -module load ANSYS -fluent -driver x11 - -# ParaView -module load paraview -paraview --mesa -``` - -###+ Illegal instructions - -It may happened that your code, compiled on one machine will not be executed on another throwing exception like "(Illegal instruction)". -Check (with "hostname" command) on which of the node you are and compare it with the names from first item. We observe few applications -that can't be run on merlin-c-01..16 because of this problem (notice that these machines are more then 5 years old). Hint: you may -choose the particular flavour of the machines for your slurm job, check the "--cores-per-node" option for sbatch: - -```bash -sbatch --cores-per-socket=8 Script.sh # will filter the selection of the machine and exclude the oldest one, merlin-c-01..16 -``` - -## Troubleshooting - -### Before asking for help - -Please, if you have problems running jobs and you want to report something or just ask for help, -please gather and attach in advance the following information: - -* Unix username and session (``who am i`` command output) -* Environment settings (``env`` command output) -* Slurm batch script location (path to script and input/output files) -* Slurm job_id (``id`` is returned on ``sbatch``/``salloc`` command, but also can be taken from ``squeue`` commmand) - -### Troubleshooting SSH - -Use the ssh command with the "-vvv" option and copy and paste (no screenshot please) -the output to your request in Service-Now. Example - -```bash -ssh -Y -vvv bond_j@merlin-l-01 -``` - -### Troubleshooting SLURM - -If one copies Slurm commands or batch scripts from another cluster, -they may need some changes (often minor) to run successfully on Merlin5. -Examine carefully the error message, especially concerning the options -used in the slurm commands. 
- -Try to submit jobs using the examples given in the section "Using Batch System to Submit Jobs to Merlin5". -If you can run successfully an example for a type of job (!OpenMP, MPI) similar to your one, -try to edit the example to run your application. - -If the problem remains, then, in your request in Service-Now, describe the problem in details that -are needed to reproduce it. Include the output of the following commands: - -```bash -date -hostname -pwd -module list - -# All slurm commands used with the corresponding output -``` - -Do not delete any output and error files generated by Slurm. -Make a copy of the failed job script if you like to edit it meanwhile. diff --git a/pages/merlin6/known-problems.md b/pages/merlin6/known-problems.md new file mode 100644 index 0000000..11ab045 --- /dev/null +++ b/pages/merlin6/known-problems.md @@ -0,0 +1,36 @@ +--- +title: Known Problems +#tags: +#keywords: +last_updated: 20 June 2019 +#summary: "" +sidebar: merlin6_sidebar +permalink: /merlin6/known-problems.html +--- + +## Known Problems + +### Paraview, ANSYS and OpenGL + +Try to use the X11 (mesa) driver for Paraview and ANSYS instead of OpenGL: + +```bash +# ANSYS +module load ANSYS +fluent -driver x11 + +# ParaView +module load paraview +paraview --mesa +``` + +### Illegal instructions + +It may happen that your code, compiled on one machine, will not run on another, throwing an exception like "(Illegal instruction)". +Check (with the "hostname" command) which node you are on and compare it with the names from the first item. We have observed a few applications +that can't be run on merlin-c-01..16 because of this problem (notice that these machines are more than 5 years old). 
Hint: you may +choose a particular flavour of machine for your Slurm job; check the "--cores-per-socket" option for sbatch: + +```bash +sbatch --cores-per-socket=8 Script.sh # will filter the selection of the machine and exclude the oldest one, merlin-c-01..16 +``` diff --git a/pages/merlin6/troubleshooting.md b/pages/merlin6/troubleshooting.md new file mode 100644 index 0000000..6e4e19c --- /dev/null +++ b/pages/merlin6/troubleshooting.md @@ -0,0 +1,43 @@ +--- +title: Troubleshooting +#tags: +#keywords: +last_updated: 20 June 2019 +#summary: "" +sidebar: merlin6_sidebar +permalink: /merlin6/troubleshooting.html +--- + +For troubleshooting, please contact us through the official channels. See [Contact](/merlin6/contact.html) +for more information. + +## Troubleshooting running Slurm jobs + +If you want to report a problem or request help when running jobs, please **always provide** +the following information: + +1. Provide your batch script or, alternatively, the path to your batch script. +2. **Always** add the following commands to your batch script: + + ```bash + echo "User information:"; who am i + echo "Running hostname:"; hostname + echo "Current location:"; pwd + echo "User environment:"; env + echo "List of PModules:"; module list + ``` + +3. Whenever possible, provide the Slurm JobID. + +Providing this information is **extremely important** in order to ease debugging; a description +of the issue or just the error message alone is completely insufficient +in most cases. + +### Troubleshooting SSH + +Use the ssh command with the "-vvv" option and copy and paste (no screenshots please) +the output into your request in Service-Now. Example: + +```bash +ssh -Y -vvv $username@merlin-l-01.psi.ch +```