Compare commits: jekyll-doc...rmount (65 commits)

Commits (author and date columns were not captured):
2fd43f8912, 557204e1c9, 3296e3e9e7, e806dc2d3c, 85c19e51d5, 5f058b6488, 7527861f0f, aa491274de,
7848bcf191, 1a750b3250, b5957bed56, 5b9c4d2fe9, 4b0291ec09, 8778464836, e618a4f5e7, 6a3f8bcbd0,
86fe7781ca, e4f562e8d6, a667f6414d, b69862d44f, 92c0e114a0, d46e5bfb63, 077e869ed9, 9b7da5c407,
79eee6e8f9, 827e0de138, 8e01f45056, 94d850c513, dddb88a94a, 5719e247ef, 0d606942ec, 5e055979f9,
1ababa5169, 2ece10f050, 2d267ae8d2, 7e8b56e0f7, c64149733f, a80fcbe7f0, 6050d62c96, 8ffd1b6019,
205f174ba7, 9356e3554c, 300b1950a4, 9d7a1e6af5, a7a5173ec0, 057d792517, 168dc84226, c48ebe41a1,
b8ff389a9d, 9837429824, 4bcde04fea, 4cade03842, a4a32ffdf9, b9c7881617, 731d2fea73, 1f510c2461,
a2ddf59412, 252d08affb, 3fa5ae5f36, 821ef38eb4, 5952e5cbe5, 57af7019c5, b6ecdcaeb3, 464c118384,
51435c5cf5
@@ -2,7 +2,7 @@ stages:
- test
- deploy

image: alpine:latest
image: alpine:3.13

variables:
JEKYLL_ENV: production
404.md (2)
@@ -1,6 +1,6 @@
---
title: "Page Not Found"
search: exclude
---
---

Sorry, but the page you were trying to view does not exist. Try searching for it or looking at the URL to see if it looks correct.
Dockerfile (15)
@@ -1,11 +1,20 @@
FROM jekyll/builder
FROM alpine:3.13

WORKDIR /tmp
ADD Gemfile /tmp/
ADD Gemfile.lock /tmp/
RUN bundle install --frozen

FROM jekyll/jekyll
RUN apk list -I && \
    apk --no-cache add libatomic readline readline-dev libxml2 libxml2-dev \
    ncurses-terminfo-base ncurses-terminfo \
    libxslt libxslt-dev zlib-dev zlib \
    ruby ruby-dev yaml yaml-dev \
    libffi-dev build-base git nodejs
RUN gem env
RUN gem install etc bundler --no-document
RUN pwd
RUN ls -l
RUN bundle install

VOLUME /src
EXPOSE 4001
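As an aside (not part of the diff), the updated Dockerfile exposes port 4001 and declares `/src` as the site volume, so a local build-and-serve run could look roughly like the sketch below; the image tag and the `jekyll serve` invocation are assumptions, not something defined by this repository.

```bash
# Sketch only: build the image from the repository root and serve the docs locally.
docker build -t merlin-docs .          # tag name is an assumption
docker run --rm -it \
  -v "$(pwd):/src" \                   # mount the checkout into the declared /src volume
  -p 4001:4001 \                       # forward the EXPOSEd port
  merlin-docs \
  bundle exec jekyll serve --source /src --host 0.0.0.0 --port 4001
```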
Gemfile (22)
@@ -1,13 +1,25 @@
source "https://rubygems.org"

# to publish on github page
gem 'github-pages', group: :jekyll_plugins
# gem 'github-pages', group: :jekyll_plugins
gem 'github-pages', "~> 215"

# to publish without github page
#gem "jekyll"

# gem "jekyll-redirect-from", group: :jekyll_plugins
gem "jekyll-redirect-from", "~> 0.16.0"
gem "json", "~> 2.2"

gem "webrick", "~> 1.7.0"
gem "etc", "~> 1.2.0"
gem "bigdecimal", "~> 1.4"
gem 'eventmachine', "~> 1.2.7"
# gem 'faraday', "~> 1.4.3"
# gem 'addressable', "~> 2.7.0"
# gem 'faraday-net_http_persistent', "~> 1.1.0"
# gem 'nokogiri', '~> 1.11', '>= 1.11.7'
# gem 'ruby2_keywords', "~> 0.0.4"
# gem 'rubyzip', "~> 2.3.0"

gem 'ffi', "~> 1.15.3"
gem 'http_parser.rb', "~> 0.6.0" # requires mkdir in /usr/bin/mkdir
gem "jekyll", "~> 3.9.0"

gem "jekyll-redirect-from", group: :jekyll_plugins
Gemfile.lock (152)
@@ -1,13 +1,13 @@
GEM
remote: https://rubygems.org/
specs:
activesupport (6.0.3.2)
activesupport (6.0.4)
concurrent-ruby (~> 1.0, >= 1.0.2)
i18n (>= 0.7, < 2)
minitest (~> 5.1)
tzinfo (~> 1.1)
zeitwerk (~> 2.2, >= 2.2.2)
addressable (2.7.0)
addressable (2.8.0)
public_suffix (>= 2.0.2, < 5.0)
bigdecimal (1.4.4)
coffee-script (2.4.1)
@@ -17,46 +17,62 @@ GEM
colorator (1.1.0)
commonmarker (0.17.13)
ruby-enum (~> 0.5)
concurrent-ruby (1.1.6)
dnsruby (1.61.3)
addressable (~> 2.5)
em-websocket (0.5.1)
concurrent-ruby (1.1.9)
dnsruby (1.61.7)
simpleidn (~> 0.1)
em-websocket (0.5.2)
eventmachine (>= 0.12.9)
http_parser.rb (~> 0.6.0)
ethon (0.12.0)
ffi (>= 1.3.0)
etc (1.2.0)
ethon (0.14.0)
ffi (>= 1.15.0)
eventmachine (1.2.7)
execjs (2.7.0)
faraday (1.0.1)
execjs (2.8.1)
faraday (1.5.1)
faraday-em_http (~> 1.0)
faraday-em_synchrony (~> 1.0)
faraday-excon (~> 1.1)
faraday-httpclient (~> 1.0.1)
faraday-net_http (~> 1.0)
faraday-net_http_persistent (~> 1.1)
faraday-patron (~> 1.0)
multipart-post (>= 1.2, < 3)
ffi (1.13.1)
ruby2_keywords (>= 0.0.4)
faraday-em_http (1.0.0)
faraday-em_synchrony (1.0.0)
faraday-excon (1.1.0)
faraday-httpclient (1.0.1)
faraday-net_http (1.0.1)
faraday-net_http_persistent (1.2.0)
faraday-patron (1.0.0)
ffi (1.15.3)
forwardable-extended (2.6.0)
gemoji (3.0.1)
github-pages (206)
github-pages-health-check (= 1.16.1)
jekyll (= 3.8.7)
github-pages (215)
github-pages-health-check (= 1.17.2)
jekyll (= 3.9.0)
jekyll-avatar (= 0.7.0)
jekyll-coffeescript (= 1.1.1)
jekyll-commonmark-ghpages (= 0.1.6)
jekyll-default-layout (= 0.1.4)
jekyll-feed (= 0.13.0)
jekyll-feed (= 0.15.1)
jekyll-gist (= 1.5.0)
jekyll-github-metadata (= 2.13.0)
jekyll-mentions (= 1.5.1)
jekyll-mentions (= 1.6.0)
jekyll-optional-front-matter (= 0.3.2)
jekyll-paginate (= 1.1.0)
jekyll-readme-index (= 0.3.0)
jekyll-redirect-from (= 0.15.0)
jekyll-redirect-from (= 0.16.0)
jekyll-relative-links (= 0.6.1)
jekyll-remote-theme (= 0.4.1)
jekyll-remote-theme (= 0.4.3)
jekyll-sass-converter (= 1.5.2)
jekyll-seo-tag (= 2.6.1)
jekyll-seo-tag (= 2.7.1)
jekyll-sitemap (= 1.4.0)
jekyll-swiss (= 1.0.0)
jekyll-theme-architect (= 0.1.1)
jekyll-theme-cayman (= 0.1.1)
jekyll-theme-dinky (= 0.1.1)
jekyll-theme-hacker (= 0.1.1)
jekyll-theme-hacker (= 0.1.2)
jekyll-theme-leap-day (= 0.1.1)
jekyll-theme-merlot (= 0.1.1)
jekyll-theme-midnight (= 0.1.1)
@@ -67,34 +83,35 @@ GEM
jekyll-theme-tactile (= 0.1.1)
jekyll-theme-time-machine (= 0.1.1)
jekyll-titles-from-headings (= 0.5.3)
jemoji (= 0.11.1)
kramdown (= 1.17.0)
jemoji (= 0.12.0)
kramdown (= 2.3.1)
kramdown-parser-gfm (= 1.1.0)
liquid (= 4.0.3)
mercenary (~> 0.3)
minima (= 2.5.1)
nokogiri (>= 1.10.4, < 2.0)
rouge (= 3.19.0)
rouge (= 3.26.0)
terminal-table (~> 1.4)
github-pages-health-check (1.16.1)
github-pages-health-check (1.17.2)
addressable (~> 2.3)
dnsruby (~> 1.60)
octokit (~> 4.0)
public_suffix (~> 3.0)
public_suffix (>= 2.0.2, < 5.0)
typhoeus (~> 1.3)
html-pipeline (2.13.0)
html-pipeline (2.14.0)
activesupport (>= 2)
nokogiri (>= 1.4)
http_parser.rb (0.6.0)
i18n (0.9.5)
concurrent-ruby (~> 1.0)
jekyll (3.8.7)
jekyll (3.9.0)
addressable (~> 2.4)
colorator (~> 1.0)
em-websocket (~> 0.5)
i18n (~> 0.7)
jekyll-sass-converter (~> 1.0)
jekyll-watch (~> 2.0)
kramdown (~> 1.14)
kramdown (>= 1.17, < 3)
liquid (~> 4.0)
mercenary (~> 0.3.3)
pathutil (~> 0.9)
@@ -114,14 +131,14 @@ GEM
rouge (>= 2.0, < 4.0)
jekyll-default-layout (0.1.4)
jekyll (~> 3.0)
jekyll-feed (0.13.0)
jekyll-feed (0.15.1)
jekyll (>= 3.7, < 5.0)
jekyll-gist (1.5.0)
octokit (~> 4.2)
jekyll-github-metadata (2.13.0)
jekyll (>= 3.4, < 5.0)
octokit (~> 4.0, != 4.4.0)
jekyll-mentions (1.5.1)
jekyll-mentions (1.6.0)
html-pipeline (~> 2.3)
jekyll (>= 3.7, < 5.0)
jekyll-optional-front-matter (0.3.2)
@@ -129,18 +146,19 @@ GEM
jekyll-paginate (1.1.0)
jekyll-readme-index (0.3.0)
jekyll (>= 3.0, < 5.0)
jekyll-redirect-from (0.15.0)
jekyll-redirect-from (0.16.0)
jekyll (>= 3.3, < 5.0)
jekyll-relative-links (0.6.1)
jekyll (>= 3.3, < 5.0)
jekyll-remote-theme (0.4.1)
jekyll-remote-theme (0.4.3)
addressable (~> 2.0)
jekyll (>= 3.5, < 5.0)
rubyzip (>= 1.3.0)
jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0)
rubyzip (>= 1.3.0, < 3.0)
jekyll-sass-converter (1.5.2)
sass (~> 3.4)
jekyll-seo-tag (2.6.1)
jekyll (>= 3.3, < 5.0)
jekyll-seo-tag (2.7.1)
jekyll (>= 3.8, < 5.0)
jekyll-sitemap (1.4.0)
jekyll (>= 3.7, < 5.0)
jekyll-swiss (1.0.0)
@@ -153,8 +171,8 @@ GEM
jekyll-theme-dinky (0.1.1)
jekyll (~> 3.5)
jekyll-seo-tag (~> 2.0)
jekyll-theme-hacker (0.1.1)
jekyll (~> 3.5)
jekyll-theme-hacker (0.1.2)
jekyll (> 3.5, < 5.0)
jekyll-seo-tag (~> 2.0)
jekyll-theme-leap-day (0.1.1)
jekyll (~> 3.5)
@@ -188,39 +206,48 @@ GEM
jekyll (>= 3.3, < 5.0)
jekyll-watch (2.2.1)
listen (~> 3.0)
jemoji (0.11.1)
jemoji (0.12.0)
gemoji (~> 3.0)
html-pipeline (~> 2.2)
jekyll (>= 3.0, < 5.0)
json (2.3.0)
kramdown (1.17.0)
json (2.5.1)
kramdown (2.3.1)
rexml
kramdown-parser-gfm (1.1.0)
kramdown (~> 2.0)
liquid (4.0.3)
listen (3.2.1)
listen (3.5.1)
rb-fsevent (~> 0.10, >= 0.10.3)
rb-inotify (~> 0.9, >= 0.9.10)
mercenary (0.3.6)
mini_portile2 (2.4.0)
mini_portile2 (2.5.3)
minima (2.5.1)
jekyll (>= 3.5, < 5.0)
jekyll-feed (~> 0.9)
jekyll-seo-tag (~> 2.1)
minitest (5.14.1)
minitest (5.14.4)
multipart-post (2.1.1)
nokogiri (1.10.9)
mini_portile2 (~> 2.4.0)
octokit (4.18.0)
nokogiri (1.11.7)
mini_portile2 (~> 2.5.0)
racc (~> 1.4)
nokogiri (1.11.7-x86_64-linux)
racc (~> 1.4)
octokit (4.21.0)
faraday (>= 0.9)
sawyer (~> 0.8.0, >= 0.5.3)
pathutil (0.16.2)
forwardable-extended (~> 2.6)
public_suffix (3.1.1)
rb-fsevent (0.10.4)
public_suffix (4.0.6)
racc (1.5.2)
rb-fsevent (0.11.0)
rb-inotify (0.10.1)
ffi (~> 1.0)
rouge (3.19.0)
ruby-enum (0.8.0)
rexml (3.2.5)
rouge (3.26.0)
ruby-enum (0.9.0)
i18n
rubyzip (2.3.0)
ruby2_keywords (0.0.5)
rubyzip (2.3.2)
safe_yaml (1.0.5)
sass (3.7.4)
sass-listen (~> 4.0.0)
@@ -230,24 +257,37 @@ GEM
sawyer (0.8.2)
addressable (>= 2.3.5)
faraday (> 0.8, < 2.0)
simpleidn (0.2.1)
unf (~> 0.1.4)
terminal-table (1.8.0)
unicode-display_width (~> 1.1, >= 1.1.1)
thread_safe (0.3.6)
typhoeus (1.4.0)
ethon (>= 0.9.0)
tzinfo (1.2.7)
tzinfo (1.2.9)
thread_safe (~> 0.1)
unf (0.1.4)
unf_ext
unf_ext (0.0.7.7)
unicode-display_width (1.7.0)
zeitwerk (2.3.1)
webrick (1.7.0)
zeitwerk (2.4.2)

PLATFORMS
ruby
x86_64-linux

DEPENDENCIES
bigdecimal (~> 1.4)
github-pages
jekyll-redirect-from
etc (~> 1.2.0)
eventmachine (~> 1.2.7)
ffi (~> 1.15.3)
github-pages (~> 215)
http_parser.rb (~> 0.6.0)
jekyll (~> 3.9.0)
jekyll-redirect-from (~> 0.16.0)
json (~> 2.2)
webrick (~> 1.7.0)

BUNDLED WITH
2.1.4
2.2.21
@@ -48,12 +48,14 @@ by Tom Johnson.
title: Introduction
sidebar: merlin6_sidebar
permalink: /merlin6/introduction.html
keywords: key1, key2
---
```
- Sidebars are specified in data files, e.g. `_data/sidebars/merlin6_sidebar.yml`.
- The top menu is controlled by `_data/topnav.yml`
- News can be added in `_posts`. Filenames must include the date.
- Lots of features still need to be configured (e.g. pdf output, tags, etc)
- The search bar finds substrings of the title, tags, keywords, and summary frontmatter.

## License
_data/sidebars/CSCS_sidebar.yaml (new file, 15)
@@ -0,0 +1,15 @@
# Follow the pattern here for the URLs -- no slash at the beginning, and include the .html. The link here is rendered exactly as is in the Markdown references.

entries:
- product: PSI HPC@CSCS
folders:
- title: Overview
# URLs for top-level folders are optional. If omitted it is a bit easier to toggle the accordion.
folderitems:
- title: Overview
url: /CSCS/index.html
- title: Operations
folderitems:
- title: Transfer Data
url: /CSCS/transfer-data.html

@@ -16,3 +16,7 @@ entries:
- title: The Merlin Local HPC Cluster
url: /merlin6/introduction.html
output: web
- title: PSI HPC@CSCS
url: /CSCS/index.html
output: web
@@ -39,6 +39,8 @@ entries:
url: /merlin6/nomachine.html
- title: Configuring SSH Keys
url: /merlin6/ssh-keys.html
- title: Kerberos and AFS authentication
url: /merlin6/kerberos.html
- title: Software repository - PModules
url: /merlin6/using-modules.html
- title: Slurm General Documentation

@@ -87,22 +89,34 @@ entries:
url: /merlin6/jupyterhub-trouble.html
- title: Software Support
folderitems:
- title: OpenMPI
url: /merlin6/openmpi.html
- title: IntelMPI
url: /merlin6/impi.html
- title: Python
url: /merlin6/python.html
- title: ANSYS
url: /merlin6/ansys.html
- title: ANSYS RSM
url: /merlin6/ansys-rsm.html
- title: ANSYS/CFX
url: /merlin6/ansys-cfx.html
- title: ANSYS/Fluent
url: /merlin6/ansys-fluent.html
- title: ANSYS/MAPDL
url: /merlin6/ansys-mapdl.html
- title: ANSYS/HFSS
url: /merlin6/ansys-hfss.html
- title: GOTHIC
url: /merlin6/gothic.html
- title: merlin_rmount
url: /merlin6/merlin-rmount.html
- title: IntelMPI
url: /merlin6/impi.html
- title: OpenMPI
url: /merlin6/openmpi.html
- title: ParaView
url: /merlin6/paraview.html
- title: Python
url: /merlin6/python.html
- title: Support
folderitems:
- title: FAQ
url: /merlin6/faq.html
- title: Known Problems
url: /merlin6/known-problems.html
- title: Troubleshooting
_data/sidebars/merlin7_sidebar.yml (new file, 26)
@@ -0,0 +1,26 @@
# Follow the pattern here for the URLs -- no slash at the beginning, and include the .html. The link here is rendered exactly as is in the Markdown references.

entries:
- product: Merlin
version: 7
folders:
- title: Quick Start Guide
folderitems:
- title: Introduction
url: /merlin7/introduction.html
- title: How To Use Merlin7
folderitems:
- title: Cray systems modules
url: /merlin7/cray-module-env.html
- title: Transferring files
url: /merlin7/file-transfers.html
- title: Slurm General Documentation
folderitems:
- title: Merlin7 Infrastructure
url: /merlin7/slurm-configuration.html
- title: Slurm Batch Script Examples
url: /merlin7/slurm-examples.html
- title: Software Support
folderitems:
- title: ANSYS RSM
url: /merlin7/ansys-rsm.html
@@ -1,34 +1,36 @@
## Topnav single links
## if you want to list an external url, use external_url instead of url. the theme will apply a different link base.
topnav:
- title: Topnav
items:
# - title: GitHub
# external_url: https://github.com/tomjoht/documentation-theme-jekyll
- title: News
url: /news
- title: Topnav
items:
# - title: GitHub
# external_url: https://github.com/tomjoht/documentation-theme-jekyll
- title: News
url: /news

#Topnav dropdowns
topnav_dropdowns:
- title: Topnav dropdowns
folders:
- title: Quick Start
folderitems:
- title: Introduction
url: /merlin6/introduction.html
- title: Requesting Accounts
url: /merlin6/request-account.html
- title: Requesting Projects
url: /merlin6/request-project.html
- title: Accessing the Interactive Nodes
url: /merlin6/interactive.html
- title: Accessing the Slurm Clusters
url: /merlin6/slurm-access.html
- title: Merlin Slurm Clusters
folderitems:
- title: Cluster 'merlin5'
url: /merlin5/slurm-configuration.html
- title: Cluster 'merlin6'
url: /gmerlin6/slurm-configuration.html
- title: Cluster 'gmerlin6'
url: /gmerlin6/slurm-configuration.html
- title: Topnav dropdowns
folders:
- title: Quick Start
folderitems:
- title: Introduction
url: /merlin6/introduction.html
- title: Requesting Accounts
url: /merlin6/request-account.html
- title: Requesting Projects
url: /merlin6/request-project.html
- title: Accessing the Interactive Nodes
url: /merlin6/interactive.html
- title: Accessing the Slurm Clusters
url: /merlin6/slurm-access.html
- title: Clusters
folderitems:
- title: Cluster 'merlin5'
url: /merlin5/slurm-configuration.html
- title: Cluster 'merlin6'
url: /gmerlin6/slurm-configuration.html
- title: Cluster 'merlin7'
url: /merlin7/slurm-configuration.html
- title: Cluster 'gmerlin6'
url: /gmerlin6/slurm-configuration.html
BIN images/ANSYS/HFSS/01_Select_Scheduler_Menu.png (new file, 22 KiB)
BIN images/ANSYS/HFSS/02_Select_Scheduler_RSM_Remote.png (new file, 9.6 KiB)
BIN images/ANSYS/HFSS/03_Select_Scheduler_Slurm.png (new file, 9.7 KiB)
BIN images/ANSYS/HFSS/04_Submit_Job_Menu.png (new file, 22 KiB)
BIN images/ANSYS/HFSS/05_Submit_Job_Product_Path.png (new file, 67 KiB)
BIN images/ANSYS/rsm-1-add_hpc_resource.png (new file, 508 KiB)
BIN images/ANSYS/rsm-2-add_cluster.png (new file, 23 KiB)
BIN images/ANSYS/rsm-3-add_scratch_info.png (new file, 29 KiB)
BIN images/ANSYS/rsm-4-get_slurm_queues.png (new file, 21 KiB)
BIN images/ANSYS/rsm-5-authenticating.png (new file, 6.4 KiB)
BIN images/ANSYS/rsm-6-selected-partitions.png (new file, 28 KiB)
BIN images/NoMachine/screen_nx_connect.png (new file, 61 KiB)
BIN images/NoMachine/screen_nx_existingsession.png (new file, 49 KiB)
BIN images/NoMachine/screen_nx_newsession.png (new file, 49 KiB)
BIN images/Slurm/scom.gif (new file, 1008 KiB)
BIN (modified image, filename not captured; before 134 KiB, after 157 KiB)
BIN images/rmount/mount.png (new file, 52 KiB)
BIN images/rmount/select-mount.png (new file, 41 KiB)
BIN images/rmount/thunar_mount.png (new file, 127 KiB)
BIN pages/CSCS/downloads/CSCS/PSI_CSCSAllocations2023.xltx (new file)
pages/CSCS/index.md (new file, 64)
@@ -0,0 +1,64 @@
---
title: PSI HPC@CSCS
#tags:
#keywords:
last_updated: 13 April 2022
#summary: ""
sidebar: CSCS_sidebar
permalink: /CSCS/index.html
---

## PSI HPC@CSCS

To offer high-end HPC resources to PSI users, PSI has a long-standing collaboration with
the national supercomputing centre CSCS (since 2005). Some of the resources are procured by
central PSI funds, while users have the option of an additional buy-in at the same rates.

### PSI resources at Piz Daint

The yearly computing resources at CSCS for the PSI projects are in general 627,000 NH (Node Hours).
The yearly storage resources for the PSI projects are a total of 80 TB. These resources are
centrally financed, but in addition experiments can individually purchase more resources.

### How to request a PSI project

A survey is sent out in the third quarter of each year. This survey is used to request
CSCS resources for the upcoming year.

Users registered in the **PSI HPC@CSCS mailing list** <psi-hpc-at-cscs@lists.psi.ch> will
receive notification and details about the survey, for example:
* Link to the survey
* Update of resource changes
* Other details of the process

Generally, users need to specify in the survey the total resources they intend to use
next year and also how they would like to split it over the 4 quarters (e.g. 25%, 25%,
25%, 25%). In general, we provide the possibility to adapt the distribution over the
course of next year if required. The minimum allocation over a year is 10,000 node hours.
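As a hypothetical illustration of that split (not part of the new page): a project requesting 100,000 NH with an even distribution would enter 25,000 NH for each of the four quarters, while a yearly request below 10,000 NH would fall under the stated minimum allocation.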
By default, allocated nodes are on the CPU partition of Piz Daint (36 cores per node).
However, allocations on the GPU partition are also possible (1 x NVIDIA P100 and 12 cores per
node), but this needs to be explicitly stated in the survey.

### PSI resources at Piz Daint

#### 2023

The yearly computing resources at CSCS for the PSI projects in 2023 are 522,500 NH (Node Hours). The yearly storage resources for the PSI projects are a total of 80 TB.
These resources are centrally financed, but in addition experiments can individually purchase more resources.

The Piz Daint resource distribution for the different PSI projects in 2023 is detailed in the following Excel file: [PSI_CSCSAllocations2023.xltx]({{ site.url }}/downloads/CSCS/PSI_CSCSAllocations2023.xltx).

### Piz Daint total resources

References:

* [Piz Daint Information](https://www.cscs.ch/computers/piz-daint/)
* [Piz Daint: One of the most powerful supercomputers in the world](https://www.cscs.ch/publications/news/2017/piz-daint-one-of-the-most-powerful-supercomputers-in-the-world)

## Contact information

* Responsible contacts:
  * Mail list contact: <psi-hpc-at-cscs-admin@lists.psi.ch>
  * Marc Caubet Serrabou <marc.caubet@psi.ch>
  * Derek Feichtinger <derek.feichtinger@psi.ch>
pages/CSCS/transfer-data.md (new file, 52)
@@ -0,0 +1,52 @@
---
title: Transferring Data between PSI and CSCS
#tags:
keywords: CSCS, data-transfer
last_updated: 02 March 2022
summary: "This document shows the procedure for transferring data between CSCS and PSI"
sidebar: CSCS_sidebar
permalink: /CSCS/transfer-data.html
---

# Transferring Data

This document shows how to transfer data between PSI and CSCS by using a Linux workstation.

## Preparing SSH configuration

If the directory **`.ssh`** does not exist in your home directory, create it with **`0700`** permissions:

```bash
mkdir ~/.ssh
chmod 0700 ~/.ssh
```

Then, if it does not exist, create a new file **`.ssh/config`**, otherwise add the following lines
to the already existing file, replacing **`$cscs_accountname`** with your CSCS username:

```bash
Host daint.cscs.ch
  Compression yes
  ProxyJump ela.cscs.ch
Host *.cscs.ch
  User $cscs_accountname
```

### Advanced SSH configuration

There are many different SSH settings available which allow advanced configurations.
Users may already have some configuration present and would then need to adapt it accordingly.
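As one hedged sketch of such advanced settings (not part of the documented procedure), connection multiplexing can be added for the CSCS hosts so that a single authenticated connection is reused by later `rsync`/`scp` calls; the key file name is an assumption.

```bash
# Hypothetical additions to ~/.ssh/config for the *.cscs.ch hosts
Host *.cscs.ch
  ControlMaster auto                    # reuse one authenticated connection
  ControlPath ~/.ssh/cm-%r@%h:%p        # socket file for the shared connection
  ControlPersist 2h                     # keep it open for two hours after the last session
  ServerAliveInterval 60                # keep idle connections alive
  # IdentityFile ~/.ssh/id_ed25519_cscs # assumed key name, if key-based login is set up
```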
## Transferring files

Once the above configuration is set, try to rsync between Merlin and CSCS, in either direction:

```bash
# CSCS -> PSI
rsync -azv daint.cscs.ch:<source_path> <destination_path>

# PSI -> CSCS
rsync -azv <source_path> daint.cscs.ch:<destination_path>
```
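For large transfers it can help to make rsync resumable and to cap its bandwidth; the flags below are standard rsync options shown as an illustrative variant, not part of the documented procedure.

```bash
# Keep partially transferred files, show progress, and limit bandwidth to roughly 10 MB/s
rsync -azv --partial --progress --bwlimit=10m <source_path> daint.cscs.ch:<destination_path>
```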
@@ -23,7 +23,7 @@ The below table summarizes the hardware setup for the Merlin6 GPU computing node
<table>
<thead>
<tr>
<th scope='colgroup' style="vertical-align:middle;text-align:center;" colspan="8">Merlin5 CPU Computing Nodes</th>
<th scope='colgroup' style="vertical-align:middle;text-align:center;" colspan="9">Merlin6 GPU Computing Nodes</th>
</tr>
<tr>
<th scope='col' style="vertical-align:middle;text-align:center;" colspan="1">Node</th>
@@ -33,7 +33,8 @@ The below table summarizes the hardware setup for the Merlin6 GPU computing node
<th scope='col' style="vertical-align:middle;text-align:center;" colspan="1">Threads</th>
<th scope='col' style="vertical-align:middle;text-align:center;" colspan="1">Scratch</th>
<th scope='col' style="vertical-align:middle;text-align:center;" colspan="1">Memory</th>
<th scope='col' style="vertical-align:middle;text-align:center;" colspan="1">GPU</th>
<th scope='col' style="vertical-align:middle;text-align:center;" colspan="1">GPUs</th>
<th scope='col' style="vertical-align:middle;text-align:center;" colspan="1">GPU Model</th>
</tr>
</thead>
<tbody>
@@ -45,6 +46,7 @@ The below table summarizes the hardware setup for the Merlin6 GPU computing node
<td style="vertical-align:middle;text-align:center;" rowspan="1">2</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">1.8TB</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">128GB</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">2</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">GTX1080</td>
</tr>
<tr style="vertical-align:middle;text-align:center;" ralign="center">
@@ -55,6 +57,7 @@ The below table summarizes the hardware setup for the Merlin6 GPU computing node
<td style="vertical-align:middle;text-align:center;" rowspan="1">1</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">1.8TB</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">128GB</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">4</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">GTX1080</td>
</tr>
<tr style="vertical-align:middle;text-align:center;" ralign="center">
@@ -65,6 +68,7 @@ The below table summarizes the hardware setup for the Merlin6 GPU computing node
<td style="vertical-align:middle;text-align:center;" rowspan="1">1</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">800GB</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">128GB</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">4</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">GTX1080Ti</td>
</tr>
<tr style="vertical-align:middle;text-align:center;" ralign="center">
@@ -75,6 +79,7 @@ The below table summarizes the hardware setup for the Merlin6 GPU computing node
<td style="vertical-align:middle;text-align:center;" rowspan="1">1</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">3.5TB</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">128GB</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">4</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">GTX1080Ti</td>
</tr>
<tr style="vertical-align:middle;text-align:center;" ralign="center">
@@ -85,8 +90,31 @@ The below table summarizes the hardware setup for the Merlin6 GPU computing node
<td style="vertical-align:middle;text-align:center;" rowspan="1">1</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">1.7TB</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">128GB</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">4</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">RTX2080Ti</td>
</tr>
<tr style="vertical-align:middle;text-align:center;" ralign="center">
<td style="vertical-align:middle;text-align:center;" rowspan="1"><b>merlin-g-014</b></td>
<td style="vertical-align:middle;text-align:center;" rowspan="1"><a href="https://www.intel.com/content/www/us/en/products/sku/199343/intel-xeon-gold-6240r-processor-35-75m-cache-2-40-ghz/specifications.html?wapkw=Intel(R)%20Xeon(R)%20Gold%206240R%20CP">Intel Xeon Gold 6240R</a></td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">2</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">48</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">1</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">2.9TB</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">384GB</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">8</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">RTX2080Ti</td>
</tr>
<tr style="vertical-align:middle;text-align:center;" ralign="center">
<td style="vertical-align:middle;text-align:center;" rowspan="1"><b>merlin-g-015</b></td>
<td style="vertical-align:middle;text-align:center;" rowspan="1"><a href="https://www.intel.com/content/www/us/en/products/sku/215279/intel-xeon-gold-5318s-processor-36m-cache-2-10-ghz/specifications.html">Intel(R) Xeon Gold 5318S</a></td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">2</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">48</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">1</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">2.9TB</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">384GB</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">8</td>
<td style="vertical-align:middle;text-align:center;" rowspan="1">RTX A5000</td>
</tr>
</tbody>
</table>
@@ -48,11 +48,12 @@ Users might need to specify the Slurm partition. If no partition is specified, i

The table below summarizes all possible partitions available to users:

| GPU Partition | Default Time | Max Time | PriorityJobFactor\* | PriorityTier\*\* |
|:-----------------: | :----------: | :------: | :-----------------: | :--------------: |
| `gpu` | 1 day | 1 week | 1 | 1 |
| `gpu-short` | 2 hours | 2 hours | 1000 | 500 |
| `gwendolen` | 1 hour | 12 hours | 1000 | 1000 |

| GPU Partition | Default Time | Max Time | PriorityJobFactor\* | PriorityTier\*\* |
|:---------------------: | :----------: | :--------: | :-----------------: | :--------------: |
| `gpu` | 1 day | 1 week | 1 | 1 |
| `gpu-short` | 2 hours | 2 hours | 1000 | 500 |
| `gwendolen` | 30 minutes | 2 hours | 1000 | 1000 |
| `gwendolen-long`\*\*\* | 30 minutes | 8 hours | 1 | 1 |

\*The **PriorityJobFactor** value will be added to the job priority (*PARTITION* column in `sprio -l`). In other words, jobs sent to higher priority
partitions will usually run first (however, other factors such as **job age** or mainly **fair share** might affect that decision). For the GPU
@@ -61,6 +62,8 @@ partitions, Slurm will also attempt first to allocate jobs on partitions with hi

\*\*Jobs submitted to a partition with a higher **PriorityTier** value will be dispatched before pending jobs in partitions with a lower *PriorityTier* value
and, if possible, they will preempt running jobs from partitions with lower *PriorityTier* values.

\*\*\***gwendolen-long** is a special partition which is enabled during non-working hours only. As of _Nov 2023_, the current policy is to disable this partition from Mon to Fri, from 1am to 5pm. However, jobs can be submitted anytime, but can only be scheduled outside this time range.

### Merlin6 GPU Accounts

Users need to ensure that the public **`merlin`** account is specified. Not specifying any account option will default to this account.
@@ -71,24 +74,21 @@ This is mostly needed by users which have multiple Slurm accounts, which may def
```
Not all the accounts can be used on all partitions. This is summarized in the table below:

| Slurm Account | Slurm Partitions | Special QoS |
|:-------------------: | :------------------: | :---------------------------------: |
| **`merlin`** | **`gpu`**,`gpu-short` | |
| `gwendolen` | `gwendolen` | `gwendolen`, **`gwendolen_public`** |

| Slurm Account | Slurm Partitions |
|:-------------------: | :------------------: |
| **`merlin`** | **`gpu`**,`gpu-short` |
| `gwendolen` | `gwendolen`,`gwendolen-long` |

By default, all users belong to the `merlin` and `gwendolen` Slurm accounts.
By default, all users belong to the `merlin` Slurm account, and jobs are submitted to the `gpu` partition when no partition is defined.

Users only need to specify `gwendolen` when using `gwendolen`, otherwise specifying an account is not needed (it will always default to `merlin`). `gwendolen` is a special account, with two different **QoS** granting different types of access (see details below).
Users only need to specify the `gwendolen` account when using the `gwendolen` or `gwendolen-long` partitions, otherwise specifying an account is not needed (it will always default to `merlin`).

#### The 'gwendolen' account

For running jobs in the **`gwendolen`** partition, users must specify the `gwendolen` account. The `merlin` account is not allowed to use the `gwendolen` partition.
For running jobs in the **`gwendolen`/`gwendolen-long`** partitions, users must specify the **`gwendolen`** account.
The `merlin` account is not allowed to use the Gwendolen partitions.

In addition, in Slurm there is the concept of **QoS**, which stands for **Quality of Service**. The **`gwendolen`** account has two different QoS configured:
* The **QoS** **`gwendolen_public`** is set by default for all Merlin users. This restricts the number of resources that can be used on **Gwendolen**. For further information about restrictions, please read the ['User and Job Limits'](/gmerlin6/slurm-configuration.html#user-and-job-limits) documentation.
* The **QoS** **`gwendolen`** provides full access to **`gwendolen`**, however this is restricted to a set of users belonging to the **`unx-gwendolen`** Unix group.

Users don't need to specify any QoS, however, they need to be aware of resource restrictions. If you belong to one of the projects which is allowed to use **Gwendolen** without restrictions, please request access to the **`unx-gwendolen`** group through [PSI Service Now](https://psi.service-now.com/).
Gwendolen is restricted to a set of users belonging to the **`unx-gwendolen`** Unix group. If you belong to a project allowed to use **Gwendolen**, or you are a user who would like to have access to it, please request access to the **`unx-gwendolen`** Unix group through [PSI Service Now](https://psi.service-now.com/): the request will be redirected to the person responsible for the project (Andreas Adelmann).
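As an illustrative submission (not taken from the documentation itself), a job for the Gwendolen partitions would name both the partition and the account explicitly; the job script name and GPU count are assumptions.

```bash
# Hypothetical submission to the gwendolen partition with the gwendolen account
sbatch --partition=gwendolen --account=gwendolen --gpus=4 my_job.sh

# Jobs on the public GPU partitions can rely on the defaults (merlin account, gpu partition)
sbatch --partition=gpu-short my_job.sh
```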
### Slurm GPU specific options

@@ -184,7 +184,7 @@ Please, notice that when defining `[<type>:]` once, then all other options must

#### Dealing with Hyper-Threading

The **`gmerlin6`** cluster contains the partition `gwendolen`, which has a node with Hyper-Threading enabled.
The **`gmerlin6`** cluster contains the partitions `gwendolen` and `gwendolen-long`, which have a node with Hyper-Threading enabled.
In that case, one should always specify whether to use Hyper-Threading or not. If not defined, Slurm will
generally use it (exceptions apply). For this machine, generally HT is recommended.
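A minimal sketch of making that choice explicit (standard Slurm options, shown as an example rather than quoted from the page):

```bash
#SBATCH --hint=multithread      # use Hyper-Threading (recommended on this machine)
# or
#SBATCH --hint=nomultithread    # one task per physical core, HT disabled for the job
```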
@@ -204,12 +204,12 @@ These are limits applying to a single job. In other words, there is a maximum of
Limits are defined using QoS, and this is usually set at the partition level. Limits are described in the table below with the format: `SlurmQoS(limits)`
(possible `SlurmQoS` values can be listed with the command `sacctmgr show qos`):

| Partition | Slurm Account | Mon-Sun 0h-24h |
|:-------------:| :------------: | :------------------------------------------: |
| **gpu** | **`merlin`** | gpu_week(cpu=40,gres/gpu=8,mem=200G) |
| **gpu-short** | **`merlin`** | gpu_week(cpu=40,gres/gpu=8,mem=200G) |
| **gwendolen** | `gwendolen` | gwendolen_public(cpu=32,gres/gpu=2,mem=200G) |
| **gwendolen** | `gwendolen` | gwendolen(No limits, full access granted) |

| Partition | Slurm Account | Mon-Sun 0h-24h |
|:------------------:| :------------: | :------------------------------------------: |
| **gpu** | **`merlin`** | gpu_week(cpu=40,gres/gpu=8,mem=200G) |
| **gpu-short** | **`merlin`** | gpu_week(cpu=40,gres/gpu=8,mem=200G) |
| **gwendolen** | `gwendolen` | No limits |
| **gwendolen-long** | `gwendolen` | No limits, active from 9pm to 5:30am |

* With the limits in the public `gpu` and `gpu-short` partitions, a single job using the `merlin` account
(default account) can not use more than 40 CPUs, more than 8 GPUs or more than 200GB.
@@ -218,9 +218,12 @@ As there are no more existing QoS during the week temporary overriding job limit
instance in the CPU **daily** partition), the job needs to be cancelled, and the requested resources
must be adapted according to the above resource limits.

* The **gwendolen** partition is a special partition with a **[NVIDIA DGX A100](https://www.nvidia.com/en-us/data-center/dgx-a100/)** machine.
Public access is possible through the `gwendolen` account, however this is limited to 2 GPUs per job, 32 CPUs and 121875MB of memory.
For full access, the `gwendolen` account with the `gwendolen` **QoS** (Quality of Service) is needed, and this is restricted to a set of users (belonging to the **`unx-gwendolen`** Unix group). Any other user will have by default a QoS **`gwendolen_public`**, which restricts resources in Gwendolen.
* The **gwendolen** and **gwendolen-long** partitions are two special partitions for a **[NVIDIA DGX A100](https://www.nvidia.com/en-us/data-center/dgx-a100/)** machine.
Only users belonging to the **`unx-gwendolen`** Unix group can run in these partitions. No limits are applied (machine resources can be completely used).

* The **`gwendolen-long`** partition is available 24h. However,
  * from 5:30am to 9pm the partition is `down` (jobs can be submitted, but can not run until the partition is set to `active`).
  * from 9pm to 5:30am jobs are allowed to run (partition is set to `active`).

### Per user limits for GPU partitions

@@ -228,12 +231,12 @@ These limits apply exclusively to users. In other words, there is a maximum of r
Limits are defined using QoS, and this is usually set at the partition level. Limits are described in the table below with the format: `SlurmQoS(limits)`
(possible `SlurmQoS` values can be listed with the command `sacctmgr show qos`):

| Partition | Slurm Account | Mon-Sun 0h-24h |
|:-------------:| :----------------: | :---------------------------------------------: |
| **gpu** | **`merlin`** | gpu_week(cpu=80,gres/gpu=16,mem=400G) |
| **gpu-short** | **`merlin`** | gpu_week(cpu=80,gres/gpu=16,mem=400G) |
| **gwendolen** | `gwendolen` | gwendolen_public(cpu=64,gres/gpu=4,mem=243750M) |
| **gwendolen** | `gwendolen` | gwendolen(No limits, full access granted) |

| Partition | Slurm Account | Mon-Sun 0h-24h |
|:------------------:| :----------------: | :---------------------------------------------: |
| **gpu** | **`merlin`** | gpu_week(cpu=80,gres/gpu=16,mem=400G) |
| **gpu-short** | **`merlin`** | gpu_week(cpu=80,gres/gpu=16,mem=400G) |
| **gwendolen** | `gwendolen` | No limits |
| **gwendolen-long** | `gwendolen` | No limits, active from 9pm to 5:30am |

* With the limits in the public `gpu` and `gpu-short` partitions, a single user can not use more than 80 CPUs, more than 16 GPUs or more than 400GB.
Jobs sent by any user already exceeding such limits will stay in the queue with the message **`QOSMax[Cpu|GRES|Mem]PerUser`**.
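A quick way to see that pending reason for your own jobs (a hedged example using standard Slurm tooling, not text from the page):

```bash
# Show job id, partition, state and the pending reason (e.g. QOSMaxCpuPerUser)
squeue -u "$USER" -o "%.12i %.12P %.10T %.30r"
```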
@@ -1,8 +1,8 @@
---
title: Accessing Interactive Nodes
#tags:
#keywords:
last_updated: 20 May 2021
keywords: How to, HowTo, access, accessing, nomachine, ssh
last_updated: 07 September 2022
#summary: ""
sidebar: merlin6_sidebar
permalink: /merlin6/interactive.html

@@ -1,8 +1,8 @@
---
title: Accessing Slurm Cluster
#tags:
#keywords:
last_updated: 13 June 2019
keywords: slurm, batch system, merlin5, merlin6, gmerlin6, cpu, gpu
last_updated: 07 September 2022
#summary: ""
sidebar: merlin6_sidebar
permalink: /merlin6/slurm-access.html

@@ -1,8 +1,8 @@
---
title: Code Of Conduct
#tags:
#keywords:
last_updated: 13 June 2019
keywords: code of conduct, rules, principle, policy, policies, administrator, backup
last_updated: 07 September 2022
#summary: ""
sidebar: merlin6_sidebar
permalink: /merlin6/code-of-conduct.html
@@ -32,6 +32,16 @@ The basic principle is courtesy and consideration for other users.
* Prefer ``/scratch`` over ``/shared-scratch`` and use the latter only when you require the temporary files to be visible from multiple nodes.
* Read the description in **[Merlin6 directory structure](### Merlin6 directory structure)** for learning about the correct usage of each partition type.

## User and project data

* ***Users are responsible for backing up their own data***. It is recommended to back up the data on third-party independent systems (e.g. LTS, Archive, AFS, SwitchDrive, Windows Shares, etc.).
* **`/psi/home`**, as this contains a small amount of data, is the only directory where we can provide daily snapshots for one week. These can be found in the following directory: **`/psi/home/.snapshot/`**
* ***When a user leaves PSI, she or her supervisor/team are responsible for backing up and moving the data out of the cluster***: every few months, the storage space will be recycled for those old users who do not have an existing and valid PSI account.

{{site.data.alerts.warning}}When a user leaves PSI and her account has been removed, her storage space in Merlin may be recycled.
Hence, <b>when a user leaves PSI</b>, she, her supervisor or team <b>must ensure that the data is backed up to an external storage</b>
{{site.data.alerts.end}}
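As a rough sketch of how a home-directory snapshot might be used to recover a file (the exact snapshot layout below is an assumption; only the `/psi/home/.snapshot/` path is documented):

```bash
# List available snapshots, then copy a file back from one of them
ls /psi/home/.snapshot/
cp "/psi/home/.snapshot/<snapshot_name>/$USER/path/to/file" ~/path/to/file
```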
## System Administrator Rights

* The system administrator has the right to temporarily block the access to Merlin6 for an account violating the Code of Conduct in order to maintain the efficiency and stability of the system.

@@ -1,8 +1,8 @@
---
title: Introduction
#tags:
#keywords:
last_updated: 28 June 2019
keywords: introduction, home, welcome, architecture, design
last_updated: 07 September 2022
#summary: "Merlin 6 cluster overview"
sidebar: merlin6_sidebar
permalink: /merlin6/introduction.html
@@ -16,7 +16,18 @@ redirect_from:
Historically, the local HPC clusters at PSI were named **Merlin**. Over the years,
multiple generations of Merlin have been deployed.

At present, the **Merlin local HPC cluster** contains _two_ generations of it: the old **Merlin5** cluster and the newest **Merlin6**.
At present, the **Merlin local HPC cluster** contains _two_ generations of it:
* the old **Merlin5** cluster (`merlin5` Slurm cluster), and
* the newest generation **Merlin6**, which is divided into two Slurm clusters:
  * `merlin6` as the Slurm CPU cluster
  * `gmerlin6` as the Slurm GPU cluster.

Access to the different Slurm clusters is possible from the [**Merlin login nodes**](/merlin6/interactive.html),
which can be accessed through the [SSH protocol](/merlin6/interactive.html#ssh-access) or the [NoMachine (NX) service](/merlin6/nomachine.html).

The following image shows the Slurm architecture design for the Merlin5 & Merlin6 (CPU & GPU) clusters:



### Merlin6

@@ -51,15 +62,3 @@ The old Slurm **CPU** *merlin* cluster is still active and is maintained in a be
**Merlin5** only contains **computing nodes** resources in a dedicated **[Slurm](https://slurm.schedmd.com/overview.html)** cluster.
* The Merlin5 CPU cluster is called [**merlin5**](/merlin5/slurm-configuration.html).

## Merlin Architecture

The following image shows the Slurm architecture design for the Merlin5 & Merlin6 clusters:



### Merlin6 Architecture Diagram

The following image shows the Merlin6 cluster architecture diagram:



@@ -1,8 +1,8 @@
---
title: Requesting Accounts
#tags:
#keywords:
last_updated: 28 June 2019
keywords: registration, register, account, merlin5, merlin6, snow, service now
last_updated: 07 September 2022
#summary: ""
sidebar: merlin6_sidebar
permalink: /merlin6/request-account.html

@@ -1,8 +1,8 @@
---
title: Requesting a Project
#tags:
#keywords:
last_updated: 01 July 2019
keywords: merlin project, project, snow, service now
last_updated: 07 September 2022
#summary: ""
sidebar: merlin6_sidebar
permalink: /merlin6/request-project.html

@@ -1,8 +1,7 @@
---
title: Archive & PSI Data Catalog

#tags:
keywords: Linux, archive, DataCatalog,
keywords: linux, archive, data catalog, archiving, lts, tape, long term storage, ingestion, datacatalog
last_updated: 31 January 2020
summary: "This document describes how to use the PSI Data Catalog for archiving Merlin6 data."
sidebar: merlin6_sidebar

@@ -1,9 +1,8 @@
---
title: Connecting from a Linux Client

#tags:
keywords: Linux, connecting, client, configuration, SSH, X11
last_updated: 23 Oct 2019
keywords: linux, connecting, client, configuration, SSH, X11
last_updated: 07 September 2022
summary: "This document describes a recommended setup for a Linux client."
sidebar: merlin6_sidebar
permalink: /merlin6/connect-from-linux.html

@@ -1,9 +1,8 @@
---
title: Connecting from a MacOS Client

#tags:
keywords: MacOS, connecting, client, configuration, SSH, X11
last_updated: 23 Oct 2019
keywords: MacOS, mac os, mac, connecting, client, configuration, SSH, X11
last_updated: 07 September 2022
summary: "This document describes a recommended setup for a MacOS client."
sidebar: merlin6_sidebar
permalink: /merlin6/connect-from-macos.html

@@ -1,9 +1,7 @@
---
title: Connecting from a Windows Client

#tags:
keywords: Windows, connecting, client, configuration, SSH, X11
last_updated: 23 Oct 2019
keywords: microsoft, mocosoft, windows, putty, xming, connecting, client, configuration, SSH, X11
last_updated: 07 September 2022
summary: "This document describes a recommended setup for a Windows client."
sidebar: merlin6_sidebar
permalink: /merlin6/connect-from-windows.html
pages/merlin6/02-How-To-Use-Merlin/kerberos.md (new file, 192)
@@ -0,0 +1,192 @@
---
title: Kerberos and AFS authentication
#tags:
keywords: kerberos, AFS, kinit, klist, keytab, tickets, connecting, client, configuration, slurm
last_updated: 07 September 2022
summary: "This document describes how to use Kerberos."
sidebar: merlin6_sidebar
permalink: /merlin6/kerberos.html
---

Projects and users have their own areas in the central PSI AFS service. In order
to access these areas, valid Kerberos and AFS tickets must be granted.

These tickets are automatically granted when accessing through SSH with
username and password. Alternatively, one can get a granting ticket with the `kinit` (Kerberos)
and `aklog` (AFS ticket, which needs to be run after `kinit`) commands.

Due to PSI security policies, the maximum lifetime of the ticket is 7 days, and the default
time is 10 hours. This means that one needs to constantly renew (`krenew` command) the existing
granting tickets, and their validity can not be extended longer than 7 days. At this point,
one needs to obtain new granting tickets.

## Obtaining granting tickets with username and password

As already described above, the most common use case is to obtain Kerberos and AFS granting tickets
by entering username and password:
* When logging in to Merlin through the SSH protocol, if this is done with username + password authentication,
tickets for Kerberos and AFS will be automatically obtained.
* When logging in to Merlin through NoMachine, no Kerberos or AFS tickets are granted. Therefore, users need to
run `kinit` (to obtain a granting Kerberos ticket) followed by `aklog` (to obtain a granting AFS ticket).
See further details below.

To manually obtain granting tickets, one has to:
1. To obtain a granting Kerberos ticket, one needs to run `kinit $USER` and enter the PSI password.
```bash
kinit $USER@D.PSI.CH
```
2. To obtain a granting ticket for AFS, one needs to run `aklog`. No password is necessary, but a valid
Kerberos ticket is mandatory.
```bash
aklog
```
3. To list the status of your granted tickets, users can use the `klist` command.
```bash
klist
```
4. To extend the validity of existing granting tickets, users can use the `krenew` command.
```bash
krenew
```
* Keep in mind that the maximum lifetime for granting tickets is 7 days, therefore `krenew` can not be used beyond that limit,
and then `kinit` should be used instead.

## Obtaining granting tickets with a keytab

Sometimes, obtaining granting tickets by using password authentication is not possible. An example are user Slurm jobs
requiring access to private areas in AFS. For that, there's the possibility to generate a **keytab** file.

Be aware that the **keytab** file must be **private**, **fully protected** by correct permissions and not shared with any
other users.

### Creating a keytab file

For generating a **keytab**, one has to:

1. Load a newer Kerberos (`krb5/1.20` or higher) from Pmodules:
```bash
module load krb5/1.20
```
2. Create a private directory for storing the Kerberos **keytab** file
```bash
mkdir -p ~/.k5
```
3. Run the `ktutil` utility which comes with the loaded `krb5` Pmodule:
```bash
ktutil
```
4. In the `ktutil` console, one has to generate a **keytab** file as follows:
```bash
# Replace $USER by your username
add_entry -password -k 0 -f -p $USER
wkt /psi/home/$USER/.k5/krb5.keytab
exit
```
Notice that you will need to add your password once. This step is required for generating the **keytab** file.
5. Once back in the main shell, one has to ensure that the file has the proper permissions:
```bash
chmod 0400 ~/.k5/krb5.keytab
```

### Obtaining tickets by using keytab files

Once the keytab is created, one can obtain Kerberos tickets without being prompted for a password as follows:

```bash
kinit -kt ~/.k5/krb5.keytab $USER
aklog
```
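To double-check which principal a keytab actually contains, the standard `klist -k` option can be used (shown here as an illustrative extra step, not part of the new page):

```bash
# List the principals stored in the keytab before relying on it in batch jobs
klist -k ~/.k5/krb5.keytab
```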
## Slurm jobs accessing AFS
|
||||
|
||||
Some jobs may require to access private areas in AFS. For that, having a valid [**keytab**](/merlin6/kerberos.html#generating-granting-tickets-with-keytab) file is required.
|
||||
Then, from inside the batch script one can obtain granting tickets for Kerberos and AFS, which can be used for accessing AFS private areas.
|
||||
|
||||
The steps should be the following:
|
||||
|
||||
* Setup `KRB5CCNAME`, which can be used to specify the location of the Kerberos5 credentials (ticket) cache. In general it should point to a shared area
|
||||
(`$HOME/.k5` is a good location), and is strongly recommended to generate an independent Kerberos5 credential cache (it is, creating a new credential cache per Slurm job):
|
||||
```bash
|
||||
export KRB5CCNAME="$(mktemp "$HOME/.k5/krb5cc_XXXXXX")"
|
||||
```
|
||||
* To obtain a Kerberos5 granting ticket, run `kinit` by using your keytab:
|
||||
```bash
|
||||
kinit -kt "$HOME/.k5/krb5.keytab" $USER@D.PSI.CH
|
||||
```
|
||||
* To obtain a granting AFS ticket, run `aklog`:
|
||||
```bash
|
||||
aklog
|
||||
```
|
||||
* At the end of the job, you can remove destroy existing Kerberos tickets.
|
||||
```bash
|
||||
kdestroy
|
||||
```
|
||||
|
||||
### Slurm batch script example: obtaining KRB+AFS granting tickets
|
||||
|
||||
#### Example 1: Independent crendetial cache per Slurm job
|
||||
|
||||
This is the **recommended** way. At the end of the job, is strongly recommended to remove / destroy the existing kerberos tickets.
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
#SBATCH --partition=hourly # Specify 'general' or 'daily' or 'hourly'
|
||||
#SBATCH --time=01:00:00 # Strictly recommended when using 'general' partition.
|
||||
#SBATCH --output=run.out # Generate custom output file
|
||||
#SBATCH --error=run.err # Generate custom error file
|
||||
#SBATCH --nodes=1 # Uncomment and specify #nodes to use
|
||||
#SBATCH --ntasks=1 # Uncomment and specify #nodes to use
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --constraint=xeon-gold-6152
|
||||
#SBATCH --hint=nomultithread
|
||||
#SBATCH --job-name=krb5
|
||||
|
||||
export KRB5CCNAME="$(mktemp "$HOME/.k5/krb5cc_XXXXXX")"
|
||||
kinit -kt "$HOME/.k5/krb5.keytab" $USER@D.PSI.CH
|
||||
aklog
|
||||
klist
|
||||
|
||||
echo "Here should go my batch script code."
|
||||
|
||||
# Destroy Kerberos tickets created for this job only
|
||||
kdestroy
|
||||
klist
|
||||
```
|
||||
|
||||
#### Example 2: Shared credential cache
|
||||
|
||||
Some users may need or prefer to run with a shared cache file. To do that, one needs to
set up `KRB5CCNAME` in the **login node** session, before submitting the job.
|
||||
|
||||
```bash
|
||||
export KRB5CCNAME="$(mktemp "$HOME/.k5/krb5cc_XXXXXX")"
|
||||
```
|
||||
|
||||
Then, you can run one or multiple job scripts (or a parallel job with `srun`). `KRB5CCNAME` will be propagated to the
job script or to the parallel job, so a single credential cache will be shared amongst the different Slurm runs.
|
||||
|
||||
```bash
|
||||
|
||||
#!/bin/bash
|
||||
#SBATCH --partition=hourly # Specify 'general' or 'daily' or 'hourly'
|
||||
#SBATCH --time=01:00:00 # Strictly recommended when using 'general' partition.
|
||||
#SBATCH --output=run.out # Generate custom output file
|
||||
#SBATCH --error=run.err # Generate custom error file
|
||||
#SBATCH --nodes=1 # Uncomment and specify #nodes to use
|
||||
#SBATCH --ntasks=1 # Uncomment and specify #nodes to use
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --constraint=xeon-gold-6152
|
||||
#SBATCH --hint=nomultithread
|
||||
#SBATCH --job-name=krb5
|
||||
|
||||
# KRB5CCNAME is inherited from the login node session
|
||||
kinit -kt "$HOME/.k5/krb5.keytab" $USER@D.PSI.CH
|
||||
aklog
|
||||
klist
|
||||
|
||||
echo "Here should go my batch script code."
|
||||
|
||||
echo "No need to run 'kdestroy', as it may have to survive for running other jobs"
|
||||
```
|
||||
109
pages/merlin6/02-How-To-Use-Merlin/merlin-rmount.md
Normal file
@@ -0,0 +1,109 @@
|
||||
---
|
||||
title: Using merlin_rmount
|
||||
#tags:
|
||||
keywords: >-
|
||||
transferring data, data transfer, rsync, dav, webdav, sftp, ftp, smb, cifs,
|
||||
copy data, copying, mount, file, folder, sharing
|
||||
last_updated: 24 August 2023
|
||||
#summary: ""
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/merlin-rmount.html
|
||||
---
|
||||
|
||||
## Background
|
||||
|
||||
Merlin provides a command for mounting remote file systems, called `merlin_rmount`. It
is a helpful wrapper over the Gnome storage utilities (GIO and GVFS) and supports a wide range of remote file systems, including
|
||||
- SMB/CIFS (Windows shared folders)
|
||||
- WebDav
|
||||
- AFP
|
||||
- FTP, SFTP
|
||||
- [complete list](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/using_the_desktop_environment_in_rhel_8/managing-storage-volumes-in-gnome_using-the-desktop-environment-in-rhel-8#gvfs-back-ends_managing-storage-volumes-in-gnome)
|
||||
|
||||
|
||||
## Usage
|
||||
|
||||
|
||||
### Start a session
|
||||
|
||||
First, start a new session. This will start a new bash shell in the current terminal where you can run further commands.
|
||||
|
||||
```
|
||||
$ merlin_rmount --init
|
||||
[INFO] Starting new D-Bus RMOUNT session
|
||||
|
||||
(RMOUNT STARTED) [bliven_s@merlin-l-002 ~]$
|
||||
```
|
||||
|
||||
Note that behind the scenes this creates a new D-Bus daemon. Running multiple daemons on the same login node leads to unpredictable results, so it is best not to initialize multiple sessions in parallel.
|
||||
|
||||
### Standard Endpoints
|
||||
|
||||
Standard endpoints can be mounted using
|
||||
|
||||
```
|
||||
merlin_rmount --select-mount
|
||||
```
|
||||
|
||||
Select the desired URL using the arrow keys.
|
||||
|
||||

|
||||
|
||||
From this list any of the standard supported endpoints can be mounted.
|
||||
|
||||
### Other endpoints
|
||||
|
||||
Other endpoints can be mounted using the `merlin_rmount --mount <endpoint>` command.
|
||||
|
||||

|
||||
|
||||
|
||||
### Accessing Files
|
||||
|
||||
After mounting a volume, the script will print the mountpoint. It should be of the form
|
||||
|
||||
```
|
||||
/run/user/$UID/gvfs/<endpoint>
|
||||
```
|
||||
|
||||
where `$UID` gives your unix user id (a 5-digit number, also viewable with `id -u`) and
|
||||
`<endpoint>` is some string generated from the mount options.
|
||||
|
||||
For convenience, it may be useful to add a symbolic link for this gvfs directory. For instance, this would allow all volumes to be accessed in ~/mnt/:
|
||||
|
||||
```
|
||||
ln -s /run/user/$UID/gvfs ~/mnt
|
||||
```
|
||||
|
||||
Files are accessible as long as the `merlin_rmount` shell remains open.
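For instance, files can be copied out of a mounted share from the same shell (paths are illustrative):

```
# Copy a single file from the mounted endpoint into your user data area
cp /run/user/$UID/gvfs/<endpoint>/path/to/file /data/user/$USER/

# Or synchronize a whole directory
rsync -av /run/user/$UID/gvfs/<endpoint>/path/to/dir/ /data/user/$USER/dir/
```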
|
||||
|
||||
|
||||
### Disconnecting
|
||||
|
||||
To disconnect, close the session with one of the following:
|
||||
|
||||
- The exit command
|
||||
- CTRL-D
|
||||
- Closing the terminal
|
||||
|
||||
Disconnecting will unmount all volumes.
|
||||
|
||||
|
||||
## Alternatives
|
||||
|
||||
### Thunar
|
||||
|
||||
Users that prefer a GUI file browser may use the `thunar` command, which opens a graphical file browser. This is also available in NoMachine sessions in the bottom bar (1). Thunar supports the same remote filesystems as `merlin_rmount`; just type the URL in the address bar (2).
|
||||
|
||||

|
||||
|
||||
When using thunar within a NoMachine session, file transfers continue after closing the NoMachine client (as long as the NoMachine session itself stays active).
|
||||
|
||||
Files can also be accessed at the command line as needed (see 'Accessing Files' above).
|
||||
|
||||
## Resources
|
||||
|
||||
- [BIO docs](https://intranet.psi.ch/en/bio/webdav-data) on using these tools for
|
||||
transferring EM data
|
||||
- [Red Hat docs on GVFS](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/using_the_desktop_environment_in_rhel_8/managing-storage-volumes-in-gnome_using-the-desktop-environment-in-rhel-8)
|
||||
- [gio reference](https://developer-old.gnome.org/gio/stable/gio.html)
|
||||
@@ -1,9 +1,8 @@
|
||||
---
|
||||
title: Remote Desktop Access
|
||||
|
||||
#tags:
|
||||
#keywords:
|
||||
last_updated: 19 Aug 2019
|
||||
keywords: NX, nomachine, remote desktop access, login node, merlin-l-001, merlin-l-002, merlin-nx-01, merlin-nx-02, merlin-nx, rem-acc, vpn
|
||||
last_updated: 07 September 2022
|
||||
#summary: ""
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/nomachine.html
|
||||
@@ -21,77 +20,61 @@ Linux, the NoMachine client can be downloaded from https://www.nomachine.com/.
|
||||
|
||||
## Accessing Merlin6 NoMachine from PSI
|
||||
|
||||
The Merlin6 NoMachine service is located **only** in the following login nodes:
|
||||
The Merlin6 NoMachine service is hosted in the following machine:
|
||||
|
||||
* **`merlin-nx.psi.ch`**
|
||||
|
||||
This is the **front-end** (hence, *the door*) to the NoMachine **back-end nodes**,
|
||||
which contain the NoMachine desktop service. The **back-end nodes** are the following:
|
||||
|
||||
* `merlin-l-001.psi.ch`
|
||||
* `merlin-l-002.psi.ch`
|
||||
|
||||
Any access to the login node desktops must be done through **`merlin-nx.psi.ch`**
|
||||
(or from **`rem-acc.psi.ch -> merlin-nx.psi.ch`** when connecting from outside PSI).
|
||||
|
||||
The **front-end** service running on **`merlin-nx.psi.ch`** will load balance the sessions
|
||||
and login to any of the available nodes in the **back-end**.
|
||||
|
||||
**Only 1 session per back-end** is possible.
|
||||
|
||||
Below are explained all the steps necessary for configuring the access to the
|
||||
NoMachine service running on a login node.
|
||||
|
||||
### Creating a Merlin6 NoMachine connection
|
||||
|
||||
#### Creating a **New** connection
|
||||
#### Adding a new connection to the front-end
|
||||
|
||||
Click on the **New** button to create a new connection:
|
||||
Click the **Add** button to create a new connection to the **`merlin-nx.psi.ch` front-end**, and fill up
|
||||
the following fields:
|
||||
* **Name**: Specify a custom name for the connection. Examples: `merlin-nx`, `merlin-nx.psi.ch`, `Merlin Desktop`
|
||||
* **Host**: Specify the hostname of the **front-end** service: **`merlin-nx.psi.ch`**
|
||||
* **Protocol**: specify the protocol that will be used for the connection. *Recommended* protocol: **`NX`**
|
||||
* **Port**: Specify the listening port of the **front-end**. It must be **`4000`**.
|
||||
|
||||

|
||||
|
||||
#### Configuring **NX** protocol
|
||||
|
||||

|
||||
|
||||
#### Configuring NoMachine Server Information
|
||||
|
||||
Select the corresponding login node server where the NoMachine service is running and
|
||||
keep **4000** as the listening port; then **Continue**.
|
||||
|
||||

|
||||

|
||||
|
||||
#### Configuring NoMachine Authentication Method
|
||||
|
||||
Choose your authentication method and **Continue**. **Password** or *Kerberos* are the recommended ones:
|
||||
Depending on the client version, it may ask for different authentication options.
|
||||
If it's required, choose your authentication method and **Continue** (**Password** or *Kerberos* are the recommended ones).
|
||||
|
||||

|
||||
You will be asked for the credentials (username / password). **Do not add `PSICH\`** as a prefix for the username.
|
||||
|
||||
#### Configuring Proxy
|
||||
### Opening NoMachine desktop sessions
|
||||
|
||||
In Merlin6, we will check **Don't use a proxy**, and **Continue**:
|
||||
By default, when connecting to the **`merlin-nx.psi.ch` front-end** it will automatically open a new
|
||||
session if none exists.
|
||||
|
||||

|
||||
If there are existing sessions, instead of opening a new desktop session, users can reconnect to an
|
||||
existing one by clicking to the proper icon (see image below).
|
||||
|
||||
#### Configuring Connection Name
|
||||

|
||||
|
||||
We strongly recommend to add the login node hostname, but
|
||||
you are free to choose any other name for your connection:
|
||||
Users can also create a second desktop session by selecting the **`New Desktop`** button (*red* rectangle in the
|
||||
below image). This will create a second session on the second login node, as long as this node is up and running.
|
||||
|
||||

|
||||
|
||||
### Connecting to Merlin6 NoMachine
|
||||
|
||||
#### Opening an existing NoMachine connection
|
||||
|
||||
Double click on the NoMachine server in order to connect to it:
|
||||
|
||||

|
||||
|
||||
#### Authenticating (whenever necessary)
|
||||
|
||||
If authentication is required, you will be asked for it. The example below corresponds to **Password**
|
||||
authentication:
|
||||
|
||||

|
||||
|
||||
#### Creating/Re-Connecting Virtual Desktops
|
||||
|
||||
Finally, create a virtual desktop in order to get in. If a previous virtual desktop was created, you
|
||||
might be able to re-attach the session.
|
||||
|
||||

|
||||
|
||||
Some hints on how to manage the resolution and windows will be shown.
|
||||
|
||||

|
||||

|
||||
|
||||
### NoMachine LightDM Session Example
|
||||
|
||||
@@ -106,12 +89,12 @@ X Windows:
|
||||
|
||||
Access to the Merlin6 NoMachine service is possible without VPN through **'rem-acc.psi.ch'**.
|
||||
Please follow the steps described in [PSI Remote Interactive Access](https://www.psi.ch/en/photon-science-data-services/remote-interactive-access) for
|
||||
remote access to the Merlin6 NoMachine services. Once logged in **'rem-acc.psi.ch'**, you must then login to one of the available MErlin6 NoMachine
|
||||
remote access to the Merlin6 NoMachine services. Once logged in **'rem-acc.psi.ch'**, you must then login to the **`merlin-nx.psi.ch` front-end** .
|
||||
services.
|
||||
|
||||
### VPN access
|
||||
|
||||
Remote access is also possible through VPN, however, you **must not use 'rem-acc.psi.ch'**, and you have to connect directly
|
||||
to the Merlin6 NoMachine services as if you were inside PSI. For VPN access, you should request it to the IT department by
|
||||
opening a PSI Service Now ticket:
|
||||
to the Merlin6 NoMachine **`merlin-nx.psi.ch` front-end** as if you were inside PSI. For VPN access, you should request
|
||||
it to the IT department by opening a PSI Service Now ticket:
|
||||
[VPN Access (PSI employees)](https://psi.service-now.com/psisp?id=psi_new_sc_cat_item&sys_id=beccc01b6f44a200d02a82eeae3ee440).
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
title: Configuring SSH Keys in Merlin
|
||||
|
||||
#tags:
|
||||
keywords: Linux, connecting, client, configuration, SSH, Keys, SSH-Keys, RSA
|
||||
keywords: linux, connecting, client, configuration, SSH, Keys, SSH-Keys, RSA, authorization, authentication
|
||||
last_updated: 15 Jul 2020
|
||||
summary: "This document describes how to deploy SSH Keys in Merlin."
|
||||
sidebar: merlin6_sidebar
|
||||
@@ -28,7 +28,7 @@ ls ~/.ssh/id*
|
||||
For creating **SSH RSA Keys**, one should:
|
||||
|
||||
1. Run `ssh-keygen`, a password will be requested twice. You **must remember** this password for the future.
|
||||
* Due to security reasons, ***always add a password***. Never leave an empty password.
|
||||
* For security reasons, ***always try to protect it with a password***. The only exception is when running ANSYS software, which in general should not use a password, in order to simplify running the software in Slurm.
|
||||
* This will generate a private key **id_rsa**, and a public key **id_rsa.pub** in your **~/.ssh** directory.
|
||||
2. Add your public key to the **`authorized_keys`** file, and ensure proper permissions for that file, as follows:
|
||||
```bash
|
||||
@@ -39,6 +39,19 @@ For creating **SSH RSA Keys**, one should:
|
||||
```bash
|
||||
echo "CanonicalizeHostname yes" >> ~/.ssh/config
|
||||
```
|
||||
4. Configure further SSH options as follows:
|
||||
```bash
|
||||
echo "AddKeysToAgent yes" >> ~/.ssh/config
|
||||
echo "ForwardAgent yes" >> ~/.ssh/config
|
||||
```
|
||||
Other options may be added.
|
||||
5. Check that your SSH config file contains at least the lines mentioned in steps 3 and 4:
|
||||
```bash
|
||||
(base) ❄ [caubet_m@merlin-l-001:/data/user/caubet_m]# cat ~/.ssh/config
|
||||
CanonicalizeHostname yes
|
||||
AddKeysToAgent yes
|
||||
ForwardAgent yes
|
||||
```
|
||||
|
||||
## Using the SSH Keys
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
---
|
||||
title: Merlin6 Storage
|
||||
#tags:
|
||||
#keywords:
|
||||
last_updated: 28 June 2019
|
||||
keywords: storage, /data/user, /data/software, /data/project, /scratch, /shared-scratch, quota, export, user, project, scratch, data, shared-scratch, merlin_quotas
|
||||
last_updated: 07 September 2022
|
||||
#summary: ""
|
||||
sidebar: merlin6_sidebar
|
||||
redirect_from: /merlin6/data-directories.html
|
||||
@@ -13,6 +13,16 @@ permalink: /merlin6/storage.html
|
||||
|
||||
This document describes the different directories of the Merlin6 cluster.
|
||||
|
||||
### User and project data
|
||||
|
||||
* ***Users are responsible for backing up their own data***. It is recommended to back up the data on independent third-party systems (e.g. LTS, Archive, AFS, SwitchDrive, Windows Shares, etc.).
|
||||
* **`/psi/home`**, since it contains only a small amount of data, is the only directory for which we can provide daily snapshots, kept for one week. These can be found in the directory **`/psi/home/.snapshot/`**.
|
||||
* ***When a user leaves PSI, the user or their supervisor/team is responsible for backing up the data and moving it out of the cluster***: every few months, the storage space of former users who no longer have an existing and valid PSI account will be recycled.
|
||||
|
||||
{{site.data.alerts.warning}}When a user leaves PSI and their account has been removed, their storage space in Merlin may be recycled.
Hence, <b>when a user leaves PSI</b>, the user, their supervisor or their team <b>must ensure that the data is backed up to external storage</b>
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
### Checking user quota
|
||||
|
||||
For each directory, we provide a way for checking quotas (when required). However, a single command ``merlin_quotas``
|
||||
|
||||
@@ -1,14 +1,35 @@
|
||||
---
|
||||
title: Transferring Data
|
||||
#tags:
|
||||
#keywords:
|
||||
last_updated: 9 July 2019
|
||||
keywords: transferring data, data transfer, rsync, winscp, copy data, copying, sftp, import, export, hop, vpn
|
||||
last_updated: 24 August 2023
|
||||
#summary: ""
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/transfer-data.html
|
||||
---
|
||||
|
||||
## Transferring Data from the PSI Network to/from Merlin6
|
||||
## Overview
|
||||
|
||||
Most methods allow data to be either sent or received, so it may make sense to
initiate the transfer from either Merlin or the other system, depending on the network
visibility.
|
||||
|
||||
- Merlin login nodes are visible from the PSI network, so direct data transfer
|
||||
(rsync/WinSCP) is generally preferable. This can be initiated from either endpoint.
|
||||
- Merlin login nodes can access the internet using a limited set of protocols
|
||||
- SSH-based protocols using port 22 (rsync-over-ssh, sftp, WinSCP, etc)
|
||||
- HTTP-based protocols using ports 80 or 445 (https, WebDav, etc)
|
||||
- Protocols using other ports require admin configuration and may only work with
|
||||
specific hosts (ftp, rsync daemons, etc)
|
||||
- Systems on the internet can access the Remote Access Merlin servers
|
||||
(ra-merlin*.psi.ch) using ssh-based protocols
|
||||
|
||||
|
||||
## Direct transfer via Merlin6 login nodes
|
||||
|
||||
The following methods transfer data directly via the [login
|
||||
nodes](/merlin6/interactive.html#login-nodes-hardware-description). They are suitable
|
||||
for use from within the PSI network.
|
||||
|
||||
### Rsync
|
||||
|
||||
@@ -37,29 +58,33 @@ from the Software Kiosk on PSI machines. Add `merlin-l-01.psi.ch` as a host and
|
||||
connect with your PSI credentials. You can then drag-and-drop files between your
|
||||
local computer and merlin.
|
||||
|
||||
## Transferring Data to/from outside PSI
|
||||
|
||||
Two servers are enabled for exporting data from Merlin to outside PSI.
|
||||
## Remote Access Servers
|
||||
|
||||
Two servers are enabled for data transfers originating from outside PSI.
This is a central service managed by a different team, which runs the different Remote Access
services at PSI for the different facilities (including the one for Merlin). However, any problems
or questions related to it can be directly [reported](/merlin6/contact.html) to the Merlin administrators,
who will forward the request if necessary.
|
||||
|
||||
These Remote Access Merlin servers are the following:
|
||||
* **'ra-merlin-01.psi.ch'**: standard password authentication (with PSI password)
|
||||
* `/data/user` mounted in RO (read-only)
|
||||
* `/export` directory in RW (read-write). `/export` is also visible from login nodes.
|
||||
* **'ra-merlin-02.psi.ch'**: ***Two factor authentication*** (2FA), required **RSA SecurID** token (same as VPN)
|
||||
* `/data/project` directories mounted in RW on demand. Project responsibles must request it.
|
||||
* `/data/user` mounted in RW (read-write)
|
||||
* `/export` directory in RW (read-write). `/export` is also visible from login nodes.
|
||||
* **`ra-merlin-01.psi.ch`**
|
||||
* **`ra-merlin-02.psi.ch`**
|
||||
|
||||
In the future, **'ra-merlin-01.psi.ch'** will be also configured with 2FA and will mount the same
|
||||
as **'ra-merlin-02.psi.ch'**. In the meantime, we keep **'ra-merlin-01.psi.ch'** with standard authentication
|
||||
until we can ensure that most of the Merlin users have a RSA SecurID token or until PSI security policy makes
|
||||
its use mandatory. Using **'ra-merlin-02.psi.ch'** over **'ra-merlin-01.psi.ch'** is always recommended (2FA
|
||||
is always more secure than standard authentication)
|
||||
Both servers mount the following Merlin filesystems:
|
||||
* `/data/project` directories mounted in RW on demand. Project responsibles must request it.
|
||||
* `/data/user` mounted in RW (read-write)
|
||||
* `/data/experiment/mu3e` directories mounted in RW (read-write), except `data` (read-only mounted)
|
||||
* `/export` directory in RW (read-write). `/export` is also visible from login nodes.
|
||||
|
||||
Access to the Remote Access servers uses ***multi-factor authentication*** (MFA).
Therefore, having the Microsoft Authenticator app is required, as explained [here](https://www.psi.ch/en/computing/change-to-mfa).
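For example, a transfer initiated from a computer outside PSI could look like the following (a sketch; adjust file names, paths and the chosen server as needed):

```bash
# Copy a local file into your Merlin user data area through ra-merlin-01
rsync -avz ./results.tar.gz $USER@ra-merlin-01.psi.ch:/data/user/$USER/

# Or open an interactive SFTP session
sftp $USER@ra-merlin-02.psi.ch
```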
|
||||
|
||||
### Directories
|
||||
|
||||
#### /data/user
|
||||
|
||||
User data directories are mounted in RO on 'ra-merlin-01', and RW on 'ra-merlin-02'.
|
||||
User data directories are mounted in RW on both 'ra-merlin-01' and 'ra-merlin-02'.
|
||||
|
||||
{{site.data.alerts.warning}}Please <b>ensure properly secured permissions</b> in your '/data/user'
directory. By default, when the directory is created, the system applies the most restrictive
|
||||
@@ -97,7 +122,7 @@ Ensure to properly secure your directories and files with proper permissions.
|
||||
|
||||
Optionally, instead of using `/export`, experiments with a Merlin project can request Read/Write or Read/Only access to their project directory.
|
||||
|
||||
{{site.data.alerts.tip}}<b>Merlin projects can request direct access on 'ra-merlin-02.psi.ch'</b>
|
||||
{{site.data.alerts.tip}}<b>Merlin projects can request direct access.</b>
|
||||
This can be configured in Read/Write or Read/Only modes. If your project needs access, please,
|
||||
contact the Merlin administrators.
|
||||
{{site.data.alerts.end}}
|
||||
@@ -129,3 +154,18 @@ Merlin6 is fully accessible from within the PSI network. To connect from outside
|
||||
- [No Machine](nomachine.md)
|
||||
* Remote Interactive Access through [**'rem-acc.psi.ch'**](https://www.psi.ch/en/photon-science-data-services/remote-interactive-access)
|
||||
* Please avoid transferring large amounts of data through **NoMachine**
|
||||
|
||||
## Connecting from Merlin6 to outside file shares
|
||||
|
||||
### `merlin_rmount` command
|
||||
|
||||
Merlin provides a command for mounting remote file systems, called `merlin_rmount`. It
is a helpful wrapper over the Gnome storage utilities and supports a wide range of remote file systems, including
|
||||
- SMB/CIFS (Windows shared folders)
|
||||
- WebDav
|
||||
- AFP
|
||||
- FTP, SFTP
|
||||
- [others](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/using_the_desktop_environment_in_rhel_8/managing-storage-volumes-in-gnome_using-the-desktop-environment-in-rhel-8#gvfs-back-ends_managing-storage-volumes-in-gnome)
|
||||
|
||||
|
||||
[More instruction on using `merlin_rmount`](/merlin6/merlin-rmount.html)
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
---
|
||||
title: Using PModules
|
||||
#tags:
|
||||
#keywords:
|
||||
last_updated: 21 May 2021
|
||||
keywords: Pmodules, software, stable, unstable, deprecated, overlay, overlays, release stage, module, package, packages, library, libraries
|
||||
last_updated: 07 September 2022
|
||||
#summary: ""
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/using-modules.html
|
||||
@@ -52,6 +52,62 @@ module use deprecated
|
||||
|
||||
However, software moved to this release stage can be loaded directly without the need of invoking it. This ensures proper life cycling of the software and makes it transparent for the end users.
|
||||
|
||||
## Module overlays
|
||||
|
||||
Recent Pmodules releases contain a feature called **Pmodules overlays**. In Merlin, overlays are used to source software from a different location.
|
||||
In that way, we can have custom private versions of software in the cluster installed on high performance storage accessed over a low latency network.
|
||||
|
||||
**Pmodules overlays** are still ***under development***, therefore consider that *some features may not work, or may not work as expected*.
|
||||
|
||||
Pmodules overlays can be used from Pmodules `v1.1.5` onwards. However, Merlin is running Pmodules `v1.0.0rc10` as the default version.
Therefore, one first needs to load a newer version of it: this is available in the repositories and can be loaded with the **`module load Pmodules/$version`** command.
|
||||
|
||||
Once running the proper Pmodules version, **overlays** are added (or invoked) with the **`module use $overlay_name`** command.
|
||||
|
||||
### overlay_merlin
|
||||
|
||||
Some Merlin software is already provided through **Pmodules overlays** and has been validated for use in that way.
Therefore, Merlin contains an overlay called **`overlay_merlin`**. In this overlay, the software is installed on the Merlin high performance storage,
specifically in the ``/data/software/pmodules`` directory. In general, if another copy exists in the standard repository, we strongly recommend using
the replica in the `overlay_merlin` overlay instead, as it provides faster access and may also include some customizations for the Merlin6 cluster.
|
||||
|
||||
For loading the `overlay_merlin`, please run:
|
||||
```bash
|
||||
module load Pmodules/1.1.6 # Or newer version
|
||||
module use overlay_merlin
|
||||
```
|
||||
|
||||
Then, once `overlay_merlin` is invoked, central software installations with the same version (if they exist) will be hidden and replaced
by the local ones in Merlin. Releases from the central Pmodules repository which do not have a copy in the Merlin overlay will remain
visible. For example, for each ANSYS release, one can identify where it is installed by searching for ANSYS in Pmodules with the `--verbose`
option. This will show the location of the different ANSYS releases as follows:
|
||||
* For ANSYS releases installed in the central repositories, the path starts with `/opt/psi`
|
||||
* For ANSYS releases installed in the Merlin6 repository (and/or overriding the central ones), the path starts with `/data/software/pmodules`
|
||||
|
||||
```bash
|
||||
(base) ❄ [caubet_m@merlin-l-001:/data/user/caubet_m]# module load Pmodules/1.1.6
|
||||
module load: unstable module has been loaded -- Pmodules/1.1.6
|
||||
|
||||
(base) ❄ [caubet_m@merlin-l-001:/data/user/caubet_m]# module use overlay_merlin
|
||||
|
||||
(base) ❄ [caubet_m@merlin-l-001:/data/user/caubet_m]# module search ANSYS --verbose
|
||||
|
||||
Module Rel.stage Group Dependencies/Modulefile
|
||||
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
ANSYS/2019R3 stable Tools dependencies:
|
||||
modulefile: /data/software/pmodules/Tools/modulefiles/ANSYS/2019R3
|
||||
ANSYS/2020R1 stable Tools dependencies:
|
||||
modulefile: /opt/psi/Tools/modulefiles/ANSYS/2020R1
|
||||
ANSYS/2020R1-1 stable Tools dependencies:
|
||||
modulefile: /opt/psi/Tools/modulefiles/ANSYS/2020R1-1
|
||||
ANSYS/2020R2 stable Tools dependencies:
|
||||
modulefile: /data/software/pmodules/Tools/modulefiles/ANSYS/2020R2
|
||||
ANSYS/2021R1 stable Tools dependencies:
|
||||
modulefile: /data/software/pmodules/Tools/modulefiles/ANSYS/2021R1
|
||||
ANSYS/2021R2 stable Tools dependencies:
|
||||
modulefile: /data/software/pmodules/Tools/modulefiles/ANSYS/2021R2
|
||||
```
|
||||
|
||||
## PModules commands
|
||||
|
||||
Below is listed a summary of all available commands:
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
---
|
||||
title: Running Interactive Jobs
|
||||
#tags:
|
||||
keywords: interactive, X11, X, srun
|
||||
last_updated: 23 January 2020
|
||||
keywords: interactive, X11, X, srun, salloc, job, jobs, slurm, nomachine, nx
|
||||
last_updated: 07 September 2022
|
||||
summary: "This document describes how to run interactive jobs as well as X based software."
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/interactive-jobs.html
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
---
|
||||
title: Monitoring
|
||||
#tags:
|
||||
#keywords:
|
||||
last_updated: 20 June 2019
|
||||
keywords: monitoring, jobs, slurm, job status, squeue, sinfo, sacct
|
||||
last_updated: 07 September 2022
|
||||
#summary: ""
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/monitoring.html
|
||||
@@ -75,6 +75,65 @@ gpu up 7-00:00:00 1-infinite no NO all 8 allocate
|
||||
</pre>
|
||||
</details>
|
||||
|
||||
### Slurm commander
|
||||
|
||||
The **[Slurm Commander (scom)](https://github.com/CLIP-HPC/SlurmCommander/)** is a simple but very useful open source text-based user interface for
efficient interaction with Slurm. It is developed by the **CLoud Infrastructure Project (CLIP-HPC)** with external contributions. To use it, one can
simply run the following command:
|
||||
|
||||
```bash
|
||||
scom # merlin6 cluster
|
||||
SLURM_CLUSTERS=merlin5 scom # merlin5 cluster
|
||||
SLURM_CLUSTERS=gmerlin6 scom # gmerlin6 cluster
|
||||
scom -h # Help and extra options
|
||||
scom -d 14 # Set Job History to 14 days (instead of default 7)
|
||||
```
|
||||
With this simple interface, users can interact with their jobs, as well as get information about past and present jobs:
* Filtering jobs by substring is possible with the `/` key.
* Users can perform multiple actions on their jobs (such as cancelling, holding or requeuing a job), SSH to a node with an already running job,
or get extended details and statistics of the job itself.
|
||||
|
||||
Users can also check the status of the cluster, to get statistics and node usage information, as well as information about node properties.
|
||||
|
||||
The interface also provides a few job templates for different use cases (e.g. MPI, OpenMP, hybrid, single core). Users can modify these templates,
save them locally to the current directory, and submit the job to the cluster.
|
||||
|
||||
{{site.data.alerts.note}}Currently, <span style="color:darkblue;">scom</span> does not provide live updated information for the <span style="color:darkorange;">[Job History]</span> tab.
|
||||
To update Job History information, users have to exit the application with the <span style="color:darkorange;">q</span> key. Other tabs will be updated every 5 seconds (default).
|
||||
On the other hand, the <span style="color:darkorange;">[Job History]</span> tab contains information for the <b>merlin6</b> CPU cluster only. Future updates will provide information
|
||||
for other clusters.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
For further information about how to use **scom**, please refer to the **[Slurm Commander Project webpage](https://github.com/CLIP-HPC/SlurmCommander/)**
|
||||
|
||||

|
||||
|
||||
### Job accounting
|
||||
|
||||
Users can check detailed information of jobs (pending, running, completed, failed, etc.) with the `sacct` command.
|
||||
This command is very flexible and can provide a lot of information. To see all the available options, please read `man sacct`.
Below, we summarize some examples that can be useful for users:
|
||||
|
||||
```bash
|
||||
# Today jobs, basic summary
|
||||
sacct
|
||||
|
||||
# Today jobs, with details
|
||||
sacct --long
|
||||
|
||||
# Jobs from January 1, 2021, 12pm, with details
|
||||
sacct -S 2021-01-01T12:00:00 --long
|
||||
|
||||
# Specific job accounting
|
||||
sacct --long -j $jobid
|
||||
|
||||
# Jobs custom details, without steps (-X)
|
||||
sacct -X --format=User%20,JobID,Jobname,partition,state,time,submit,start,end,elapsed,AveRss,MaxRss,MaxRSSTask,MaxRSSNode%20,MaxVMSize,nnodes,ncpus,ntasks,reqcpus,totalcpu,reqmem,cluster,TimeLimit,TimeLimitRaw,cputime,nodelist%50,AllocTRES%80
|
||||
|
||||
# Jobs custom details, with steps
|
||||
sacct --format=User%20,JobID,Jobname,partition,state,time,submit,start,end,elapsed,AveRss,MaxRss,MaxRSSTask,MaxRSSNode%20,MaxVMSize,nnodes,ncpus,ntasks,reqcpus,totalcpu,reqmem,cluster,TimeLimit,TimeLimitRaw,cputime,nodelist%50,AllocTRES%80
|
||||
```
|
||||
|
||||
### Job efficiency
|
||||
|
||||
Users can check how efficient their jobs are. For that, the ``seff`` command is available.
|
||||
@@ -106,7 +165,7 @@ Memory Efficiency: 0.19% of 31.25 GB
|
||||
The ``sjstat`` command is used to display statistics of jobs under control of SLURM. To use it
|
||||
|
||||
```bash
|
||||
jstat
|
||||
sjstat
|
||||
```
|
||||
|
||||
<details>
|
||||
@@ -190,11 +249,11 @@ JobID User Procs Pool Status Used Limit Starte
|
||||
|
||||
### Graphical user interface
|
||||
|
||||
When using **ssh** with X11 forwarding (``ssh -XY``) users can use ``sview``. **SView** is a graphical user
|
||||
interface to view and modify Slurm state. To run **sview**:
|
||||
When using **ssh** with X11 forwarding (``ssh -XY``), or when using NoMachine, users can use ``sview``.
|
||||
**SView** is a graphical user interface to view and modify Slurm states. To run **sview**:
|
||||
|
||||
```bash
|
||||
ssh -XY $username@merlin-l-001.psi.ch
|
||||
ssh -XY $username@merlin-l-001.psi.ch # Not necessary when using NoMachine
|
||||
sview
|
||||
```
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
---
|
||||
title: Running Slurm Scripts
|
||||
#tags:
|
||||
keywords: batch script, slurm, sbatch, srun
|
||||
last_updated: 23 January 2020
|
||||
keywords: batch script, slurm, sbatch, srun, jobs, job, submit, submission, array jobs, array, squeue, sinfo, scancel, packed jobs, short jobs, very short jobs, multithread, rules, no-multithread, HT
|
||||
last_updated: 07 September 2022
|
||||
summary: "This document describes how to run batch scripts in Slurm."
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/running-jobs.html
|
||||
@@ -151,23 +151,23 @@ The following template should be used by any user submitting jobs to GPU nodes:
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
#SBATCH --cluster=gmerlin6 # Cluster name
|
||||
#SBATCH --partition=gpu,gpu-short,gwendolen # Specify one or multiple partitions
|
||||
#SBATCH --gpus="<type>:<num_gpus>" # <type> is optional, <num_gpus> is mandatory
|
||||
#SBATCH --time=<D-HH:MM:SS> # Strongly recommended
|
||||
#SBATCH --output=<output_file> # Generate custom output file
|
||||
#SBATCH --error=<error_file> # Generate custom error file
|
||||
##SBATCH --exclusive # Uncomment if you need exclusive node usage
|
||||
##SBATCH --account=gwendolen_public # Uncomment if you need to use gwendolen
|
||||
#SBATCH --cluster=gmerlin6 # Cluster name
|
||||
#SBATCH --partition=gpu,gpu-short # Specify one or multiple partitions, or
|
||||
#SBATCH --partition=gwendolen,gwendolen-long # Only for Gwendolen users
|
||||
#SBATCH --gpus="<type>:<num_gpus>" # <type> is optional, <num_gpus> is mandatory
|
||||
#SBATCH --time=<D-HH:MM:SS> # Strongly recommended
|
||||
#SBATCH --output=<output_file> # Generate custom output file
|
||||
#SBATCH --error=<error_file> # Generate custom error file
|
||||
##SBATCH --exclusive # Uncomment if you need exclusive node usage
|
||||
|
||||
## Advanced options example
|
||||
##SBATCH --nodes=1 # Uncomment and specify number of nodes to use
|
||||
##SBATCH --ntasks=1 # Uncomment and specify number of nodes to use
|
||||
##SBATCH --cpus-per-gpu=5 # Uncomment and specify the number of cores per task
|
||||
##SBATCH --mem-per-gpu=16000 # Uncomment and specify the number of cores per task
|
||||
##SBATCH --gpus-per-node=<type>:2 # Uncomment and specify the number of GPUs per node
|
||||
##SBATCH --gpus-per-socket=<type>:2 # Uncomment and specify the number of GPUs per socket
|
||||
##SBATCH --gpus-per-task=<type>:1 # Uncomment and specify the number of GPUs per task
|
||||
##SBATCH --nodes=1 # Uncomment and specify number of nodes to use
|
||||
##SBATCH --ntasks=1 # Uncomment and specify number of nodes to use
|
||||
##SBATCH --cpus-per-gpu=5 # Uncomment and specify the number of cores per task
|
||||
##SBATCH --mem-per-gpu=16000 # Uncomment and specify the number of cores per task
|
||||
##SBATCH --gpus-per-node=<type>:2 # Uncomment and specify the number of GPUs per node
|
||||
##SBATCH --gpus-per-socket=<type>:2 # Uncomment and specify the number of GPUs per socket
|
||||
##SBATCH --gpus-per-task=<type>:1 # Uncomment and specify the number of GPUs per task
|
||||
```
|
||||
|
||||
## Advanced configurations
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
---
|
||||
title: Slurm Basic Commands
|
||||
#tags:
|
||||
#keywords:
|
||||
last_updated: 19 June 2019
|
||||
keywords: sinfo, squeue, sbatch, srun, salloc, scancel, sview, seff, sjstat, sacct, basic commands, slurm commands, cluster
|
||||
last_updated: 07 September 2022
|
||||
#summary: ""
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/slurm-basics.html
|
||||
@@ -28,6 +28,7 @@ scancel job_id # to cancel slurm job, job id is the numeric id, seen by the sq
|
||||
sview # X interface for managing jobs and track job run information.
|
||||
seff # Calculates the efficiency of a job
|
||||
sjstat # List attributes of jobs under the SLURM control
|
||||
sacct # Show job accounting, useful for checking details of finished jobs.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
---
|
||||
title: Slurm Examples
|
||||
#tags:
|
||||
keywords: example, template, examples, templates, running jobs, sbatch
|
||||
last_updated: 28 June 2019
|
||||
keywords: slurm example, template, examples, templates, running jobs, sbatch, single core based jobs, HT, multithread, no-multithread, mpi, openmp, packed jobs, hands-on, array jobs, gpu
|
||||
last_updated: 07 September 2022
|
||||
summary: "This document shows different template examples for running jobs in the Merlin cluster."
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/slurm-examples.html
|
||||
@@ -32,7 +32,7 @@ srun $MYEXEC # where $MYEXEC is a path to your binary file
|
||||
### Example 2: Non-hyperthreaded job
|
||||
|
||||
In this example we do not want hyper-threading (``--ntasks-per-core=1`` and ``--hint=nomultithread``). In our Merlin6 configuration,
|
||||
the default memory per cpu (a CPU is equivalent to a core thread) is 4000MB. If we do not specify anything else, our
|
||||
the default memory per cpu (a CPU is equivalent to a core thread) is 4000MB. If we do not specify anything else, our
|
||||
single core task will use a default of 4000MB. However, one could double it with ``--mem-per-cpu=8000`` if you require more memory
|
||||
(remember, the second thread will not be used so we can safely assign +4000MB to the unique active thread).
|
||||
|
||||
@@ -56,7 +56,7 @@ srun $MYEXEC # where $MYEXEC is a path to your binary file
|
||||
|
||||
In this example we run a job that will run 88 tasks. Merlin6 Apollo nodes have 44 cores each one with hyper-threading
|
||||
enabled. This means that we can run 2 threads per core, in total 88 threads. To accomplish that, users should specify
|
||||
``--ntasks-per-core=2`` and ``--hint=multithread``.
|
||||
``--ntasks-per-core=2`` and ``--hint=multithread``.
|
||||
|
||||
Use `--nodes=1` if you want to use a node exclusively (88 hyperthreaded tasks would fit in a Merlin6 node).
|
||||
|
||||
@@ -77,10 +77,10 @@ srun $MYEXEC # where $MYEXEC is a path to your binary file
|
||||
|
||||
### Example 2: MPI without Hyper-Threading
|
||||
|
||||
In this example, we want to run a job that will run 44 tasks, and due to performance reasons we want to disable hyper-threading.
|
||||
Merlin6 Apollo nodes have 44 cores, each one with hyper-threading enabled. For ensuring that only 1 thread will be used per task,
|
||||
In this example, we want to run a job that will run 44 tasks, and due to performance reasons we want to disable hyper-threading.
|
||||
Merlin6 Apollo nodes have 44 cores, each one with hyper-threading enabled. For ensuring that only 1 thread will be used per task,
|
||||
users should specify ``--ntasks-per-core=1`` and ``--hint=nomultithread``. With this configuration, we tell Slurm to run only 1
|
||||
tasks per core and no hyperthreading should be used. Hence, each tasks will be assigned to an independent core.
|
||||
tasks per core and no hyperthreading should be used. Hence, each tasks will be assigned to an independent core.
|
||||
|
||||
Use `--nodes=1` if you want to use a node exclusively (44 non-hyperthreaded tasks would fit in a Merlin6 node).
|
||||
|
||||
@@ -90,7 +90,7 @@ Use `--nodes=1` if you want to use a node exclusively (44 non-hyperthreaded task
|
||||
#SBATCH --ntasks=44 # Job will run 44 tasks
|
||||
#SBATCH --ntasks-per-core=1 # Request the max ntasks be invoked on each core
|
||||
#SBATCH --hint=nomultithread # Don't use extra threads with in-core multi-threading
|
||||
#SBATCH --time=00:30:00 # Define max time job will run
|
||||
#SBATCH --time=00:30:00 # Define max time job will run
|
||||
#SBATCH --output=myscript.out # Define your output file
|
||||
#SBATCH --error=myscript.err # Define your output file
|
||||
|
||||
@@ -101,8 +101,8 @@ srun $MYEXEC # where $MYEXEC is a path to your binary file
|
||||
|
||||
### Example 3: Hyperthreaded Hybrid MPI/OpenMP job
|
||||
|
||||
In this example, we want to run a Hybrid Job using MPI and OpenMP using hyperthreading. In this job, we want to run 4 MPI
|
||||
tasks by using 8 CPUs per task. Each task in our example requires 128GB of memory. Then we specify 16000MB per CPU
|
||||
In this example, we want to run a Hybrid Job using MPI and OpenMP using hyperthreading. In this job, we want to run 4 MPI
|
||||
tasks by using 8 CPUs per task. Each task in our example requires 128GB of memory. Then we specify 16000MB per CPU
|
||||
(8 x 16000MB = 128000MB). Notice that since hyperthreading is enabled, Slurm will use 4 cores per task (with hyperthreading
|
||||
2 threads -a.k.a. Slurm CPUs- fit into a core).
|
||||
|
||||
@@ -130,24 +130,24 @@ srun $MYEXEC # where $MYEXEC is a path to your binary file
|
||||
|
||||
### Example 4: Non-hyperthreaded Hybrid MPI/OpenMP job
|
||||
|
||||
In this example, we want to run a Hybrid Job using MPI and OpenMP without hyperthreading. In this job, we want to run 4 MPI
|
||||
tasks by using 8 CPUs per task. Each task in our example requires 128GB of memory. Then we specify 16000MB per CPU
|
||||
In this example, we want to run a Hybrid Job using MPI and OpenMP without hyperthreading. In this job, we want to run 4 MPI
|
||||
tasks by using 8 CPUs per task. Each task in our example requires 128GB of memory. Then we specify 16000MB per CPU
|
||||
(8 x 16000MB = 128000MB). Notice that since hyperthreading is disabled, Slurm will use 8 cores per task (disabling hyperthreading
|
||||
we force the use of only 1 thread -a.k.a. 1 CPU- per core).
|
||||
|
||||
```bash
|
||||
#!/bin/bash -l
|
||||
#SBATCH --clusters=merlin6
|
||||
#!/bin/bash -l
|
||||
#SBATCH --clusters=merlin6
|
||||
#SBATCH --job-name=test
|
||||
#SBATCH --ntasks=4
|
||||
#SBATCH --ntasks-per-socket=1
|
||||
#SBATCH --ntasks=4
|
||||
#SBATCH --ntasks-per-socket=1
|
||||
#SBATCH --mem-per-cpu=16000
|
||||
#SBATCH --cpus-per-task=8
|
||||
#SBATCH --cpus-per-task=8
|
||||
#SBATCH --partition=hourly
|
||||
#SBATCH --time=01:00:00
|
||||
#SBATCH --output=srun_%j.out
|
||||
#SBATCH --error=srun_%j.err
|
||||
#SBATCH --hint=nomultithread
|
||||
#SBATCH --time=01:00:00
|
||||
#SBATCH --output=srun_%j.out
|
||||
#SBATCH --error=srun_%j.err
|
||||
#SBATCH --hint=nomultithread
|
||||
|
||||
module purge
|
||||
module load $MODULE_NAME # where $MODULE_NAME is a software in PModules
|
||||
@@ -157,6 +157,31 @@ srun $MYEXEC # where $MYEXEC is a path to your binary file
|
||||
{{site.data.alerts.tip}} Also, always consider that **`'--mem-per-cpu' x '--cpus-per-task'`** can **never** exceed the maximum amount of memory per node (352000MB).
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
## GPU examples
|
||||
|
||||
Using GPUs requires two major changes. First, the cluster needs to be specified
|
||||
to `gmerlin6`. This should also be added to later commands pertaining to the
|
||||
job, e.g. `scancel --cluster=gmerlin6 <jobid>`. Second, the number of GPUs
|
||||
should be specified using `--gpus`, `--gpus-per-task`, or similar parameters.
|
||||
Here's an example for a simple test job:
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
#SBATCH --partition=gpu # Or 'gpu-short' for higher priority but 2-hour limit
|
||||
#SBATCH --cluster=gmerlin6 # Required for GPU
|
||||
#SBATCH --gpus=2 # Total number of GPUs
|
||||
#SBATCH --cpus-per-gpu=5 # Request CPU resources
|
||||
#SBATCH --time=1-00:00:00 # Define max time job will run
|
||||
#SBATCH --output=myscript.out # Define your output file
|
||||
#SBATCH --error=myscript.err # Define your error file
|
||||
|
||||
module purge
|
||||
module load cuda # load any needed modules here
|
||||
srun $MYEXEC # where $MYEXEC is a path to your binary file
|
||||
```
|
||||
|
||||
Slurm will automatically set the GPU visibility (e.g. `$CUDA_VISIBLE_DEVICES`).
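To confirm which GPUs a job was assigned, a quick check inside the batch script could be (a sketch, assuming `nvidia-smi` is available on the GPU nodes):

```bash
echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
srun nvidia-smi -L    # lists the GPUs visible to the job tasks
```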
|
||||
|
||||
## Advanced examples
|
||||
|
||||
### Array Jobs: launching a large number of related jobs
|
||||
@@ -190,7 +215,7 @@ have their own output file.
|
||||
* Do not use such jobs if you have very short tasks, since each array sub job will incur the full overhead of launching an independent Slurm job. For such cases you should use a **packed job** (see below).
|
||||
* If you want to control how many of these jobs can run in parallel, you can use the `#SBATCH --array=1-100%5` syntax. The `%5` will define
|
||||
that only 5 sub jobs may ever run in parallel.
|
||||
|
||||
|
||||
You can also use an array job approach to run over all files in a directory, substituting the payload with
|
||||
|
||||
``` bash
|
||||
@@ -220,7 +245,7 @@ strategy:
|
||||
#SBATCH --time=7-00:00:00 # each job can run for 7 days
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --array=1-10%1 # Run a 10-job array, one job at a time.
|
||||
if test -e checkpointfile; then
|
||||
if test -e checkpointfile; then
|
||||
# There is a checkpoint file;
|
||||
$MYEXEC --read-checkp checkpointfile
|
||||
else
|
||||
@@ -250,7 +275,7 @@ arguments passed from 1 to 1000. But with the =-N1 -n1 -c1
|
||||
instances are effectively running, each being allocated one CPU. You
|
||||
can at this point decide to allocate several CPUs or tasks by adapting
|
||||
the corresponding parameters.
|
||||
|
||||
|
||||
``` bash
|
||||
#! /bin/bash
|
||||
#SBATCH --job-name=test-checkpoint
|
||||
@@ -289,11 +314,11 @@ echo "Example MPI:" ; srun hostname # will print one hostname per ntask
|
||||
```
|
||||
|
||||
In the above example, the options ``--nodes=2`` and ``--ntasks=44`` are specified. This means that up to 2 nodes are requested,
|
||||
and is expected to run 44 tasks. Hence, 44 cores are needed for running that job. Slurm will try to allocate a maximum of
|
||||
2 nodes, both together having at least 44 cores. Since our nodes have 44 cores / each, if nodes are empty (no other users
|
||||
and is expected to run 44 tasks. Hence, 44 cores are needed for running that job. Slurm will try to allocate a maximum of
|
||||
2 nodes, both together having at least 44 cores. Since our nodes have 44 cores / each, if nodes are empty (no other users
|
||||
have running jobs there), the job can land on a single node (it has enough cores to run 44 tasks).
|
||||
|
||||
If we want to ensure that job is using at least two different nodes (i.e. for boosting CPU frequency, or because the job
|
||||
If we want to ensure that job is using at least two different nodes (i.e. for boosting CPU frequency, or because the job
|
||||
requires more memory per core) you should specify other options.
|
||||
|
||||
A good example is ``--ntasks-per-node=22``. This will equally distribute 22 tasks on 2 nodes.
|
||||
@@ -304,7 +329,7 @@ A good example is ``--ntasks-per-node=22``. This will equally distribute 22 task
|
||||
|
||||
A different example could be specifying how much memory per core is needed. For instance, ``--mem-per-cpu=32000`` will reserve
|
||||
~32000MB per core. Since we have a maximum of 352000MB per Apollo node, Slurm will be only able to allocate 11 cores (32000MB x 11cores = 352000MB) per node.
|
||||
It means that 4 nodes will be needed (max 11 tasks per node due to memory definition, and we need to run 44 tasks), in this case we need to change ``--nodes=4``
|
||||
It means that 4 nodes will be needed (max 11 tasks per node due to memory definition, and we need to run 44 tasks), in this case we need to change ``--nodes=4``
|
||||
(or remove ``--nodes``). Alternatively, we can decrease ``--mem-per-cpu`` to a lower value which can allow the use of at least 44 cores per node (i.e. with ``16000``
|
||||
should be able to use 2 nodes)
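As a sketch, the two variants discussed above translate into `#SBATCH` headers like these (values taken from the text; use one variant only):

```bash
# Variant A: distribute the 44 tasks evenly over 2 nodes
#SBATCH --nodes=2
#SBATCH --ntasks=44
#SBATCH --ntasks-per-node=22

# Variant B: request 32000MB per core, which limits each node to 11 tasks (4 nodes for 44 tasks)
#SBATCH --ntasks=44
#SBATCH --mem-per-cpu=32000
```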
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ The service is available inside of PSI (or through a VPN connection) at
|
||||
Slurm batch system. If the cluster is not currently overloaded
|
||||
and the resources you requested are available, your job will
|
||||
usually start within 30 seconds.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -63,21 +63,50 @@ conda info -e
|
||||
You can get more info on the use of the `conda` package management tool at its official [documentation site](https://conda.io/projects/conda/en/latest/commands.html).
|
||||
|
||||
## Using your own custom made environments with jupyterhub
|
||||
Python environments can take up a lot of space due to the many dependencies that will be installed. You should always install your extra environments to the data area belonging to your account, e.g. `/data/user/${YOUR-USERNAME}/conda-envs`
|
||||
Python environments can take up a lot of space due to the many dependencies that will be installed. You should always install your extra environments to the data area belonging to your account, e.g. `/data/user/${YOUR-USERNAME}/conda-envs`
|
||||
|
||||
In order for jupyterhub (and jupyter in general) to recognize the provided environment as a valid kernel, make sure that you include the `nb_conda_kernels` package in your environment. This package provides the necessary activation and the dependencies.
|
||||
|
||||
Example:
|
||||
```
|
||||
conda create -c conda-forge -p /data/user/${USER}/conda-envs/my-test-env python=3.7 nb_conda_kernels
|
||||
|
||||
```
|
||||
|
||||
After this, your new kernel will be visible as `my-test-env` inside of your jupyterhub session.
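If you later need extra packages in that environment, you can install them into the same prefix (a sketch; `numpy` is just an illustrative package):

```
conda install -c conda-forge -p /data/user/${USER}/conda-envs/my-test-env numpy
```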
|
||||
|
||||
|
||||
## Requesting additional resources
|
||||
|
||||
The **Spawner Options** page covers the most common options. These are used to
|
||||
create a submission script for the jupyterhub job and submit it to the slurm
|
||||
queue. Additional customization can be implemented using the *'Optional user
|
||||
defined line to be added to the batch launcher script'* option. This line is
|
||||
added to the submission script at the end of other `#SBATCH` lines. Parameters can
|
||||
be passed to SLURM by starting the line with `#SBATCH`, like in [Running Slurm
|
||||
Scripts](/merlin6/running-jobs.html). Some ideas:
|
||||
|
||||
**Request additional memory**
|
||||
|
||||
```
|
||||
#SBATCH --mem=100G
|
||||
```
|
||||
|
||||
**Request multiple GPUs** (gpu partition only)
|
||||
|
||||
```
|
||||
#SBATCH --gpus=2
|
||||
```
|
||||
|
||||
**Log additional information**
|
||||
|
||||
```
|
||||
hostname; date; echo $USER
|
||||
```
|
||||
|
||||
Output is found in `~/jupyterhub_batchspawner_<jobid>.log`.
|
||||
|
||||
## Contact
|
||||
In case of problems or requests, please either submit a **[PSI Service Now](https://psi.service-now.com/psisp)** incident containing *"Merlin Jupyterhub"* as part of the subject, or contact us by mail through <merlin-admins@lists.psi.ch>.
|
||||
|
||||
|
||||
In case of problems or requests, please either submit a **[PSI Service
|
||||
Now](https://psi.service-now.com/psisp)** incident containing *"Merlin
|
||||
Jupyterhub"* as part of the subject, or contact us by mail through
|
||||
<merlin-admins@lists.psi.ch>.
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
---
|
||||
title: ANSYS / CFX
|
||||
#tags:
|
||||
last_updated: 30 June 2020
|
||||
keywords: software, ansys, cfx5, cfx, slurm
|
||||
keywords: software, ansys, cfx5, cfx, slurm, interactive, rsm, batch job
|
||||
last_updated: 07 September 2022
|
||||
summary: "This document describes how to run ANSYS/CFX in the Merlin6 cluster"
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/ansys-cfx.html
|
||||
@@ -19,13 +19,20 @@ For that, run `cfx5solve -help` for getting a list of options.
|
||||
|
||||
### PModules
|
||||
|
||||
Is strongly recommended the use of the latest ANSYS software **ANSYS/2020R1-1** available in PModules.
|
||||
The use of the latest ANSYS software available in PModules is strongly recommended.
|
||||
|
||||
```bash
|
||||
module use unstable
|
||||
module load ANSYS/2020R1-1
|
||||
module load Pmodules/1.1.6
|
||||
module use overlay_merlin
|
||||
module load ANSYS/2022R1
|
||||
```
|
||||
|
||||
### Interactive: RSM from remote PSI Workstations
|
||||
|
||||
It is possible to run CFX through RSM from a remote PSI (Linux or Windows) workstation having a local installation of ANSYS CFX and the RSM client.
For that, please refer to **[ANSYS RSM](/merlin6/ansys-rsm.html)** in the Merlin documentation for further information on how to set up an RSM client for submitting jobs to Merlin.
|
||||
|
||||
### Non-interactive: sbatch
|
||||
|
||||
Running jobs with `sbatch` is always the recommended method. This makes the use of the resources more efficient. Notice that for
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
---
|
||||
title: ANSYS / Fluent
|
||||
#tags:
|
||||
last_updated: 30 June 2020
|
||||
keywords: software, ansys, fluent, slurm
|
||||
keywords: software, ansys, fluent, slurm, interactive, rsm, batch job
|
||||
last_updated: 07 September 2022
|
||||
summary: "This document describes how to run ANSYS/Fluent in the Merlin6 cluster"
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/ansys-fluent.html
|
||||
@@ -24,13 +24,20 @@ following flags:
|
||||
|
||||
### PModules
|
||||
|
||||
Is strongly recommended the use of the latest ANSYS software **ANSYS/2020R1-1** available in PModules.
|
||||
The use of the latest ANSYS software available in PModules is strongly recommended.
|
||||
|
||||
```bash
|
||||
module use unstable
|
||||
module load ANSYS/2020R1-1
|
||||
module load Pmodules/1.1.6
|
||||
module use overlay_merlin
|
||||
module load ANSYS/2022R1
|
||||
```
|
||||
|
||||
### Interactive: RSM from remote PSI Workstations
|
||||
|
||||
It is possible to run Fluent through RSM from a remote PSI (Linux or Windows) workstation having a local installation of ANSYS Fluent and the RSM client.
For that, please refer to **[ANSYS RSM](/merlin6/ansys-rsm.html)** in the Merlin documentation for further information on how to set up an RSM client for submitting jobs to Merlin.
|
||||
|
||||
### Non-interactive: sbatch
|
||||
|
||||
Running jobs with `sbatch` is always the recommended method. This makes the use of the resources more efficient.
|
||||
@@ -99,7 +106,6 @@ In the above example, one can increase the number of *nodes* and/or *ntasks* if
|
||||
`--nodes` for running on multiple nodes, but may lead to communication overhead. In general, **no
|
||||
hyperthreading** is recommended for MPI based jobs. Also, one can combine it with `--exclusive` when necessary.
|
||||
|
||||
|
||||
## Interactive: salloc
|
||||
|
||||
Running Fluent interactively is strongly discouraged; whenever possible, one should use `sbatch` instead.
|
||||
|
||||
112
pages/merlin6/05-Software-Support/ansys-hfss.md
Normal file
@@ -0,0 +1,112 @@
|
||||
---
|
||||
title: ANSYS HFSS / ElectroMagnetics
|
||||
#tags:
|
||||
keywords: software, ansys, ansysEM, em, slurm, hfss, interactive, rsm, batch job
|
||||
last_updated: 07 September 2022
|
||||
summary: "This document describes how to run ANSYS HFSS (ElectroMagnetics) in the Merlin6 cluster"
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/ansys-hfss.html
|
||||
---
|
||||
|
||||
This document describes the different ways for running **ANSYS HFSS (ElectroMagnetics)**
|
||||
|
||||
## ANSYS HFSS (ElectroMagnetics)
|
||||
|
||||
This recipe is intended to show how to run ANSYS HFSS (ElectroMagnetics) in Slurm.
Keep in mind that, in general, running ANSYS HFSS means running **ANSYS Electronics Desktop**.
|
||||
|
||||
## Running HFSS / Electromagnetics jobs
|
||||
|
||||
### PModules
|
||||
|
||||
It is necessary to run at least **ANSYS/2022R1**, which is available in PModules:
|
||||
|
||||
```bash
|
||||
module use unstable
|
||||
module load Pmodules/1.1.6
|
||||
module use overlay_merlin
|
||||
module load ANSYS/2022R1
|
||||
```
|
||||
|
||||
## Remote job submission: HFSS RSM and SLURM
|
||||
|
||||
Running jobs through Remote RSM or Slurm is the recommended way for running ANSYS HFSS.
|
||||
* **HFSS RSM** can be used from ANSYS HFSS installations running on Windows workstations at PSI (as long as they are in the internal PSI network).
* **Slurm** can be used when submitting directly from a Merlin login node (e.g. with the `sbatch` command, as sketched below, or interactively from **ANSYS Electronics Desktop**).
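For non-interactive `sbatch` submissions from a login node, a minimal sketch could look like the one below. The `ansysedt` batch flags are assumptions based on common ANSYS Electronics Desktop usage; check `ansysedt -help` for the exact options of your release:

```bash
#!/bin/bash
#SBATCH --partition=general      # Specify 'general' or 'daily' or 'hourly'
#SBATCH --time=1-00:00:00        # Strongly recommended
#SBATCH --ntasks=8
#SBATCH --output=hfss_%j.out
#SBATCH --error=hfss_%j.err

module use unstable
module load Pmodules/1.1.6
module use overlay_merlin
module load ANSYS/2022R1

# -ng (non-graphical) and -BatchSolve are assumed batch-mode options of ansysedt
ansysedt -ng -BatchSolve /data/user/$USER/myproject.aedt
```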
|
||||
|
||||
### HFSS RSM (from remote workstations)
|
||||
|
||||
Running jobs through Remote RSM is the way for running ANSYS HFSS when submitting from an ANSYS HFSS installation on a PSI Windows workstation.
|
||||
An HFSS RSM service is running on each **Merlin login node**, and the listening port depends on the ANSYS EM version. The currently supported ANSYS EM RSM
|
||||
releases and their associated listening ports are the following:
|
||||
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th scope='col' style="vertical-align:middle;text-align:center;">ANSYS version</th>
|
||||
<th scope='col' style="vertical-align:middle;text-align:center;">Login nodes</th>
|
||||
<th scope='col' style="vertical-align:middle;text-align:center;">Listening port</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr align="center">
|
||||
<td>2022R1</td>
|
||||
<td><font size="2" face="Courier New">merlin-l-001 merlin-l-002</font></td>
|
||||
<td>32958</td>
|
||||
</tr>
|
||||
<tr align="center">
|
||||
<td>2022R2</td>
|
||||
<td><font size="2" face="Courier New">merlin-l-001 merlin-l-002</font></td>
|
||||
<td>32959</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
Notice that by default ANSYS EM listens on port **`32958`**; this is the default for **ANSYS/2022R1** only.
|
||||
* Workstations connecting to the Merlin ANSYS EM service must ensure that **Electronics Desktop** is connecting to the proper port.
|
||||
* In the same way, the ANSYS version on the workstation must be the same as the version running on Merlin.
|
||||
|
||||
Notice that _HFSS RSM is not the same RSM provided for other ANSYS products._ Therefore, the configuration is different from [ANSYS RSM](/merlin6/ansys-rsm.html).
|
||||
|
||||
To set up HFSS RSM for use with the Merlin cluster, configure it from the following **ANSYS Electronics Desktop** menus:
|
||||
|
||||
1. **[Tools]->[Job Management]->[Select Scheduler]**.
|
||||

|
||||
2. In the new **[Select scheduler]** window, setup the following settings and **Refresh**:
|
||||

|
||||
* **Select Scheduler**: `Remote RSM`.
|
||||
* **Server**: Add a Merlin login node.
|
||||
* **User name**: Add your Merlin username.
|
||||
* **Password**: Add your Merlin password.
|
||||
|
||||
Once *refreshed*, the **Scheduler info** box should show the **Slurm** information of the server (see the picture above). If the box contains that information, you can save the changes (`OK` button).
|
||||
3. **[Tools]->[Job Management]->[Submit Job...]**.
|
||||
|
||||

|
||||
|
||||
4. In the new **[Submit Job]** window, you must specify the location of the **ANSYS Electronics Desktop** binary.
|
||||

|
||||
* For example, for **ANSYS/2022R1**, the location is `/data/software/pmodules/Tools/ANSYS/2021R1/v211/AnsysEM21.1/Linux64/ansysedt.exe`.
|
||||
|
||||
### HFSS Slurm (from login node only)
|
||||
|
||||
Running jobs through Slurm from **ANSYS Electronics Desktop** is the way to run ANSYS HFSS when submitting from an ANSYS HFSS installation on a Merlin login node. **ANSYS Electronics Desktop** usually needs to be run from the **[Merlin NoMachine](/merlin6/nomachine.html)** service, which currently runs on:
|
||||
- `merlin-l-001.psi.ch`
|
||||
- `merlin-l-002.psi.ch`
|
||||
|
||||
Since the Slurm client is present on the login node (where **ANSYS Electronics Desktop** is running), the application is able to detect Slurm and submit to it directly. Therefore, we only have to configure **ANSYS Electronics Desktop** to submit to Slurm. This can be set up as follows:
|
||||
|
||||
1. **[Tools]->[Job Management]->[Select Scheduler]**.
|
||||

|
||||
2. In the new **[Select scheduler]** window, setup the following settings and **Refresh**:
|
||||

|
||||
* **Select Scheduler**: `Slurm`.
|
||||
* **Server**: must point to `localhost`.
|
||||
* **User name**: must be empty.
|
||||
* **Password**: must be empty.
|
||||
The **Server**, **User name** and **Password** boxes can't be modified directly; if the values do not match the settings above, change them by temporarily selecting another scheduler which allows editing these boxes (i.e. **Remote RSM**).
|
||||
|
||||
Once *refreshed*, the **Scheduler info** box should show the **Slurm** information of the server (see the picture above). If the box contains that information, you can save the changes (`OK` button).
|
||||
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
---
|
||||
title: ANSYS / MAPDL
|
||||
#tags:
|
||||
last_updated: 30 June 2020
|
||||
keywords: software, ansys, mapdl, slurm, apdl
|
||||
keywords: software, ansys, mapdl, slurm, apdl, interactive, rsm, batch job
|
||||
last_updated: 07 September 2022
|
||||
summary: "This document describes how to run ANSYS/Mechanical APDL in the Merlin6 cluster"
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/ansys-mapdl.html
|
||||
@@ -19,13 +19,20 @@ For that, please refer to the official Mechanical APDL documentation.
|
||||
|
||||
### PModules
|
||||
|
||||
Is strongly recommended the use of the latest ANSYS software **ANSYS/2020R1-1** available in PModules.
|
||||
It is strongly recommended to use the latest ANSYS software available in PModules.
|
||||
|
||||
```bash
|
||||
module use unstable
|
||||
module load ANSYS/2020R1-1
|
||||
module load Pmodules/1.1.6
|
||||
module use overlay_merlin
|
||||
module load ANSYS/2022R1
|
||||
```
|
||||
|
||||
### Interactive: RSM from remote PSI Workstations
|
||||
|
||||
It is possible to run Mechanical through RSM from a remote PSI (Linux or Windows) workstation with a local installation of ANSYS Mechanical and the RSM client.
|
||||
For that, please refer to the **[ANSYS RSM](/merlin6/ansys-rsm.html)** section in the Merlin documentation for further information on how to set up an RSM client for submitting jobs to Merlin.
|
||||
|
||||
### Non-interactive: sbatch
|
||||
|
||||
Running jobs with `sbatch` is always the recommended method. This makes the use of the resources more efficient. Notice that for
|
||||
|
||||
108
pages/merlin6/05-Software-Support/ansys-rsm.md
Normal file
@@ -0,0 +1,108 @@
|
||||
---
|
||||
title: ANSYS RSM (Remote Solve Manager)
|
||||
#tags:
|
||||
keywords: software, ansys, rsm, slurm, interactive, rsm, windows
|
||||
last_updated: 07 September 2022
|
||||
summary: "This document describes how to use the ANSYS Remote Solve Manager service in the Merlin6 cluster"
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/ansys-rsm.html
|
||||
---
|
||||
|
||||
## ANSYS Remote Solve Manager
|
||||
|
||||
**ANSYS Remote Solve Manager (RSM)** is used by ANSYS Workbench to submit computational jobs to HPC clusters directly from Workbench on your desktop.
|
||||
Therefore, PSI workstations ***with direct access to Merlin*** can submit jobs by using RSM.
|
||||
|
||||
Users are responsible for requesting any necessary network access and for debugging any connectivity problems with the cluster.
|
||||
For example, if the workstation is behind a firewall, users need to request a **[firewall rule](https://psi.service-now.com/psisp/?id=psi_new_sc_category&sys_id=6f07ab1e4f3913007f7660fe0310c7ba)** to enable access to Merlin.
|
||||
|
||||
{{site.data.alerts.warning}} The Merlin6 administrators <b>are not responsible for connectivity problems</b> between user workstations and the Merlin6 cluster.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
### The Merlin6 RSM service
|
||||
|
||||
An RSM service is running on each login node. This service listens on a specific port and processes any incoming RSM request (for example, from ANSYS user workstations).
|
||||
The following login nodes are configured with such services:
|
||||
* `merlin-l-01.psi.ch`
|
||||
* `merlin-l-001.psi.ch`
|
||||
* `merlin-l-002.psi.ch`
|
||||
|
||||
Each ANSYS release installed in `/data/software/pmodules/ANSYS` should have its own RSM service running (the listening port is the default one set by that ANSYS release). With the following command users can check which ANSYS releases have an RSM instance running:
|
||||
|
||||
```bash
|
||||
systemctl | grep pli-ansys-rsm-v[0-9][0-9][0-9].service
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary>[Example] Listing RSM service running on merlin-l-001.psi.ch</summary>
|
||||
<pre class="terminal code highlight js-syntax-highlight plaintext" lang="plaintext" markdown="false">
|
||||
(base) ❄ [caubet_m@merlin-l-001:/data/user/caubet_m]# systemctl | grep pli-ansys-rsm-v[0-9][0-9][0-9].service
|
||||
pli-ansys-rsm-v195.service loaded active exited PSI ANSYS RSM v195
|
||||
pli-ansys-rsm-v202.service loaded active exited PSI ANSYS RSM v202
|
||||
pli-ansys-rsm-v211.service loaded active exited PSI ANSYS RSM v211
|
||||
pli-ansys-rsm-v212.service loaded active exited PSI ANSYS RSM v212
|
||||
pli-ansys-rsm-v221.service loaded active exited PSI ANSYS RSM v221
|
||||
</pre>
|
||||
</details>
|
||||
|
||||
## Configuring RSM client on Windows workstations
|
||||
|
||||
Users can set up ANSYS RSM on their workstations to connect to the Merlin6 cluster.
|
||||
The different steps and settings required to make it work are the following:
|
||||
|
||||
1. Open the RSM Configuration service in Windows for the ANSYS release you want to configure.
|
||||
2. Right-click the **HPC Resources** icon followed by **Add HPC Resource...**
|
||||

|
||||
3. In the **HPC Resource** tab, fill up the corresponding fields as follows:
|
||||

|
||||
* **"Name"**: Add here the preferred name for the cluster. For example: `Merlin6 cluster - merlin-l-001`
|
||||
* **"HPC Type"**: Select `SLURM`
|
||||
* **"Submit host"**: Add one of the login nodes. For example, `merlin-l-001`.
|
||||
* **"Slurm Job submission arguments (optional)"**: Add any required Slurm options for running your jobs.
|
||||
* In general, at least `--hint=nomultithread` should be present.
|
||||
* Check **"Use SSH protocol for inter and intra-node communication (Linux only)"**
|
||||
* Select **"Able to directly submit and monitor HPC jobs"**.
|
||||
* **"Apply"** changes.
|
||||
4. In the **"File Management"** tab, fill up the corresponding fields as follows:
|
||||

|
||||
* Select **"RSM internal file transfer mechanism"** and add **`/shared-scratch`** as the **"Staging directory path on Cluster"**
|
||||
* Select **"Scratch directory local to the execution node(s)"** and add **`/scratch`** as the **HPC scratch directory**.
|
||||
* **Never check** the option "Keep job files in the staging directory when job is complete" if the previous
|
||||
option "Scratch directory local to the execution node(s)" was set.
|
||||
* **"Apply"** changes.
|
||||
5. In the **"Queues"** tab, use the left button to auto-discover partitions
|
||||

|
||||
* If no authentication method was configured before, an authentication window will appear. Use your
|
||||
PSI account to authenticate. Notice that the **`PSICH\`** prefix **must not be added**.
|
||||

|
||||
* From the partition list, select the ones you typically want to use.
|
||||
* In general, standard Merlin users must use **`hourly`**, **`daily`** and **`general`** only.
|
||||
* Other partitions are reserved for allowed users only.
|
||||
* **"Apply"** changes.
|
||||

|
||||
6. *[Optional]* You can perform a test by submitting a test job on each partition by clicking on the **Submit** button
|
||||
for each selected partition.
|
||||
|
||||
{{site.data.alerts.tip}}
|
||||
Repeat the process for adding other login nodes if necessary. This will give users the alternative
|
||||
of using another login node in case of maintenance windows.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
## Using RSM in ANSYS
|
||||
|
||||
Using the RSM service in ANSYS is slightly different depending on the ANSYS software being used.
|
||||
Please follow the official ANSYS documentation for details about how to use it for that specific software.
|
||||
|
||||
Alternatively, please refer to some of the examples shown in the following chapters (ANSYS-specific software).
|
||||
|
||||
### Using RSM in ANSYS Fluent
|
||||
|
||||
For further information on using RSM with Fluent, please visit the **[ANSYS Fluent](/merlin6/ansys-fluent.html)** section.
|
||||
|
||||
### Using RSM in ANSYS CFX
|
||||
|
||||
For further information on using RSM with CFX, please visit the **[ANSYS CFX](/merlin6/ansys-cfx.html)** section.
|
||||
|
||||
### Using RSM in ANSYS MAPDL
|
||||
|
||||
For further information on using RSM with MAPDL, please visit the **[ANSYS MAPDL](/merlin6/ansys-mapdl.html)** section.
|
||||
89
pages/merlin6/05-Software-Support/ansys.md
Normal file
@@ -0,0 +1,89 @@
|
||||
---
|
||||
title: ANSYS
|
||||
#tags:
|
||||
keywords: software, ansys, slurm, interactive, rsm, pmodules, overlay, overlays
|
||||
last_updated: 07 September 2022
|
||||
summary: "This document describes how to load and use ANSYS in the Merlin6 cluster"
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/ansys.html
|
||||
---
|
||||
|
||||
This document provides generic information on how to load and run ANSYS software in the Merlin cluster.
|
||||
|
||||
## ANSYS software in Pmodules
|
||||
|
||||
The ANSYS software can be loaded through **[PModules](/merlin6/using-modules.html)**.
|
||||
|
||||
The default ANSYS versions are loaded from the central PModules repository.
|
||||
However, there are some known problems that can pop up when using some specific ANSYS packages in advanced mode.
|
||||
Due to this, and also to improve the interactive experience of the user, ANSYS has also been installed on the
|
||||
Merlin high-performance storage and made available from PModules.
|
||||
|
||||
### Loading Merlin6 ANSYS
|
||||
|
||||
For loading the Merlin6 ANSYS software, one needs to run Pmodules v1.1.4 or newer, and then use a specific repository
|
||||
(called **`overlay_merlin`**) which is ***only available from the Merlin cluster***:
|
||||
|
||||
```bash
|
||||
module load Pmodules/1.1.6
|
||||
module use overlay_merlin
|
||||
```
|
||||
|
||||
Once `overlay_merlin` is invoked, it will disable central ANSYS installations with the same version, which will be replaced
|
||||
by the local ones in Merlin. Releases from the central PModules repository which do not have a local installation will remain
|
||||
visible. For each ANSYS release, one can identify where it is installed by searching ANSYS in PModules with the `--verbose`
|
||||
option. This will show the location of the different ANSYS releases as follows:
|
||||
* For ANSYS releases installed in the central repositories, the path starts with `/opt/psi`
|
||||
* For ANSYS releases installed in the Merlin6 repository (and/or overriding the central ones), the path starts with `/data/software/pmodules`
|
||||
|
||||
<details>
|
||||
<summary>[Example] Loading ANSYS from the Merlin6 PModules repository</summary>
|
||||
<pre class="terminal code highlight js-syntax-highlight plaintext" lang="plaintext" markdown="false">
|
||||
(base) ❄ [caubet_m@merlin-l-001:/data/user/caubet_m]# module load Pmodules/1.1.6
|
||||
module load: unstable module has been loaded -- Pmodules/1.1.6
|
||||
|
||||
(base) ❄ [caubet_m@merlin-l-001:/data/user/caubet_m]# module use overlay_merlin
|
||||
|
||||
(base) ❄ [caubet_m@merlin-l-001:/data/user/caubet_m]# module search ANSYS --verbose
|
||||
|
||||
Module Rel.stage Group Dependencies/Modulefile
|
||||
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
ANSYS/2019R3 stable Tools dependencies:
|
||||
modulefile: /data/software/pmodules/Tools/modulefiles/ANSYS/2019R3
|
||||
ANSYS/2020R1 stable Tools dependencies:
|
||||
modulefile: /opt/psi/Tools/modulefiles/ANSYS/2020R1
|
||||
ANSYS/2020R1-1 stable Tools dependencies:
|
||||
modulefile: /opt/psi/Tools/modulefiles/ANSYS/2020R1-1
|
||||
ANSYS/2020R2 stable Tools dependencies:
|
||||
modulefile: /data/software/pmodules/Tools/modulefiles/ANSYS/2020R2
|
||||
ANSYS/2021R1 stable Tools dependencies:
|
||||
modulefile: /data/software/pmodules/Tools/modulefiles/ANSYS/2021R1
|
||||
ANSYS/2021R2 stable Tools dependencies:
|
||||
modulefile: /data/software/pmodules/Tools/modulefiles/ANSYS/2021R2
|
||||
</pre>
|
||||
</details>
|
||||
|
||||
|
||||
{{site.data.alerts.tip}} Please <b>only use Merlin6 ANSYS installations from `overlay_merlin`</b> in the Merlin cluster.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
## ANSYS Documentation by product
|
||||
|
||||
### ANSYS RSM
|
||||
|
||||
**ANSYS Remote Solve Manager (RSM)** is used by ANSYS Workbench to submit computational jobs to HPC clusters directly from Workbench on your desktop.
|
||||
Therefore, PSI workstations with direct access to Merlin can submit jobs by using RSM.
|
||||
|
||||
For further information, please visit the **[ANSYS RSM](/merlin6/ansys-rsm.html)** section.
|
||||
|
||||
### ANSYS Fluent
|
||||
|
||||
For further information, please visit the **[ANSYS Fluent](/merlin6/ansys-fluent.html)** section.
|
||||
|
||||
### ANSYS CFX
|
||||
|
||||
For further information, please visit the **[ANSYS CFX](/merlin6/ansys-cfx.html)** section.
|
||||
|
||||
### ANSYS MAPDL
|
||||
|
||||
For further information, please visit the **[ANSYS MAPDL](/merlin6/ansys-mapdl.html)** section.
|
||||
216
pages/merlin6/05-Software-Support/gothic.md
Normal file
@@ -0,0 +1,216 @@
|
||||
---
|
||||
title: GOTHIC
|
||||
#tags:
|
||||
keywords: software, gothic, slurm, interactive, batch job
|
||||
last_updated: 07 September 2022
|
||||
summary: "This document describes how to run Gothic in the Merlin cluster"
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/gothic.html
|
||||
---
|
||||
|
||||
This document provides generic information on how to run Gothic in the
|
||||
Merlin cluster.
|
||||
|
||||
## Gothic installation
|
||||
|
||||
Gothic is locally installed in Merlin in the following directory:
|
||||
```bash
|
||||
/data/project/general/software/gothic
|
||||
```
|
||||
|
||||
Multiple versions are available. As of August 22, 2022, the latest
|
||||
installed version is **Gothic 8.3 QA**.
|
||||
|
||||
Future releases will be placed in the PSI Modules system, therefore,
|
||||
loading it through PModules will be possible at some point. However, in the
|
||||
meantime one has to use the existing installations present in
|
||||
`/data/project/general/software/gothic`.
|
||||
|
||||
## Running Gothic
|
||||
|
||||
### General requirements
|
||||
|
||||
When running Gothic in interactive or batch mode, one has to consider
|
||||
the following requirements:
|
||||
|
||||
* **Always use one node only**: Gothic runs a single instance.
|
||||
Therefore, it cannot run on multiple nodes. Adding the option `--nodes=1-1`
|
||||
or `-N 1-1` is strongly recommended: this will prevent Slurm from allocating
|
||||
multiple nodes if the Slurm allocation definition is ambiguous.
|
||||
* **Use one task only**: Gothic spawns one main process, which then will
|
||||
spawn multiple threads depending on the number of available cores.
|
||||
Therefore, one has to specify 1 task (`--ntasks=1` or `-n 1`).
|
||||
* **Use multiple CPUs**: since Gothic will spawn multiple threads, then
|
||||
multiple CPUs can be used. Adding `--cpus-per-task=<num_cpus>`
|
||||
or `-c <num_cpus>` is in general recommended.
|
||||
Notice that `<num_cpus>` must never exceed the maximum number of CPUs
|
||||
in a compute node (usually *88*).
|
||||
* **Use multithreading**: Gothic is an OpenMP-based software, therefore,
|
||||
running in hyper-threading mode is strongly recommended. Use the option
|
||||
`--hint=multithread` for enforcing hyper-threading.
|
||||
* **[Optional]** *Memory setup*: The default memory per CPU (4000MB)
|
||||
is usually enough for running Gothic. If you require more memory, you
|
||||
can always set the `--mem=<mem_in_MB>` option. This is in general
|
||||
*not necessary*.
|
||||
|
||||
### Interactive
|
||||
|
||||
**Running CPU-intensive interactive jobs on the
|
||||
login nodes is not allowed**. Only applications capable of limiting the number of cores they use are
|
||||
allowed to run for a longer time. Also, **running on the login nodes is not
|
||||
efficient**, since resources are shared with other processes and users.
|
||||
|
||||
It is possible to submit interactive jobs to the cluster by allocating a
|
||||
full compute node, or even by allocating a few cores only. This will grant
|
||||
dedicated CPUs and resources and in general it will not affect other users.
|
||||
|
||||
For interactive jobs, it is strongly recommended to use the `hourly` partition,
|
||||
which usually has a good availability of nodes.
|
||||
|
||||
For longer runs, one should use the `daily` (or `general`) partition.
|
||||
However, getting interactive access to nodes on these partitions is
|
||||
sometimes more difficult if the cluster is heavily used.
|
||||
|
||||
To submit an interactive job, consider the following requirements:
|
||||
* **X11 forwarding must be enabled**: Gothic spawns an interactive
|
||||
window which requires X11 forwarding when using it remotely, therefore
|
||||
using the Slurm option `--x11` is necessary.
|
||||
* **Ensure that the scratch area is accessible**: For running Gothic,
|
||||
one has to define a scratch area with the `GTHTMP` environment variable.
|
||||
There are two options:
|
||||
1. **Use local scratch**: Each compute node has its own `/scratch` area.
|
||||
This area is independent on each node, and therefore not visible from other nodes.
|
||||
Using the top directory `/scratch` for interactive jobs is the simplest way,
|
||||
and it can be defined before or after the allocation creation, as follows:
|
||||
```bash
|
||||
# Example 1: Define GTHTMP before the allocation
|
||||
export GTHTMP=/scratch
|
||||
salloc ...
|
||||
|
||||
# Example 2: Define GTHTMP after the allocation
|
||||
salloc ...
|
||||
export GTHTMP=/scratch
|
||||
```
|
||||
Notice that if you want to create a custom sub-directory (e.g.
|
||||
`/scratch/$USER`), one has to create the sub-directory on every new
|
||||
allocation! For example:
|
||||
```bash
|
||||
# Example 1:
|
||||
export GTHTMP=/scratch/$USER
|
||||
salloc ...
|
||||
mkdir -p $GTHTMP
|
||||
|
||||
# Example 2:
|
||||
salloc ...
|
||||
export GTHTMP=/scratch/$USER
|
||||
mkdir -p $GTHTMP
|
||||
```
|
||||
Creating sub-directories makes the process more complex, therefore
|
||||
using just `/scratch` is simpler and recommended.
|
||||
2. **Shared scratch**: Using shared scratch allows having a
|
||||
directory visible from all compute nodes and login nodes. Therefore,
|
||||
one can use `/shared-scratch` to achieve the same as in **1.**, but
|
||||
creating a sub-directory needs to be done just once.
|
||||
|
||||
Please consider that `/scratch` usually provides better performance and,
|
||||
in addition, will offload the main storage. Therefore, using **local scratch**
|
||||
is strongly recommended. Use shared scratch only when strictly necessary.
|
||||
* **Use the `hourly` partition**: Using the `hourly` partition is
|
||||
recommended for running interactive jobs (latency is in general
|
||||
lower). However, `daily` and `general` are also available if you expect
|
||||
longer runs, but in these cases you should expect longer waiting times.
|
||||
|
||||
These requirements are in addition to the requirements previously described
|
||||
in the [General requirements](/merlin6/gothic.html#general-requirements)
|
||||
section.
|
||||
|
||||
#### Interactive allocations: examples
|
||||
* Requesting a full node,
|
||||
```bash
|
||||
salloc --partition=hourly -N 1 -n 1 -c 88 --hint=multithread --x11 --exclusive --mem=0
|
||||
```
|
||||
* Requesting 22 CPUs from a node, with default memory per CPU (4000MB/CPU):
|
||||
```bash
|
||||
num_cpus=22
|
||||
salloc --partition=hourly -N 1 -n 1 -c $num_cpus --hint=multithread --x11
|
||||
```
|
||||
|
||||
### Batch job
|
||||
|
||||
The Slurm cluster is mainly used for non-interactive batch jobs: users
|
||||
submit a job, which goes into a queue, and waits until Slurm can assign
|
||||
resources to it. In general, the longer the job, the longer the waiting time,
|
||||
unless there are enough free resources to immediately start running it.
|
||||
|
||||
Running Gothic in a Slurm batch script is pretty simple. One has to mainly
|
||||
consider the requirements described in the [General requirements](/merlin6/gothic.html#general-requirements)
|
||||
section, and:
|
||||
* **Use local scratch** for running batch jobs. In general, defining
|
||||
`GTHTMP` in a batch script is simpler than on an allocation. If you plan
|
||||
to run multiple jobs in the same node, you can even create a second sub-directory
|
||||
level based on the Slurm Job ID:
|
||||
```bash
|
||||
mkdir -p /scratch/$USER/$SLURM_JOB_ID
|
||||
export GTHTMP=/scratch/$USER/$SLURM_JOB_ID
|
||||
... # Run Gothic here
|
||||
rm -rf /scratch/$USER/$SLURM_JOB_ID
|
||||
```
|
||||
Temporary data generated by the job in `GTHTMP` must be removed at the end of
|
||||
the job, as shown above.
|
||||
|
||||
#### Batch script: examples
|
||||
|
||||
* Requesting a full node:
|
||||
```bash
|
||||
#!/bin/bash -l
|
||||
#SBATCH --job-name=Gothic
|
||||
#SBATCH --time=3-00:00:00
|
||||
#SBATCH --partition=general
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks=1
|
||||
#SBATCH --cpus-per-task=88
|
||||
#SBATCH --hint=multithread
|
||||
#SBATCH --exclusive
|
||||
#SBATCH --mem=0
|
||||
#SBATCH --clusters=merlin6
|
||||
|
||||
INPUT_FILE='MY_INPUT.SIN'
|
||||
|
||||
mkdir -p /scratch/$USER/$SLURM_JOB_ID
|
||||
export GTHTMP=/scratch/$USER/$SLURM_JOB_ID
|
||||
|
||||
/data/project/general/software/gothic/gothic8.3qa/bin/gothic_s.sh $INPUT_FILE -m -np $SLURM_CPUS_PER_TASK
|
||||
gth_exit_code=$?
|
||||
|
||||
# Clean up data in /scratch
|
||||
rm -rf /scratch/$USER/$SLURM_JOB_ID
|
||||
|
||||
# Return exit code from GOTHIC
|
||||
exit $gth_exit_code
|
||||
```
|
||||
* Requesting 22 CPUs from a node, with default memory per CPU (4000MB/CPU):
|
||||
```bash
|
||||
#!/bin/bash -l
|
||||
#SBATCH --job-name=Gothic
|
||||
#SBATCH --time=3-00:00:00
|
||||
#SBATCH --partition=general
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks=1
|
||||
#SBATCH --cpus-per-task=22
|
||||
#SBATCH --hint=multithread
|
||||
#SBATCH --clusters=merlin6
|
||||
|
||||
INPUT_FILE='MY_INPUT.SIN'
|
||||
|
||||
mkdir -p /scratch/$USER/$SLURM_JOB_ID
|
||||
export GTHTMP=/scratch/$USER/$SLURM_JOB_ID
|
||||
|
||||
/data/project/general/software/gothic/gothic8.3qa/bin/gothic_s.sh $INPUT_FILE -m -np $SLURM_CPUS_PER_TASK
|
||||
gth_exit_code=$?
|
||||
|
||||
# Clean up data in /scratch
|
||||
rm -rf /scratch/$USER/$SLURM_JOB_ID
|
||||
|
||||
# Return exit code from GOTHIC
|
||||
exit $gth_exit_code
|
||||
```
|
||||
@@ -2,7 +2,7 @@
|
||||
title: Running Paraview
|
||||
#tags:
|
||||
last_updated: 03 December 2020
|
||||
keywords: software, paraview, mesa, OpenGL
|
||||
keywords: software, paraview, mesa, OpenGL, interactive
|
||||
summary: "This document describes how to run ParaView in the Merlin6 cluster"
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/paraview.html
|
||||
|
||||
@@ -92,7 +92,6 @@ pkgs_dirs:
|
||||
- /opt/psi/Programming/anaconda/2019.07/conda/pkgs
|
||||
|
||||
channels:
|
||||
- http://conda-pkg.intranet.psi.ch
|
||||
- conda-forge
|
||||
- defaults
|
||||
```
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
---
|
||||
title: Contact
|
||||
#tags:
|
||||
#keywords:
|
||||
last_updated: 28 June 2019
|
||||
keywords: contact, support, snow, service now, mailing list, mailing, email, mail, merlin-admins@lists.psi.ch, merlin-users@lists.psi.ch, merlin users
|
||||
last_updated: 07 September 2022
|
||||
#summary: ""
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/contact.html
|
||||
|
||||
48
pages/merlin6/99-support/faq.md
Normal file
@@ -0,0 +1,48 @@
|
||||
---
|
||||
title: FAQ
|
||||
#tags:
|
||||
keywords: faq, frequently asked questions, support
|
||||
last_updated: 27 October 2022
|
||||
#summary: ""
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/faq.html
|
||||
---
|
||||
|
||||
{%include toc.html %}
|
||||
|
||||
## How do I register for Merlin?
|
||||
|
||||
See [Requesting Accounts](/merlin6/request-account.html).
|
||||
|
||||
## How do I get information about downtimes and updates?
|
||||
|
||||
See [Get updated through the Merlin User list!](/merlin6/contact.html#get-updated-through-the-merlin-user-list)
|
||||
|
||||
## How can I request access to a Merlin project directory?
|
||||
|
||||
Merlin projects are placed in the `/data/project` directory. Access to each project is controlled by Unix group membership.
|
||||
If you require access to an existing project, please request group membership as described in [Requesting extra Unix groups](/merlin6/request-account.html#requesting-extra-unix-groups).
|
||||
|
||||
Your project leader or project colleagues will know what Unix group you should belong to. Otherwise, you can check what Unix group is allowed to access that project directory (simply run `ls -ltrha`).
|
||||
|
||||
## Can I install software myself?
|
||||
|
||||
Most software can be installed in user directories without any special permissions. We recommend using `/data/user/$USER/bin` for software since home directories are fairly small. For software that will be used by multiple groups/users you can also [request the admins](/merlin6/contact.html) install it as a [module](/merlin6/using-modules.html).
|
||||
|
||||
How to install depends a bit on the software itself. There are three common installation procedures:
|
||||
|
||||
1. *binary distributions*. These are easy; just put them in a directory (e.g. `/data/user/$USER/bin`) and add that to your PATH.
|
||||
2. *source compilation* using make/cmake/autoconf/etc. Usually the compilation scripts accept a `--prefix=/data/user/$USER` option for where to install it. Then they place files under `<prefix>/bin`, `<prefix>/lib`, etc. The exact syntax should be documented in the installation instructions (see the sketch after this list).
|
||||
3. *conda environment*. This is now becoming standard for python-based software, including lots of the AI tools. First follow the [initial setup instructions](/merlin6/python.html#anaconda) to configure conda to use /data/user instead of your home directory. Then you can create environments like:
|
||||
|
||||
```
|
||||
module load anaconda/2019.07
|
||||
# if they provide environment.yml
|
||||
conda env create -f environment.yml
|
||||
|
||||
# or to create manually
|
||||
conda create --name myenv python==3.9 ...
|
||||
|
||||
conda activate myenv
|
||||
```
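As a minimal sketch of the second case (source compilation with a prefix), assuming a hypothetical package tarball `mytool-1.0.tar.gz`; the exact steps and flags depend on the package:

```bash
# Hypothetical package name; replace with the real tarball
tar xzf mytool-1.0.tar.gz
cd mytool-1.0

# Install under /data/user/$USER instead of the default system location
./configure --prefix=/data/user/$USER
make -j 4
make install

# Make the installed binaries and libraries visible in your environment
export PATH=/data/user/$USER/bin:$PATH
export LD_LIBRARY_PATH=/data/user/$USER/lib:$LD_LIBRARY_PATH
```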
|
||||
|
||||
@@ -1,24 +1,93 @@
|
||||
---
|
||||
title: Known Problems
|
||||
#tags:
|
||||
#keywords:
|
||||
last_updated: 21 January 2021
|
||||
keywords: known problems, troubleshooting, illegal instructions, paraview, ansys, shell, opengl, mesa
|
||||
last_updated: 07 September 2022
|
||||
#summary: ""
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/known-problems.html
|
||||
---
|
||||
|
||||
## Known Problems Summary
|
||||
## Common errors
|
||||
|
||||
| Topic |
|
||||
|:----------------------------------------------------------------------------------------- |
|
||||
| [Default Shell](/merlin6/known-problems.html#default-shell) |
|
||||
| [OpenGL vs Mesa](/merlin6/known-problems.html#opengl-vs-mesa) |
|
||||
| [Paraview](/merlin6/known-problems.html#OpenGL) |
|
||||
| [ANSYS](/merlin6/known-problems.html#opengl-support-paraview-ansys-etc) |
|
||||
| [Illegal instructions error](i/merlin6/known-problems.html#illegal-instructions) |
|
||||
### Illegal instruction error
|
||||
|
||||
## Default SHELL
|
||||
It may happen that your code, compiled on one machine, will not execute on another, throwing an exception like **"Illegal instruction"**.
|
||||
This is usually because the software was compiled with an instruction set newer than the one available on the node where the software runs,
|
||||
and it mostly depends on the processor generation.
|
||||
|
||||
For example, `merlin-l-001` and `merlin-l-002` contain a newer generation of processors than the old GPU nodes or the Merlin5 cluster.
|
||||
Hence, unless one compiles the software to be compatible with the instruction set of the older processors, it will not run on the old nodes.
|
||||
Sometimes this is properly set by default at compilation time, but sometimes it is not.
|
||||
|
||||
For GCC, please refer to [GCC x86 Options](https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html) for the relevant compilation options. In case of doubt, contact us.
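As an illustrative sketch (the exact `-march` values to use depend on the node generations involved), the difference is between building for the host CPU only and building for a conservative baseline:

```bash
# Tuned for the CPU of the build host only: the binary may later fail with
# "Illegal instruction" on nodes with older processors
gcc -O3 -march=native -o mycode mycode.c

# Built for a conservative x86-64 baseline: somewhat slower, but it should
# run on all node generations
gcc -O3 -march=x86-64 -mtune=generic -o mycode mycode.c
```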
|
||||
|
||||
## Slurm
|
||||
|
||||
### sbatch using one core despite setting -c/--cpus-per-task
|
||||
|
||||
From **Slurm v22.05.6**, the behavior of `srun` has changed. Merlin has been running this version since *Tuesday 13.12.2022*.
|
||||
|
||||
`srun` will no longer read in `SLURM_CPUS_PER_TASK`, which is typically set when defining `-c/--cpus-per-task` in the `sbatch` command.
|
||||
This means you will explicitly have to specify `-c/--cpus-per-task` also on your `srun` calls, or set the new `SRUN_CPUS_PER_TASK` environment variable to accomplish the same thing.
|
||||
Therefore, unless this is explicitly specified, `srun` will use only one core per task (resulting in 2 CPUs per task when multithreading is enabled).
|
||||
|
||||
An example for setting up `srun` with `-c/--cpus-per-task`:
|
||||
```bash
|
||||
(base) ❄ [caubet_m@merlin-l-001:/data/user/caubet_m]# cat mysbatch_method1
|
||||
#!/bin/bash
|
||||
#SBATCH -n 1
|
||||
#SBATCH --cpus-per-task=8
|
||||
|
||||
echo 'From Slurm v22.05.8 srun does not inherit $SLURM_CPUS_PER_TASK'
|
||||
srun python -c "import os; print(os.sched_getaffinity(0))"
|
||||
|
||||
echo 'One has to implicitly specify $SLURM_CPUS_PER_TASK'
|
||||
echo 'In this example, by setting -c/--cpus-per-task in srun'
|
||||
srun --cpus-per-task=$SLURM_CPUS_PER_TASK python -c "import os; print(os.sched_getaffinity(0))"
|
||||
|
||||
(base) ❄ [caubet_m@merlin-l-001:/data/user/caubet_m]# sbatch mysbatch_method1
|
||||
Submitted batch job 8000813
|
||||
|
||||
(base) ❄ [caubet_m@merlin-l-001:/data/user/caubet_m]# cat slurm-8000813.out
|
||||
From Slurm v22.05.8 srun does not inherit $SLURM_CPUS_PER_TASK
|
||||
{1, 45}
|
||||
One has to implicitly specify $SLURM_CPUS_PER_TASK
|
||||
In this example, by setting -c/--cpus-per-task in srun
|
||||
{1, 2, 3, 4, 45, 46, 47, 48}
|
||||
```
|
||||
|
||||
An example to accomplish the same thing with the `SRUN_CPUS_PER_TASK` environment variable:
|
||||
```bash
|
||||
(base) ❄ [caubet_m@merlin-l-001:/data/user/caubet_m]# cat mysbatch_method2
|
||||
#!/bin/bash
|
||||
#SBATCH -n 1
|
||||
#SBATCH --cpus-per-task=8
|
||||
|
||||
echo 'From Slurm v22.05.8 srun does not inherit $SLURM_CPUS_PER_TASK'
|
||||
srun python -c "import os; print(os.sched_getaffinity(0))"
|
||||
|
||||
echo 'One has to implicitly specify $SLURM_CPUS_PER_TASK'
|
||||
echo 'In this example, by setting an environment variable SRUN_CPUS_PER_TASK'
|
||||
export SRUN_CPUS_PER_TASK=$SLURM_CPUS_PER_TASK
|
||||
srun python -c "import os; print(os.sched_getaffinity(0))"
|
||||
|
||||
|
||||
(base) ❄ [caubet_m@merlin-l-001:/data/user/caubet_m]# sbatch mysbatch_method2
|
||||
Submitted batch job 8000815
|
||||
|
||||
(base) ❄ [caubet_m@merlin-l-001:/data/user/caubet_m]# cat slurm-8000815.out
|
||||
From Slurm v22.05.8 srun does not inherit $SLURM_CPUS_PER_TASK
|
||||
{1, 45}
|
||||
One has to implicitly specify $SLURM_CPUS_PER_TASK
|
||||
In this example, by setting an environment variable SRUN_CPUS_PER_TASK
|
||||
{1, 2, 3, 4, 45, 46, 47, 48}
|
||||
```
|
||||
|
||||
|
||||
## General topics
|
||||
|
||||
### Default SHELL
|
||||
|
||||
In general, **`/bin/bash` is the recommended default user SHELL** when working in Merlin.
|
||||
|
||||
@@ -53,7 +122,7 @@ Notice that available *shells* can be found in the following file:
|
||||
cat /etc/shells
|
||||
```
|
||||
|
||||
## OpenGL vs Mesa
|
||||
### 3D acceleration: OpenGL vs Mesa
|
||||
|
||||
Some applications can run with OpenGL support. This is only possible when the node contains a GPU card.
|
||||
|
||||
@@ -64,16 +133,20 @@ module load paraview
|
||||
paraview-mesa paraview # 'paraview --mesa' for old releases
|
||||
```
|
||||
|
||||
However, if one needs to run with OpenGL support, this is still possible by running `vglrun`. Officially, the supported method is
|
||||
NoMachine remote desktop (SSH with X11 Forwarding is slow, but also needs to properly setup the client -desktop or laptop-, where
|
||||
Merlin admins have no access or rights to it). In example, for running Paraview:
|
||||
However, if one needs to run with OpenGL support, this is still possible by running `vglrun`. For example, for running Paraview:
|
||||
|
||||
```bash
|
||||
module load paraview
|
||||
vglrun paraview
|
||||
```
|
||||
|
||||
## ANSYS
|
||||
Officially, the supported method for running `vglrun` is by using the [NoMachine remote desktop](/merlin6/nomachine.html).
|
||||
Running `vglrun` is also possible using SSH with X11 forwarding. However, it is very slow and only recommended when running
|
||||
in Slurm (from [NoMachine](/merlin6/nomachine.html)). Please avoid running `vglrun` over SSH from a desktop or laptop.
|
||||
|
||||
## Software
|
||||
|
||||
### ANSYS
|
||||
|
||||
Sometimes, running ANSYS/Fluent requires X11 support. For that, one should run fluent as follows.
|
||||
|
||||
@@ -82,27 +155,7 @@ module load ANSYS
|
||||
fluent -driver x11
|
||||
```
|
||||
|
||||
## Paraview
|
||||
### Paraview
|
||||
|
||||
For running Paraview, one can run it with Mesa support or OpenGL support.
|
||||
|
||||
```bash
|
||||
module load paraview
|
||||
|
||||
# Run with Mesa support (nodes without GPU)
|
||||
paraview-mesa paraview # 'paraview --mesa' for old releases
|
||||
# Run with OpenGL support (nodes with GPU)
|
||||
vglrun paraview
|
||||
```
|
||||
|
||||
## Illegal instructions
|
||||
|
||||
It may happened that your code, compiled on one machine will not be executed on another throwing exception like **"(Illegal instruction)"**.
|
||||
This is usually because the software was compiled with a set of instructions newer than the ones available in the node where the software runs,
|
||||
and it mostly depends on the processor generation.
|
||||
|
||||
In example, `merlin-l-001` and `merlin-l-002` contain a newer generation of processors than the old GPUs nodes, or than the Merlin5 cluster.
|
||||
Hence, unless one compiles the software with compatibility with set of instructions from older processors, it will not run on old nodes.
|
||||
Sometimes, this is properly set by default at the compilation time, but sometimes is not.
|
||||
|
||||
For GCC, please refer to https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html for compiling options. In case of doubts, contact us.
|
||||
For running Paraview, one can run it with Mesa support or OpenGL support. Please refer to [OpenGL vs Mesa](/merlin6/known-problems.html#opengl-vs-mesa) for
|
||||
further information about how to run it.
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
---
|
||||
title: Migration From Merlin5
|
||||
#tags:
|
||||
#keywords:
|
||||
last_updated: 18 June 2019
|
||||
keywords: merlin5, merlin6, migration, rsync, archive, archiving, lts, long-term storage
|
||||
last_updated: 07 September 2022
|
||||
#summary: ""
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/migrating.html
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
---
|
||||
title: Troubleshooting
|
||||
#tags:
|
||||
#keywords:
|
||||
last_updated: 21 January 2021
|
||||
keywords: troubleshooting, problems, faq, known problems
|
||||
last_updated: 07 September 2022
|
||||
#summary: ""
|
||||
sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/troubleshooting.html
|
||||
|
||||
@@ -69,19 +69,24 @@ The connectivity for the Merlin6 cluster is based on **ConnectX-5 EDR-100Gbps**,
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="1">384GB</td>
|
||||
</tr>
|
||||
<tr style="vertical-align:middle;text-align:center;" ralign="center">
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="2"><b>#3</b></td>
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="1"><b>merlin-c-3[01-06]</b></td>
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="2"><a href="https://ark.intel.com/content/www/us/en/ark/products/199343/intel-xeon-gold-6240r-processor-35-75m-cache-2-40-ghz.html">Intel Xeon Gold 6240R</a></td>
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="2">2</td>
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="2">48</td>
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="2">2</td>
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="2">1.2TB</td>
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="1">384GB</td>
|
||||
</tr>
|
||||
<tr style="vertical-align:middle;text-align:center;" ralign="center">
|
||||
<td rowspan="1"><b>merlin-c-3[07-12]</b></td>
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="1">768GB</td>
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="3"><b>#3</b></td>
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="1"><b>merlin-c-3[01-12]</b></td>
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="3"><a href="https://ark.intel.com/content/www/us/en/ark/products/199343/intel-xeon-gold-6240r-processor-35-75m-cache-2-40-ghz.html">Intel Xeon Gold 6240R</a></td>
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="3">2</td>
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="3">48</td>
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="1">2</td>
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="3">1.2TB</td>
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="2">768GB</td>
|
||||
</tr>
|
||||
<tr style="vertical-align:middle;text-align:center;" ralign="center">
|
||||
<td rowspan="1"><b>merlin-c-3[13-18]</b></td>
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="1">1</td>
|
||||
</tr>
|
||||
<tr style="vertical-align:middle;text-align:center;" ralign="center">
|
||||
<td rowspan="1"><b>merlin-c-3[19-24]</b></td>
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="1">2</td>
|
||||
<td style="vertical-align:middle;text-align:center;" rowspan="1">384GB</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
Each blade contains an NVMe disk, where up to 300GB are dedicated to the O.S. and ~1.2TB are reserved for the local `/scratch`.
|
||||
|
||||
@@ -14,12 +14,14 @@ This documentation shows basic Slurm configuration and options needed to run job
|
||||
|
||||
The following table shows the default and maximum resources that can be used per node:
|
||||
|
||||
| Nodes | Def.#CPUs | Max.#CPUs | #Threads | Def.Mem/CPU | Max.Mem/CPU | Max.Mem/Node | Max.Swap | Def.#GPUs | Max.#GPUs |
|
||||
|:------------------:| ---------:| :--------:| :------: | :----------:| :----------:| :-----------:| :-------:| :-------: | :-------: |
|
||||
| merlin-c-[001-024] | 1 core | 44 cores | 2 | 4000 | 352000 | 352000 | 10000 | N/A | N/A |
|
||||
| merlin-c-[101-124] | 1 core | 44 cores | 2 | 4000 | 352000 | 352000 | 10000 | N/A | N/A |
|
||||
| merlin-c-[201-224] | 1 core | 44 cores | 2 | 4000 | 352000 | 352000 | 10000 | N/A | N/A |
|
||||
| merlin-c-[301-306] | 1 core | 44 cores | 2 | 4000 | 352000 | 352000 | 10000 | N/A | N/A |
|
||||
| Nodes | Def.#CPUs | Max.#CPUs | #Threads | Max.Mem/CPU | Max.Mem/Node | Max.Swap | Def.#GPUs | Max.#GPUs |
|
||||
|:--------------------:| ---------:| :--------:| :------: | :----------:| :-----------:| :-------:| :-------: | :-------: |
|
||||
| merlin-c-[001-024] | 1 core | 44 cores | 2 | 352000 | 352000 | 10000 | N/A | N/A |
|
||||
| merlin-c-[101-124] | 1 core | 44 cores | 2 | 352000 | 352000 | 10000 | N/A | N/A |
|
||||
| merlin-c-[201-224] | 1 core | 44 cores | 2 | 352000 | 352000 | 10000 | N/A | N/A |
|
||||
| merlin-c-[301-312] | 1 core | 44 cores | 2 | 748800 | 748800 | 10000 | N/A | N/A |
|
||||
| merlin-c-[313-318] | 1 core | 44 cores | 1 | 748800 | 748800 | 10000 | N/A | N/A |
|
||||
| merlin-c-[319-324] | 1 core | 44 cores | 2 | 748800 | 748800 | 10000 | N/A | N/A |
|
||||
|
||||
If nothing is specified, by default each core will use up to 8GB of memory. Memory can be increased with the `--mem=<mem_in_MB>` and
|
||||
`--mem-per-cpu=<mem_in_MB>` options, and the maximum memory allowed is `Max.Mem/Node`.
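For example, a sketch of both options (the values are illustrative and `myjob.sh` is a placeholder batch script; keep the total below `Max.Mem/Node`):

```bash
# Increase the memory per CPU above the default
sbatch --cpus-per-task=4 --mem-per-cpu=8000 myjob.sh

# Or request a total amount of memory for the whole job
sbatch --ntasks=1 --cpus-per-task=8 --mem=64000 myjob.sh
```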
|
||||
@@ -31,10 +33,6 @@ and memory was by default oversubscribed.
|
||||
{{site.data.alerts.tip}}Always check <b>'/etc/slurm/slurm.conf'</b> for changes in the hardware.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
## Running jobs in the 'merlin6' cluster
|
||||
|
||||
In this chapter we will cover basic settings that users need to specify in order to run jobs in the Merlin6 CPU cluster.
|
||||
|
||||
### Merlin6 CPU cluster
|
||||
|
||||
To run jobs in the **`merlin6`** cluster users **can optionally** specify the cluster name in Slurm:
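For example (a minimal sketch; `myjob.sh` is a placeholder script):

```bash
# In a batch script
#SBATCH --clusters=merlin6

# Or directly on the command line
sbatch --clusters=merlin6 myjob.sh
```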
|
||||
@@ -57,12 +55,16 @@ Users might need to specify the Slurm partition. If no partition is specified, i
|
||||
|
||||
The following *partitions* (also known as *queues*) are configured in Slurm:
|
||||
|
||||
| CPU Partition | Default Time | Max Time | Max Nodes | PriorityJobFactor\* | PriorityTier\*\* |
|
||||
|:-----------------: | :----------: | :------: | :-------: | :-----------------: | :--------------: |
|
||||
| **<u>general</u>** | 1 day | 1 week | 50 | 1 | 1 |
|
||||
| **daily** | 1 day | 1 day | 67 | 500 | 1 |
|
||||
| **hourly** | 1 hour | 1 hour | unlimited | 1000 | 1 |
|
||||
| **gfa-asa** | 1 day | 1 week | 11 | 1000 | 1000 |
|
||||
| CPU Partition | Default Time | Max Time | Max Nodes | PriorityJobFactor\* | PriorityTier\*\* | DefMemPerCPU |
|
||||
|:-----------------: | :----------: | :------: | :-------: | :-----------------: | :--------------: |:------------:|
|
||||
| **<u>general</u>** | 1 day | 1 week | 50 | 1 | 1 | 4000 |
|
||||
| **daily** | 1 day | 1 day | 67 | 500 | 1 | 4000 |
|
||||
| **hourly** | 1 hour | 1 hour | unlimited | 1000 | 1 | 4000 |
|
||||
| **asa-general** | 1 hour | 2 weeks | unlimited | 1 | 2 | 3712 |
|
||||
| **asa-daily** | 1 hour | 1 week | unlimited | 500 | 2 | 3712 |
|
||||
| **asa-visas** | 1 hour | 90 days | unlimited | 1000 | 4 | 3712 |
|
||||
| **asa-ansys** | 1 hour | 90 days | unlimited | 1000 | 4 | 15600 |
|
||||
| **mu3e** | 1 day | 7 days | unlimited | 1000 | 4 | 3712 |
|
||||
|
||||
\*The **PriorityJobFactor** value will be added to the job priority (*PARTITION* column in `sprio -l`). In other words, jobs sent to higher-priority
|
||||
partitions will usually run first (however, other factors such as **job age** or, mainly, **fair share** might affect that decision). For the GPU
|
||||
@@ -74,8 +76,7 @@ and, if possible, they will preempt running jobs from partitions with lower *Pri
|
||||
* The **`general`** partition is the **default**. It cannot have more than 50 nodes running jobs.
|
||||
* For **`daily`** this limitation is extended to 67 nodes.
|
||||
* For **`hourly`** there are no limits.
|
||||
* **`gfa-asa`** is a **private hidden** partition, belonging to one experiment. **Access is restricted**. However, by agreement with the experiment,
|
||||
nodes are usually added to the **`hourly`** partition as extra resources for the public resources.
|
||||
* **`asa-general`,`asa-daily`,`asa-ansys`,`asa-visas` and `mu3e`** are **private** partitions, belonging to the different experiments owning the machines. **Access is restricted** in all cases. However, by agreement with the experiments, nodes are usually added to the **`hourly`** partition as extra public resources.
|
||||
|
||||
{{site.data.alerts.tip}}Jobs which would run for less than one day should always be sent to <b>daily</b>, while jobs that would run for less
|
||||
than one hour should be sent to <b>hourly</b>. This ensures that you have higher priority than jobs sent to partitions with lower priority,
|
||||
@@ -97,12 +98,13 @@ Not all the accounts can be used on all partitions. This is resumed in the table
|
||||
| Slurm Account | Slurm Partitions |
|
||||
| :------------------: | :----------------------------------: |
|
||||
| **<u>merlin</u>** | `hourly`,`daily`, `general` |
|
||||
| **gfa-asa** | `gfa-asa`,`hourly`,`daily`, `general` |
|
||||
| **gfa-asa** | `asa-general`,`asa-daily`,`asa-visas`,`asa-ansys`,`hourly`,`daily`, `general` |
|
||||
| **mu3e** | `mu3e` |
|
||||
|
||||
#### The 'gfa-asa' private account
|
||||
#### Private accounts
|
||||
|
||||
For accessing the **`gfa-asa`** partition, it must be done through the **`gfa-asa`** account. This account **is restricted**
|
||||
to a group of users and is not public.
|
||||
* The *`gfa-asa`* and *`mu3e`* accounts are private accounts. These can be used for accessing dedicated
|
||||
partitions with nodes owned by different groups.
|
||||
|
||||
### Slurm CPU specific options
|
||||
|
||||
@@ -124,7 +126,7 @@ Below are listed the most common settings:
|
||||
#SBATCH --cpu-bind=[{quiet,verbose},]<type> # only for 'srun' command
|
||||
```
|
||||
|
||||
#### Dealing with Hyper-Threading
|
||||
#### Enabling/Disabling Hyper-Threading
|
||||
|
||||
The **`merlin6`** cluster contains nodes with Hyper-Threading enabled. One should always specify
|
||||
whether to use Hyper-Threading or not. If not defined, Slurm will generally use it (exceptions apply).
|
||||
@@ -134,6 +136,51 @@ whether to use Hyper-Threading or not. If not defined, Slurm will generally use
|
||||
#SBATCH --hint=nomultithread # Don't use extra threads with in-core multi-threading.
|
||||
```
|
||||
|
||||
#### Constraint / Features
|
||||
|
||||
Slurm allows defining a set of features in the node definition. This can be used to filter and select nodes according to one or more
|
||||
specific features. For the CPU nodes, we have the following features:
|
||||
|
||||
```
|
||||
NodeName=merlin-c-[001-024,101-124,201-224] Features=mem_384gb,xeon-gold-6152
|
||||
NodeName=merlin-c-[301-312] Features=mem_768gb,xeon-gold-6240r
|
||||
NodeName=merlin-c-[313-318] Features=mem_768gb,xeon-gold-6240r
|
||||
NodeName=merlin-c-[319-324] Features=mem_384gb,xeon-gold-6240r
|
||||
```
|
||||
|
||||
Therefore, users running on `hourly` can select which node they want to use (fat memory nodes vs regular memory nodes, CPU type).
|
||||
This is possible by using the option `--constraint=<feature_name>` in Slurm.
|
||||
|
||||
Examples:
|
||||
1. Select nodes with 48 cores only (nodes with [2 x Xeon Gold 6240R](https://ark.intel.com/content/www/us/en/ark/products/199343/intel-xeon-gold-6240r-processor-35-75m-cache-2-40-ghz.html)):
|
||||
```
|
||||
sbatch --constraint=xeon-gold-6240r ...
|
||||
```
|
||||
2. Select nodes with 44 cores only (nodes with [2 x Xeon Gold 6152](https://ark.intel.com/content/www/us/en/ark/products/120491/intel-xeon-gold-6152-processor-30-25m-cache-2-10-ghz.html)):
|
||||
```
|
||||
sbatch --constraint=xeon-gold-6152 ...
|
||||
```
|
||||
3. Select fat memory nodes only:
|
||||
```
|
||||
sbatch --constraint=mem_768gb ...
|
||||
```
|
||||
4. Select regular memory nodes only:
|
||||
```
|
||||
sbatch --constraint=mem_384gb ...
|
||||
```
|
||||
5. Select fat memory nodes with 48 cores only:
|
||||
```
|
||||
sbatch --constraint=mem_768gb,xeon-gold-6240r ...
|
||||
```
|
||||
|
||||
Detailing exactly which type of nodes you want to use is important, therefore, for groups with private accounts (`mu3e`,`gfa-asa`) or for
|
||||
public users running on the `hourly` partition, *constraining nodes by features is recommended*. This becomes even more important when
|
||||
having heterogeneous clusters.
|
||||
|
||||
## Running jobs in the 'merlin6' cluster
|
||||
|
||||
In this chapter we will cover basic settings that users need to specify in order to run jobs in the Merlin6 CPU cluster.
|
||||
|
||||
### User and job limits
|
||||
|
||||
In the CPU cluster we provide some limits which basically apply to jobs and users. The idea behind this is to ensure a fair usage of the resources and to
|
||||
|
||||
18
pages/merlin7/01-Quick-Start-Guide/introduction.md
Normal file
@@ -0,0 +1,18 @@
|
||||
---
|
||||
title: Introduction
|
||||
#tags:
|
||||
keywords: introduction, home, welcome, architecture, design
|
||||
last_updated: 07 September 2022
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/introduction.html
|
||||
redirect_from:
|
||||
- /merlin7
|
||||
- /merlin7/index.html
|
||||
---
|
||||
|
||||
## Logging in
|
||||
|
||||
To get onto the machine, you have to start from ela.cscs.ch or from login.psi.ch at PSI.
|
||||
|
||||
$ ssh psi-username@psi-dev.cscs.ch
|
||||
|
||||
90
pages/merlin7/02-How-To-Use-Merlin/cray-module-env.md
Normal file
@@ -0,0 +1,90 @@
|
||||
---
|
||||
title: Cray Module Environment
|
||||
#tags:
|
||||
keywords: cray, module
|
||||
last_updated: 24 Mai 2023
|
||||
summary: "This document describes how to use the cray module environment on Merlin7."
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/cray-module-env.html
|
||||
---
|
||||
|
||||
To switch from the PSI module environment to the provided Cray Programming Environment (CPE), do the following:
|
||||
Switch to Cray PrgEnv
|
||||
|
||||
$ source /etc/bash.bashrc.local.2023-04-26T164631
|
||||
|
||||
The Cray Programming Environment, with Cray's compilers and MPI, is loaded by default. You can check this with the `module list` command or its short form, as shown below.
|
||||
ml
|
||||
|
||||
$ ml #means: module list
|
||||
Currently Loaded Modules:
|
||||
1) craype-x86-rome 4) perftools-base/21.12.0 7) craype/2.7.13 10) cray-libsci/21.08.1.2
|
||||
2) libfabric/1.15.2.0 5) xpmem/2.4.4-2.3_13.8__gff0e1d9.shasta 8) cray-dsmml/0.2.2 11) PrgEnv-cray/8.3.0
|
||||
3) craype-network-ofi 6) cce/13.0.0 9) cray-mpich/8.1.12
|
||||
|
||||
You will notice an unfamiliar module, PrgEnv-cray/8.3.0, that was loaded. This is a meta-module that Cray provides to simplify switching compilers together with their associated dependencies and libraries, as a whole called a Programming Environment. In the Cray Programming Environment, there are 4 key modules:
|
||||
|
||||
* `cray-libsci`: a collection of numerical routines tuned for performance on Cray systems.
|
||||
* `libfabric`: an important low-level library that allows you to take advantage of the high-performance Slingshot 11 network.
|
||||
* `cray-mpich`: a CUDA-aware MPI implementation.
|
||||
* `cce`: the compiler from Cray. The C/C++ compilers are based on Clang/LLVM, while Fortran supports the Fortran 2018 standard. More info: https://user.cscs.ch/computing/compilation/cray/
|
||||
|
||||
You can switch between different programming environments. You can check the available modules with the `module avail` command or the short form (`ml av`), as shown below.
|
||||
ml av
|
||||
$ ml av PrgEnv
|
||||
|
||||
PrgEnv-aocc/8.3.0 (D) PrgEnv-cray/8.3.3 PrgEnv-intel/8.3.0 (D) PrgEnv-nvidia/8.3.0 (D)
|
||||
PrgEnv-aocc/8.3.3 PrgEnv-gnu/8.3.0 (D) PrgEnv-intel/8.3.3 PrgEnv-nvidia/8.3.3
|
||||
PrgEnv-cray/8.3.0 (L,D) PrgEnv-gnu/8.3.3 PrgEnv-nvhpc/8.3.3
|
||||
|
||||
If you want in-depth information on the different programming environments, you can use the `module spider` command. This command allows you to explore the hierarchical structure of the Lmod module environment that is in use here.
|
||||
module spider
|
||||
|
||||
$ module spider PrgEnv-cray
|
||||
---------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
PrgEnv-cray:
|
||||
---------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Versions:
|
||||
PrgEnv-cray/8.3.0
|
||||
PrgEnv-cray/8.3.3
|
||||
|
||||
---------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
For detailed information about a specific "PrgEnv-cray" package (including how to load the modules) use the module's full name.
|
||||
Note that names that have a trailing (E) are extensions provided by other modules.
|
||||
For example:
|
||||
|
||||
$ module spider PrgEnv-cray/8.3.3
|
||||
---------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
## Switching compiler suites
|
||||
|
||||
Compiler suites can be exchanged with PrgEnv (Programming Environments) provided by HPE-Cray. The wrappers call the correct compiler with appropriate options to build and link applications with relevant libraries, as required by the loaded modules (only dynamic linking is supported) and therefore should replace direct calls to compiler drivers in Makefiles and build scripts.
|
||||
|
||||
To swap the compiler suite from the default Cray to Intel, you can do the following. If no PrgEnv is loaded, you can load the one you prefer with the `ml` command.
|
||||
Swapping PrgEnv
|
||||
|
||||
$ module swap PrgEnv-cray PrgEnv-intel
|
||||
|
||||
Please note that in a Cray-provided PrgEnv, cray-mpich will always be used by default, because this MPI library has been strongly optimised for Cray systems. In the case of Intel, cray-mpich has been compiled with the Intel compiler to improve interoperability. The same applies when you use the GNU and AMD (AOCC) programming environments.

If you would like to use a pure Intel MPI, please refer to the advanced guide on how to install and set up Intel MPI so that the optimised Slingshot 11 network is used.

You can switch between versions of the Intel compilers by using the `module swap` command. If you want to know the available versions of the Intel compilers, you can use the `module avail` or `module spider` commands.

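For illustration only (the Intel module versions shown here are hypothetical placeholders, not taken from this page; replace them with versions reported by `module avail`), such a swap could look like:

```bash
# Illustrative only: these Intel module versions are hypothetical placeholders.
module avail intel                          # list the Intel compiler modules actually installed
module swap intel/2021.4.0 intel/2022.1.0   # swap to another installed version
```
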
An example of switching the version of the Cray compiler while in the PrgEnv-cray environment:

```bash
$ module swap cce/13.0.0 cce/14.0.0
```

Thanks to the compiler wrappers, you can use the same commands when compiling with, say, the CCE or GNU compilers. In the case of the Intel compilers, you have to use the original commands, e.g. `icc`.

* C compiler: `cc`
* C++ compiler: `CC`
* Fortran compiler: `ftn`
* MPI C compiler: `mpicc`
* MPI C++ compiler: `mpic++`
* MPI Fortran compiler: `mpif90`

When using the GNU compiler, you need to specify the architecture (`-march`, `-mtune`, or `--offload-arch`) you would like to optimise your code for; for Milan, you need to load the `craype-x86-milan` module.

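As a minimal sketch under these assumptions (the optimisation flag and the file names are illustrative, not prescribed by this page), building a C code for Milan nodes with the GNU environment could look like:

```bash
# Sketch only: the -O3 flag and the file names are illustrative assumptions.
module swap PrgEnv-cray PrgEnv-gnu             # switch to the GNU compiler suite
module swap craype-x86-rome craype-x86-milan   # target the Milan CPU architecture
cc -O3 -o myapp myapp.c                        # the cc wrapper calls gcc with the target flags implied by craype-x86-milan
```
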
13
pages/merlin7/02-How-To-Use-Merlin/file-transfers.md
Normal file
@@ -0,0 +1,13 @@
---
title: Transferring files between systems
#tags:
keywords: files, transfer, scp
last_updated: 24 May 2023
summary: "This document describes some possibilities to transfer files from Merlin6 to Merlin7."
sidebar: merlin7_sidebar
permalink: /merlin7/file-transfers.html
---

From Merlin6 to Merlin7 you can use the following (ideally, SSH keys should be set up beforehand):

```bash
$ rsync -avAHXS ~/merlin6_localdata $USER@psi-dev.cscs.ch:/scratch/home/$USER/
```

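For a single file, `scp` can be used in the same way (the file name below is hypothetical; the destination mirrors the rsync example above):

```bash
# The file name is hypothetical; the destination mirrors the rsync example above.
scp ~/merlin6_localdata/results.tar.gz $USER@psi-dev.cscs.ch:/scratch/home/$USER/
```
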
@@ -0,0 +1,25 @@
---
title: Slurm Examples
#tags:
keywords: slurm example, template, examples, templates, running jobs, sbatch, single core based jobs, HT, multithread, no-multithread, mpi, openmp, packed jobs, hands-on, array jobs, gpu
last_updated: 24 May 2023
summary: "This document shows different template examples for running jobs in the Merlin cluster."
sidebar: merlin7_sidebar
permalink: /merlin7/slurm-examples.html
---

## Single core based job examples

```bash
#!/bin/bash
#SBATCH --partition=hourly       # Using 'hourly' will grant higher priority
#SBATCH --ntasks-per-core=2      # Request the max ntasks be invoked on each core
#SBATCH --hint=multithread       # Use extra threads with in-core multi-threading
#SBATCH --time=00:30:00          # Define max time job will run
#SBATCH --output=myscript.out    # Define your output file
#SBATCH --error=myscript.err     # Define your error file

module purge
module load $MODULE_NAME         # where $MODULE_NAME is a software package in PModules
srun $MYEXEC                     # where $MYEXEC is the path to your binary file
```

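As a usage sketch (the script file name is an assumption), the template above would be saved to a file and submitted with `sbatch`:

```bash
# The script file name is an assumption.
sbatch single_core_job.sh   # submit the batch script above to Slurm
squeue -u $USER             # check the status of your submitted jobs
```
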
29
pages/merlin7/05-Software-Support/ansys-rsm.md
Normal file
@@ -0,0 +1,29 @@
---
title: ANSYS RSM (Remote Solve Manager)
#tags:
keywords: software, ansys, rsm, slurm, interactive, rsm, windows
last_updated: 24 May 2023
summary: "This document describes how to use the ANSYS Remote Solve Manager service in the Merlin6 cluster"
sidebar: merlin7_sidebar
permalink: /merlin7/ansys-rsm.html
---

## ANSYS RSM Configuration tool settings:

Use the Merlin6 ANSYS installation to submit to RSM. Submitting from Titan is possible too; you have to set up SSH keys on Titan as described in: https://www.purdue.edu/science/scienceit/ssh-keys-windows.html

"HPC_Resource" tab configuration:

* HPC Type: Slurm
* Submit Host: psi-dev.cscs.ch
* Slurm job arguments: `--hint=nomultithread`

"File Management" tab configuration:

* Use an external mechanism for the transfer (SCP, custom)
* Transfer Mechanism: SCP via SSH
* As staging directory, use `/scratch/tmp`
* As account, use your PSI username
* SSH keys have to be configured to make this work; a minimal sketch is shown below.

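The following is a hedged sketch of the SSH key setup from a Unix-like shell; on a Windows client, follow the Purdue guide linked above instead.

```bash
# Minimal sketch from a Unix-like shell; adapt as needed for your Windows client.
ssh-keygen -t ed25519                 # generate a key pair (accept the defaults)
ssh-copy-id $USER@psi-dev.cscs.ch     # install the public key on the submit host
ssh $USER@psi-dev.cscs.ch hostname    # verify that key-based login works
```
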
49
pages/merlin7/slurm-configuration.md
Normal file
@@ -0,0 +1,49 @@
---
title: Slurm cluster 'merlin7'
#tags:
keywords: configuration, partitions, node definition
last_updated: 24 May 2023
summary: "This document summarizes the Merlin7 configuration."
sidebar: merlin7_sidebar
permalink: /merlin7/slurm-configuration.html
---

This documentation shows the basic Slurm configuration and the options needed to run jobs in the Merlin7 cluster.

### Infrastructure

#### Hardware

The current configuration for the _test_ phase consists of:

* 9 nodes for the _PSI-Dev_ development system
  * 8 nodes intended for bare metal and k8s
  * 1 login node

| Node         | CPU                                                 | RAM                | GRES                            | Notes |
| ------------ | --------------------------------------------------- | ------------------ | ------------------------------- | ----- |
| Compute node | _2x_ AMD EPYC 7713 (x86_64 Milan, 64 Cores, 3.2GHz) | 512GB DDR4 3200MHz | _4x_ NVidia A100 (Ampere, 80GB) |       |
| Login node   | _2x_ AMD EPYC 7742 (x86_64 Rome, 64 Cores, 3.2GHz)  | 512GB DDR4 3200MHz |                                 |       |

#### Storage

* CephFS, only for `/home` -- 1 TB
* ClusterStor L300 for `/scratch` -- 224 TB usable space
* CephRBD for `/local` -- 100GB

#### Node IDs

Cray uses various identifiers to uniquely label each node; details on this can be found on the [Crayism page](cray-conventions.html).
The table below collates them for the current configuration:

| Node ID    | Cray XNAME    | Notes |
| ---------- | ------------- | ----- |
| nid003204  | x1500c4s7b0n0 | login node, to which **psi-dev.cscs.ch** points |
| nid002808  | x1007c0s4b0n0 | |
| nid002809  | x1007c0s4b0n1 | |
| nid002812  | x1007c0s5b0n0 | |
| nid002813  | x1007c0s5b0n1 | |
| nid002824  | x1007c1s0b0n0 | |
| nid002825  | x1007c1s0b0n1 | |
| nid002828  | x1007c1s1b0n0 | |
| nid002829  | x1007c1s1b0n1 | |

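As a hedged cross-check of the table above (output will vary with the current cluster state), the same nodes can be inspected directly from Slurm:

```bash
# Output will vary with the current cluster state.
sinfo -N -l                      # list all nodes with their state, CPUs and memory
scontrol show node nid003204     # show the full Slurm definition of a single node
```
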
@@ -11,7 +11,7 @@ search: exclude
      "title": "{{ page.title | escape }}",
      "tags": "{{ page.tags }}",
      "keywords": "{{page.keywords}}",
      "url": "{{ page.url | remove: "/"}}",
      "url": "{% if site.baseurl != '/'%}{{site.baseurl}}{% endif %}{{ page.url }}",
      "summary": "{{page.summary | strip }}"
    }
    {% unless forloop.last and site.posts.size < 1 %},{% endunless %}