Compare commits

106 Commits

Commits included in this comparison (SHA1):

4af3f996d2, 0d2c22d5e4, 0fdfd9b5e9, 0d311e4776, 241957f665, 23060d5d78, a25450ea15, 7def9e7584,
8c4dd96128, a4f4f32e22, f2eb5110b7, 2a85a9f03c, 0802f12fcf, 85fcf9afc0, 46f256ce75, 1757aad9ca,
cd422a9a48, 9407024d99, d0756e01b3, 1aa2b07d17, 205683582a, 893f866cad, da210298ad, 39303ef76f,
53a856af8e, 45941cb243, 3a970492c7, 5a1f4e7bcf, 25c260c036, 0820227bdf, d72266e1a2, 6157453523,
4727bf5591, 5fc6adc5ab, 2cc87f6f44, 46f2484623, d3ad669a89, 8cf6db43f3, 8ba2b715cd, a39f0fa234,
08c999f97a, f24a644c8e, 8695c0dc42, 358132a5c6, 5fc3e79c4d, a7c2d11e95, 8cf2674d33, 74c6e6866c,
0392a2b3e4, a74fd2c13f, e72a8fd35e, 21869bc61c, 54c0cf9b45, 7c7d8239b8, f444297637, b1856d0089,
f5fd35fdb1, 7263dfca15, 3e86b7380b, 7b39610473, def3612ba9, 68c8ba575c, 9f21e24ead, 058fe5caa7,
e6f006012c, 5e4be6f1f0, bb5030de3c, 5c71055255, fc3281ce5e, 7fedc7525c, fd16825e39, eff7143bfe,
1d4cde414c, 5eb5bad57e, e58920bc31, db5d7b368b, 9ff6b0f360, 626beb921e, 7308986be0, a13672aa5c,
357b4317ed, 16bddf2c12, b63ecb4141, a0e38e612f, ae96f6be53, 3645515793, 18199199ae, 779ff85ea2,
f05b40b6fe, 0fa39e7af8, ee2e65351e, 342f610e1a, be69b022f6, 86218336b2, e4cc733681, 4eb067426e,
bcedfc8736, 9faf68c0f0, 156d3b7d14, 0709804f23, 6e76d6acdb, 5740d25b08, 03f36bf45c, 94b1b6c4d1,
dbb3336a83, 05ef348dc3
.gitea/workflows/deploy-pages.yml (new file, 39 lines)

```yaml
@@ -0,0 +1,39 @@
---
name: Build and Deploy Documentation

on:
  push:
    branches:
      - master
  workflow_dispatch:

jobs:
  build-and-deploy:
    runs-on: ubuntu-latest
    container:
      image: gitea.psi.ch/hpce/gitea-pages
    env:
      JEKYLL_ENV: production
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Build Jekyll website
        run: |
          bundle exec jekyll --version
          bundle exec jekyll build -d public

      - name: Configure Git
        run: |
          git config --global user.name "Gitea Actions"
          git config --global user.email "actions@gitea.local"

      - name: Push to gitea-pages branch
        run: |
          git checkout --orphan gitea-pages
          git reset --hard
          ls -la
          cp -r ./public/* .
          git add .
          git commit -m "Deploy site"
          git push -f https://${{secrets.GITHUB_TOKEN}}@gitea.psi.ch/${{ github.repository }}.git gitea-pages
```
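The build step that this workflow performs can be reproduced locally before pushing; a minimal sketch, assuming Ruby, Bundler and the repository's Gemfile are available:

```console
$ bundle install
$ bundle exec jekyll --version
$ bundle exec jekyll build -d public
```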
```yaml
@@ -1,47 +0,0 @@
stages:
  - test
  - deploy

image: alpine:3.13

variables:
  JEKYLL_ENV: production

before_script:
  - apk list -I
  - >
    apk --no-cache add libatomic readline readline-dev libxml2 libxml2-dev
    ncurses-terminfo-base ncurses-terminfo
    libxslt libxslt-dev zlib-dev zlib
    ruby ruby-dev yaml yaml-dev
    libffi-dev build-base git nodejs
  - gem install etc bundler --no-document
  - pwd
  - ls -l
  - bundle install

######################################################################
# this defines the job (for gitlab-pages it has the special name "pages")
pages:
  stage: deploy
  script:
    - pwd
    - ls -l
    - bundle exec jekyll --version
    - bundle exec jekyll build -d public

  # defines what is produced by the scripts (the "artifacts")
  artifacts:
    paths:
      - public

  # the tags define which runners (builders) will be selected. A
  # suitable runner must have an identical tag
  tags:
    - shared
    - gitlab-pages
    - docker

  # the "pages" job is only to be run for the master branch
  only:
    - master
```
Dockerfile (23)

```Dockerfile
@@ -1,23 +1,24 @@
FROM alpine:3.13

WORKDIR /tmp
ADD Gemfile /tmp/
ADD Gemfile.lock /tmp/
COPY Gemfile /tmp/
COPY Gemfile.lock /tmp/

RUN apk list -I && \
    apk --no-cache add libatomic readline readline-dev libxml2 libxml2-dev \
RUN apk list -I && apk --no-cache add \
    libatomic readline readline-dev libxml2 libxml2-dev \
    ncurses-terminfo-base ncurses-terminfo \
    libxslt libxslt-dev zlib-dev zlib \
    ruby ruby-dev yaml yaml-dev \
    libffi-dev build-base git nodejs
RUN gem env
RUN gem install etc bundler --no-document
RUN pwd
RUN ls -l
RUN bundle install
    libffi-dev build-base git nodejs \
    && gem env \
    && gem install etc --no-document \
    && gem install bundler -v 2.4.22 --no-document \
    && pwd \
    && ls -l \
    && bundle install

VOLUME /src
EXPOSE 4001

WORKDIR /src
ENTRYPOINT ["jekyll", "serve", "--livereload", "-H", "0.0.0.0"]
CMD ["jekyll", "serve", "--livereload", "-H", "0.0.0.0"]
```
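The image defined above can also be built and run directly with Docker; a sketch, assuming Docker is installed (the image tag and the bind mount onto `/src` are illustrative):

```console
$ docker build -t userdoc/latest .
$ docker run --rm -p 4001:4001 -v "$PWD":/src userdoc/latest
```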
Gemfile (14)

```ruby
@@ -12,14 +12,12 @@ gem "webrick", "~> 1.7.0"
gem "etc", "~> 1.2.0"
gem "bigdecimal", "~> 1.4"
gem 'eventmachine', "~> 1.2.7"
# gem 'faraday', "~> 1.4.3"
# gem 'addressable', "~> 2.7.0"
# gem 'faraday-net_http_persistent', "~> 1.1.0"
# gem 'nokogiri', '~> 1.11', '>= 1.11.7'
# gem 'ruby2_keywords', "~> 0.0.4"
# gem 'rubyzip', "~> 2.3.0"

gem 'faraday', "~> 1.4.3"
gem 'addressable', "~> 2.7.0"
gem 'faraday-net_http_persistent', "~> 1.1.0"
gem 'ruby2_keywords', "~> 0.0.4"
gem 'rubyzip', "~> 2.3.0"
#
gem 'ffi', "~> 1.15.3"
gem 'http_parser.rb', "~> 0.6.0" # requires mkdir in /usr/bin/mkdir
gem "jekyll", "~> 3.9.0"
```
Gemfile.lock (81)

```
@@ -1,13 +1,13 @@
GEM
  remote: https://rubygems.org/
  specs:
    activesupport (6.0.4)
    activesupport (6.0.6.1)
      concurrent-ruby (~> 1.0, >= 1.0.2)
      i18n (>= 0.7, < 2)
      minitest (~> 5.1)
      tzinfo (~> 1.1)
      zeitwerk (~> 2.2, >= 2.2.2)
    addressable (2.8.0)
    addressable (2.7.0)
      public_suffix (>= 2.0.2, < 5.0)
    bigdecimal (1.4.4)
    coffee-script (2.4.1)
@@ -17,35 +17,31 @@ GEM
    colorator (1.1.0)
    commonmarker (0.17.13)
      ruby-enum (~> 0.5)
    concurrent-ruby (1.1.9)
    dnsruby (1.61.7)
      simpleidn (~> 0.1)
    em-websocket (0.5.2)
    concurrent-ruby (1.2.3)
    dnsruby (1.70.0)
      simpleidn (~> 0.2.1)
    em-websocket (0.5.3)
      eventmachine (>= 0.12.9)
      http_parser.rb (~> 0.6.0)
      http_parser.rb (~> 0)
    etc (1.2.0)
    ethon (0.14.0)
    ethon (0.16.0)
      ffi (>= 1.15.0)
    eventmachine (1.2.7)
    execjs (2.8.1)
    faraday (1.5.1)
    execjs (2.9.1)
    faraday (1.4.3)
      faraday-em_http (~> 1.0)
      faraday-em_synchrony (~> 1.0)
      faraday-excon (~> 1.1)
      faraday-httpclient (~> 1.0.1)
      faraday-net_http (~> 1.0)
      faraday-net_http_persistent (~> 1.1)
      faraday-patron (~> 1.0)
      multipart-post (>= 1.2, < 3)
      ruby2_keywords (>= 0.0.4)
    faraday-em_http (1.0.0)
    faraday-em_synchrony (1.0.0)
    faraday-excon (1.1.0)
    faraday-httpclient (1.0.1)
    faraday-net_http (1.0.1)
    faraday-net_http_persistent (1.2.0)
    faraday-patron (1.0.0)
    ffi (1.15.3)
    faraday-net_http_persistent (1.1.0)
    ffi (1.15.5)
    forwardable-extended (2.6.0)
    gemoji (3.0.1)
    github-pages (215)
@@ -98,7 +94,7 @@ GEM
      octokit (~> 4.0)
      public_suffix (>= 2.0.2, < 5.0)
      typhoeus (~> 1.3)
    html-pipeline (2.14.0)
    html-pipeline (2.14.3)
      activesupport (>= 2)
      nokogiri (>= 1.4)
    http_parser.rb (0.6.0)
@@ -210,39 +206,37 @@ GEM
      gemoji (~> 3.0)
      html-pipeline (~> 2.2)
      jekyll (>= 3.0, < 5.0)
    json (2.5.1)
    json (2.7.1)
    kramdown (2.3.1)
      rexml
    kramdown-parser-gfm (1.1.0)
      kramdown (~> 2.0)
    liquid (4.0.3)
    listen (3.5.1)
    listen (3.8.0)
      rb-fsevent (~> 0.10, >= 0.10.3)
      rb-inotify (~> 0.9, >= 0.9.10)
    mercenary (0.3.6)
    mini_portile2 (2.5.3)
    mini_portile2 (2.8.5)
    minima (2.5.1)
      jekyll (>= 3.5, < 5.0)
      jekyll-feed (~> 0.9)
      jekyll-seo-tag (~> 2.1)
    minitest (5.14.4)
    multipart-post (2.1.1)
    nokogiri (1.11.7)
      mini_portile2 (~> 2.5.0)
    minitest (5.21.2)
    multipart-post (2.3.0)
    nokogiri (1.15.5)
      mini_portile2 (~> 2.8.2)
      racc (~> 1.4)
    nokogiri (1.11.7-x86_64-linux)
      racc (~> 1.4)
    octokit (4.21.0)
      faraday (>= 0.9)
      sawyer (~> 0.8.0, >= 0.5.3)
    octokit (4.25.1)
      faraday (>= 1, < 3)
      sawyer (~> 0.9)
    pathutil (0.16.2)
      forwardable-extended (~> 2.6)
    public_suffix (4.0.6)
    racc (1.5.2)
    rb-fsevent (0.11.0)
    public_suffix (4.0.7)
    racc (1.7.3)
    rb-fsevent (0.11.2)
    rb-inotify (0.10.1)
      ffi (~> 1.0)
    rexml (3.2.5)
    rexml (3.2.6)
    rouge (3.26.0)
    ruby-enum (0.9.0)
      i18n
@@ -254,40 +248,45 @@ GEM
    sass-listen (4.0.0)
      rb-fsevent (~> 0.9, >= 0.9.4)
      rb-inotify (~> 0.9, >= 0.9.7)
    sawyer (0.8.2)
    sawyer (0.9.2)
      addressable (>= 2.3.5)
      faraday (> 0.8, < 2.0)
      faraday (>= 0.17.3, < 3)
    simpleidn (0.2.1)
      unf (~> 0.1.4)
    terminal-table (1.8.0)
      unicode-display_width (~> 1.1, >= 1.1.1)
    thread_safe (0.3.6)
    typhoeus (1.4.0)
    typhoeus (1.4.1)
      ethon (>= 0.9.0)
    tzinfo (1.2.9)
    tzinfo (1.2.11)
      thread_safe (~> 0.1)
    unf (0.1.4)
      unf_ext
    unf_ext (0.0.7.7)
    unicode-display_width (1.7.0)
    unf_ext (0.0.9.1)
    unicode-display_width (1.8.0)
    webrick (1.7.0)
    zeitwerk (2.4.2)
    zeitwerk (2.6.12)

PLATFORMS
  ruby
  x86_64-linux

DEPENDENCIES
  addressable (~> 2.7.0)
  bigdecimal (~> 1.4)
  etc (~> 1.2.0)
  eventmachine (~> 1.2.7)
  faraday (~> 1.4.3)
  faraday-net_http_persistent (~> 1.1.0)
  ffi (~> 1.15.3)
  github-pages (~> 215)
  http_parser.rb (~> 0.6.0)
  jekyll (~> 3.9.0)
  jekyll-redirect-from (~> 0.16.0)
  json (~> 2.2)
  ruby2_keywords (~> 0.0.4)
  rubyzip (~> 2.3.0)
  webrick (~> 1.7.0)

BUNDLED WITH
   2.2.21
   2.3.26
```
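Lockfile changes of this kind come from re-resolving the bundle rather than from manual edits; a sketch of how they are regenerated, assuming a working Ruby/Bundler setup as in the Dockerfile:

```console
$ gem install bundler -v 2.4.22 --no-document
$ bundle install                              # refresh Gemfile.lock against the Gemfile
$ bundle lock --add-platform x86_64-linux     # records the extra PLATFORMS entry
```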
README.md (37)

@@ -1,8 +1,7 @@

# HPCE Documentation

This site documents HCPE services at Paul Scherrer Institute, particularly the *Merlin 6* cluster.
Live pages are available at https://lsm-hpce.gitpages.psi.ch.
This site contains internal documentation of HCPE services at Paul Scherrer Institute, particularly the *Merlin 6* and *Merlin 7* clusters.
Live pages are available at <https://hpce.pages.psi.ch>.

## Installation

@@ -11,26 +10,28 @@ Live pages are available at https://lsm-hpce.gitpages.psi.ch.

The easiest and most reproducable way to test changes is using docker.
From the top directory, run the following:

```
docker-compose up
```console
$ docker compose build
$ docker compose up
```

This will start a webserver on `http://0.0.0.0:4000/`. Changes to most pages
This will start a webserver on `http://0.0.0.0:4001/`. Changes to most pages
will be automatically reflected in the website (with the exception of changes
to _config.yml, which requires restarting the webserver).
to `config.yml`, which requires restarting the webserver).

### Running locally

Building locally requires ruby 2.5 and bundler. To install:

```
gem install bundler jekyll
bundle
```console
$ gem install bundler jekyll
$ bundle
```

To run a local webserver:

```
bundle exec jekyll serve
```console
$ bundle exec jekyll serve
```

## Theme

@@ -43,7 +44,8 @@ by Tom Johnson.

- Documentation is organized within the `pages` directory
- Add the following frontmatter to each (Merlin6) page:
```

```md
---
title: Introduction
sidebar: merlin6_sidebar
@@ -51,15 +53,22 @@ by Tom Johnson.
keywords: key1, key2
---
```

- Sidebars are specified in data files, e.g. `_data/sidebars/merlin6_sidebar.yml`.
- The top menu is controlled by `_data/topnav.yml`
- News can be addin in `_posts`. Filenames must include the date.
- Lots of features still need to be configured (e.g. pdf output, tags, etc)
- The search bar uses finds substring of the title, tags, keywords, and summary frontmatter.

## Deployment

We use the Gitea workflow (see `.gitea/workflow/deploy-pages.yml` for details). The
pages are automatically re-generated on each push to `master` branch. The resulting website
is stored under the `gitea-pages` branch, and is automatically exposed.

## License

Theme content is licensed under the MIT license.
The Navgoco jQuery component used in the sidebar is licensed under the BSD
license.
See licenses subdirectory for license terms.
See licenses subdirectory for license terms.
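Besides the docker-compose workflow in the README, the site can be served directly with the same options the container uses; a sketch, assuming the local install described above (the port flag is illustrative, to match the container's port 4001):

```console
$ bundle exec jekyll serve --livereload -H 0.0.0.0 -P 4001
```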
_config.yml (18)

```yaml
@@ -1,5 +1,3 @@
repository: lsm-hpce/lsm-hpce.gitpages.psi.ch

output: web
# this property is useful for conditional filtering of content that is separate from the PDF.

@@ -9,7 +7,7 @@ topnav_title: HPCE@PSI
site_title: HPC and Emerging Technologies Documentation
# this appears in the html browser tab for the site title (seen mostly by search engines, not users)

company_name: Paul Scherrer Institute, LSM/HPC and Emerging Technologies Group
company_name: Paul Scherrer Institute, CSD/HPC and Emerging Technologies Group
# this appears in the footer

github_editme_path:
@@ -19,10 +17,6 @@ github_editme_path:
# gitlab_editme_path: tomjoht/documentation-theme-jekyll/blob/gh-pages/
# if you're using GitLab, provide the basepath to the branch you've created for reviews, following the sample here. if not, leave this value blank.

disqus_shortname:
#disqus_shortname: idratherbewriting
# if you're using disqus for comments, add the shortname here. if not, leave this value blank.

google_analytics:
#google_analytics: UA-66296557-1
# if you have google-analytics ID, put it in. if not, edit this value to blank.
@@ -87,6 +81,7 @@ defaults:
    values:
      layout: "page"
      comments: true
      # if you don't want to use Commento.io and just hide comments, change true to false wherever you see the comments property
      search: true
      sidebar: home_sidebar
      topnav: topnav
@@ -97,6 +92,7 @@ defaults:
    values:
      layout: "page"
      comments: true
      # if you don't want to use Commento.io and just hide comments, change true to false wherever you see the comments property
      search: true
      tooltip: true

@@ -107,6 +103,7 @@ defaults:
    values:
      layout: "post"
      comments: true
      # if you don't want to use Commento.io and just hide comments, change true to false wherever you see the comments property
      search: true
      sidebar: home_sidebar
      topnav: topnav
@@ -120,13 +117,12 @@ sidebars:
  - product2_sidebar
  - other

description: "Merlin 6 is the HPC cluster at Paul Scherrer Institute in Switzerland."
description: "Merlin is the HPC cluster at Paul Scherrer Institute in Switzerland."
# the description is used in the feed.xml file

# needed for sitemap.xml file only
# url: http://idratherbewriting.com
# baseurl: /documentation-theme-jekyll
url: "https://lsm-hpce.gitpages.psi.ch"
url: "https://hpce.pages.psi.ch"
baseurl: /

github: [metadata]
```

```yaml
@@ -13,7 +13,10 @@ entries:
  - title: News
    url: /news.html
    output: web
  - title: The Merlin Local HPC Cluster
  - title: Merlin7 HPC Cluster (W.I.P.)
    url: /merlin7/introduction.html
    output: web
  - title: Merlin6 HPC Cluster
    url: /merlin6/introduction.html
    output: web
  - title: PSI HPC@CSCS
```

```yaml
@@ -13,9 +13,9 @@ entries:
      url: /merlin6/introduction.html
    - title: Code Of Conduct
      url: /merlin6/code-of-conduct.html
    - title: Requesting Accounts
    - title: Requesting Merlin Access
      url: /merlin6/request-account.html
    - title: Requesting Projects
    - title: Requesting Merlin Projects
      url: /merlin6/request-project.html
    - title: Accessing the Interactive Nodes
      url: /merlin6/interactive.html
```

```yaml
@@ -8,19 +8,76 @@ entries:
    folderitems:
      - title: Introduction
        url: /merlin7/introduction.html
      - title: Code Of Conduct
        url: /merlin7/code-of-conduct.html
      - title: Requesting Merlin7 Access
        url: /merlin7/request-account.html
      # - title: Requesting Projects
      #   url: /merlin7/request-project.html
      - title: Accessing the Interactive Login Nodes
        url: /merlin7/interactive.html
      - title: Accessing the Slurm Clusters
        url: /merlin7/slurm-access.html
      - title: Software Repositories
        url: /merlin7/software-repositories.html
  - title: How To Use Merlin7
    folderitems:
      - title: Cray systems modules
        url: /merlin7/cray-module-env.html
      - title: Transferring files
        url: /merlin7/file-transfers.html
      - title: Accessing from a Linux client
        url: /merlin7/connect-from-linux.html
      - title: Accessing from a Windows client
        url: /merlin7/connect-from-windows.html
      - title: Accessing from a MacOS client
        url: /merlin7/connect-from-macos.html
      - title: Merlin7 Storage
        url: /merlin7/storage.html
      - title: Transferring Data
        url: /merlin7/transfer-data.html
      # - title: Archive & PSI Data Catalog
      #   url: /merlin7/archive.html
      - title: Remote Desktop Access
        url: /merlin7/nomachine.html
      - title: Configuring SSH Keys
        url: /merlin7/ssh-keys.html
      - title: Kerberos and AFS authentication
        url: /merlin7/kerberos.html
      - title: Software Repositories
        url: /merlin7/software-repositories.html
      - title: General Tools
        url: /merlin7/tools.html
  - title: Slurm General Documentation
    folderitems:
      - title: Merlin7 Infrastructure
        url: /merlin7/merlin7-configuration.html
      - title: Slurm Configuration
        url: /merlin7/slurm-configuration.html
      - title: Running Slurm Interactive Jobs
        url: /merlin7/interactive-jobs.html
      - title: Slurm Batch Script Examples
        url: /merlin7/slurm-examples.html
  - title: Software Support
    folderitems:
      - title: PSI Modules
        url: /merlin7/pmodules.html
      - title: Spack Modules
        url: /merlin7/spack.html
      - title: Cray Modules
        url: /merlin7/cray-module-env.html
      - title: OpenMPI
        url: /merlin7/openmpi.html
      - title: ANSYS
        url: /merlin7/ansys.html
      - title: ANSYS RSM
        url: /merlin7/ansys-rsm.html
      - title: Quantum ESPRESSO
        url: /merlin7/quantum-espresso.html
      - title: OPAL-X
        url: /merlin7/opal-x.html
      - title: IPPL
        url: /merlin7/ippl.html
  - title: Support
    folderitems:
      - title: Merlin6 to Merlin7 Migration Guide
        url: /merlin7/migrating.html
      - title: Contact
        url: /merlin7/contact.html
```

```yaml
@@ -6,7 +6,7 @@ topnav:
    # - title: GitHub
    #   external_url: https://github.com/tomjoht/documentation-theme-jekyll
    - title: News
      url: /news
      url: /news.html

#Topnav dropdowns
topnav_dropdowns:
@@ -16,9 +16,9 @@ topnav_dropdowns:
      folderitems:
        - title: Introduction
          url: /merlin6/introduction.html
        - title: Requesting Accounts
        - title: Requesting Merlin Access
          url: /merlin6/request-account.html
        - title: Requesting Projects
        - title: Requesting Merlin Projects
          url: /merlin6/request-project.html
        - title: Accessing the Interactive Nodes
          url: /merlin6/interactive.html
@@ -26,11 +26,11 @@ topnav_dropdowns:
          url: /merlin6/slurm-access.html
    - title: Clusters
      folderitems:
        - title: Cluster 'merlin5'
        - title: Merlin 5
          url: /merlin5/slurm-configuration.html
        - title: Cluster 'merlin6'
        - title: Merlin 6
          url: /merlin6/slurm-configuration.html
        - title: Merlin 6 GPU
          url: /gmerlin6/slurm-configuration.html
        - title: Cluster 'merlin7'
        - title: Merlin 7
          url: /merlin7/slurm-configuration.html
        - title: Cluster 'gmerlin6'
          url: /gmerlin6/slurm-configuration.html
```

@@ -6,6 +6,6 @@ summary: "More pages will be coming soon."

tags: [news, getting_started]
---

Merlin 6 docs are now available at https://lsm-hpce.gitpages.psi.ch/merlin6!
Merlin 6 docs are now available at https://hpce.pages.psi.ch/merlin6!

More complete documentation will be coming shortly.
More complete documentation will be coming shortly.
_posts/2024-08-07-merlin7-preprod-docs.md (new file, 14 lines)

@@ -0,0 +1,14 @@

---
title: "Merlin7 in preproduction"
published: true
# permalink: samplepost.html
summary: "More pages will be coming soon."
tags: [news, getting_started]
---

The Merlin7 cluster is officially in preproduction. This phase will be tested by a few users
and slowly we will contact other users to be part of it. Keep in mind that access is restricted.

Merlin7 documentation is now available https://hpce.pages.psi.ch/merlin7/slurm-configuration.html.

More complete documentation will be coming shortly.

```css
@@ -119,9 +119,8 @@ margin-top: -40px
}

.post-content img {
  margin: 12px 0px 3px 0px;
  width: auto;
  height: auto;
  margin-top: 12px;
  margin-bottom: 3px;
  max-width: 100%;
  max-height: 100%;
}
@@ -913,34 +912,17 @@ span.soft {
}


.post-content img {
  margin: 12px 0px 3px 0px;
  width: auto;
  height: auto;
  max-width: 100%;
  max-height: 100%;
}
.col-md-9 img {
  max-width: 100%;
  max-height: 100%;
}


.post-content img {
  margin: 12px 0px 3px 0px;
  width: auto;
  height: auto;
  max-width: 100%;
  max-height: 100%;
}

.videoThumbs img {
  float: left;
  margin:15px 15px 15px 0px;
  border: 1px solid #dedede;
}


@media only screen and (min-width: 900px), only screen and (min-device-width: 900px) {
  .col-md-9 img {
    max-width: 700px;
```

```yaml
@@ -1,10 +1,9 @@
version: '2'
services:
  server:
    build:
      context: .
      dockerfile: Dockerfile
    image: lsm-hpce/docs
    image: userdoc/latest
    ports:
      - "4001:4001"
    volumes:
```
Binary files changed:

- BIN images/ANSYS/merlin7/HFSS/01_Select_Scheduler_Menu.png (new file, 22 KiB)
- BIN images/ANSYS/merlin7/HFSS/02_Select_Scheduler_RSM_Remote.png (new file, 9.6 KiB)
- BIN images/ANSYS/merlin7/HFSS/03_Select_Scheduler_Slurm.png (new file, 9.7 KiB)
- BIN images/ANSYS/merlin7/HFSS/04_Submit_Job_Menu.png (new file, 22 KiB)
- BIN images/ANSYS/merlin7/HFSS/05_Submit_Job_Product_Path.png (new file, 67 KiB)
- BIN images/ANSYS/merlin7/cfx5launcher.png (new file, 39 KiB)
- BIN images/ANSYS/merlin7/merlin7/cfx5launcher.png (new file, 39 KiB)
- BIN images/ANSYS/merlin7/rsm-1-add_hpc_resource.png (new file, 508 KiB)
- BIN images/ANSYS/merlin7/rsm-2-add_cluster.png (new file, 27 KiB)
- BIN images/ANSYS/merlin7/rsm-3-add_scratch_info.png (new file, 35 KiB)
- BIN images/ANSYS/merlin7/rsm-4-get_slurm_queues.png (new file, 26 KiB)
- BIN images/ANSYS/merlin7/rsm-5-authenticating.png (new file, 6.4 KiB)
- BIN images/ANSYS/merlin7/rsm-6-selected-partitions.png (new file, 37 KiB)
- BIN images/Access/01-request-merlin5-membership.png (new file, 68 KiB)
- BIN images/Access/01-request-merlin6-membership.png (new file, 68 KiB)
- BIN images/Access/01-request-merlin7-membership.png (new file, 151 KiB)
- BIN images/Access/01-request-unx-group-membership.png (new file, 66 KiB)
- BIN (unnamed image) before: 44 KiB
- BIN images/NoMachine/screen_nx_address.png (new file, 52 KiB)
- BIN images/NoMachine/screen_nx_auth.png (new file, 43 KiB)
- BIN images/NoMachine/screen_nx_configuration.png (new file, 85 KiB)
- BIN (unnamed image) before: 61 KiB
- BIN (unnamed image) before: 49 KiB
- BIN (unnamed image) before: 49 KiB
- BIN images/NoMachine/screen_nx_single_session.png (new file, 44 KiB)
- BIN images/WIP/WIP1.jpeg (new file, 8.1 KiB)
- BIN images/WIP/WIP1.webp (new file, 39 KiB)
- BIN (unnamed image) before: 1.2 KiB, after: 5.3 KiB
- BIN images/scicat_token.png (new file, 70 KiB)
index.md (9)

@@ -1,17 +1,18 @@

---
title: "High Performance Computing and Emerging Technologies"
keywords: sample homepage
# tags: [getting_started]
sidebar: home_sidebar
toc: false
permalink: index.html
---

{: .img-responsive }
{: .center-block .img-responsive width="300px" }

The [HPCE group](https://www.psi.ch/lsm/hpce-group) is part of the [Laboratory for Simulation and Modelling](https://www.psi.ch/lsm)
The [HPCE group](https://www.psi.ch/en/awi/high-performance-computing-and-emerging-technologies-group) is part of the [PSI Center for Scientific Computing, Theory and Data](https://www.psi.ch/en/csd)
at [Paul Scherrer Institute](https://www.psi.ch). It provides a range of HPC services for PSI scientists and also
engages in research activities on technologies (data analysis and machine learning technologies) used on these systems.

## Available documentation

<button type="button" class="btn btn-primary" href="/merlin6">Merlin6</button>
<button type="button" class="btn btn-primary"><a href="/merlin6/introduction.html" style="color:rgb(255,255,255);" target="_blank"><b>Merlin6</b></a></button>
<button type="button" class="btn btn-primary"><a href="/merlin7/introduction.html" style="color:rgb(255,255,255);" target="_blank"><b>Merlin7</b></a></button>

@@ -2,7 +2,7 @@

title: PSI HPC@CSCS
#tags:
#keywords:
last_updated: 13 April 2022
last_updated: 3 May 2024
#summary: ""
sidebar: CSCS_sidebar
permalink: /CSCS/index.html
@@ -10,55 +10,52 @@ permalink: /CSCS/index.html

## PSI HPC@CSCS

For offering high-end HPC sources to PSI users, PSI has a long standing collaboration with
the national supercomputing centre CSCS (since 2005). Some of the resources are procured by
central PSI funds while users have the optionsof an additional buy-in at the same rates.
PSI has a long standing collaboration with CSCS for offering high end
HPC resources to PSI projects. PSI had co-invested in CSCS' initial
Cray XT3 supercomputer *Horizon* in 2005 and we continue to procure a share on the
CSCS flagship systems.

### PSI resources at Piz Daint
The share is mainly intended for projects that cannot profit from applying for regular
[CSCS user lab allocation schemes.](https://www.cscs.ch/user-lab/allocation-schemes).
We can also help PSI groups to procure additional resources based on the PSI
conditions - please contact us in such a case.

The yearly computing resources at CSCS for the PSI projects are in general 627,000 NH (Node Hours).
The yearly storage resources for the PSI projects is a total of 80TB. These resources are
centrally financed, but in addition experiments can individually purchase more resources.
### Capacity of PSI resources at CSCS

For 2024 PSI we have a share of 400'000 NH (Node Hours) on [Piz Daint](https://www.cscs.ch/computers/piz-daint), and 65 TB of storage.

CSCS plans to decommission Piz Daint during 2024. All allocations
will be converted to allocations on the new user lab on the CSCS Alps
infrastructure. Users will be informed about the migration process.

### How to request a PSI project

A survey is sent out in the third quarter of each year. This survey is used to request
CSCS resources for the upcoming year.
A survey is sent out to subscribed users once per year, usually in the third quarter. It serves for collecting the CSCS resources requests for the upcoming year.

Users registered in the **PSI HPC@CSCS mailing list** <psi-hpc-at-cscs@lists.psi.ch> will
receive notification and details about the survey, in example:
* Link to the survey
* Update of resource changes
* Other details of the process
be contacted to take part in the yearly survey. Please [subscribe to the list](https://psilists.ethz.ch/sympa/subscribe/psi-hpc-at-cscs) if you are interested.

Generally users need to specify in the survey the total resources they intend to use
next year and also how they would like to split it over the 4 quarters (e.g. 25%, 25%,
25%, 25%). In general, we provide the possibility to adapt the distribution over the
course of next year if required. The minimum allocation over a year is 10,000 node hours.
Users need to specify in the survey the total resources they intend to
use next year and also how this is to be split over the 4 quarters
(e.g. 25%, 25%, 25%, 25%), since CSCS allocates recources on a
quarterly basis. We provide the possibility to adapt the distribution
over the course of the active year or shift unused resources to other
projects (contact us on the mailing list).

By default allocated nodes are on the CPU partition of PizDaint (36 cores per node).
However, allocations to the GPU partition are also possible (1 x NVIDIA P100 and 12cores per
node), but needs to be splicitly stated in the survey.
However, allocations to the GPU partition are also possible (1 x NVIDIA P100 and 12 cores per
node).

### PSI resources at Piz Daint

#### 2023

The yearly computing resources at CSCS for the PSI projects in 2023 are 522,500 NH (Node Hours). The yearly storage resources for the PSI projects is a total of 80TB.
These resources are centrally financed, but in addition experiments can individually purchase more resources.

PizDaint resource distribution for the different PSI projects in 2023 is detailed in the following Excel file: [PSI_CSCSAllocations2023.xltx]({{ site.url }}/downloads/CSCS/PSI_CSCSAllocations2023.xltx) directly.

### Piz Daint total resources

References:
### CSCS Systems reference information

* [CSCS User Portal](https://user.cscs.ch/)
* [Piz Daint Information](https://www.cscs.ch/computers/piz-daint/)
* [Piz Daint: One of the most powerful supercomputers in the world](https://www.cscs.ch/publications/news/2017/piz-daint-one-of-the-most-powerful-supercomputers-in-the-world)

## Contact information

* Contact responsibles:
  * Mail list contact: <psi-hpc-at-cscs-admin@lists.psi.ch>
* PSI Contacts:
  * Mailing list contact: <psi-hpc-at-cscs-admin@lists.psi.ch>
  * Marc Caubet Serrabou <marc.caubet@psi.ch>
  * Derek Feichtinger <derek.feichtinger@psi.ch>
* Mailing list for receiving user notifications: psi-hpc-at-cscs@lists.psi.ch [(subscribe)](https://psilists.ethz.ch/sympa/subscribe/psi-hpc-at-cscs)
@@ -16,11 +16,12 @@ The table below shows a summary of the hardware setup for the different GPU node

| Nodes | Def.#CPUs | Max.#CPUs | #Threads | Def.Mem/CPU | Max.Mem/CPU | Max.Mem/Node | Max.Swap | GPU Type | Def.#GPUs | Max.#GPUs |
|:------------------:| ---------:| :--------:| :------: | :----------:| :----------:| :-----------:| :-------:| :--------: | :-------: | :-------: |
| merlin-g-[001] | 1 core | 8 cores | 1 | 4000 | 102400 | 102400 | 10000 | **geforce_gtx_1080** | 1 | 2 |
| merlin-g-[002-005] | 1 core | 20 cores | 1 | 4000 | 102400 | 102400 | 10000 | **geforce_gtx_1080** | 1 | 4 |
| merlin-g-[006-009] | 1 core | 20 cores | 1 | 4000 | 102400 | 102400 | 10000 | **geforce_gtx_1080_ti** | 1 | 4 |
| merlin-g-[010-013] | 1 core | 20 cores | 1 | 4000 | 102400 | 102400 | 10000 | **geforce_rtx_2080_ti** | 1 | 4 |
| merlin-g-014 | 1 core | 48 cores | 1 | 4000 | 360448 | 360448 | 10000 | **geforce_rtx_2080_ti** | 1 | 8 |
| merlin-g-[001] | 1 core | 8 cores | 1 | 5120 | 102400 | 102400 | 10000 | **geforce_gtx_1080** | 1 | 2 |
| merlin-g-[002-005] | 1 core | 20 cores | 1 | 5120 | 102400 | 102400 | 10000 | **geforce_gtx_1080** | 1 | 4 |
| merlin-g-[006-009] | 1 core | 20 cores | 1 | 5120 | 102400 | 102400 | 10000 | **geforce_gtx_1080_ti** | 1 | 4 |
| merlin-g-[010-013] | 1 core | 20 cores | 1 | 5120 | 102400 | 102400 | 10000 | **geforce_rtx_2080_ti** | 1 | 4 |
| merlin-g-014 | 1 core | 48 cores | 1 | 5120 | 360448 | 360448 | 10000 | **geforce_rtx_2080_ti** | 1 | 8 |
| merlin-g-015 | 1 core | 48 cores | 1 | 5120 | 360448 | 360448 | 10000 | **A5000** | 1 | 8 |
| merlin-g-100 | 1 core | 128 cores | 2 | 3900 | 998400 | 998400 | 10000 | **A100** | 1 | 8 |

{{site.data.alerts.tip}}Always check <b>'/etc/slurm/gres.conf'</b> and <b>'/etc/slurm/slurm.conf'</b> for changes in the GPU type and details of the hardware.
@@ -113,6 +114,7 @@ This is detailed in the below table.

| **merlin-g-[006-009]** | **`geforce_gtx_1080_ti`** | 4 |
| **merlin-g-[010-013]** | **`geforce_rtx_2080_ti`** | 4 |
| **merlin-g-014** | **`geforce_rtx_2080_ti`** | 8 |
| **merlin-g-015** | **`A5000`** | 8 |
| **merlin-g-100** | **`A100`** | 8 |

#### Constraint / Features
@@ -123,7 +125,7 @@ Users can specify which GPU memory size needs to be used with the `--constraint`

there is not need to specify `[<type>:]`* in the `--gpus` option.

```bash
#SBATCH --contraint=<Feature> # Possible values: gpumem_8gb, gpumem_11gb, gpumem_40gb
#SBATCH --contraint=<Feature> # Possible values: gpumem_8gb, gpumem_11gb, gpumem_24gb, gpumem_40gb
```

The table below shows the available **Features** and which GPU card models and GPU nodes they belong to:
@@ -131,7 +133,7 @@ The table below shows the available **Features** and which GPU card models and G

<table>
<thead>
<tr>
<th scope='colgroup' style="vertical-align:middle;text-align:center;" colspan="3">Merlin6 CPU Computing Nodes</th>
<th scope='colgroup' style="vertical-align:middle;text-align:center;" colspan="3">Merlin6 GPU Computing Nodes</th>
</tr>
<tr>
<th scope='col' style="vertical-align:middle;text-align:center;" colspan="1">Nodes</th>
@@ -155,6 +157,11 @@ The table below shows the available **Features** and which GPU card models and G
<td markdown="span" style="vertical-align:middle;text-align:center;" rowspan="1">`geforce_rtx_2080_ti`</td>
</tr>
<tr style="vertical-align:middle;text-align:center;" ralign="center">
<td markdown="span" style="vertical-align:middle;text-align:center;" rowspan="1"><b>merlin-g-015</b></td>
<td markdown="span" style="vertical-align:middle;text-align:center;" rowspan="1">`A5000`</td>
<td markdown="span" style="vertical-align:middle;text-align:center;" rowspan="1"><b>`gpumem_24gb`</b></td>
</tr>
<tr style="vertical-align:middle;text-align:center;" ralign="center">
<td markdown="span" style="vertical-align:middle;text-align:center;" rowspan="1"><b>merlin-g-100</b></td>
<td markdown="span" style="vertical-align:middle;text-align:center;" rowspan="1">`A100`</td>
<td markdown="span" style="vertical-align:middle;text-align:center;" rowspan="1"><b>`gpumem_40gb`</b></td>
@@ -206,8 +213,8 @@ Limits are defined using QoS, and this is usually set at the partition level. Li

| Partition | Slurm Account | Mon-Sun 0h-24h |
|:------------------:| :------------: | :------------------------------------------: |
| **gpu** | **`merlin`** | gpu_week(cpu=40,gres/gpu=8,mem=200G) |
| **gpu-short** | **`merlin`** | gpu_week(cpu=40,gres/gpu=8,mem=200G) |
| **gpu** | **`merlin`** | gpu_week(gres/gpu=8) |
| **gpu-short** | **`merlin`** | gpu_week(gres/gpu=8) |
| **gwendolen** | `gwendolen` | No limits |
| **gwendolen-long** | `gwendolen` | No limits, active from 9pm to 5:30am |

@@ -233,8 +240,8 @@ Limits are defined using QoS, and this is usually set at the partition level. Li

| Partition | Slurm Account | Mon-Sun 0h-24h |
|:------------------:| :----------------: | :---------------------------------------------: |
| **gpu** | **`merlin`** | gpu_week(cpu=80,gres/gpu=16,mem=400G) |
| **gpu-short** | **`merlin`** | gpu_week(cpu=80,gres/gpu=16,mem=400G) |
| **gpu** | **`merlin`** | gpu_week(gres/gpu=16) |
| **gpu-short** | **`merlin`** | gpu_week(gres/gpu=16) |
| **gwendolen** | `gwendolen` | No limits |
| **gwendolen-long** | `gwendolen` | No limits, active from 9pm to 5:30am |
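The `--constraint` and GPU options described above combine in a batch script as follows; a minimal sketch, with the partition, account and resource values taken from the tables above as examples only:

```bash
#!/bin/bash
#SBATCH --partition=gpu            # or gpu-short, see the partition table above
#SBATCH --account=merlin
#SBATCH --gpus=2                   # no [<type>:] prefix needed when a constraint is set
#SBATCH --constraint=gpumem_24gb   # e.g. selects the A5000 nodes (merlin-g-015)
#SBATCH --time=01:00:00

srun nvidia-smi
```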
@ -30,7 +30,7 @@ The basic principle is courtesy and consideration for other users.
|
||||
* It is **forbidden** to use the ``/data/user``, ``/data/project`` or ``/psi/home/`` for that purpose.
|
||||
* Always remove files you do not need any more (e.g. core dumps, temporary files) as early as possible. Keep the disk space clean on all nodes.
|
||||
* Prefer ``/scratch`` over ``/shared-scratch`` and use the latter only when you require the temporary files to be visible from multiple nodes.
|
||||
* Read the description in **[Merlin6 directory structure](### Merlin6 directory structure)** for learning about the correct usage of each partition type.
|
||||
* Read the description in **[Merlin6 directory structure](/merlin6/storage.html#merlin6-directories)** for learning about the correct usage of each partition type.
|
||||
|
||||
## User and project data
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
title: Requesting Accounts
|
||||
title: Requesting Merlin Accounts
|
||||
#tags:
|
||||
keywords: registration, register, account, merlin5, merlin6, snow, service now
|
||||
last_updated: 07 September 2022
|
||||
@ -8,117 +8,40 @@ sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/request-account.html
|
||||
---
|
||||
|
||||
Requesting access to the cluster must be done through **[PSI Service Now](https://psi.service-now.com/psisp)** as an
|
||||
*Incident Request*. AIT and us are working on a ServiceNow integrated form to ease this process in the future.
|
||||
|
||||
Due to the ticket *priority* being *Low* for non-emergency requests of this kind, it might take up to 56h in the worst case until access to the cluster is granted (raise the priority if you have strong reasons for faster access) .
|
||||
|
||||
---
|
||||
|
||||
## Requesting Access to Merlin6
|
||||
|
||||
Access to Merlin6 is regulated by a PSI user's account being a member of the **svc-cluster_merlin6** group.
|
||||
Access to Merlin6 is regulated by a PSI user's account being a member of the **`svc-cluster_merlin6`** group. Access to this group will also grant access to older generations of Merlin (`merlin5`).
|
||||
|
||||
Registration for **Merlin6** access *must be done* through **[PSI Service Now](https://psi.service-now.com/psisp)**:
|
||||
Requesting **Merlin6** access *has to be done* with the corresponding **[Request Linux Group Membership](https://psi.service-now.com/psisp?id=psi_new_sc_cat_item&sys_id=84f2c0c81b04f110679febd9bb4bcbb1)** form, available in the [PSI Service Now Service Catalog](https://psi.service-now.com/psisp).
|
||||
|
||||
* Please open a ticket as *Incident Request*, with subject:
|
||||

|
||||
|
||||
```
|
||||
Subject: [Merlin6] Access Request for user xxxxx
|
||||
```
|
||||
Mandatory customizable fields are the following:
|
||||
* **`Order Access for user`**, which defaults to the logged in user. However, requesting access for another user it's also possible.
|
||||
* **`Request membership for group`**, for Merlin6 the **`svc-cluster_merlin6`** must be selected.
|
||||
* **`Justification`**, please add here a short justification why access to Merlin6 is necessary.
|
||||
|
||||
* Text content (please use always this template and fill the fields marked by `xxxxx`):
|
||||
|
||||
```
|
||||
Dear HelpDesk,
|
||||
|
||||
I would like to request access to the Merlin6 cluster. This is my account information
|
||||
* Last Name: xxxxx
|
||||
* First Name: xxxxx
|
||||
* PSI user account: xxxxx
|
||||
|
||||
Please add me to the following Unix groups:
|
||||
* 'svc-cluster_merlin6'
|
||||
|
||||
Thanks,
|
||||
|
||||
```
|
||||
|
||||
---
|
||||
Once submitted, the Merlin responsible will approve the request as soon as possible (within the next few hours on working days). Once the request is approved, *it may take up to 30 minutes to get the account fully configured*.
|
||||
|
||||
## Requesting Access to Merlin5
|
||||
|
||||
Merlin5 computing nodes will be available for some time as a **best effort** service.
|
||||
For accessing the old Merlin5 resources, users should belong to the **svc-cluster_merlin5** Unix Group.
|
||||
Access to Merlin5 is regulated by a PSI user's account being a member of the **`svc-cluster_merlin5`** group. Access to this group does not grant access to newer generations of Merlin (`merlin6`, `gmerlin6`, and future ones).
|
||||
|
||||
Registration for **Merlin5** access *must be done* through **[PSI Service Now](https://psi.service-now.com/psisp)**:
|
||||
Requesting **Merlin5** access *has to be done* with the corresponding **[Request Linux Group Membership](https://psi.service-now.com/psisp?id=psi_new_sc_cat_item&sys_id=84f2c0c81b04f110679febd9bb4bcbb1)** form, available in the [PSI Service Now Service Catalog](https://psi.service-now.com/psisp).
|
||||
|
||||
* Please open a ticket as *Incident Request*, with subject:
|
||||

|
||||
|
||||
```
|
||||
Subject: [Merlin5] Access Request for user xxxxx
|
||||
```
|
||||
Mandatory customizable fields are the following:
|
||||
* **`Order Access for user`**, which defaults to the logged in user. However, requesting access for another user it's also possible.
|
||||
* **`Request membership for group`**, for Merlin5 the **`svc-cluster_merlin5`** must be selected.
|
||||
* **`Justification`**, please add here a short justification why access to Merlin5 is necessary.
|
||||
|
||||
* Text content (please use always this template):
|
||||
Once submitted, the Merlin responsible will approve the request as soon as possible (within the next few hours on working days). Once the request is approved, *it may take up to 30 minutes to get the account fully configured*.
|
||||
|
||||
* Text content (please use always this template and fill the fields marked by `xxxxx`):
|
||||
## Further documentation
|
||||
|
||||
```
|
||||
Dear HelpDesk,
|
||||
|
||||
I would like to request access to the Merlin5 cluster. This is my account information
|
||||
* Last Name: xxxxx
|
||||
* First Name: xxxxx
|
||||
* PSI user account: xxxxx
|
||||
Further information it's also available in the Linux Central Documentation:
|
||||
* [Unix Group / Group Management for users](https://linux.psi.ch/services-user-guide/unix_groups.html)
|
||||
* [Unix Group / Group Management for group managers](https://linux.psi.ch/services-admin-guide/unix_groups.html)
|
||||
|
||||
Please add me to the following Unix groups:
|
||||
* 'svc-cluster_merlin5'
|
||||
|
||||
Thanks,
|
||||
|
||||
```
|
||||
|
||||
Alternatively, if you want to request access to both Merlin5 and Merlin6, you can request it in the same ticket as follows:
|
||||
* Use the template **[Requesting Access to Merlin6](##Requesting-Access-to-Merlin6)**
|
||||
* Add the **``'svc-cluster_merlin5'``** Unix Group after the line containing the merlin6 group **`'svc-cluster_merlin6'`**)
|
||||
|
||||
---
|
||||
|
||||
## Requesting extra Unix groups
|
||||
|
||||
Some users may require to be added to some extra specific Unix groups.
|
||||
* This will grant access to specific resources.
|
||||
* In example, some BIO groups may belong to a specific BIO group for having access to the project area for that group.
|
||||
* Supervisors should inform new users which extra groups are needed for their project(s).
|
||||
|
||||
When requesting access to **[Merlin6](##Requesting-Access-to-Merlin6)** or **[Merlin5](##Requesting-Access-to-Merlin5)**,
|
||||
these extra Unix Groups can be added in the same *Incident Request* by supplying additional lines specifying the respective Groups.
|
||||
|
||||
Naturally, this step can also be done later when the need arises in a separate **[PSI Service Now](https://psi.service-now.com/psisp)** ticket.
|
||||
|
||||
* Please open a ticket as *Incident Request*, with subject:
|
||||
|
||||
```
|
||||
Subject: [Unix Group] Access Request for user xxxxx
|
||||
```
|
||||
|
||||
* Text content (please use always this template):
|
||||
|
||||
```
|
||||
Dear HelpDesk,
|
||||
|
||||
I would like to request membership for the Unix Groups listed below. This is my account information
|
||||
* Last Name: xxxxx
|
||||
* First Name: xxxxx
|
||||
* PSI user account: xxxxx
|
||||
|
||||
List of unix groups I would like to be added to:
|
||||
* unix_group_1
|
||||
* unix_group_2
|
||||
* ...
|
||||
* unix_group_N
|
||||
|
||||
Thanks,
|
||||
```
|
||||
|
||||
**Important note**: Requesting access to specific Unix Groups will require validation from the responsible of the Unix Group. If you ask for inclusion in many groups it may take longer, since the fulfillment of the request will depend on more people.
|
||||
**Special thanks** to the **Linux Central Team** and **AIT** to make this possible.
|
||||
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
title: Requesting a Project
|
||||
title: Requesting a Merlin Project
|
||||
#tags:
|
||||
keywords: merlin project, project, snow, service now
|
||||
last_updated: 07 September 2022
|
||||
@ -8,17 +8,83 @@ sidebar: merlin6_sidebar
|
||||
permalink: /merlin6/request-project.html
|
||||
---
|
||||
|
||||
A project owns its own storage area which can be accessed by the storage members.
|
||||
A project owns its own storage area in Merlin, which can be accessed by other group members.
|
||||
|
||||
Projects can receive a higher storage quota than user areas and should be the primary way of organizing bigger storage requirements
|
||||
in a multi-user collaboration.
|
||||
|
||||
Access to a project's directories is governed by project members belonging to a common **Unix group**. You may use an existing
|
||||
Unix group or you may have a new Unix group created especially for the project. The **project responsible** will be the owner of
|
||||
the Unix group (this is important)!
|
||||
the Unix group (*this is important*)!
|
||||
|
||||
The **default storage quota** for a project is 1TB (with a maximal *Number of Files* of 1M). If you need a larger assignment, you
|
||||
need to request this and provide a description of your storage needs.
|
||||
This document explains how to request new Unix group, to request membership for existing groups, and the procedure for requesting a Merlin project.
|
||||
|
||||
## About Unix groups
|
||||
|
||||
Before requesting a Merlin project, it is important to have a Unix group that can be used to grant access to it to different members
|
||||
of the project.
|
||||
|
||||
Unix groups in the PSI Active Directory (which is the PSI central database containing user and group information, and more) are defined by the `unx-` prefix, followed by a name.
|
||||
In general, PSI employees working on Linux systems (including HPC clusters, like Merlin) can request for a non-existing Unix group, and can become responsible for managing it.
|
||||
In addition, a list of administrators can be set. The administrators, together with the group manager, can approve or deny membership requests. Further information about this topic
|
||||
is covered in the [Linux Documentation - Services Admin Guides: Unix Groups / Group Management](https://linux.psi.ch/services-admin-guide/unix_groups.html), managed by the Central Linux Team.
|
||||
|
||||
To gran access to specific Merlin project directories, some users may require to be added to some specific **Unix groups**:
|
||||
* Each Merlin project (i.e. `/data/project/{bio|general}/$projectname`) or experiment (i.e. `/data/experiment/$experimentname`) directory has access restricted by ownership and group membership (with a very few exceptions allowing public access).
|
||||
* Users requiring access to a specific restricted project or experiment directory have to request membership for the corresponding Unix group owning the directory.
|
||||
|
||||
### Requesting a new Unix group
|
||||
|
||||
**If you need a new Unix group** to be created, you need to first get this group through a separate
|
||||
**[PSI Service Now ticket](https://psi.service-now.com/psisp)**. **Please use the following template.**
|
||||
You can also specify the login names of the initial group members and the **owner** of the group.
|
||||
The owner of the group is the person who will be allowed to modify the group.
|
||||
|
||||
* Please open an *Incident Request* with subject:
|
||||
```
|
||||
Subject: Request for new unix group xxxx
|
||||
```
|
||||
|
||||
* and base the text field of the request on this template
|
||||
```
|
||||
Dear HelpDesk
|
||||
|
||||
I would like to request a new unix group.
|
||||
|
||||
Unix Group Name: unx-xxxxx
|
||||
Initial Group Members: xxxxx, yyyyy, zzzzz, ...
|
||||
Group Owner: xxxxx
|
||||
Group Administrators: aaaaa, bbbbb, ccccc, ....
|
||||
|
||||
Best regards,
|
||||
```
|
||||
|
||||
### Requesting Unix group membership
|
||||
|
||||
Existing Merlin projects have already a Unix group assigned. To have access to a project, users must belong to the proper **Unix group** owning that project.
|
||||
Supervisors should inform new users which extra groups are needed for their project(s). If this information is not known, one can check the permissions for that directory. In example:
|
||||
```bash
|
||||
(base) ❄ [caubet_m@merlin-l-001:/data/user/caubet_m]# ls -ltrhd /data/project/general/$projectname
|
||||
(base) ❄ [caubet_m@merlin-l-001:/data/user/caubet_m]# ls -ltrhd /data/project/bio/$projectname
|
||||
```
|
||||
|
||||
Requesting membership for a specific Unix group *has to be done* with the corresponding **[Request Linux Group Membership](https://psi.service-now.com/psisp?id=psi_new_sc_cat_item&sys_id=84f2c0c81b04f110679febd9bb4bcbb1)** form, available in the [PSI Service Now Service Catalog](https://psi.service-now.com/psisp).
|
||||
|
||||

|
||||
|
||||
Once submitted, the responsible of the Unix group has to approve the request.
|
||||
|
||||
**Important note**: Requesting access to specific Unix Groups will require validation from the responsible of the Unix Group. If you ask for inclusion in many groups it may take longer, since the fulfillment of the request will depend on more people.
|
||||
|
||||
Further information can be found in the [Linux Documentation - Services User guide: Unix Groups / Group Management](https://linux.psi.ch/services-user-guide/unix_groups.html)
|
||||
|
||||
### Managing Unix Groups
|
||||
|
||||
Other administration operations on Unix Groups it's mainly covered in the [Linux Documentation - Services Admin Guides: Unix Groups / Group Management](https://linux.psi.ch/services-admin-guide/unix_groups.html), managed by the Central Linux Team.
|
||||
|
||||
## Requesting a Merlin project
|
||||
|
||||
Once a Unix group is available, a Merlin project can be requested.
|
||||
To request a project, please provide the following information in a **[PSI Service Now ticket](https://psi.service-now.com/psisp)**
|
||||
|
||||
* Please open an *Incident Request* with subject:
|
||||
@ -45,28 +111,13 @@ To request a project, please provide the following information in a **[PSI Servi
|
||||
Best regards,
|
||||
```
|
||||
|
||||
**If you need a new Unix group** to be created, you need to first get this group through
|
||||
a separate ***[PSI Service Now ticket](https://psi.service-now.com/psisp)**. Please
|
||||
use the following template. You can also specify the login names of the initial group
|
||||
members and the **owner** of the group. The owner of the group is the person who
|
||||
will be allowed to modify the group.
|
||||
The **default storage quota** for a project is 1TB (with a maximal *Number of Files* of 1M). If you need a larger assignment, you
|
||||
need to request this and provide a description of your storage needs.
|
||||
|
||||
* Please open an *Incident Request* with subject:
|
||||
```
|
||||
Subject: Request for new unix group xxxx
|
||||
```
|
||||
|
||||
* and base the text field of the request on this template
|
||||
```
|
||||
Dear HelpDesk
|
||||
|
||||
I would like to request a new unix group.
|
||||
|
||||
Unix Group Name: unx-xxxxx
|
||||
Initial Group Members: xxxxx, yyyyy, zzzzz, ...
|
||||
Group Owner: xxxxx
|
||||
|
||||
Best regards,
|
||||
```
|
||||
## Further documentation
|
||||
|
||||
Further information it's also available in the Linux Central Documentation:
|
||||
* [Unix Group / Group Management for users](https://linux.psi.ch/services-user-guide/unix_groups.html)
|
||||
* [Unix Group / Group Management for group managers](https://linux.psi.ch/services-admin-guide/unix_groups.html)
|
||||
|
||||
**Special thanks** to the **Linux Central Team** and **AIT** to make this possible.
|
||||
|
@ -79,14 +79,12 @@ central procedures. Alternatively, if you do not know how to do that, follow the
|
||||
**[Requesting extra Unix groups](/merlin6/request-account.html#requesting-extra-unix-groups)** procedure, or open
|
||||
a **[PSI Service Now](https://psi.service-now.com/psisp)** ticket.
|
||||
|
||||
### Installation
|
||||
### Documentation
|
||||
|
||||
Accessing the Data Catalog is done through the [SciCat software](https://melanie.gitpages.psi.ch/SciCatPages/).
|
||||
Documentation is here: [ingestManual.pdf](https://melanie.gitpages.psi.ch/SciCatPages/ingestManual.pdf).
|
||||
Documentation is here: [ingestManual](https://scicatproject.github.io/documentation/Ingestor/ingestManual.html).
|
||||
|
||||
#### (Merlin systems) Loading datacatalog tools
|
||||
|
||||
This is the ***officially supported method*** for archiving data from the Merlin cluster.
|
||||
#### Loading datacatalog tools
|
||||
|
||||
The latest datacatalog software is maintained in the PSI module system. To access it from the Merlin systems, run the following command:
|
||||
|
||||
@ -96,37 +94,28 @@ module load datacatalog
|
||||
|
||||
This can be done from any host in the Merlin cluster accessible to users. Usually, the login nodes will be used for archiving.
|
||||
|
||||
#### (Non-standard systems) Installing datacatalog tools
|
||||
### Finding your token
|
||||
|
||||
***This method is not supported by the Merlin admins***. However, we provide a small recipe for archiving from any host at PSI.
|
||||
For any problems, Central AIT should be contacted.
|
||||
As of 2022-04-14 a secure token is required to interact with the data catalog. This is a long random string that replaces the previous user/password authentication (allowing access for non-PSI use cases). **This string should be treated like a password and not shared.**
|
||||
|
||||
If you do not have access to PSI modules (for instance, when archiving from Ubuntu systems), then you can install the
|
||||
datacatalog software yourself. These tools require 64-bit linux. To ingest from Windows systems, it is suggested to
|
||||
transfer the data to a linux system such as Merlin.
|
||||
1. Go to discovery.psi.ch
|
||||
1. Click 'Sign in' in the top right corner. Click 'Login with PSI account' and log in on the PSI login page.
|
||||
1. You should be redirected to your user settings and see a 'User Information' section. If not, click on your username in the top right and choose 'Settings' from the menu.
|
||||
1. Look for the field 'Catamel Token'. This should be a 64-character string. Click the icon to copy the token.
|
||||
|
||||
We suggest storing the SciCat scripts in ``~/bin`` so that they can be easily accessed.
|
||||

|
||||
|
||||
You will need to save this token for later steps. To avoid including it in all the commands, we suggest saving it to an environment variable (Linux):
|
||||
|
||||
```bash
|
||||
mkdir -p ~/bin
|
||||
cd ~/bin
|
||||
/usr/bin/curl -O https://intranet.psi.ch/pub/Daas/WebHome/datasetIngestor
|
||||
chmod +x ./datasetIngestor
|
||||
/usr/bin/curl -O https://intranet.psi.ch/pub/Daas/WebHome/datasetRetriever
|
||||
chmod +x ./datasetRetriever
|
||||
```
|
||||
$ SCICAT_TOKEN=RqYMZcqpqMJqluplbNYXLeSyJISLXfnkwlfBKuvTSdnlpKkU
|
||||
```
|
||||
|
||||
When the scripts are updated you will be prompted to re-run some of the above commands to get the latest version.
|
||||
(Hint: prefix this line with a space to avoid saving the token to your bash history.)
|
||||
|
||||
You can call the ingestion scripts using the full path (``~/bin/datasetIngestor``) or else add ``~/bin`` to your unix PATH.
|
||||
To do so, add the following line to your ``~/.bashrc`` file:
|
||||
Tokens expire after 2 weeks and will need to be fetched from the website again.
|
||||
|
||||
```bash
|
||||
export PATH="$HOME/bin:$PATH"
|
||||
```
|
||||
|
||||
### Ingestion
|
||||
### Ingestion
|
||||
|
||||
The first step to ingesting your data into the catalog is to prepare a file describing what data you have. This is called
|
||||
**``metadata.json``**, and can be created with a text editor (e.g. *``vim``*). It can in principle be saved anywhere,
|
||||
@ -172,20 +161,22 @@ section below. An example follows:
|
||||
}
|
||||
```
|
||||
|
||||
It is recommended to use the [ScicatEditor](https://bliven_s.gitpages.psi.ch/SciCatEditor/) for creating metadata files. This is a browser-based tool specifically for ingesting PSI data. Using the tool avoids syntax errors and provides templates for common data sets and options. The finished JSON file can then be downloaded to merlin or copied into a text editor.
|
||||
|
||||
Another option is to use the SciCat graphical interface from NoMachine. This provides a graphical interface for selecting data to archive and is particularly useful for data associated with a DUO experiment and p-group. Type `SciCat` to get started after loading the `datacatalog` module. The GUI also replaces the command-line ingestion described below.
|
||||
|
||||
The following steps can be run from wherever you saved your ``metadata.json``. First, perform a "dry-run" which will check the metadata for errors:
|
||||
|
||||
```bash
|
||||
datasetIngestor metadata.json
|
||||
datasetIngestor --token $SCICAT_TOKEN metadata.json
|
||||
```
|
||||
|
||||
It will ask for your PSI credentials and then print some info about the data to be ingested. If there are no errors, proceed to the real ingestion:
|
||||
|
||||
```bash
|
||||
datasetIngestor --ingest --autoarchive metadata.json
|
||||
datasetIngestor --token $SCICAT_TOKEN --ingest --autoarchive metadata.json
|
||||
```
|
||||
|
||||
For particularly important datasets, you may also want to use the parameter **``--tapecopies 2``** to store **redundant copies** of the data.
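For example, combining it with the ingestion command above (a sketch based on the flags documented here):

```bash
datasetIngestor --token $SCICAT_TOKEN --ingest --autoarchive --tapecopies 2 metadata.json
```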
|
||||
|
||||
You will be asked whether you want to copy the data to the central system:
|
||||
|
||||
* If you are on the Merlin cluster and you are archiving data from ``/data/user`` or ``/data/project``, answer 'no' since the data catalog can
|
||||
@ -240,7 +231,7 @@ find . -name '*~' -delete
|
||||
find . -name '*#autosave#' -delete
|
||||
```
|
||||
|
||||
### Troubleshooting & Known Bugs
|
||||
#### Troubleshooting & Known Bugs
|
||||
|
||||
* The following message can be safely ignored:
|
||||
|
||||
@ -259,8 +250,22 @@ step will take a long time and may appear to have hung. You can check what files
|
||||
|
||||
where UID is the dataset ID (12345678-1234-1234-1234-123456789012) and PATH is the absolute path to your data. Note that rsync creates directories first and that the transfer order is not alphabetical in some cases, but it should be possible to see whether any data has transferred.
|
||||
|
||||
* There is currently a limit on the number of files per dataset (technically, the limit is from the total length of all file paths). It is recommended to break up datasets into 300'000 files or less.
|
||||
* There is currently a limit on the number of files per dataset (technically, the limit is from the total length of all file paths). It is recommended to break up datasets into 300'000 files or less.
|
||||
* If it is not possible or desirable to split data between multiple datasets, an alternate work-around is to package files into a tarball. For datasets which are already compressed, omit the -z option for a considerable speedup:
|
||||
|
||||
```
|
||||
tar -cf [output].tar [srcdir]
|
||||
```
|
||||
|
||||
Uncompressed data can be compressed on the cluster using the following command:
|
||||
|
||||
```
|
||||
sbatch /data/software/Slurm/Utilities/Parallel_TarGz.batch -s [srcdir] -t [output].tar -n
|
||||
```
|
||||
|
||||
Run `/data/software/Slurm/Utilities/Parallel_TarGz.batch -h` for more details and options.
|
||||
|
||||
#### Sample ingestion output (datasetIngestor 1.1.11)
|
||||
<details>
|
||||
<summary>[Show Example]: Sample ingestion output (datasetIngestor 1.1.11)</summary>
|
||||
<pre class="terminal code highlight js-syntax-highlight plaintext" lang="plaintext" markdown="false">
|
||||
@ -353,15 +358,22 @@ user_n@pb-archive.psi.ch's password:
|
||||
</pre>
|
||||
</details>
|
||||
|
||||
### Publishing
|
||||
|
||||
After datasets are ingested they can be assigned a public DOI. This can be included in publications and will make the datasets available on http://doi.psi.ch.
|
||||
|
||||
For instructions on this, please read the ['Publish' section in the ingest manual](https://scicatproject.github.io/documentation/Ingestor/ingestManual.html#sec-8).
|
||||
|
||||
### Retrieving data
|
||||
|
||||
The retrieval process is still a work in progress. For more info, read the ingest manual.
|
||||
Retrieving data from the archive is also initiated through the Data Catalog. Please read the ['Retrieve' section in the ingest manual](https://scicatproject.github.io/documentation/Ingestor/ingestManual.html#sec-6).
|
||||
|
||||
## Further Information
|
||||
|
||||
* **[PSI Data Catalog](https://discovery.psi.ch)**
|
||||
* **[Full Documentation](https://melanie.gitpages.psi.ch/SciCatPages/)**: **[PDF](https://melanie.gitpages.psi.ch/SciCatPages/ingestManual.pdf)**.
|
||||
* Data Catalog **[Official Website](https://www.psi.ch/photon-science-data-services/data-catalog-and-archive)**
|
||||
* Data catalog **[SciCat Software](https://scicatproject.github.io/)**
|
||||
* **[FAIR](https://www.nature.com/articles/sdata201618)** definition and **[SNF Research Policy](http://www.snf.ch/en/theSNSF/research-policies/open_research_data/Pages/default.aspx#FAIR%20Data%20Principles%20for%20Research%20Data%20Management)**
|
||||
* **[Petabyte Archive at CSCS](https://www.cscs.ch/fileadmin/user_upload/contents_publications/annual_reports/AR2017_Online.pdf)**
|
||||
* [PSI Data Catalog](https://discovery.psi.ch)
|
||||
* [Full Documentation](https://scicatproject.github.io/documentation/Ingestor/ingestManual.html)
|
||||
* [Published Datasets (doi.psi.ch)](https://doi.psi.ch)
|
||||
* Data Catalog [PSI page](https://www.psi.ch/photon-science-data-services/data-catalog-and-archive)
|
||||
* Data catalog [SciCat Software](https://scicatproject.github.io/)
|
||||
* [FAIR](https://www.nature.com/articles/sdata201618) definition and [SNF Research Policy](http://www.snf.ch/en/theSNSF/research-policies/open_research_data/Pages/default.aspx#FAIR%20Data%20Principles%20for%20Research%20Data%20Management)
|
||||
* [Petabyte Archive at CSCS](https://www.cscs.ch/fileadmin/user_upload/contents_publications/annual_reports/AR2017_Online.pdf)
|
||||
|
@ -87,7 +87,7 @@ exit
|
||||
Notice that you will need to enter your password once. This step is required for generating the **keytab** file.
|
||||
5. Once back in the main shell, one has to ensure that the file has the proper permissions:
|
||||
```bash
|
||||
chmod 0400 ~/.k5/krb5.keytab
|
||||
chmod 0600 ~/.k5/krb5.keytab
|
||||
```
|
||||
|
||||
### Obtaining tickets by using keytab files
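As a rough sketch only (the exact command and the Kerberos realm below are assumptions and may differ from the documented procedure):

```bash
# Obtain a Kerberos ticket from the keytab created above; the realm D.PSI.CH is an assumption
kinit -kt ~/.k5/krb5.keytab $USER@D.PSI.CH
klist   # list the obtained tickets
```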
|
||||
|
@ -98,3 +98,25 @@ Remote access is also possible through VPN, however, you **must not use 'rem-acc
|
||||
to the Merlin6 NoMachine **`merlin-nx.psi.ch` front-end** as if you were inside PSI. For VPN access, you should request
|
||||
it to the IT department by opening a PSI Service Now ticket:
|
||||
[VPN Access (PSI employees)](https://psi.service-now.com/psisp?id=psi_new_sc_cat_item&sys_id=beccc01b6f44a200d02a82eeae3ee440).
|
||||
|
||||
|
||||
## Advanced Display Settings
|
||||
|
||||
**Nomachine Display Settings** can be accessed and changed either when creating a new session or by clicking the very top right corner of a running session.
|
||||
|
||||
### Prevent Rescaling
|
||||
|
||||
These settings prevent "blurriness" at the cost of some performance, so choose depending on your performance needs.
|
||||
|
||||
* Display > Resize remote display (forces 1:1 pixel sizes)
|
||||
* Display > Change settings > Quality: Choose Medium-Best Quality
|
||||
* Display > Change settings > Modify advanced settings
|
||||
* Check: Disable network-adaptive display quality (disables lossy compression)
|
||||
* Check: Disable client side image post-processing
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
---
|
||||
title: Transferring Data
|
||||
#tags:
|
||||
keywords: transferring data, data transfer, rsync, winscp, copy data, copying, sftp, import, export, hop, vpn
|
||||
keywords: transferring data, data transfer, rsync, winscp, copy data, copying, sftp, import, export, hopx, vpn
|
||||
last_updated: 24 August 2023
|
||||
#summary: ""
|
||||
sidebar: merlin6_sidebar
|
||||
@ -21,8 +21,8 @@ visibility.
|
||||
- HTTP-based protocols using ports 80 or 445 (https, WebDav, etc)
|
||||
- Protocols using other ports require admin configuration and may only work with
|
||||
specific hosts (ftp, rsync daemons, etc)
|
||||
- Systems on the internet can access the Remote Access Merlin servers
|
||||
(ra-merlin*.psi.ch) using ssh-based protocols
|
||||
- Systems on the internet can access the [PSI Data Transfer](https://www.psi.ch/en/photon-science-data-services/data-transfer) service
|
||||
`datatransfer.psi.ch`, using ssh-based protocols and [Globus](https://www.globus.org/)
|
||||
|
||||
|
||||
## Direct transfer via Merlin6 login nodes
|
||||
@ -58,33 +58,50 @@ from the Software Kiosk on PSI machines. Add `merlin-l-01.psi.ch` as a host and
|
||||
connect with your PSI credentials. You can then drag-and-drop files between your
|
||||
local computer and merlin.
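From a terminal, a minimal command-line equivalent looks like this (a sketch; the target directory is an example):

```bash
# Copy a local file to your Merlin user data directory via a login node
scp ./results.tar.gz $USER@merlin-l-01.psi.ch:/data/user/$USER/
```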
|
||||
|
||||
### SWITCHfilesender
|
||||
|
||||
## Remote Access Servers
|
||||
**[SWITCHfilesender](https://filesender.switch.ch/filesender2/?s=upload)** is an installation of the FileSender project (filesender.org) which is a web based application that allows authenticated users to securely and easily send arbitrarily large files to other users.
|
||||
|
||||
Two servers are enabled for data transfers originating from outside PSI.
|
||||
This is a central service managed by a different team, which is managing the different Remote Access
|
||||
services at PSI for the different facilities (including the one for Merlin). However, any problems
|
||||
or questions related to it can be directly [reported](/merlin6/contact.html) to the Merlin administrators,
|
||||
which will forward the request if necessary.
|
||||
Authentication of users is provided through SimpleSAMLphp, supporting SAML2, LDAP, RADIUS and more. Users without an account can be sent an upload voucher by an authenticated user. FileSender is developed to the requirements of the higher education and research community.
|
||||
|
||||
These Remote Access Merlin servers are the following:
|
||||
* **`ra-merlin-01.psi.ch`**
|
||||
* **`ra-merlin-02.psi.ch`**
|
||||
The purpose of the software is to send a large file to someone, have that file available for download for a certain number of downloads and/or a certain amount of time, and after that automatically delete the file. The software is not intended as a permanent file publishing platform.
|
||||
|
||||
Both servers have mounted the following Merlin filesystems:
|
||||
* `/data/project` directories mounted in RW on demand. Project responsibles must request it.
|
||||
* `/data/user` mounted in RW (read-write)
|
||||
* `/data/experiment/mu3e` directories mounted in RW (read-write), except `data` (read-only mounted)
|
||||
* `/export` directory in RW (read-write). `/export` is also visible from login nodes.
|
||||
**[SWITCHfilesender](https://filesender.switch.ch/filesender2/?s=upload)** is fully integrated with PSI; therefore, PSI employees can log in using their PSI account (through the Authentication and Authorization Infrastructure / AAI, by selecting PSI as the institution used for login).
|
||||
|
||||
Access to the Remote Access server uses ***Multi factor authentication*** (MFA).
|
||||
## PSI Data Transfer
|
||||
|
||||
From August 2024, Merlin is connected to the **[PSI Data Transfer](https://www.psi.ch/en/photon-science-data-services/data-transfer)** service,
|
||||
`datatransfer.psi.ch`. This is a central service managed by the **[Linux team](https://linux.psi.ch/index.html)**. However, any problems or questions related to it can be directly
|
||||
[reported](/merlin6/contact.html) to the Merlin administrators, which will forward the request if necessary.
|
||||
|
||||
The PSI Data Transfer service supports the following protocols:
|
||||
* Data Transfer - SSH (scp / rsync)
|
||||
* Data Transfer - Globus
|
||||
|
||||
Notice that `datatransfer.psi.ch` does not allow SSH login; only `rsync`, `scp` and [Globus](https://www.globus.org/) access is allowed.
|
||||
|
||||
The following filesystems are mounted:
|
||||
* `/merlin/export` which points to the `/export` directory in Merlin.
|
||||
* `/merlin/data/experiment/mu3e` which points to the `/data/experiment/mu3e` directories in Merlin.
|
||||
* Mu3e sub-directories are mounted in RW (read-write), except for `data` (read-only mounted)
|
||||
* `/merlin/data/project/general` which points to the `/data/project/general` directories in Merlin.
|
||||
* Owners of Merlin projects should request explicit access to it.
|
||||
* Currently, only `CSCS` is available for transferring files between PizDaint/Alps and Merlin
|
||||
* `/merlin/data/project/bio` which points to the `/data/project/bio` directories in Merlin.
|
||||
* `/merlin/data/user` which points to the `/data/user` directories in Merlin.
|
||||
|
||||
Access to the PSI Data Transfer uses ***Multi factor authentication*** (MFA).
|
||||
Therefore, having the Microsoft Authenticator App is required as explained [here](https://www.psi.ch/en/computing/change-to-mfa).
|
||||
|
||||
{{site.data.alerts.tip}}Please follow the
|
||||
<b><a href="https://www.psi.ch/en/photon-science-data-services/data-transfer">Official PSI Data Transfer</a></b> documentation for further instructions.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
### Directories
|
||||
|
||||
#### /data/user
|
||||
#### /merlin/data/user
|
||||
|
||||
User data directories are mounted in RW on both 'ra-merlin-01' and 'ra-merlin-02'.
|
||||
User data directories are mounted in RW.
|
||||
|
||||
{{site.data.alerts.warning}}Please <b>ensure properly secured permissions</b> in your '/data/user'
|
||||
directory. By default, when the directory is created, the system applies the most restrictive
|
||||
@ -92,7 +109,7 @@ permissions. However, this does not prevent users for changing permissions if th
|
||||
point, users become responsible for those changes.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
#### /export
|
||||
#### /merlin/export
|
||||
|
||||
Transferring large amounts of data from outside PSI to Merlin is always possible through `/export`.
|
||||
|
||||
@ -109,48 +126,30 @@ This is configured in Read/Write mode. If you need access, please, contact the M
|
||||
For exporting data from Merlin to outside PSI by using `/export`, one has to:
|
||||
* From a Merlin login node, copy your data from any directory (i.e. `/data/project`, `/data/user`, `/scratch`) to
|
||||
`/export`. Ensure that your directories and files are secured with proper permissions.
|
||||
* Once data is copied, from **ra-merlin-01.psi.ch** or **ra-merlin-02.psi.ch**, copy the data from `/export` to outside PSI.
|
||||
* Once data is copied, from **`datatransfer.psi.ch`**, copy the data from `/merlin/export` to outside PSI
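As a sketch of the two steps above (paths are examples):

```bash
# Step 1 - on a Merlin login node: stage the data in /export
rsync -av /data/user/$USER/results/ /export/$USER/results/

# Step 2 - on the machine outside PSI: pull the staged data through datatransfer.psi.ch
# (replace 'username' with your PSI account name)
rsync -av username@datatransfer.psi.ch:/merlin/export/username/results/ ./results/
```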
|
||||
|
||||
##### Importing data to Merlin
|
||||
|
||||
For importing data from outside PSI to Merlin by using `/export`, one has to:
|
||||
* From **ra-merlin-01.psi.ch** or **ra-merlin-02.psi.ch**, copy the data from outside PSI to `/export`.
|
||||
* From **`datatransfer.psi.ch`**, copy the data from outside PSI to `/merlin/export`.
|
||||
Ensure that your directories and files are secured with proper permissions.
|
||||
* Once data is copied, from a Merlin login node, copy your data from `/export` to any directory (i.e. `/data/project`, `/data/user`, `/scratch`).
|
||||
|
||||
#### /data/project
|
||||
#### Request access to your project directory
|
||||
|
||||
Optionally, instead of using `/export`, experiments with a Merlin project can request Read/Write or Read/Only access to their project directory.
|
||||
Optionally, instead of using `/export`, Merlin project owners can request Read/Write or Read/Only access to their project directory.
|
||||
|
||||
{{site.data.alerts.tip}}<b>Merlin projects can request direct access.</b>
|
||||
This can be configured in Read/Write or Read/Only modes. If your project needs access, please,
|
||||
contact the Merlin administrators.
|
||||
This can be configured in Read/Write or Read/Only modes. If your project needs access, please, contact the Merlin administrators.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
### Accepted protocols
|
||||
|
||||
Accepted protocols for Remote Access Merlin servers are the following:
|
||||
* **sftp**: **``sftp``** command or similar X11/Windows/MacOS based programs.
|
||||
* **ssh**: **`scp`** command (as well as **WinSCP** and similar programs) or **`rsync`** command
|
||||
* **~~Globus Online~~**: ***not available yet.***
|
||||
|
||||
### Remote Access Servers Policies
|
||||
|
||||
SSH is one of the allowed protocols.
|
||||
* Please, **absolutely never** use these servers as login nodes.
|
||||
* Please avoid copying files to the *home* directories.
|
||||
* Please **never use SSH Keys** for accessing these servers. Accessing through SSH keys will be denied in the upcoming months.
|
||||
|
||||
Only `/data/user`, `/data/project` and `/export` directories should be used on these nodes,
|
||||
and exclusively for transferring data from/to PSI to/from outside PSI.
|
||||
|
||||
## Connecting to Merlin6 from outside PSI
|
||||
|
||||
Merlin6 is fully accessible from within the PSI network. To connect from outside you can use:
|
||||
|
||||
- [VPN](https://www.psi.ch/en/computing/vpn) ([alternate instructions](https://intranet.psi.ch/BIO/ComputingVPN))
|
||||
- [SSH hop](https://www.psi.ch/en/computing/ssh-hop)
|
||||
* Please avoid transferring large amounts of data through **hop**
|
||||
- [SSH hopx](https://www.psi.ch/en/computing/ssh-hop)
|
||||
* Please avoid transferring large amounts of data through **hopx**
|
||||
- [No Machine](nomachine.md)
|
||||
* Remote Interactive Access through [**'rem-acc.psi.ch'**](https://www.psi.ch/en/photon-science-data-services/remote-interactive-access)
|
||||
* Please avoid transferring large amounts of data through **NoMachine**
|
||||
|
@ -59,6 +59,11 @@ release and associated listening ports are the following:
|
||||
<td><font size="2" face="Courier New">merlin-l-001 merlin-l-001 merlin-l-001</font></td>
|
||||
<td>32959</td>
|
||||
</tr>
|
||||
<tr align="center">
|
||||
<td>2023R2</td>
|
||||
<td><font size="2" face="Courier New">merlin-l-001 merlin-l-001 merlin-l-001</font></td>
|
||||
<td>32960</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
|
@ -10,38 +10,10 @@ permalink: /merlin6/python.html
|
||||
|
||||
PSI provides a variety of ways to execute python code.
|
||||
|
||||
1. **psi-python modules** - Central installation with common packages pre-installed
|
||||
2. **Anaconda** - Custom environments for software installation and development
|
||||
3. **Jupyterhub** - Execute Jupyter notebooks on the cluster
|
||||
4. **System Python** - Do not use! Only for OS applications.
|
||||
|
||||
## `psi-python` modules
|
||||
|
||||
The easiest way to use python is with the centrally maintained psi-python modules:
|
||||
|
||||
```
|
||||
~ $ module avail psi-python
|
||||
------------------------------------- Programming: ------------------------------
|
||||
|
||||
psi-python27/2.3.0 psi-python27/2.2.0 psi-python27/2.4.1
|
||||
psi-python27/4.4.0 psi-python34/2.1.0 psi-python35/4.2.0
|
||||
psi-python36/4.4.0
|
||||
|
||||
~ $ module load psi-python36/4.4.0
|
||||
~ $ python --version
|
||||
Python 3.6.1 :: Anaconda 4.4.0 (64-bit)
|
||||
```
|
||||
|
||||
These include over 250 common packages from the
|
||||
[Anaconda](https://docs.anaconda.com/anaconda/) software distribution, including
|
||||
numpy, pandas, requests, flask, hdf5, and more.
|
||||
|
||||
{% include callout.html type="warning" content="
|
||||
**Caution**{: .text-warning}
|
||||
Do not use `module load python`. These modules are minimal installs intended as
|
||||
dependencies for other modules that embed python.
|
||||
"%}
|
||||
|
||||
## Anaconda
|
||||
|
||||
[Anaconda](https://www.anaconda.com/) ("conda" for short) is a package manager with
|
||||
@ -78,22 +50,21 @@ merlin. Environments can grow quite large, so you will need to change the defaul
|
||||
storage location from the default (your home directory) to a larger volume (usually
|
||||
`/data/user/$USER`).
|
||||
|
||||
Save the following as `$HOME/.condarc` (update USERNAME and module version as
|
||||
necessary):
|
||||
Save the following as `$HOME/.condarc`:
|
||||
|
||||
```
|
||||
always_copy: true
|
||||
|
||||
envs_dirs:
|
||||
- /data/user/USERNAME/conda/envs
|
||||
- /data/user/$USER/conda/envs
|
||||
|
||||
pkgs_dirs:
|
||||
- /data/user/USERNAME/conda/pkgs
|
||||
- /opt/psi/Programming/anaconda/2019.07/conda/pkgs
|
||||
- /data/user/$USER/conda/pkgs
|
||||
- $ANACONDA_PREFIX/conda/pkgs
|
||||
|
||||
channels:
|
||||
- conda-forge
|
||||
- defaults
|
||||
- nodefaults
|
||||
```
|
||||
|
||||
Run `conda info` to check that the variables are being set correctly.
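With the `.condarc` in place, creating and activating an environment follows the usual conda workflow (a minimal sketch; the environment name and Python version are examples):

```bash
# Environments are created under /data/user/$USER/conda/envs as configured above
conda create --name myenv python==3.9
conda activate myenv
```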
|
||||
|
@ -43,7 +43,7 @@ interventions or problems. Users can be subscribed in two ways:
|
||||
|
||||
---
|
||||
|
||||
## The Merlin6 Team
|
||||
## The Merlin Cluster Team
|
||||
|
||||
Merlin6 is managed by the **[High Performance Computing and Emerging technologies Group](https://www.psi.ch/de/lsm/hpce-group)**, which
|
||||
is part of **NES/[Laboratory for Scientific Computing and Modelling](https://www.psi.ch/de/lsm)**.
|
||||
The PSI Merlin clusters are managed by the **[High Performance Computing and Emerging technologies Group](https://www.psi.ch/de/lsm/hpce-group)**, which
|
||||
is part of the [Science IT Infrastructure, and Services department (AWI)](https://www.psi.ch/en/awi) in PSI's [Center for Scientific Computing, Theory and Data (SCD)](https://www.psi.ch/en/csd).
|
||||
|
@ -12,7 +12,7 @@ permalink: /merlin6/faq.html
|
||||
|
||||
## How do I register for Merlin?
|
||||
|
||||
See [Requesting Accounts](/merlin6/request-account.html).
|
||||
See [Requesting Merlin Access](/merlin6/request-account.html).
|
||||
|
||||
## How do I get information about downtimes and updates?
|
||||
|
||||
@ -21,9 +21,9 @@ See [Get updated through the Merlin User list!](/merlin6/contact.html#get-update
|
||||
## How can I request access to a Merlin project directory?
|
||||
|
||||
Merlin projects are placed in the `/data/project` directory. Access to each project is controlled by Unix group membership.
|
||||
If you require access to an existing project, please request group membership as described in [Requesting extra Unix groups](/merlin6/request-account.html#requesting-extra-unix-groups).
|
||||
If you require access to an existing project, please request group membership as described in [Requesting Unix Group Membership](/merlin6/request-project.html#requesting-unix-group-membership).
|
||||
|
||||
Your project leader or project colleagues will know what Unix group you should belong to. Otherwise, you can check what Unix group is allowed to access that project directory (simply run `ls -ltrha`).
|
||||
Your project leader or project colleagues will know what Unix group you should belong to. Otherwise, you can check what Unix group is allowed to access that project directory (simply run `ls -ltrhd` for the project directory).
|
||||
|
||||
## Can I install software myself?
|
||||
|
||||
@ -46,3 +46,7 @@ conda create --name myenv python==3.9 ...
|
||||
conda activate myenv
|
||||
```
|
||||
|
||||
## Something doesn't work
|
||||
|
||||
Check the list of [known problems](/merlin6/known-problems.html) to see if a solution is known.
|
||||
If not, please [contact the admins](/merlin6/contact.html).
|
||||
|
@ -1,7 +1,7 @@
|
||||
---
|
||||
title: Known Problems
|
||||
#tags:
|
||||
keywords: known problems, troubleshooting, illegal instructions, paraview, ansys, shell, opengl, mesa
|
||||
keywords: "known problems, troubleshooting, illegal instructions, paraview, ansys, shell, opengl, mesa, vglrun, module: command not found, error"
|
||||
last_updated: 07 September 2022
|
||||
#summary: ""
|
||||
sidebar: merlin6_sidebar
|
||||
@ -91,7 +91,7 @@ In this example, by setting an environment variable SRUN_CPUS_PER_TASK
|
||||
|
||||
In general, **`/bin/bash` is the recommended default user's SHELL** when working in Merlin.
|
||||
|
||||
Some users might notice that BASH is not the default SHELL when login to Merlin systems, or they might need to run a different SHELL.
|
||||
Some users might notice that BASH is not the default SHELL when logging in to Merlin systems, or they might need to run a different SHELL.
|
||||
This is probably because when the PSI account was requested, no SHELL description was specified or a different one was requested explicitly by the requestor.
|
||||
Users can check the default SHELL specified in their PSI account with the following command:
|
||||
|
||||
@ -159,3 +159,22 @@ fluent -driver x11
|
||||
|
||||
For running Paraview, one can run it with Mesa support or OpenGL support. Please refer to [OpenGL vs Mesa](/merlin6/known-problems.html#opengl-vs-mesa) for
|
||||
further information about how to run it.
|
||||
|
||||
### Module command not found
|
||||
|
||||
In some circumstances the module command may not be initialized properly. For instance, you may see the following error upon logon:
|
||||
|
||||
```
|
||||
bash: module: command not found
|
||||
```
|
||||
|
||||
The most common cause for this is a custom `.bashrc` file which fails to source the global `/etc/bashrc` responsible for setting up PModules in some OS versions. To fix this, add the following to `$HOME/.bashrc`:
|
||||
|
||||
```bash
|
||||
if [ -f /etc/bashrc ]; then
|
||||
. /etc/bashrc
|
||||
fi
|
||||
```
|
||||
|
||||
It can also be fixed temporarily in an existing terminal by running `. /etc/bashrc` manually.
|
||||
|
||||
|
@ -0,0 +1,55 @@
|
||||
---
|
||||
title: Accessing Interactive Nodes
|
||||
#tags:
|
||||
keywords: How to, HowTo, access, accessing, nomachine, ssh
|
||||
last_updated: 07 September 2022
|
||||
#summary: ""
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/interactive.html
|
||||
---
|
||||
|
||||
## SSH Access
|
||||
|
||||
For interactive command shell access, use an SSH client. We recommend enabling SSH's X11 forwarding to allow you to use graphical
|
||||
applications (e.g. a text editor; for more performant graphical access, refer to the sections below). X applications are supported
|
||||
in the login nodes and X11 forwarding can be used for those users who have properly configured X11 support in their desktops, however:
|
||||
|
||||
* Merlin7 administrators **do not offer support** for user desktop configuration (Windows, MacOS, Linux).
|
||||
* Hence, Merlin7 administrators **do not offer official support** for X11 client setup.
|
||||
* Nevertheless, a generic guide for X11 client setup (*Linux*, *Windows* and *MacOS*) is provided below.
|
||||
* PSI desktop configuration issues must be addressed through **[PSI Service Now](https://psi.service-now.com/psisp)** as an *Incident Request*.
|
||||
* Ticket will be redirected to the corresponding Desktop support group (Windows, Linux).
|
||||
|
||||
### Accessing from a Linux client
|
||||
|
||||
Refer to [{How To Use Merlin -> Accessing from Linux Clients}](/merlin7/connect-from-linux.html) for **Linux** SSH client and X11 configuration.
|
||||
|
||||
### Accessing from a Windows client
|
||||
|
||||
Refer to [{How To Use Merlin -> Accessing from Windows Clients}](/merlin7/connect-from-windows.html) for **Windows** SSH client and X11 configuration.
|
||||
|
||||
### Accessing from a MacOS client
|
||||
|
||||
Refer to [{How To Use Merlin -> Accessing from MacOS Clients}](/merlin7/connect-from-macos.html) for **MacOS** SSH client and X11 configuration.
|
||||
|
||||
## NoMachine Remote Desktop Access
|
||||
|
||||
X applications are supported in the login nodes and can run efficiently through a **NoMachine** client. This is the officially supported way to run more demanding X applications on Merlin7.
|
||||
* For PSI Windows workstations, this can be installed from the Software Kiosk as 'NX Client'. If you have difficulties installing, please request support through **[PSI Service Now](https://psi.service-now.com/psisp)** as an *Incident Request*.
|
||||
* For other workstations, the client software can be downloaded from the [Nomachine Website](https://www.nomachine.com/product&p=NoMachine%20Enterprise%20Client).
|
||||
|
||||
### Configuring NoMachine
|
||||
|
||||
Refer to [{How To Use Merlin -> Remote Desktop Access}](/merlin7/nomachine.html) for further instructions of how to configure the NoMachine client and how to access it from PSI and from outside PSI.
|
||||
|
||||
## Login nodes hardware description
|
||||
|
||||
The Merlin7 login nodes are the official machines for accessing the resources of Merlin7.
|
||||
From these machines, users can submit jobs to the Slurm batch system as well as visualize or compile their software.
|
||||
|
||||
The Merlin7 login nodes are the following:
|
||||
|
||||
| Hostname | SSH | NoMachine | Scratch | Scratch Mountpoint |
|
||||
| ----------------------- | --- | --------- | -------- | :------------------ |
|
||||
| login001.merlin7.psi.ch | yes | yes | 1TB NVMe | ``/scratch`` |
|
||||
| login002.merlin7.psi.ch | yes | yes | 1TB NVMe | ``/scratch`` |
|
40
pages/merlin7/01-Quick-Start-Guide/accessing-slurm.md
Normal file
@ -0,0 +1,40 @@
|
||||
---
|
||||
title: Accessing Slurm Cluster
|
||||
#tags:
|
||||
keywords: slurm, batch system, merlin5, merlin7, gmerlin7, cpu, gpu
|
||||
last_updated: 07 September 2022
|
||||
#summary: ""
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/slurm-access.html
|
||||
---
|
||||
|
||||
## The Merlin Slurm clusters
|
||||
|
||||
Merlin contains a multi-cluster setup, where multiple Slurm clusters coexist under the same umbrella.
|
||||
It basically contains the following clusters:
|
||||
|
||||
* The **Merlin7 Slurm CPU cluster**, which is called [**`merlin7`**](/merlin7/slurm-access.html#merlin7-cpu-cluster-access).
|
||||
* The **Merlin7 Slurm GPU cluster**, which is called [**`gmerlin7`**](/merlin7/slurm-access.html#merlin7-gpu-cluster-access).
|
||||
|
||||
## Accessing the Slurm clusters
|
||||
|
||||
Any job submission must be performed from a **Merlin login node**. Please refer to the [**Accessing the Interactive Nodes documentation**](/merlin7/interactive.html)
|
||||
for further information about how to access the cluster.
|
||||
|
||||
In addition, any job *must be submitted from a high performance storage area visible by the login nodes and by the computing nodes*. For this, the possible storage areas are the following:
|
||||
* `/data/user`
|
||||
* `/data/project`
|
||||
* `/data/scratch/shared`
|
||||
|
||||
### Merlin7 CPU cluster access
|
||||
|
||||
The **Merlin7 CPU cluster** (**`merlin7`**) is the default cluster configured on the login nodes. Any job submission will use this cluster by default, unless
|
||||
the option `--cluster` is specified with another of the existing clusters.
|
||||
|
||||
For further information about how to use this cluster, please visit: [**Merlin7 CPU Slurm Cluster documentation**](/merlin7/slurm-configuration.html#cpu-cluster-merlin7).
|
||||
|
||||
### Merlin7 GPU cluster access
|
||||
|
||||
The **Merlin7 GPU cluster** (**`gmerlin7`**) is visible from the login nodes. However, to submit jobs to this cluster, one needs to specify the option `--cluster=gmerlin7` when submitting a job or allocation.
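For example (a minimal sketch, assuming a job script `job.sh`):

```bash
# Submitted to the default CPU cluster (merlin7)
sbatch job.sh

# Explicitly submitted to the GPU cluster
sbatch --cluster=gmerlin7 job.sh
```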
|
||||
|
||||
For further information about how to use this cluster, please visit: [**Merlin7 GPU Slurm Cluster documentation**](/merlin7/slurm-configuration.html#gpu-cluster-gmerlin7).
|
51
pages/merlin7/01-Quick-Start-Guide/code-of-conduct.md
Normal file
@ -0,0 +1,51 @@
|
||||
---
|
||||
title: Code Of Conduct
|
||||
#tags:
|
||||
keywords: code of conduct, rules, principle, policy, policies, administrator, backup
|
||||
last_updated: 07 September 2022
|
||||
#summary: ""
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/code-of-conduct.html
|
||||
---
|
||||
|
||||
## The Basic principle
|
||||
|
||||
The basic principle is courtesy and consideration for other users.
|
||||
|
||||
* Merlin7 is a system shared by many users, therefore you are kindly requested to apply common courtesy in using its resources. Please follow our guidelines which aim at providing and maintaining an efficient compute environment for all our users.
|
||||
* Basic shell programming skills are an essential requirement in a Linux/UNIX HPC cluster environment; a proficiency in shell programming is greatly beneficial.
|
||||
|
||||
## Interactive nodes
|
||||
|
||||
* The interactive nodes (also known as login nodes) are for development and quick testing:
|
||||
* It is **strictly forbidden to run production jobs** on the login nodes. All production jobs must be submitted to the batch system.
|
||||
* It is **forbidden to run long processes** occupying big parts of a login node's resources.
|
||||
* According to the previous rules, **misbehaving running processes will have to be killed**
|
||||
in order to keep the system responsive for other users.
|
||||
|
||||
## Batch system
|
||||
|
||||
* Make sure that no broken or run-away processes are left when your job is done. Keep the process space clean on all nodes.
|
||||
* During the runtime of a job, it is mandatory to use the ``/scratch`` and ``/data/scratch/shared`` partitions for temporary data:
|
||||
* It is **forbidden** to use the ``/data/user`` or ``/data/project`` for that purpose.
|
||||
* Always remove files you do not need any more (e.g. core dumps, temporary files) as early as possible. Keep the disk space clean on all nodes.
|
||||
* Prefer ``/scratch`` over ``/data/scratch/shared`` and _use the latter only when you require the temporary files to be visible from multiple nodes_.
|
||||
* Read the description in **[Merlin7 directory structure](/merlin7/storage.html#merlin7-directories)** for learning about the correct usage of each partition type.
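A minimal sketch of the scratch usage described above (paths and cleanup strategy are illustrative):

```bash
#!/bin/bash
# Illustrative job snippet: keep temporary data on /scratch and clean it up at the end
TMPDIR="/scratch/$USER/$SLURM_JOB_ID"
mkdir -p "$TMPDIR"

# ... run your application, directing temporary files to $TMPDIR ...

rm -rf "$TMPDIR"   # leave the node clean for other users
```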
|
||||
|
||||
## User and project data
|
||||
|
||||
* ***Users are responsible for backing up their own data***. It is recommended to back up the data on independent third-party systems (e.g. LTS, Archive, AFS, SwitchDrive, Windows Shares, etc.).
|
||||
* ***When a user leaves PSI, she or her supervisor/team are responsible for backing up and moving the data out of the cluster***: every few months, the storage space of former users who no longer have an existing and valid PSI account will be recycled.
|
||||
|
||||
{{site.data.alerts.warning}}When a user leaves PSI and his account has been removed, her storage space in Merlin may be recycled.
|
||||
Hence, <b>when a user leaves PSI</b>, she, her supervisor or team <b>must ensure that the data is backed up to an external storage</b>
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
## System Administrator Rights
|
||||
|
||||
* The system administrator has the right to temporarily block the access to Merlin7 for an account violating the Code of Conduct in order to maintain the efficiency and stability of the system.
|
||||
* Repetitive violations by the same user will be escalated to the user's supervisor.
|
||||
* The system administrator has the right to delete files in the **scratch** directories
|
||||
* after a job, if the job failed to clean up its files.
|
||||
* during the job in order to prevent a job from destabilizing a node or multiple nodes.
|
||||
* The system administrator has the right to kill any misbehaving running processes.
|
@ -10,9 +10,55 @@ redirect_from:
|
||||
- /merlin7/index.html
|
||||
---
|
||||
|
||||
## Logging in
|
||||
## About Merlin7
|
||||
|
||||
To get onto the machine, you have to start from ela.cscs.ch or from login.psi.ch at PSI.
|
||||
The Merlin7 cluster has been moving toward **production** state since August 2024; full production is expected by Q4 2025 at the latest. Since January 2025 the system has been generally available,
|
||||
but due to some remaining issues with the platform, the schedule of the migration of users and communities has been delayed. You will be notified well in advance
|
||||
regarding the migration of data.
|
||||
|
||||
$ ssh psi-username@psi-dev.cscs.ch
|
||||
All PSI users can request access to Merlin7; please go to the [Requesting Merlin Accounts](/merlin7/request-account.html) page and complete the steps given there.
|
||||
|
||||
In case you identify errors or missing information, please provide feedback through [merlin-admins mailing list](mailto:merlin-admins@lists.psi.ch) mailing list or [submit a ticket using the PSI service portal](https://psi.service-now.com/psisp).
|
||||
|
||||
## Infrastructure
|
||||
|
||||
### Hardware
|
||||
|
||||
The Merlin7 cluster contains the following node specifications:
|
||||
|
||||
| Node | #N | CPU | RAM | GPU | #GPUs |
|
||||
| ----: | -- | --- | --- | ----: | ---: |
|
||||
| Login | 2 | 2 AMD EPYC 7742 (64 Cores 2.25GHz) | 512GB | | |
|
||||
| CPU | 77 | 2 AMD EPYC 7742 (64 Cores 2.25GHz) | 512GB | | |
|
||||
| GPU A100 | 8 | 2 AMD EPYC 7713 (64 Cores 3.2GHz) | 512GB | A100 80GB | 4 |
|
||||
| GPU GH | 5 | NVIDIA ARM Grace Neoverse v2 (144 Cores 3.1GHz) | 864GB (Unified) | GH200 120GB | 4 |
|
||||
|
||||
### Network
|
||||
|
||||
The Merlin7 cluster builds on top of HPE/Cray technologies, including a high-performance network fabric called Slingshot. This network fabric is able
|
||||
to provide up to 200 Gbit/s throughput between nodes. Further information on Slingshot can be found at [HPE](https://www.hpe.com/psnow/doc/PSN1012904596HREN) and
|
||||
at <https://www.glennklockwood.com/garden/slingshot>.
|
||||
|
||||
Through software interfaces like [libFabric](https://ofiwg.github.io/libfabric/) (which is available on Merlin7), applications can leverage the network seamlessly.
|
||||
|
||||
### Storage
|
||||
|
||||
Unlike previous iterations of the Merlin HPC clusters, Merlin7 _does not_ have any local storage. Instead, storage for the entire cluster is provided through
|
||||
a dedicated storage appliance from HPE/Cray called [ClusterStor](https://www.hpe.com/psnow/doc/PSN1012842049INEN.pdf).
|
||||
|
||||
The appliance is built of several storage servers:
|
||||
|
||||
* 2 management nodes
|
||||
* 2 MDS servers, 12 drives per server, 2.9TiB (Raid10)
|
||||
* 8 OSS-D servers, 106 drives per server, 14.5 TiB HDDs (Gridraid / Raid6)
|
||||
* 4 OSS-F servers, 12 drives per server, 7TiB SSDs (Raid10)
|
||||
|
||||
With effective storage capacity of:
|
||||
|
||||
* 10 PB HDD
|
||||
* value visible on linux: HDD 9302.4 TiB
|
||||
* 162 TB SSD
|
||||
* value visible on linux: SSD 151.6 TiB
|
||||
* 23.6 TiB on Metadata
|
||||
|
||||
The storage is directly connected to the cluster (and each individual node) through the Slingshot NIC.
|
||||
|
24
pages/merlin7/01-Quick-Start-Guide/requesting-accounts.md
Normal file
@ -0,0 +1,24 @@
|
||||
---
|
||||
title: Requesting Merlin Accounts
|
||||
#tags:
|
||||
keywords: registration, register, account, merlin5, merlin7, snow, service now
|
||||
last_updated: 07 September 2022
|
||||
#summary: ""
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/request-account.html
|
||||
---
|
||||
|
||||
## Requesting Access to Merlin7
|
||||
|
||||
All PSI users can ask for access to the Merlin7 cluster. Access to Merlin7 is regulated by the PSI user's account being a member of the **`svc-cluster_merlin7`** access group.
|
||||
|
||||
Requesting **Merlin7** access *has to be done* using the **[Request Linux Group Membership](https://psi.service-now.com/psisp?id=psi_new_sc_cat_item&sys_id=84f2c0c81b04f110679febd9bb4bcbb1)** form, available in [PSI's central Service Catalog](https://psi.service-now.com/psisp) on Service Now.
|
||||
|
||||

|
||||
|
||||
Mandatory fields you need to fill:
|
||||
* **`Order Access for user:`** Defaults to the logged-in user. However, requesting access for another user is also possible.
|
||||
* **`Request membership for group:`** Choose **`svc-cluster_merlin7`**.
|
||||
* **`Justification:`** Please add a short justification of what you will be running on Merlin7.
|
||||
|
||||
Once submitted, those responsible for Merlin will approve the request as soon as possible (within a few hours on working days). Once the request is approved, *it may take up to 30 minutes for the account to be fully configured*.
|
123
pages/merlin7/01-Quick-Start-Guide/requesting-projects.md
Normal file
@ -0,0 +1,123 @@
|
||||
---
|
||||
title: Requesting a Merlin Project
|
||||
#tags:
|
||||
keywords: merlin project, project, snow, service now
|
||||
last_updated: 07 September 2022
|
||||
#summary: ""
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/request-project.html
|
||||
---
|
||||
|
||||
A project owns its own storage area in Merlin, which can be accessed by other group members.
|
||||
|
||||
Projects can receive a higher storage quota than user areas and should be the primary way of organizing bigger storage requirements
|
||||
in a multi-user collaboration.
|
||||
|
||||
Access to a project's directories is governed by project members belonging to a common **Unix group**. You may use an existing
|
||||
Unix group or you may have a new Unix group created especially for the project. The **project responsible** will be the owner of
|
||||
the Unix group (*this is important*)!
|
||||
|
||||
This document explains how to request a new Unix group, how to request membership in existing groups, and the procedure for requesting a Merlin project.
|
||||
|
||||
## About Unix groups
|
||||
|
||||
Before requesting a Merlin project, it is important to have a Unix group that can be used to grant access to it to different members
|
||||
of the project.
|
||||
|
||||
Unix groups in the PSI Active Directory (which is the PSI central database containing user and group information, and more) are defined by the `unx-` prefix, followed by a name.
|
||||
In general, PSI employees working on Linux systems (including HPC clusters, like Merlin) can request a new Unix group, and can become responsible for managing it.
|
||||
In addition, a list of administrators can be set. The administrators, together with the group manager, can approve or deny membership requests. Further information about this topic
|
||||
is covered in the [Linux Documentation - Services Admin Guides: Unix Groups / Group Management](https://linux.psi.ch/services-admin-guide/unix_groups.html), managed by the Central Linux Team.
|
||||
|
||||
To grant access to specific Merlin project directories, some users may need to be added to specific **Unix groups**:
|
||||
* Each Merlin project (i.e. `/data/project/{bio|general}/$projectname`) or experiment (i.e. `/data/experiment/$experimentname`) directory has access restricted by ownership and group membership (with a very few exceptions allowing public access).
|
||||
* Users requiring access to a specific restricted project or experiment directory have to request membership for the corresponding Unix group owning the directory.
|
||||
|
||||
### Requesting a new Unix group
|
||||
|
||||
**If you need a new Unix group** to be created, you need to first get this group through a separate
|
||||
**[PSI Service Now ticket](https://psi.service-now.com/psisp)**. **Please use the following template.**
|
||||
You can also specify the login names of the initial group members and the **owner** of the group.
|
||||
The owner of the group is the person who will be allowed to modify the group.
|
||||
|
||||
* Please open an *Incident Request* with subject:
|
||||
```
|
||||
Subject: Request for new unix group xxxx
|
||||
```
|
||||
|
||||
* and base the text field of the request on this template
|
||||
```
|
||||
Dear HelpDesk
|
||||
|
||||
I would like to request a new unix group.
|
||||
|
||||
Unix Group Name: unx-xxxxx
|
||||
Initial Group Members: xxxxx, yyyyy, zzzzz, ...
|
||||
Group Owner: xxxxx
|
||||
Group Administrators: aaaaa, bbbbb, ccccc, ....
|
||||
|
||||
Best regards,
|
||||
```
|
||||
|
||||
### Requesting Unix group membership
|
||||
|
||||
Existing Merlin projects already have a Unix group assigned. To have access to a project, users must belong to the **Unix group** owning that project.
|
||||
Supervisors should inform new users which extra groups are needed for their project(s). If this information is not known, one can check the permissions of the directory. For example:
|
||||
```bash
|
||||
(base) ❄ [caubet_m@merlin-l-001:/data/user/caubet_m]# ls -ltrhd /data/project/general/$projectname
|
||||
(base) ❄ [caubet_m@merlin-l-001:/data/user/caubet_m]# ls -ltrhd /data/project/bio/$projectname
|
||||
```
|
||||
|
||||
Requesting membership for a specific Unix group *has to be done* with the corresponding **[Request Linux Group Membership](https://psi.service-now.com/psisp?id=psi_new_sc_cat_item&sys_id=84f2c0c81b04f110679febd9bb4bcbb1)** form, available in the [PSI Service Now Service Catalog](https://psi.service-now.com/psisp).
|
||||
|
||||

|
||||
|
||||
Once submitted, the person responsible for the Unix group has to approve the request.
|
||||
|
||||
**Important note**: Requesting access to specific Unix groups requires validation from the person responsible for each Unix group. If you ask for inclusion in many groups, fulfilling the request may take longer, since it depends on more people.
|
||||
|
||||
Further information can be found in the [Linux Documentation - Services User guide: Unix Groups / Group Management](https://linux.psi.ch/services-user-guide/unix_groups.html)
|
||||
|
||||
### Managing Unix Groups
|
||||
|
||||
Other administrative operations on Unix groups are mainly covered in the [Linux Documentation - Services Admin Guides: Unix Groups / Group Management](https://linux.psi.ch/services-admin-guide/unix_groups.html), maintained by the Central Linux Team.
|
||||
|
||||
## Requesting a Merlin project
|
||||
|
||||
Once a Unix group is available, a Merlin project can be requested.
|
||||
To request a project, please provide the following information in a **[PSI Service Now ticket](https://psi.service-now.com/psisp)**
|
||||
|
||||
* Please open an *Incident Request* with subject:
|
||||
```
|
||||
Subject: [Merlin7] Project Request for project name xxxxxx
|
||||
```
|
||||
|
||||
* and base the text field of the request on this template
|
||||
```
|
||||
Dear HelpDesk
|
||||
|
||||
I would like to request a new Merlin7 project.
|
||||
|
||||
Project Name: xxxxx
|
||||
UnixGroup: xxxxx # Must be an existing Unix Group
|
||||
|
||||
The project responsible is the Owner of the Unix Group.
|
||||
If you need a storage quota exceeding the defaults, please provide a description
|
||||
and motivation for the higher storage needs:
|
||||
|
||||
Storage Quota: 1TB with a maximum of 1M Files
|
||||
Reason: (None for default 1TB/1M)
|
||||
|
||||
Best regards,
|
||||
```
|
||||
|
||||
The **default storage quota** for a project is 1TB (with a maximal *Number of Files* of 1M). If you need a larger assignment, you
|
||||
need to request this and provide a description of your storage needs.
|
||||
|
||||
## Further documentation
|
||||
|
||||
Further information is also available in the Linux Central Documentation:
|
||||
* [Unix Group / Group Management for users](https://linux.psi.ch/services-user-guide/unix_groups.html)
|
||||
* [Unix Group / Group Management for group managers](https://linux.psi.ch/services-admin-guide/unix_groups.html)
|
||||
|
||||
**Special thanks** to the **Linux Central Team** and **AIT** for making this possible.
|
379
pages/merlin7/02-How-To-Use-Merlin/archive.md
Normal file
@ -0,0 +1,379 @@
|
||||
---
|
||||
title: Archive & PSI Data Catalog
|
||||
#tags:
|
||||
keywords: linux, archive, data catalog, archiving, lts, tape, long term storage, ingestion, datacatalog
|
||||
last_updated: 31 January 2020
|
||||
summary: "This document describes how to use the PSI Data Catalog for archiving Merlin7 data."
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/archive.html
|
||||
---
|
||||
|
||||
## PSI Data Catalog as a PSI Central Service
|
||||
|
||||
PSI provides access to the ***Data Catalog*** for **long-term data storage and retrieval**. Data is
|
||||
stored on the ***PetaByte Archive*** at the **Swiss National Supercomputing Centre (CSCS)**.
|
||||
|
||||
The Data Catalog and Archive is suitable for:
|
||||
|
||||
* Raw data generated by PSI instruments
|
||||
* Derived data produced by processing some inputs
|
||||
* Data required to reproduce PSI research and publications
|
||||
|
||||
The Data Catalog is part of PSI's effort to conform to the FAIR principles for data management.
|
||||
In accordance with this policy, ***data will be publicly released under CC-BY-SA 4.0 after an
|
||||
embargo period expires.***
|
||||
|
||||
The Merlin cluster is connected to the Data Catalog. Hence, users archive data stored in the
|
||||
Merlin storage under the ``/data`` directories (currently, ``/data/user`` and ``/data/project``).
|
||||
Archiving from other directories is also possible, however the process is much slower as data
|
||||
can not be directly retrieved by the PSI archive central servers (**central mode**), and needs to
|
||||
be indirectly copied to these (**decentral mode**).
|
||||
|
||||
Archiving can be done from any node accessible by the users (usually from the login nodes).
|
||||
|
||||
{{site.data.alerts.tip}} Archiving can be done in two different ways:
|
||||
<br>
|
||||
<b>'Central mode':</b> Possible for the user and project data directories, is the
|
||||
fastest way as it does not require remote copy (data is directly retrieved by central AIT servers from Merlin
|
||||
through 'merlin-archive.psi.ch').
|
||||
<br>
|
||||
<br>
|
||||
<b>'Decentral mode':</b> Possible for any directory, is the slowest way of archiving as it requires
|
||||
to copy ('rsync') the data from Merlin to the central AIT servers.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
## Procedure
|
||||
|
||||
### Overview
|
||||
|
||||
Below are the main steps for using the Data Catalog.
|
||||
|
||||
* Ingest the dataset into the Data Catalog. This makes the data known to the Data Catalog system at PSI:
|
||||
* Prepare a metadata file describing the dataset
|
||||
* Run **``datasetIngestor``** script
|
||||
* If necessary, the script will copy the data to the PSI archive servers
|
||||
* Usually this is necessary when archiving from directories other than **``/data/user``** or
|
||||
**``/data/project``**. It would be also necessary when the Merlin export server (**``merlin-archive.psi.ch``**)
|
||||
is down for any reason.
|
||||
* Archive the dataset:
|
||||
* Visit [https://discovery.psi.ch](https://discovery.psi.ch)
|
||||
* Click **``Archive``** for the dataset
|
||||
* The system will now copy the data to the PetaByte Archive at CSCS
|
||||
* Retrieve data from the catalog:
|
||||
* Find the dataset on [https://discovery.psi.ch](https://discovery.psi.ch) and click **``Retrieve``**
|
||||
* Wait for the data to be copied to the PSI retrieval system
|
||||
* Run **``datasetRetriever``** script
|
||||
|
||||
Since large data sets may take a lot of time to transfer, some steps are designed to happen in the
|
||||
background. The discovery website can be used to track the progress of each step.
|
||||
|
||||
### Account Registration
|
||||
|
||||
Two types of account permit access to the Data Catalog. If your data was collected at a ***beamline***, you may
|
||||
have been assigned a **``p-group``** (e.g. ``p12345``) for the experiment. Other users are assigned **``a-group``**
|
||||
(e.g. ``a-12345``).
|
||||
|
||||
Groups are usually assigned to a PI, and then individual user accounts are added to the group. This must be done
|
||||
under user request through PSI Service Now. For existing **a-groups** and **p-groups**, you can follow the standard
|
||||
central procedures. Alternatively, if you do not know how to do that, follow the Merlin7
|
||||
**[Requesting extra Unix groups](/merlin7/request-account.html#requesting-extra-unix-groups)** procedure, or open
|
||||
a **[PSI Service Now](https://psi.service-now.com/psisp)** ticket.
|
||||
|
||||
### Documentation
|
||||
|
||||
Accessing the Data Catalog is done through the [SciCat software](https://melanie.gitpages.psi.ch/SciCatPages/).
|
||||
Documentation is here: [ingestManual](https://scicatproject.github.io/documentation/Ingestor/ingestManual.html).
|
||||
|
||||
#### Loading datacatalog tools
|
||||
|
||||
The latest datacatalog software is maintained in the PSI module system. To access it from the Merlin systems, run the following command:
|
||||
|
||||
```bash
|
||||
module load datacatalog
|
||||
```
|
||||
|
||||
This can be done from any host in the Merlin cluster accessible to users. Usually, the login nodes will be used for archiving.
|
||||
|
||||
### Finding your token
|
||||
|
||||
As of 2022-04-14 a secure token is required to interact with the data catalog. This is a long random string that replaces the previous user/password authentication (allowing access for non-PSI use cases). **This string should be treated like a password and not shared.**
|
||||
|
||||
1. Go to discovery.psi.ch
|
||||
1. Click 'Sign in' in the top right corner. Click 'Login with PSI account' and log in on the PSI login page.
|
||||
1. You should be redirected to your user settings and see a 'User Information' section. If not, click on your username in the top right and choose 'Settings' from the menu.
|
||||
1. Look for the field 'Catamel Token'. This should be a 64-character string. Click the icon to copy the token.
|
||||
|
||||

|
||||
|
||||
You will need to save this token for later steps. To avoid including it in every command, we suggest saving it to an environment variable (Linux):
|
||||
|
||||
```
|
||||
$ SCICAT_TOKEN=RqYMZcqpqMJqluplbNYXLeSyJISLXfnkwlfBKuvTSdnlpKkU
|
||||
```
|
||||
|
||||
(Hint: prefix this line with a space to avoid saving the token to your bash history.)
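
Alternatively, a minimal sketch using bash's built-in ``read -s``, which prompts for the token without echoing it and without writing it to the shell history:

```bash
# -s suppresses echo, so the token never appears on screen or in the history
read -s -p "SciCat token: " SCICAT_TOKEN && export SCICAT_TOKEN
```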
|
||||
|
||||
Tokens expire after 2 weeks and will need to be fetched from the website again.
|
||||
|
||||
### Ingestion
|
||||
|
||||
The first step to ingesting your data into the catalog is to prepare a file describing what data you have. This is called
|
||||
**``metadata.json``**, and can be created with a text editor (e.g. *``vim``*). It can in principle be saved anywhere,
|
||||
but keeping it with your archived data is recommended. For more information about the format, see the 'Bio metadata'
|
||||
section below. An example follows:
|
||||
|
||||
```json
|
||||
{
|
||||
"principalInvestigator": "albrecht.gessler@psi.ch",
|
||||
"creationLocation": "/PSI/EMF/JEOL2200FS",
|
||||
"dataFormat": "TIFF+LZW Image Stack",
|
||||
"sourceFolder": "/gpfs/group/LBR/pXXX/myimages",
|
||||
"owner": "Wilhelm Tell",
|
||||
"ownerEmail": "wilhelm.tell@psi.ch",
|
||||
"type": "raw",
|
||||
"description": "EM micrographs of amygdalin",
|
||||
"ownerGroup": "a-12345",
|
||||
"scientificMetadata": {
|
||||
"description": "EM micrographs of amygdalin",
|
||||
"sample": {
|
||||
"name": "Amygdalin beta-glucosidase 1",
|
||||
"uniprot": "P29259",
|
||||
"species": "Apple"
|
||||
},
|
||||
"dataCollection": {
|
||||
"date": "2018-08-01"
|
||||
},
|
||||
"microscopeParameters": {
|
||||
"pixel size": {
|
||||
"v": 0.885,
|
||||
"u": "A"
|
||||
},
|
||||
"voltage": {
|
||||
"v": 200,
|
||||
"u": "kV"
|
||||
},
|
||||
"dosePerFrame": {
|
||||
"v": 1.277,
|
||||
"u": "e/A2"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
It is recommended to use the [ScicatEditor](https://bliven_s.gitpages.psi.ch/SciCatEditor/) for creating metadata files. This is a browser-based tool specifically for ingesting PSI data. Using the tool avoids syntax errors and provides templates for common data sets and options. The finished JSON file can then be downloaded to Merlin or copied into a text editor.
|
||||
|
||||
Another option is to use the SciCat graphical interface from NoMachine. This provides a graphical way of selecting data to archive and is particularly useful for data associated with a DUO experiment and p-group. Type ``SciCat`` to get started after loading the ``datacatalog`` module. The GUI also replaces the command-line ingestion described below.
|
||||
|
||||
The following steps can be run from wherever you saved your ``metadata.json``. First, perform a "dry-run" which will check the metadata for errors:
|
||||
|
||||
```bash
|
||||
datasetIngestor --token $SCICAT_TOKEN metadata.json
|
||||
```
|
||||
|
||||
It will ask for your PSI credentials and then print some info about the data to be ingested. If there are no errors, proceed to the real ingestion:
|
||||
|
||||
```bash
|
||||
datasetIngestor --token $SCICAT_TOKEN --ingest --autoarchive metadata.json
|
||||
```
|
||||
|
||||
You will be asked whether you want to copy the data to the central system:
|
||||
|
||||
* If you are on the Merlin cluster and you are archiving data from ``/data/user`` or ``/data/project``, answer 'no' since the data catalog can
|
||||
directly read the data.
|
||||
* If you are archiving from a directory other than ``/data/user`` or ``/data/project``, or you are on a desktop computer, answer 'yes'. Copying large datasets
|
||||
to the PSI archive system may take quite a while (minutes to hours).
|
||||
|
||||
If there are no errors, your data has been accepted into the data catalog! From now on, no changes should be made to the ingested data.
|
||||
This is important, since the next step is for the system to copy all the data to the CSCS Petabyte archive. Writing to tape is slow, so
|
||||
this process may take several days, and it will fail if any modifications are detected.
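
If you want to double-check that nothing was modified after ingestion, one small sketch (assuming ``metadata.json`` sits in the dataset folder and was last touched just before ingesting) is to list any files newer than it:

```bash
# Files with a modification time later than metadata.json may cause the archive job to fail
find . -type f -newer metadata.json
```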
|
||||
|
||||
If using the ``--autoarchive`` option as suggested above, your dataset should now be in the queue. Check the data catalog:
|
||||
[https://discovery.psi.ch](https://discovery.psi.ch). Your job should have status 'WorkInProgress'. You will receive an email when the ingestion
|
||||
is complete.
|
||||
|
||||
If you didn't use ``--autoarchive``, you need to manually move the dataset into the archive queue. From **discovery.psi.ch**, navigate to the 'Archive'
|
||||
tab. You should see the newly ingested dataset. Check the dataset and click **``Archive``**. You should see the status change from **``datasetCreated``** to
|
||||
**``scheduleArchiveJob``**. This indicates that the data is in the process of being transferred to CSCS.
|
||||
|
||||
After a few days the dataset's status will change to **``datasetOnArchive``**, indicating the data is stored in the archive. At this point it is safe to delete the original data.
|
||||
|
||||
#### Useful commands
|
||||
|
||||
Running the datasetIngestor in dry mode (**without** ``--ingest``) finds most errors. However, it is sometimes convenient to find potential errors
|
||||
yourself with simple unix commands.
|
||||
|
||||
Find problematic filenames
|
||||
|
||||
```bash
|
||||
find . -iregex '.*/[^/]*[^a-zA-Z0-9_ ./-][^/]*'
|
||||
```
|
||||
|
||||
Find broken links
|
||||
|
||||
```bash
|
||||
find -L . -type l
|
||||
```
|
||||
|
||||
Find symbolic links pointing outside the dataset directory
|
||||
|
||||
```bash
|
||||
find . -type l -exec bash -c 'realpath --relative-base "`pwd`" "$0" 2>/dev/null |egrep "^[./]" |sed "s|^|$0 ->|" ' '{}' ';'
|
||||
```
|
||||
|
||||
Delete certain files (use with caution)
|
||||
|
||||
```bash
|
||||
# Empty directories
|
||||
find . -type d -empty -delete
|
||||
# Backup files
|
||||
find . -name '*~' -delete
|
||||
find . -name '*#autosave#' -delete
|
||||
```
|
||||
|
||||
#### Troubleshooting & Known Bugs
|
||||
|
||||
* The following message can be safely ignored:
|
||||
|
||||
```bash
|
||||
key_cert_check_authority: invalid certificate
|
||||
Certificate invalid: name is not a listed principal
|
||||
```
|
||||
It indicates that no Kerberos token was provided for authentication. You can avoid the warning by first running ``kinit`` (on PSI Linux systems).
|
||||
|
||||
* For decentral ingestion cases, the copy step is indicated by a message ``Running [/usr/bin/rsync -e ssh -avxz ...``. It is expected that this
|
||||
step will take a long time and may appear to have hung. You can check which files have been successfully transferred using rsync:
|
||||
|
||||
```bash
|
||||
rsync --list-only user_n@pb-archive.psi.ch:archive/UID/PATH/
|
||||
```
|
||||
|
||||
where UID is the dataset ID (12345678-1234-1234-1234-123456789012) and PATH is the absolute path to your data. Note that rsync creates directories first and that the transfer order is not alphabetical in some cases, but it should be possible to see whether any data has transferred.
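
For instance, using the values from the sample ingestion output shown further below (short dataset id ``0a9fe316-c9e7-4cc5-8856-e1346dd31e31``, source folder ``/data/project/bio/myproject/archive``), the check would look like:

```bash
# List what has already arrived on the archive server for this dataset
rsync --list-only user_n@pb-archive.psi.ch:archive/0a9fe316-c9e7-4cc5-8856-e1346dd31e31/data/project/bio/myproject/archive/
```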
|
||||
|
||||
* There is currently a limit on the number of files per dataset (technically, the limit comes from the total length of all file paths). It is recommended to break datasets up into 300'000 files or fewer.
|
||||
* If it is not possible or desirable to split data between multiple datasets, an alternative workaround is to package the files into a tarball. For data that is already compressed, create the tarball without the ``-z`` option for a considerable speedup:
|
||||
|
||||
```
|
||||
tar -cf [output].tar [srcdir]
|
||||
```
|
||||
|
||||
Uncompressed data can be compressed on the cluster using the following command:
|
||||
|
||||
```
|
||||
sbatch /data/software/Slurm/Utilities/Parallel_TarGz.batch -s [srcdir] -t [output].tar -n
|
||||
```
|
||||
|
||||
Run ``/data/software/Slurm/Utilities/Parallel_TarGz.batch -h`` for more details and options.
|
||||
|
||||
#### Sample ingestion output (datasetIngestor 1.1.11)
|
||||
<details>
|
||||
<summary>[Show Example]: Sample ingestion output (datasetIngestor 1.1.11)</summary>
|
||||
<pre class="terminal code highlight js-syntax-highlight plaintext" lang="plaintext" markdown="false">
|
||||
/data/project/bio/myproject/archive $ datasetIngestor -copy -autoarchive -allowexistingsource -ingest metadata.json
|
||||
2019/11/06 11:04:43 Latest version: 1.1.11
|
||||
|
||||
|
||||
2019/11/06 11:04:43 Your version of this program is up-to-date
|
||||
2019/11/06 11:04:43 You are about to add a dataset to the === production === data catalog environment...
|
||||
2019/11/06 11:04:43 Your username:
|
||||
user_n
|
||||
2019/11/06 11:04:48 Your password:
|
||||
2019/11/06 11:04:52 User authenticated: XXX
|
||||
2019/11/06 11:04:52 User is member in following a or p groups: XXX
|
||||
2019/11/06 11:04:52 OwnerGroup information a-XXX verified successfully.
|
||||
2019/11/06 11:04:52 contactEmail field added: XXX
|
||||
2019/11/06 11:04:52 Scanning files in dataset /data/project/bio/myproject/archive
|
||||
2019/11/06 11:04:52 No explicit filelistingPath defined - full folder /data/project/bio/myproject/archive is used.
|
||||
2019/11/06 11:04:52 Source Folder: /data/project/bio/myproject/archive at /data/project/bio/myproject/archive
|
||||
2019/11/06 11:04:57 The dataset contains 100000 files with a total size of 50000000000 bytes.
|
||||
2019/11/06 11:04:57 creationTime field added: 2019-07-29 18:47:08 +0200 CEST
|
||||
2019/11/06 11:04:57 endTime field added: 2019-11-06 10:52:17.256033 +0100 CET
|
||||
2019/11/06 11:04:57 license field added: CC BY-SA 4.0
|
||||
2019/11/06 11:04:57 isPublished field added: false
|
||||
2019/11/06 11:04:57 classification field added: IN=medium,AV=low,CO=low
|
||||
2019/11/06 11:04:57 Updated metadata object:
|
||||
{
|
||||
"accessGroups": [
|
||||
"XXX"
|
||||
],
|
||||
"classification": "IN=medium,AV=low,CO=low",
|
||||
"contactEmail": "XXX",
|
||||
"creationLocation": "XXX",
|
||||
"creationTime": "2019-07-29T18:47:08+02:00",
|
||||
"dataFormat": "XXX",
|
||||
"description": "XXX",
|
||||
"endTime": "2019-11-06T10:52:17.256033+01:00",
|
||||
"isPublished": false,
|
||||
"license": "CC BY-SA 4.0",
|
||||
"owner": "XXX",
|
||||
"ownerEmail": "XXX",
|
||||
"ownerGroup": "a-XXX",
|
||||
"principalInvestigator": "XXX",
|
||||
"scientificMetadata": {
|
||||
...
|
||||
},
|
||||
"sourceFolder": "/data/project/bio/myproject/archive",
|
||||
"type": "raw"
|
||||
}
|
||||
2019/11/06 11:04:57 Running [/usr/bin/ssh -l user_n pb-archive.psi.ch test -d /data/project/bio/myproject/archive].
|
||||
key_cert_check_authority: invalid certificate
|
||||
Certificate invalid: name is not a listed principal
|
||||
user_n@pb-archive.psi.ch's password:
|
||||
2019/11/06 11:05:04 The source folder /data/project/bio/myproject/archive is not centrally available (decentral use case).
|
||||
The data must first be copied to a rsync cache server.
|
||||
|
||||
|
||||
2019/11/06 11:05:04 Do you want to continue (Y/n)?
|
||||
Y
|
||||
2019/11/06 11:05:09 Created dataset with id 12.345.67890/12345678-1234-1234-1234-123456789012
|
||||
2019/11/06 11:05:09 The dataset contains 108057 files.
|
||||
2019/11/06 11:05:10 Created file block 0 from file 0 to 1000 with total size of 413229990 bytes
|
||||
2019/11/06 11:05:10 Created file block 1 from file 1000 to 2000 with total size of 416024000 bytes
|
||||
2019/11/06 11:05:10 Created file block 2 from file 2000 to 3000 with total size of 416024000 bytes
|
||||
2019/11/06 11:05:10 Created file block 3 from file 3000 to 4000 with total size of 416024000 bytes
|
||||
...
|
||||
2019/11/06 11:05:26 Created file block 105 from file 105000 to 106000 with total size of 416024000 bytes
|
||||
2019/11/06 11:05:27 Created file block 106 from file 106000 to 107000 with total size of 416024000 bytes
|
||||
2019/11/06 11:05:27 Created file block 107 from file 107000 to 108000 with total size of 850195143 bytes
|
||||
2019/11/06 11:05:27 Created file block 108 from file 108000 to 108057 with total size of 151904903 bytes
|
||||
2019/11/06 11:05:27 short dataset id: 0a9fe316-c9e7-4cc5-8856-e1346dd31e31
|
||||
2019/11/06 11:05:27 Running [/usr/bin/rsync -e ssh -avxz /data/project/bio/myproject/archive/ user_n@pb-archive.psi.ch:archive
|
||||
/0a9fe316-c9e7-4cc5-8856-e1346dd31e31/data/project/bio/myproject/archive].
|
||||
key_cert_check_authority: invalid certificate
|
||||
Certificate invalid: name is not a listed principal
|
||||
user_n@pb-archive.psi.ch's password:
|
||||
Permission denied, please try again.
|
||||
user_n@pb-archive.psi.ch's password:
|
||||
/usr/libexec/test_acl.sh: line 30: /tmp/tmpacl.txt: Permission denied
|
||||
/usr/libexec/test_acl.sh: line 30: /tmp/tmpacl.txt: Permission denied
|
||||
/usr/libexec/test_acl.sh: line 30: /tmp/tmpacl.txt: Permission denied
|
||||
/usr/libexec/test_acl.sh: line 30: /tmp/tmpacl.txt: Permission denied
|
||||
/usr/libexec/test_acl.sh: line 30: /tmp/tmpacl.txt: Permission denied
|
||||
...
|
||||
2019/11/06 12:05:08 Successfully updated {"pid":"12.345.67890/12345678-1234-1234-1234-123456789012",...}
|
||||
2019/11/06 12:05:08 Submitting Archive Job for the ingested datasets.
|
||||
2019/11/06 12:05:08 Job response Status: okay
|
||||
2019/11/06 12:05:08 A confirmation email will be sent to XXX
|
||||
12.345.67890/12345678-1234-1234-1234-123456789012
|
||||
</pre>
|
||||
</details>
|
||||
|
||||
### Publishing
|
||||
|
||||
After datasets are ingested they can be assigned a public DOI. This can be included in publications and will make the datasets available on http://doi.psi.ch.
|
||||
|
||||
For instructions on this, please read the ['Publish' section in the ingest manual](https://scicatproject.github.io/documentation/Ingestor/ingestManual.html#sec-8).
|
||||
|
||||
### Retrieving data
|
||||
|
||||
Retrieving data from the archive is also initiated through the Data Catalog. Please read the ['Retrieve' section in the ingest manual](https://scicatproject.github.io/documentation/Ingestor/ingestManual.html#sec-6).
|
||||
|
||||
## Further Information
|
||||
|
||||
* [PSI Data Catalog](https://discovery.psi.ch)
|
||||
* [Full Documentation](https://scicatproject.github.io/documentation/Ingestor/ingestManual.html)
|
||||
* [Published Datasets (doi.psi.ch)](https://doi.psi.ch)
|
||||
* Data Catalog [PSI page](https://www.psi.ch/photon-science-data-services/data-catalog-and-archive)
|
||||
* Data catalog [SciCat Software](https://scicatproject.github.io/)
|
||||
* [FAIR](https://www.nature.com/articles/sdata201618) definition and [SNF Research Policy](http://www.snf.ch/en/theSNSF/research-policies/open_research_data/Pages/default.aspx#FAIR%20Data%20Principles%20for%20Research%20Data%20Management)
|
||||
* [Petabyte Archive at CSCS](https://www.cscs.ch/fileadmin/user_upload/contents_publications/annual_reports/AR2017_Online.pdf)
|
48
pages/merlin7/02-How-To-Use-Merlin/connect-from-linux.md
Normal file
@ -0,0 +1,48 @@
|
||||
---
|
||||
title: Connecting from a Linux Client
|
||||
#tags:
|
||||
keywords: linux, connecting, client, configuration, SSH, X11
|
||||
last_updated: 07 September 2022
|
||||
summary: "This document describes a recommended setup for a Linux client."
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/connect-from-linux.html
|
||||
---
|
||||
|
||||
## SSH without X11 Forwarding
|
||||
|
||||
This is the standard method. Official X11 support is provided through [NoMachine](/merlin7/nomachine.html).
|
||||
For normal SSH sessions, use your SSH client as follows:
|
||||
|
||||
```bash
|
||||
ssh $username@login001.merlin7.psi.ch
|
||||
ssh $username@login002.merlin7.psi.ch
|
||||
```
|
||||
|
||||
## SSH with X11 Forwarding
|
||||
|
||||
Official X11 Forwarding support is through NoMachine. Please follow the document
|
||||
[{Job Submission -> Interactive Jobs}](/merlin7/interactive-jobs.html#Requirements) and
|
||||
[{Accessing Merlin -> NoMachine}](/merlin7/nomachine.html) for more details. However,
|
||||
we provide a small recipe for enabling X11 Forwarding in Linux.
|
||||
|
||||
* For enabling client X11 forwarding, add the following to the start of ``~/.ssh/config``
|
||||
to implicitly add ``-X`` to all ssh connections (a combined example is shown at the end of this page):
|
||||
|
||||
```bash
|
||||
ForwardAgent yes
|
||||
ForwardX11 yes
ForwardX11Trusted yes
|
||||
```
|
||||
|
||||
* Alternatively, you can add the option ``-X`` (or ``-Y`` for trusted X11 forwarding) to the ``ssh`` command. For example:
|
||||
|
||||
```bash
|
||||
ssh -X $username@login001.merlin7.psi.ch
|
||||
ssh -X $username@login002.merlin7.psi.ch
|
||||
```
|
||||
|
||||
* To test that X11 forwarding works, just run ``sview``. An X11-based Slurm view of the cluster should
|
||||
pop up in your client session:
|
||||
|
||||
```bash
|
||||
sview
|
||||
```
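
Putting the pieces above together, a minimal sketch of a ``~/.ssh/config`` host entry (the ``merlin7`` alias and the username are example values; adjust them to your own account):

```bash
Host merlin7
    HostName login001.merlin7.psi.ch
    # Replace 'myusername' with your PSI username
    User myusername
    ForwardAgent yes
    ForwardX11 yes
    ForwardX11Trusted yes
```

With such an entry, ``ssh merlin7`` opens a session on the login node with agent and X11 forwarding already enabled.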
|
58
pages/merlin7/02-How-To-Use-Merlin/connect-from-macos.md
Normal file
@ -0,0 +1,58 @@
|
||||
---
|
||||
title: Connecting from a MacOS Client
|
||||
#tags:
|
||||
keywords: MacOS, mac os, mac, connecting, client, configuration, SSH, X11
|
||||
last_updated: 07 September 2022
|
||||
summary: "This document describes a recommended setup for a MacOS client."
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/connect-from-macos.html
|
||||
---
|
||||
|
||||
## SSH without X11 Forwarding
|
||||
|
||||
This is the standard method. Official X11 support is provided through [NoMachine](/merlin7/nomachine.html).
|
||||
For normal SSH sessions, use your SSH client as follows:
|
||||
|
||||
```bash
|
||||
ssh $username@login001.merlin7.psi.ch
|
||||
ssh $username@login002.merlin7.psi.ch
|
||||
```
|
||||
|
||||
## SSH with X11 Forwarding
|
||||
|
||||
### Requirements
|
||||
|
||||
To run SSH with X11 Forwarding in MacOS, one needs to have an X server running.
|
||||
The official X Server for MacOS is **[XQuartz](https://www.xquartz.org/)**. Please ensure
|
||||
you have it running before starting an SSH connection with X11 forwarding.
|
||||
|
||||
### SSH with X11 Forwarding in MacOS
|
||||
|
||||
Official X11 support is through NoMachine. Please follow the document
|
||||
[{Job Submission -> Interactive Jobs}](/merlin7/interactive-jobs.html#Requirements) and
|
||||
[{Accessing Merlin -> NoMachine}](/merlin7/nomachine.html) for more details. However,
|
||||
we provide a small recipe for enabling X11 Forwarding in MacOS.
|
||||
|
||||
* Ensure that **[XQuartz](https://www.xquartz.org/)** is installed and running in your MacOS.
|
||||
|
||||
* For enabling client X11 forwarding, add the following to the start of ``~/.ssh/config``
|
||||
to implicitly add ``-X`` to all ssh connections:
|
||||
|
||||
```bash
|
||||
ForwardAgent yes
|
||||
ForwardX11 yes
ForwardX11Trusted yes
|
||||
```
|
||||
|
||||
* Alternatively, you can add the option ``-X`` (or ``-Y`` for trusted X11 forwarding) to the ``ssh`` command. For example:
|
||||
|
||||
```bash
|
||||
ssh -X $username@login001.merlin7.psi.ch
|
||||
ssh -X $username@login002.merlin7.psi.ch
|
||||
```
|
||||
|
||||
* To test that X11 forwarding works, just run ``sview``. An X11-based Slurm view of the cluster should
|
||||
pop up in your client session.
|
||||
|
||||
```bash
|
||||
sview
|
||||
```
|
47
pages/merlin7/02-How-To-Use-Merlin/connect-from-windows.md
Normal file
@ -0,0 +1,47 @@
|
||||
---
|
||||
title: Connecting from a Windows Client
|
||||
keywords: microsoft, mocosoft, windows, putty, xming, connecting, client, configuration, SSH, X11
|
||||
last_updated: 07 September 2022
|
||||
summary: "This document describes a recommended setup for a Windows client."
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/connect-from-windows.html
|
||||
---
|
||||
|
||||
## SSH with PuTTY without X11 Forwarding
|
||||
|
||||
PuTTY is one of the most common tools for SSH.
|
||||
|
||||
Check whether the following software packages are installed on the Windows workstation by
|
||||
inspecting the *Start* menu (hint: use the *Search* box to save time):
|
||||
* PuTTY (should be already installed)
|
||||
* *[Optional]* Xming (needed for [SSH with X11 Forwarding](/merlin7/connect-from-windows.html#ssh-with-x11-forwarding))
|
||||
|
||||
If they are missing, you can install them using the Software Kiosk icon on the Desktop.
|
||||
|
||||
1. Start PuTTY
|
||||
|
||||
2. *[Optional]* Enable ``xterm`` to have similar mouse behaviour as in Linux:
|
||||
|
||||

|
||||
|
||||
3. Create session to a Merlin login node and *Open*:
|
||||
|
||||

|
||||
|
||||
|
||||
## SSH with PuTTY with X11 Forwarding
|
||||
|
||||
Official X11 Forwarding support is through NoMachine. Please follow the document
|
||||
[{Job Submission -> Interactive Jobs}](/merlin7/interactive-jobs.html#Requirements) and
|
||||
[{Accessing Merlin -> NoMachine}](/merlin7/nomachine.html) for more details. However,
|
||||
we provide a small recipe for enabling X11 Forwarding in Windows.
|
||||
|
||||
Check whether **Xming** is installed on the Windows workstation by inspecting the
|
||||
*Start* menu (hint: use the *Search* box to save time). If missing, you can install it by
|
||||
using the Software Kiosk icon (should be located on the Desktop).
|
||||
|
||||
1. Ensure that an X server (**Xming**) is running. Otherwise, start it.
|
||||
|
||||
2. Enable X11 Forwarding in your SSH client. For example, for PuTTY:
|
||||
|
||||

|
@ -1,90 +0,0 @@
|
||||
---
|
||||
title: Cray Module Environment
|
||||
#tags:
|
||||
keywords: cray, module
|
||||
last_updated: 24 Mai 2023
|
||||
summary: "This document describes how to use the cray module environment on Merlin7."
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/cray-module-env.html
|
||||
---
|
||||
|
||||
To switch from the PSI Module Environment to the provided Cray Programming Environment (CPE), please do the following
|
||||
Switch to Cray PrgEnv
|
||||
|
||||
$ source /etc/bash.bashrc.local.2023-04-26T164631
|
||||
|
||||
Cray Programming Environment, with Cray's compilers and MPI are loaded by default. You can check with module list command or use the short form as shown below.
|
||||
ml
|
||||
|
||||
$ ml #means: module list
|
||||
Currently Loaded Modules:
|
||||
1) craype-x86-rome 4) perftools-base/21.12.0 7) craype/2.7.13 10) cray-libsci/21.08.1.2
|
||||
2) libfabric/1.15.2.0 5) xpmem/2.4.4-2.3_13.8__gff0e1d9.shasta 8) cray-dsmml/0.2.2 11) PrgEnv-cray/8.3.0
|
||||
3) craype-network-ofi 6) cce/13.0.0 9) cray-mpich/8.1.12
|
||||
|
||||
You will notice an unfamiliar PrgEnv-cray/8.3.0 that was loaded. This is meta-module that Cray provides to simplify the switch of compilers and their associated dependencies and libraries, as a whole called Programming Environment. In the Cray Programming Environment, there are 4 key modules.
|
||||
|
||||
cray-libsci is a collection of numerical routines tuned for performance on Cray systems.
|
||||
libfabric is an important low-level library that allows you to take advantage of the high performance Slingshot11 network.
|
||||
cray-mpich is a CUDA-aware MPI implementation.
|
||||
cce is the compiler from Cray. C/C++ compilers are based on Clang/LLVM while Fortran supports Fortran 2018 standard. More info: https://user.cscs.ch/computing/compilation/cray/
|
||||
|
||||
You can switch between different programming environments. You can check the available module with module avail command or the short form (ml av)as shown below.
|
||||
ml av
|
||||
$ ml av PrgEnv
|
||||
|
||||
PrgEnv-aocc/8.3.0 (D) PrgEnv-cray/8.3.3 PrgEnv-intel/8.3.0 (D) PrgEnv-nvidia/8.3.0 (D)
|
||||
PrgEnv-aocc/8.3.3 PrgEnv-gnu/8.3.0 (D) PrgEnv-intel/8.3.3 PrgEnv-nvidia/8.3.3
|
||||
PrgEnv-cray/8.3.0 (L,D) PrgEnv-gnu/8.3.3 PrgEnv-nvhpc/8.3.3
|
||||
|
||||
If you want an in-depth information on the different programming environments, you can use the module spider command. This command allows you to explore the hierarchical structure of Lmod module environment that is in use here.
|
||||
module spider
|
||||
|
||||
$ module spider PrgEnv-cray
|
||||
---------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
PrgEnv-cray:
|
||||
---------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Versions:
|
||||
PrgEnv-cray/8.3.0
|
||||
PrgEnv-cray/8.3.3
|
||||
|
||||
---------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
For detailed information about a specific "PrgEnv-cray" package (including how to load the modules) use the module's full name.
|
||||
Note that names that have a trailing (E) are extensions provided by other modules.
|
||||
For example:
|
||||
|
||||
$ module spider PrgEnv-cray/8.3.3
|
||||
---------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
## Switching compiler suites
|
||||
|
||||
Compiler suites can be exchanged with PrgEnv (Programming Environments) provided by HPE-Cray. The wrappers call the correct compiler with appropriate options to build and link applications with relevant libraries, as required by the loaded modules (only dynamic linking is supported) and therefore should replace direct calls to compiler drivers in Makefiles and build scripts.
|
||||
|
||||
To swap the the compiler suite from the default Cray to Intel, you can do the following. If there are no PrgEnv loaded, then you can load the one you prefer with ml command.
|
||||
Swapping PrgEnv
|
||||
|
||||
$ module swap PrgEnv-cray PrgEnv-intel
|
||||
|
||||
Please note that in a Cray provided PrgEnv, cray-mpich will always be used by default. This is because this MPI library has been strongly optimised for a Cray system. In the case of Intel, cray-mpich has been compiled with Intel compiler to improve interoperability. Please note that the same condition applies when you use the GNU and AMD (AOCC) programming environments.
|
||||
|
||||
If you would like to use a pure Intel-MPI, please refer to the the advanced guide on how to install and set up Intel such that the optimised Slingshot11 network is used.
|
||||
|
||||
You can switch versions of intel compilers by using the module swap command. If you want to know the available versions of intel compilers, you can use the module avail or module spider commands.
|
||||
|
||||
An example to switch the version of Cray compiler while you are in the PrgEnv-cray environment
|
||||
module swap
|
||||
|
||||
$ module swap cce/13.0.0 cce/14.0.0
|
||||
|
||||
Due to the use of wrapper, you can use the same commands when compiling with say cce or gnu compilers. In the case of intel compilers, you have to use the original commands, e.g. icc.
|
||||
|
||||
C compiler : cc
|
||||
C++ compiler : CC
|
||||
Fortran compiler : ftn
|
||||
MPI C compiler: mpicc
|
||||
MPI C++ compiler: mpic++
|
||||
MPI Fortran compiler: mpif90
|
||||
|
||||
When using gnu compiler, you will need to specify the architecture (-march or -mtune or --offload-arch) you would like to optimise your code for, in this case Milan, you need to use craype-x86-milan.
|
||||
|
@ -1,13 +0,0 @@
|
||||
---
|
||||
title: Transferring files between systems
|
||||
#tags:
|
||||
keywords: files, transfer, scp
|
||||
last_updated: 24 Mai 2023
|
||||
summary: "This document describes some possibilities to transfer files from Merlin6 to Merlin7."
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/file-transfers.html
|
||||
---
|
||||
|
||||
From Merlin6 to Merlin7 you can use (ideally SSH keys should be set beforehand):
|
||||
|
||||
$ rsync -avAHXS ~/merlin6_localdata $USER@psi-dev.cscs.ch:/scratch/home/$USER/
|
216
pages/merlin7/02-How-To-Use-Merlin/kerberos.md
Normal file
@ -0,0 +1,216 @@
|
||||
---
|
||||
title: Kerberos and AFS authentication
|
||||
#tags:
|
||||
keywords: kerberos, AFS, kinit, klist, keytab, tickets, connecting, client, configuration, slurm
|
||||
last_updated: 07 September 2022
|
||||
summary: "This document describes how to use Kerberos."
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/kerberos.html
|
||||
---
|
||||
|
||||
Projects and users have their own areas in the central PSI AFS service. In order
|
||||
to access these areas, valid Kerberos and AFS tickets must be granted.
|
||||
|
||||
These tickets are automatically granted when accessing through SSH with
|
||||
username and password. Alternatively, one can get a granting ticket with the `kinit` (Kerberos)
|
||||
and `aklog` (AFS ticket, which needs to be run after `kinit`) commands.
|
||||
|
||||
Due to PSI security policies, the maximum lifetime of the ticket is 7 days, and the default
|
||||
time is 10 hours. This means that one needs to constantly renew (`krenew` command) the existing
|
||||
granting tickets, and their validity cannot be extended beyond 7 days. At this point,
|
||||
one needs to obtain new granting tickets.
|
||||
|
||||
## Obtaining granting tickets with username and password
|
||||
|
||||
As already described above, the most common use case is to obtain Kerberos and AFS granting tickets
|
||||
by introducing username and password:
|
||||
|
||||
* When logging in to Merlin through the SSH protocol, if this is done with username + password authentication,
|
||||
tickets for Kerberos and AFS will be automatically obtained.
|
||||
* When logging in to Merlin through NoMachine, no Kerberos or AFS tickets are granted. Therefore, users need to
|
||||
run `kinit` (to obtain a granting Kerberos ticket) followed by `aklog` (to obtain a granting AFS ticket).
|
||||
See further details below.
|
||||
|
||||
To manually obtain granting tickets, one has to:
|
||||
|
||||
1. To obtain a granting Kerberos ticket, one needs to run `kinit $USER` and enter the PSI password.
|
||||
|
||||
```bash
|
||||
kinit $USER@D.PSI.CH
|
||||
```
|
||||
|
||||
2. To obtain a granting ticket for AFS, one needs to run `aklog`. No password is necessary, but a valid
|
||||
Kerberos ticket is mandatory.
|
||||
|
||||
```bash
|
||||
aklog
|
||||
```
|
||||
|
||||
3. To list the status of your granted tickets, users can use the `klist` command.
|
||||
|
||||
```bash
|
||||
klist
|
||||
```
|
||||
|
||||
4. To extend the validity of existing granting tickets, users can use the `krenew` command.
|
||||
|
||||
```bash
|
||||
krenew
|
||||
```
|
||||
|
||||
* Keep in mind that the maximum lifetime for granting tickets is 7 days, therefore `krenew` cannot be used beyond that limit,
|
||||
and then `kinit` should be used instead.
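
As a small sketch (assuming the PSI KDC permits it), you can request the maximum renewable lifetime up front, so that `krenew` can keep extending the same ticket during the whole 7-day window:

```bash
# Request a ticket with a renewable lifetime of 7 days; extend it later with 'krenew'
kinit -r 7d $USER@D.PSI.CH
```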
|
||||
|
||||
## Obtaining granting tickets with keytab
|
||||
|
||||
Sometimes, obtaining granting tickets by using password authentication is not possible. An example is user Slurm jobs
|
||||
requiring access to private areas in AFS. For that, there's the possibility to generate a **keytab** file.
|
||||
|
||||
Be aware that the **keytab** file must be **private**, **fully protected** by correct permissions and not shared with any
|
||||
other users.
|
||||
|
||||
### Creating a keytab file
|
||||
|
||||
For generating a **keytab**, one has to:
|
||||
|
||||
1. Load a newer Kerberos ( `krb5/1.20` or higher) from Pmodules:
|
||||
|
||||
```bash
|
||||
module load krb5/1.20
|
||||
```
|
||||
|
||||
2. Create a private directory for storing the Kerberos **keytab** file
|
||||
|
||||
```bash
|
||||
mkdir -p ~/.k5
|
||||
```
|
||||
|
||||
3. Run the `ktutil` utility which comes with the loaded `krb5` Pmodule:
|
||||
|
||||
```bash
|
||||
ktutil
|
||||
```
|
||||
|
||||
4. In the `ktutil` console, one has to generate a **keytab** file as follows:
|
||||
|
||||
```bash
|
||||
# Replace $USER by your username
|
||||
add_entry -password -k 0 -f -p $USER
|
||||
wkt /data/user/$USER/.k5/krb5.keytab
|
||||
exit
|
||||
```
|
||||
|
||||
Notice that you will need to add your password once. This step is required for generating the **keytab** file.
|
||||
|
||||
5. Once back to the main shell, one has to ensure that the file contains the proper permissions:
|
||||
|
||||
```bash
|
||||
chmod 0600 ~/.k5/krb5.keytab
|
||||
```
|
||||
|
||||
### Obtaining tickets by using keytab files
|
||||
|
||||
Once the keytab is created, one can obtain kerberos tickets without being prompted for a password as follows:
|
||||
|
||||
```bash
|
||||
kinit -kt ~/.k5/krb5.keytab $USER
|
||||
aklog
|
||||
```
|
||||
|
||||
## Slurm jobs accessing AFS
|
||||
|
||||
Some jobs may require access to private areas in AFS. For that, having a valid [**keytab**](/merlin7/kerberos.html#obtaining-granting-tickets-with-keytab) file is required.
|
||||
Then, from inside the batch script one can obtain granting tickets for Kerberos and AFS, which can be used for accessing AFS private areas.
|
||||
|
||||
The steps should be the following:
|
||||
|
||||
* Set up `KRB5CCNAME`, which can be used to specify the location of the Kerberos5 credentials (ticket) cache. In general it should point to a shared area
|
||||
(`$HOME/.k5` is a good location), and it is strongly recommended to generate an independent Kerberos5 credential cache (that is, creating a new credential cache per Slurm job):
|
||||
|
||||
```bash
|
||||
export KRB5CCNAME="$(mktemp "$HOME/.k5/krb5cc_XXXXXX")"
|
||||
```
|
||||
|
||||
* To obtain a Kerberos5 granting ticket, run `kinit` by using your keytab:
|
||||
|
||||
```bash
|
||||
kinit -kt "$HOME/.k5/krb5.keytab" $USER@D.PSI.CH
|
||||
```
|
||||
|
||||
* To obtain a granting AFS ticket, run `aklog`:
|
||||
|
||||
```bash
|
||||
aklog
|
||||
```
|
||||
|
||||
* At the end of the job, you can destroy the existing Kerberos tickets.
|
||||
|
||||
```bash
|
||||
kdestroy
|
||||
```
|
||||
|
||||
### Slurm batch script example: obtaining KRB+AFS granting tickets
|
||||
|
||||
#### Example 1: Independent credential cache per Slurm job
|
||||
|
||||
This is the **recommended** way. At the end of the job, it is strongly recommended to remove / destroy the existing Kerberos tickets.
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
#SBATCH --partition=hourly # Specify 'general' or 'daily' or 'hourly'
|
||||
#SBATCH --time=01:00:00 # Strictly recommended when using 'general' partition.
|
||||
#SBATCH --output=run.out # Generate custom output file
|
||||
#SBATCH --error=run.err # Generate custom error file
|
||||
#SBATCH --nodes=1 # Uncomment and specify #nodes to use
|
||||
#SBATCH --ntasks=1              # Uncomment and specify #tasks to use
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --constraint=xeon-gold-6152
|
||||
#SBATCH --hint=nomultithread
|
||||
#SBATCH --job-name=krb5
|
||||
|
||||
export KRB5CCNAME="$(mktemp "$HOME/.k5/krb5cc_XXXXXX")"
|
||||
kinit -kt "$HOME/.k5/krb5.keytab" $USER@D.PSI.CH
|
||||
aklog
|
||||
klist
|
||||
|
||||
echo "Here should go my batch script code."
|
||||
|
||||
# Destroy Kerberos tickets created for this job only
|
||||
kdestroy
|
||||
klist
|
||||
```
|
||||
|
||||
#### Example 2: Shared credential cache
|
||||
|
||||
Some users may need/prefer to run with a shared cache file. For doing that, one needs to
|
||||
set up `KRB5CCNAME` from the **login node** session, before submitting the job.
|
||||
|
||||
```bash
|
||||
export KRB5CCNAME="$(mktemp "$HOME/.k5/krb5cc_XXXXXX")"
|
||||
```
|
||||
|
||||
Then, you can run one or multiple job scripts (or a parallel job with `srun`). `KRB5CCNAME` will be propagated to the
|
||||
job script or to the parallel job, therefore a single credential cache will be shared amongst different Slurm runs.
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
#SBATCH --partition=hourly # Specify 'general' or 'daily' or 'hourly'
|
||||
#SBATCH --time=01:00:00 # Strictly recommended when using 'general' partition.
|
||||
#SBATCH --output=run.out # Generate custom output file
|
||||
#SBATCH --error=run.err # Generate custom error file
|
||||
#SBATCH --nodes=1 # Uncomment and specify #nodes to use
|
||||
#SBATCH --ntasks=1              # Uncomment and specify #tasks to use
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --constraint=xeon-gold-6152
|
||||
#SBATCH --hint=nomultithread
|
||||
#SBATCH --job-name=krb5
|
||||
|
||||
# KRB5CCNAME is inherited from the login node session
|
||||
kinit -kt "$HOME/.k5/krb5.keytab" $USER@D.PSI.CH
|
||||
aklog
|
||||
klist
|
||||
|
||||
echo "Here should go my batch script code."
|
||||
|
||||
echo "No need to run 'kdestroy', as it may have to survive for running other jobs"
|
||||
```
|
109
pages/merlin7/02-How-To-Use-Merlin/merlin-rmount.md
Normal file
@ -0,0 +1,109 @@
|
||||
---
|
||||
title: Using merlin_rmount
|
||||
#tags:
|
||||
keywords: >-
|
||||
transferring data, data transfer, rsync, dav, webdav, sftp, ftp, smb, cifs,
|
||||
copy data, copying, mount, file, folder, sharing
|
||||
last_updated: 24 August 2023
|
||||
#summary: ""
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/merlin-rmount.html
|
||||
---
|
||||
|
||||
## Background
|
||||
|
||||
Merlin provides a command for mounting remote file systems, called `merlin_rmount`. This
|
||||
is a helpful wrapper over the Gnome storage utilities (GIO and GVFS) and supports a wide range of remote file system protocols, including
|
||||
- SMB/CIFS (Windows shared folders)
|
||||
- WebDav
|
||||
- AFP
|
||||
- FTP, SFTP
|
||||
- [complete list](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/using_the_desktop_environment_in_rhel_8/managing-storage-volumes-in-gnome_using-the-desktop-environment-in-rhel-8#gvfs-back-ends_managing-storage-volumes-in-gnome)
|
||||
|
||||
|
||||
## Usage
|
||||
|
||||
|
||||
### Start a session
|
||||
|
||||
First, start a new session. This will start a new bash shell in the current terminal where you can run further commands.
|
||||
|
||||
```
|
||||
$ merlin_rmount --init
|
||||
[INFO] Starting new D-Bus RMOUNT session
|
||||
|
||||
(RMOUNT STARTED) [bliven_s@login002 ~]$
|
||||
```
|
||||
|
||||
Note that behind the scenes this is creating a new dbus daemon. Running multiple daemons on the same login node leads to unpredictable results, so it is best not to initialize multiple sessions in parallel.
|
||||
|
||||
### Standard Endpoints
|
||||
|
||||
Standard endpoints can be mounted using
|
||||
|
||||
```
|
||||
merlin_rmount --select-mount
|
||||
```
|
||||
|
||||
Select the desired URL using the arrow keys.
|
||||
|
||||

|
||||
|
||||
From this list any of the standard supported endpoints can be mounted.
|
||||
|
||||
### Other endpoints
|
||||
|
||||
Other endpoints can be mounted using the `merlin_rmount --mount <endpoint>` command.
|
||||
|
||||

|
||||
|
||||
|
||||
### Accessing Files
|
||||
|
||||
After mounting a volume the script will print the mountpoint. It should be of the form
|
||||
|
||||
```
|
||||
/run/user/$UID/gvfs/<endpoint>
|
||||
```
|
||||
|
||||
where `$UID` gives your unix user id (a 5-digit number, also viewable with `id -u`) and
|
||||
`<endpoint>` is some string generated from the mount options.
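
For example, to list everything that is currently mounted (using `id -u` to fill in the UID):

```bash
ls /run/user/$(id -u)/gvfs/
```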
|
||||
|
||||
For convenience, it may be useful to add a symbolic link for this gvfs directory. For instance, this would allow all volumes to be accessed in ~/mnt/:
|
||||
|
||||
```
|
||||
ln -s /run/user/$UID/gvfs ~/mnt
|
||||
```
|
||||
|
||||
Files are accessible as long as the `merlin_rmount` shell remains open.
|
||||
|
||||
|
||||
### Disconnecting
|
||||
|
||||
To disconnect, close the session with one of the following:
|
||||
|
||||
- The exit command
|
||||
- CTRL-D
|
||||
- Closing the terminal
|
||||
|
||||
Disconnecting will unmount all volumes.
|
||||
|
||||
|
||||
## Alternatives
|
||||
|
||||
### Thunar
|
||||
|
||||
Users that prefer a GUI file browser can use the `thunar` command, which opens the Thunar file manager. This is also available in NoMachine sessions in the bottom bar (1). Thunar supports the same remote filesystems as `merlin_rmount`; just type the URL in the address bar (2).
|
||||
|
||||

|
||||
|
||||
When using thunar within a NoMachine session, file transfers continue after closing the NoMachine client window (as long as the NoMachine session itself stays active).
|
||||
|
||||
Files can also be accessed at the command line as needed (see 'Accessing Files' above).
|
||||
|
||||
## Resources
|
||||
|
||||
- [BIO docs](https://intranet.psi.ch/en/bio/webdav-data) on using these tools for
|
||||
transferring EM data
|
||||
- [Red Hat docs on GVFS](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/using_the_desktop_environment_in_rhel_8/managing-storage-volumes-in-gnome_using-the-desktop-environment-in-rhel-8)
|
||||
- [gio reference](https://developer-old.gnome.org/gio/stable/gio.html)
|
108
pages/merlin7/02-How-To-Use-Merlin/merlin_tools.md
Normal file
@ -0,0 +1,108 @@
|
||||
---
|
||||
title: Merlin7 Tools
|
||||
#tags:
|
||||
keywords: merlin_quotas
|
||||
#last_updated: 07 September 2022
|
||||
#summary: ""
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/tools.html
|
||||
---
|
||||
|
||||
## About
|
||||
|
||||
We provide tool(s) to help users get the most out of using the cluster. The tools
|
||||
described here are organised by use case and include usage examples.
|
||||
|
||||
## Files and Directories
|
||||
|
||||
### `merlin_quotas`
|
||||
|
||||
This tool is available on all of the login nodes and provides a brief overview of
|
||||
a user's filesystem quotas. These are limits which restrict how much storage space (or how
|
||||
many files) a user can use. A generic table of filesystem quotas can be
|
||||
found on the [Storage page](/merlin7/storage.html#dir_classes).
|
||||
|
||||
#### Example #1: Viewing quotas
|
||||
|
||||
Simply calling `merlin_quotas` will show you a table of your quotas:
|
||||
|
||||
```console
|
||||
$ merlin_quotas
|
||||
Path SpaceUsed SpaceQuota Space % FilesUsed FilesQuota Files %
|
||||
-------------- --------- ---------- ------- --------- ---------- -------
|
||||
/data/user 30.26G 1T 03% 367296 2097152 18%
|
||||
└─ <USERNAME>
|
||||
/afs/psi.ch 3.4G 9.5G 36% 0 0 00%
|
||||
└─ user/<USERDIR>
|
||||
/data/project 2.457T 10T 25% 58 2097152 00%
|
||||
└─ bio/shared
|
||||
/data/project 338.3G 10T 03% 199391 2097152 10%
|
||||
└─ bio/hpce
|
||||
```
|
||||
|
||||
{{site.data.alerts.tip}}You can change the width of the table by either passing
|
||||
<code>--no-wrap</code> (to disable wrapping of the <i>Path</i>) or <code>--width N</code>
|
||||
(to explicitly set some width by <code>N</code> characters).
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
#### Example #2: Project view
|
||||
|
||||
The tool can also be used to list out information about what project directories
|
||||
there are and who owns/manages these:
|
||||
|
||||
```console
|
||||
$ merlin_quotas projects
|
||||
Project ID Path Owner Group
|
||||
---------- ------------------------ --------- --------------
|
||||
600000000 /data/project/bio/shared germann_e unx-merlin_adm
|
||||
600000001 /data/project/bio/hpce assman_g unx-merlin_adm
|
||||
```
|
||||
|
||||
By default this only shows information on projects that you have access to, but
|
||||
to view the whole list you can pass the `--all` flag:
|
||||
|
||||
```console
|
||||
$ merlin_quotas projects --all
|
||||
Project ID Path Owner Group
|
||||
---------- ------------------------------- -------------- -----------------
|
||||
500000000 /data/project/general/mcnp gac-mcnp unx-mcnp_all
|
||||
500000001 /data/project/general/vis_as talanov_v unx-vis_as
|
||||
500000002 /data/project/general/mmm krack org-7302
|
||||
500000003 /data/project/general laeuch_a org-7201
|
||||
└─ LTC_CompPhys
|
||||
600000000 /data/project/bio/shared germann_e unx-merlin_adm
|
||||
600000001 /data/project/bio/hpce assman_g unx-merlin_adm
|
||||
600000002 /data/project/bio/abrahams abrahams_j unx-bio_abrahams
|
||||
600000003 /data/project/bio/benoit benoit_r unx-bio_benoit
|
||||
600000004 /data/project/bio/ishikawa ishikawa unx-bio_ishikawa
|
||||
600000005 /data/project/bio/kammerer kammerer_r unx-bio_kammerer
|
||||
600000006 /data/project/bio/korkhov korkhov_v unx-bio_korkhov
|
||||
600000007 /data/project/bio/luo luo_j unx-bio_luo
|
||||
600000008 /data/project/bio/mueller mueller_e unx-bio_mueller
|
||||
600000009 /data/project/bio/poghosyan poghosyan_e unx-bio_poghosyan
|
||||
600000010 /data/project/bio/schertler schertler_g unx-bio_schertler
|
||||
600000011 /data/project/bio/shivashankar shivashankar_g unx-bio_shivashan
|
||||
600000012 /data/project/bio/standfuss standfuss unx-bio_standfuss
|
||||
600000013 /data/project/bio/steinmetz steinmetz unx-bio_steinmetz
|
||||
```
|
||||
|
||||
{{site.data.alerts.tip}}As above you can change the table width by pass either
|
||||
<code>--no-wrap</code> or <code>--width N</code>.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
#### Example #3: Project config
|
||||
|
||||
To make tracking quotas of projects easier, `merlin_quotas` generates a config
|
||||
file (`~/.merlin_quotas`) in your home directory which defines the projects to show when you call the
|
||||
tool.
|
||||
|
||||
The config file simply contains a list (one per line) of project IDs which should
|
||||
be tracked. In theory any (or all) available projects can be tracked, but due to
|
||||
UNIX and Lustre permissions, accessing quota information for a project you're not
|
||||
a member of **is not possible**.
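
For illustration, using the example project IDs from the listing above, the config file could look like this:

```console
$ cat ~/.merlin_quotas
600000000
600000001
```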
|
||||
|
||||
If you are added to or removed from a project, you can update this config file by
|
||||
calling `merlin_quotas genconf --force` (notice the `--force`, which will overwrite
|
||||
your existing config file) or by editing the file by hand (*not recommended*).
|
||||
|
||||
|
147
pages/merlin7/02-How-To-Use-Merlin/nomachine.md
Normal file
@ -0,0 +1,147 @@
|
||||
---
|
||||
title: Remote Desktop Access to Merlin7
|
||||
keywords: NX, NoMachine, remote desktop access, login node, login001, login002, merlin7-nx-01, merlin7-nx, nx.psi.ch, VPN, browser access
|
||||
last_updated: 07 August 2024
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/nomachine.html
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Merlin7 NoMachine provides users with remote desktop access to the Merlin7 computing environment. This service enables users to connect to their computing resources from any location, whether they are inside the PSI network or accessing from outside via secure methods.
|
||||
|
||||
## Accessing Merlin7 NoMachine
|
||||
|
||||
### From Inside PSI
|
||||
|
||||
If you are inside the PSI network, you can directly connect to the Merlin7 NoMachine service without the need to go through another service.
|
||||
|
||||
1. **Ensure Network Connectivity**: Make sure you are connected to the PSI internal network.
|
||||
2. **Choose Your Access Method**: You can access Merlin7 using either a web browser or the NoMachine client.
|
||||
|
||||
#### Method 1: Using a Web Browser
|
||||
|
||||
Open your web browser and navigate to [https://merlin7-nx.psi.ch:4443](https://merlin7-nx.psi.ch:4443).
|
||||
|
||||
#### Method 2: Using the NoMachine Client
|
||||
|
||||
Settings for the NoMachine client:
|
||||
|
||||
- **Host**: `merlin7-nx.psi.ch`
|
||||
- **Port**: `4000`
|
||||
- **Protocol**: `NX`
|
||||
- **Authentication**: `Use password authentication`
|
||||
|
||||
### From Outside PSI
|
||||
|
||||
Users outside the PSI network have two options for accessing the Merlin7 NoMachine service: through `nx.psi.ch` or via a VPN connection.
|
||||
|
||||
#### Option 1: Via `nx.psi.ch`
|
||||
|
||||
Documentation about the `nx.psi.ch` service can be found [here](https://www.psi.ch/en/photon-science-data-services/remote-desktop-nomachine).
|
||||
|
||||
##### Using a Web Browser
|
||||
|
||||
Open your web browser and navigate to [https://nx.psi.ch](https://nx.psi.ch).
|
||||
|
||||
##### Using the NoMachine Client
|
||||
|
||||
Settings for the NoMachine client:
|
||||
|
||||
- **Host**: `nx.psi.ch`
|
||||
- **Port**: `4000`
|
||||
- **Protocol**: `NX`
|
||||
- **Authentication**: `Use password authentication`
|
||||
|
||||
#### Option 2: Via VPN
|
||||
|
||||
Alternatively, you can use a VPN connection to access Merlin7 as if you were inside the PSI network.
|
||||
|
||||
1. **Request VPN Access**: Contact the IT department to request VPN access if you do not already have it. Submit a request through the PSI Service Now ticketing system: [VPN Access (PSI employees)](https://psi.service-now.com/psisp?id=psi_new_sc_cat_item&sys_id=beccc01b6f44a200d02a82eeae3ee440).
|
||||
2. **Connect to the VPN**: Once access is granted, connect to the PSI VPN using your credentials.
|
||||
3. **Access Merlin7 NoMachine**: Once connected to the VPN, you can access Merlin7 using either a web browser or the NoMachine client as if you were inside the PSI network.
|
||||
|
||||
## The NoMachine Client
|
||||
|
||||
### Installation
|
||||
|
||||
#### Windows
|
||||
|
||||
The NoMachine client is available for PSI Windows computers in the Software Kiosk under the name **NX Client**.
|
||||
|
||||
#### macOS and Linux
|
||||
|
||||
The NoMachine client can be downloaded from [NoMachine's download page](https://downloads.nomachine.com).
|
||||
|
||||
### Connection Configuration
|
||||
|
||||
1. **Launch NoMachine Client**: Open the NoMachine client on your computer.
|
||||
2. **Create a New Connection**: Click the **Add** button to create a new connection.
|
||||
- On the **Address** tab configure:
|
||||
- **Name**: Enter a name for your connection. This can be anything.
|
||||
- **Host**: Enter the appropriate hostname (e.g. `merlin7-nx.psi.ch`).
|
||||
- **Port**: Enter `4000`.
|
||||
- **Protocol**: Select `NX`.
|
||||
|
||||

|
||||
|
||||
- On the **Configuration** tab ensure:
|
||||
- **Authentication**: Select `Use password authentication`.
|
||||
|
||||

|
||||
|
||||
- Click the **Add** button to finish creating the new connection.
|
||||
|
||||
## Authenticating
|
||||
|
||||
When prompted, use your PSI credentials to authenticate.
|
||||
|
||||

|
||||
|
||||
## Managing Sessions
|
||||
|
||||
The Merlin7 NoMachine service is managed through a front-end server and back-end nodes, facilitating balanced and efficient access to remote desktop sessions.
|
||||
|
||||
### Architecture Overview
|
||||
|
||||
- **Front-End Server**: `merlin7-nx.psi.ch`
|
||||
- Serves as the entry point for users connecting to the NoMachine service.
|
||||
- Handles load-balancing and directs users to available back-end nodes.
|
||||
|
||||
- **Back-End Nodes**:
|
||||
- `login001.merlin7.psi.ch`
|
||||
- `login002.merlin7.psi.ch`
|
||||
- These nodes host the NoMachine desktop service and manage the individual desktop sessions.
|
||||
|
||||
Access to the login node desktops must be initiated through the `merlin7-nx.psi.ch` front-end. The front-end service will distribute sessions across available nodes in the back-end, ensuring optimal resource usage.
|
||||
|
||||
### Opening NoMachine Desktop Sessions
|
||||
|
||||
When connecting to the `merlin7-nx.psi.ch` front-end, a new session automatically opens if no existing session is found. Users can manage their sessions as follows:
|
||||
|
||||
- **Reconnect to an Existing Session**: If you have an active session, you can reconnect to it by selecting the appropriate icon in the NoMachine client interface. This allows you to resume work without losing any progress.
|
||||

|
||||
- **Create a Second Session**: If you require a separate session, you can select the **`New Desktop`** button. This option creates a second session on another login node, provided the node is available and operational.
|
||||
|
||||
### Session Management Considerations
|
||||
|
||||
- **Load Balancing**: The front-end service ensures that sessions are evenly distributed across the available back-end nodes to optimize performance and resource utilization.
|
||||
- **Session Limits**: Users are limited to one session per back-end node to maintain system stability and efficiency.
|
||||
|
||||
## Support and Resources
|
||||
|
||||
If you encounter any issues or need further assistance with the Merlin7 NoMachine service, support is available via email. Please contact us at [merlin-admins@lists.psi.ch](mailto:merlin-admins@lists.psi.ch), and our support team will be happy to assist you.
|
||||
|
||||
### Advanced Display Settings
|
||||
|
||||
NoMachine provides several options to optimize the display settings for better performance and clarity. These settings can be accessed and adjusted when creating a new session or by clicking the top right corner of a running session.
|
||||
|
||||
#### Prevent Rescaling
|
||||
|
||||
Preventing rescaling can help eliminate "blurriness" in your display, though it may affect performance. Adjust these settings based on your performance needs:
|
||||
|
||||
- Display: Choose `Resize remote display` (forces 1:1 pixel sizes)
|
||||
- Display > Change settings > Quality: Choose medium-best quality
|
||||
- Display > Change settings > Modify the advanced display settings
|
||||
- Check: Disable network-adaptive display quality (turns off lossy compression)
|
||||
- Check: Disable client side image post-processing
|
50
pages/merlin7/02-How-To-Use-Merlin/software-repositories.md
Normal file
@ -0,0 +1,50 @@
|
||||
---
|
||||
title: Software repositories
|
||||
#tags:
|
||||
keywords: modules, software, stable, unstable, deprecated, spack, repository, repositories
|
||||
last_updated: 16 January 2024
|
||||
summary: "This page contains information about the different software repositories"
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/software-repositories.html
|
||||
---
|
||||
|
||||
## Module Systems in Merlin7
|
||||
|
||||
Merlin7 provides a modular environment to ensure flexibility, compatibility, and optimized performance.
|
||||
The system supports three primary module types: PSI Environment Modules (PModules), Spack Modules, and Cray Environment Modules.
|
||||
|
||||
### PSI Environment Modules (PModules)
|
||||
|
||||
The PModules system, developed by PSI, is the officially supported module system on Merlin7. It is the preferred choice for accessing validated software across a wide range of applications.
|
||||
|
||||
Key Features:
|
||||
* **Expert Deployment:** Each package is deployed and maintained by specific experts to ensure reliability and compatibility.
|
||||
* **Broad Availability:** Commonly used software, such as OpenMPI, ANSYS, MATLAB, and others, is provided within PModules.
|
||||
* **Custom Requests:** If a package, version, or feature is missing, users can contact the support team to explore feasibility for installation.
|
||||
|
||||
{{site.data.alerts.tip}}
|
||||
For further information about <b>Pmodules</b> on Merlin7 please refer to the <b><a href="/merlin7/pmodules.html">PSI Modules</a></b> chapter.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
### Spack Modules
|
||||
|
||||
Merlin7 also provides Spack modules, offering a modern and flexible package management system. Spack supports a wide variety of software packages and versions. For more information, refer to the **external [PSI Spack](https://gitea.psi.ch/HPCE/spack-psi) documentation**.
|
||||
|
||||
{{site.data.alerts.tip}}
|
||||
For further information about <b>Spack</b> on Merlin7 please refer to the <b><a href="/merlin7/spack.html">Spack</a></b> chapter.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
### Cray Environment Modules
|
||||
|
||||
Merlin7 also supports Cray Environment Modules, which include compilers, MPI implementations, and libraries optimized
|
||||
for Cray systems. However, Cray modules are not recommended as the default choice due to potential backward compatibility
|
||||
issues when the Cray Programming Environment (CPE) is upgraded to a newer version.
|
||||
|
||||
Recommendations:
|
||||
* **Compiling Software:** Cray modules can be used when optimization for Cray hardware is essential.
|
||||
* **General Use:** For most applications, prefer PModules, which ensure stability, backward compatibility, and long-term support.
|
||||
|
||||
{{site.data.alerts.tip}}
|
||||
For further information about <b>CPE</b> on Merlin7 please refer to the <b><a href="/merlin7/cray-module-env.html">Cray Modules</a></b> chapter.
|
||||
{{site.data.alerts.end}}
|
||||
|
184
pages/merlin7/02-How-To-Use-Merlin/ssh-keys.md
Normal file
@ -0,0 +1,184 @@
|
||||
---
|
||||
title: Configuring SSH Keys in Merlin
|
||||
|
||||
#tags:
|
||||
keywords: linux, connecting, client, configuration, SSH, Keys, SSH-Keys, RSA, authorization, authentication
|
||||
last_updated: 15 Jul 2020
|
||||
summary: "This document describes how to deploy SSH Keys in Merlin."
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/ssh-keys.html
|
||||
---
|
||||
|
||||
Merlin users sometimes need to access the different Merlin services without being constantly prompted for a password.
|
||||
One can achieve that with Kerberos authentication; however, in some cases software requires the setup of SSH Keys.
|
||||
One example is ANSYS Fluent, which, when used interactively, the way of communication between the GUI and the different nodes
|
||||
is through the SSH protocol, and the use of SSH Keys is enforced.
|
||||
|
||||
## Setting up SSH Keys on Merlin
|
||||
|
||||
For security reasons, users **must always protect SSH Keys with a passphrase**.
|
||||
|
||||
Users can check whether an SSH key already exists. Keys are placed in the **~/.ssh/** directory. `RSA` encryption
is usually the default, and the corresponding files are **`id_rsa`** (private key) and **`id_rsa.pub`** (public key).
|
||||
|
||||
```bash
|
||||
ls ~/.ssh/id*
|
||||
```
|
||||
|
||||
For creating **SSH RSA Keys**, one should:
|
||||
|
||||
1. Run `ssh-keygen`; a passphrase will be requested twice (see the example below). You **must remember** this passphrase for the future.
* For security reasons, ***always protect the key with a passphrase***. The only exception is ANSYS software, which in general should use a key without a passphrase, to simplify running the software in Slurm.
|
||||
* This will generate a private key **id_rsa**, and a public key **id_rsa.pub** in your **~/.ssh** directory.
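
For example (the exact prompts and default paths may vary slightly with the installed OpenSSH version):

```bash
ssh-keygen
# Generating public/private rsa key pair.
# Enter file in which to save the key (~/.ssh/id_rsa):   <press Enter>
# Enter passphrase (empty for no passphrase):            <type a passphrase>
# Enter same passphrase again:                           <repeat the passphrase>
```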
|
||||
2. Add your public key to the **`authorized_keys`** file, and ensure proper permissions for that file, as follows:
|
||||
|
||||
```bash
|
||||
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
|
||||
chmod 0600 ~/.ssh/authorized_keys
|
||||
```
|
||||
|
||||
3. Configure the SSH client to force the use of the **psi.ch** domain when trusting keys:
|
||||
|
||||
```bash
|
||||
echo "CanonicalizeHostname yes" >> ~/.ssh/config
|
||||
```
|
||||
|
||||
4. Configure further SSH options as follows:
|
||||
|
||||
```bash
|
||||
echo "AddKeysToAgent yes" >> ~/.ssh/config
|
||||
echo "ForwardAgent yes" >> ~/.ssh/config
|
||||
```
|
||||
|
||||
Other options may be added.
|
||||
|
||||
5. Check that your SSH config file contains at least the lines mentioned in steps 3 and 4:
|
||||
|
||||
```console
|
||||
# cat ~/.ssh/config
|
||||
CanonicalizeHostname yes
|
||||
AddKeysToAgent yes
|
||||
ForwardAgent yes
|
||||
```
|
||||
|
||||
## Using the SSH Keys
|
||||
|
||||
### Using Authentication Agent in SSH session
|
||||
|
||||
By default, when accessing the login node via SSH (with `ForwardAgent=yes`), your SSH keys are automatically added
to the authentication agent. Hence, no further action should be needed by the user. One can configure
`ForwardAgent=yes` as follows:
|
||||
|
||||
* **(Recommended)** In your local Linux (workstation, laptop or desktop) add the following line in the
|
||||
`$HOME/.ssh/config` (or alternatively in `/etc/ssh/ssh_config`) file:
|
||||
|
||||
```ssh_config
|
||||
ForwardAgent yes
|
||||
```
|
||||
|
||||
* Alternatively, you can add the option `ForwardAgent=yes` to each SSH command. For example:
|
||||
|
||||
```bash
|
||||
ssh -XY -o ForwardAgent=yes login001.merlin7.psi.ch
|
||||
```
|
||||
|
||||
If `ForwardAgent` is not enabled as shown above, you need to start the authentication agent and then add your key
to the **ssh-agent**. This must be done once per SSH session, as follows:
|
||||
|
||||
* Run `eval $(ssh-agent -s)` to start the **ssh-agent** in that SSH session
|
||||
* Check whether the authentication agent has your key already added:
|
||||
|
||||
```bash
|
||||
ssh-add -l | grep "/data/user/$(whoami)/.ssh"
|
||||
```
|
||||
|
||||
* If no key is returned in the previous step, you have to add the private key identity to the authentication agent.
|
||||
You will be requested for the **passphrase** of your key, and it can be done by running:
|
||||
|
||||
```bash
|
||||
ssh-add
|
||||
```
|
||||
|
||||
### Using Authentication Agent in NoMachine Session
|
||||
|
||||
By default, when using a NoMachine session, the `ssh-agent` should be started automatically. Hence, there is no need
to start the agent or forward it.
|
||||
|
||||
However, for NoMachine one always needs to add the private key identity to the authentication agent. This can be done as follows:
|
||||
|
||||
1. Check whether the authentication agent already has the key added:
|
||||
|
||||
```bash
|
||||
ssh-add -l | grep "/data/user/$(whoami)/.ssh"
|
||||
```
|
||||
2. If no key is returned in the previous step, you have to add the private key identity to the authentication agent.
|
||||
You will be requested for the **passphrase** of your key, and it can be done by running:
|
||||
|
||||
```bash
|
||||
ssh-add
|
||||
```
|
||||
|
||||
You just need to run this once per NoMachine session, and it will apply to all terminal windows within that NoMachine session.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Errors when running 'ssh-add'
|
||||
|
||||
If the error `Could not open a connection to your authentication agent.` appears when running `ssh-add`, it means
|
||||
that the authentication agent is not running. Please follow the previous procedures for starting it.
|
||||
|
||||
### Add/Update SSH RSA Key password
|
||||
|
||||
If an existing SSH key does not have a passphrase, or you want to replace an existing passphrase with a new one, you can do it as follows:
|
||||
|
||||
```bash
|
||||
ssh-keygen -p -f ~/.ssh/id_rsa
|
||||
```
|
||||
|
||||
### SSH Keys deployed but not working
|
||||
|
||||
Please ensure that the involved files have the proper permissions, and check for typos in the file names:
|
||||
|
||||
```bash
|
||||
chmod u+rwx,go-rwx,g+s ~/.ssh
|
||||
chmod u+rw-x,go-rwx ~/.ssh/authorized_keys
|
||||
chmod u+rw-x,go-rwx ~/.ssh/id_rsa
|
||||
chmod u+rw-x,go+r-wx ~/.ssh/id_rsa.pub
|
||||
```
|
||||
|
||||
### Testing SSH Keys
|
||||
|
||||
Once the SSH key is created, you can test that it is valid as follows:
|
||||
|
||||
1. Create a **new** SSH session in one of the login nodes:
|
||||
|
||||
```bash
|
||||
ssh login001
|
||||
```
|
||||
|
||||
2. In the login node session, destroy any existing Kerberos ticket or active SSH Key:
|
||||
|
||||
```bash
|
||||
kdestroy
|
||||
ssh-add -D
|
||||
```
|
||||
|
||||
3. Add the new private key identity to the authentication agent. You will be asked for the passphrase.
|
||||
|
||||
```bash
|
||||
ssh-add
|
||||
```
|
||||
|
||||
4. Check that your key is active in the SSH agent:
|
||||
|
||||
```bash
|
||||
ssh-add -l
|
||||
```
|
||||
|
||||
5. SSH to the second login node. No password should be requested:
|
||||
|
||||
```bash
|
||||
ssh -vvv login002
|
||||
```
|
||||
|
||||
If the last step succeeds, it means that your SSH key is properly set up.
|
186
pages/merlin7/02-How-To-Use-Merlin/storage.md
Normal file
@ -0,0 +1,186 @@
|
||||
---
|
||||
title: Merlin7 Storage
|
||||
#tags:
|
||||
keywords: storage, /data/user, /data/software, /data/project, /scratch, /data/scratch/shared, quota, export, user, project, scratch, data, data/scratch/shared, merlin_quotas
|
||||
#last_updated: 07 September 2022
|
||||
#summary: ""
|
||||
sidebar: merlin7_sidebar
|
||||
redirect_from: /merlin7/data-directories.html
|
||||
permalink: /merlin7/storage.html
|
||||
---
|
||||
|
||||
## Introduction
|
||||
|
||||
This document describes the different directories of the Merlin7 cluster.
|
||||
|
||||
### Backup and data policies
|
||||
|
||||
* ***Users are responsible for backing up their own data***. It is recommended to back up the data on independent third-party systems (e.g. LTS, Archive, AFS, SwitchDrive, Windows Shares, etc.).
* ***When a user leaves PSI, they or their supervisor/team are responsible for backing up the data and moving it out of the cluster***: every few months, the storage space of former users who no longer have a valid PSI account is recycled.
|
||||
|
||||
{{site.data.alerts.warning}}When a user leaves PSI and their account is removed, their storage space in Merlin may be recycled.
|
||||
Hence, <b>when a user leaves PSI</b>, they, their supervisor or team <b>must ensure that the data is backed up to external storage</b>.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
### How to check quotas
|
||||
|
||||
Some of the Merlin7 directories have quotas applied. Quotas can be checked with the `merlin_quotas` command,
which shows all quotas for the different user storage directories and partitions (including AFS). To check your quotas, please run:
|
||||
|
||||
```console
|
||||
$ merlin_quotas
|
||||
Path SpaceUsed SpaceQuota Space % FilesUsed FilesQuota Files %
|
||||
-------------- --------- ---------- ------- --------- ---------- -------
|
||||
/data/user 30.26G 1T 03% 367296 2097152 18%
|
||||
└─ <USERNAME>
|
||||
/afs/psi.ch 3.4G 9.5G 36% 0 0 0%
|
||||
└─ user/<USERDIR>
|
||||
/data/scratch 688.9M 2T 00% 368471 0 00%
|
||||
└─ shared
|
||||
/data/project 3.373T 11T 31% 425644 2097152 20%
|
||||
└─ bio/shared
|
||||
/data/project 4.142T 11T 38% 579596 2097152 28%
|
||||
└─ bio/hpce
|
||||
```
|
||||
|
||||
{{site.data.alerts.note}}On first use you will see a message about some configuration being generated; this is expected. Don't be
surprised if it takes some time. After this, using <code>merlin_quotas</code> should be faster.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
The output shows, for each filesystem with quotas, the quota limits and how much of them you are using. Notice that some users will see
one or more `/data/project/...` directories, depending on whether they are part of a specific PSI research group or project.
|
||||
|
||||
The general quota constraints for the different directories are shown in the [table below](#dir_classes). Further details on how to use `merlin_quotas`
|
||||
can be found on the [Tools page](/merlin7/tools.html).
|
||||
|
||||
{{site.data.alerts.tip}}If you're interested, you can retrieve the Lustre-based quota information directly by calling
<code>lfs quota -h -p $(( 100000000 + $(id -u $USER) )) /data</code>. Using the <code>merlin_quotas</code> command is more
|
||||
convenient and shows all your relevant filesystem quotas.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
## Merlin7 directories
|
||||
|
||||
Merlin7 offers the following directory classes for users:
|
||||
|
||||
* `/data/user/<username>`: Private user **home** directory
|
||||
* `/data/project/general`: project directory for Merlin
|
||||
* `/data/project/bio/$projectname`: project directory for BIO
|
||||
* `/data/project/mu3e/$projectname`: project directory for Mu3e
|
||||
* `/data/project/meg/$projectname`: project directory for MEG
|
||||
* `/scratch`: Local *scratch* disk (only visible by the node running a job).
|
||||
* `/data/scratch/shared`: Shared *scratch* disk (visible from all nodes).
|
||||
|
||||
{{site.data.alerts.tip}}In Lustre there is a concept called <b>grace time</b>. Filesystems have a block (amount of data) and an inode (number of files) quota.
These quotas have soft and hard limits. Once the soft limit is reached, users can keep writing up to their hard limit during the <b>grace period</b>.
Once the <b>grace time</b> expires or the hard limit is reached, users will be unable to write and will need to remove data to get below the soft limit (or ask for a quota increase
when this is possible, see the table below).
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
<a name="dir_classes"></a>Properties of the directory classes:
|
||||
|
||||
| Directory | Block Quota [Soft:Hard] | Inode Quota [Soft:Hard] | GraceTime | Quota Change Policy: Block | Quota Change Policy: Inodes | Backup |
|
||||
| ---------------------------------- | ----------------------- | ----------------------- | :-------: | :--------------------------------- |:-------------------------------- | ------ |
|
||||
| /data/user/$username | PRJ [1TB:1.074TB] | PRJ [2M:2.1M] | 7d | Immutable. Need a project. | Changeable when justified. | no |
|
||||
| /data/project/bio/$projectname | PRJ [1TB:1.074TB] | PRJ [1M:1.1M] | 7d | Subject to project requirements. | Subject to project requirements. | no |
|
||||
| /data/project/general/$projectname | PRJ [1TB:1.074TB] | PRJ [1M:1.1M] | 7d | Subject to project requirements. | Subject to project requirements. | no |
|
||||
| /data/scratch/shared | USR [512GB:2TB] | | 7d | Up to x2 when strongly justified. | Changeable when justified. | no |
|
||||
| /scratch | *Undef* | *Undef* | N/A | N/A | N/A | no |
|
||||
|
||||
{{site.data.alerts.warning}}The use of <b>/scratch</b> and <b>/data/scratch/shared</b> areas as an extension of the quota <i>is forbidden</i>. The <b>/scratch</b> and
|
||||
<b>/data/scratch/shared</b> areas <i>must not contain</i> final data. Keep in mind that <br><b><i>auto cleanup policies</i></b> in the <b>/scratch</b> and
|
||||
<b>/data/scratch/shared</b> areas are applied.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
### User home directory
|
||||
|
||||
This is the default directory users will land in when logging in to any Merlin7 machine.
|
||||
It is intended for your scripts, documents, software development and data. Do not use it for I/O-hungry tasks.
|
||||
|
||||
The home directories are mounted in the login and computing nodes under the directory
|
||||
|
||||
```bash
|
||||
/data/user/$username
|
||||
```
|
||||
|
||||
Directory policies:
|
||||
|
||||
* Read **[Important: Code of Conduct](/merlin7/code-of-conduct.html)** for more information about Merlin7 policies.
|
||||
* It is **forbidden** to use the home directories for I/O-intensive tasks; use one of the **[scratch](/merlin7/storage.html#scratch-directories)** areas instead!
|
||||
* No backup policy is applied for the user home directories: **users are responsible for backing up their data**.
|
||||
|
||||
Home directory quotas are defined on a per-Lustre-project basis. The quota can be checked using the `merlin_quotas` command described
|
||||
[above](/merlin7/storage.html#how-to-check-quotas).
|
||||
|
||||
### Project data directory
|
||||
|
||||
This storage is intended for keeping large amounts of a project's data, where the data can also be
shared by all members of the project (the project's corresponding UNIX group). We recommend keeping most data in
project-related storage spaces, since this allows users to coordinate. Project spaces also have more flexible policies
regarding extending the available storage space.
|
||||
|
||||
Scientists can request a Merlin project space as described in **[[Accessing Merlin -> Requesting a Project]](/merlin7/request-project.html)**.
|
||||
By default, Merlin can offer **general** project space, centrally covered, as long as it does not exceed 10TB (larger requests have to be justified).
General Merlin projects might need to be reviewed one year after their creation.
|
||||
|
||||
Once a Merlin project is created, the directory will be mounted in the login and computing nodes under the directory:
|
||||
|
||||
```bash
|
||||
/data/project/general/$projectname
|
||||
```
|
||||
|
||||
Project quotas are defined on a per-Lustre-project basis. Users can check the project quota by running the following command:
|
||||
|
||||
```bash
|
||||
lfs quota -h -p $projectid /data
|
||||
```
|
||||
|
||||
{{site.data.alerts.warning}}Checking <b>quotas</b> for the Merlin projects is not yet possible.
|
||||
In the future, a list of `projectid` will be provided, so users can check their quotas.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
Directory policies:
|
||||
|
||||
* Read **[Important: Code of Conduct](/merlin7/code-of-conduct.html)** for more information about Merlin7 policies.
|
||||
* It is **forbidden** to use the data directories as `/scratch` area during a job's runtime, i.e. for high throughput I/O for a job's temporary files.
|
||||
* Please use `/scratch` or `/data/scratch/shared` for this purpose.
|
||||
* No backups: users are responsible for managing the backups of their data directories.
|
||||
|
||||
#### Dedicated project directories
|
||||
|
||||
Some departments or divisions have bigger storage space requirements on Merlin7. At present, `bio`, `mu3e` and `meg` are the main ones.
|
||||
These are mounted under the following paths:
|
||||
|
||||
```bash
|
||||
/data/project/bio
|
||||
/data/project/mu3e
|
||||
/data/project/meg
|
||||
```
|
||||
|
||||
They follow the same rules as the general projects, except that they have more space assigned.
|
||||
|
||||
### Scratch directories
|
||||
|
||||
There are two different types of scratch storage: **local** (`/scratch`) and **shared** (`/data/scratch/shared`).
|
||||
|
||||
* **local** scratch should be used for all jobs that do not require the scratch files to be accessible from multiple nodes, which is trivially
|
||||
true for all jobs running on a single node. Mount path:
|
||||
|
||||
```bash
|
||||
/scratch
|
||||
```
|
||||
|
||||
* **shared** scratch is intended for files that need to be accessible by multiple nodes, e.g. by an MPI job where tasks are spread out over the cluster
|
||||
and all tasks need to do I/O on the same temporary files.
|
||||
|
||||
```bash
|
||||
/data/scratch/shared
|
||||
```
|
||||
|
||||
Scratch directories policies:
|
||||
|
||||
* Read **[Important: Code of Conduct](/merlin7/code-of-conduct.html)** for more information about Merlin7 policies.
|
||||
* By default, *always* use **local** first and only use **shared** if your specific use case requires it.
|
||||
* Temporary files *must be deleted at the end of the job by the user*.
|
||||
* Remaining files will be deleted by the system if detected.
|
||||
* Files not accessed within 28 days will be automatically cleaned up by the system.
|
||||
* If for some reason the scratch areas get full, admins have the right to clean up the oldest data.
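
A minimal sketch of how a batch job can use the local scratch area and clean it up afterwards (all paths and job options are illustrative):

```bash
#!/bin/bash
#SBATCH --job-name=scratch-example
#SBATCH --time=01:00:00

# Create a private working directory on the node-local scratch disk
WORKDIR=$(mktemp -d /scratch/${USER}_${SLURM_JOB_ID}_XXXX)

# Make sure the temporary files are removed when the job ends
trap 'rm -rf "$WORKDIR"' EXIT

cd "$WORKDIR"
# ... run the I/O-intensive part of the job here ...

# Copy back only the results you want to keep before the trap cleans up
cp results.dat /data/user/$USER/
```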
|
173
pages/merlin7/02-How-To-Use-Merlin/transfer-data.md
Normal file
@ -0,0 +1,173 @@
|
||||
---
|
||||
title: Transferring Data
|
||||
#tags:
|
||||
keywords: transferring data, data transfer, rsync, winscp, copy data, copying, sftp, import, export, hop, vpn
|
||||
last_updated: 24 August 2023
|
||||
#summary: ""
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/transfer-data.html
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Most methods allow data to be either transmitted or received, so it may make sense to
|
||||
initiate the transfer from either merlin or the other system, depending on the network
|
||||
visibility.
|
||||
|
||||
- Merlin login nodes are visible from the PSI network, so direct data transfer
|
||||
(rsync/WinSCP/sftp) is generally preferable.
|
||||
- Protocols from Merlin7 to PSI may require special firewall rules.
|
||||
- Merlin login nodes can access the internet using a limited set of protocols:
|
||||
- HTTP-based protocols using ports 80 or 443 (https, WebDav, etc.)
|
||||
- Protocols using other ports require admin configuration and may only work with
|
||||
specific hosts, and may require new firewall rules (ssh, ftp, rsync daemons, etc).
|
||||
- Systems on the internet can access the [PSI Data Transfer](https://www.psi.ch/en/photon-science-data-services/data-transfer) service
|
||||
`datatransfer.psi.ch`, using ssh-based protocols and [Globus](https://www.globus.org/)
|
||||
|
||||
SSH-based protocols using port 22 (rsync-over-ssh, sftp, WinSCP, etc.) to most PSI servers are, in general, not permitted.
|
||||
|
||||
## Direct transfer via Merlin7 login nodes
|
||||
|
||||
The following methods transfer data directly via the [login
|
||||
nodes](/merlin7/interactive.html#login-nodes-hardware-description). They are suitable
|
||||
for use from within the PSI network.
|
||||
|
||||
### Rsync
|
||||
|
||||
Rsync is the preferred method to transfer data from Linux/MacOS. It allows
|
||||
transfers to be easily resumed if they get interrupted. The general syntax is:
|
||||
|
||||
```
|
||||
rsync -avAHXS <src> <dst>
|
||||
```
|
||||
|
||||
For example, to transfer files from your local computer to a merlin project
|
||||
directory:
|
||||
|
||||
```
|
||||
rsync -avAHXS ~/localdata $USER@login001.merlin7.psi.ch:/data/project/general/myproject/
|
||||
```
|
||||
|
||||
You can resume interrupted transfers by simply rerunning the command. Previously
|
||||
transferred files will be skipped.
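
Transfers in the other direction work the same way. For example, to copy results from a Merlin project directory back to your local computer (paths are illustrative):

```
rsync -avAHXS $USER@login001.merlin7.psi.ch:/data/project/general/myproject/results ~/localdata/
```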
|
||||
|
||||
|
||||
### WinSCP
|
||||
|
||||
The WinSCP tool can be used for remote file transfer on Windows. It is available
|
||||
from the Software Kiosk on PSI machines. Add `login001.merlin7.psi.ch` or `login002.merlin7.psi.ch`
|
||||
as a host and connect with your PSI credentials. You can then drag-and-drop files between your
|
||||
local computer and merlin.
|
||||
|
||||
### SWITCHfilesender
|
||||
|
||||
**[SWITCHfilesender](https://filesender.switch.ch/filesender2/?s=upload)** is an installation of the FileSender project (filesender.org) which is a web based application that allows authenticated users to securely and easily send arbitrarily large files to other users.
|
||||
|
||||
Authentication of users is provided through SimpleSAMLphp, supporting SAML2, LDAP and RADIUS and more. Users without an account can be sent an upload voucher by an authenticated user. FileSender is developed to the requirements of the higher education and research community.
|
||||
|
||||
The purpose of the software is to send a large file to someone, have that file available for download for a certain number of downloads and/or a certain amount of time, and after that automatically delete the file. The software is not intended as a permanent file publishing platform.
|
||||
|
||||
**[SWITCHfilesender](https://filesender.switch.ch/filesender2/?s=upload)** is fully integrated with PSI, therefore, PSI employees can log in by using their PSI account (through Authentication and Authorization Infrastructure / AAI, by selecting PSI as the institution to be used for log in).
|
||||
|
||||
{% comment %}
|
||||
## PSI Data Transfer
|
||||
|
||||
From August 2024, Merlin is connected to the **[PSI Data Transfer](https://www.psi.ch/en/photon-science-data-services/data-transfer)** service,
|
||||
`datatransfer.psi.ch`. This is a central service managed by the **[Linux team](https://linux.psi.ch/index.html)**. However, any problems or questions related to it can be directly
|
||||
[reported](/merlin7/contact.html) to the Merlin administrators, which will forward the request if necessary.
|
||||
|
||||
The PSI Data Transfer servers supports the following protocols:
|
||||
* Data Transfer - SSH (scp / rsync)
|
||||
* Data Transfer - Globus
|
||||
|
||||
Notice that `datatransfer.psi.ch` does not allow SSH login, only `rsync`, `scp` and [Globus](https://www.globus.org/) access is allowed.
|
||||
|
||||
The following filesystems are mounted:
|
||||
* `/merlin/export` which points to the `/export` directory in Merlin.
|
||||
* `/merlin/data/experiment/mu3e` which points to the `/data/experiment/mu3e` directories in Merlin.
|
||||
* Mu3e sub-directories are mounted in RW (read-write), except for `data` (read-only mounted)
|
||||
* `/merlin/data/project/general` which points to the `/data/project/general` directories in Merlin.
|
||||
* Owners of Merlin projects should request explicit access to it.
|
||||
* Currently, only `CSCS` is available for transferring files between PizDaint/Alps and Merlin
|
||||
* `/merlin/data/project/bio` which points to the `/data/project/bio` directories in Merlin.
|
||||
* `/merlin/data/user` which points to the `/data/user` directories in Merlin.
|
||||
|
||||
Access to the PSI Data Transfer uses ***Multi factor authentication*** (MFA).
|
||||
Therefore, having the Microsoft Authenticator App is required as explained [here](https://www.psi.ch/en/computing/change-to-mfa).
|
||||
|
||||
{{site.data.alerts.tip}}Please follow the
|
||||
<b><a href="https://www.psi.ch/en/photon-science-data-services/data-transfer">Official PSI Data Transfer</a></b> documentation for further instructions.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
### Directories
|
||||
|
||||
#### /merlin/data/user
|
||||
|
||||
User data directories are mounted in RW.
|
||||
|
||||
{{site.data.alerts.warning}}Please <b>ensure properly secured permissions</b> in your '/data/user'
directory. By default, when the directory is created, the system applies the most restrictive
permissions. However, this does not prevent users from changing permissions if they wish. At that
point, users become responsible for those changes.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
#### /merlin/export
|
||||
|
||||
Transferring big amounts of data from outside PSI to Merlin is always possible through `/export`.
|
||||
|
||||
{{site.data.alerts.tip}}<b>The '/export' directory can be used by any Merlin user.</b>
|
||||
This is configured in Read/Write mode. If you need access, please, contact the Merlin administrators.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
{{site.data.alerts.warning}}The use of <b>/export</b> as an extension of the quota <i>is forbidden</i>.
|
||||
<br><b><i>Auto cleanup policies</i></b> in the <b>export</b> area apply for files older than 28 days.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
##### Exporting data from Merlin
|
||||
|
||||
For exporting data from Merlin to outside PSI by using `/export`, one has to:
|
||||
* From a Merlin login node, copy your data from any directory (i.e. `/data/project`, `/data/user`, `/scratch`) to
|
||||
`/export`. Ensure to properly secure your directories and files with proper permissions.
|
||||
* Once data is copied, from **`datatransfer.psi.ch`**, copy the data from `/merlin/export` to outside PSI
|
||||
|
||||
##### Importing data to Merlin
|
||||
|
||||
For importing data from outside PSI to Merlin by using `/export`, one has to:
|
||||
* From **`datatransfer.psi.ch`**, copy the data from outside PSI to `/merlin/export`.
|
||||
Ensure to properly secure your directories and files with proper permissions.
|
||||
* Once data is copied, from a Merlin login node, copy your data from `/export` to any directory (i.e. `/data/project`, `/data/user`, `/scratch`).
|
||||
|
||||
#### Request access to your project directory
|
||||
|
||||
Optionally, instead of using `/export`, Merlin project owners can request Read/Write or Read/Only access to their project directory.
|
||||
|
||||
{{site.data.alerts.tip}}<b>Merlin projects can request direct access.</b>
|
||||
This can be configured in Read/Write or Read/Only modes. If your project needs access, please, contact the Merlin administrators.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
## Connecting to Merlin7 from outside PSI
|
||||
|
||||
Merlin7 is fully accessible from within the PSI network. To connect from outside you can use:
|
||||
|
||||
- [VPN](https://www.psi.ch/en/computing/vpn) ([alternate instructions](https://intranet.psi.ch/BIO/ComputingVPN))
|
||||
- [SSH hop](https://www.psi.ch/en/computing/ssh-hop)
|
||||
* Please avoid transferring large amounts of data through **hop**
|
||||
- [No Machine](nomachine.md)
|
||||
* Remote Interactive Access through [**'rem-acc.psi.ch'**](https://www.psi.ch/en/photon-science-data-services/remote-interactive-access)
|
||||
* Please avoid transferring large amounts of data through **NoMachine**
|
||||
|
||||
## Connecting from Merlin7 to outside file shares
|
||||
|
||||
### `merlin_rmount` command
|
||||
|
||||
Merlin provides a command for mounting remote file systems, called `merlin_rmount`. This
|
||||
provides a helpful wrapper over the Gnome storage utilities, and provides support for a wide range of remote file formats, including
|
||||
- SMB/CIFS (Windows shared folders)
|
||||
- WebDav
|
||||
- AFP
|
||||
- FTP, SFTP
|
||||
- [others](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/using_the_desktop_environment_in_rhel_8/managing-storage-volumes-in-gnome_using-the-desktop-environment-in-rhel-8#gvfs-back-ends_managing-storage-volumes-in-gnome)
|
||||
|
||||
|
||||
[More instruction on using `merlin_rmount`](/merlin7/merlin-rmount.html)
|
||||
{% endcomment %}
|
202
pages/merlin7/03-Slurm-General-Documentation/interactive-jobs.md
Normal file
@ -0,0 +1,202 @@
|
||||
---
|
||||
title: Running Interactive Jobs
|
||||
#tags:
|
||||
keywords: interactive, X11, X, srun, salloc, job, jobs, slurm, nomachine, nx
|
||||
last_updated: 07 August 2024
|
||||
summary: "This document describes how to run interactive jobs as well as X based software."
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/interactive-jobs.html
|
||||
---
|
||||
|
||||
## Running interactive jobs
|
||||
|
||||
There are two different ways of running interactive jobs in Slurm, using
the ``salloc`` and ``srun`` commands:
|
||||
|
||||
* **``salloc``**: to obtain a Slurm job allocation (a set of nodes), execute command(s), and then release the allocation when the command is finished.
|
||||
* **``srun``**: is used for running parallel tasks.
|
||||
|
||||
### srun
|
||||
|
||||
``srun`` is used to run parallel jobs in the batch system. It can be used within a batch script
(submitted with ``sbatch``), or within a job allocation (obtained with ``salloc``).
It can also be used as a direct command (for example, from the login nodes).
|
||||
|
||||
When used inside a batch script or during a job allocation, ``srun`` is constrained to the
amount of resources allocated by the ``sbatch``/``salloc`` commands. In ``sbatch``, these resources
are usually defined inside the batch script with the format ``#SBATCH <option>=<value>``.
In other words, if you define in your batch script or allocation 88 tasks (and 1 thread / core)
and 2 nodes, ``srun`` is constrained to this amount of resources (you can use less, but never
exceed those limits).
|
||||
|
||||
When used from a login node, ``srun`` is usually used to run a specific command or piece of software
interactively. ``srun`` is a blocking process (it will block the bash prompt until the ``srun``
command finishes, unless you run it in the background with ``&``). This can be very useful for running
interactive software which pops up a window and then submits jobs or runs sub-tasks in the
background (for example, **Relion**, **cisTEM**, etc.).
|
||||
|
||||
Refer to ``man srun`` for exploring all possible options for that command.
|
||||
|
||||
<details>
|
||||
<summary>[Show 'srun' example]: Running 'hostname' command on 3 nodes, using 2 cores (1 task/core) per node</summary>
|
||||
<pre class="terminal code highlight js-syntax-highlight plaintext" lang="plaintext" markdown="false">
|
||||
caubet_m@login001:~> srun --clusters=merlin7 --ntasks=6 --ntasks-per-node=2 --nodes=3 hostname
|
||||
cn001.merlin7.psi.ch
|
||||
cn001.merlin7.psi.ch
|
||||
cn002.merlin7.psi.ch
|
||||
cn002.merlin7.psi.ch
|
||||
cn003.merlin7.psi.ch
|
||||
cn003.merlin7.psi.ch
|
||||
</pre>
|
||||
</details>
|
||||
|
||||
### salloc
|
||||
|
||||
**``salloc``** is used to obtain a Slurm job allocation (a set of nodes). Once the job is allocated,
users are able to execute interactive command(s). Once finished (``exit`` or ``Ctrl+D``),
the allocation is released. **``salloc``** is a blocking command, that is, the command will block
until the requested resources are allocated.
|
||||
|
||||
When running **``salloc``**, once the resources are allocated, *by default* the user will get
a ***new shell on one of the allocated resources*** (if a user has requested several nodes, it will
prompt a new shell on the first allocated node). However, this behaviour can be changed by adding
a shell (`$SHELL`) at the end of the `salloc` command. For example:
|
||||
|
||||
```bash
|
||||
# Typical 'salloc' call
|
||||
salloc --clusters=merlin7 -N 2 -n 2
|
||||
|
||||
# Custom 'salloc' call
|
||||
# - $SHELL will open a local shell on the login node from where ``salloc`` is running
|
||||
salloc --clusters=merlin7 -N 2 -n 2 $SHELL
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary>[Show 'salloc' example]: Allocating 2 cores (1 task/core) in 2 nodes (1 core/node) - <i>Default</i></summary>
|
||||
<pre class="terminal code highlight js-syntax-highlight plaintext" lang="plaintext" markdown="false">
|
||||
caubet_m@login001:~> salloc --clusters=merlin7 -N 2 -n 2
|
||||
salloc: Granted job allocation 161
|
||||
salloc: Nodes cn[001-002] are ready for job
|
||||
|
||||
caubet_m@login001:~> srun hostname
|
||||
cn002.merlin7.psi.ch
|
||||
cn001.merlin7.psi.ch
|
||||
|
||||
caubet_m@login001:~> exit
|
||||
exit
|
||||
salloc: Relinquishing job allocation 161
|
||||
</pre>
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>[Show 'salloc' example]: Allocating 2 cores (1 task/core) in 2 nodes (1 core/node) - <i>$SHELL</i></summary>
|
||||
<pre class="terminal code highlight js-syntax-highlight plaintext" lang="plaintext" markdown="false">
|
||||
caubet_m@login001:~> salloc --clusters=merlin7 --ntasks=2 --nodes=2 $SHELL
|
||||
salloc: Granted job allocation 165
|
||||
salloc: Nodes cn[001-002] are ready for job
|
||||
caubet_m@login001:~> srun hostname
|
||||
cn001.merlin7.psi.ch
|
||||
cn002.merlin7.psi.ch
|
||||
caubet_m@login001:~> exit
|
||||
exit
|
||||
salloc: Relinquishing job allocation 165
|
||||
</pre>
|
||||
</details>
|
||||
|
||||
## Running interactive jobs with X11 support
|
||||
|
||||
### Requirements
|
||||
|
||||
#### Graphical access
|
||||
|
||||
[NoMachine](/merlin7/nomachine.html) is the official supported service for graphical
|
||||
access in the Merlin cluster. This service is running on the login nodes. Check the
|
||||
document [{Accessing Merlin -> NoMachine}](/merlin7/nomachine.html) for details about
|
||||
how to connect to the **NoMachine** service in the Merlin cluster.
|
||||
|
||||
For other non officially supported graphical access (X11 forwarding):
|
||||
|
||||
* For Linux clients, please follow [{How To Use Merlin -> Accessing from Linux Clients}](/merlin7/connect-from-linux.html)
|
||||
* For Windows clients, please follow [{How To Use Merlin -> Accessing from Windows Clients}](/merlin7/connect-from-windows.html)
|
||||
* For MacOS clients, please follow [{How To Use Merlin -> Accessing from MacOS Clients}](/merlin7/connect-from-macos.html)
|
||||
|
||||
### 'srun' with x11 support
|
||||
|
||||
The Merlin6 and Merlin7 clusters allow running window-based (X11) applications. For that, you need to
add the option ``--x11`` to the ``srun`` command. For example:
|
||||
|
||||
```bash
|
||||
srun --clusters=merlin7 --x11 sview
|
||||
```
|
||||
|
||||
will pop up an X11-based Slurm view of the cluster.
|
||||
|
||||
In the same manner, you can create a bash shell with X11 support. To do that, you need
to add the option ``--pty`` to the ``srun --x11`` command. Once the resource is allocated, from
there you can interactively run X11 and non-X11 based commands.
|
||||
|
||||
```bash
|
||||
srun --clusters=merlin7 --x11 --pty bash
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary>[Show 'srun' with X11 support examples]</summary>
|
||||
<pre class="terminal code highlight js-syntax-highlight plaintext" lang="plaintext" markdown="false">
|
||||
caubet_m@login001:~> srun --clusters=merlin7 --x11 sview
|
||||
|
||||
caubet_m@login001:~>
|
||||
|
||||
caubet_m@login001:~> srun --clusters=merlin7 --x11 --pty bash
|
||||
|
||||
caubet_m@cn003:~> sview
|
||||
|
||||
caubet_m@cn003:~> echo "This was an example"
|
||||
This was an example
|
||||
|
||||
caubet_m@cn003:~> exit
|
||||
exit
|
||||
</pre>
|
||||
</details>
|
||||
|
||||
### 'salloc' with x11 support
|
||||
|
||||
The **Merlin6** and **Merlin7** clusters allow running window-based (X11) applications. For that, you need to
add the option ``--x11`` to the ``salloc`` command. For example:
|
||||
|
||||
```bash
|
||||
salloc --clusters=merlin7 --x11 sview
|
||||
```
|
||||
|
||||
will pop up an X11-based Slurm view of the cluster.
|
||||
|
||||
In the same manner, you can create a bash shell with X11 support. To do that, simply run
``salloc --clusters=merlin7 --x11``. Once the resource is allocated, from
there you can interactively run X11 and non-X11 based commands.
|
||||
|
||||
```bash
|
||||
salloc --clusters=merlin7 --x11
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary>[Show 'salloc' with X11 support examples]</summary>
|
||||
<pre class="terminal code highlight js-syntax-highlight plaintext" lang="plaintext" markdown="false">
|
||||
caubet_m@login001:~> salloc --clusters=merlin7 --x11 sview
|
||||
salloc: Granted job allocation 174
|
||||
salloc: Nodes cn001 are ready for job
|
||||
salloc: Relinquishing job allocation 174
|
||||
|
||||
caubet_m@login001:~> salloc --clusters=merlin7 --x11
|
||||
salloc: Granted job allocation 175
|
||||
salloc: Nodes cn001 are ready for job
|
||||
caubet_m@cn001:~>
|
||||
|
||||
caubet_m@cn001:~> sview
|
||||
|
||||
caubet_m@cn001:~> echo "This was an example"
|
||||
This was an example
|
||||
|
||||
caubet_m@cn001:~> exit
|
||||
exit
|
||||
salloc: Relinquishing job allocation 175
|
||||
</pre>
|
||||
</details>
|
@ -0,0 +1,59 @@
|
||||
---
|
||||
title: Slurm cluster 'merlin7'
|
||||
#tags:
|
||||
keywords: configuration, partitions, node definition
|
||||
#last_updated: 24 Mai 2023
|
||||
summary: "This document describes a summary of the Merlin7 configuration."
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/merlin7-configuration.html
|
||||
---
|
||||
|
||||
This documentation shows basic Slurm configuration and options needed to run jobs in the Merlin7 cluster.
|
||||
|
||||
## Infrastructure
|
||||
|
||||
### Hardware
|
||||
|
||||
* 2 CPU-only login nodes
|
||||
* 77 CPU-only compute nodes
|
||||
* 5 GPU A100 nodes
|
||||
* 8 GPU Grace Hopper nodes
|
||||
|
||||
The specification of the node types is:
|
||||
|
||||
| Node | #Nodes | CPU | RAM | GRES |
|
||||
| ----: | ------ | --- | --- | ---- |
|
||||
| Login Nodes | 2 | _2x_ AMD EPYC 7742 (x86_64 Rome, 64 Cores, 2.25GHz) | 512GB DDR4 3200Mhz | |
|
||||
| CPU Nodes | 77 | _2x_ AMD EPYC 7742 (x86_64 Rome, 64 Cores, 2.25GHz) | 512GB DDR4 3200Mhz | |
|
||||
| A100 GPU Nodes | 8 | _2x_ AMD EPYC 7713 (x86_64 Milan, 64 Cores, 3.2GHz) | 512GB DDR4 3200Mhz | 4 x NV_A100 (80GB) |
|
||||
| GH GPU Nodes | 5 | _2x_ NVidia Grace Neoverse-V2 (SBSA ARM 64bit, 144 Cores, 3.1GHz) | _2x_ 480GB DDR5X (CPU+GPU) | 4 x NV_GH200 (120GB) |
|
||||
|
||||
### Network
|
||||
|
||||
The Merlin7 cluster builds on top of HPE/Cray technologies, including a high-performance network fabric called Slingshot. This network fabric is able
|
||||
to provide up to 200 Gbit/s throughput between nodes. Further information on Slingshot can be found at [HPE](https://www.hpe.com/psnow/doc/PSN1012904596HREN) and
|
||||
at <https://www.glennklockwood.com/garden/slingshot>.
|
||||
|
||||
Through software interfaces like [libFabric](https://ofiwg.github.io/libfabric/) (which is available on Merlin7), applications can leverage the network seamlessly.
|
||||
|
||||
### Storage
|
||||
|
||||
Unlike previous iterations of the Merlin HPC clusters, Merlin7 _does not_ have any local storage. Instead, storage for the entire cluster is provided through
|
||||
a dedicated storage appliance from HPE/Cray called [ClusterStor](https://www.hpe.com/psnow/doc/PSN1012842049INEN.pdf).
|
||||
|
||||
The appliance is built from several storage servers:
|
||||
|
||||
* 2 management nodes
|
||||
* 2 MDS servers, 12 drives per server, 2.9TiB (Raid10)
|
||||
* 8 OSS-D servers, 106 drives per server, 14.5 TB HDDs (Gridraid / Raid6)
|
||||
* 4 OSS-F servers, 12 drives per server, 7TiB SSDs (Raid10)
|
||||
|
||||
With an effective storage capacity of:
|
||||
|
||||
* 10 PB HDD
|
||||
* value visible on linux: HDD 9302.4 TiB
|
||||
* 162 TB SSD
|
||||
* value visible on linux: SSD 151.6 TiB
|
||||
* 23.6 TiB on Metadata
|
||||
|
||||
The storage is directly connected to the cluster (and each individual node) through the Slingshot NIC.
|
@ -0,0 +1,351 @@
|
||||
---
|
||||
title: Slurm merlin7 Configuration
|
||||
#tags:
|
||||
keywords: configuration, partitions, node definition
|
||||
#last_updated: 24 Mai 2023
|
||||
summary: "This document describes a summary of the Merlin7 Slurm CPU-based configuration."
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/slurm-configuration.html
|
||||
---
|
||||
|
||||
This documentation shows basic Slurm configuration and options needed to run jobs in the Merlin7 cluster.
|
||||
|
||||
## Public partitions configuration summary
|
||||
|
||||
### CPU public partitions
|
||||
|
||||
| PartitionName | DefaultTime | MaxTime | Priority | Account | Per Job Limits | Per User Limits |
|
||||
| -----------------: | -----------: | ----------: | -------: | ---------------: | -----------------: | -----------------: |
|
||||
| **<u>general</u>** | 1-00:00:00 | 7-00:00:00 | Low | <u>merlin</u> | cpu=1024,mem=1920G | cpu=1024,mem=1920G |
|
||||
| **daily** | 0-01:00:00 | 1-00:00:00 | Medium | <u>merlin</u> | cpu=1024,mem=1920G | cpu=2048,mem=3840G |
|
||||
| **hourly** | 0-00:30:00 | 0-01:00:00 | High | <u>merlin</u> | cpu=2048,mem=3840G | cpu=8192,mem=15T |
|
||||
|
||||
### GPU public partitions
|
||||
|
||||
#### A100 nodes
|
||||
|
||||
| PartitionName | DefaultTime | MaxTime | Priority | Account | Per Job Limits | Per User Limits |
|
||||
| -------------------: | -----------: | ----------: | ---------: | -------------: | -------------------------------: | -------------------------------: |
|
||||
| **a100-general** | 1-00:00:00 | 7-00:00:00 | Low | <u>merlin</u> | gres/gpu=4 | gres/gpu=8 |
|
||||
| **a100-daily** | 0-01:00:00 | 1-00:00:00 | Medium | <u>merlin</u> | gres/gpu=8 | gres/gpu=8 |
|
||||
| **a100-hourly** | 0-00:30:00 | 0-01:00:00 | High | <u>merlin</u> | gres/gpu=8 | gres/gpu=8 |
|
||||
| **a100-interactive** | 0-01:00:00 | 0-12:00:00 | Very High | <u>merlin</u> | cpu=16,gres/gpu=1,mem=60G,node=1 | cpu=16,gres/gpu=1,mem=60G,node=1 |
|
||||
|
||||
#### Grace-Hopper nodes
|
||||
|
||||
| PartitionName | DefaultTime | MaxTime | Priority | Account | Per Job Limits | Per User Limits |
|
||||
| -------------------: | -----------: | ----------: | ---------: | -------------: | -------------------------------: | -------------------------------: |
|
||||
| **gh-general** | 1-00:00:00 | 7-00:00:00 | Low | <u>merlin</u> | gres/gpu=4 | gres/gpu=8 |
|
||||
| **gh-daily** | 0-01:00:00 | 1-00:00:00 | Medium | <u>merlin</u> | gres/gpu=8 | gres/gpu=8 |
|
||||
| **gh-hourly** | 0-00:30:00 | 0-01:00:00 | High | <u>merlin</u> | gres/gpu=8 | gres/gpu=8 |
|
||||
| **gh-interactive** | 0-01:00:00 | 0-12:00:00 | Very High | <u>merlin</u> | cpu=16,gres/gpu=1,mem=46G,node=1 | cpu=16,gres/gpu=1,mem=46G,node=1 |
|
||||
|
||||
## CPU cluster: merlin7
|
||||
|
||||
**By default, jobs will be submitted to `merlin7`**, as it is the primary cluster configured on the login nodes.
|
||||
Specifying the cluster name is typically unnecessary unless you have defined environment variables that could override the default cluster name.
|
||||
However, when necessary, one can specify the cluster as follows:
|
||||
```bash
|
||||
#SBATCH --cluster=merlin7
|
||||
```
|
||||
|
||||
### CPU general configuration
|
||||
|
||||
The **Merlin7 CPU cluster** is configured with the **`CR_CORE_MEMORY`** and **`CR_ONE_TASK_PER_CORE`** options.
|
||||
* This configuration treats both cores and memory as consumable resources.
|
||||
* Since the nodes are running with **hyper-threading** enabled, each core thread is counted as a CPU
|
||||
to fulfill a job's resource requirements.
|
||||
|
||||
By default, Slurm will allocate one task per core, which means:
|
||||
* Each task will consume 2 **CPUs**, regardless of whether both threads are actively used by the job.
|
||||
|
||||
This behavior ensures consistent resource allocation but may result in underutilization of hyper-threading in some cases.
|
||||
|
||||
### CPU nodes definition
|
||||
|
||||
The table below provides an overview of the Slurm configuration for the different node types in the Merlin7 cluster.
|
||||
This information is essential for understanding how resources are allocated, enabling users to tailor their submission
|
||||
scripts accordingly.
|
||||
|
||||
| Nodes | Sockets | CoresPerSocket | Cores | ThreadsPerCore | CPUs | MaxMemPerNode | DefMemPerCPU | Features |
|
||||
| --------------------:| -------: | --------------: | -----: | --------------: | ----: | ------------: | -----------: | ------------: |
|
||||
| login[001-002] | 2 | 64 | 128 | 2 | 256 | 480G | 1920M | AMD_EPYC_7713 |
|
||||
| cn[001-077] | 2 | 64 | 128 | 2 | 256 | 480G | 1920M | AMD_EPYC_7713 |
|
||||
|
||||
Notes on memory configuration:
|
||||
* **Memory allocation options:** To request additional memory, use the following options in your submission script:
|
||||
* **`--mem=<mem_in_MB>`**: Allocates memory per node.
|
||||
* **`--mem-per-cpu=<mem_in_MB>`**: Allocates memory per CPU (equivalent to a core thread).
|
||||
|
||||
The total memory requested cannot exceed the **`MaxMemPerNode`** value.
|
||||
* **Impact of disabling Hyper-Threading:** Using the **`--hint=nomultithread`** option disables one thread per core,
|
||||
effectively halving the number of available CPUs. Consequently, memory allocation will also be halved unless explicitly
|
||||
adjusted.
|
||||
|
||||
For MPI-based jobs, where performance generally improves with single-threaded CPUs, this option is recommended.
|
||||
In such cases, you should double the **`--mem-per-cpu`** value to account for the reduced number of threads.
|
||||
|
||||
{{site.data.alerts.tip}}
|
||||
Always verify the Slurm <b>'/var/spool/slurmd/conf-cache/slurm.conf'</b> configuration file for potential changes.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
### User and job limits with QoS
|
||||
|
||||
In the `merlin7` CPU cluster, we enforce certain limits on jobs and users to ensure fair resource usage and prevent
|
||||
overuse by a single user or job. These limits aim to balance resource availability while maintaining overall cluster
|
||||
efficiency. However, applying limits can occasionally impact the cluster’s utilization. For example, user-specific
|
||||
limits may result in pending jobs even when many nodes are idle due to low activity.
|
||||
|
||||
On the other hand, these limits also enhance cluster efficiency by preventing scenarios such as a single job monopolizing
|
||||
all available resources, which could block other jobs from running. Without job size limits, for instance, a large job
|
||||
might drain the entire cluster to satisfy its resource request, a situation that is generally undesirable.
|
||||
|
||||
Thus, setting appropriate limits is essential to maintain fair resource usage while optimizing cluster efficiency. These
|
||||
limits should allow for a mix of jobs of varying sizes and types, including single-core and parallel jobs, to coexist
|
||||
effectively.
|
||||
|
||||
To implement these limits, **we utilize Quality of Service (QoS)**. Different QoS policies are defined and applied
|
||||
**to specific partitions** in line with the established resource allocation policies. The table below outlines the
|
||||
various QoS definitions applicable to the merlin7 CPU-based cluster. Here:
|
||||
* `MaxTRES` specifies resource limits per job.
|
||||
* `MaxTRESPU` specifies resource limits per user.
|
||||
|
||||
| Name | MaxTRES | MaxTRESPU | Scope |
|
||||
| --------------: | -----------------: | -----------------: | ---------------------: |
|
||||
| **normal** | | | partition |
|
||||
| **cpu_general** | cpu=1024,mem=1920G | cpu=1024,mem=1920G | <u>user</u>, partition |
|
||||
| **cpu_daily** | cpu=1024,mem=1920G | cpu=2048,mem=3840G | partition |
|
||||
| **cpu_hourly** | cpu=2048,mem=3840G | cpu=8192,mem=15T | partition |
|
||||
|
||||
Where:
|
||||
* **`normal` QoS:** This QoS has no limits and is typically applied to partitions that do not require user or job
|
||||
restrictions.
|
||||
* **`cpu_general` QoS:** This is the **default QoS** for `merlin7` _users_. It limits the total resources available to each
|
||||
user. Additionally, this QoS is applied to the `general` partition, enforcing restrictions at the partition level and
|
||||
overriding user-level QoS.
|
||||
* **`cpu_daily` QoS:** Guarantees increased resources for the `daily` partition, accommodating shorter-duration jobs
|
||||
with higher resource needs.
|
||||
* **`cpu_hourly` QoS:** Offers the least constraints, allowing more resources to be used for the `hourly` partition,
|
||||
which caters to very short-duration jobs.
|
||||
|
||||
For additional details, refer to the [CPU partitions](/merlin7/slurm-configuration.html#CPU-partitions) section.
|
||||
|
||||
{{site.data.alerts.tip}}
|
||||
Always verify QoS definitions for potential changes using the <b>'sacctmgr show qos format="Name%22,MaxTRESPU%35,MaxTRES%35"'</b> command.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
### CPU partitions
|
||||
|
||||
This section provides a summary of the partitions available in the `merlin7` CPU cluster.
|
||||
|
||||
Key concepts:
|
||||
* **`PriorityJobFactor`**: This value is added to a job’s priority (visible in the `PARTITION` column of the `sprio -l` command).
|
||||
Jobs submitted to partitions with higher `PriorityJobFactor` values generally run sooner. However, other factors like *job age*
|
||||
and especially *fair share* can also influence scheduling.
|
||||
* **`PriorityTier`**: Jobs submitted to partitions with higher `PriorityTier` values take precedence over pending jobs in partitions
|
||||
with lower `PriorityTier` values. Additionally, jobs from higher `PriorityTier` partitions can preempt running jobs in lower-tier
|
||||
partitions, where applicable.
|
||||
* **`QoS`**: Specifies the quality of service associated with a partition. It is used to control and restrict resource availability
|
||||
for specific partitions, ensuring that resource allocation aligns with intended usage policies. Detailed explanations of the various
|
||||
QoS settings can be found in the [User and job limits with QoS](/merlin7/slurm-configuration.html#user-and-job-limits-with-qos) section.
|
||||
|
||||
{{site.data.alerts.tip}}
|
||||
Always verify partition configurations for potential changes using the <b>'scontrol show partition'</b> command.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
#### CPU public partitions
|
||||
|
||||
| PartitionName | DefaultTime | MaxTime | TotalNodes | PriorityJobFactor | PriorityTier | QoS | AllowAccounts |
|
||||
| -----------------: | -----------: | ----------: | --------: | ----------------: | -----------: | ----------: | -------------: |
|
||||
| **<u>general</u>** | 1-00:00:00 | 7-00:00:00 | 50 | 1 | 1 | cpu_general | <u>merlin</u> |
|
||||
| **daily** | 0-01:00:00 | 1-00:00:00 | 62 | 500 | 1 | cpu_daily | <u>merlin</u> |
|
||||
| **hourly** | 0-00:30:00 | 0-01:00:00 | 77 | 1000 | 1 | cpu_hourly | <u>merlin</u> |
|
||||
|
||||
All Merlin users are part of the `merlin` account, which is used as the *default account* when submitting jobs.
|
||||
Similarly, if no partition is specified, jobs are automatically submitted to the `general` partition by default.
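
For instance, the defaults can be overridden explicitly in the submission script (an illustrative sketch; the account line is normally not needed since `merlin` is the default):

```bash
#SBATCH --cluster=merlin7
#SBATCH --partition=daily      # override the default 'general' partition
#SBATCH --account=merlin       # default account; usually implicit
```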
|
||||
|
||||
{{site.data.alerts.tip}}
|
||||
For jobs running less than one day, submit them to the <b>daily</b> partition.
|
||||
For jobs running less than one hour, use the <b>hourly</b> partition.
|
||||
These partitions provide higher priority and ensure quicker scheduling compared to <b>general</b>, which has limited node availability.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
The **`hourly`** partition may include private nodes as an additional buffer. However, the current Slurm partition configuration, governed
|
||||
by **`PriorityTier`**, ensures that jobs submitted to private partitions are prioritized and processed first. As a result, access to the
|
||||
**`hourly`** partition might experience delays in such scenarios.
|
||||
|
||||
#### CPU private partitions
|
||||
|
||||
##### CAS / ASA
|
||||
|
||||
| PartitionName | DefaultTime | MaxTime | TotalNodes | PriorityJobFactor | PriorityTier | QoS | AllowAccounts |
|
||||
| -----------------: | -----------: | ----------: | --------: | ----------------: | -----------: | ----------: | -------------: |
|
||||
| **asa** | 0-01:00:00 | 14-00:00:00 | 10 | 1 | 2 | normal | asa |
|
||||
|
||||
##### CNM / Mu3e
|
||||
|
||||
| PartitionName | DefaultTime | MaxTime | TotalNodes | PriorityJobFactor | PriorityTier | QoS | AllowAccounts |
|
||||
| -----------------: | -----------: | ----------: | --------: | ----------------: | -----------: | ----------: | -------------: |
|
||||
| **mu3e** | 1-00:00:00 | 7-00:00:00 | 4 | 1 | 2 | normal | mu3e, meg |
|
||||
|
||||
##### CNM / MeG
|
||||
|
||||
| PartitionName | DefaultTime | MaxTime | TotalNodes | PriorityJobFactor | PriorityTier | QoS | AllowAccounts |
|
||||
| -----------------: | -----------: | ----------: | --------: | ----------------: | -----------: | ----------: | -------------: |
|
||||
| **meg-short** | 0-01:00:00 | 0-01:00:00 | unlimited | 1000 | 2 | normal | meg |
|
||||
| **meg-long** | 1-00:00:00 | 5-00:00:00 | unlimited | 1 | 2 | normal | meg |
|
||||
| **meg-prod** | 1-00:00:00 | 5-00:00:00 | unlimited | 1000 | 4 | normal | meg |
|
||||
|
||||
## GPU cluster: gmerlin7
|
||||
|
||||
As mentioned in previous sections, by default, jobs will be submitted to `merlin7`, as it is the primary cluster configured on the login nodes.
|
||||
For submitting jobs to the GPU cluster, **the cluster name `gmerlin7` must be specified**, as follows:
|
||||
```bash
|
||||
#SBATCH --cluster=gmerlin7
|
||||
```
|
||||
|
||||
### GPU general configuration
|
||||
|
||||
The **Merlin7 GPU cluster** is configured with the **`CR_CORE_MEMORY`**, **`CR_ONE_TASK_PER_CORE`**, and **`ENFORCE_BINDING_GRES`** options.
|
||||
* This configuration treats both cores and memory as consumable resources.
|
||||
* Since the nodes are running with **hyper-threading** enabled, each core thread is counted as a CPU
|
||||
to fulfill a job's resource requirements.
|
||||
* Slurm will allocate the CPUs to the selected GPU.
|
||||
|
||||
By default, Slurm will allocate one task per core, which means:
|
||||
* For hyper-threaded nodes (NVIDIA A100-based nodes), each task will consume 2 **CPUs**, regardless of whether both threads are actively used by the job.
|
||||
* For the NVIDIA GraceHopper-based nodes, each task will consume 1 **CPU**.
|
||||
|
||||
This behavior ensures consistent resource allocation but may result in underutilization of hyper-threading in some cases.
|
||||
|
||||
### GPU nodes definition
|
||||
|
||||
The table below provides an overview of the Slurm configuration for the different node types in the Merlin7 cluster.
|
||||
This information is essential for understanding how resources are allocated, enabling users to tailor their submission
|
||||
scripts accordingly.
|
||||
|
||||
| Nodes | Sockets | CoresPerSocket | Cores | ThreadsPerCore | CPUs | MaxMemPerNode | DefMemPerCPU | Gres | Features |
|
||||
| --------------------:| -------: | --------------: | -----: | --------------: | ----: | ------------: | -----------: | --------------------------: | ---------------------: |
|
||||
| gpu[001-007] | 4 | 72 | 288 | 1 | 288 | 828G | 2944M | gpu:gh200:4 | AMD_EPYC_7713, NV_A100 |
|
||||
| gpu[101-105] | 1 | 64 | 64 | 2 | 128 | 480G | 3840M | gpu:nvidia_a100-sxm4-80gb:4 | GH200, NV_H100 |
|
||||
|
||||
Notes on memory configuration:
|
||||
* **Memory allocation options:** To request additional memory, use the following options in your submission script:
|
||||
* **`--mem=<mem_in_MB>`**: Allocates memory per node.
|
||||
* **`--mem-per-cpu=<mem_in_MB>`**: Allocates memory per CPU (equivalent to a core thread).
|
||||
|
||||
The total memory requested cannot exceed the **`MaxMemPerNode`** value.
|
||||
* **Impact of disabling Hyper-Threading:** Using the **`--hint=nomultithread`** option disables one thread per core,
|
||||
effectively halving the number of available CPUs. Consequently, memory allocation will also be halved unless explicitly
|
||||
adjusted.
|
||||
|
||||
For MPI-based jobs, where performance generally improves with single-threaded CPUs, this option is recommended.
|
||||
In such cases, you should double the **`--mem-per-cpu`** value to account for the reduced number of threads.
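
As an illustrative sketch, a minimal GPU batch script could look like this (partition, GPU count, CPU, memory and time values are examples only):

```bash
#!/bin/bash
#SBATCH --cluster=gmerlin7
#SBATCH --partition=a100-daily   # or one of the gh-* partitions for the Grace Hopper nodes
#SBATCH --gres=gpu:1             # generic GPU request; a specific model can be selected via the gres type
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --mem=60G
#SBATCH --time=0-12:00:00

srun ./my_gpu_application
```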
|
||||
|
||||
{{site.data.alerts.tip}}
|
||||
Always verify the Slurm <b>'/var/spool/slurmd/conf-cache/slurm.conf'</b> configuration file for potential changes.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
### User and job limits with QoS
|
||||
|
||||
In the `gmerlin7` GPU cluster, we enforce certain limits on jobs and users to ensure fair resource usage and prevent
|
||||
overuse by a single user or job. These limits aim to balance resource availability while maintaining overall cluster
|
||||
efficiency. However, applying limits can occasionally impact the cluster’s utilization. For example, user-specific
|
||||
limits may result in pending jobs even when many nodes are idle due to low activity.
|
||||
|
||||
On the other hand, these limits also enhance cluster efficiency by preventing scenarios such as a single job monopolizing
|
||||
all available resources, which could block other jobs from running. Without job size limits, for instance, a large job
|
||||
might drain the entire cluster to satisfy its resource request, a situation that is generally undesirable.
|
||||
|
||||
Thus, setting appropriate limits is essential to maintain fair resource usage while optimizing cluster efficiency. These
|
||||
limits should allow for a mix of jobs of varying sizes and types, including single-core and parallel jobs, to coexist
|
||||
effectively.
|
||||
|
||||
To implement these limits, **we utilize Quality of Service (QoS)**. Different QoS policies are defined and applied
|
||||
**to specific partitions** in line with the established resource allocation policies. The table below outlines the
|
||||
various QoS definitions applicable to the `gmerlin7` GPU cluster. Here:
|
||||
* `MaxTRES` specifies resource limits per job.
|
||||
* `MaxTRESPU` specifies resource limits per user.
|
||||
|
||||
| Name | MaxTRES | MaxTRESPU | Scope |
|
||||
| -----------------------: | -------------------------------: | ------------------------------: | ---------------------: |
|
||||
| **normal** | | | partition |
|
||||
| **gpu_general** | gres/gpu=4 | gres/gpu=8 | <u>user</u>, partition |
|
||||
| **gpu_daily** | gres/gpu=8 | gres/gpu=8 | partition |
|
||||
| **gpu_hourly** | gres/gpu=8 | gres/gpu=8 | partition |
|
||||
| **gpu_gh_interactive** | cpu=16,gres/gpu=1,mem=46G,node=1 |cpu=16,gres/gpu=1,mem=46G,node=1 | partition |
|
||||
| **gpu_a100_interactive** | cpu=16,gres/gpu=1,mem=60G,node=1 |cpu=16,gres/gpu=1,mem=60G,node=1 | partition |
|
||||
|
||||
Where:
|
||||
* **`normal` QoS:** This QoS has no limits and is typically applied to partitions that do not require user or job
|
||||
restrictions.
|
||||
* **`gpu_general` QoS:** This is the **default QoS** for `gmerlin7` _users_. It limits the total resources available to each
|
||||
user. Additionally, this QoS is applied to the `[a100|gh]-general` partitions, enforcing restrictions at the partition level and
|
||||
overriding user-level QoS.
|
||||
* **`gpu_daily` QoS:** Guarantees increased resources for the `[a100|gh]-daily` partitions, accommodating shorter-duration jobs
|
||||
with higher resource needs.
|
||||
* **`gpu_hourly` QoS:** Offers the least constraints, allowing more resources to be used for the `[a100|gh]-hourly` partitions,
|
||||
which caters to very short-duration jobs.
|
||||
* **`gpu_a100_interactive` & `gpu_gh_interactive` QoS:** Guarantee interactive access to GPU nodes for software compilation and
|
||||
small testing.
|
||||
|
||||
For additional details, refer to the [GPU partitions](/merlin7/slurm-configuration.html#GPU-partitions) section.
|
||||
|
||||
{{site.data.alerts.tip}}
|
||||
Always verify QoS definitions for potential changes using the <b>'sacctmgr show qos format="Name%22,MaxTRESPU%35,MaxTRES%35"'</b> command.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
### GPU partitions
|
||||
|
||||
This section provides a summary of the partitions available in the `gmerlin7` GPU cluster.
|
||||
|
||||
Key concepts:
|
||||
* **`PriorityJobFactor`**: This value is added to a job’s priority (visible in the `PARTITION` column of the `sprio -l` command).
|
||||
Jobs submitted to partitions with higher `PriorityJobFactor` values generally run sooner. However, other factors like *job age*
|
||||
and especially *fair share* can also influence scheduling.
|
||||
* **`PriorityTier`**: Jobs submitted to partitions with higher `PriorityTier` values take precedence over pending jobs in partitions
|
||||
with lower `PriorityTier` values. Additionally, jobs from higher `PriorityTier` partitions can preempt running jobs in lower-tier
|
||||
partitions, where applicable.
|
||||
* **`QoS`**: Specifies the quality of service associated with a partition. It is used to control and restrict resource availability
|
||||
for specific partitions, ensuring that resource allocation aligns with intended usage policies. Detailed explanations of the various
|
||||
QoS settings can be found in the [User and job limits with QoS](/merlin7/slurm-configuration.html#user-and-job-limits-with-qos) section.
|
||||
|
||||
{{site.data.alerts.tip}}
|
||||
Always verify partition configurations for potential changes using the <b>'scontrol show partition'</b> command.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
#### A100-based partitions
|
||||
|
||||
| PartitionName | DefaultTime | MaxTime | TotalNodes | PriorityJobFactor | PriorityTier | QoS | AllowAccounts |
|
||||
| -------------------: | -----------: | ----------: | --------: | ----------------: | -----------: | -------------------: | -------------: |
|
||||
| **a100-general** | 1-00:00:00 | 7-00:00:00 | 3 | 1 | 1 | gpu_general | <u>merlin</u> |
|
||||
| **a100-daily** | 0-01:00:00 | 1-00:00:00 | 4 | 500 | 1 | gpu_daily | <u>merlin</u> |
|
||||
| **a100-hourly** | 0-00:30:00 | 0-01:00:00 | 5 | 1000 | 1 | gpu_hourly | <u>merlin</u> |
|
||||
| **a100-interactive** | 0-01:00:00 | 0-12:00:00 | 5 | 1 | 2 | gpu_a100_interactive | <u>merlin</u> |
|
||||
|
||||
All Merlin users are part of the `merlin` account, which is used as the *default account* when submitting jobs.
|
||||
Similarly, if no partition is specified, jobs are automatically submitted to the `general` partition by default.
|
||||
|
||||
{{site.data.alerts.tip}}
|
||||
For jobs running less than one day, submit them to the <b>a100-daily</b> partition.
|
||||
For jobs running less than one hour, use the <b>a100-hourly</b> partition.
|
||||
These partitions provide higher priority and ensure quicker scheduling compared to <b>a100-general</b>, which has limited node availability.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
#### GH-based partitions
|
||||
|
||||
| PartitionName | DefaultTime | MaxTime | TotalNodes | PriorityJobFactor | PriorityTier | QoS | AllowAccounts |
|
||||
| -------------------: | -----------: | ----------: | --------: | ----------------: | -----------: | -------------------: | -------------: |
|
||||
| **gh-general** | 1-00:00:00 | 7-00:00:00 | 5 | 1 | 1 | gpu_general | <u>merlin</u> |
|
||||
| **gh-daily** | 0-01:00:00 | 1-00:00:00 | 6 | 500 | 1 | gpu_daily | <u>merlin</u> |
|
||||
| **gh-hourly** | 0-00:30:00 | 0-01:00:00 | 7 | 1000 | 1 | gpu_hourly | <u>merlin</u> |
|
||||
| **gh-interactive** | 0-01:00:00 | 0-12:00:00 | 7 | 1 | 2 | gpu_gh_interactive | <u>merlin</u> |
|
||||
|
||||
All Merlin users are part of the `merlin` account, which is used as the *default account* when submitting jobs.
|
||||
Similarly, if no partition is specified, jobs are automatically submitted to the `general` partition by default.
|
||||
|
||||
{{site.data.alerts.tip}}
|
||||
For jobs running less than one day, submit them to the <b>gh-daily</b> partition.
|
||||
For jobs running less than one hour, use the <b>gh-hourly</b> partition.
|
||||
These partitions provide higher priority and ensure quicker scheduling compared to <b>gh-general</b>, which has limited node availability.
|
||||
{{site.data.alerts.end}}
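
For interactive work on a GPU node (for example, compiling software or running small tests), an allocation can be requested on the interactive partitions. A minimal sketch, staying within the `gpu_gh_interactive` QoS limits listed above:

```bash
salloc --cluster=gmerlin7 --partition=gh-interactive \
       --gpus=1 --ntasks=1 --cpus-per-task=16 --mem=40G --time=02:00:00
```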
|
@ -8,6 +8,13 @@ sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/slurm-examples.html
|
||||
---
|
||||
|
||||
{:style="display:block; margin-left:auto; margin-right:auto"}
|
||||
|
||||
{{site.data.alerts.warning}}The Merlin7 documentation is <b>Work In Progress</b>.
|
||||
Please do not use or rely on this documentation until this becomes official.
|
||||
This applies to any page under <b><a href="https://hpce.pages.psi.ch/merlin7/">https://hpce.pages.psi.ch/merlin7/</a></b>
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
## Single core based job examples
|
||||
|
||||
```bash
|
||||
@ -22,4 +29,47 @@ permalink: /merlin7/slurm-examples.html
|
||||
module purge
|
||||
module load $MODULE_NAME # where $MODULE_NAME is a software in PModules
|
||||
srun $MYEXEC # where $MYEXEC is a path to your binary file
|
||||
```
|
||||
|
||||
|
||||
## Multi-core based job examples
|
||||
|
||||
### Pure MPI
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
#SBATCH --job-name=purempi
|
||||
#SBATCH --partition=daily # Using 'daily' will grant higher priority
|
||||
#SBATCH --time=24:00:00 # Define max time job will run
|
||||
#SBATCH --output=%x-%j.out # Define your output file
|
||||
#SBATCH --error=%x-%j.err # Define your error file
|
||||
#SBATCH --exclusive
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks=128
|
||||
#SBATCH --hint=nomultithread
|
||||
##SBATCH --cpus-per-task=1
|
||||
|
||||
module purge
|
||||
module load $MODULE_NAME # where $MODULE_NAME is a software in PModules
|
||||
srun $MYEXEC # where $MYEXEC is a path to your binary file
|
||||
```
|
||||
|
||||
### Hybrid
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
#SBATCH --job-name=hybrid
|
||||
#SBATCH --partition=daily # Using 'daily' will grant higher priority
|
||||
#SBATCH --time=24:00:00 # Define max time job will run
|
||||
#SBATCH --output=%x-%j.out # Define your output file
|
||||
#SBATCH --error=%x-%j.err # Define your error file
|
||||
#SBATCH --exclusive
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks=128
|
||||
#SBATCH --hint=multithread
|
||||
#SBATCH --cpus-per-task=2
|
||||
|
||||
module purge
|
||||
module load $MODULE_NAME # where $MODULE_NAME is a software in PModules
|
||||
srun $MYEXEC # where $MYEXEC is a path to your binary file
|
||||
```
|
||||
|
||||
|
@ -2,28 +2,85 @@
|
||||
title: ANSYS RSM (Remote Solve Manager)
|
||||
#tags:
|
||||
keywords: software, ansys, rsm, slurm, interactive, rsm, windows
|
||||
last_updated: 24 Mai 2023
|
||||
summary: "This document describes how to use the ANSYS Remote Resolve Manager service in the Merlin6 cluster"
|
||||
last_updated: 23 August 2024
|
||||
summary: "This document describes how to use the ANSYS Remote Solve Manager service in the Merlin7 cluster"
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/ansys-rsm.html
|
||||
---
|
||||
|
||||
## ANSYS RSM Configuration tool settings:
|
||||
## ANSYS Remote Solve Manager
|
||||
|
||||
Use Merlin6 ANSYS to submit to RSM:
|
||||
**ANSYS Remote Solve Manager (RSM)** is used by ANSYS Workbench to submit computational jobs to HPC clusters directly from Workbench on your desktop.
|
||||
|
||||
Submitting from Titan is also possible; you have to set up SSH keys in Titan as described in: https://www.purdue.edu/science/scienceit/ssh-keys-windows.html
|
||||
{{site.data.alerts.warning}} Merlin7 runs behind a firewall; however, firewall policies are in place to allow access to the Merlin7 ANSYS RSM service from the main PSI networks. If you cannot connect to it, please contact us and provide the IP address of the corresponding workstation: we will check the PSI firewall rules in place and request an update if necessary.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
"HPC_Resource" tab configuration:
|
||||
### The Merlin7 RSM service
|
||||
|
||||
HPC Type: Slurm
|
||||
Submit Host: psi-dev.cscs.ch
|
||||
Slurm job arguments: --hint=nomultithread
|
||||
An RSM service runs on a dedicated Virtual Machine server. This service listens on a specific port and processes any request using RSM (for example, from ANSYS users' workstations).
|
||||
The following nodes are configured with such services:
|
||||
* `merlin7-ansys-rsm.psi.ch`
|
||||
|
||||
"File Management" tab configuration:
|
||||
The earliest version supported in the Merlin7 cluster is ANSYS/2022R2. Older versions are not supported due to existing bugs or missing functionality. If you strongly need to run an older version, please do not hesitate to contact the Merlin admins.
|
||||
|
||||
External mechanism for the transfer (SCP, custom)
|
||||
Transfer Mechanism SCP via SSH
|
||||
As staging directory, use /scratch/tmp
|
||||
As account, use your PSI username
|
||||
SSH Keys have to be configured to make it work.
|
||||
## Configuring RSM client on Windows workstations
|
||||
|
||||
Users can setup ANSYS RSM in their workstations to connect to the Merlin7 cluster.
|
||||
The steps and settings required to make it work are the following:
|
||||
|
||||
1. Open the RSM Configuration service in Windows for the ANSYS release you want to configure.
|
||||
2. Right-click the **HPC Resources** icon followed by **Add HPC Resource...**
|
||||

|
||||
3. In the **HPC Resource** tab, fill up the corresponding fields as follows:
|
||||

|
||||
* **"Name"**: Add here the preferred name for the cluster. For example: `Merlin7 cluster`
|
||||
* **"HPC Type"**: Select `SLURM`
|
||||
* **"Submit host"**: `merlin7-rsm01.psi.ch`
|
||||
* **"Slurm Job submission arguments (optional)"**: Add any required Slurm options for running your jobs.
|
||||
* `--hint=nomultithread` must be present.
|
||||
* `--exclusive` must also be present for now, due to a bug in the `Slingshot` interconnect which does not allow running shared nodes.
|
||||
* Check **"Use SSH protocol for inter and intra-node communication (Linux only)"**
|
||||
* Select **"Able to directly submit and monitor HPC jobs"**.
|
||||
* **"Apply"** changes.
|
||||
4. In the **"File Management"** tab, fill up the corresponding fields as follows:
|
||||

|
||||
* Select **"RSM internal file transfer mechanism"** and add **`/data/scratch/shared`** as the **"Staging directory path on Cluster"**
|
||||
* Select **"Scratch directory local to the execution node(s)"** and add **`/scratch`** as the **HPC scratch directory**.
|
||||
* **Never check** the option "Keep job files in the staging directory when job is complete" if the previous
|
||||
option "Scratch directory local to the execution node(s)" was set.
|
||||
* **"Apply"** changes.
|
||||
5. In the **"Queues"** tab, use the left button to auto-discover partitions
|
||||

|
||||
* If no authentication method was configured before, an authentication window will appear. Use your
|
||||
PSI account to authenticate. Notice that the **`PSICH\`** prefix **must not be added**.
|
||||

|
||||
* From the partition list, select the ones you want to typically use.
|
||||
* In general, standard Merlin users must use **`hourly`**, **`daily`** and **`general`** only.
|
||||
* Other partitions are reserved for allowed users only.
|
||||
* **"Apply"** changes.
|
||||

|
||||
6. *[Optional]* You can perform a test by submitting a test job on each partition by clicking on the **Submit** button
|
||||
for each selected partition.
|
||||
|
||||
{{site.data.alerts.tip}}
|
||||
In the future, we might provide this service also from the login nodes for better transfer performance.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
## Using RSM in ANSYS
|
||||
|
||||
Using the RSM service in ANSYS is slightly different depending on the ANSYS software being used.
|
||||
Please follow the official ANSYS documentation for details about how to use it for that specific software.
|
||||
|
||||
Alternatively, please refer to some of the examples shown in the following chapters (ANSYS-specific software).
|
||||
|
||||
### Using RSM in ANSYS Fluent
|
||||
|
||||
For further information on using RSM with Fluent, please visit the **[ANSYS Fluent](/merlin7/ansys-fluent.html)** section.
|
||||
|
||||
### Using RSM in ANSYS CFX
|
||||
|
||||
For further information on using RSM with CFX, please visit the **[ANSYS CFX](/merlin7/ansys-cfx.html)** section.
|
||||
|
||||
### Using RSM in ANSYS MAPDL
|
||||
|
||||
For further information on using RSM with MAPDL, please visit the **[ANSYS MAPDL](/merlin7/ansys-mapdl.html)** section.
|
||||
|
143
pages/merlin7/05-Software-Support/ansys.md
Normal file
@ -0,0 +1,143 @@
|
||||
---
|
||||
title: ANSYS
|
||||
#tags:
|
||||
keywords: software, ansys, slurm, interactive, rsm, pmodules, overlay, overlays
|
||||
last_updated: 23 August 2024
|
||||
summary: "This document describes how to load and use ANSYS in the Merlin7 cluster"
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/ansys.html
|
||||
---
|
||||
|
||||
This document provides generic information about how to load and run ANSYS software in the Merlin7 cluster.
|
||||
|
||||
## ANSYS software in Pmodules
|
||||
|
||||
The ANSYS software can be loaded through **[PModules](/merlin7/pmodules.html)**.
|
||||
|
||||
The default ANSYS versions are loaded from the central PModules repository.
|
||||
|
||||
However, we provide local installations on Merlin7, which are mainly needed for some ANSYS packages, such as ANSYS RSM.
For this reason, and also to improve the interactive experience of the user, ANSYS has also been installed on the
Merlin high-performance storage and made available through Pmodules.
|
||||
|
||||
### Loading Merlin7 ANSYS
|
||||
|
||||
For loading the Merlin7 ANSYS software, one needs to run Pmodules v1.1.22 or newer, and then use a specific repository
|
||||
(called **`merlin`**) which is **only available from the Merlin7 cluster**:
|
||||
|
||||
```bash
|
||||
module purge
|
||||
module load Pmodules/1.1.22
|
||||
module use merlin
|
||||
module use unstable
|
||||
module use /etc/cscs-modules/
|
||||
module load cray
|
||||
module search ANSYS
|
||||
|
||||
# Load the proper ANSYS version, for example 2022R2
|
||||
module load ANSYS/2022R2
|
||||
```
|
||||
Once `merlin` is invoked, it will disable central ANSYS installations with the same version, which will be replaced
by the local ones in Merlin. Releases from the central Pmodules repository that do not have a local installation will remain
visible. For each ANSYS release, one can identify where it is installed by searching for ANSYS in PModules with the `--verbose`
option. This will show the location of the different ANSYS releases as follows:
|
||||
* For ANSYS releases installed in the central repositories, the path starts with `/opt/psi`
|
||||
* For ANSYS releases installed in the Merlin7 repository (and/or overriding the central ones), the path starts with `/data/software/pmodules`
|
||||
|
||||
**We strongly recommend only using ANSYS/2022R2 or newer**.
|
||||
|
||||
<details>
|
||||
<summary>[Example] Loading ANSYS from the Merlin7 PModules repository</summary>
|
||||
<pre class="terminal code highlight js-syntax-highlight plaintext" lang="plaintext" markdown="false">
|
||||
🔥 [caubet_m@login001:~]# module purge
|
||||
🔥 [caubet_m@login001:~]# module load Pmodules/1.1.22
|
||||
module load: unstable module has been loaded -- Pmodules/1.1.22
|
||||
|
||||
🔥 [caubet_m@login001:~]# module use merlin
|
||||
🔥 [caubet_m@login001:~]# module use unstable
|
||||
🔥 [caubet_m@login001:~]# module use /etc/cscs-modules/
|
||||
🔥 [caubet_m@login001:~]# module load cray
|
||||
|
||||
Activating Modules:
|
||||
1) cce/17.0.0
|
||||
|
||||
🔥 [caubet_m@login001:~]# module load ANSYS/2022R2
|
||||
module load: unstable module has been loaded -- ANSYS/2022R2
|
||||
|
||||
🔥 [caubet_m@login001:~]# module search ANSYS --verbose
|
||||
ANSYS/2019R3:
|
||||
release stage: stable
|
||||
group: Tools
|
||||
overlay: base
|
||||
modulefile: /opt/psi/Tools/modulefiles/ANSYS/2019R3
|
||||
dependencies: (none)
|
||||
ANSYS/2020R1:
|
||||
release stage: stable
|
||||
group: Tools
|
||||
overlay: base
|
||||
modulefile: /opt/psi/Tools/modulefiles/ANSYS/2020R1
|
||||
dependencies: (none)
|
||||
ANSYS/2020R1-1:
|
||||
release stage: stable
|
||||
group: Tools
|
||||
overlay: base
|
||||
modulefile: /opt/psi/Tools/modulefiles/ANSYS/2020R1-1
|
||||
dependencies: (none)
|
||||
ANSYS/2020R2:
|
||||
release stage: unstable
|
||||
group: Tools
|
||||
overlay: base
|
||||
modulefile: /opt/psi/Tools/modulefiles/ANSYS/2020R2
|
||||
dependencies: (none)
|
||||
ANSYS/2021R1:
|
||||
release stage: unstable
|
||||
group: Tools
|
||||
overlay: base
|
||||
modulefile: /opt/psi/Tools/modulefiles/ANSYS/2021R1
|
||||
dependencies: (none)
|
||||
ANSYS/2022R2:
|
||||
release stage: unstable
|
||||
group: Tools
|
||||
overlay: merlin
|
||||
modulefile: /data/software/pmodules/Tools/modulefiles/ANSYS/2022R2
|
||||
dependencies: (none)
|
||||
ANSYS/2023R2:
|
||||
release stage: unstable
|
||||
group: Tools
|
||||
overlay: merlin
|
||||
modulefile: /data/software/pmodules/Tools/modulefiles/ANSYS/2023R2
|
||||
dependencies: (none)
|
||||
ANSYS/2024R1:
|
||||
release stage: unstable
|
||||
group: Tools
|
||||
overlay: merlin
|
||||
modulefile: /data/software/pmodules/Tools/modulefiles/ANSYS/2024R1
|
||||
dependencies: (none)
|
||||
</pre>
|
||||
</details>
|
||||
|
||||
|
||||
{{site.data.alerts.tip}} Please <b>only use Merlin7 ANSYS installations from `merlin`</b> in the Merlin cluster.
|
||||
Also, please always run <b>ANSYS/2022R2 or newer</b>.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
## ANSYS Documentation by product
|
||||
|
||||
### ANSYS RSM
|
||||
|
||||
**ANSYS Remote Solve Manager (RSM)** is used by ANSYS Workbench to submit computational jobs to HPC clusters directly from Workbench on your desktop.
|
||||
Therefore, PSI workstations with direct access to Merlin can submit jobs by using RSM.
|
||||
|
||||
For further information, please visit the **[ANSYS RSM](/merlin7/ansys-rsm.html)** section.
|
||||
|
||||
### ANSYS Fluent
|
||||
|
||||
For further information, please visit the **[ANSYS RSM](/merlin7/ansys-fluent.html)** section.
|
||||
|
||||
### ANSYS CFX
|
||||
|
||||
For further information, please visit the **[ANSYS RSM](/merlin7/ansys-cfx.html)** section.
|
||||
|
||||
### ANSYS MAPDL
|
||||
|
||||
For further information, please visit the **[ANSYS RSM](/merlin7/ansys-mapdl.html)** section.
|
64
pages/merlin7/05-Software-Support/cray-module.env.md
Normal file
@ -0,0 +1,64 @@
|
||||
---
|
||||
title: Cray Programming Environment
|
||||
#tags:
|
||||
keywords: cray, module
|
||||
last_updated: 24 May 2023
|
||||
summary: "This document describes how to use the Cray Programming Environment on Merlin7."
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/cray-module-env.html
|
||||
---
|
||||
|
||||
## Loading the Cray module
|
||||
|
||||
The Cray Programming Environment, with Cray's compilers and MPI, is not loaded by default.
|
||||
|
||||
To load it, one has to run the following command:
|
||||
|
||||
```bash
|
||||
module load cray
|
||||
```
|
||||
|
||||
The Cray Programming Environment will load all the necessary dependencies. For example:
|
||||
|
||||
```bash
|
||||
🔥 [caubet_m@login001:~]# module list
|
||||
Currently Loaded Modules:
|
||||
1) craype-x86-rome 2) libfabric/1.15.2.0
|
||||
3) craype-network-ofi
|
||||
4) xpmem/2.9.6-1.1_20240510205610__g087dc11fc19d 5) PrgEnv-cray/8.5.0
|
||||
6) cce/17.0.0 7) cray-libsci/23.12.5
|
||||
8) cray-mpich/8.1.28 9) craype/2.7.30
|
||||
10) perftools-base/23.12.0 11) cpe/23.12
|
||||
12) cray/23.12
|
||||
```
|
||||
|
||||
You will notice an unfamiliar `PrgEnv-cray/8.5.0` module that was loaded. This is a meta-module that Cray provides to simplify switching between compilers and their associated dependencies and libraries,
collectively called a Programming Environment. The Cray Programming Environment contains four key modules:
|
||||
|
||||
* `cray-libsci` is a collection of numerical routines tuned for performance on Cray systems.
|
||||
* `libfabric` is an important low-level library that allows you to take advantage of the high performance Slingshot network.
|
||||
* `cray-mpich` is a CUDA-aware MPI implementation, optimized for Cray systems.
|
||||
* `cce` is the compiler from Cray. C/C++ compilers are based on Clang/LLVM while Fortran supports Fortran 2018 standard. More info: https://user.cscs.ch/computing/compilation/cray/
|
||||
|
||||
You can switch between different programming environments. You can check the available modules with the `module avail` command, as follows:
|
||||
|
||||
```bash
|
||||
🔥 [caubet_m@login001:~]# module avail PrgEnv
|
||||
--------------------- /opt/cray/pe/lmod/modulefiles/core ---------------------
|
||||
|
||||
PrgEnv-cray/8.5.0 PrgEnv-gnu/8.5.0
|
||||
PrgEnv-nvhpc/8.5.0 PrgEnv-nvidia/8.5.0
|
||||
```
|
||||
## Switching compiler suites
|
||||
|
||||
Compiler suites can be exchanged with PrgEnv (Programming Environments) provided by HPE-Cray. The wrappers call the correct compiler with appropriate options to build
|
||||
and link applications with relevant libraries, as required by the loaded modules (only dynamic linking is supported) and therefore should replace direct calls to compiler
|
||||
drivers in Makefiles and build scripts.
|
||||
|
||||
To swap the compiler suite from the default Cray compiler to the GNU compiler, one can run the following:
|
||||
|
||||
```bash
|
||||
🔥 [caubet_m@login001:~]# module swap PrgEnv-cray/8.5.0 PrgEnv-gnu/8.5.0
|
||||
|
||||
Lmod is automatically replacing "cce/17.0.0" with "gcc-native/12.3".
|
||||
```
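
Once the desired programming environment is loaded, use the Cray compiler wrappers `cc`, `CC` and `ftn` instead of calling the compilers directly: the wrappers automatically add the flags for MPI and the other loaded libraries. A minimal sketch (the source file names are only examples):

```bash
# The wrappers invoke the compiler of the active PrgEnv (Cray, GNU, NVIDIA, ...)
cc  -O2 -o hello_c    hello.c    # C
CC  -O2 -o hello_cxx  hello.cpp  # C++
ftn -O2 -o hello_f    hello.f90  # Fortran
```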
|
51
pages/merlin7/05-Software-Support/ippl.md
Normal file
@ -0,0 +1,51 @@
|
||||
---
|
||||
title: IPPL
|
||||
keywords: IPPL software, compile
|
||||
summary: "Independent Parallel Particle Layer (IPPL) is a performance portable C++ library for Particle-Mesh methods"
|
||||
sidebar: merlin7_sidebar
|
||||
toc: false
|
||||
permalink: /merlin7/ippl.html
|
||||
---
|
||||
|
||||
## IPPL
|
||||
|
||||
Independent Parallel Particle Layer (IPPL) is a performance portable C++ library for Particle-Mesh methods. IPPL makes use of Kokkos (https://github.com/kokkos/kokkos), HeFFTe (https://github.com/icl-utk-edu/heffte), and MPI (Message Passing Interface) to deliver a portable, massively parallel toolkit for particle-mesh methods. IPPL supports simulations in one to six dimensions, mixed precision, and asynchronous execution in different execution spaces (e.g. CPUs and GPUs).
|
||||
|
||||
## Licensing Terms and Conditions
|
||||
|
||||
GNU GPLv3
|
||||
|
||||
## How to run on Merlin7
|
||||
### A100 nodes
|
||||
[](https://gitea.psi.ch/HPCE/spack-psi)
|
||||
```bash
|
||||
module use Spack unstable
|
||||
module load gcc/13.2.0 openmpi/4.1.6-57rc-A100-gpu
|
||||
module load boost/1.82.0-e7gp fftw/3.3.10 gnutls/3.8.3 googletest/1.14.0 gsl/2.8 h5hut/2.0.0rc7 openblas/0.3.26-omp cmake/3.31.6-oe7u
|
||||
|
||||
cd <path to IPPL source directory>
|
||||
mkdir build_gpu
|
||||
cd build_gpu
|
||||
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DKokkos_ARCH_AMPERE80=ON -DCMAKE_CXX_STANDARD=20 -DIPPL_ENABLE_FFT=ON -DIPPL_ENABLE_TESTS=ON -DUSE_ALTERNATIVE_VARIANT=ON -DIPPL_ENABLE_SOLVERS=ON -DIPPL_ENABLE_ALPINE=True -DIPPL_PLATFORMS=cuda ..
|
||||
make [-jN]
|
||||
```
|
||||
|
||||
### GH nodes
|
||||
[](https://gitea.psi.ch/HPCE/spack-psi)
|
||||
|
||||
```bash
|
||||
salloc --partition=gh-daily --clusters=gmerlin7 --time=08:00:00 --ntasks=4 --nodes=1 --gpus=1 --mem=40000 $SHELL
|
||||
ssh <allocated_gpu>
|
||||
|
||||
module use Spack unstable
|
||||
module load gcc/13.2.0 openmpi/5.0.3-3lmi-GH200-gpu
|
||||
module load boost/1.82.0-3ns6 fftw/3.3.10 gnutls/3.8.3 googletest/1.14.0 gsl/2.7.1 h5hut/2.0.0rc7 openblas/0.3.26 cmake/3.31.4-u2nm
|
||||
|
||||
cd <path to IPPL source directory>
|
||||
mkdir build_gh
|
||||
cd build_gh
|
||||
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DKokkos_ARCH_HOPPER90=ON -DCMAKE_CXX_STANDARD=20 -DIPPL_ENABLE_FFT=ON -DIPPL_ENABLE_TESTS=ON -DUSE_ALTERNATIVE_VARIANT=ON -DIPPL_ENABLE_SOLVERS=ON -DIPPL_ENABLE_ALPINE=True -DIPPL_PLATFORMS=cuda ..
|
||||
make [-jN]
|
||||
```
|
75
pages/merlin7/05-Software-Support/opal-x.md
Normal file
@ -0,0 +1,75 @@
|
||||
---
|
||||
title: OPAL-X
|
||||
keywords: OPAL-X software, compile
|
||||
summary: "OPAL (Object Oriented Particle Accelerator Library) is an open source C++ framework for general particle accelerator simulations including 3D space charge, short range wake fields and particle matter interaction."
|
||||
sidebar: merlin7_sidebar
|
||||
toc: false
|
||||
permalink: /merlin7/opal-x.html
|
||||
---
|
||||
|
||||
## OPAL
|
||||
|
||||
OPAL (Object Oriented Particle Accelerator Library) is an open source C++ framework for general particle accelerator simulations including 3D space charge, short range wake fields and particle matter interaction.
|
||||
|
||||
## Licensing Terms and Conditions
|
||||
|
||||
GNU GPLv3
|
||||
|
||||
## How to run on Merlin7
|
||||
### A100 nodes
|
||||
```bash
|
||||
module purge
|
||||
module use Spack unstable
|
||||
module load gcc/13.2.0 openmpi/4.1.6-57rc-A100-gpu opal-x/fixSolverUnits-q4ul-A100-gpu
|
||||
```
|
||||
|
||||
### GH nodes
|
||||
```bash
|
||||
module purge
|
||||
module use Spack unstable
|
||||
module load gcc/13.2.0 openmpi/5.0.3-3lmi-GH200-gpu opal-x/fixSolverUnits-ttg7-GH200-gpu
|
||||
```
|
||||
|
||||
## Developing your own code
|
||||
### A100 nodes
|
||||
|
||||
[](https://gitea.psi.ch/HPCE/spack-psi)
|
||||
|
||||
```bash
|
||||
module purge
|
||||
module use Spack unstable
|
||||
module load gcc/13.2.0 openmpi/4.1.6-57rc-A100-gpu
|
||||
module load boost/1.82.0-e7gp fftw/3.3.10 gnutls/3.8.3 googletest/1.14.0 gsl/2.8 h5hut/2.0.0rc7 openblas/0.3.26-omp cmake/3.31.6-oe7u
|
||||
|
||||
git clone https://gitlab.psi.ch/OPAL/opal-x/src.git opal-x
|
||||
cd opal-x
|
||||
./gen_OPALrevision
|
||||
|
||||
mkdir build_gpu
|
||||
cd build_gpu
|
||||
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DKokkos_ARCH_AMPERE80=ON -DCMAKE_CXX_STANDARD=20 -DIPPL_ENABLE_FFT=ON -DIPPL_ENABLE_TESTS=OFF -DIPPL_ENABLE_SOLVERS=ON -DIPPL_ENABLE_ALPINE=True -DIPPL_PLATFORMS=cuda ..
|
||||
make [-jN]
|
||||
```
|
||||
|
||||
### GH nodes
|
||||
[](https://gitea.psi.ch/HPCE/spack-psi)
|
||||
|
||||
```bash
|
||||
salloc --partition=gh-daily --clusters=gmerlin7 --time=08:00:00 --ntasks=4 --nodes=1 --gpus=1 --mem=40000 $SHELL
|
||||
ssh <allocated_gpu>
|
||||
|
||||
module purge
|
||||
module use Spack unstable
|
||||
module load gcc/13.2.0 openmpi/5.0.3-3lmi-GH200-gpu
|
||||
module load boost/1.82.0-3ns6 fftw/3.3.10 gnutls/3.8.3 googletest/1.14.0 gsl/2.7.1 h5hut/2.0.0rc7 openblas/0.3.26 cmake/3.31.4-u2nm
|
||||
|
||||
git clone https://gitlab.psi.ch/OPAL/opal-x/src.git opal-x
|
||||
cd opal-x
|
||||
./gen_OPALrevision
|
||||
mkdir build_gh
|
||||
cd build_gh
|
||||
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DKokkos_ARCH_HOPPER90=ON -DCMAKE_CXX_STANDARD=20 -DIPPL_ENABLE_FFT=ON -DIPPL_ENABLE_TESTS=OFF -DIPPL_ENABLE_SOLVERS=ON -DIPPL_ENABLE_ALPINE=OFF -DIPPL_PLATFORMS=cuda ..
|
||||
make [-jN]
|
||||
```
|
80
pages/merlin7/05-Software-Support/openmpi.md
Normal file
@ -0,0 +1,80 @@
|
||||
---
|
||||
title: OpenMPI Support
|
||||
#tags:
|
||||
last_updated: 15 January 2025
|
||||
keywords: software, openmpi, slurm
|
||||
summary: "This document describes how to use OpenMPI in the Merlin7 cluster"
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/openmpi.html
|
||||
---
|
||||
|
||||
## Introduction
|
||||
|
||||
This document outlines the supported OpenMPI versions in the Merlin7 cluster.
|
||||
|
||||
### Supported OpenMPI versions
|
||||
|
||||
The Merlin cluster supports OpenMPI versions across three distinct stages: stable, unstable, and deprecated. Below is an overview of each stage:
|
||||
|
||||
#### Stable
|
||||
|
||||
Versions in the `stable` stage are fully functional, thoroughly tested, and officially supported by the Merlin administrators.
|
||||
These versions are available via [Pmodules](/merlin7/pmodules.html) and [Spack](/merlin7/spack.html), ensuring compatibility and reliability for production use.
|
||||
|
||||
#### Unstable
|
||||
|
||||
Versions in the `unstable` stage are available for testing and early access to new OpenMPI features.
|
||||
While these versions can be used, their compilation and configuration are subject to change before they are promoted to the `stable` stage.
|
||||
Administrators recommend caution when relying on `unstable` versions for critical workloads.
|
||||
|
||||
#### Deprecated
|
||||
|
||||
Versions in the `deprecated` stage are no longer supported by the Merlin administrators.
|
||||
Typically, these include versions no longer supported by the official [OpenMPI](https://www.open-mpi.org/software/ompi/v5.0/) project.
|
||||
While deprecated versions may still be available for use, their functionality cannot be guaranteed, and they will not receive updates or bug fixes.
|
||||
|
||||
### Using srun in Merlin7
|
||||
|
||||
In OpenMPI versions prior to 5.0.x, using `srun` for direct task launches was faster than `mpirun`.
|
||||
Although this is no longer the case, `srun` remains the recommended method due to its simplicity and ease of use.
|
||||
|
||||
Key benefits of `srun`:
|
||||
* Automatically handles task binding to cores.
|
||||
* In general, requires less configuration compared to `mpirun`.
|
||||
* Best suited for most users, while `mpirun` is recommended only for advanced MPI configurations.
|
||||
|
||||
Guidelines:
|
||||
* Always adapt your scripts to use srun before seeking support.
|
||||
* For any module-related issues, please contact the Merlin7 administrators.
|
||||
|
||||
Example Usage:
|
||||
```bash
|
||||
srun ./app
|
||||
```
|
||||
|
||||
{{site.data.alerts.tip}}
|
||||
Always run OpenMPI applications with <b>srun</b> for a seamless experience.
|
||||
{{site.data.alerts.end}}
|
||||
|
||||
### PMIx Support in Merlin7
|
||||
|
||||
Merlin7's SLURM installation includes support for multiple PMI types, including pmix. To view the available options, use the following command:
|
||||
|
||||
```bash
|
||||
🔥 [caubet_m@login001:~]# srun --mpi=list
|
||||
MPI plugin types are...
|
||||
none
|
||||
pmix
|
||||
pmi2
|
||||
cray_shasta
|
||||
specific pmix plugin versions available: pmix_v5,pmix_v4,pmix_v3,pmix_v2
|
||||
```
|
||||
Important Notes:
|
||||
* For OpenMPI, always use `pmix` by specifying the appropriate version (`pmix_$version`).
|
||||
When loading an OpenMPI module (via [Pmodules](/merlin7/pmodules.html) or [Spack](/merlin7/spack.html)), the corresponding PMIx version will be automatically loaded.
|
||||
* Users do not need to manually manage PMIx compatibility.
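
For example, the PMIx plugin can also be selected explicitly at launch time (a sketch; `./app` stands for your OpenMPI application):

```bash
# Explicitly request PMIx (a specific version such as pmix_v5 can also be given)
srun --mpi=pmix ./app
```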
|
||||
|
||||
{{site.data.alerts.warning}}
|
||||
PMI-2 is not supported in OpenMPI 5.0.0 or later releases.
|
||||
Despite this, <b>pmi2</b> remains the default SLURM PMI type in Merlin7 as it is the officially supported type and maintains compatibility with other MPI implementations.
|
||||
{{site.data.alerts.end}}
|
153
pages/merlin7/05-Software-Support/pmodules.md
Normal file
@ -0,0 +1,153 @@
|
||||
---
|
||||
title: PSI Modules
|
||||
#tags:
|
||||
keywords: Pmodules, software, stable, unstable, deprecated, overlay, overlays, release stage, module, package, packages, library, libraries
|
||||
last_updated: 07 September 2022
|
||||
#summary: ""
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/pmodules.html
|
||||
---
|
||||
|
||||
## PSI Environment Modules
|
||||
|
||||
On top of the operating system stack, we provide additional software using the PSI-developed PModules system.
|
||||
|
||||
PModules is the officially supported way of providing software, and each package is deployed by a specific expert.
PModules usually contains software that is used by many people.
|
||||
|
||||
If you are missing a package/version, or a software feature, please contact us. We will evaluate whether it is feasible to install it.
|
||||
|
||||
### Module Release Stages
|
||||
|
||||
To ensure proper software lifecycle management, PModules uses three release stages: unstable, stable, and deprecated.
|
||||
|
||||
1. **Unstable Release Stage:**
|
||||
* Contains experimental or under-development software versions.
|
||||
* Not visible to users by default. Use explicitly:
|
||||
|
||||
```bash
|
||||
module use unstable
|
||||
```
|
||||
* Software is promoted to **stable** after validation.
|
||||
2. **Stable Release Stage:**
|
||||
* Default stage, containing fully tested and supported software versions.
|
||||
* Recommended for all production workloads.
|
||||
|
||||
3. **Deprecated Release Stage:**
|
||||
* Contains software versions that are outdated or discontinued.
|
||||
* These versions are hidden by default but can be explicitly accessed:
|
||||
|
||||
```bash
|
||||
module use deprecated
|
||||
```
|
||||
* Deprecated software can still be loaded directly without additional configuration to ensure user transparency.
|
||||
|
||||
## PModules commands
|
||||
|
||||
Below is listed a summary of common `module` commands:
|
||||
|
||||
```bash
|
||||
module use # show all available PModule Software Groups as well as Release Stages
|
||||
module avail # to see the list of available software packages provided via pmodules
|
||||
module use unstable # to get access to a set of packages not fully tested by the community
|
||||
module load <package>/<version> # to load specific software package with a specific version
|
||||
module search <string> # to search for a specific software package and its dependencies.
|
||||
module list # to list which software is loaded in your environment
|
||||
module purge # unload all loaded packages and cleanup the environment
|
||||
```
|
||||
|
||||
Please refer to the **external [PSI Modules](https://pmodules.gitpages.psi.ch/chap3.html) document** for
|
||||
detailed information about the `module` command.
|
||||
|
||||
### module use/unuse
|
||||
|
||||
Without any parameter, `use` **lists** all available PModule **Software Groups and Release Stages**.
|
||||
|
||||
```bash
|
||||
module use
|
||||
```
|
||||
|
||||
When followed by a parameter, `use`/`unuse` invokes/uninvokes a PModule **Software Group** or **Release Stage**.
|
||||
|
||||
```bash
|
||||
module use EM # Invokes the 'EM' software group
|
||||
module unuse EM # Uninvokes the 'EM' software group
|
||||
module use unstable   # Invokes the 'unstable' Release stage
module unuse unstable # Uninvokes the 'unstable' Release stage
|
||||
```
|
||||
|
||||
### module avail
|
||||
|
||||
This option **lists** all available PModule **Software Groups and their packages**.
|
||||
|
||||
Please run `module avail --help` for further listing options.
|
||||
|
||||
### module search
|
||||
|
||||
This is used to **search** for **software packages**. By default, if no **Release Stage** or **Software Group** is specified
|
||||
in the options of the `module search` command, it will search from the already invoked *Software Groups* and *Release Stages*.
|
||||
Direct package dependencies will also be shown.
|
||||
|
||||
```bash
|
||||
🔥 [caubet_m@login001:~]# module search openmpi
|
||||
|
||||
Module Rel.stage Group Overlay Requires
|
||||
--------------------------------------------------------------------------------
|
||||
openmpi/4.1.6 stable Compiler Alps gcc/12.3.0
|
||||
openmpi/4.1.6 stable Compiler Alps gcc/13.3.0
|
||||
openmpi/4.1.6 stable Compiler Alps gcc/14.2.0
|
||||
openmpi/4.1.6 stable Compiler Alps intelcc/22.2
|
||||
openmpi/5.0.5 stable Compiler Alps gcc/8.5.0
|
||||
openmpi/5.0.5 stable Compiler Alps gcc/12.3.0
|
||||
openmpi/5.0.5 stable Compiler Alps gcc/14.2.0
|
||||
openmpi/5.0.5 stable Compiler Alps intelcc/22.2
|
||||
```
|
||||
|
||||
Please run `module search --help` for further search options.
|
||||
|
||||
### module load/unload
|
||||
|
||||
This loads/unloads specific software packages. Packages might have direct dependencies that need to be loaded first. Other dependencies
|
||||
will be automatically loaded.
|
||||
|
||||
In the example below, the ``openmpi/5.0.5`` package will be loaded; however, ``gcc/14.2.0`` must be loaded as well, since it is a strict dependency. Direct dependencies must be loaded in advance. Users can load multiple packages one by one or all at once; the latter can be useful, for instance, when loading a package with direct dependencies.
|
||||
|
||||
```bash
|
||||
# Single line
|
||||
module load gcc/14.2.0 openmpi/5.0.5
|
||||
|
||||
# Multiple line
|
||||
module load gcc/14.2.0
|
||||
module load openmpi/5.0.5
|
||||
```
|
||||
|
||||
#### module purge
|
||||
|
||||
This command is an alternative to `module unload`, which can be used to unload **all** loaded module files.
|
||||
|
||||
```bash
|
||||
module purge
|
||||
```
|
||||
|
||||
## Requesting New PModules Packages
|
||||
|
||||
The PModules system is designed to accommodate the diverse software needs of Merlin7 users. Below are guidelines for requesting new software or versions to be added to PModules.
|
||||
|
||||
### Requesting Missing Software
|
||||
|
||||
If a specific software package is not available in PModules and there is interest from multiple users:
|
||||
* **[Contact Support](/merlin7/contact.html):** Let us know about the software, and we will assess its feasibility for deployment.
|
||||
* **Deployment Timeline:** Adding new software to PModules typically takes a few days, depending on complexity and compatibility.
|
||||
* **User Involvement:** If you are interested in maintaining the software package, please inform us. Collaborative maintenance helps
|
||||
ensure timely updates and support.
|
||||
|
||||
### Requesting a Missing Version
|
||||
If the currently available versions of a package do not meet your requirements:
|
||||
* **New Versions:** Requests for newer versions are generally supported, especially if there is interest from multiple users.
|
||||
* **Intermediate Versions:** Installation of intermediate versions (e.g., versions between the current stable and deprecated versions)
|
||||
can be considered if there is a strong justification, such as specific features or compatibility requirements.
|
||||
|
||||
### General Notes
|
||||
* New packages or versions are prioritized based on their relevance and usage.
|
||||
* For any request, providing detailed information about the required software or version (e.g., name, version, features) will help
|
||||
expedite the process.
|
129
pages/merlin7/05-Software-Support/quantum-espresso.md
Normal file
@ -0,0 +1,129 @@
|
||||
---
|
||||
title: Quantum Espresso
|
||||
keywords: Quantum Espresso software, compile
|
||||
summary: "Quantum Espresso code for electronic-structure calculations and materials modeling at the nanoscale"
|
||||
sidebar: merlin7_sidebar
|
||||
toc: false
|
||||
permalink: /merlin7/quantum-espresso.html
|
||||
---
|
||||
|
||||
## Quantum ESPRESSO
|
||||
|
||||
Quantum ESPRESSO is an integrated suite of Open-Source computer codes for electronic-structure calculations and materials modeling at the nanoscale. It is based on density-functional theory, plane waves, and pseudopotentials:
|
||||
|
||||
* PWscf (Plane-Wave Self-Consistent Field)
* FPMD (First Principles Molecular Dynamics)
* CP (Car-Parrinello)
|
||||
|
||||
## Licensing Terms and Conditions
|
||||
|
||||
Quantum ESPRESSO is an open initiative, in collaboration with many groups world-wide, coordinated by the Quantum ESPRESSO Foundation. Scientific work done using Quantum ESPRESSO should contain an explicit acknowledgment and reference to the main papers (see Quantum Espresso Homepage for the details).
|
||||
|
||||
## How to run on Merlin7
|
||||
### A100 nodes
|
||||
```bash
|
||||
module purge
|
||||
module use Spack unstable
|
||||
module load nvhpc/25.3 openmpi/main-6bnq-A100-gpu quantum-espresso/7.4.1-nxsw-gpu-omp
|
||||
```
|
||||
### GH nodes
|
||||
```bash
|
||||
module purge
|
||||
module use Spack unstable
|
||||
module load nvhpc/25.3 openmpi/5.0.7-e3bf-GH200-gpu quantum-espresso/7.4.1-gxvj-gpu-omp
|
||||
```
|
||||
|
||||
### SBATCH A100, 1 GPU, 64 OpenMP threads, one MPI rank example
|
||||
```bash
|
||||
#!/bin/bash
|
||||
#SBATCH --no-requeue
|
||||
#SBATCH --job-name="si64"
|
||||
#SBATCH --get-user-env
|
||||
#SBATCH --output=_scheduler-stdout.txt
|
||||
#SBATCH --error=_scheduler-stderr.txt
|
||||
#SBATCH --partition=a100-daily
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks-per-node=1
|
||||
#SBATCH --time=06:00:00
|
||||
#SBATCH --cpus-per-task=64
|
||||
#SBATCH --cluster=gmerlin7
|
||||
#SBATCH --gpus=1
|
||||
#SBATCH --hint=nomultithread
|
||||
|
||||
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
|
||||
export OMP_PROC_BIND=spread
|
||||
export OMP_PLACES=threads
|
||||
|
||||
# Load necessary modules
|
||||
module purge
|
||||
module use Spack unstable
|
||||
module load nvhpc/25.3 openmpi/main-6bnq-A100-gpu quantum-espresso/7.4.1-nxsw-gpu-omp

srun $(which pw.x) -npool 1 -in aiida.in > aiida.out
|
||||
```
|
||||
|
||||
## Developing your own GPU code
|
||||
### Spack
|
||||
2. ```spack config edit ```
|
||||
3. Add granularity: microarchitectures to your config (if you use nvhpc compiler! Not needed for CPU builds!)
|
||||
```bash
|
||||
spack:
|
||||
concretizer:
|
||||
unify: false
|
||||
targets:
|
||||
granularity: microarchitectures
|
||||
```
|
||||
4. ```spack add quantum-espresso@develop +cuda +mpi +mpigpu hdf5=parallel %nvhpc arch=linux-sles15-zen3 # GPU```
|
||||
5. ```spack add quantum-espresso@develop +mpi hdf5=parallel %gcc # CPU```
|
||||
6. ```spack develop quantum-espresso@develop # clone the code under /afs/psi.ch/sys/spack/user/$USER/spack-environment/quantum-espresso```
|
||||
7. Make changes in /afs/psi.ch/sys/spack/user/$USER/spack-environment/quantum-espresso
|
||||
8. Build: ```spack install [-jN] -v --until=build quantum-espresso@develop```
|
||||
|
||||
### Environment modules
|
||||
#### CPU
|
||||
[](https://gitea.psi.ch/HPCE/spack-psi)
|
||||
|
||||
```bash
|
||||
module purge
|
||||
module use Spack unstable
|
||||
module load gcc/12.3 openmpi/main-syah fftw/3.3.10.6-omp hdf5/1.14.5-t46c openblas/0.3.29-omp cmake/3.31.6-oe7u
|
||||
|
||||
cd <path to QE source directory>
|
||||
mkdir build
|
||||
cd build
|
||||
|
||||
cmake -DQE_ENABLE_MPI:BOOL=ON -DQE_ENABLE_OPENMP:BOOL=ON -DCMAKE_C_COMPILER:STRING=mpicc -DCMAKE_Fortran_COMPILER:STRING=mpif90 -DQE_ENABLE_HDF5:BOOL=ON ..
|
||||
make [-jN]
|
||||
```
|
||||
#### A100
|
||||
[](https://gitea.psi.ch/HPCE/spack-psi)
|
||||
|
||||
```bash
|
||||
module purge
|
||||
module use Spack unstable
|
||||
module load nvhpc/25.3 openmpi/main-6bnq-A100-gpu fftw/3.3.10.6-qbxu-omp hdf5/develop-2.0-rjgu netlib-scalapack/2.2.2-3hgw cmake/3.31.6-oe7u
|
||||
|
||||
cd <path to QE source directory>
|
||||
mkdir build
|
||||
cd build
|
||||
|
||||
cmake -DQE_ENABLE_MPI:BOOL=ON -DQE_ENABLE_OPENMP:BOOL=ON -DQE_ENABLE_SCALAPACK:BOOL=ON -DQE_ENABLE_CUDA:BOOL=ON -DQE_ENABLE_MPI_GPU_AWARE:BOOL=ON -DQE_ENABLE_OPENACC:BOOL=ON -DCMAKE_C_COMPILER:STRING=mpicc -DCMAKE_Fortran_COMPILER:STRING=mpif90 -DQE_ENABLE_HDF5:BOOL=ON ..
|
||||
make [-jN]
|
||||
|
||||
```
|
||||
#### GH200
|
||||
[](https://gitea.psi.ch/HPCE/spack-psi)
|
||||
|
||||
```bash
|
||||
module purge
|
||||
module use Spack unstable
|
||||
module load nvhpc/25.3 openmpi/5.0.7-e3bf-GH200-gpu fftw/3.3.10-sfpw-omp hdf5/develop-2.0-ztvo nvpl-blas/0.4.0.1-3zpg nvpl-lapack/0.3.0-ymy5 netlib-scalapack/2.2.2-qrhq cmake/3.31.6-5dl7
|
||||
|
||||
|
||||
cd <path to QE source directory>
|
||||
mkdir build
|
||||
cd build
|
||||
|
||||
cmake -DQE_ENABLE_MPI:BOOL=ON -DQE_ENABLE_OPENMP:BOOL=ON -DQE_ENABLE_SCALAPACK:BOOL=ON -DQE_ENABLE_CUDA:BOOL=ON -DQE_ENABLE_MPI_GPU_AWARE:BOOL=ON -DQE_ENABLE_OPENACC:BOOL=ON -DCMAKE_C_COMPILER:STRING=mpicc -DCMAKE_Fortran_COMPILER:STRING=mpif90 -DQE_ENABLE_HDF5:BOOL=ON ..
|
||||
make [-jN]
|
||||
```
|
18
pages/merlin7/05-Software-Support/spack.md
Normal file
@ -0,0 +1,18 @@
|
||||
---
|
||||
title: Spack
|
||||
keywords: spack, python, software, compile
|
||||
summary: "Spack the HPC package manager documentation"
|
||||
sidebar: merlin7_sidebar
|
||||
toc: false
|
||||
permalink: /merlin7/spack.html
|
||||
---
|
||||
|
||||
For Merlin7 the *package manager for supercomputing* [Spack](https://spack.io/) is available. It is meant to complement the existing PModules
solution, giving users the opportunity to manage their own software environments.
|
||||
|
||||
Documentation for how to use Spack on Merlin7 is provided [here](https://gitea.psi.ch/HPCE/spack-psi/src/branch/main/README.md).
|
||||
|
||||
## The Spack PSI packages
|
||||
|
||||
An initial collection of packages (and Spack recipes) is located at **[Spack PSI](https://gitea.psi.ch/HPCE/spack-psi)**; users can directly use these
recipes through calls like `spack add ...`.
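
As a minimal sketch of that workflow (the package name `fftw` is only an example; see the linked documentation for the full environment setup):

```bash
spack add fftw        # add a package/recipe to your Spack environment
spack concretize -f   # resolve versions and dependencies
spack install         # build and install the packages in the environment
```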
|
50
pages/merlin7/99-support/contact.md
Normal file
@ -0,0 +1,50 @@
|
||||
---
|
||||
title: Contact
|
||||
#tags:
|
||||
keywords: contact, support, snow, service now, mailing list, mailing, email, mail, merlin-admins@lists.psi.ch, merlin-users@lists.psi.ch, merlin users
|
||||
last_updated: 15. Jan 2025
|
||||
#summary: ""
|
||||
sidebar: merlin7_sidebar
|
||||
permalink: /merlin7/contact.html
|
||||
---
|
||||
|
||||
## Support
|
||||
|
||||
Support can be asked through:
|
||||
* [PSI Service Now](https://psi.service-now.com/psisp)
|
||||
* E-Mail: <merlin-admins@lists.psi.ch>
|
||||
|
||||
Basic contact information is also displayed on every shell login to the system using the *Message of the Day* mechanism.
|
||||
|
||||
|
||||
### PSI Service Now
|
||||
|
||||
**[PSI Service Now](https://psi.service-now.com/psisp)** is the official tool for opening incident requests.
|
||||
* PSI HelpDesk will redirect the incident to the corresponding department, or
|
||||
* you can always assign it directly by checking the box `I know which service is affected` and providing the service name `Local HPC Resources (e.g. Merlin) [CF]` (just type in `Local` and you should get the valid completions).
|
||||
|
||||
### Contact Merlin Administrators
|
||||
|
||||
**E-Mail <merlin-admins@lists.psi.ch>**
|
||||
* This is the official way to contact Merlin Administrators for discussions which do not fit well into the incident category.
|
||||
Do not hesitate to contact us for such cases.
|
||||
|
||||
---
|
||||
|
||||
## Get updated through the Merlin User list!
|
||||
|
||||
It is strongly recommended that users subscribe to the Merlin Users mailing list: **<merlin-users@lists.psi.ch>**
|
||||
|
||||
This mailing list is the official channel used by Merlin administrators to inform users about downtimes,
|
||||
interventions or problems. Users can be subscribed in two ways:
|
||||
|
||||
* *(Preferred way)* Self-registration through **[Sympa](https://psilists.ethz.ch/sympa/info/merlin-users)**
|
||||
* If you need to subscribe many people (e.g. your whole group), send a request to the admin list **<merlin-admins@lists.psi.ch>**
providing a list of email addresses.
|
||||
|
||||
---
|
||||
|
||||
## The Merlin Cluster Team
|
||||
|
||||
The PSI Merlin clusters are managed by the **[High Performance Computing and Emerging technologies Group](https://www.psi.ch/de/lsm/hpce-group)**, which
|
||||
is part of the [Science IT Infrastructure, and Services department (AWI)](https://www.psi.ch/en/awi) in PSI's [Center for Scientific Computing, Theory and Data (SCD)](https://www.psi.ch/en/csd).
|
312
pages/merlin7/99-support/migration-from-merlin6.md
Normal file
@ -0,0 +1,312 @@
|
||||
---
|
||||
#tags:
|
||||
keywords: merlin6, merlin7, migration, fpsync, rsync
|
||||
#summary: ""
|
||||
sidebar: merlin7_sidebar
|
||||
last_updated: 28 May 2025
|
||||
permalink: /merlin7/migrating.html
|
||||
---
|
||||
|
||||
# Merlin6 to Merlin7 Migration Guide
|
||||
|
||||
Welcome to the official documentation for migrating your data from **Merlin6** to **Merlin7**. Please follow the instructions carefully to ensure a smooth and secure transition.
|
||||
|
||||
## 📅 Migration Schedule
|
||||
|
||||
### Phase 1: Users without Projects — **Deadline: July 11**
|
||||
|
||||
If you **do not belong to any Merlin project**, i.e. you are one of the following:
|
||||
|
||||
* Users not in any group project (`/data/projects/general`)
|
||||
* Users not in BIO, MEG, Mu3e
|
||||
* Users not part of PSI-owned private Merlin nodes (ASA, MEG, Mu3e)
|
||||
|
||||
You must complete your migration **before July 11**. You just need to migrate your personal */data/user/$USER* and */psi/home/$USER* directories.
|
||||
|
||||
Users are responsible for initiating and completing the migration process as lined out below.
|
||||
Contact the Merlin support team [merlin-admins@lists.psi.ch](mailto:merlin-admins@lists.psi.ch) if you need help.
|
||||
|
||||
> ⚠️ In this phase, **it's important that you don't belong to any project**.
|
||||
> Once the migration is finished, **access to Merlin6 will no longer be possible.**
|
||||
|
||||
### Phase 2: Project Members and Owners — **Start Before August 1**
|
||||
|
||||
For users in active projects:
|
||||
|
||||
* Project **owners and members will be contacted by the Merlin admins**.
|
||||
* Migration will be **scheduled individually per project**.
|
||||
* Expect contact **before August 1**.
|
||||
|
||||
> ⚠️ In this phase, **group owners and members will also be asked to migrate their data and home directories in parallel.**
|
||||
|
||||
---
|
||||
|
||||
## Directory Structure Changes
|
||||
|
||||
### Merlin6 vs Merlin7
|
||||
|
||||
| Cluster | Home Directory | User Data Directory | Projects | Experiments |
|
||||
| ------- | :----------------- | :------------------ | -------------- | ----------------- |
|
||||
| merlin6 | /psi/home/`$USER` | /data/user/`$USER` | /data/project/ | /data/experiments |
|
||||
| merlin7 | /data/user/`$USER` | /data/user/`$USER` | /data/project/ | /data/project/ |
|
||||
|
||||
* The **home directory and user data directory have been merged** into the single new home directory `/data/user/$USER`.
|
||||
* The **experiments directory has been integrated into `/data/project/`**:
|
||||
|
||||
* `/data/project/general` contains general Merlin7 projects.
|
||||
* Other subdirectories are used for large-scale projects such as CLS division, Mu3e, and MeG.
|
||||
|
||||
---
|
||||
|
||||
## Step-by-Step Migration Instructions
|
||||
|
||||
### 📋 Prerequisites and Preparation
|
||||
|
||||
Before starting the migration, make sure you:
|
||||
|
||||
* are **registered on Merlin7**.
|
||||
|
||||
* If not yet registered, please do so following [these instructions](../merlin7/request-account.html)
|
||||
|
||||
* **have cleaned up your data to reduce migration time and space usage**.
|
||||
* Ensure your total usage on Merlin6 is **well below the 1 TB quota** (use the `merlin_quotas` command). Remember:
|
||||
|
||||
* **Merlin7 also has a 1 TB quota on your home directory**, and you might already have data there.
|
||||
* If your usage exceeds this during the transfer, the process might fail.
|
||||
|
||||
#### Recommended Cleanup Actions
|
||||
|
||||
* Remove unused files and datasets.
|
||||
* Archive large, inactive data sets.
|
||||
* Delete or clean up unused `conda` or `virtualenv` Python environments:
|
||||
|
||||
* These are often large and may not work as-is on Merlin7.
|
||||
* You can export your conda environment description to a file with:
|
||||
|
||||
```bash
|
||||
conda env export -n myenv > $HOME/myenv.yml
|
||||
```
|
||||
* Then recreate them later on Merlin7 from these files.
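
For instance, the exported environment can later be recreated on Merlin7 (a sketch, assuming the `myenv.yml` file produced in the step above):

```bash
# On Merlin7: recreate the conda environment from the exported description
conda env create -n myenv -f $HOME/myenv.yml
```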
|
||||
|
||||
> 🧹 You can always remove more old data **after** migration — it will be copied into `~/merlin6data` and `~/merlin6home` on Merlin7.
|
||||
|
||||
---
|
||||
|
||||
## Step 1: Run `merlin7_migration.setup`
|
||||
|
||||
Log into any **Merlin6 login node** (`merlin-l-001.psi.ch`, `merlin-l-002.psi.ch`, `merlin-l-01.psi.ch`) and run:
|
||||
|
||||
```bash
|
||||
merlin7_migration.setup
|
||||
```
|
||||
|
||||
This script will:
|
||||
|
||||
* Check that you have an account on Merlin7.
|
||||
* Configure and check that your environment is ready for transferring files via Slurm job.
|
||||
* **Create two directories:**
|
||||
|
||||
* `~/merlin6data` → copy of your old /data/user
|
||||
* `~/merlin6home` → copy of your old home
|
||||
|
||||
> ⚠️ **Important:** If `~/merlin6home` or `~/merlin6data` already exist on Merlin7, the script will exit.
|
||||
> **Please remove them or contact support**.
|
||||
|
||||
If there are issues, the script will:
|
||||
|
||||
* Print clear diagnostic output
|
||||
* Give you some hints to resolve the issue
|
||||
|
||||
If you are stuck, email: [merlin-admins@lists.psi.ch](mailto:merlin-admins@lists.psi.ch)
|
||||
|
||||
---
|
||||
|
||||
## Step 2: Run `merlin7_migration.start`
|
||||
|
||||
After setup completes, start the migration by running:
|
||||
|
||||
```bash
|
||||
merlin7_migration.start
|
||||
```
|
||||
|
||||
This script will:
|
||||
|
||||
* Check the status of your quota on Merlin6.
|
||||
* Submit **SLURM batch jobs** to the **`xfer`** partition
|
||||
* Queue two jobs:
|
||||
|
||||
* `migrate_merlin6data.batch` (data dir)
|
||||
* `migrate_merlin6home.batch` (home dir)
|
||||
* This job will only start if `migrate_merlin6data.batch` has successfully
|
||||
finished.
|
||||
* Automatically track the job IDs
|
||||
* Print log file locations for the different jobs
|
||||
|
||||
> ⚠️ **Once both transfers succeed, your access to Merlin6 will be revoked.**
|
||||
> Do **not** attempt to reconnect to Merlin6 after this.
|
||||

### ❗ If Something Goes Wrong

If a problem occurs during the migration process:

* 🔍 **Check the job log files** mentioned in the script output. They contain detailed messages that explain what failed and why (a quick way to scan them is sketched after this list).
* 🛠️ **Fix the root cause** on the source system. Common issues include:
  * Files with incorrect permissions
  * Ownership mismatches
  * Disk quota exceeded on Merlin7
* 📚 Refer to the [⚠️ Common rsync/fpsync Migration Issues](/merlin7/migrating.html#%EF%B8%8F--common-rsyncfpsync-migration-issues) section below for detailed explanations and solutions.
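
When a transfer fails, scanning the error logs for the first complaint usually points at the culprit. A rough sketch, using the log directory pattern printed by the script (the grep pattern is only a suggestion):

```bash
# Show the first obvious errors in the transfer error logs
grep -inE 'error|permission denied|quota' /data/software/xfer_logs/$USER/*.err | head -n 20
```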

> ℹ️ **Important:** If `migrate_merlin6data.batch` fails, the migration process will automatically cancel `migrate_merlin6home.batch` to avoid ending in an inconsistent state.

Once the problem is resolved, simply re-run the `merlin7_migration.start` script to resume the migration.

---

## Step 3: Monitor Transfer Jobs

To monitor your transfer jobs, run:

```bash
squeue -M merlin6 -u $USER -p xfer
```

Check the output to ensure your jobs are:

* Running (`R`) or completed (`CG`, or already removed from the queue)
* Not failed (`F`, `TO`, or stuck)

You can also check the logs (as printed by the script) to verify job completion.
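
Once the jobs have left the queue, `squeue` no longer lists them; `sacct` can confirm how they ended. A minimal sketch (the format fields are only a suggestion):

```bash
# Final state of your recent jobs on the merlin6 cluster
sacct -M merlin6 -u $USER -X --format=JobID,JobName%20,Partition,State,Elapsed
```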

> ✅ When `/data/user/$USER` and `/psi/home/$USER` on Merlin6 are no longer accessible, migration is complete.

---

## Examples

### Setup the Migration

```bash
merlin7_migration.setup
```

*Expected output:*

```bash
✅ login002.merlin7.psi.ch
✅ `$USER` is a member of svc-cluster_merlin7
✅ Skipping key generation
✅ SSH key already added to agent.
✅ SSH ID successfully copied to login00[1|2].merlin7.psi.ch.
✅ Test successful.
✅ /data/software/xfer_logs/caubet_m created.
✅ ~/merlin6data directory created.
✅ ~/merlin6home directory created.
```

### Start the Migration

```bash
merlin7_migration.start
```

*Expected output:*

```bash
(base) ❄ [caubet_m@merlin-l-001:/data/software/admin/scripts/merlin-user-tools/alps(master)]# ./merlin7_migration.start
✅ Quota check passed.
Used: 512 GB, 234001 files

###################################################
Submitting transfer jobs to Slurm

Job logs can be found here:
➡️ Directory '/data/user/caubet_m' does NOT have 000 permissions. Transfer pending, continuing...
✅ Submitted DATA_MIGRATION job: 24688554. Sleeping 3 seconds...
- /data/user transfer logs:
- /data/software/xfer_logs/caubet_m/data-24688554.out
- /data/software/xfer_logs/caubet_m/data-24688554.err
➡️ Directory '/psi/home/caubet_m' does NOT have 000 permissions. Transfer pending, continuing...
✅ Submitted HOME_MIGRATION job with dependency on 24688554: 24688555. Sleeping 3 seconds...
- /psi/home transfer logs:
- /data/software/xfer_logs/caubet_m/home-24688555.out
- /data/software/xfer_logs/caubet_m/home-24688555.err

✅ You can start manually a monitoring window with:
tmux new-session -d -s "xfersession" "watch 'squeue -M merlin6 -u caubet_m -p xfer'"
tmux attach -t "xfersession"

✅ FINISHED - PLEASE CHECK JOB TRANSFER PROGRESS
```

### Monitor Progress

```bash
squeue -M merlin6 -u $USER -p xfer
```

*Output:*

```bash
$ squeue -M merlin6 -u $USER -p xfer
CLUSTER: merlin6
JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON)
24688581 xfer HOME_MIG caubet_m PD 0:00 1 (Dependency)
24688580 xfer DATA_MIG caubet_m R 0:22 1 merlin-c-017
```

## ⚠️ Common `rsync`/`fpsync` Migration Issues

### File Permission Denied

* **Cause**: Files or directories are not readable by the user running the transfer.
* **Solution**: Fix the permissions on the source side:

  ```bash
  # Recursively give yourself read access (and traverse access on directories)
  chmod -R u+rX /path/to/file_or_dir
  ```
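
To find the offending entries first, something along these lines can help (GNU find; the path is an example):

```bash
# List everything under your data directory that you currently cannot read
find /data/user/$USER ! -readable -ls
```
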
### Ownership Mismatches

* **Cause**: Source files are owned by another user (e.g. root or a collaborator).
* **Solution**:
  * Change ownership before migration (changing the owner of a file usually requires administrator privileges, so contact support if needed):

    ```bash
    # Hand the files over to your own account
    chown -R $USER /path/to/file
    ```
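
To spot such files before the transfer starts, a simple scan can be used (the path is an example):

```bash
# List entries under your data directory that are owned by someone else
find /data/user/$USER ! -user $USER -ls
```
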
### Special Files (e.g. device files, sockets)

* **Cause**: `rsync` tries to copy UNIX sockets, device files, or FIFOs.
* **Effect**: Errors or incomplete copies.
* **Solution**: Avoid transferring such files entirely (by deleting them).
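
Such special files can be located up front, for example (the path is illustrative):

```bash
# Find sockets, FIFOs, and block/character devices under your data directory
find /data/user/$USER \( -type s -o -type p -o -type b -o -type c \) -ls
```
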
### Exceeded Disk Quota

* **Cause**: The combined size of existing and incoming data exceeds the 1 TB quota on Merlin7.
* **Effect**: The transfer stops abruptly.
* **Solution**: Clean up or archive non-essential data before migration.

### Very Small Files or Large Trees → Many Small rsync Calls

* **Cause**: Directories with thousands or millions of small files.
* **Effect**: The transfer is slow or hits process limits.
* **Solution**: Consider archiving such trees to a `.tar.gz` before transferring:

  ```bash
  # Pack the whole tree into a single compressed archive
  tar -czf myenv.tar.gz myenv/
  ```
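
After the migration, the archive can simply be unpacked again on Merlin7 (the destination path is an example, and the archive location depends on where you created it; files from `/data/user` end up under `~/merlin6data`):

```bash
# Restore the tree from the transferred archive
mkdir -p ~/restored && tar -xzf ~/merlin6data/myenv.tar.gz -C ~/restored
```
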
---

## Need Help?

If something doesn't work:

* Re-run the scripts and check the logs carefully.
* Use `less`, `cat`, or `tail -f` to view your job logs.
* Contact the Merlin support team: 📧 [merlin-admins@lists.psi.ch](mailto:merlin-admins@lists.psi.ch)

> We are here to help you migrate safely and efficiently.

@ -1,49 +0,0 @@
---
title: Slurm cluster 'merlin7'
#tags:
keywords: configuration, partitions, node definition
last_updated: 24 May 2023
summary: "This document provides a summary of the Merlin7 configuration."
sidebar: merlin7_sidebar
permalink: /merlin7/slurm-configuration.html
---

This documentation shows the basic Slurm configuration and the options needed to run jobs in the Merlin7 cluster.

### Infrastructure

#### Hardware

The current configuration for the _test_ phase consists of:

* 9 nodes for the _PSI-Dev_ development system
  * 8 nodes intended for bare-metal and Kubernetes (k8s) use
  * 1 login node

| Node | CPU | RAM | GRES | Notes |
| ---- | --- | --- | ---- | ----- |
| Compute node | _2x_ AMD EPYC 7713 (x86_64 Milan, 64 Cores, 3.2GHz) | 512GB DDR4 3200MHz | _4x_ NVIDIA A100 (Ampere, 80GB) | |
| Login node | _2x_ AMD EPYC 7742 (x86_64 Rome, 64 Cores, 3.2GHz) | 512GB DDR4 3200MHz | | |

#### Storage

* CephFS only for `/home` -- 1 TB
* ClusterStor L300 for `/scratch` -- 224 TB usable space
* CephRBD `/local` -- 100 GB

#### Node IDs

Cray uses various identifiers to uniquely label each node; details can be found on the [Crayism page](cray-conventions.html).
The table below collates these for the current configuration:

| Node ID | Cray XNAME | Notes |
| ---------- | ---------- | - |
| nid003204 | x1500c4s7b0n0 | login node, to which **psi-dev.cscs.ch** points |
| nid002808 | x1007c0s4b0n0 | |
| nid002809 | x1007c0s4b0n1 | |
| nid002812 | x1007c0s5b0n0 | |
| nid002813 | x1007c0s5b0n1 | |
| nid002824 | x1007c1s0b0n0 | |
| nid002825 | x1007c1s0b0n1 | |
| nid002828 | x1007c1s1b0n0 | |
| nid002829 | x1007c1s1b0n1 | |

@ -12,7 +12,7 @@ folder: news

{% for post in site.posts limit:10 %}

<h2><a class="post-link" href="{{ post.url | remove: "/" }}">{{ post.title }}</a></h2>
<h2><a class="post-link" href="{{ post.url }}">{{ post.title }}</a></h2>
<span class="post-meta">{{ post.date | date: "%b %-d, %Y" }} /
{% for tag in post.tags %}