"merged" admin-guide

This commit is contained in:
2021-05-05 14:24:27 +02:00
parent 296ecb3f26
commit c488e8de62
182 changed files with 17246 additions and 5 deletions
+13 -5
View File
@@ -1,9 +1,19 @@
# Table of contents
# Learn more at https://jupyterbook.org/customize/toc.html
#
# - file: intro
# - file: user_guide
# sections:
- file: index
- file: admin-guide/architecture
sections:
- file: admin-guide/architecture/overview
- file: admin-guide/architecture/accounts-and-groups
- file: admin-guide/architecture/authentication-authorization
- file: admin-guide/architecture/networking
- file: admin-guide/architecture/services-cron-etc
- file: admin-guide/architecture/version-control
- file: admin-guide/architecture/security
- file: admin-guide/architecture/active-directory
- file: admin-guide/architecture/certificates
# - file: user_standard_infrastructure
# - file: user_standard_directory_structure
# - file: user_ioc_configuration_modules_drivers
@@ -15,5 +25,3 @@
# sections:
# - file: admin_standard_infrastructure
# - file: admin_standard_directory_structure
- file: index
+153
View File
@@ -0,0 +1,153 @@
# Makefile for Sphinx documentation
#
# You can set these variables from the command line.
# SPHINXOPTS:  extra options passed to every sphinx-build invocation below.
SPHINXOPTS =
# SPHINXBUILD: the executable that every builder target runs.
SPHINXBUILD = sphinx-build
# PAPER:       set to a4 or letter to select the matching PAPEROPT_* variable;
#              when left empty, $(PAPEROPT_$(PAPER)) expands to nothing.
PAPER =
# BUILDDIR:    root of all build output; each target writes to a subdirectory.
BUILDDIR = _build
# Internal variables.
# Paper-size options, looked up by computed variable name PAPEROPT_$(PAPER).
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
# Options used by every builder except gettext: a doctree directory under
# $(BUILDDIR), the paper option, the user options, and "." as source directory.
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# Every target in this Makefile names a command, not a file it produces, so all
# of them are declared phony. texinfo and info were missing from the list; a
# file or directory with one of those names would have silently disabled them.
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man texinfo info changes linkcheck doctest gettext
# help: list the available targets with a one-line description each.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo " html to make standalone HTML files"
	@echo " dirhtml to make HTML files named index.html in directories"
	@echo " singlehtml to make a single large HTML file"
	@echo " pickle to make pickle files"
	@echo " json to make JSON files"
	@echo " htmlhelp to make HTML files and a HTML help project"
	@echo " qthelp to make HTML files and a qthelp project"
	@echo " devhelp to make HTML files and a Devhelp project"
	@echo " epub to make an epub"
	@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo " latexpdf to make LaTeX files and run them through pdflatex"
	@echo " text to make text files"
	@echo " man to make manual pages"
	@echo " texinfo to make Texinfo files"
	@echo " info to make Texinfo files and run them through makeinfo"
	@echo " gettext to make PO message catalogs"
	@echo " changes to make an overview of all changed/added/deprecated items"
	@echo " linkcheck to check all external links for integrity"
	@echo " doctest to run all doctests embedded in the documentation (if enabled)"
# clean: delete everything below $(BUILDDIR).
# BUILDDIR can be overridden on the command line; if it ends up empty the glob
# below would become "/*", so abort before running rm in that case.
# The leading "-" keeps the original best-effort behaviour (rm errors ignored).
clean:
	$(if $(strip $(BUILDDIR)),,$(error BUILDDIR is empty; refusing to run rm -rf /*))
	-rm -rf $(BUILDDIR)/*
# html: standalone HTML files. $@ is the target name, which is both the
# builder name and the output subdirectory.
html:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
# dirhtml: HTML files named index.html in per-document directories.
dirhtml:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
# singlehtml: the whole documentation as one large HTML file.
singlehtml:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
# pickle: pickle files for further processing.
pickle:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished; now you can process the pickle files."
# json: JSON files for further processing.
json:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished; now you can process the JSON files."
# htmlhelp: HTML files plus an HTML Help project (.hhp).
htmlhelp:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."
# qthelp: HTML files plus a qthelp project (.qhcp), followed by usage hints.
# The inner double quotes around qcollectiongenerator are escaped; unescaped,
# the shell treated them as string delimiters and printed the name unquoted.
qthelp:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished; now you can run \"qcollectiongenerator\" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/LinuxInfrastructure.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/LinuxInfrastructure.qhc"
# devhelp: HTML files plus a Devhelp project, followed by usage hints.
# $$HOME is doubled so that the shell, not make, sees the variable reference.
devhelp:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/LinuxInfrastructure"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/LinuxInfrastructure"
	@echo "# devhelp"
# epub: the documentation as an epub file.
epub:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
# latex: LaTeX sources only; PAPER=a4 or PAPER=letter selects the paper size.
latex:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."
# latexpdf: generate the LaTeX sources, then run the all-pdf target of the
# Makefile in the LaTeX output directory via a sub-make.
latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
# text: the documentation as plain text files.
text:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."
# man: the documentation as manual pages.
man:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
# texinfo: Texinfo sources only (see the info target for the makeinfo step).
texinfo:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."
# info: generate the Texinfo sources, then run the info target of the Makefile
# in the Texinfo output directory.
# The sub-make is invoked as $(MAKE), as latexpdf already does, so that -j, -n
# and command-line variables reach it; a literal "make" dropped all of them.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
# gettext: PO message catalogs, written to $(BUILDDIR)/locale. Uses
# I18NSPHINXOPTS, which omits the shared doctree directory.
gettext:
	$(SPHINXBUILD) -b $@ $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
# changes: overview of all changed/added/deprecated items.
changes:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."
# linkcheck: check all external links for integrity.
linkcheck:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."
# doctest: run all doctests embedded in the documentation (if enabled).
doctest:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."
File diff suppressed because one or more lines are too long
+19
View File
@@ -0,0 +1,19 @@
==============
Architecture
==============
Contents:
.. toctree::
:maxdepth: 2
architecture/overview
architecture/accounts-and-groups
architecture/authentication-authorization
architecture/networking
architecture/services-cron-etc
architecture/version-control
architecture/security
architecture/active-directory
architecture/certificates
@@ -0,0 +1,132 @@
==========
Accounts
==========
Linux accounts are generally stored and managed in Active Directory.
Account Types
-------------
There are several types of accounts, which are usually indicated by a prefix or
suffix:
- Normal accounts. No prefix or suffix. Older accounts are just last names,
newer accounts are LASTNAME_X, where X is the first letter of the given name.
- Global accounts. These have a ``gac-`` prefix. There are only a handful of
these on Linux so far, due to concerns about the lack of login restrictions.
- Administrator accounts. Marked with an ``-adm`` suffix.
- External users. These start with an ``ext-`` prefix and are provided to
external users, ie. those who are not PSI employees.
- Service accounts. These come with an ``svcusr-`` prefix and are used for
running services.
UID Allocation
--------------
============== ===============
Old accounts 1000-6000
GFA accounts 10000-30000
External users 30000-35000
New accounts 35000+
============== ===============
LDAP Attribute Mapping
----------------------
========= ========================
Attribute LDAP Attribute
--------- ------------------------
username ``msSFU30Name``
UID ``msSFU30UidNumber``
GID ``msSFU30GidNumber``
home ``msSFU30HomeDirectory``
shell ``msSFU30LoginShell``
========= ========================
Primary Groups
--------------
At PSI the user-private group scheme (UPG), the default on Red Hat
distributions, is **not** used. Instead, every user's primary group is usually
one specific to the group/department the user is working for, eg. ``unx-ait``.
Users for whom there is no natural choice of primary group are assigned
``unx-nogroup``.
Low GIDs
--------
A number of groups have very low GIDs (<500), in particular::
unx-fkt:*:101:
unx-lke:*:110:
unx-abe:*:120:stingelin
unx-aea:*:130:
unx-lmu:*:140:
unx-lem:*:141:
unx-muesr:*:150:
unx-asm:*:210:
unx-lrp:*:220:
unx-zrp:*:221:
unx-ash:*:230:
unx-ppt:*:280:
unx-pmr:*:290:
unx-cmt:*:301:
unx-lfk:*:310:
unx-lch:*:320:
unx-lns:*:330:
unx-lap:*:340:
unx-lmn:*:350:
unx-asq:*:360:
unx-crpp:*:370:
unx-psq:*:380:
unx-psz:*:390:
unx-gabe:*:402:
unx-lrs:*:410:
unx-lth:*:420:
unx-lwv:*:430:
unx-les:*:440:
unx-dtp:*:451:
unx-lsu:*:490:
Shells
------
We support bash, and we also try to keep tcsh working.
Currently bash, tcsh, and sh are used. The form for ordering accounts also
offers ``/bin/ksh`` and ``/bin/zsh``. The most popular by far is bash.
Special Accounts
----------------
``linux_ldap``: query LDAP
~~~~~~~~~~~~~~~~~~~~~~~~~~
The ``linux_ldap`` account has read-only permissions on a limited subset of the
LDAP attributes. It is used by ``nslcd``, for example, to query LDAP for users'
uid, gid, etc.
The password should not be shared unnecessarily, but it does not need
to be specifically protected either. In fact, in earlier releases of
Scientific Linux it was necessary to have ``/etc/nslcd.conf``, which
contains the password, world-readable.
This account **must not** be given additional access or privileges.
``linuxadjoin.psi.ch@D.PSI.CH``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This account is a pure AD account (ie it doesn't have Unix attributes like uid),
which is used to manage computer objects in AD automatically. In particular, it
is used to precreate computer objects to allow password-less AD joins.
The account is only used on the Puppet server and has no (known) password.
Instead a keytab is used to get a valid Kerberos ticket.
@@ -0,0 +1,46 @@
==================
Active Directory
==================
Kerberos Realm and Settings
---------------------------
The AD domain (ie the Kerberos realm) is D.PSI.CH, **not** PSI.CH. The maximum
lifetime of a ticket is about a day, and a ticket can be renewed for about a
week.
Domain Controllers
------------------
In most networks ``d.psi.ch`` resolves to the correct names/IPs. One exception
is the DMZ.
The domain controllers that are used internally are:
- dc00
- dc01
- dc02
In the DMZ we need to use these instead:
- rodc00
- rodc01
It is important to note that the SSL certificates for the internal DCs are
**not** signed for ``dc0n.psi.ch``, but ``dc0n.d.psi.ch`` (note the extra ``d``).
In certain contexts (eg in :manpage:`sssd.conf(5)`) specifying the DCs as
``dc0n.psi.ch`` fails because of this.
Linux Computer Objects
----------------------
Computer objects for Linux systems are created in
``OU=linux,OU=computers,OU=psi,DC=d,DC=psi,DC=ch``. We do not distinguish
between servers and workstations in AD (unlike the Windows team), as the
distinction isn't clear and it wouldn't help us anyway (as we don't use AD group
policies).
We perform the join password-less, by pre-creating the computer object using a
script running on the Puppet master.
@@ -0,0 +1,17 @@
Authentication and authorization
================================
We use/support the following authentication mechanisms:
- SSH keys/certificates
- Kerberos tickets (AD)
- Password (checked against AD), not for the ``root`` account
Login is restricted to certain users and groups on each system. This is
implemented locally using :manpage:`pam_access(8)`.
Shared Credentials
------------------
Shared credentials should be avoided, eg. by using ``.k5login`` or
``AuthorizedPrincipalsFile`` (see :manpage:`sshd_config(5)` for details).
@@ -0,0 +1,5 @@
Certificates
============
For services which are accessed by users, we use certificates provided by SWITCH
as described in Operations/Certificates.
+100
View File
@@ -0,0 +1,100 @@
============
Networking
============
The PSI network is quite fragmented and traffic is often restricted between
different subnets. As far as the Linux infrastructure is concerned we currently
distinguish two network zones: external (DMZ, Extranet, Tier3) and internal
(everything else).
Each 'zone' is supposed to have one instance of the Linux infrastructure
systems, eg Yum repository. This is not entirely true at this point, but
progress has been made towards this goal.
Within a zone all systems are allowed to connect to the respective
infrastructure systems. This page lists the exact connectivity requirements.
Requirements
============
Configuration Management and Software Distribution
--------------------------------------------------
Eventually there should be a separate Puppet server in the DMZ, but for now we
use the internal one.
======= ======================= ====================== ============= ====================
Source Destination (internal) Destination (external) Ports Purpose
------- ----------------------- ---------------------- ------------- --------------------
any puppet00 puppet00 8080, 8140 Puppet
any repo00 repo00 80, 443 Software Packages
======= ======================= ====================== ============= ====================
Authentication
--------------
We use Active Directory for authentication, so Kerberos and encrypted LDAP
connections must be allowed to the domain controllers:
======= ======================== ====================== ============= ========================
Source Destination (internal) Destination (external) Ports Purpose
------- ------------------------ ---------------------- ------------- ------------------------
any {dc00,dc01,dc02} rodc{00,01} 88, 464, 636 AD authentication/joins
======= ======================== ====================== ============= ========================
Deployment
----------
For the successful deployment of Linux systems, the requirements below must be
met. Systems are currently not deployed in external networks (DMZ, Extranet,
Tier3), so this only applies internally.
- `Configuration Management and Software Distribution`_
- `Authentication`_
In addition, the following:
======= ============ ================ ===================
Source Destination Ports Purpose
------- ------------ ---------------- -------------------
any boot00 UDP/69, 80, 443 PXE/Kickstart
======= ============ ================ ===================
Finally, having DHCP is helpful, but not necessary.
Monitoring/Reporting
--------------------
======= ====================== ====================== ============= ======================
Source Destination (internal) Destination (external) Ports Purpose
------- ---------------------- ---------------------- ------------- ----------------------
any influx00 influxdmz00 8086 Performance metrics
any rep N/A 443 Reporting (turned off)
======= ====================== ====================== ============= ======================
Configuration
=============
IPv6
----
Starting with RHEL 7 we do *not* disable IPv6 completely. We leave it on, but do
not configure any addresses. The routers at PSI also don't send router
advertisements, and the DHCP server doesn't provide IPv6 addresses.
As a consequence, the network interfaces on these systems only have a link-local
address and IPv6 isn't actually used in practice. The reason for leaving IPv6
enabled is to slowly gain experience with the protocol. So far we have run into
two issues:
- In one network the router *did* send router advertisements, but the route
didn't actually work.
- Some DNS names resolve to IPv6 addresses as well as IPv4 ones. Combined with
the first issue, this caused deployment to fail on a console.
+60
View File
@@ -0,0 +1,60 @@
Overview
========
Documentation
-------------
This admin guide as well as the user guide are written using Sphinx with
reStructuredText. In addition, Git repositories usually contain a README.md
file.
This `admin guide <https://linux-infra.gitpages.psi.ch/admin-guide>`_ is
automatically rebuilt when changes are pushed.
Deployment
----------
We generally deploy systems using PXE/Kickstart, even VMs. There is a VM
template, but it only defines the standard hardware/VM settings. No software is
pre-installed.
We use an iPXE image, which retrieves its client-specific configuration from a
web service, which also generates the client-specific Kickstart file.
For systems where network booting isn't possible, eg. because DHCP isn't
available, we can boot from USB as well.
The installation process is very roughly:
1. Install a minimal system plus the Puppet agent using Kickstart
2. Reboot
3. On first boot, run Puppet, which installs/configures the rest
OS, Software, and Licenses
--------------------------
We use Red Hat Enterprise Linux 7 (aka RHEL 7). The licenses are provided by
ETHZ, and we mirror the software from their Satellite server locally. We do not
have direct access to Red Hat support, but we can access the Red Hat customer
portal.
In addition to RHEL, we use/make available the following:
- EPEL (all of it)
- ELREPO (mostly for the Nvidia drivers)
- Puppet
- NoMachine
In addition there are several internal repositories.
Configuration Management and Automation
---------------------------------------
We use `Puppet <https://puppet.com>`_ for configuration management on Linux
systems. Configuration data is managed using `Hiera
<https://puppet.com/docs/puppet/5.3/hiera_intro.html>`_.
Scripts are generally written in bash or Python.
+35
View File
@@ -0,0 +1,35 @@
==========
Security
==========
Access Control
--------------
Access control on systems is done using ``pam_access``, ``pam_listfile``, etc.
By default, remote access is only granted to certain users/groups configured in
Hiera (see ``profile::aaa`` for details). Local access is currently
unrestricted, ie. every valid PSI Linux account can log in locally.
``root`` login
--------------
- only with keys/Kerberos tickets
- only through bastion hosts (wmgt*, two-factor auth) by default
SELinux
-------
- depends on the role, enforcing by default, enforcing on all infrastructure
systems
Firewall/tcpwrappers
--------------------
- tcpwrappers yes
- firewall no
@@ -0,0 +1,35 @@
=========
systemd
=========
SysV init scripts
-----------------
We only use SysV init scripts if systemd unit files are not provided by the
software in question. Our own services are all configured through systemd unit
files.
``cron`` vs. systemd timers
---------------------------
While we start/enable cron by default, we use systemd timers for everything. The
advantages are:
- unified view with other system entities, eg. services, mounts, devices
- easy log filtering with ``journalctl -u TIMER``
- easy introspection via ``systemctl list-timers``, ``systemctl cat TIMER``,
etc.
Mounts
------
Mounts, including automounts, are configured in /etc/fstab. Generating the mount
units directly leads to certain difficulties (SELinux problems which we never
quite figured out, stale mount unit files when the mountpoint changes, etc).
Automounts can be configured by adding ``x-systemd.automount`` to the mount
options.
We do start the ``autofs`` service on some systems, to provide the ``-net`` map.
@@ -0,0 +1,66 @@
Version control
---------------
We use the internal `PSI Gitlab server <https://git.psi.ch/>`_ for version
control. On this server we use a single repository group (``linux-infra``) for
all infrastructure-related repositories, because this way a number of things
have to be configured only once, e.g. members. In addition milestones and labels
are group-wide concepts in Gitlab, and it is very useful to have those apply to
all repositories. Finally it is easy to see all issues and merge requests for a
repository group.
Repositories
~~~~~~~~~~~~
Our repositories are configured in the following way:
The ``puppet`` repository
.........................
- Fast-forward merges only
This keeps the history linear without merge commits, which makes it easier to
read.
- Merges to ``prod`` or ``preprod`` require one approval
- Approval is revoked when further changes are pushed to an already approved
merge request.
- The default branch is called ``prod``. There is no ``master`` branch.
- The ``prod`` branch is protected.
- The ``preprod`` branch is protected.
Hiera repositories
..................
These repositories contain Hiera data. There is exactly one repository per sysdb
environment. The repository for environment ``foo`` is called ``data-foo``.
Access is granted to the owner and admins of the corresponding sysdb environment.
Development workflow
~~~~~~~~~~~~~~~~~~~~
We steer development through Gitlab issues.
For every change, be it a new feature, bug fix, or improvement, we create an
issue on the relevant repository. Issues that have broader implications and do
not naturally belong to a code repository, go into the ``org`` repository. An
example would be a change of the development workflow or repository
authorization.
When starting work on a change, the developer first assigns the issue to
themselves.
Once the developer has finished development and tested the change, they rebase
their changes to the ``prod`` branch, push the changes to a new branch, and
open a merge request to the ``preprod`` branch. Each issue ``#N`` being fixed
must be mentioned at the start of the commit message in the form ``Fix #N``.
Other developers will review the merge request. If they approve it, either the
approving or the original developer may accept it, which will trigger the actual
merge.
+242
View File
@@ -0,0 +1,242 @@
# -*- coding: utf-8 -*-
#
# Linux Infrastructure documentation build configuration file, created by
# sphinx-quickstart on Mon Apr 11 13:24:16 2016.
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys, os
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
# -- General configuration -----------------------------------------------------

# Minimal Sphinx version these docs need; commented out, so no requirement.
#needs_sphinx = '1.0'

# Names of Sphinx extension modules to load ('sphinx.ext.*' or custom ones).
# None are enabled.
extensions = []

# Directories (relative to this one) that hold templates.
templates_path = ["_templates"]

# File name suffix of the source documents.
source_suffix = ".rst"

# Encoding of the source files.
#source_encoding = 'utf-8-sig'

# Document that contains the root toctree.
master_doc = "index"

# Project identity.
project = u"Linux Infrastructure"
copyright = u"2016, PSI Linux Administrators"

# Version strings substituted for |version| and |release| and used in other
# places of the built documents: the short X.Y form, and the full form
# including any alpha/beta/rc tag.
version = "1.0"
release = "1.0"

# Language of the content Sphinx generates itself.
#language = None

# |today| is replaced by `today` if that is set to a non-false value,
# otherwise by the current date formatted with `today_fmt` (strftime).
#today = ''
#today_fmt = '%B %d, %Y'

# Patterns (relative to the source directory) for files and directories that
# are ignored when collecting source files.
exclude_patterns = ["_build"]

# Default reST role, i.e. the role applied to `text` markup.
#default_role = None

# Append '()' to the text of :func: and similar cross-references.
#add_function_parentheses = True

# Prefix description unit titles (e.g. .. function::) with the module name.
#add_module_names = True

# Show sectionauthor and moduleauthor directives in the output.
#show_authors = False

# Pygments style used for syntax highlighting.
pygments_style = "sphinx"

# Prefixes that are ignored when sorting the module index.
#modindex_common_prefix = []
# -- Options for HTML output ---------------------------------------------------

# Theme for the HTML and HTML Help pages.
html_theme = "default"

# Theme-specific options that tune the look and feel of the chosen theme.
#html_theme_options = {}

# Directories (relative to this one) that contain custom themes.
#html_theme_path = []

# Title of this document set; None means "<project> v<release> documentation".
#html_title = None

# Shorter title for the navigation bar; defaults to html_title.
#html_short_title = None

# Image file (relative to this directory) shown at the top of the sidebar.
#html_logo = None

# Favicon (within the static path): a Windows .ico file, 16x16 or 32x32 pixels.
#html_favicon = None

# Directories (relative to this one) with custom static files such as style
# sheets. They are copied after the builtin static files, so a file named
# "default.css" here replaces the builtin "default.css".
html_static_path = ["_static"]

# If not '', a 'Last updated on:' line in this strftime format is added at the
# bottom of every page.
#html_last_updated_fmt = '%b %d, %Y'

# Convert quotes and dashes to typographically correct entities (SmartyPants).
#html_use_smartypants = True

# Custom sidebar templates: document name -> template name.
#html_sidebars = {}

# Extra templates rendered to pages: page name -> template name.
#html_additional_pages = {}

# Generate the module index.
#html_domain_indices = True

# Generate the general index.
#html_use_index = True

# Split the index into one page per letter.
#html_split_index = False

# Add links to the reST sources to the pages.
#html_show_sourcelink = True

# Show "Created using Sphinx" in the HTML footer (default: True).
#html_show_sphinx = True

# Show "(C) Copyright ..." in the HTML footer (default: True).
#html_show_copyright = True

# If set, an OpenSearch description file is written and every page gets a
# <link> tag pointing to it. The value must be the base URL from which the
# finished HTML is served.
#html_use_opensearch = ''

# File name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Base name of the output file of the HTML help builder.
htmlhelp_basename = "LinuxInfrastructuredoc"
# -- Options for LaTeX output --------------------------------------------------

# LaTeX settings; every entry is commented out, so the mapping is empty.
latex_elements = {
    # Paper size: 'letterpaper' or 'a4paper'.
    #'papersize': 'letterpaper',
    # Font size: '10pt', '11pt' or '12pt'.
    #'pointsize': '10pt',
    # Extra material for the LaTeX preamble.
    #'preamble': '',
}

# One tuple per LaTeX file to produce:
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
    (
        "index",
        "LinuxInfrastructure.tex",
        u"Linux Infrastructure Documentation",
        u"PSI Linux Administrators",
        "manual",
    ),
]

# Image file (relative to this directory) placed at the top of the title page.
#latex_logo = None

# For "manual" documents: make top-level headings parts instead of chapters.
#latex_use_parts = False

# Show page references after internal links.
#latex_show_pagerefs = False

# Show URL addresses after external links.
#latex_show_urls = False

# Documents appended as an appendix to all manuals.
#latex_appendices = []

# Generate the module index.
#latex_domain_indices = True
# -- Options for manual page output --------------------------------------------

# One tuple per manual page:
# (source start file, name, description, authors, manual section).
man_pages = [
    (
        "index",
        "linuxinfrastructure",
        u"Linux Infrastructure Documentation",
        [u"PSI Linux Administrators"],
        1,
    ),
]

# Show URL addresses after external links.
#man_show_urls = False
# -- Options for Texinfo output ------------------------------------------------

# One tuple per Texinfo file to produce:
# (source start file, target name, title, author,
#  dir menu entry, description, category)
# The description replaces the sphinx-quickstart placeholder
# 'One line description of project.', which would otherwise be emitted as the
# description of this manual.
texinfo_documents = [
    ('index', 'LinuxInfrastructure', u'Linux Infrastructure Documentation',
     u'PSI Linux Administrators', 'LinuxInfrastructure',
     'Administration guide for the PSI Linux infrastructure.',
     'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
#texinfo_appendices = []

# If false, no module index is generated.
#texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'
+25
View File
@@ -0,0 +1,25 @@
Deployment
==========
Linux systems are generally deployed using PXE and Kickstart. We use iPXE, which
can retrieve its configuration via HTTP(S) and therefore gives us a lot of
flexibility, because the iPXE configuration can be generated on the fly with the
specific settings needed by the booting client. In the same way the Kickstart
configuration for the RHEL installer is auto-generated.
The information used for generating the iPXE and Kickstart configurations is
stored in the sysdb (see below). The sysdb is accessible through a web api, and
a command line client is available.
When PXE boot is not an option, e.g. in restricted networks, it is possible to
start iPXE from a USB stick or other media.
.. toctree::
:maxdepth: 2
deployment/ipxe
deployment/kickstart
deployment/partitioning
deployment/sample
deployment/infrastructure
deployment/workflow
+94
View File
@@ -0,0 +1,94 @@
============================
Deploying the infrastructure
============================
Introduction
============
The deployment infrastructure is composed of:
- one sysdb server;
- one puppet server;
- one repository server.
We currently have two infrastructures in place, one for testing and the
other for production, with the following hostnames:
+--------------------+----------------+-----------------+
| Role | testing host | production host |
+====================+================+=================+
| sysdb server | boot00-test | boot00 |
+--------------------+----------------+-----------------+
| puppet server | puppet00-test | puppet00 |
+--------------------+----------------+-----------------+
| repository server | repo00-test | repo00 |
+--------------------+----------------+-----------------+
The Sysdb Server
----------------
The sysdb server is a server running HTTP(S) services needed by
other components of the deployment infrastructure, specifically to:
- setup (add/remove/modify) the hosts managed by the system;
- get the iPXE file for the managed hosts;
- get the Kickstart file for the managed hosts.
The code for these services is available at `<https://git.psi.ch/linux-infra-software/sysdb>`_.
The Puppet Server
-----------------
The puppet server is a rather standard puppet master, configured to
use ``sysdb`` information as ENC.
The Repo Server
---------------
The service that provides clones of different repositories (RHEL,
Puppetlabs, Google Chrome) plus some internally developed ones for
specific products (eg. GPFS).
You can reach the production one at: `<http://repo00.psi.ch/el7/>`_.
Deployment of an infrastructure server
======================================
The procedure is not yet fully automated or completely documented.
The reference repo server is `<https://git.psi.ch/linux-infra/bootstrap>`_.
The installation of an infrastructure server uses:
- ETH repositories (since the repo server is not assumed to be available)
- PXE only and static kickstart file (since the boot server is not supposed to be available)
After the installation, inside ``/root/bootstrap/instcode`` there is a script for each
server, called ``{puppet,boot,repo}-server``.
The script should be run manually and some user interaction is needed.
Before installing you need to have:
- a copy of the ssh root user keys and ssh server keys
- a Red Hat Satellite Key. You can get it via the RHN Satellite Account at ETH
So to install eg. ``puppet00-test`` the procedure would be:
- ensure on the Satellite web site that the system is not present (Systems-> Overview page)
- copy the kickstart on ``/afs/psi.ch/project/linux/www/kickstart/configs/linux-infra/``
- copy the pxe in ``/afs/psi.ch/service/linux/tftpboot/pxelinux.cfg/``
- install the server booting from network
- after the installation the server will stop
- reboot it
- login on the server
- copy the ssh key
- enter the ``/root/bootstrap/instcode`` directory
- optionally change the branch
- run ``./puppet-server``
Please note that in some cases the ETH repo used during the
installation is not properly working.
If you are not redeploying the repo server itself, it can be
used instead of the ETH one by adapting the PXE and the
kickstart file accordingly.
+61
View File
@@ -0,0 +1,61 @@
PXE-booting with iPXE
=====================
UEFI
----
iPXE supports UEFI and so do we. This requires the ``ipxe.efi`` boot image.
Process
-------
When a system PXE boots, the DHCP server will boot either pxelinux in legacy
mode or grub on UEFI. It is still possible to load ipxe from either of these.
With the right option, the boot process is pointed to ``boot00.psi.ch`` and one
of the following boot images:
- ``ipxe.efi`` for systems using UEFI
- ``ipxe.lkrn`` for all other systems
The system downloads the image and executes it.
The image contains a small iPXE script, which makes iPXE retrieve its actual
configuration from a web service running (usually) on ``boot00.psi.ch``.
Specifically, it queries the URL
``https://boot00.psi.ch/ipxe/v1/config?mac=<MAC>``, where ``<MAC>`` is the MAC
address of the interface used by iPXE.
The web service on ``boot00.psi.ch`` will generate the iPXE configuration on the
fly, depending on whether the system is supposed to be reinstalled and if so,
which distribution it is supposed to use. The menu offers other options as well,
e.g. an interactive iPXE shell and a memory test.
Building the iPXE boot image
----------------------------
The steps for building an iPXE image are
1. Clone ``git@git.psi.ch:linux-infra/ipxe-build.git``
2. Change into the new directory
3. Run the ``refresh-ipxe.sh`` script. This will check out the iPXE source code
mirror from ``git.psi.ch``.
4. Run the ``build.sh`` script. This will use the named configurations in the
``ipxe-build`` repository to compile two iPXE images: ``src/bin/ipxe.lkrn``
(legacy boot) and ``src/bin-x86_64-efi/ipxe.efi`` (UEFI).
As described above, we use an iPXE boot image with an embedded script looking
roughly like the following::
#!ipxe
dhcp && goto dhcp_succeeded
shell
#prompt for network info
:dhcp_succeeded
chain http://boot00.psi.ch/ipxe/v1/config?mac=${netX/mac}
+11
View File
@@ -0,0 +1,11 @@
Kickstart
=========
The auto-generated Grub or iPXE configuration will, when installing RHEL, instruct
the RHEL installer to download the Kickstart configuration from
``https://boot00.psi.ch/kickstart/v1/config?fqdn=<FQDN>&instrepo=<INSTREPO>``,
where ``<FQDN>`` is the FQDN of the host to be installed, and ``<INSTREPO>`` is
the installation repository to be used.
The web service will auto-generate the Kickstart configuration for the client
based on information in sysdb, e.g. the release to be installed.
+30
View File
@@ -0,0 +1,30 @@
Partitioning
============
System partitions are configured with a standard schema using LVM, so
that they can be possibly changed afterwards.
By default the whole space available on the first block device is used
and any existing partition is removed.
The default partition schema is:
- create one primary ``/boot`` partition of 1Gb;
- create the ``vg_root`` Volume Group that uses the rest of the disk;
- on ``vg_root`` create the following logical volumes:
- ``lv_root`` of 12 Gb size for ``/root``;
- ``lv_var`` of 8 Gb size for ``/var``;
- ``lv_var_log`` of 2 Gb size for ``/var/log``;
- ``lv_tmp`` of 2 Gb size for ``/tmp``.
Custom Partitioning
-------------------
It is possible to customize the partitioning by using the ``partitions`` attribute on sysdb.
The ``partitions`` attribute can take two values:
- ``manual``: this will not partition the disks and the installation process will wait for manual partitioning;
- ``<URL>``: it is assumed to be a file containing the partitioning commands as used in kickstart.
If it is not possible to download the given URL, the system falls back to manual partitioning at installation time.
Please also note that no check is done on the URL content.
+60
View File
@@ -0,0 +1,60 @@
Sample deployment of a new server
=================================
In this section we describe the deployment of a new server from
scratch.
All the operations will be performed using `bob`, the command-line
client for interacting with `sysdb`.
The information we need is:
- the server name: ``logging-ra.psi.ch``
- the server MAC address: ``23:3d:ef:33:11:22``
- the server role: ``role::logging_server``
- the server environment in puppet: ``production``
- the sysdb environment: ``daas``
On our laptop, where we have a local copy of `bob`, we first setup
some environment variables to have shorter commands: ::
export PSI_BOB_URL=https://boot00-test.psi.ch/sysdb
export PSI_BOB_USER=talamo_i
The $PSI_BOB_USER is needed because our local user is different from
the one on sysdb. It could also be specified on the command line or
let `bob` take the local username.
We then create a deployment environment::
bob env add daas talamo_i daas-admins 'DaaS Cluster'
We add the node to the environment, specifying ``netboot`` as
the boot action ::
bob node add logging-ra.psi.ch daas netboot
We setup its MAC address: ::
bob node add-mac logging-ra.psi.ch 23:3d:ef:33:11:22
And we finally setup the puppet role and puppet environment: ::
bob node set-attr logging-ra.psi.ch puppet_role=role::logging_server
bob node set-attr logging-ra.psi.ch puppet_env=production
PXE server-side configuration
-----------------------------
In case the node is configured to use legacy BIOS mode (and
not UEFI mode) the following has to be run on the PXE server to make
the node boot the ipxe image: ::
cd /afs/psi.ch/service/linux/tftpboot/pxelinux.cfg/
ln -sf lxprod logging-ra
MAC='the::node::mac::address'
MACFILENAME="01-`echo $MAC|tr 'A-Z' 'a-z'| sed 's/:/-/g'`"
ln -sf logging-ra $MACFILENAME
Otherwise the node will boot with the old pxe configuration. In such
a case it is possible to boot ipxe by typing `lxprod` on the PXE command line.
+165
View File
@@ -0,0 +1,165 @@
===================================
Deploying New Environment: Workflow
===================================
Introduction
============
Deploying a new environment requires the following:
1. Configuring the environment in **bob**
2. Configuring the environment in GIT (``git.psi.ch``)
- Environment format: ``data-<environment_name>``
- And change permissions accordingly
3. Configuring the environment in Puppet (``puppet00.psi.ch``)
Configuring the environment in sysdb
------------------------------------
Bob allows you to create a new environment in ``sysdb`` by using the ``bob
env`` option. You must have permissions to do that.
You must belong to the ``sysdb-admins`` group that is actually
configured on the local ``/etc/group`` file. A migration to Active
Directory should be done for that group.
To list current defined environments run::
bob env list
To add a new environment, run::
bob env add <environment_name> <owner> <admin_group> "<description>"
For example, for the MeG cluster::
bob env add meg caubet_m unx-hpc_adm "MeG Cluster"
Test new environment in BOB
~~~~~~~~~~~~~~~~~~~~~~~~~~~
In order to test that environment was successfully created::
bob env list | grep <environment_name>
For example::
caubet_m@caubet-laptop:~/GIT/admin-guide/deployment$ bob env list | grep meg
meg caubet_m unx-hpc_adm MeG Cluster
Configuring the environment in GIT
----------------------------------
Current **GIT** server at **PSI** is ``git.psi.ch``. Every new environment should in principle belong to the **linux-infra** project.
You must belong to the ``puppet_env`` Active Directory group in order to be able to create new projects.
In order to create a new environment in **GIT**, you should log in at the following link: https://git.psi.ch/linux-infra/. Here you can see
the different environments and **GIT** projects belonging to the **linux-infra** group.
The steps to create and configure a new **GIT** project are:
1. Create a new project (environment). It can be done here: https://git.psi.ch/projects/new
- Go to ``[Blank project] Tab`` (which is the *default* tab)
- Change ``[Project Path]`` as follows:
- https://git.psi.ch/ + ``linux-infra``
- Define ``[Project Name]``, which *must* have the following format:
- ``data-<environment_name>`` where ``<environment_name>`` is the one defined in **Bob**
- *[Optional]* Specify ``[Project description]``
- Specify ``[Visibility Level]``:
- Should be ``Internal``
2. Configure *project* permissions as follows:
- ``[data-<environment_name>]->[Settings]->[Repository]``, or directly: https://git.psi.ch/linux-infra/data-(environment_name)/settings/repository
- ``[Deploy Keys]`` -> select ``root@puppet00.psi.ch`` -> click on ``'Enable'``
- ``[data-<environment_name>]->[Settings]->[Members]``, or directly: https://git.psi.ch/linux-infra/data-(environment_name)/project_members
- Set up specific permissions for specific users or groups. For example:
- Set project ``Master``:
- ``[Select members to invite]`` (``caubet_m``) + ``[Choose a role permission]`` (``Master``) + ``[Add to project]``
- Set other roles:
- ``[Select members to invite]`` (``ozerov_d``) + ``[Choose a role permission]`` (``Developer``) + ``[Add to project]``
- ``[data-<environment_name>]->[Settings]->[Integrations]``, or directly: https://git.psi.ch/linux-infra/data-(environment_name)/settings/integrations
- Add WebHook as follows:
- ``[URL]``: http://puppet00.psi.ch/events/dataupdate
- (Checked) ``[Push events]``. Uncheck the rest.
- ``[SSL verification]`` -> (uncheck) ``[Enable SSL verification]``
- Confirm information from above, and click on ``[Add webhook]`` to add the new WebHook.
Test new environment in GIT
~~~~~~~~~~~~~~~~~~~~~~~~~~~
In order to test that environment was successfully created::
git clone git@git.psi.ch:linux-infra/data-<environment_name>.git
For example::
caubet_m@caubet-laptop:~/GIT$ git clone git@git.psi.ch:linux-infra/data-meg.git
Cloning into 'data-meg'...
X11 forwarding request failed
warning: You appear to have cloned an empty repository.
Configuring the environment in Puppet
-------------------------------------
Current *test* **Puppet** server is ``puppet00-test.psi.ch``. Current *production* **Puppet** server is ``puppet00.psi.ch``. This documentation will take as an example an environment
deployed in the *production* server ``puppet00.psi.ch``.
You must have ``root`` access to the Puppet server in order to be able to configure it.
Steps are:
1. Log in to the Puppet server::
ssh root@puppet00.psi.ch
2. Clone the already created **GIT** project to the ``/srv/puppet/data`` path, which contains all the different projects (Bob environments)::
git clone git@git.psi.ch:linux-infra/data-<environment_name>.git /srv/puppet/data/<environment_name>
# In example:
git clone git@git.psi.ch:linux-infra/data-meg.git /srv/puppet/data/meg
3. Change permissions to ``plidata.puppet`` for the new directory ``/srv/puppet/data/<environment_name>``::
chown -R plidata.puppet /srv/puppet/data/<environment_name>
# In example:
chown -R plidata.puppet /srv/puppet/data/meg
At this point, everything should be configured and we can proceed to test that it works.
Test new environment in Puppet
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In order to test that environment was successfully created::
git clone git@git.psi.ch:linux-infra/data-<environment_name>.git
Add a new file <environment_name>.yaml to the project::
cd data-<environment_name>
touch <environment_name>.yaml
git add <environment_name>.yaml
git commit -a -m "Added first empty file"
git push
After a few seconds (needs time to trigger the change), check in ``puppet00.psi.ch:/srv/puppet/data/<environment_name>`` that file was successfully triggered (copied) to the puppet server from **GIT**::
ssh root@puppet00.psi.ch ls /srv/puppet/data/<environment_name>/<environment_name>.yaml
Full real example::
git clone git@git.psi.ch:linux-infra/data-meg.git
cd data-meg
touch meg.yaml
git add meg.yaml
git commit -a -m "Added first empty file"
git push
sleep 5
ssh root@puppet00.psi.ch ls /srv/puppet/data/meg/meg.yaml
+70
View File
@@ -0,0 +1,70 @@
Guidelines
==========
.. toctree::
:maxdepth: 1
guidelines/conventions
Version Control
---------------
Everything must be in version control before being used on production systems.
In particular, scripts and other software, SPEC files for packages, relevant
documentation, Puppet code, etc.
Git Hooks
---------
`This repository <https://gitern.psi.ch/linux-infra-software/githooks>`_ provides a pre-commit git hook that checks code compliance to different standards.
Hiera Code
----------
The naming of the variables inside Hiera depends on the scope usage of the variables.
The variables being used only inside one specific class
will be named ``base_class_name::variable`` where ``base_class_name`` is the last part of class name, without the part before the last ``::`` separator.
Eg. the ``permit_root_login`` variable for the ``profile::ssh_server`` class will be named ``ssh_server::permit_root_login``.
When a variable is shared between a client and a server, we will prepend the variable name with the service name.
Eg. the ``grid_name`` parameter for ganglia service, used inside both client and server configuration, will be named ``ganglia::grid_name``.
*Note:* this could potentially lead to conflicts, if we want to specify hiera values to be used both inside roles and profiles with the same name.
Do we want that?
Code Reviews
------------
All code must be reviewed by at least one other team member with write access to
the repository before pushing it. This will help keep the style consistent and
increase familiarity with the code base.
External Code
-------------
Although the installation infrastructure makes heavy use of external code, the system
has to avoid, as much as possible, depending on the availability of external services.
A WAN outage or a remote http server failure should not influence the installation system.
For this reason, all the external code is mirrored internally in specific git repositories.
Servers and services
--------------------
Every server should support exactly one service, e.g. Puppet, or FTP. This makes
the services more independent (e.g. for downtimes), simplifies the structure of
the corresponding Puppet code, makes it easier to reason about the environment,
and prevents conflicts regarding certain configuration settings.
Development Workflow
--------------------
The general inspiration comes from `this post
<http://nvie.com/posts/a-successful-git-branching-model/>`_.
Regarding the puppet code, this is a more detailed description of our current
workflow:
+52
View File
@@ -0,0 +1,52 @@
Conventions
===========
Naming convention for servers
-----------------------------
Server names have the form ``purpose[0-9][0-9]{,-test}.psi.ch``, where
``purpose`` is the purpose of the server or the service provided by it. Examples are:
- ``puppet00.psi.ch`` is the **productive** puppetmaster
- ``repo00-test.psi.ch`` is the **test** Yum repository server
When putting system names into configuration files, we always use lower case and
the fully qualified domain name. Good: ``puppet00.psi.ch``. Bad: ``puppet00`` or
``PUPPET00.PSI.CH``.
Names for programs
------------------
The general conventions regarding filenames apply (see `File system hierarchy
and filenames`_). In addition, scripts should not include a suffix indicating
the language they are written in. The latter is an implementation detail and
does not matter to the caller of the script/program. A good name could be
``pli-system-info``, a bad one would be ``pli-system-info.sh``.
File system hierarchy and filenames
-----------------------------------
Files should generally be placed where the distribution and/or the `FHS
<http://www.pathname.com/fhs/>`_ expect them. Deviations and clarifications are
documented in this section.
The base directory for files is ``/usr`` if the files are relevant to end-users,
``/opt/pli`` if the files are relevant only to Linux administrators. This avoids
polluting the PATH for end-users as well as naming conflicts.
Executables (scripts or otherwise) that are meant for interactive use should go
into ``BASE/bin`` or ``BASE/sbin`` as appropriate.
Executables which are primarily called by other executables to implement part of
their functionality should be placed in ``BASE/libexec``. This avoids polluting
``$PATH``.
Files and directories should have names starting with ``pli-`` or be placed in a
directory with such a name. This makes it obvious whether a file belongs to an
internal package and avoids collisions.
Note: this section used to recommend ``psi-`` as the prefix. It turns out that
this prefix is being used in various other places already, hence we use
``pli-``, where PLI stands for PSI Linux Infrastructure.
+34
View File
@@ -0,0 +1,34 @@
.. Linux Infrastructure documentation master file, created by
sphinx-quickstart on Mon Apr 11 13:24:16 2016.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to Linux Infrastructure's documentation!
================================================
Contents:
.. toctree::
:maxdepth: 2
architecture
guidelines
deployment
puppet
selinux
software
mgmt-tools
operations
third-party
troubleshooting
more
legacy
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
+20
View File
@@ -0,0 +1,20 @@
================
Legacy Systems
================
This section describes the PSI legacy Linux environment, ie. those systems
running Scientific Linux 6 and earlier. Unlike the rest of the admin guide, this
section is mostly a plain import of existing snippets of documentation, which
were created in a variety of formats.
.. toctree::
:maxdepth: 2
legacy/communication
legacy/installation
legacy/misc
legacy/monitoring
legacy/puppet
legacy/services
legacy/software
legacy/storage
+48
View File
@@ -0,0 +1,48 @@
Communication
=============
There are several communication channels:
1. Mailing lists: linux-announce and linux-discuss
2. Trouble tickets (via the helpdesk).
3. IT supporter meetings
Mailing lists
-------------
There are three mailing lists:
1. ``puppet-devel@lists.psi.ch``
This list is for technical discussions concerning Linux at PSI, in particular
the infrastructure, ie Puppet, iPXE, etc.
2. ``linux-announce@lists.psi.ch``
This list is used for announcements of new updates, changes, events, etc.
Only admins can post (as configured on the mailing list server).
3. ``linux-discuss@lists.psi.ch``
This list is meant for discussions related to the PSI Linux environment. Any
subscriber can post.
Mails to ``linux-announce`` should follow a standard format. In particular, the
subject line should start with a tag describing the kind of announcement.
Already established tags are:
- ``[Change]``
- ``[Updates]``
Trouble tickets
---------------
These are for incidents and requests.
Linux Support Meetings
----------------------
These happen roughly every two weeks.
+65
View File
@@ -0,0 +1,65 @@
Installation
============
This section describes the various installation methods, e.g. PXE.
PXE Server
----------
The PXE server is ``pxeserv01.psi.ch``, running SL 5.1.
Directory structure
~~~~~~~~~~~~~~~~~~~
The usual ``/tftpboot/pxelinux.cfg`` setup. The ``pxelinux.cfg``
directory contains the following subdirectories:
- ``Boot``: could be Windows-related
- ``centos``: CentOS 7.0, 7.1, and 7.2
- ``dl``: diskless systems
- ``pxelinux.cfg``: PXELINUX configuration
- ``scientific``: Scientific Linux, 3.01 -> 7.0, partly obsolete
- ``tools``:
PXELINUX
--------
The PXELINUX configuration files are in ``pxeserv01:/tftpboot``.
The ``default`` file currently contains 127 labels.
Scientific Linux 6, 64bit
~~~~~~~~~~~~~~~~~~~~~~~~~
One thing to note is that the
`Kickstart file <http://linux.web.psi.ch/kickstart/configs/sl64-64-desk-stable-ks.cfg>`_
is modified during installation by the
`ks_pre.sh <http://linux.web.psi.ch/dist/scientific/64/kickstart/bin/ks_pre.sh>`_
script.
Kickstart
---------
The documentation on Kickstart files is currently based on
``sl64-64-desk-stable-ks.cfg``, the default Kickstart file for SL6.
In the ``%pre`` section a distribution-specific script ``ks_pre.sh``
is downloaded and executed. Errors are not handled. The script
*modifies* the Kickstart file. The latter contains markers
``#BEGIN_MAIN`` and ``#END_MAIN`` to mark the region to be modified.
Custom keys
~~~~~~~~~~~
The Kickstart file defines variables ``INIT_KEYS`` and
``APPEND_KEYS`` and exports them for use by ``ks_pre.sh``. The latter
attempts to download an environment file and a script
``custom_pre.sh`` for each key and sources them.
The custom key related files are stored below
``/afs/psi.ch/software/linux/dist/scientific/$OSVERSION/custom/``.
Examples for custom keys are ``DesktopTesting`` and ``DesktopStable``.
+37
View File
@@ -0,0 +1,37 @@
Miscellaneous
=============
This section contains (potentially out-of-date or obsolete) documents
from the wiki, various places on AFS, etc. Most of them should be
integrated in the other sections properly, the rest removed.
.. toctree::
:maxdepth: 1
misc/afstowindowsloginchangeinsl4andsl5
misc/configureldaponpsipuppet3
misc/createanewkickstartinstallationforfedora10
misc/disklessclientsl60
misc/dkmsbasics
misc/firefoxpreferenceshowto
misc/howtoeditinstallimg
misc/howto-start-vncserver
misc/kernelmodulee1000eupdateforsl5.1
misc/linuxhowtolookupforpcidevicesandcorrespondingmodulesinsl5
misc/linuxhowto-rpm-updatepsi-desktoppackageonsl5
misc/linuxhowto-sl5-nvidiadriverinstallationupdate
misc/linuxloginclusters
misc/loadbalancerllclb1
misc/nxserverclientinstallation
misc/prepareanewslrelease
misc/projectpsi-puppet1
misc/psi-puppet2_installation
misc/puppetmanifestsforsl53
misc/puppetmasteratpsi
misc/puppet-trouble-shooting-in-twiki
misc/release_snapshotssl53
misc/repairrpmdb
misc/sap_client_for_linux_howto
misc/updatesl57
misc/updateslmaindoc
misc/vpnclientlinux
@@ -0,0 +1,199 @@
AFS to Windows-Login Change
===========================
References
----------
http://ait.web.psi.ch/services/linux/news/unified_login1.html
Introduction
------------
In the night from August 2 to 3 2009 the AFS authentication service
will be changed to the Windows authentication service on Green SL4 and
SL5 PCs.
This document describes the changes that have to be made in our
cfengine and puppet environments to facilitate the PSI wide automatic
reconfiguration of the Kerberos 5 authentication service on Green SL4
and SL5 systems.
This automatic reconfiguration requires cfengine running on SL4 and
puppet running on SL5 hosts respectively.
Note: For technical reasons SL3 systems can not use the Windows
authentication service.
Basically, the following two steps have to be performed:
- Replace the current `krb5.conf` for the AFS authentication service by
the new `krb5.conf` for the Windows authentication service.
- Distribute the `krenew.sh` script, which periodically renews an
existing renewable ticket. It starts running when the user logs in
to its graphical desktop.
For additional information see the reference.
Files
-----
`/etc/krb5.conf` for the Windows authentication server for SL4 and
SL5::
[libdefaults]
default_realm = D.PSI.CH
ticket_lifetime = 25h
dns_lookup_realm = false
dns_lookup_kdc = false
udp_preference_limit = 10
renew_lifetime = 30d
forwardable = true
[realms]
PSI.CH = {
kdc = afs00.psi.ch:88 afs01.psi.ch:88 afs02.psi.ch:88
admin_server = afs00.psi.ch:749
kpasswd_server = afs00.psi.ch:464
default_domain = psi.ch
}
D.PSI.CH = {
kdc = d.psi.ch.
kpasswd_server = d.psi.ch.
default_domain = psi.ch
}
[domain_realm]
.psi.ch = D.PSI.CH
To automatically renew a Kerberos v5 ticket during an X session the
krenew command will be started when logging in to X. It is executed by
the script `krenew.sh`, which is placed into
`/etc/X11/xinit/xinitrc.d/`.
`/etc/X11/xinit/xinitrc.d/krenew.sh` for SL4 and SL5::
#!/bin/bash
/usr/bin/krenew -b -K 60 -t
Procedure
---------
SL4
~~~
Replace `krb5.conf` File by Cfengine
....................................
The replacement of the `krb5.conf` is done by cfengine
on Green SL4 systems.
The current `krb5.conf` source files::
/afs/psi.ch/service/linux/cfengine/masterfiles/distTesting/scientific/46/etc/krb5.conf
and::
/afs/psi.ch/service/linux/cfengine/masterfiles/dist/scientific/46/etc/krb5.conf
have to be replaced by the `krb5.conf` file for the Windows
authentication server.
Distribute `krenew.sh` By Cfengine
..................................
Copy `krenew.sh` to::
/afs/psi.ch/service/linux/cfengine/masterfiles/distTesting/scientific/46/etc/
and::
/afs/psi.ch/service/linux/cfengine/masterfiles/dist/scientific/46/etc/
Configure cfengine to distribute them by adding the following entry to::
/afs/psi.ch/service/linux/cfengine/masterfiles/inputsTesting/scientific/46/cf.linux.AFS
and::
/afs/psi.ch/service/linux/cfengine/masterfiles/inputs/scientific/46/cf.linux.AFS
The entry::
linux.scientific::
$(MASTERDIR)/$(DISTDIR)/$(DIST)/$(RELEASE)/etc/krenew.sh
owner=root group=root
mode=0755
dest=/etc/X11/xinit/xinitrc.d/krenew.sh
type=sum # makes a MD5 checksum
server=$(MASTERHOST)
backup=true
syslog=true
SL5
---
Replace `krb5.conf` File by Puppet
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The replacement of the `krb5.conf` is done by puppet on Green SL5
systems.
The corresponding `krb5.conf` source file::
/afs/psi.ch/software/linux/dist/scientific/51/puppet/files/afs/etc/krb5.conf
has to be replaced by the `krb5.conf` file for the Windows
authentication server.
Distribute `krenew.sh` By Puppet
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Copy `krenew.sh` to the following puppet directory::
/afs/psi.ch/software/linux/dist/scientific/51/puppet/files/afs/etc/
Now edit the puppet manifest `psi_afs.pp` in both environments,
production and development. Add `krenew.sh` to the file resource type as
shown below.
`/afs/psi.ch/service/linux/puppet/etc/puppet/development/manifests/psi_defaults/psi_afs.pp`,
`/afs/psi.ch/service/linux/puppet/etc/puppet/production/manifests/psi_defaults/psi_afs.pp`::
# psi_afs.pp
#
# Link to PSI pam files
# Default PAM configuration for PSI systems
#
class psi_afs {
file {
"/etc/krb5.conf":
owner => "root",
group => "root",
source => [
"puppet://$servername/$psi_release/afs/etc/krb5.conf.$hostname",
"puppet://$servername/$psi_release/afs/etc/krb5.conf"
];
"/etc/X11/xinit/xinitrc.d/krenew.sh":
owner => "root",
group => "root",
mode => "755",
source => [
"puppet:///$psi_release/afs/etc/krenew.sh.$hostname",
"puppet:///$psi_release/afs/etc/krenew.sh"
];
}
}
@@ -0,0 +1,110 @@
Configure LDAP on `psi-puppet3`
===============================
References
----------
- https://intranet.psi.ch/AIT/LdapActiveDirectoryIntegrationPSI
- http://linux.web.cern.ch/linux/docs/account-mgmt.shtml
Introduction
------------
This document describes the configuration of LDAP to access user account
information on an SL6 system.
Requirements
------------
RPMS:
- nss-pam-ldapd
Procedure
---------
Configure `/etc/nsswitch.conf`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The `/etc/nsswitch.conf` configuration file describes the order in
which password-file lookups are performed. To make sure that local
accounts take precedence over LDAP accounts, it should have these
entries::
passwd: files ldap
shadow: files
group: files ldap
Configuring `/etc/nslcd.conf`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The `/etc/nslcd.conf` configuration file is used to set system-wide
defaults to be applied when running ldap clients. This mechanism is
available on SLP6.
This section describes the options that are relevant to configure
account lookups in the d.psi.ch LDAP service. An example configuration
file containing the options described below is shown here. Please edit
it to suit your needs - in particular the `filter passwd` entry!::
# the ldaps uri enforces SSL
uri ldaps://d.psi.ch:636
base ou=PSI,dc=d,dc=psi,dc=ch
binddn CN=linux_ldap,OU=Services,OU=IT,DC=d,DC=psi,DC=ch
bindpw *NOT SHOWN*
# ldap base definitions for each nameservice
base passwd ou=Users,ou=PSI,dc=d,dc=psi,dc=ch
base group ou=Groups,ou=PSI,dc=d,dc=psi,dc=ch
filter passwd (&(objectClass=user)(!(objectClass=computer))(msSFU30UidNumber=*)(msSFU30HomeDirectory=*))
map passwd uid cn
map passwd uidNumber msSFU30UidNumber
map passwd gidNumber msSFU30GidNumber
map passwd loginShell msSFU30LoginShell
map passwd gecos displayName
# do not resolve the unneeded userPassword
#map passwd userPassword msSFU30Password
map passwd homeDirectory msSFU30HomeDirectory
# for our clusters' special environments we may want to use an expression
# #map passwd homeDirectory "/home/${sAMAccountName}"
filter group (&(objectClass=Group)(msSFU30Name=*))
map group uniqueMember member
map group gidNumber msSFU30GidNumber
timelimit 30
# we may want to use this to reduce standing connections to the LDAP server
idle_timelimit 300
# require that server's certificate is tested. The standard installed CA bundle includes
# the necessary Root CA cert from QuoVadis
tls_reqcert hard
tls_cacertfile /etc/ssl/certs/ca-bundle.crt
# request paged results from the LDAP server
pagesize 1000
referrals off
Please make sure that the `nss-pam-ldapd` RPM is installed on your
client machine. Run `yum install nss-pam-ldapd` if this RPM is not
installed.
Then, make sure that the `nslcd` runs, and gets started at boot time::
/sbin/service nslcd restart
/sbin/chkconfig --level 345 nslcd on
Note: the `nslcd` service must be restarted after changes to the
`/etc/nslcd.conf` have been made! For more information, run `man
nslcd.conf` and/or `man nslcd`.
@@ -0,0 +1,119 @@
Create a new Kickstart Installation (for Fedora 10)
===================================================
This document describes the setup of a new kickstart installation at
PSI taking Fedora10 as an example.
Introduction
~~~~~~~~~~~~
Some NICs include the ability to boot using a Pre-Execution
Environment (PXE). It works by sending out a broadcast request for a
DHCP server on the network. If the DHCP server is configured to send
the client the IP address or hostname of a tftp server and the
location on that tftp server of the files needed to start the Linux
installation, the client can start a network installation without
having to boot from local media such as a CD.
This method can also be used with kickstart to perform an automated
network installation.
To perform a network installation using PXE boot, use the following
steps:
- Create an installation tree for the network install and make it
available to the systems being installed.
- Configure the tftp server.
- Configure the DHCP server.
- Boot the system to start the installation.
Procedure
~~~~~~~~~
Preparation of the PXE Boot Installation
........................................
Go to the `tftp` top directory::
# cd /afs/psi.ch/service/linux/tftpboot/
# cd pxelinux.cfg/
# cp -a default.testing{,-20081211}
Now edit the file `default.testing` and add a new entry for Fedora10::
# vi default.testing
# cd ..
# mkdir -p fedora/10
# cd fedora/10
# mkdir i386 x86_64
# cd i386/
# pwd
/afs/psi.ch/service/linux/tftpboot/fedora/10/i386/
# wget ftp://sunsite.cnlab-switch.ch/mirror/fedora/linux/releases/10/Fedora/i386/os/isolinux/vmlinuz
# wget ftp://sunsite.cnlab-switch.ch/mirror/fedora/linux/releases/10/Fedora/i386/os/isolinux/initrd.img
After having performed a first test installation, the kickstart file
`testhost:/root/anaconda-ks.cfg` based on that installation was
written. This kickstart file was copied to
`/afs/psi.ch/software/linux/kickstart/configs/fedora10-ks.cfg`. Then
`fedora10-ks.cfg` was edited.
`File: fedora10-ks.cfg Version 1`
- Note:
For testing the kickstart installations the root password is given,
see line starting with `rootpw`. In the final version it will be
removed::
##################################
# fedora10-ks.cfg 32-bit
##################################
install
url --url=ftp://sunsite.cnlab-switch.ch/mirror/fedora/linux/releases/10/Fedora/i386/os
lang en_US.UTF-8
network --device eth0 --bootproto dhcp
rootpw --iscrypted $6$dzHkd6Tb5OuEd92w$1mFgHdoRA9JnIeTz7lq8tvh8Gu1DJPWQyV7LyLGGTEE27ORgF6rYLPDc5nRZRMzoX8Zpasg5UFy4T7jOYyWa50
authconfig --enableshadow --passalgo=sha512
selinux --disabled
timezone --utc Europe/Zurich
bootloader --location=mbr
### PARTITION
# The following is the partition information you requested
# Note that any partitions you deleted are not expressed
# here so unless you clear all partitions first, this is
# not guaranteed to work
#part /boot --fstype ext3 --size=100 --asprimary
#part swap --size=500 --asprimary
#part / --fstype ext3 --size=200 --grow --asprimary
%packages
@admin-tools
@base
@core
@editors
@hardware-support
@text-internet
%end
Kickstart Pre Installation Scripts:
- /afs/psi.ch/software/linux/dist/scientific/5/kickstart/pre
- /afs/psi.ch/software/linux/dist/scientific/5/kickstart/pre/set_partition
- /afs/psi.ch/software/linux/dist/scientific/5/kickstart/pre/pre_custom
- /afs/psi.ch/software/linux/dist/scientific/5/kickstart/pre/ask_ip
- /afs/psi.ch/software/linux/dist/scientific/5/kickstart/pre/pre_custom
- /afs/psi.ch/software/linux/dist/scientific/5/kickstart/pre/ask_ipaddr
@@ -0,0 +1,54 @@
Diskless Client SL60
====================
Procedure
---------
Select the kernel that diskless clients should use
(vmlinuz-kernel-version) and copy it to the tftp boot directory::
# cp /boot/vmlinuz-2.6.32-220.4.1.el6.i686 /afs/psi.ch/service/linux/tftpboot/dl/sl60/i386/
Create the initrd (i.e. initramfs-kernel-version.img) with network support::
# yum install dracut-network
# dracut initramfs-dl-2.6.32-220.4.1.el6.i686.img 2.6.32-220.4.1.el6.i686
Copy the resulting initramfs-kernel-version.img into the tftp boot directory as well::
# cp /tmp/initramfs-dl-2.6.32-220.4.1.el6.i686.img /afs/psi.ch/service/linux/tftpboot/dl/sl60/i386/
Edit the default boot configuration to use the initrd and kernel
inside `/var/lib/tftpboot`. This configuration should instruct the
diskless client's root to mount the exported file system
(`/exported/root/directory`) as read-write. To do this, configure
`/var/lib/tftpboot/pxelinux.cfg/default` with the following::
label sl6dl
kernel dl/sl60/i386/vmlinuz-2.6.32-220.4.1.el6.i686
append initrd=dl/sl60/i386/initramfs-dl-2.6.32-220.4.1.el6.i686.img root=nfs4:129.129.190.91:/dl/sl60/i386/base rw
Replace the IP address `129.129.190.91` in the example above with the
IP address of the host machine on which the tftp and DHCP services
reside. The NFS share is now ready for exporting to diskless clients.
These clients can boot over the network via PXE.
From the manual::
22.3. Configuring an Exported File System for Diskless Clients
The root directory of the exported file system (used by diskless clients in the network) is shared via NFS. Configure the NFS service to export the root directory by adding it to /etc/exports. For instructions on how to do so, refer to Section 12.7.1, “ The /etc/exports Configuration File”.
To accommodate completely diskless clients, the root directory should contain a complete Red Hat Enterprise Linux installation. You can synchronize this with a running system via rsync, as in:
# rsync -a -e ssh --exclude='/proc/*' --exclude='/sys/*' hostname.com:/ /exported/root/directory
Replace hostname.com with the hostname of the running system with which to synchronize via rsync. The /exported/root/directory is the path to the exported file system.
Alternatively, you can also use yum with the --installroot option to install Red Hat Enterprise Linux to a specific location. For example:
yum groupinstall Base --installroot=/exported/root/directory
The file system to be exported still needs to be configured further before it can be used by diskless clients. To do this, perform the following procedure:
Procedure 22.2. Configure file system
Configure the exported file system's /etc/fstab to contain (at least) the following configuration:
none /tmp tmpfs defaults 0 0
tmpfs /dev/shm tmpfs defaults 0 0
sysfs /sys sysfs defaults 0 0
proc /proc proc defaults 0 0
+396
View File
@@ -0,0 +1,396 @@
Dynamic Kernel Module Support (DKMS) Basics
===========================================
References
----------
G. Lerhaupt, Linux Journal (www.linuxjournal.com/), September 1st, 2003.
Introduction To DKMS
--------------------
Source is a wonderful thing. Merged module source in the kernel tree
is even better. Most of all, support for that source is what really
counts. In today's explosion of Linux in the enterprise, the ability
to pick up the phone and find help is critical. More than ever,
corporations are driving Linux development and requirements. Often,
this meets with skepticism and a bit of anxiety by the community, but
if done correctly, the benefits are seen and felt by everyone.
The dynamic kernel module support (DKMS) framework should be viewed as
a prime example of this. DKMS, a system designed to help Dell Computer
Corporation distribute fixes to its customers in a controlled fashion,
also speeds driver development, testing and validation for the entire
community.
The DKMS framework is basically a duplicate tree outside of the kernel
tree that holds module source and compiled module binaries. This
duplication allows for a decoupling of modules from the kernel, which,
for Linux solution and deployment providers, is a powerful tool. The
power comes from permitting driver drops onto existing kernels in an
orderly and supportable manner. In turn, this frees both providers and
their customers from being bound by kernel drops to fix their issues.
Instead, when a driver fix has been released, DKMS serves as a stopgap
to distribute the fix until the code can be merged back into the
kernel.
Staying with the customer angle for a bit longer, DKMS offers other
advantages. The business of compiling from source, installing or
fidgeting with rebuildable source RPMs has never been for the
faint-of-heart. The reality is that more Linux users are coming in
with less experience, necessitating simpler solutions. DKMS bridges
these issues by creating one executable that can be called to build,
install or uninstall modules. Further, using its match feature,
configuring modules on new kernels could not be easier, as the modules
to install can be based solely on the configuration of some kernel
previously running. In production environments, this is an immense
step forward as IT managers no longer have to choose between some
predefined solution stack or the security enhancements of a newer
kernel.
DKMS also has much to offer developers and veteran Linux users. The
aforementioned idea of the decoupling of modules from the kernel
through duplication (not complete separation) creates a viable test
bed for driver development. Rather than having to push fixes into
successive kernels, these fixes can be distributed and tested on the
spot and on a large scale. This speedup in testing translates to an
overall improvement in the speed of general development. By removing
kernel releases as a blocking mechanism to widespread module code
distribution, the result is better tested code that later can be
pushed back into the kernel at a more rapid pace—a win for both
developers and users.
DKMS also makes developers' lives easier by simplifying the delivery
process associated with kernel-dependent software. In the past, for
example, Dell's main method for delivering modules was RPMs containing
kernel-specific precompiled modules. As kernel errata emerged, we
often were taken through the monotonous and unending process of
recompiling binaries for these new kernels—a situation that no
developer wants to be in. However, Dell still favored this delivery
mechanism because it minimized the amount of work and/or knowledge
customers needed to have to install modules. With DKMS, we can meet
these usability requirements and significantly decrease our workload
from the development standpoint. DKMS requires module source code to
be located only on the user's system. The DKMS executable takes care
of building and installing the module for any kernel users may have on
their systems, eliminating the kernel catch-up game.
Using DKMS
----------
With all of this up-front hype about DKMS, perhaps it might be best to
settle into the particulars of actually how the software is used.
- Using DKMS for a module requires that the module source be located
on the user's system and that it be located in the directory
`/usr/src/module-module-version/`.
- A `dkms.conf` file must exist with the appropriately formatted
directives within this configuration file to tell DKMS such things
as where to install the module and how to build it.
More information on the format of the `dkms.conf` file can be found
later in this article. Once these two requirements have been met and
DKMS has been installed on the system, the user can begin using DKMS
by adding a `module/module-version` to the DKMS tree. The example add
command::
# dkms add -m qla2x00 -v v6.04.00
would add `qla2x00/v6.04.00` to the extant `/var/dkms` tree. This
command includes creating the directory `/var/dkms/qla2x00/v6.04.00/`,
creating a symlink from `/var/dkms/qla2x00/v6.04.00/source` to
`/usr/src/qla2x00-v6.04.00/` and copying the `dkms.conf` file from its
original location to `/var/dkms/qla2x00/v6.04.00/dkms.conf`.
Once this add is complete, the module is ready to be built. The `dkms
build` command requires that the proper kernel sources are located on
the system from the `/lib/modules/kernel-version/build` symlink. The
make command used to compile the module is specified in the
`dkms.conf` configuration file. Continuing with the `qla2x00/v6.04.00`
example::
# dkms build -m qla2x00 -v v6.04.00 -k 2.4.20-8smp
compiles the module but stops short of installing it. Although build
expects a kernel-version parameter, if this kernel name is left out,
it assumes the currently running kernel. However, building modules
for kernels not currently running also is a viable option. This
functionality is assured through the use of a kernel preparation
subroutine that runs before any module build is performed. This
paranoid kernel preparation involves running a make mrproper, copying
the proper kernel .config file to the kernel source directory, running
a make oldconfig and, finally, running a make dep. These steps ensure
that the module being built is built against the proper kernel
symbols. By default, DKMS looks for the kernel `.config` file in the
`/lib/modules/kernel-version/build/configs/` directory, utilizing Red
Hat's naming structure for those config files. If the kernel `.config`
file is not located in this directory, you must specify a `--config`
option with your build command and tell DKMS where the `.config` file
can be found.
Successful completion of a build creates, for this example, the
`/var/dkms/qla2x00/v6.04.00/2.4.20-8smp/` directory as well as the log
and module subdirectories within this directory. The log directory
holds a log file of the module make, and the module directory holds
copies of the compiled `.o` binaries.
With the completion of a build, the module now can be installed on the
kernel for which it was built. Installation copies the compiled module
binary to the correct location in the `/lib/modules/` tree, as
specified in the dkms.conf file. If a module by that name is already
found in that location, DKMS saves it in its tree as an original
module, so it can be put back into place at a later time if the newer
module is uninstalled. The example install command::
# dkms install -m qla2x00 -v v6.04.00 -k 2.4.20-8smp
creates the following symlink::
/var/dkms/qla2x00/v6.04.00/kernel-2.4.20-8smp → /var/dkms/qla2x00/v6.04.00/2.4.20-8smp
This symlink is how DKMS keeps tabs on which driver version is
installed on which kernel. As stated earlier, if a module by the same
name is installed already, DKMS saves a copy in its tree in the
`/var/dkms/module-name/original_module/` directory. In this case, it
would be saved to `/var/dkms/qla2x00/original_module/2.4.20-8smp/`.
To complete the DKMS cycle, you also can uninstall or remove your
module from the tree. Uninstall removes the module you installed and,
if applicable, replaces it with its original module. In scenarios
where multiple versions of a module are located within the DKMS tree,
when one version is uninstalled, DKMS does not try to understand or
assume which of these other versions should be put in its
place. Instead, if a true original_module was saved from the original
DKMS installation, it is put back into the kernel. All of the other
module versions for that module are left in the built state. An
example uninstall would be::
# dkms uninstall -m qla2x00 -v v6.04.00 -k 2.4.20-8smp
If the kernel version parameter is unset, the currently running kernel
is assumed, but the same behavior does not occur with the remove
command. Remove and uninstall are similar in that a remove command
completes all of the same steps as does an uninstall. However, if the
module-version being removed is the last instance of that
module-version for all kernels on your system, after the uninstall
portion of the remove completes, remove physically removes all traces
of that module from the DKMS tree. In other words, when an uninstall
command completes, your modules are left in the **built**
state. However, when a remove completes, you have to start over from
the add command before you can use this module again with DKMS. Here
are two sample remove commands::
# dkms remove -m qla2x00 -v v6.04.00 -k 2.4.20-8smp
# dkms remove -m qla2x00 -v v6.04.00 --all
With the first remove command, the module would be uninstalled. If
this `module/module-version` were not installed on any other kernel,
all traces of it would be removed from the DKMS tree. If, say,
`qla2x00/v6.04.00` also was installed on the `2.4.20-8bigmem` kernel,
the first remove command would leave it alone—it would remain intact
in the DKMS tree. That would not be the case in the second example. It
would uninstall all versions of the `qla2x00/v6.04.00` module from all
kernels and then completely expunge all references of
`qla2x00/v6.04.00` from the DKMS tree. Thus, remove is what cleans
your DKMS tree.
Miscellaneous DKMS Commands
---------------------------
DKMS also comes with a fully functional status command that returns
information about what is currently located in your tree. If no
parameters are set, it returns all information found. Logically, the
specificity of information returned depends on which parameters are
passed to your status command. Each status entry returned is of the
state added, built or installed. If an original module has been saved,
this information also is displayed. Some example status commands
include::
# dkms status
# dkms status -m qla2x00
# dkms status -m qla2x00 -v v6.04.00
# dkms status -k 2.4.20-8smp
# dkms status -m qla2x00 -v v6.04.00 -k 2.4.20-8smp
Another major feature of DKMS is the match command. The match command
takes the configuration of DKMS-installed modules for one kernel and
applies it to some other kernel. When the match completes, the same
`module/module-versions` installed for one kernel are then installed
on the other kernel. This is helpful when you are upgrading from one
kernel to the next but want to keep the same DKMS modules in place for
the new kernel. In the example::
# dkms match --templatekernel 2.4.20-8smp -k 2.4.20-9smp
`--templatekernel` is the match-er kernel from which the configuration
is based. The `-k` kernel is the match-ee upon which the
configuration is instated.
For systems management purposes, the commands mktarball and ldtarball
also have been added to DKMS. These commands allow the user to make
and load tarball archives, respectively, into the DKMS tree to
facilitate using DKMS in deployments where many similar systems
exist. This allows the system administrator to build modules on only
one system. Rather than build the same module on every other system,
the built binary can be applied directly to the other systems' DKMS
tree. Specifically, mktarball creates a tarball of the source for a
given `module/module-version`. It then archives the DKMS tree of every
kernel version that has a module built for that
`module/module-version`. Consider the example::
# dkms mktarball -m qla2x00 -v v6.04.00 -k 2.4.20-8smp,2.4.20-8
Depending on the `-k` kernel parameter, `mktarball` archives only
certain binaries compiled for those kernels specified. If no kernel
parameter is given, it archives all built module binaries for that
`module/module-version`.
With `ldtarball`, DKMS simply parses the archive created with
mktarball and applies whatever is found to that system's DKMS
tree. This leaves all modules in the built state; the `dkms install`
command then can be used to place the module binaries into the
`/lib/modules` tree. Under normal operation, ldtarball does not
overwrite any files that already exist in the system's DKMS
tree. However, the archive can be forced over what is in the tree with
the `--force` option. An example `ldtarball`::
# dkms ldtarball --config qla2x00-v6.04.00-kernel2.4.20-8smp.tar.gz
The last miscellaneous DKMS command is `mkdriverdisk`. As can be
inferred from its name, `mkdriverdisk` takes the proper sources in
your DKMS tree and creates a driver disk image that can provide
updated drivers to Linux distribution installations. A sample
`mkdriverdisk` might look like::
# dkms mkdriverdisk -d redhat -m qla2x00 -v v6.04.00 -k 2.4.20-8BOOT
Currently, the only supported distribution driver disk format is Red
Hat, but this easily could expand with some help from the community in
understanding driver disk requirements and formats on a
per-distribution basis. For more information on the extra necessary
files and their formats for DKMS to create Red Hat driver disks, see
`people.redhat.com/dledford`. These files should be placed in your
module source directory.
The dkms.conf Configuration File Format
---------------------------------------
For maintainers of DKMS packages, the `dkms.conf` configuration file
is the only auxiliary piece necessary to make your source tarball
DKMS-ready. The format of the conf file is a successive list of shell
variables sourced by DKMS when working with your package. For
example, an excerpt from the `qla2x00/v6.04.00 dkms.conf` file::
MAKE="make all INCLUDEDIR=/lib/modules/$kernelver/build/include"
MAKE_smp="make SMP=1 all INCLUDEDIR=/lib/modules/$kernelver/build/include"
LOCATION="/kernel/drivers/addon/qla2200"
REMAKE_INITRD="yes"
MODULE_NAME="qla2200.o:qla2200_6x.o qla2300.o:qla2300_6x.o"
CLEAN="make clean"
MODULES_CONF_ALIAS_TYPE="scsi_hostadapter"
MODULES_CONF0="options scsi_mod scsi_allow_ghost_devices=1"
shows that each of the shell variable directives should be coded in
all capital letters. One of the current exceptions to this rule is the
`MAKE_` directive. DKMS uses the generic `MAKE=` command to build your
module. But, if a `MAKE_kernel-regexp-text` command exists and the
text after the `MAKE_` matches (as a substring) the kernel for which
it is being built, then this alternate make command is used. In the
above example, you can see how DKMS would use the `MAKE_smp` directive
on any smp kernel for which it was building this module. Similar
`PATCH_` commands also exist. When the text after the underscore
matches the kernel for which a module is being built, that patch first
is applied to the module source. This allows developers to distribute
one source tarball, with one `dkms.conf` and multiple patches. Yet,
different patches can be applied as necessary to the source to ensure
all modules function correctly on all kernels.
Also notice that dkms.conf accepts the `$kernelver` variable, which,
at build time, is replaced with the kernel version for which the
module is being built. This is especially important so the correct
include directories are referenced when compiling a module for a
kernel that is not currently running.
Using DKMS in Conjunction with RPM
----------------------------------
DKMS and RPM actually work quite well together. The only twist is that
to make it function properly, you have to create an RPM that installs
source. Although normal practice is to install source only with source
RPMs, a source RPM does not necessarily work with DKMS; it will not
let you do much besides install the source. Instead, your source
tarball needs to be included with your RPM, so your source can be
placed in `/usr/src/module-module-version/` and the proper DKMS
commands can be called. The `%post` and `%preun` sections basically
consist of DKMS commands.
Here is a sample `.spec` file::
%define module qla2x00
Summary: Qlogic HBA module
Name: %module_dkms
Version: v6.04.00
Release: 1
Vendor: Qlogic Corporation
Copyright: GPL
Packager: Gary Lerhaupt <gary_lerhaupt@dell.com>
Group: System Environment/Base
BuildArch: noarch
Requires: dkms gcc bash sed
Source0: qla2x00src-%version.tgz
Source1: dkms.conf
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root/
%description
This package contains Qlogic's qla2x00 HBA module meant
for the DKMS framework.
%prep
rm -rf qla2x00src-%version
mkdir qla2x00src-%version
cd qla2x00src-%version
tar xvzf $RPM_SOURCE_DIR/qla2x00src-%version.tgz
%install
if [ "$RPM_BUILD_ROOT" != "/" ]; then
rm -rf $RPM_BUILD_ROOT
fi
mkdir -p $RPM_BUILD_ROOT/usr/src/%module-%version/
install -m 644 $RPM_SOURCE_DIR/dkms.conf
$RPM_BUILD_ROOT/usr/src/%module-%version
install -m 644 qla2x00src-%version/*
$RPM_BUILD_ROOT/usr/src/%module-%version
%clean
if [ "$RPM_BUILD_ROOT" != "/" ]; then
rm -rf $RPM_BUILD_ROOT
fi
%files
%defattr(0644,root,root)
%attr(0755,root,root) /usr/src/%module-%version/
%pre
%post
/sbin/dkms add -m %module -v %version
/sbin/dkms build -m %module -v %version
/sbin/dkms install -m %module -v %version
exit 0
%preun
/sbin/dkms remove -m %module -v %version --all
exit 0
@@ -0,0 +1,4 @@
Firefox Preferences
===================
`/usr/lib64/firefox/browser/defaults/preferences/all-psi.js`
@@ -0,0 +1,18 @@
How To Start Vncserver
======================
Log in to the remote host::
# ssh root@<hostname>
# ls -l /tmp/krb*
Log in as the <user> whose desktop you want to access::
# su - <user>
# export KRB5CCNAME=/tmp/krb5cc_3651_bxD3lb
# aklog
# x0vncserver -SecurityTypes=None -display=:0.0
Start the VNC client on the local host::
# vncviewer
@@ -0,0 +1,20 @@
How to edit `install.img`
=========================
Introduction
------------
This HowTo describes how to extract, edit and rebuild the install.img
file of an SL6 release. This file is found in the distribution
toplevel `$basearch` directory e.g. in
`/afs/psi.ch/software/linux/dist/scientific/62/x86_64/images/` at our
site. It is loaded during the installation and brings up the
installation process.
The reason to edit the `install.img` in this example is the file
`/etc/anaconda.repos.d/sl.repo`, which references the original SL repos,
i.e. the anaconda installer will try to get RPMS from these external
repos during the installation even if we declare our local repos in
the kickstart config file. We now want anaconda to take RPMS from
our local copies of the SL repos because, first, the installation is
faster and, second, we do not depend on downtimes of the SL site.
@@ -0,0 +1,864 @@
Kernel Module E1000E Update For SL5.1
=====================================
This document is almost certainly obsolete, hence I just include the
unformatted POD source of the original::
=head2 References
=over 4
=item *
http://support.intel.com/support/
=back
=head2 Introduction
The Intel Gigabit ethernet card F<82567LM>, which comes with the
new Fujitsu Siemens computers F<Celsius W370>, F<Esprimo p7935 e80>,
F<Esprimo e7935 e80> and F<Lifebook e8420>, currently does not work
out of the box in SL51 (and for sure others) because the updated version
of the e1000e driver is not available yet in Scientific Linux.
As a consequence of this the network installation is not working for this
kind of hardware, because
the SL5.1 installation kernel, too, is not coming with the proper driver.
This document describes a quick workaround, Solution 1, and a more profound
procedure, Solution 2, to solve this issue. A third alternative, Solution 3,
is briefly mentioned, but could not be finalized.
=head3 Solution 1
For the network installation
via PXE boot a second SL5 compatible network card has to be plugged in. When the
installation has finished, an updated version of the
e1000e driver, that supports the mentioned hardware can be downloaded
from the Intel website (see References) and installed on the system.
=head3 Solution 2
Use the SL5.3 kernel, the e1000e driver of which does support the F<82567LM> hardware,
to install the SL5.1.
First, the PXE boot environment has to be setup with the SL5.3 kernel and the related
initial ramdisk.
Second, the F<.buildstamp> in the initial ramdisk of SL5.3 has to be replaced by the
F<.buildstamp> of SL5.1. This enables the SL5.3 installer to use the SL5.1 installation
tree. To do this, the ramdisk has to be unpacked, modified and
repacked (it is a gzipped cpio archive).
Third, build a kernel module e1000e RPM for SL5.1 which will be installed during the SL5.1
installation by means of a custom key F<e1000e>.
Fourth, setup the F<e1000e> custom key.
=head3 Solution 3
Note: This was tried but does not work yet.
For the network installation compile the new e1000e driver for the installation kernel and put it into
the corresponding initial ramdisk.
For the running system build the e1000e RPMS for the current kernel/architecture combinations
and install the RPMS during the kickstart process.
When the kernel is updated the corresponding e1000e RPMS have to be rebuilt and put into
the update repository.
=head2 Procedure for Solution 1
Get the tarball, extract it and build the driver following
the instructions in the README of the tarball.
Get the tarball and extract it.
# [root@pc7637]
# uname -a
Linux pc7637 2.6.18-92.1.22.el5PAE #1 SMP Tue Dec 16 07:10:07 EST 2008 i686 i686 i386 GNU/Linux
# cd /tmp/
# wget http://downloadmirror.intel.com/17069/eng/e1000e-0.4.1.7.tar.gz
# tar xfz e1000e-0.4.1.7.tar.gz
Install the corresponding kernel source RPM.
# rpm -ivh /afs/psi.ch/software/mirror/scientific/5x/SRPMS/vendor/kernel-2.6.18-92.1.22.el5.src.rpm
Build the RPM for the running kernel.
# rpmbuild -tb e1000e-0.4.1.7.tar.gz
...
Wrote: /usr/src/redhat/RPMS/i386/e1000e-0.4.1.7-1.i386.rpm
...
Install the driver RPM.
# rpm -ivh /usr/src/redhat/RPMS/i386/e1000e-0.4.1.7-1.i386.rpm
Since this is a newly installed host, disable the older kernels in grub,
because the kernel module e1000e is only valid for the running kernel.
However, the older kernels should be removed later if there is no need to run
them.
# vi /boot/grub/grub.conf
The resulting RPM e1000e-0.4.1.7-1.i386.rpm was built on a FS Celsius W370
after Network installation via an old plugged in 3COM network card
as 2nd network device. After installation of this RPM the 3COM
card was removed and the on board Intel Gigabit network card
was running with the new e1000e kernel module.
=over 4
=item Note:
This RPM contains only the module for an SL51 PAE kernel.
/lib/modules/2.6.18-92.1.22.el5PAE/kernel/drivers/net/e1000e/e1000e.ko.new
=back
=for comment
##################################################################################3
=head2 Procedure for Solution 2
=head3 Setup the PXE Boot Environment for the New Kickstart Installation
Add the following labels to
F</afs/psi.ch/service/linux/tftpboot/pxelinux.cfg/default.testing>.
At the time of writing they are used for testing, their names will probably
be changed later on.
Note: You can take the same kickstart configuration files as for the default
SL51 installation, under the labels sl5 and sl564.
label sl53t32
kernel scientific/53for51install/i386/vmlinuz
append initrd=scientific/53for51install/i386/initrd.img ksdevice=eth0 \
ks=nfs:129.129.190.59:/master/linux/kickstart/configs/sl51-a-ks.cfg noipv6
label sl53t64
kernel scientific/53for51install/x86_64/vmlinuz
append initrd=scientific/53for51install/x86_64/initrd.img ksdevice=eth0 \
ks=nfs:129.129.190.59:/master/linux/kickstart/configs/sl51-64-a-ks.cfg noipv6
Copy the installation kernel and the initial ramdisk to the tftpboot
location given in the respective labels above.
# cd /afs/psi.ch/service/linux/tftpboot/scientific/
# mkdir -p 53for51install/i386 53for51install/x86_64
# cd 53for51install/i386/
# wget http://ftp.scientificlinux.org/linux/scientific/5rolling/i386/images/pxeboot/vmlinuz
# wget http://ftp.scientificlinux.org/linux/scientific/5rolling/i386/images/pxeboot/initrd.img
# cd ../x86_64/
# wget http://ftp.scientificlinux.org/linux/scientific/5rolling/x86_64/images/pxeboot/vmlinuz
# wget http://ftp.scientificlinux.org/linux/scientific/5rolling/x86_64/images/pxeboot/initrd.img
Before proceeding with the next section, test whether the basic setup is working.
To do so, boot F<sl53t32> and F<sl53t64> on a test machine.
This should set up the network connections and start the kickstart
up to the point where it runs the pre-installation scripts.
=head3 Modify the Initial Ramdisk, Example for x86_64
Unpack the initial ramdisk image of SL5.3.
# cd /afs/psi.ch/service/linux/tftpboot/scientific/53for51install/x86_64/
# mkdir tmp
# cd tmp
# zcat ../initrd.img | cpio -ivd
The content of the F<.buildstamp> will look something like this.
# cat .buildstamp
200902111740.x86_64
Scientific Linux
53
SL
your distribution provided bug reporting tool.
Now replace this F<.buildstamp> file with the one
of the SL5.1 installation tree. To get it, you also have to
unpack the SL5.1 ramdisk.
# cd /afs/psi.ch/software/linux/dist/scientific/51/x86_64/images/pxeboot/
# mkdir tmp
# cd tmp
# zcat ../initrd.img | cpio -ivd
Here we have the following F<.buildstamp>.
# cat .buildstamp
200801141434.x86_64
Scientific Linux
51
SL
your distribution provided bug reporting tool.
# cp -i .buildstamp \
# /afs/psi.ch/service/linux/tftpboot/scientific/53for51install/x86_64/tmp/
Clean up.
# cd ..
# rm -rf tmp
Repack the initial ramdisk of SL5.3 x86_64.
For this part the commands were taken from F</sbin/mkinitrd> of
the SL5.1 RPM F<mkinitrd-5.1.19.6-19> and put into a script,
because repacking the ramdisk without the same command options
as from the F</sbin/mkinitrd> script fails.
=opentwisty
# # Function from /sbin/mkinitrd
# findall() {
# echo nash-find "$@" | /sbin/nash --force --quiet
# }
#
# # Architecture
# ARCH="x86_64"
#
# # Set file locations:
# # The temporary directory in the tftpboot directory
# # where the initial ramdisk is extracted to.
# MNTIMAGE="/afs/psi.ch/service/linux/tftpboot/scientific/53for51install/$ARCH/tmp"
#
# # The name of the new ramdisk, not to overwrite the original one
# target="initrd.img_new"
#
# # Start processing
# if test ! -d $MNTIMAGE
# then
# echo "Error: $MNTIMAGE does not exist."
# exit
# fi
#
# if test -e ${MNTIMAGE}/../initrd.img_new.cpio
# then
# echo -n "${MNTIMAGE}/../initrd.img_new.cpio exists. Shall I remove it (y/N)?"
# read a
# if test "$a" = "y"
# then
# rm -f ${MNTIMAGE}/../initrd.img_new.cpio
# fi
# fi
#
# # Create the empty ramdisk file (from /sbin/mkinitrd)
# IMAGE=`mktemp ${MNTIMAGE}/../initrd.img_new.cpio`
#
# # Fill it as cpio archive (from /sbin/mkinitrd)
# (cd $MNTIMAGE; findall . | cpio --quiet -c -o) >| $IMAGE || exit 1
#
# # Compress the cpio archive (from /sbin/mkinitrd)
# gzip -9 < $IMAGE >| ${MNTIMAGE}/../$target
#
# echo "Check the new initial ramdisk at"
# echo "/afs/psi.ch/service/linux/tftpboot/scientific/53for51install/$ARCH/"
# echo
=closetwisty
Go to the tftpboot directory and set up the new initial ramdisk.
Make also a backup of the original one.
# cd /afs/psi.ch/service/linux/tftpboot/scientific/53for51install/x86_64/
# mv initrd.img initrd.img_orig
# ln -s initrd.img_new initrd.img
Now test the PXE boot kickstart installation using the
label F<sl53t64>.
If everything looks fine clean up and go to the next section
or repeat this part for the i386 architecture.
# rm -rf tmp/ initrd.img_new.cpio
=head3 Build the e1000e Kernel Module RPM, Example for x86_64
Go to the build system tux50-64 and download the e1000e sources
and get the spec file.
# [gasser_m@tux50-64]
# cd /scratch/gasser_m/rpm_topdir/SOURCES/
# wget http://downloadmirror.intel.com/17069/eng/e1000e-0.4.1.7.tar.gz
# tar xfz e1000e-0.4.1.7.tar.gz
# cp e1000e-0.4.1.7/e1000e.spec ../SPECS/
# rm -rf e1000e-0.4.1.7
# cd ../SPECS/
# cp e1000e e1000e_orig
Note, you can build the RPM directly from this tarball, but here we want to
change the name of the built RPM to apply our PSI naming convention for kernel
modules, thus we need to edit the spec file before building.
# vi e1000e.spec
=opentwisty
#### begin e1000e.spec
%define driver_name e1000e
%define kernel 2.6.18-92.1.22.el5
%define pkg_name kernel-module-%{driver_name}-%{kernel}
Name: %{pkg_name}
Summary: Intel(R) Gigabit Ethernet Connection
Version: 0.4.1.7
Release: 1
Source: %{driver_name}-%{version}.tar.gz
Vendor: Intel Corporation
Packager: Marc Gasser <marc.gasser@psi.ch>
License: GPL
ExclusiveOS: linux
Group: System Environment/Kernel
Provides: %{driver_name}
URL: http://support.intel.com/support/go/linux/e1000e.htm
BuildRoot: %{_tmppath}/%{driver_name}-%{version}-root
# do not generate debugging packages by default - newer versions of rpmbuild
# may instead need:
#%define debug_package %{nil}
%debug_package %{nil}
# macros for finding system files to update at install time (pci.ids, pcitable)
%define find() %(for f in %*; do if [ -e $f ]; then echo $f; break; fi; done)
%define _pciids /usr/share/pci.ids /usr/share/hwdata/pci.ids
%define _pcitable /usr/share/kudzu/pcitable /usr/share/hwdata/pcitable /dev/null
%define pciids %find %{_pciids}
%define pcitable %find %{_pcitable}
Requires: kernel, fileutils, findutils, gawk, bash
%description
This package contains the Linux driver for the Intel(R) Gigabit Family of Server Adapters.
########################### begin RPM build section
%prep
%setup -n %{driver_name}-%{version}
%build
mkdir -p %{buildroot}
KV=%{kernel}
KA=%{_arch}
KV_BASE=$(echo $KV | sed '{ s/hugemem//g; s/smp//g; s/enterprise//g; }' )
if [ -e /usr/src/kernels ] && [ $(echo $KV_BASE | grep "^2.6") ]; then
if [ -e /etc/redhat-release ]; then
KSP=$(ls /lib/modules | grep $KV_BASE)
for K in $KSP ; do
if [ $KA == "x86_64" ] && \
[ $(echo $K | grep hugemem) ]; then
# Include path for x86_64 hugemem is broken
# on RHEL4
continue
fi
make -C src clean
make -C src KSP=/lib/modules/$K/build \
INSTALL_MOD_PATH=%{buildroot} \
KVERSION=$k \
MANDIR=%{_mandir} \
CFLAGS_EXTRA="$CFLAGS_EXTRA" install
done
else
make -C src clean
make -C src INSTALL_MOD_PATH=%{buildroot} \
MANDIR=%{_mandir} install
fi
else
SwitchRHKernel () {
CFLAGS_EXTRA=""
for K in $2 ; do
if [ $K == $1 ] ; then
CFLAGS_EXTRA="$CFLAGS_EXTRA -D__BOOT_KERNEL_$K=1"
else
CFLAGS_EXTRA="$CFLAGS_EXTRA -D__BOOT_KERNEL_$K=0"
fi
done
}
KSP="/lib/modules/$KV/build
/usr/src/linux-$KV
/usr/src/linux-$(echo $KV | sed 's/-.*//')
/usr/src/kernel-headers-$KV
/usr/src/kernel-source-$KV
/usr/src/linux-$(echo $KV | sed 's/\([0-9]*\.[0-9]*\)\..*/\1/')
/usr/src/linux"
KSRC=$(for d in $KSP ; do [ -e $d/include/linux ] && echo $d; echo; done)
KSRC=$(echo $KSRC | awk '{ print $1 }')
if [ -e $KSRC/include/linux/rhconfig.h ] ; then
RHKL=$(grep 'BOOT_KERNEL_.* [01]' /boot/kernel.h |
sed 's/.*BOOT_KERNEL_\(.*\) [01]/\1/')
if echo $RHKL | grep BIGMEM
then
RHKL=$(echo $RHKL | sed 's/ENTERPRISE//')
fi
if echo $RHKL | grep HUGEMEM
then
RHKL=$(echo $RHKL | sed 's/BIGMEM//')
fi
for K in $RHKL ; do
SwitchRHKernel $K "$RHKL"
make -C src clean
if [ $KA == "x86_64" ] ; then
CFLAGS_EXTRA="$CFLAGS_EXTRA -D__MODULE_KERNEL_x86_64=0 -D__MODULE_KERNEL_ia32e=1"
fi
make -C src INSTALL_MOD_PATH=%{buildroot} \
MANDIR=%{_mandir} CFLAGS_EXTRA="$CFLAGS_EXTRA" install
done
else
make -C src clean
make -C src INSTALL_MOD_PATH=%{buildroot} MANDIR=%{_mandir} install
fi
fi
%install
# Append .new to driver name to avoid conflict with kernel RPM
echo "# Going to " %{buildroot}
cd %{buildroot}
find lib -name "e1000e.*o" -exec mv {} {}.new \; \
-fprintf %{_builddir}/%{driver_name}-%{version}/file.list "/%p.new\n"
%clean
rm -rf %{buildroot}
%files -f %{_builddir}/%{driver_name}-%{version}/file.list
%defattr(-,root,root)
%{_mandir}/man7/e1000e.7.gz
%doc COPYING
%doc README
%doc file.list
%doc pci.updates
########################### end RPM build section
########################### begin RPM installation section
%post
FL="%{_docdir}/%{name}-%{version}/file.list
%{_docdir}/%{name}/file.list"
FL=$(for d in $FL ; do if [ -e $d ]; then echo $d; break; fi; done)
if [ -d /usr/local/lib/%{name} ]; then
rm -rf /usr/local/lib/%{name}
fi
if [ -d /usr/local/share/%{name} ]; then
rm -rf /usr/local/share/%{name}
fi
echo "original pci.ids saved in /usr/local/share/%{name}";
if [ "%{pcitable}" != "/dev/null" ]; then
echo "original pcitable saved in /usr/local/share/%{name}";
fi
#### Save old drivers (aka .o and .o.gz) in $d_usr
# k is(are) the kernel version(s) extracted with sed from the full qualified
# kernel module name(s) in file.list
echo "Original drivers saved in /usr/local/share/%{name}";
for k in $(sed 's/\/lib\/modules\/\([0-9a-zA-Z_\.\-]*\).*/\1/' $FL) ;
do
d_drivers=/lib/modules/$k
d_usr=/usr/local/share/%{name}/$k
mkdir -p $d_usr
cd $d_drivers; find . -name %{driver_name}.*o -exec cp --parents {} $d_usr \; -exec rm -f {} \;
cd $d_drivers; find . -name %{driver_name}_*.*o -exec cp --parents {} $d_usr \; -exec rm -f {} \;
cd $d_drivers; find . -name %{driver_name}.*o.gz -exec cp --parents {} $d_usr \; -exec rm -f {} \;
cd $d_drivers; find . -name %{driver_name}_*.*o.gz -exec cp --parents {} $d_usr \; -exec rm -f {} \;
cp --parents %{pciids} /usr/local/share/%{name}/
if [ "%{pcitable}" != "/dev/null" ]; then
cp --parents %{pcitable} /usr/local/share/%{name}/
fi
done
# Add driver link
for f in $(sed 's/\.new$//' $FL) ; do
ln -f $f.new $f
done
# Check if kernel version rpm was built on IS the same as running kernel
BK_LIST=$(sed 's/\/lib\/modules\/\([0-9a-zA-Z_\.\-]*\).*/\1/' $FL)
MATCH=no
for i in $BK_LIST
do
if [ $(uname -r) == $i ] ; then
MATCH=yes
break
fi
done
if [ $MATCH == no ] ; then
echo -n "WARNING: Running kernel is $(uname -r). "
echo -n "RPM supports kernels ( "
for i in $BK_LIST
do
echo -n "$i "
done
echo ")"
fi
LD="%{_docdir}/%{name}";
if [ -d %{_docdir}/%{name}-%{version} ]; then
LD="%{_docdir}/%{name}-%{version}";
fi
#Yes, this really needs bash
bash -s %{pciids} \
%{pcitable} \
$LD/pci.updates \
$LD/pci.ids.new \
$LD/pcitable.new \
%{name} \
<<"END"
#! /bin/bash
# $1 = system pci.ids file to update
# $2 = system pcitable file to update
# $3 = file with new entries in pci.ids file format
# $4 = pci.ids output file
# $5 = pcitable output file
# $6 = driver name for use in pcitable file
exec 3<$1
exec 4<$2
exec 5<$3
exec 6>$4
exec 7>$5
driver=$6
IFS=
# pattern matching strings
ID="[[:xdigit:]][[:xdigit:]][[:xdigit:]][[:xdigit:]]"
VEN="${ID}*"
DEV=" ${ID}*"
SUB=" ${ID}*"
TABLE_DEV="0x${ID} 0x${ID} \"*"
TABLE_SUB="0x${ID} 0x${ID} 0x${ID} 0x${ID} \"*"
line=
table_line=
ids_in=
table_in=
vendor=
device=
ids_device=
table_device=
subven=
ids_subven=
table_subven=
subdev=
ids_subdev=
table_subdev=
ven_str=
dev_str=
sub_str=
# force a sub-shell to fork with a new stdin
# this is needed if the shell is reading these instructions from stdin
while true
do
# get the first line of each data file to jump start things
exec 0<&3
read -r ids_in
if [ "$2" != "/dev/null" ];then
exec 0<&4
read -r table_in
fi
# outer loop reads lines from the updates file
exec 0<&5
while read -r line
do
# vendor entry
if [[ $line == $VEN ]]
then
vendor=0x${line:0:4}
ven_str=${line#${line:0:6}}
# add entry to pci.ids
exec 0<&3
exec 1>&6
while [[ $ids_in != $VEN ||
0x${ids_in:0:4} < $vendor ]]
do
echo "$ids_in"
read -r ids_in
done
echo "$line"
if [[ 0x${ids_in:0:4} == $vendor ]]
then
read -r ids_in
fi
# device entry
elif [[ $line == $DEV ]]
then
device=`echo ${line:1:4} | tr [:upper:] [:lower:]`
table_device=0x${line:1:4}
dev_str=${line#${line:0:7}}
ids_device=`echo ${ids_in:1:4} | tr [:upper:] [:lower:]`
table_line="$vendor $table_device \"$driver\" \"$ven_str|$dev_str\""
# add entry to pci.ids
exec 0<&3
exec 1>&6
while [[ $ids_in != $DEV ||
$ids_device < $device ]]
do
if [[ $ids_in == $VEN ]]
then
break
fi
if [[ $ids_device != ${ids_in:1:4} ]]
then
echo "${ids_in:0:1}$ids_device${ids_in#${ids_in:0:5}}"
else
echo "$ids_in"
fi
read -r ids_in
ids_device=`echo ${ids_in:1:4} | tr [:upper:] [:lower:]`
done
if [[ $device != ${line:1:4} ]]
then
echo "${line:0:1}$device${line#${line:0:5}}"
else
echo "$line"
fi
if [[ $ids_device == $device ]]
then
read -r ids_in
fi
# add entry to pcitable
if [ "$2" != "/dev/null" ];then
exec 0<&4
exec 1>&7
while [[ $table_in != $TABLE_DEV ||
${table_in:0:6} < $vendor ||
( ${table_in:0:6} == $vendor &&
${table_in:7:6} < $table_device ) ]]
do
echo "$table_in"
read -r table_in
done
echo "$table_line"
if [[ ${table_in:0:6} == $vendor &&
${table_in:7:6} == $table_device ]]
then
read -r table_in
fi
fi
# subsystem entry
elif [[ $line == $SUB ]]
then
subven=`echo ${line:2:4} | tr [:upper:] [:lower:]`
subdev=`echo ${line:7:4} | tr [:upper:] [:lower:]`
table_subven=0x${line:2:4}
table_subdev=0x${line:7:4}
sub_str=${line#${line:0:13}}
ids_subven=`echo ${ids_in:2:4} | tr [:upper:] [:lower:]`
ids_subdev=`echo ${ids_in:7:4} | tr [:upper:] [:lower:]`
table_line="$vendor $table_device $table_subven $table_subdev \"$driver\" \"$ven_str|$sub_str\""
# add entry to pci.ids
exec 0<&3
exec 1>&6
while [[ $ids_in != $SUB ||
$ids_subven < $subven ||
( $ids_subven == $subven &&
$ids_subdev < $subdev ) ]]
do
if [[ $ids_in == $VEN ||
$ids_in == $DEV ]]
then
break
fi
if [[ ! (${ids_in:2:4} == "1014" &&
${ids_in:7:4} == "052C") ]]
then
if [[ $ids_subven != ${ids_in:2:4} || $ids_subdev != ${ids_in:7:4} ]]
then
echo "${ids_in:0:2}$ids_subven $ids_subdev${ids_in#${ids_in:0:11}}"
else
echo "$ids_in"
fi
fi
read -r ids_in
ids_subven=`echo ${ids_in:2:4} | tr [:upper:] [:lower:]`
ids_subdev=`echo ${ids_in:7:4} | tr [:upper:] [:lower:]`
done
if [[ $subven != ${line:2:4} || $subdev != ${line:7:4} ]]
then
echo "${line:0:2}$subven $subdev${line#${line:0:11}}"
else
echo "$line"
fi
if [[ $ids_subven == $subven &&
$ids_subdev == $subdev ]]
then
read -r ids_in
fi
# add entry to pcitable
if [ "$2" != "/dev/null" ];then
exec 0<&4
exec 1>&7
while [[ $table_in != $TABLE_SUB ||
${table_in:14:6} < $table_subven ||
( ${table_in:14:6} == $table_subven &&
${table_in:21:6} < $table_subdev ) ]]
do
if [[ $table_in == $TABLE_DEV ]]
then
break
fi
if [[ ! (${table_in:14:6} == "0x1014" &&
${table_in:21:6} == "0x052C") ]]
then
echo "$table_in"
fi
read -r table_in
done
echo "$table_line"
if [[ ${table_in:14:6} == $table_subven &&
${table_in:21:6} == $table_subdev ]]
then
read -r table_in
fi
fi
fi
exec 0<&5
done
# print the remainder of the original files
exec 0<&3
exec 1>&6
echo "$ids_in"
while read -r ids_in
do
echo "$ids_in"
done
if [ "$2" != "/dev/null" ];then
exec 0>&4
exec 1>&7
echo "$table_in"
while read -r table_in
do
echo "$table_in"
done
fi
break
done <&5
exec 3<&-
exec 4<&-
exec 5<&-
exec 6>&-
exec 7>&-
END
mv -f $LD/pci.ids.new %{pciids}
if [ "%{pcitable}" != "/dev/null" ]; then
mv -f $LD/pcitable.new %{pcitable}
fi
uname -r | grep BOOT || /sbin/depmod -a > /dev/null 2>&1 || true
########################### end RPM installation section
########################### begin RPM deinstallation section
%preun
# If doing RPM un-install
if [ $1 -eq 0 ] ; then
FL="%{_docdir}/%{name}-%{version}/file.list
%{_docdir}/%{name}/file.list"
FL=$(for d in $FL ; do if [ -e $d ]; then echo $d; break; fi; done)
# Remove driver link
for f in $(sed 's/\.new$//' $FL) ; do
rm -f $f
done
# Restore old drivers
if [ -d /usr/local/share/%{name} ]; then
cd /usr/local/share/%{name}; find . -name '%{driver_name}.*o*' -exec cp --parents {} /lib/modules/ \;
cd /usr/local/share/%{name}; find . -name '%{driver_name}_*.*o*' -exec cp --parents {} /lib/modules/ \;
rm -rf /usr/local/share/%{name}
fi
fi
%postun
uname -r | grep BOOT || /sbin/depmod -a > /dev/null 2>&1 || true
########################### end RPM deinstallation section
#### end e1000e.spec
=closetwisty
=head3 Create the Customization Key for the e1000e Driver, Example for x86_64
Go to the customization key directory for SL5.1 and create the basic
files.
# cd /afs/psi.ch/software/linux/dist/scientific/51/kickstart/custom/
# mkdir e1000e
# cd e1000e
# touch custom.sh
# vi custom.sh
=opentwisty
#!/bin/bash
#
# KS Customization for e1000e driver
#
# marc.gasser@psi.ch
# 2009-02-24
#
# KSII scriplet rules apply
#
# This customization was added for the Intel Gigabit ethernet
# card 82567LM, which comes with the new Fujitsu Siemens computers
# Celsius W370, Esprimo p7935 e80, Esprimo e7935 e80 and Lifebook e8420,
# because the e1000e.ko version 0.2.0 coming with SL kernels <= 2.6.18-92.1.22.el5
# does not support this kind of hardware.
#
##############################################################
# Changelog:
# ---------
#
##############################################################
ARCH=$(uname -m)
DIR_E1000E=/mnt/master/linux/dist/scientific/51/psi/all
# Install the kernel-module-e1000e containing the e1000e driver
if test "$ARCH" = "x86_64"
then
echo "Install kernel-module-e1000e for $ARCH" >> $POSTLOG 2>&1
rpm -ivh $DIR_E1000E/kernel-module-e1000e-2.6.18-92.1.22.el5-0.4.1.7-1.x86_64.rpm || \
echo error installing kernel-module-e1000e \
>> $POSTLOG 2>&1
else
echo "Install kernel-module-e1000e for $ARCH" >> $POSTLOG 2>&1
rpm -ivh $DIR_E1000E/kernel-module-e1000e-2.6.18-92.1.22.el5-0.4.1.7-1.i386.rpm || \
echo error installing kernel-module-e1000e \
>> $POSTLOG 2>&1
fi
=closetwisty
Copy the kernel-module-e1000e RPMS to F</mnt/master/linux/dist/scientific/51/psi/all/>
and F</mnt/master/linux/dist/scientific/51/kernel/all/>, create the symbolic links
in the corresponding F<current> and/or F<testing> repositories, and run C<createrepo>
within F<current> and/or F<testing>.
That's it. Now you can test the installation using the custom key
F<e1000e>.
@@ -0,0 +1,236 @@
Linux How To - RPM - Update psi-desktop Package on SL5
======================================================
Introduction
------------
Currently there are many PSI-related cron jobs which are executed
at the same time on all linux clients. As a consequence too many AFS
requests coming from these clients are sent simultaneously to the AFS
servers causing AFS performance problems. Thus, these cron jobs should
be spread over time. In order to have time to fix things if anything
goes wrong, the respective jobs are scheduled within a certain time
window instead of using the whole time range, e.g. the hourly executed
job window ranges from 20-40 minutes of an hour and not from 0-59
minutes.
At the time of writing only `/usr/sbin/psi-puppet` is configured to
run within such a window. Its minute is set randomly in
`/etc/cron.d/psi-cronjobs` during installation of the RPM psi-desktop,
to which the file belongs (27 in the example below)::
#
# /etc/cron.d/psi-cronjobs
#
# PSI related cronjobs
#
# Urs Beyerle, PSI
#
# Run puppet every hour at xx:27
27 * * * * root /usr/sbin/psi-puppet >/dev/null 2>&1
# Send info back to master daily at 10:00
00 10 * * * root /usr/sbin/psi-sendinfo >/dev/null 2>&1
# Run psi-auto-udpate daily at 00:30, 01:30, 02:30, 04:30
30 00 * * * root /usr/sbin/psi-auto-update >> /var/log/update/psi-update.log 2>&1
30 02 * * * root /usr/sbin/psi-auto-update >> /var/log/update/psi-update.log 2>&1
30 03 * * * root /usr/sbin/psi-auto-update >> /var/log/update/psi-update.log 2>&1
30 04 * * * root /usr/sbin/psi-auto-update >> /var/log/update/psi-update.log 2>&1
- /usr/sbin/psi-puppet: Runs puppetd, but only if AUTO_UPDATE_CONFIG=yes
- /usr/sbin/psi-auto-update: Runs the psi-update script if AUTO_UPDATE_RPMS=yes
- /usr/sbin/psi-update: Runs the yum_update script
(Will get the script yum_update from our master.
The script yum_update will be saved as psi-yum_update.
Afterwards psi-yum_update will be executed locally.)
Now the following jobs should be scheduled analogously.#???
- `/etc/cron.hourly/update_afs_users`
- `/etc/cron.hourly/update_environment_modules`
Maybe the time window for `/usr/sbin/psi-puppet` has to be expanded.
- Are there GFA related things which should be scheduled in a new manner?#???
Procedure Description
~~~~~~~~~~~~~~~~~~~~~
Go to the relevant build system (e.g. tux50). Get the sources for
building the psi-desktop RPM, unpack them and apply your modifications
in the unpacked files. When finished, tar and gzip them again, build the new
RPM and, finally, test it on a test machine.
Procedure Step by Step
~~~~~~~~~~~~~~~~~~~~~~
Get The Source RPM
....................
Run::
# [gasser_m@tux50]
# cd /scratch/gasser_m/rpm_topdir/SRPMS/
# ll /scratch/redhat/SRPMS/
total 11M
-rw-r--r-- 1 beyerle ait 4.9M Jun 30 08:09 psi-desktop-1.3.3-16.slp5.src.rpm
-rw-r--r-- 1 beyerle ait 4.9M Jun 30 08:11 psi-desktop-1.3.3-17.slp5.src.rpm
-rw-r--r-- 1 beyerle ait 5.3K Jun 30 10:32 nxcleanup-0.3-1.slp5.src.rpm
-rw-r--r-- 1 beyerle ait 6.0K Jul 9 10:40 nxcleanup-0.4-1.slp5.src.rpm
-rw-r--r-- 1 beyerle ait 418K Jul 16 10:48 aufs-0.20080605.cvs-5.slp5.src.rpm
-rw-r--r-- 1 beyerle ait 642K Jul 29 12:57 ntfs-3g-1.2712-4.slp5.src.rpm
# cp /scratch/redhat/SRPMS/psi-desktop-1.3.3-17.slp5.src.rpm .
# rpm -ivh psi-desktop-1.3.3-17.slp5.src.rpm
This will install the files
`/scratch/gasser_m/rpm_topdir/SOURCES/psi-desktop-1.3.3.tar.gz` and
`/scratch/gasser_m/rpm_topdir/SPECS/psi-desktop.spec`::
# cd /scratch/gasser_m/rpm_topdir/SOURCES/
# tar xfz psi-desktop-1.3.3.tar.gz
Apply Your Modifications
........................
Edit `/scratch/gasser_m/rpm_topdir/SPECS/psi-desktop.spec`::
Modified Parts in "psi-desktop.spec"
------------------------------------
Note: Some lines are cut at the end.
NEW VERSION | OLD VERSION
---------------------------------------------------------------------------------------------------------------------
...
Release: 18%{?dist} | Release: 17%{?dist}
...
Packager: Marc Gasser <marc.gasser@psi.ch> | Packager: Urs Beyerle <urs.beyerle@psi.ch>
...
#### begin psi-cronjobs ############################ | # randomly runs puppet in psi-cronjobs
# | # create a random number between 0-9
# Randomly run commands in "/etc/cron.d/psi-cronjobs" | random=${RANDOM:1:1}
# | [ ! $random ] && random=0
# /usr/sbin/psi-puppet: hourly at random1 | sed -i "s/random/$random/" /etc/cron.d/psi-cronjobs
random1=$[ ( $RANDOM % 20 ) + 21 ] # create a random n|
[ ! $random1 ] && random1=33 |
sed -i "s/random1/$random1/g" /etc/cron.d/psi-cronjobs |
|
# /usr/sbin/psi-sendinfo: daily at 10:1random2 |
# /usr/sbin/update_environment_modules: daily at 06:0random|
# /usr/sbin/update_afs_users: hourly at XX:1random2 |
random2=$[ ( $RANDOM % 10 ) ] # create a random number be|
[ ! $random2 ] && random2=3 |
sed -i "s/random2/$random2/g" /etc/cron.d/psi-cronjobs |
|
#### end psi-cronjobs ############################ |
...
---------------------------------------------------------------------------------------------------------------------
Edit `/etc/cron.d/psi-cronjobs`. This is the modified `psi-cronjobs` which
will be packed into the new `psi-desktop` RPM::
# cd /scratch/gasser_m/rpm_topdir/SOURCES/psi-desktop-1.3.3/slp5/etc/cron.d
# vi psi-cronjobs
#
# /etc/cron.d/psi-cronjobs
#
# PSI related cronjobs
#
# Marc Gasser, PSI
#
# Note: The time settings for these cron jobs are
# set in "psi-desktop.spec". They are
# randomized within a specific time window.
# The randomization takes place when the RPM is
# installed, thus it is client specific.
# Run psi-puppet every hour at XX:random1
random1 * * * * root /usr/sbin/psi-puppet >/dev/null 2>&1
# Send info back to master daily at 10:1random2
1random2 10 * * * root /usr/sbin/psi-sendinfo >/dev/null 2>&1
# Run psi-auto-udpate daily at 00:4random2, 01:4random2, 02:4random2, 04:4random2
4random2 00 * * * root /usr/sbin/psi-auto-update >> /var/log/update/psi-update.log 2>&1
4random2 02 * * * root /usr/sbin/psi-auto-update >> /var/log/update/psi-update.log 2>&1
4random2 03 * * * root /usr/sbin/psi-auto-update >> /var/log/update/psi-update.log 2>&1
4random2 04 * * * root /usr/sbin/psi-auto-update >> /var/log/update/psi-update.log 2>&1
# Run update_environment_modules daily at 06:0random2, 13:0random2, 18:0random2
0random2 06 * * * root /usr/sbin/update_environment_modules >/dev/null 2>&1
0random2 13 * * * root /usr/sbin/update_environment_modules >/dev/null 2>&1
0random2 18 * * * root /usr/sbin/update_environment_modules >/dev/null 2>&1
# Run update_afs_users every hour at XX:1random2
1random2 * * * * root /usr/sbin/update_afs_users >/dev/null 2>&1
Move commands which are controlled by `/etc/cron.d/psi-cronjobs` to
`/usr/sbin/` if not done yet::
# cd /scratch/gasser_m/rpm_topdir/SOURCES/psi-desktop-1.3.3/slp5/
# mv etc/cron.d/update_environment_modules usr/sbin/
# mv etc/cron.d/update_afs_users usr/sbin/
Cronjob Schedule in `psi-cronjobs`
..................................
Applying this `psi-desktop.spec` to the file `psi-cronjobs` will
randomly distribute the respective cronjobs within the time windows
given below. The randomization takes place when the RPM is installed
or updated on the client.
- `/usr/sbin/psi-puppet`: hourly from HH:21 to HH:40.
- `/usr/sbin/psi-sendinfo`: daily from 10:10 to 10:19.
- `/usr/sbin/update_environment_modules`: daily from 06:00 to 06:09,
from 13:00 to 13:09 and from 18:00 to 18:09.
- `/usr/sbin/update_afs_users`: hourly from HH:10 to HH:19.
Build the new `psi-desktop` RPM
...............................
Build both the new RPM and the new SRPM using the spec file::
# cd /scratch/gasser_m/rpm_topdir/SOURCES/
# tar cfz psi-desktop-1.3.3.tar.gz psi-desktop-1.3.3
# rm -rf psi-desktop-1.3.3/
# cd /scratch/gasser_m/rpm_topdir/SPECS/
# rpmbuild -ba psi-desktop.spec
Test the New `psi-desktop` RPM
..............................
The file
`tux50:/scratch/gasser_m/rpm_topdir/RPMS/noarch/psi-desktop-1.3.3-18.slp5.noarch.rpm`
was copied to `pc7377:/tmp/` and updated using yum::
# [root@pc7377 etc]
# yum update /tmp/psi-desktop-1.3.3-18.slp5.noarch.rpm
Add the New `psi-desktop` RPM to Psi-All Repository
...................................................
Copy the new RPM to the respective SL5 repository. Update the
`repodata` and the symbolic links in `.../RPMSall/...` if necessary::
# [gasser_m@tux50 ~]
# cd /scratch/gasser_m/rpm_topdir/RPMS/noarch/
# cp psi-desktop-1.3.3-18.slp5.noarch.rpm /afs/psi.ch/software/linux/dist/scientific/51/psi/all/
# cd /afs/psi.ch/software/linux/dist/scientific/51/psi/all/
# createrepo .
# cd /afs/psi.ch/software/linux/dist/scientific/51/scripts
# make rpms_all
@@ -0,0 +1,452 @@
Linux How To - SL5 - Nvidia Driver Installation/Update On SL51 i386
===================================================================
References
----------
- https://wiki.intranet.psi.ch/AIT/Linux/HowToDKMS?skin=clean.nat%2cpsiskin%2cpattern #Update_nvidia_x11_drv_driver_to
- [[DynamicKernelModuleSupportBasics][Dynamic Kernel Module Support (DKMS) Basics]]
Requirements
~~~~~~~~~~~~
DKMS (Dynamic Kernel Module Support) framework has to be installed
before the nvidia RPM installation, because the RPM scripts use it for
proper setup::
# yum install dkms.noarch
Procedure
~~~~~~~~~
Check which graphics card is in your computer and which driver is
installed::
# lspci
...
01:00.0 VGA compatible controller: nVidia Corporation G72 [GeForce 7300 LE] (rev a1)
# less /etc/X11/xorg.conf
(See the output below at "before nvidia installation".)
# rpm -qa | grep nvid
(nothing found)
Before beginning with the installation of the new graphics driver,
make a backup of your current `xorg.conf`::
# cd /etc/X11/
# cp xorg.conf xorg.conf_bak
Check whether some nvidia driver is available::
# [root@pc7377 X11]# yum list | grep nvidia
nvidia-x11-drv.i386 100.14.19-3.9.slp5 sl51psi
# [root@pc7377 X11]# yum install nvidia-x11-drv.i386
### begin output
Loading "kernel-module" plugin
Setting up Install Process
Setting up repositories
Reading repository metadata in from local files
Parsing package install arguments
Resolving Dependencies
--> Populating transaction set with selected packages. Please wait.
---> Downloading header for nvidia-x11-drv to pack into transaction set.
nvidia-x11-drv-100.14.19- 100% |=========================| 24 kB 00:00
---> Package nvidia-x11-drv.i386 0:100.14.19-3.9.slp5 set to be updated
--> Running transaction check
--> Processing Dependency: dkms for package: nvidia-x11-drv
--> Restarting Dependency Resolution with new changes.
--> Populating transaction set with selected packages. Please wait.
---> Downloading header for dkms to pack into transaction set.
dkms-2.0.17.4-1.9.slp5.no 100% |=========================| 36 kB 00:00
---> Package dkms.noarch 0:2.0.17.4-1.9.slp5 set to be updated
--> Running transaction check
Beginning Kernel Module Plugin
Finished Kernel Module Plugin
Dependencies Resolved
=============================================================================
Package Arch Version Repository Size
=============================================================================
Installing:
nvidia-x11-drv i386 100.14.19-3.9.slp5 sl51psi 7.1 M
Installing for dependencies:
dkms noarch 2.0.17.4-1.9.slp5 sl51psi 89 k
Transaction Summary
=============================================================================
Install 2 Package(s)
Update 0 Package(s)
Remove 0 Package(s)
Total download size: 7.2 M
Is this ok [y/N]: y
Downloading Packages:
(1/2): nvidia-x11-drv-100 100% |=========================| 7.1 MB 00:00
(2/2): dkms-2.0.17.4-1.9. 100% |=========================| 89 kB 00:00
Running Transaction Test
Finished Transaction Test
Transaction Test Succeeded
Running Transaction
Installing: dkms ######################### [1/2]
Installing: nvidia-x11-drv ######################### [2/2]
Installed: nvidia-x11-drv.i386 0:100.14.19-3.9.slp5
Dependency Installed: dkms.noarch 0:2.0.17.4-1.9.slp5
Complete!
[root@pc7377 X11]# dkms status -m nvidia
nvidia, 100.14.19-3.9.slp5, 2.6.18-92.1.10.el5, i686: installed
nvidia, 100.14.19-3.9.slp5, 2.6.18-53.1.4.el5, i686: installed-weak from 2.6.18-92.1.10.el5
nvidia, 100.14.19-3.9.slp5, 2.6.18-53.1.21.el5, i686: installed-weak from 2.6.18-92.1.10.el5
[root@pc7377 X11]# rpm -q --scripts nvidia-x11-drv-100
package nvidia-x11-drv-100 is not installed
[root@pc7377 X11]# rpm -q --scripts nvidia-x11-drv
postinstall scriptlet (using /bin/sh):
/sbin/ldconfig
# Make sure we have a Files section in xorg.conf, otherwise create an empty one
XORGCONF=/etc/X11/xorg.conf
[ -w ${XORGCONF} ] && ! grep -q 'Section "Files"' ${XORGCONF} && \
echo -e 'Section "Files"\nEndSection' >> ${XORGCONF}
# Enable the proprietary driver
/usr/sbin/nvidia-config-display enable || :
# Add to DKMS registry
dkms add -m nvidia -v 100.14.19-3.9.slp5 -q || :
# Rebuild and make available for the currenty running kernel
dkms build -m nvidia -v 100.14.19-3.9.slp5 -q || :
dkms install -m nvidia -v 100.14.19-3.9.slp5 -q --force || :
/sbin/MAKEDEV nvidia
preuninstall scriptlet (using /bin/sh):
# Remove all versions from DKMS registry
dkms remove -m nvidia -v 100.14.19-3.9.slp5 -q --all || :
# Last removal, disable the proprietary driver
if [ $1 -eq 0 ]; then
/usr/sbin/nvidia-config-display disable || :
fi
postuninstall program: /sbin/ldconfig
### end output
Check the status of the nvidia kernel module. (From the dkms manpage:
`dkms status` returns the current status of modules, versions and
kernels within the tree as well as whether they have been added, built
or installed.)::
# dkms status -m nvidia
nvidia, 100.14.19-3.9.slp5, 2.6.18-92.1.10.el5, i686: installed
nvidia, 100.14.19-3.9.slp5, 2.6.18-53.1.4.el5, i686: installed-weak from 2.6.18-92.1.10.el5
nvidia, 100.14.19-3.9.slp5, 2.6.18-53.1.21.el5, i686: installed-weak from 2.6.18-92.1.10.el5
List the package specific scriptlet(s) that are used as part of the
installation and uninstallation processes::
# rpm -q --scripts nvidia-x11-drv
### begin output
postinstall scriptlet (using /bin/sh):
/sbin/ldconfig
# Make sure we have a Files section in xorg.conf, otherwise create an empty one
XORGCONF=/etc/X11/xorg.conf
[ -w ${XORGCONF} ] && ! grep -q 'Section "Files"' ${XORGCONF} && \
echo -e 'Section "Files"\nEndSection' >> ${XORGCONF}
# Enable the proprietary driver
/usr/sbin/nvidia-config-display enable || :
# Add to DKMS registry
dkms add -m nvidia -v 100.14.19-3.9.slp5 -q || :
# Rebuild and make available for the currenty running kernel
dkms build -m nvidia -v 100.14.19-3.9.slp5 -q || :
dkms install -m nvidia -v 100.14.19-3.9.slp5 -q --force || :
/sbin/MAKEDEV nvidia
preuninstall scriptlet (using /bin/sh):
# Remove all versions from DKMS registry
dkms remove -m nvidia -v 100.14.19-3.9.slp5 -q --all || :
# Last removal, disable the proprietary driver
if [ $1 -eq 0 ]; then
/usr/sbin/nvidia-config-display disable || :
fi
postuninstall program: /sbin/ldconfig
### end output
Check whether your xorg.conf was modified to use the nvidia driver::
# less /etc/X11/xorg.conf
### begin output
(after nvidia installation) # (before nvidia installation)
# Xorg configuration created by pyxf86config # # Xorg configuration created by pyxf86config
#
Section "ServerLayout" # Section "ServerLayout"
Identifier "Default Layout" # Identifier "Default Layout"
Screen 0 "Screen0" 0 0 # Screen 0 "Screen0" 0 0
InputDevice "Keyboard0" "CoreKeyboard" # InputDevice "Keyboard0" "CoreKeyboard"
EndSection # EndSection
#
Section "Files" #
ModulePath "/usr/lib/xorg/modules/extensions/nvidia" #
ModulePath "/usr/lib/xorg/modules" #
EndSection #
#
Section "InputDevice" # Section "InputDevice"
Identifier "Keyboard0" # Identifier "Keyboard0"
Driver "kbd" # Driver "kbd"
Option "XkbModel" "pc105" # Option "XkbModel" "pc105"
Option "XkbLayout" "ch" # Option "XkbLayout" "ch"
Option "XkbVariant" "de_nodeadkeys" # Option "XkbVariant" "de_nodeadkeys"
EndSection # EndSection
#
Section "Device" # Section "Device"
Identifier "Videocard0" # Identifier "Videocard0"
Driver "nvidia" # Driver "nv"
EndSection # EndSection
#
Section "Screen" # Section "Screen"
Identifier "Screen0" # Identifier "Screen0"
Device "Videocard0" # Device "Videocard0"
DefaultDepth 24 # DefaultDepth 24
SubSection "Display" # SubSection "Display"
Viewport 0 0 # Viewport 0 0
Depth 24 # Depth 24
EndSubSection # EndSubSection
EndSection # EndSection
### end output #
Test the module with glxgears for instance::
[root@pc7377 ~]# glxgears
11424 frames in 5.0 seconds = 2284.662 FPS
11291 frames in 5.0 seconds = 2258.003 FPS
11244 frames in 5.0 seconds = 2248.682 FPS
11290 frames in 5.0 seconds = 2257.974 FPS
12626 frames in 5.0 seconds = 2525.121 FPS
3439 frames in 5.0 seconds = 687.363 FPS
964 frames in 5.0 seconds = 192.629 FPS
964 frames in 5.0 seconds = 192.620 FPS
1395 frames in 5.0 seconds = 278.978 FPS
12080 frames in 5.0 seconds = 2415.854 FPS
11252 frames in 5.0 seconds = 2250.348 FPS
Problem With Automatic Nvidia Driver Update On SL51 i386
--------------------------------------------------------
Problem Description
~~~~~~~~~~~~~~~~~~~
On some systems the automatic nvidia driver update by dkms fails.
Somehow when the new RPM `nvidia-x11-drv-169.12-4.9.slp5` is installed
by yum or rpm, the old nvidia version `100.14.19-3.9.slp5` remains in
dkms status `added`, i.e. the respective directory remains in the dkms
tree, while the original driver sources are removed from `/usr/src/`,
i.e. the RPM `nvidia-x11-drv-100.14.19-3.9.slp5` was removed.
During reboot the service `dkms_autoinstaller` (see `chkconfig
--list`) obviously tries to build the old `100.14.19-3.9.slp5` as well
because it tries to get the file
`/var/lib/dkms/nvidia/100.14.19-3.9.slp5/source/dkms.conf`. This,
however, fails because the `.../source` is a symbolic link pointing
to the source directory that was removed during the nvidia RPM update
as mentioned before::
"source -> /usr/src/nvidia-100.14.19-3.9.slp5"
Though, this basically would not be a problem if dkms was not confused
by the occurrences of several nvidia versions in its tree under
`/var/lib/dkms/nvidia/`.
The main questions are:
- Why does the update procedure only partially clean the installation
environment (under `/usr/src/` it is clean, under
`/var/lib/dkms/nvidia/` it is not)?
- Why does dkms not recognize only the new nvidia version, but also
try to build modules out of the sources of the older version?
Solution
~~~~~~~~
The problem appears if the nvidia driver RPM is installed or updated
before the dkms framework is ready on the system, because the RPM
scripts invoke a variety of `dkms` commands to install the new driver
into the dkms tree and to remove the old driver version from it. Thus,
one has to install the dkms RPM first, before doing any driver
installation or updates.
Manual Update Using DKMS
~~~~~~~~~~~~~~~~~~~~~~~~
Perform the following steps::
[root@pc7377 ~]# dkms status -m nvidia
nvidia, 100.14.19-3.9.slp5, 2.6.18-92.1.10.el5, i686: installed
nvidia, 100.14.19-3.9.slp5, 2.6.18-92.1.13.el5, i686: installed
nvidia, 100.14.19-3.9.slp5, 2.6.18-53.1.4.el5, i686: installed-weak from 2.6.18-92.1.10.el5
nvidia, 100.14.19-3.9.slp5, 2.6.18-53.1.4.el5, i686: installed-weak from 2.6.18-92.1.13.el5
nvidia, 100.14.19-3.9.slp5, 2.6.18-53.1.21.el5, i686: installed-weak from 2.6.18-92.1.10.el5
nvidia, 100.14.19-3.9.slp5, 2.6.18-53.1.21.el5, i686: installed-weak from 2.6.18-92.1.13.el5
nvidia, 100.14.19-3.9.slp5, 2.6.18-92.1.10.el5, i686: installed-weak from 2.6.18-92.1.13.el5
[root@pc7377 ~]# uname -a
Linux pc7377 2.6.18-92.1.13.el5 #1 SMP Wed Sep 24 16:44:34 EDT 2008 i686 i686 i386 GNU/Linux
[root@pc7377 ~]# locate nvidia.ko
/lib/modules/2.6.18-53.1.21.el5/weak-updates/lib/modules/2.6.18-92.1.10.el5/kernel/drivers/video/nvidia/nvidia.ko
/lib/modules/2.6.18-53.1.21.el5/weak-updates/lib/modules/2.6.18-92.1.13.el5/kernel/drivers/video/nvidia/nvidia.ko
/lib/modules/2.6.18-53.1.4.el5/weak-updates/lib/modules/2.6.18-92.1.10.el5/kernel/drivers/video/nvidia/nvidia.ko
/lib/modules/2.6.18-53.1.4.el5/weak-updates/lib/modules/2.6.18-92.1.13.el5/kernel/drivers/video/nvidia/nvidia.ko
/lib/modules/2.6.18-92.1.10.el5/kernel/drivers/video/nvidia/nvidia.ko
/lib/modules/2.6.18-92.1.10.el5/weak-updates/lib/modules/2.6.18-92.1.13.el5/kernel/drivers/video/nvidia/nvidia.ko
/lib/modules/2.6.18-92.1.13.el5/kernel/drivers/video/nvidia/nvidia.ko
/var/lib/dkms/nvidia/100.14.19-3.9.slp5/2.6.18-92.1.10.el5/i686/module/nvidia.ko
/var/lib/dkms/nvidia/100.14.19-3.9.slp5/2.6.18-92.1.13.el5/i686/module/nvidia.ko
[root@pc7377 ~]# rpm -q nvidia-x11-drv
nvidia-x11-drv-100.14.19-3.9.slp5
[root@pc7377 ~]# cp -i /etc/X11/xorg.conf /etc/X11/xorg.conf_orig
[root@pc7377 ~]# yum --enablerepo=psi-beta update nvidia-x11-drv
...
=============================================================================
Package Arch Version Repository Size
=============================================================================
Updating:
nvidia-x11-drv i386 169.12-4.9.slp5 psi-beta 9.7 M
Transaction Summary
=============================================================================
Install 0 Package(s)
Update 1 Package(s)
Remove 0 Package(s)
Total download size: 9.7 M
Is this ok [y/N]: y
Downloading Packages:
(1/1): nvidia-x11-drv-169 100% |=========================| 9.7 MB 00:00
Running Transaction Test
Finished Transaction Test
Transaction Test Succeeded
Running Transaction
Updating : nvidia-x11-drv ######################### [1/2]
Cleanup : nvidia-x11-drv ######################### [2/2]
Updated: nvidia-x11-drv.i386 0:169.12-4.9.slp5
- Note
`yum update` takes some time because it invokes dkms, which builds
the required module(s) on the fly and does the cleaning up, see
`nvidia.spec` or the output of `rpm -q --scripts RPM`::
[root@pc7377 nvidia]# rpm -q --scripts nvidia-x11-drv-169.12-4.9.slp5
postinstall scriptlet (using /bin/sh):
/sbin/ldconfig
# Make sure we have a Files section in xorg.conf, otherwise create an empty one
XORGCONF=/etc/X11/xorg.conf
[ -w ${XORGCONF} ] && ! grep -q 'Section "Files"' ${XORGCONF} && \
echo -e 'Section "Files"\nEndSection' >> ${XORGCONF}
# Enable the proprietary driver
/usr/sbin/nvidia-config-display enable || :
# Add to DKMS registry
dkms add -m nvidia -v 169.12-4.9.slp5 -q || :
# Rebuild and make available for the currenty running kernel
dkms build -m nvidia -v 169.12-4.9.slp5 -q || :
dkms install -m nvidia -v 169.12-4.9.slp5 -q --force || :
/sbin/MAKEDEV nvidia
preuninstall scriptlet (using /bin/sh):
# Remove all versions from DKMS registry
dkms remove -m nvidia -v 169.12-4.9.slp5 -q --all || :
# Last removal, disable the proprietary driver
if [ $1 -eq 0 ]; then
/usr/sbin/nvidia-config-display disable || :
fi
postuninstall program: /sbin/ldconfig
Snapshot of output of `ps auxwf` during `yum update`::
...
\_ /bin/bash
root 16677 0.0 0.0 5968 1668 pts/1 S 17:25 0:00 | \_ su -
root 16681 0.0 0.0 4804 1440 pts/1 S 17:25 0:00 | \_ -bash
root 17409 1.6 2.1 55980 44440 pts/1 S+ 17:43 0:04 | \_ /usr/bin/python /usr/bin/yum --enablerepo=psi-beta
root 17413 0.0 0.0 4452 1028 pts/1 S+ 17:44 0:00 | \_ /bin/sh /var/tmp/rpm-tmp.62532 2
root 18088 0.0 0.0 5112 1764 pts/1 S+ 17:45 0:00 | \_ /bin/bash /usr/sbin/dkms install -m nvidia
root 18188 0.0 0.0 4456 1200 pts/1 S+ 17:45 0:00 | \_ /bin/bash /sbin/weak-modules --add-modu
root 19238 0.0 0.0 4460 692 pts/1 S+ 17:47 0:00 | \_ /bin/bash /sbin/weak-modules --add-
root 19243 5.5 0.0 2004 452 pts/1 D+ 17:47 0:00 | \_ zcat /boot/initrd-2.6.18-53.1.4
root 19244 2.5 0.0 1816 496 pts/1 S+ 17:47 0:00 | \_ cpio -i
...
When the new nvidia RPM was installed, the old stuff was not cleaned up
properly: the old module sources remained in the dkms tree under
`/var/lib/dkms/nvidia/` and have to be removed manually::
[root@pc7377 etc]# dkms status -m nvidia
nvidia, 100.14.19-3.9.slp5: added
nvidia, 169.12-4.9.slp5, 2.6.18-92.1.13.el5, i686: installed
nvidia, 169.12-4.9.slp5, 2.6.18-53.1.4.el5, i686: installed-weak from 2.6.18-92.1.13.el5
nvidia, 169.12-4.9.slp5, 2.6.18-53.1.21.el5, i686: installed-weak from 2.6.18-92.1.13.el5
nvidia, 169.12-4.9.slp5, 2.6.18-92.1.10.el5, i686: installed-weak from 2.6.18-92.1.13.el5
# rm -rf /var/lib/dkms/nvidia/100.14.19-3.9.slp5/
# dkms status -m nvidia
nvidia, 169.12-4.9.slp5, 2.6.18-92.1.13.el5, i686: installed
nvidia, 169.12-4.9.slp5, 2.6.18-53.1.4.el5, i686: installed-weak from 2.6.18-92.1.13.el5
nvidia, 169.12-4.9.slp5, 2.6.18-53.1.21.el5, i686: installed-weak from 2.6.18-92.1.13.el5
nvidia, 169.12-4.9.slp5, 2.6.18-92.1.10.el5, i686: installed-weak from 2.6.18-92.1.13.el5
The `.../weak-updates/...` nvidia modules below are symbolic links to
`/lib/modules/2.6.18-92.1.13.el5/kernel/drivers/video/nvidia/nvidia.ko`,
which is identical with
`/var/lib/dkms/nvidia/169.12-4.9.slp5/2.6.18-92.1.13.el5/i686/module/nvidia.ko`::
[root@pc7377 etc]# updatedb
[root@pc7377 etc]# locate nvidia.ko
/lib/modules/2.6.18-53.1.21.el5/weak-updates/lib/modules/2.6.18-92.1.13.el5/kernel/drivers/video/nvidia/nvidia.ko
/lib/modules/2.6.18-53.1.4.el5/weak-updates/lib/modules/2.6.18-92.1.13.el5/kernel/drivers/video/nvidia/nvidia.ko
/lib/modules/2.6.18-92.1.10.el5/weak-updates/lib/modules/2.6.18-92.1.13.el5/kernel/drivers/video/nvidia/nvidia.ko
/lib/modules/2.6.18-92.1.13.el5/kernel/drivers/video/nvidia/nvidia.ko
/var/lib/dkms/nvidia/169.12-4.9.slp5/2.6.18-92.1.13.el5/i686/module/nvidia.ko
Nvidia related lines in `/var/log/messages`::
...
Oct 23 17:48:12 pc7377 Updated: nvidia-x11-drv.i386 169.12-4.9.slp5
...
Oct 24 11:16:04 pc7377 kernel: NVRM: API mismatch: the client has the version 169.12, but
Oct 24 11:16:04 pc7377 kernel: NVRM: this kernel module has the version 100.14.19. Please
Oct 24 11:16:04 pc7377 kernel: NVRM: make sure that this kernel module and all NVIDIA driver
Oct 24 11:16:04 pc7377 kernel: NVRM: components have the same version.
Oct 24 11:16:05 pc7377 gdm[6706]: gdm_slave_xioerror_handler: Fatal X error - Restarting :0
Oct 24 11:16:09 pc7377 kernel: NVRM: API mismatch: the client has the version 169.12, but
Oct 24 11:16:09 pc7377 kernel: NVRM: this kernel module has the version 100.14.19. Please
Oct 24 11:16:09 pc7377 kernel: NVRM: make sure that this kernel module and all NVIDIA driver
Oct 24 11:16:09 pc7377 kernel: NVRM: components have the same version.
Oct 24 11:16:10 pc7377 gdm[3734]: gdm_slave_xioerror_handler: Fatal X error - Restarting :0
Oct 24 11:16:13 pc7377 kernel: NVRM: API mismatch: the client has the version 169.12, but
Oct 24 11:16:13 pc7377 kernel: NVRM: this kernel module has the version 100.14.19. Please
Oct 24 11:16:13 pc7377 kernel: NVRM: make sure that this kernel module and all NVIDIA driver
Oct 24 11:16:13 pc7377 kernel: NVRM: components have the same version.
Oct 24 11:16:14 pc7377 gdm[3752]: gdm_slave_xioerror_handler: Fatal X error - Restarting :0
Oct 24 11:16:14 pc7377 gdm[6489]: deal_with_x_crashes: Running the XKeepsCrashing script
...
Oct 24 11:27:45 pc7377 kernel: nvidia: module license 'NVIDIA' taints kernel.
Oct 24 11:27:45 pc7377 kernel: ACPI: PCI Interrupt 0000:01:00.0[A] -> GSI 16 (level, low) -> IRQ 169
Oct 24 11:27:45 pc7377 kernel: NVRM: loading NVIDIA UNIX x86 Kernel Module 169.12 Thu Feb 14 17:53:07 PST 2008
...
@@ -0,0 +1,131 @@
How To Look Up For PCI Devices And Corresponding Modules in SL5
===============================================================
Weblinks
--------
- Intel Driver Download Center: http://downloadcenter.intel.com/
- Intel Support Site: http://support.intel.com/support/index.htm
- Intel Network Connectivity: http://support.intel.com/support/network/sb/cs-008441.htm
Procedure Description
---------------------
List your PCI devices, look up the vendor and hardware ID numbers
and search the corresponding entry in the table which maps hardware
IDs to module names.
This might be helpful if one has a new hardware device such as a new
network card, which is not recognized by the system because of a
missing entry in the file
`/lib/modules/<kernelversion>/modules.pcimap`, which maps devices to
modules. Assumption: the required driver (module) is present.
Step by Step Procedure: Example For a Working Network Card
----------------------------------------------------------
List your PCI devices using `lspci`::
# lspci
...
00:19.0 Ethernet controller: Intel Corporation 82566DM-2 Gigabit Network Connection (rev 02)
...
Listing with `lspci -n` will show identification numbers instead of names::
# lspci -n
...
00:19.0 0200: 8086:10bd (rev 02)
...
- a)
`19.0 PCI device number`
- b)
`0200 Hardware Type "Ethernet controller"`
- c)
`8086 Vendor ID "Intel Corporation"`
- d)
`10bd Hardware ID "82566DM-2 Gigabit Network Connection"`
For a very verbose output use `lspci -vv`::
# lspci -vv | grep 00:19 -A 15
00:19.0 Ethernet controller: Intel Corporation 82566DM-2 Gigabit Network Connection (rev 02)
Subsystem: Fujitsu Siemens Computer GmbH Unknown device 10fd
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR-
Latency: 0
Interrupt: pin A routed to IRQ 74
Region 0: Memory at f2200000 (32-bit, non-prefetchable) [size=128K]
Region 1: Memory at f2225000 (32-bit, non-prefetchable) [size=4K]
Region 2: I/O ports at 1820 [size=32]
Capabilities: [c8] Power Management version 2
Flags: PMEClk- DSI+ D1- D2- AuxCurrent=0mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
Status: D0 PME-Enable- DSel=0 DScale=1 PME-
Capabilities: [d0] Message Signalled Interrupts: 64bit+ Queue=0/0 Enable+
Address: 00000000fee00000 Data: 404a
Capabilities: [e0] #13 [0306]
However, take the device ID `10bd` and look for it in the `modules.pcimap`::
# grep 10bd /lib/modules/2.6.18-92.1.13.el5/modules.pcimap
e1000e 0x00008086 0x000010bd 0xffffffff 0xffffffff 0x00000000 0x00000000 0x0
In this output one can see that the kernel module e1000e is selected
for this network device. You can verify this with `lsmod` if your network
is up::
# lsmod | grep e1000e
e1000e 92801 0
Step by Step Procedure: Example For a Not Working Network Card
--------------------------------------------------------------
Basically the steps from the previous section are repeated to get the
required hardware information.
Finally, the missing entry is added to `modules.pcimap`. Here we
assume that the "e1000e" module works as well for the new network
card::
# lspci
00:19.0 Ethernet controller: Intel Corporation Unknown device 10de (rev 02)
The device ID is `10de`::
# grep 10de /lib/modules/2.6.18-53.1.4.el5/modules.pcimap
...
Only vendor IDs match the number `10de`. The following line will be
added to `modules.pcimap`::
# vi /lib/modules/2.6.18-53.1.4.el5/modules.pcimap
... adding the line
e1000e 0x00008086 0x000010de 0xffffffff 0xffffffff 0x00000000 0x00000000 0x0
`ethtool`
---------
Use `ethtool` to get network device specific information, e.g. query
the specified ethernet device for associated driver information::
[root@pc7377 ~]# ethtool -i eth0
driver: e1000e
version: 0.4.1.7-NAPI
firmware-version: 1.3-0
bus-info: 0000:00:19.0
@@ -0,0 +1,26 @@
Linux Login Clusters
====================
Introduction
------------
The following list shows the hosts of linux login clusters.
- SL4 32bit
llc5, llc6 (Schrank 3.7)
- SL4 64 bit
lcsl4a, lcsl4b (Schrank 6.9)
- SL5 32bit
lcsl5a (Schrank 6.9)
- SL5 64 bit
llcsl5a (Schrank 6.9)
To watch the hosts go to the ganglia web interface: http://129.129.190.27/ganglia/
@@ -0,0 +1,533 @@
Load Balancer `llclb1`
======================
References
----------
- http://www.austintek.com/LVS/LVS-HOWTO/mini-HOWTO/LVS-mini-HOWTO.html
- http://www.linuxvirtualserver.org/
Introduction
------------
This document describes the setup of `llclb1.psi.ch`, the Linux Login
Cluster LoadBalancer for the ssh service on llc5 and llc6.
The load balancing is implemented by means of the ipvsadm utility.
The forwarding method is LVS-DR, direct routing (see below).
Terms and Abbreviations
~~~~~~~~~~~~~~~~~~~~~~~
- LVS (Linux Virtual Server)
The Linux Virtual Server is a scalable server built on a cluster of
real servers, with the load balancer (director) running on the Linux
operating system (LVS = director + realservers). The architecture
of the server cluster is fully transparent to end users, and the
users interact as if it were a single server.
- IPVS, ip_vs
The code that patches the linux kernel on the director.
- Director (Load Balancer)
The node that runs the ipvs code. Clients connect to the
director. The director forwards packets to the realservers. The
director is nothing but an IP router with special rules that make
the LVS work.
- Realservers (Servers)
The hosts that have the services. The realservers handle the
requests from the clients.
- Client
The host or user level process that connects to the VIP on the
director.
- Forwarding method
Currently LVS-NAT, LVS-DR, LVS-Tun. The director is a router with
somewhat different rules for forwarding packets than a normal
router. The forwarding method determines how the director sends
packets from the client to the realservers.
- Scheduling
The algorithm the director uses to select a realserver to service a
new connection request from a client (ipvsadm and schedulers).
- VIP
Virtual IP, the IP on the director that the client connects to.
- DIP
Director IP, the IP on the director in the network.
- RIP
Realserver IP, the IP on the realserver.
General setup of an LVS
~~~~~~~~~~~~~~~~~~~~~~~
The following figure illustrates a general layout of a network with an
LVS::
_________ _________ _________
| | | | | |
| CLIENT 1| | CLIENT 2| | CLIENT N|
|_________| |_________| |_________|
| | |
--------------------------------
|
___|_____
| |
| GATEWAY |
|_________|
|
Linux Virtual Server |
...............................................................
. | .
. ___VIP____ .
. | | .
. | DIRECTOR | .
. |__________| .
. DIP .
. | .
. ------------------------------------------ .
. | | | .
. | | | .
. _____RIP1_____ ____RIP2______ _____RIPN_____ .
. | | | | | | .
. | REALSERVER 1 | | REALSERVER 2 | | REALSERVER N | .
. | | | | | | .
. |______________| |______________| |______________| .
. .
...............................................................
One or Two NICs on the Director
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If you have one NIC on the director, the VIP and the RIP are on the
same physical network interface, where at least one virtual NIC was
added, to hold one of the IP addresses.
If you have two NICs you can assign one to the VIP network and the
second to the RIP network.
Here we have one NIC on the director.
Requirements For LVS-DR
-----------------------
For a reasonable LVS-DR setup the following is required:
- One or more clients which are on networks different from the VIP and
DIP network.
- The realservers must be on the same network as the director (the
realservers and director can arp each other).
- One director host with at least one network interface.
- One, better two static IPs for the director (one for the VIP, and
one for the DIP).
- Two realservers.
- One static IP for each realserver.
**Note on the number of IPs on the director**:
Depending on the service that is routed through the director, it might
be useful to have two different IPs for the VIP and the DIP.
E.g.: If you load balance an ssh service and you assign one IP to your
NIC, which acts as VIP and DIP at the same time, you cannot reach your
director anymore via ssh, because all ssh requests are routed through
to one of the realservers.
Installation Procedure
----------------------
Director Installation
~~~~~~~~~~~~~~~~~~~~~
An SL54 server installation was performed on llclb1.
The director configuration shown below is implemented by means of
puppet modules on puppet server psi-puppet1.
Realserver Installation
~~~~~~~~~~~~~~~~~~~~~~~
Currently, the realservers llc5 and llc6 are SL46 Desktop Enhanced
systems.
One NIC LVS-DR Configuration
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The director processes only the client-to-server half of a connection
in the virtual server via direct routing, and the response packets can
follow separate network routes to the clients. This can greatly
increase the scalability of the virtual server.
Compared to the virtual server via IP tunneling approach, this
approach doesn't have tunneling overhead (In fact, this overhead is
minimal in most situations), but requires that one of the load
balancer's interfaces and the real server's interfaces must be in the
same physical segment.
The following figure illustrates the setup of the LVS-DR with llclb1
(director) having one NIC. The whole LVS is on the same network::
_________ _________ _________
| | | | | |
| CLIENT 1| | CLIENT 2| | CLIENT N|
|_________| |_________| |_________|
| | |
--------------------------------
|
___|_____
| |
| GATEWAY | IP=129.129.190.1
|_________|
|
LVS (for service ssh) |
.....................................................................
. | .
. __________ | .
. | llc | | VIP=129.129.190.54 (eth0:1) .
. | | | .
. | DIRECTOR |---| .
. | | | .
. | llclb1 | | DIP=129.129.190.53 (eth0) .
. |__________| | .
. | .
. | .
. ------------------------------------ .
. | | .
. | | .
. RIP1=129.129.190.175 RIP2=129.129.190.176 .
. ______________ ______________ .
. | | | | .
. | REALSERVER 1 | | REALSERVER 2 | .
. | llc5 | | llc6 | .
. |______________| |______________| .
. .
.....................................................................
Network Configuration
.....................
Configure the LVS network according to the scheme shown above. Static
IPs for llclb1, llc, llc5 and llc6 have to be assigned.
Director Configuration
......................
Configure the static device eth0 and restart the network.
`/etc/sysconfig/network-scripts/ifcfg-eth0`::
DEVICE=eth0
BOOTPROTO=none
HWADDR=00:14:5E:6B:13:3E
ONBOOT=yes
IPADDR=129.129.190.53
NETMASK=255.255.255.0
GATEWAY=129.129.190.1
TYPE=Ethernet
USERCTL=no
IPV6INIT=no
PEERDNS=yes
`/etc/sysconfig/network`::
NETWORKING=yes
NETWORKING_IPV6=no
HOSTNAME=llclb1
Install the package ipvsadm::
# yum install ipvsadm
Set up the LVS for ssh on the director using the following script:
Note: It might be better to add it to the init scripts.
`/etc/setup-LVS-DR-director.conf`::
#!/bin/bash
#---------------mini-rc.lvs_dr-director------------------------
###
### Network configuration
###
VIP1=129.129.190.54
RIP1=129.129.190.175
RIP2=129.129.190.176
# Set ip_forward OFF for lvs-dr director (1 on, 0 off)
# (there is no forwarding in the conventional sense for LVS-DR)
cat /proc/sys/net/ipv4/ip_forward
echo "0" >/proc/sys/net/ipv4/ip_forward
# Director is not gw for realservers: leave icmp redirects on
echo 'setting icmp redirects (1 on, 0 off) '
echo "1" >/proc/sys/net/ipv4/conf/all/send_redirects
cat /proc/sys/net/ipv4/conf/all/send_redirects
echo "1" >/proc/sys/net/ipv4/conf/default/send_redirects
cat /proc/sys/net/ipv4/conf/default/send_redirects
echo "1" >/proc/sys/net/ipv4/conf/eth0/send_redirects
cat /proc/sys/net/ipv4/conf/eth0/send_redirects
# Add ethernet device and routing for VIP $VIP1
/sbin/ifconfig eth0:1 $VIP1 broadcast $VIP1 netmask 255.255.255.255
/sbin/route add -host $VIP1 dev eth0:1
# Listing ifconfig info for VIP $VIP1
/sbin/ifconfig eth0:1
# Check VIP $VIP1 is reachable from self (director)
/bin/ping -c 1 $VIP1
# Listing routing info for VIP $VIP1
/bin/netstat -rn
###
### Setup_ipvsadm_table
###
# Clear ipvsadm table
/sbin/ipvsadm -C
# Installing LVS services with ipvsadm
# Add ssh to VIP with round robin scheduling
/sbin/ipvsadm -A -t ${VIP1}:ssh -s rr
# Forward ssh to realserver using direct routing with weight 1
/sbin/ipvsadm -a -t ${VIP1}:ssh -r $RIP1 -g -w 1
# Check realserver reachable from director
ping -c 1 $RIP1
# Forward ssh to realserver using direct routing with weight 1
/sbin/ipvsadm -a -t ${VIP1}:ssh -r $RIP2 -g -w 1
# Check realserver reachable from director
ping -c 1 $RIP2
# Set tcp timeout to 72 hours while leaving
# tcpfin and udp timeouts unchanged.
/sbin/ipvsadm --set 259200 0 0
# List timeout values
/sbin/ipvsadm -L --timeout
# Displaying ipvsadm settings
/sbin/ipvsadm
# Not installing a default gw for LVS_TYPE vs-dr
###
### Delete an LVS entry
###
#
# Example: remove/delete ssh forwarding to RIP2
#
# /sbin/ipvsadm -d -t ${VIP1}:ssh -r $RIP2
#
#
#---------------mini-rc.lvs_dr-director------------------------
Realserver Configuration
........................
The realserver shall send responses not to the VIP of the load
balancer, but rather to the client directly. This requires the iptables
rule below.
Settings of realserver 2 for instance:
`/etc/sysconfig/network-scripts/ifcfg-eth0`::
DEVICE=eth0
BOOTPROTO=none
HWADDR=00:06:5B:8C:3C:8E
ONBOOT=yes
TYPE=Ethernet
DHCP_HOSTNAME=llc6
PEERDNS=yes
IPADDR=129.129.190.176
NETMASK=255.255.255.0
GATEWAY=129.129.190.1
USERCTL=no
IPV6INIT=no
`/etc/sysconfig/network`::
NETWORKING=yes
HOSTNAME=llc6
`/etc/sysconfig/lvs`::
# LVS configuration file for LLC and LLCX
VIP=129.129.190.54
`/etc/init.d/lvs`::
#! /bin/sh
#
# chkconfig: 345 90 10
# description: Startscript to initialize this machine as an lvs real server.
# Get network configuration
. /etc/sysconfig/network
# Get functions
. /etc/rc.d/init.d/functions
# Get VIP from the LVS configuration file
. /etc/sysconfig/lvs
# Check that networking is up
if [ ${NETWORKING} = "no" ] ; then
exit 0
fi
RETVAL=0
# See how we were called.
case "$1" in
start)
# Add rule
echo "Starting load balancing mechanism with NAT iptables "
/sbin/iptables -t nat -A PREROUTING -d $VIP -j REDIRECT
;;
stop)
# Delete rule
echo "Stopping load balancing mechanism with NAT iptables "
/sbin/iptables -t nat -D PREROUTING -d $VIP -j REDIRECT
;;
*)
echo "Usage: $0 {start|stop}"
exit 1
;;
esac
exit $RETVAL
Update Procedure
----------------
Director Update
~~~~~~~~~~~~~~~
Login to llclb1 as root and run yum update::
# yum clean all
# yum update
Then reboot the director::
# reboot
After rebooting no lvs rules are set by default::
[root@llclb1 ~]# ipvsadm -L
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
As soon as the realservers are updated and rebooted, too (see next
section), run the lvs setup script to initialize the lvs rules for the
ssh loadbalancing::
# sh /etc/setup-LVS-DR-director.conf
0
setting icmp redirects (1 on, 0 off)
1
1
1
SIOCADDRT: File exists
eth0:1 Link encap:Ethernet HWaddr 00:14:5E:6B:13:3E
inet addr:129.129.190.54 Bcast:129.129.190.54 Mask:255.255.255.255
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
Interrupt:169 Memory:d8300000-d8310000
PING 129.129.190.54 (129.129.190.54) 56(84) bytes of data.
64 bytes from 129.129.190.54: icmp_seq=1 ttl=64 time=0.053 ms
--- 129.129.190.54 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.053/0.053/0.053/0.000 ms
Kernel IP routing table
Destination Gateway Genmask Flags MSS Window irtt Iface
129.129.190.54 0.0.0.0 255.255.255.255 UH 0 0 0 eth0
129.129.190.0 0.0.0.0 255.255.255.0 U 0 0 0 eth0
169.254.0.0 0.0.0.0 255.255.0.0 U 0 0 0 eth0
0.0.0.0 129.129.190.1 0.0.0.0 UG 0 0 0 eth0
PING 129.129.190.175 (129.129.190.175) 56(84) bytes of data.
64 bytes from 129.129.190.175: icmp_seq=1 ttl=64 time=2.13 ms
--- 129.129.190.175 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 2.139/2.139/2.139/0.000 ms
PING 129.129.190.176 (129.129.190.176) 56(84) bytes of data.
64 bytes from 129.129.190.176: icmp_seq=1 ttl=64 time=0.172 ms
--- 129.129.190.176 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.172/0.172/0.172/0.000 ms
Timeout (tcp tcpfin udp): 259200 120 300
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP llc.psi.ch:ssh rr
-> llc6.psi.ch:ssh Route 1 1 1
-> llc5.psi.ch:ssh Route 1 0 0
Realserver Update
~~~~~~~~~~~~~~~~~
The realservers should be updated automatically as they are standard
SL desktop hosts. Login as root to the corresponding realserver,
e.g. llc5, and verify that the update was performed correctly, if not
fix it first.
Then reboot the realserver. The iptables rule for the direct routing
is initialized automatically by the init script /etc/init.d/lvs::
# reboot
Finally, test the ssh connection from any client to llc::
# [anyuser@anyhost] ssh llc
@@ -0,0 +1,164 @@
NX Server/Client Installation
=============================
References
----------
- http://freenx.berlios.de/
- http://nomachine.com/
- http://wiki.centos.org/HowTos/FreeNX/
Introduction
------------
This document describes the setup of an NX server/client
infrastructure on SL54.
First, a more generic installation procedure is illustrated, second,
the PSI default nx server/client setup is shown.
Generic Installation
--------------------
NX Server
~~~~~~~~~
Required Packages
.................
- nx
- freenx
The packages were found in the centos-extras repository. One can use
the following yum repo file, for instance.
File `/etc/yum.repos.d/centos-extras.repo`::
[centos-extras]
name=Centos Extras for SL5.5
baseurl=ftp://mirror.switch.ch/mirror/centos/5.5/extras/$basearch/
enabled=0
Procedure
.........
Install the required packages on your platform, i386 or x86_64::
# yum --enablerepo centos-extras install freenx
Now, generate the ssh keys::
# nxkeygen
The keys are stored in `/etc/nxserver` and in the home directory of
the nx user account::
# cd /var/lib/nxserver/home/
# ls -l .ssh/
-rw------- 1 nx root 672 Oct 22 16:28 authorized_keys2
-rw------- 1 nx root 672 Oct 22 16:28 client.id_dsa.key
-rwx------ 1 nx root 392 Oct 22 16:14 known_hosts
-rw------- 1 nx root 605 Oct 22 16:28 server.id_dsa.pub.key
The private key `client.id_dsa.key` has to be copied to the nx client
(see next section).
NX Client
~~~~~~~~~
Required Packages
.................
- nxclient
Procedure
.........
NoMachine does not allow the distribution of their client, so it must
be downloaded from their website at http://nomachine.com/.
After downloading, install it::
# rpm -ivh nxclient-3.4.0-7.i386.rpm
Get the private ssh key of user nx from the server and copy it to the
client::
# [root@server]
# scp /var/lib/nxserver/home/.ssh/client.id_dsa.key client:/usr/NX/share/keys/
Create the following symbolic link to the key on the client::
# [root@client]
# cd /usr/NX/share/keys/
# ln -s client.id_dsa.key server.id_dsa.key
Now, you can login to the nx server::
# nxclient
PSI Installation
----------------
The difference between the generic and the PSI installation is that
the NX packages can be installed from our local repository. Further,
the PSI default keys are part of this installation, i.e. installing
the RPMS sets up a working NX server/client environment.
NX Server
~~~~~~~~~
Required Packages
.................
- nx
- freenx
- freenx-psi
Currently the server packages are available from the SL54 psi-beta
repository, while the client packages are located in the PSI nonfree
repo.
Procedure
.........
File `/etc/yum.repos.d/psi-beta.repo`::
[psi-beta]
name=54 psi beta
baseurl=http://linux.web.psi.ch/dist/scientific/54/beta/
enabled=0
Install freenx, nx and freenx-psi on the server::
# yum --enablerepo psi-beta install freenx nx freenx-psi
NX Client
~~~~~~~~~
Required Packages
.................
- nxclient
- nxclient-psi
Procedure
.........
Install nxclient and nxclient-psi on the client host::
# yum install nxclient nxclient-psi
Now, you can login to the nx server::
# nxclient
@@ -0,0 +1,162 @@
Prepare a new release
=====================
Introduction
------------
This document describes the preparation of a new SLP major or minor
release for SL >= 5.4.
Requirements
------------
Before you start you need a clean copy of the original SL repositories
of interest from http://www.scientificlinux.org.
The SL repositories shall be mirrored to our AFS mirror on its own
volume, e.g. `/afs/psi.ch/software/mirror/scientific/54/` is on AFS
volume `sw.mirror.sl54.nb`.
However, the mirror is usually made automatically by the script
`/afs/psi.ch/software/mirror/scripts/mirror.sh` which is run by cron
on pxeserv01. To add a new distribution to the mirror script you have
to edit the file `/afs/psi.ch/software/mirror/scripts/mirror.conf`.
You also need a separate AFS distribution volume, e.g. `sw.dist.sl54`
for `/afs/psi.ch/software/linux/dist/scientific/54`.
Once this is done, you're ready to start with the Procedure section.
Procedure
---------
1. Go to the SL distribution **top** directory, change to the
subdirectory `scripts` and run the following scripts::
# cd /afs/psi.ch/software/linux/dist/scientific/
# cd scripts/Sl60/
# sh 1-create_the_new_dist-config_file.sh <new_osversion>
# sh 2-copy_the_kernel_and_initrd_for_tftp_boot.sh
# sh 3-copy_the_puppet_environment_for_desktop.sh # If a new one should be used,
# usually only for major release
# changes
# Note: Some of the files might need some manual customization, e.g.
#
# module_gdm (pictures)
# module_splash (pictures)
# module_background_image (pictures)
# module_buildsystem
# sh 4-create_new_dist_directory_structure.sh <new_osversion> <pre_osversion>
### Check and edit the new files if necessary, especially the following ones:
# scripts/dist-config
# scripts/KERNEL_MODULES_TO_BUILD/sl6.i386.kms-to-be-built
# scripts/KERNEL_MODULES_TO_BUILD/sl6.x86_64.kms-to-be-built
# scripts/build-gpfs.sl6x.sh
# scripts/build-openafs.sl6x.i686.sh
# scripts/build-openafs.sl6x.x86_64.sh
# scripts/copy_the_sl_base_repos_from_the_local_mirror.sh
# scripts/rsync_security_updates_to_all_directories.sh
<pre_osversion> is the distribution which serves as a template for
<new_osversion>.
The scripts will prepare most of the stuff automatically based on
the previous version. Some of the new files, however, still have to
be edited manually.
2. Go to the SL distribution directory, change to the subdirectory
`scripts` and run the following scripts::
# cd /afs/psi.ch/software/linux/dist/scientific/60/scripts/
2.1 Copy the SL Base Repos
The script below copies the sl6base repository from the mirror on
AFS to the dist SL6 directory taking into account the
`rsync_exclude.rpmlist`, i.e. not all RPMS will be copied. To keep
the repodata consistent with the RPMS in the Packages directory,
the repodata has to be updated, too, by running the second script
shown here::
# sh copy_the_sl_base_repos_from_the_local_mirror.sh
# sh run_createrepo_on_base_i386.sh #sl64: still to do
# sh run_createrepo_on_base_x86_64.sh #sl64: still to do
2.2 Copy the Security Updates::
# sh rsync_security_updates_to_all_directories.sh
2.3 Build kernel module if minor release
If you prepare a new major release skip this section.
If you prepare a new minor release you already have build systems
where you can build kernel modules.
Thus check whether you have to build at least the openafs and the
gpfs modules. If the kernel versions of this and the previous SL
minor versions do not differ you should already have the modules
copied to e.g. the nonfree and psi repos, then there's nothing to
do here.
2.4 Update The Symbolic Links in RPMS_all::
# sh update_symlinks_in_RPMS_all.sh
3. Create new snapshots, release UNSTABLE and set the initial PSI
version::
# ./create_new_snapshots.sh alldirs > \
# INITIAL_LOG/2011-02-25-create_new_snapshots.log 2>&1
# ./release_unstable.sh
# ./create_new_psi_version.sh
# ./create_file_psiversion-info.sh
6. Write the new kickstart config files.
Go to the kickstart config directory and use one of the old
kickstart config files as a template::
# cd /afs/psi.ch/software/linux/kickstart/configs/
8. Make the new entries in the tftpd file.
Go to the tftpd directory and edit the file default::
# cd /afs/psi.ch/service/linux/tftpboot/pxelinux.cfg/
# vi default
#<<< sl64: hereiam:
#<<< to do: fix /usr/bin/psi-fix_file_permission, module_localadmin
9. Now perform first installation tests and debug where necessary.
10. If you prepare a new major release, install new build systems.
12. If you prepare a new major release, go to
`/afs/psi.ch/project/linux/dist` and create the directory
structure below for the RPM build environment::
slp6
|-- RPMS/
| |-- i386/
| |-- i486/
| |-- i586/
| |-- i686/
| |-- noarch/
| `-- x86_64/
`-- SRPMS/
14. Build kernel modules.
15. Also get new software versions for non SL repos (psi, others,
etc.).
16. Again create new snapshots and a new PSI version.
17. Now perform the second test installation and debugging suite.
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,609 @@
Installation `psi-puppet2`
==========================
References
----------
- https://wiki.intranet.psi.ch/AIT/PuppetServer2009
- http://reductivelabs.com/trac/puppet/wiki/PluginsInModules
Introduction
------------
This document describes the Installation of the new puppet server
psi-puppet2, which replaces the puppet server running on pxeserv01.
The psi-puppet2 server is an interim solution for the pxeserv01
replacement for the following reasons:
- The puppet service on pxeserv01 is very unstable.
- The performance on pxeserv01 is very bad, which makes work almost impossible.
- The psi-puppet1 server is not ready for production yet.
The new system will be setup as a 32 bit SL5 server with the latest
puppet server package and dependencies installed.
The puppet server configuration is the same as the one on pxeserv01.
**Note**: Don't confuse psi-puppet2 with psi-puppet1, which not only
is intended as a replacement of the puppet server on pxeserv01 but also
comes with a reimplementation of the whole puppet configuration,
whereas psi-puppet2 takes over the puppet configuration from
pxeserv01.
Description of the Basic Server Setup
-------------------------------------
- Hardware
Dell Power Edge 1750
- Operating System
SL51 32 bit Server
- Network configuration
Static IP for Production Server: 129.129.190.160/24
Hostname: psi-puppet2.psi.ch
- Required RPMS
- puppet-server (http://download.fedora.redhat.com/pub/epel/5/i386/repoview/)
- augeas-libs
- facter
- puppet (http://download.fedora.redhat.com/pub/epel/5/i386/repoview/)
- ruby
- ruby-augeas
- ruby-libs
- ruby-shadow
- ruby-irb (required for reading help)
- ruby-rdoc (required for reading help)
Procedure
---------
Setup The Puppet Server
~~~~~~~~~~~~~~~~~~~~~~~
Install The Puppet Server Software
..................................
Use the yum repositories `/etc/yum.repos.d/epeli386.repo` and
`/etc/yum.repos.d/epelx86_64.repo` to download and install the latest
puppet packages::
[epeli386]
name=epel i386
baseurl=http://download.fedora.redhat.com/pub/epel/5/i386/
enabled=0
Install the puppet-server package. Dependencies will be resolved::
# yum --enablerepo=epeli386 install puppet-server
...
Dependencies Resolved
=============================================================================
Package Arch Version Repository Size
=============================================================================
Installing:
puppet-server noarch 0.24.8-1.el5.1 epel 26 k
Installing for dependencies:
augeas-libs i386 0.5.1-1.el5 epel 196 k
facter noarch 1.5.5-1.el5 epel 54 k
puppet noarch 0.24.8-1.el5.1 epel 542 k
ruby i386 1.8.5-5.el5_2.6 sl5update 279 k
ruby-augeas i386 0.2.0-1.el5 epel 17 k
ruby-libs i386 1.8.5-5.el5_2.6 sl5update 1.6 M
ruby-shadow i386 1.4.1-7.el5 epel 9.5 k
Transaction Summary
=============================================================================
Install 8 Package(s)
Update 0 Package(s)
Remove 0 Package(s)
...
For later use, download the required RPMS and copy them to our yum
repository, see section `Put Puppet Related RPMS To Our Yum
Repository` below.
Configure The Puppet Server
...........................
Use the puppet server configuration of pxeserv01 on psi-puppet2.
The configuration files of the puppet server, directory
`/etc/puppet/`, are located on AFS, see section `Mount AFS Volumes`.
The log is on the local disk in `/var/log/puppet`. To set the logfile
edit the line `PUPPETMASTER_OPTS` in
`/etc/rc.d/init.d/puppetmaster`. For testing also the debug option
`-d` is enabled::
PUPPETMASTER_OPTS="-v -d -l /var/log/puppet/puppetmaster.log"
Enable puppetmaster in runlevels 3, 4 and 5::
# chkconfig --levels 345 puppetmaster on
Mount AFS Volumes on Puppet Server
..................................
The puppet server configuration files as well as the puppet manifests
for clients are located on AFS.
AFS is already mounted as `/afs` in this default SL5 server installation::
# mount
...
AFS on /afs type afs (rw)
Now, we want to remount `/afs/psi.ch/service/linux/puppet/etc/puppet`
on `/etc/puppet`. Therefore the mount option `bind` is used, which
allows remounting parts of already mounted filesystems at an
alternative location in the file hierarchy.
As shown below we do the remount in `/etc/rc.local`, which is executed
after all the other init scripts::
#!/bin/sh
touch /var/lock/subsys/local
# Puppet
mount -o bind /afs/psi.ch/service/linux/puppet/etc/puppet /etc/puppet
# Restart Services depending on afs mounts
/etc/init.d/puppetmaster restart
Before the `rc.local` script can be applied the proper AFS permissions
have to be set.
Check the AFS permissions::
# [gasser_m@pc7377 ~]
# fs la /afs/psi.ch/service/linux/
Access list for /afs/psi.ch/service/linux/ is
Normal rights:
svc_linux:tools l
svc_linux:puppet l
svc_linux:readonly rl
svc_linux:pxe l
svc_linux rlidwka
Create a new AFS group for the puppet server::
# pts creategr svc_linux:puppet_hosts -owner svc_linux
group svc_linux:puppet_hosts has id -10851
Add the IP of psi-puppet2 to this group::
# pts adduser 129.129.190.160 svc_linux:puppet_hosts
Set the AFS permissions::
# fs sa /afs/psi.ch/service/linux/ svc_linux:puppet_hosts l
# fs sa /afs/psi.ch/service/linux/puppet/ svc_linux:puppet_hosts rl
# fs sa /afs/psi.ch/service/linux/puppet/etc svc_linux:puppet_hosts rl
# fs sa /afs/psi.ch/service/linux/puppet/var svc_linux:puppet_hosts rl
# fs sa /afs/psi.ch/service/linux/puppet/etc/puppet svc_linux:puppet_hosts rl
Recursively set the same permissions to all subdirectories underneath
`/afs/psi.ch/service/linux/puppet/etc/puppet`::
# cd /afs/psi.ch/service/linux/puppet/etc/puppet
# find -noleaf -type d -exec fs sa {} svc_linux:puppet_hosts rl \;
Then restart the AFS service::
# service afs restart
Finally, run the `rc.local` script or just reboot psi-puppet2 to see
whether everything comes up.
Allow User Login Access To psi-puppet2
......................................
To enable remote login via ssh and execution of root commands via
sudo, the following files have to be edited.
- `/etc/passwd`
- `/etc/shadow`
- `/etc/security/ssh.allow`
- `/etc/group`
- `/etc/hosts.allow`
- `/etc/sudoers`
passwd::
gasser_m:!:2374:840:Gasser Marc:/afs/psi.ch/user/g/gasser_m:/bin/bash
kapeller:!:3804:9102:Rene Kapeller:/afs/psi.ch/user/k/kapeller:/bin/bash
billich:!:3830:840:Heinrich Billich:/afs/psi.ch/user/b/billich:/bin/bash
markushin:!:3883:840:Valeri Markushin:/afs/psi.ch/user/m/markushin:/bin/bash
shadow::
gasser_m:NP:::::::
kapeller:NP:::::::
billich:NP:::::::
markushin:NP:::::::
group::
ait::840
sls::9102
ssh.allow::
kapeller
billich
markushin
gasser_m
hosts.allow::
sshd: ... pc7377 pc7377.psi.ch gfalc05 gfalc05.psi.ch pc4568 pc4568.psi.ch
Configuring Puppet Reporting
............................
There are a number of different report processors available on the
master. The default report, store, simply stores the report file on
the disk.
By default, each client is configured not to report back to the
master. It has to be enabled either by the report option in
`puppet.conf` or using `--report` on the command line.
`/etc/puppet/puppet.conf`::
[puppetd]
report = true
Command line::
# puppetd --report
Store Report Processor
,,,,,,,,,,,,,,,,,,,,,,
Enable the store reports by using the `reports` configuration option
in the puppetmasterd section of the `puppet.conf` file on the master.
`/etc/puppet/puppet.conf`::
[puppetmasterd]
reports = store
The default reports directory is $vardir/reports.
Rrdgraph Report Processor
,,,,,,,,,,,,,,,,,,,,,,,,,
To enable the `rrdgraph` reports, `rrdtool` and `rrdtool-ruby`
packages have to be installed.
The packages are available from the `psi-beta` repository, they
originate from the repository shown below.
`/etc/yum.repos.d/epeli386.repo`::
[epeli386]
name=epel i386
baseurl=http://download.fedora.redhat.com/pub/epel/5/i386/
enabled=0
# yum install rrdtool rrdtool-ruby
Then, configure `puppet.conf` by adding the lines shown below in the
corresponding section. Here store and rrdgraph are enabled.
`/etc/puppet/puppet.conf`::
[puppetmasterd]
reports = store,rrdgraph
rrddir = $vardir/rrd
rrdinterval = $runinterval
rrdgraph = true
Install the Ganglia Monitor Daemon
..................................
Install `ganglia-gmond` and add the configuration file shown below::
# yum install ganglia-gmond
Use the same `/etc/gmond.conf` as on psi-puppet1::
# /etc/init.d/gmond start
For the ganglia server configuration ask Valeri Markushin.
See puppet at http://129.129.190.27/ganglia/.
Setup The Puppet Client
~~~~~~~~~~~~~~~~~~~~~~~~
Either install or update the puppet package on a client::
# yum --enablerepo=psi-beta install puppet
# yum --enablerepo=psi-beta update puppet
Test The Puppet Clients
~~~~~~~~~~~~~~~~~~~~~~~
To find out which versions of puppet servers and clients are
compatible to each other and in which order they should be updated,
the following tests are performed.
Test the Old Client With The New Server
.......................................
Set the new server `psi-puppet2.psi.ch` in `/etc/puppet/puppet.conf`,
remove the certificate and run puppetd::
# [root@vsl5132de-ut]
# cd /var/puppet/
# rm -rf ssl/
# puppetd --test
Ok.
Test the New Client With The New Server
.......................................
Update the puppet client. Set the new server `psi-puppet2.psi.ch` in
`/etc/puppet/puppet.conf`, remove the certificate and run puppetd::
# [root@vsl5132de-ut]
# yum --enablerepo=psi-beta update puppet
# cd /var/puppet/
# rm -rf ssl/
# puppetd --test
Ok.
Test the New Client With The Old Server
.......................................
Update the puppet client. Then set the old server `pxeserv01.psi.ch`
in `/etc/puppet/puppet.conf`, remove the certificate and run puppetd::
# [root@vsl5132de-ut]
# yum --enablerepo=psi-beta update puppet
...
=============================================================================
Package Arch Version Repository Size
=============================================================================
Updating:
puppet noarch 0.24.8-1.el5.1 psi-beta 542 k
Installing for dependencies:
augeas-libs i386 0.5.1-1.el5 psi-beta 196 k
ruby-augeas i386 0.2.0-1.el5 psi-beta 17 k
ruby-shadow i386 1.4.1-7.el5 psi-beta 9.5 k
Updating for dependencies:
facter noarch 1.5.5-1.el5 psi-beta 54 k
Transaction Summary
=============================================================================
Install 3 Package(s)
Update 2 Package(s)
Remove 0 Package(s)
...
# cd /var/puppet/
# rm -rf ssl/
# puppetd --test
Not ok. A lot of error messages appear. It seems the new client is not
compatible to the old server!
Conclusion
..........
First update the server, then the client.
So, when you have an old client addressing the new server for tests,
and finally you want to change it back to the old server, this should
work by just modifying `/etc/puppet/puppet.conf` on the client.
However, once you updated the client you can not go back to the old
server, unless you downgrade the client --- with yum this means
removing the new client and reinstalling the old one.
Put Puppet Related RPMS To Our Yum Repository
---------------------------------------------
To have a consistent puppet installation on our hosts we put them to
our repository.
Use yumdownloader, package yum-utils, to download puppet related RPMS::
# root@psi-puppet2
# yum install yum-utils
# yumdownloader --enablerepo=epeli386 puppet-server augeas-libs facter \
# puppet ruby-augeas ruby-shadow
# yumdownloader --enablerepo=epelx86_64 augeas-libs ruby-augeas ruby-shadow
**Note**: There seem to be only i386 and noarch versions of the
required RPMS.
Before they are linked into the latest `testing` and `current`
repositories they should be tested. So copy them to the `psi-beta`
repository first::
# scp *rpm gasser_m@tux50:/afs/psi.ch/software/linux/dist/scientific/51/beta
# gasser_m@tux50
# cd /afs/psi.ch/software/linux/dist/scientific/51/beta
# createrepo .
To test the puppet clients see section `Test The Puppet Clients`.
If the tests passed successfully copy them to the `others` repository
and create symbolic links to `testing` and `current` to make them
available::
# [root@psi-puppet2]
# scp *rpm gasser_m@tux50:/afs/psi.ch/software/linux/dist/scientific/51/others/all
# gasser_m@tux50
# cd /afs/psi.ch/software/linux/dist/scientific/51/others/all
# createrepo .
Update Puppet
-------------
Keep the following order.
- Update the puppet server.
- Update the puppet client.
Update The Puppet Server
~~~~~~~~~~~~~~~~~~~~~~~~
At the time of writing the latest Puppet version 0.25.1 was only
available at http://tmz.fedorapeople.org/repo/puppet/epel/5/i386/.
Download the required packages to SL51 psi-beta repository first::
# wget http://tmz.fedorapeople.org/repo/puppet/epel/5/i386/puppet-server-0.25.1-0.3.el5.noarch.rpm
# wget http://tmz.fedorapeople.org/repo/puppet/epel/5/i386/puppet-0.25.1-0.3.el5.noarch.rpm
Then login to a test server and stop the puppetmaster daemon::
# /etc/init.d/puppetmaster stop
Because the /etc/puppet/ is on AFS, root has no write permissions.
So, first umount the etc/puppet from AFS, then run `yum update` using
the psi-beta repository, and remount etc/puppet::
# umount /etc/puppet
# yum --enablerepo=psi-beta update puppet-server
# mount -o bind /afs/psi.ch/service/linux/puppet/etc/puppet /etc/puppet
Set the following options in /etc/init.d/puppetmaster::
PUPPETMASTER_OPTS="-v -d -l /var/log/puppet/puppetmaster.log"
Finally, restart the service and test it with a client::
# /etc/init.d/puppetmaster start
Login to a client and run puppetd::
# puppetd --test
Update The Puppet Client
~~~~~~~~~~~~~~~~~~~~~~~~
Basically, the 0.24.x clients should be compatible with the 0.25.x
server. However, there are some changes between the versions that
might cause trouble. Thus, the puppet clients should be updated, too.
On a SL51 client run::
# yum --enablerepo=psi-beta update puppet
On a SL54 client run::
# yum update puppet
As soon as the client is updated, its `puppet.conf` needs some
modifications, because of the "factsync" option which is deprecated
and replaced by "pluginsync" in the 0.25.x versions.
`/etc/puppet/puppet.conf` on a 0.25.x client: "factsync" is replaced
by "pluginsync", factpath is set (not clear whether the latter is
necessary)::
[main]
vardir = /var/puppet
logdir = /var/log/puppet
rundir = /var/run/puppet
ssldir = $vardir/ssl
pluginsync = true
factpath = $vardir/lib/facter
environment = DesktopSL5Unstable
[puppetd]
report = true
classfile = $vardir/classes.txt
localconfig = $vardir/localconfig
server = psi-puppet1.psi.ch
Further the directory structure on the server for placing facts
changed. The new structure if using modules and environments is
illustrated below taking the environment "DesktopSL5Unstable" as an
example:
The modulepath for DesktopSL5Unstable is::
modulepath = /var/puppet/environments/DesktopSL5Unstable/modules
A stub module called "custom" has to be created in the "$modulepath"
subdirectory to keep the files::
$modulepath/custom/
`-- lib/
|-- facter/
| `-- sysconfig_psi_desktop.rb
`-- puppet/
|-- provider/
`-- type/
This subdirectory tree under custom is implicitly searched by the
puppet server.
**Notes**:
If you have both the new and the old variant to keep facts and option
"pluginsync" is enabled, only the new script location will be considered.
If you run only the old variant to keep facts::
$modulepath/facts/files/somescript.rb
with option "factsync" enabled, they will be loaded giving out a
warning "... use pluginsync instead of factsync ..."
For more information see
http://reductivelabs.com/trac/puppet/wiki/PluginsInModules.
@@ -0,0 +1,145 @@
Puppet Trouble-shooting in TWiki
================================
Introduction
------------
This is the place to post puppet problems and solutions at AIT
and GFA.
Please use the following format according to the first entry below:
- HEADING2: Error (HOSTNAME): "BEGIN OF ERROR MESSAGE ..."
*(The HOSTNAME in parentheses is optional, if the problem might affect any host.)*
- HEADING3: Context, a short description in what situation the error appears.
- HEADING3: Solution, if known, provide a possible solution to fix the problem.
Puppet Server Errors
--------------------
Error (pxeserv01): "Starting puppetmaster: Could not prepare for execution ..."
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Context
.......
The puppetmaster daemon on pxeserv01 received an unattended SIGTERM
and stopped. Maybe there is a self signed certificate which expired
or something. Anyway, restarting failed showing the following error::
# [root@pxeserv01 puppet]# /etc/init.d/puppetmaster start
Starting puppetmaster: Could not prepare for execution: Retrieved certificate does not \
match private key; please remove certificate from server and regenerate it with the current key
Solution
........
Create a new certificate for the server.
**IMPORTANT NOTE**: Recreating a new certificate for the server showed
that the clients which still have the old certificate of the server
can not connect anymore to it. Thus you need to be sure you have a
way to update the certificate of the server on the clients before
applying this. Eventually, a better solution might be to just sign
the old server certificate with `puppetca`. It was not tested whether
this works, too.
Remove /var/puppet/ssl/ on the server::
# rm -rf /var/puppet/ssl/
Then try to start puppetmaster, which failed again, but recreated the
ssl subdirectory and its content::
# /etc/init.d/puppetmaster start
Finally, sign the certificate for pxeserv01 itself::
# puppetca --sign pxeserv01.psi.ch
# /etc/init.d/puppetmaster start
Via our management console the certificates on the clients had to be
deleted, so that they can retrieve the new server certificate::
# for i in host1 host2 ...
# do
# ssh -o ConnectTimeout=1 root@$i rm -rf /var/puppet/ssl/
# done
Puppet Client Errors
--------------------
Error: "Could not request certificate: ..."
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Context
.......
Running puppetd on any client might fail showing the following error::
# puppetd --test
err: Could not request certificate: Retrieved certificate does not match private key;
please remove certificate from server and regenerate it with the current key
Exiting; failed to retrieve certificate and waitforcert is disabled
Solution
........
The problem is that, for whatever reason, the client certificate is not
valid, i.e. it does not match its copy stored on the puppet server.
Usually this happens when a host is reinstalled keeping its original
hostname. During the initial puppetd run, the client requests a new
certificate from the server, which denies, because there is already a
certificate on the server for that hostname.
To fix this you have to delete the old certificate on client and
server.
On the server::
# puppetca --clean HOSTNAME.psi.ch
HOSTNAME.psi.ch
notice: Removing file Puppet::SSL::Certificate HOSTNAME.psi.ch at
'/var/puppet/ssl/ca/signed/mpc1273.psi.ch.pem'
If this notice does not appear, the certificate was not deleted.
On the client::
# rm -rf /var/puppet/ssl/
Then, run puppetd again::
# puppetd --test
Error: "notice: Run of Puppet configuration client already in progress; skipping ..."
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Context
.......
Running puppetd on any client might fail showing the following error,
even though `ps ax | grep puppetd` does not show a running puppet process::
# puppetd --test
notice: Run of Puppet configuration client already in progress; skipping
Solution
........
There is still an old lock file `/var/puppet/state/puppetdlock`.
Remove the lock file and run puppetd again::
# rm /var/puppet/state/puppetdlock
@@ -0,0 +1,98 @@
Puppet Manifests for SL 5.3
===========================
Introduction
------------
As we are planning to upgrade from SL 5.1 to SL 5.3, we decided to
reorganize and reimplement all the client configuration manifests.
Procedure
---------
**Note**: Here we are still in the development state, thus filenames
and everything probably will be changed for the productive setup.
Puppet SVN
~~~~~~~~~~
To manage the changes to manifests and client configuration files we
use `subversion (svn)` as a revision control::
# ENV=CnodeSL5
# mkdir /var/puppet/environments/$ENV
# cd /var/puppet/environments/$ENV
Check out all manifests::
# svn co svn+ssh://svn.psi.ch/repos/linux/kickstart/trunk/puppet/manifests
Check out modules individually::
# mkdir /var/puppet/environments/$ENV/modules
# cd /var/puppet/environments/$ENV/modules
# svn co svn+ssh://svn.psi.ch/repos/linux/kickstart/trunk/puppet/Modules/facts
# svn co svn+ssh://svn.psi.ch/repos/linux/kickstart/trunk/puppet/Modules/cnode
# svn co svn+ssh://svn.psi.ch/repos/linux/kickstart/trunk/puppet/Modules/psibasic
# svn co svn+ssh://svn.psi.ch/repos/linux/kickstart/trunk/puppet/Modules/ssh
# svn co svn+ssh://svn.psi.ch/repos/linux/kickstart/trunk/puppet/Modules/ntp
# svn co svn+ssh://svn.psi.ch/repos/linux/kickstart/trunk/puppet/Modules/scratch
On the Puppet Server Side
~~~~~~~~~~~~~~~~~~~~~~~~~
Restart the puppet server. For testing use some increased verbosity
(-v), debug mode (-d) and log to a file (-l). Set these options in
`/etc/init.d/puppetmaster` using variable `PUPPETMASTER_OPTS`::
# vi /etc/init.d/puppetmaster
...
PUPPETMASTER_OPTS="-v -d -l /var/log/puppet/puppetmaster.log"
...
# /etc/init.d/puppetmaster restart
On the Puppet Client Side
~~~~~~~~~~~~~~~~~~~~~~~~~
To specify which environment the Puppet client uses you can specify a
value for the environment configuration variable in the client's
`puppet.conf` file. Here the environment `developmentSL53` is set.
Additionally the name of the puppet server `psi-puppet1.psi.ch` is
assigned::
# vi /etc/puppet/puppet.conf
[main]
vardir = /var/puppet
logdir = /var/log/puppet
rundir = /var/run/puppet
ssldir = $vardir/ssl
environment = developmentSL53
[puppetd]
classfile = $vardir/classes.txt
localconfig = $vardir/localconfig
factsync = true
server = psi-puppet1.psi.ch
In the Kickstart
~~~~~~~~~~~~~~~~
In the SL 5.3 Installation Tree
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Create the base directories, its subdirectories and some first test
file for the `Basic` class as declared above in the manifest
`basic.pp` on the server::
# cd /afs/psi.ch/software/linux/dist/scientific/53/
# mkdir -p puppet/files/Basic/etc
# touch puppet/files/Basic/etc/puppet-test-file
@@ -0,0 +1,76 @@
Puppetmaster At PSI
===================
Procedure
---------
Manual Check Whether Puppetmaster Is Running
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Currently, the `puppetmasterd` process often crashes and automatic
restart fails. Thus it has to be started manually again. Therefore login
to `pxeserv01`, check whether the process is still running and if not,
restart `puppetmasterd`.
Check whether the `puppetmasterd` is running::
[root@pxeserv01 ~]# ps ax | grep puppet
549 pts/0 S+ 0:00 grep puppet
6641 ? S 7:53 /bin/bash ./test-and-restart-puppetmaster.sh
It's not running.
Check the logfile. The logfile is situated on AFS::
# tail /afs/psi.ch/service/linux/puppet/var/log/puppetmaster.log
...
Mon Jan 26 09:22:27 +0100 2009 Puppet (notice): Compiled configuration for slsnedi1.psi.ch in 0.09 seconds
Mon Jan 26 09:24:55 +0100 2009 Puppet (notice): Caught TERM; shutting down
Mon Jan 26 09:24:55 +0100 2009 Puppet (notice): Shutting down
Mon Jan 26 09:24:55 +0100 2009 Puppet (err): Could not remove PID file /var/run/puppet/puppetmasterd.pid
Mon Jan 26 09:25:01 +0100 2009 Puppet (err): Could not create PID file: /var/run/puppet/puppetmasterd.pid
In this case remove `/var/run/puppet/puppetmasterd.pid` and restart the server::
# rm /var/run/puppet/puppetmasterd.pid
# /etc/init.d/puppetmaster restart
Check again whether the `puppetmasterd` is running::
# [root@pxeserv01 ~]# ps ax | grep puppet
549 pts/0 S+ 0:00 grep puppet
6641 ? S 7:53 /bin/bash ./test-and-restart-puppetmaster.sh
31599 ? Ssl 0:03 /usr/bin/ruby /usr/sbin/puppetmasterd \
--logdest=/afs/psi.ch/service/linux/puppet/var/log/puppetmaster.log
Finally, test it on a client::
# [root@pc7377 ~]# psi-puppet
...
puppetd was running successfully
Automatic Check Whether Puppetmaster Is Running
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
On `pxeserv01` the script `/root/test-and-restart-puppetmaster.sh`
continuously observes the `puppetmasterd` and should restart it when
it's hanging.
The script is started in the following way::
# [root@pxeserv01 etc]# nohup /root/test-and-restart-puppetmaster.sh &
The output is written to:
- `$HOME/nohup.out`
The logfile.
- `/dev/shm/puppetwatch.1`
- `/dev/shm/puppetwatch.2`
@@ -0,0 +1,188 @@
Update SL53 i386 and x86_64
===========================
Get Native Scientific Linux Updates
-----------------------------------
This section describes how the particular linux repositories are
updated by looking for new RPMS in our mirror and copying them from
there to the repositories.
Get the Latest Security Update RPMS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Get the latest security update RPMS from the local SL53 mirror for
both architectures, i386 and x86_64, by invoking
`update_repo_all_directories.sh` on tux50. They will be copied to the
corresponding `.../update.${ARCH}/all/` directories. This will take
some time::
# cd /afs/psi.ch/software/linux/dist/scientific/53/scripts/
# ./update_repo_all_directories.sh > ~/tmp/20090930-update_repo_all_directories.sl53.output 2>&1
Then, check for errors in the log file::
# grep -i error ~/tmp/20090930-update_repo_all_directories.sl53.output
...
Finally, run `update_symlinks_in_rpms_all.sh` to keep all symlinks in
the directory
`/afs/psi.ch/software/linux/dist/scientific/53/RPMS_all/` up to date.
The script removes dead links and creates new links to the new RPMS.
This step is not strictly necessary for running PSI updates; it is
simply convenient to have a directory listing all RPMS of a
distribution::
# ./update_symlinks_in_rpms_all.sh
Create A New PSI Version And Release The SLP Snapshots
------------------------------------------------------
Keep the following order:
- Update the "all" repositories. (Described in section "Get Native Scientific Linux Updates" above.)
- Create new snapshots. (Described in section texttext)
- Release "unstable" from new snapshots.
- Create a new PSI version.
- Release "testing".
- Release "stable".
Release Unstable
~~~~~~~~~~~~~~~~
The "unstable" distribution is where active development of SLP occurs.
Generally, this distribution is run by developers and those who like
to live on the edge.
The command `release_unstable.sh` will update the respective symlinks
`.../unstable` to the latest snapshots.
Because `release_unstable.sh` is interactive, you should not redirect
the output to a file, as you won't be able to see the questions asked.
Before `release_unstable.sh`::
# [gasser_m@tux50]
# cd /afs/psi.ch/software/linux/dist/scientific/53/
# ls -l */unstable
# ./release_unstable.sh
After `release_unstable.sh`::
# ls -l */unstable
As soon as an unstable distribution has become testing a new unstable
can be generated that again points to the new latest snapshots.
Create A New PSI Version
~~~~~~~~~~~~~~~~~~~~~~~~
A new PSI version consists of symbolic links which point to the same
target snapshots as the latest unstable snapshots. As soon as it has
been created, the PSI auto-update process is active again for the
hosts which are set to unstable::
# cd /afs/psi.ch/software/linux/dist/scientific/53/scripts
# ./create_new_psi_version.sh
Release Testing
~~~~~~~~~~~~~~~
The "testing" distribution contains packages that haven't been
accepted into a "stable" release yet, but they are in the queue for
that. The main advantage of using this distribution is that it has
more recent versions of software.
The command `release_testing.sh` will update the respective symlinks
`.../testing` to the latest unstable snapshots.
Because `release_testing.sh` is interactive, you should not redirect
the output to a file, as you won't be able to see the questions asked.
Before `release_testing.sh`::
# [gasser_m@tux50]
# cd /afs/psi.ch/software/linux/dist/scientific/53/
# ls -l */testing
# ./release_testing.sh
After `release_testing.sh`::
# ls -l */testing
Release Stable
~~~~~~~~~~~~~~
The "stable" distribution, formerly known as "current", contains the
latest officially released distribution of SLP.
This is the production release of SLP, the one which we primarily
recommend using.
The command `release_stable.sh` will update the respective symlinks
`.../stable` to the latest testing snapshots.
Because `release_stable.sh` is interactive, you should not redirect
the output to a file, as you won't be able to see the questions asked.
Before `release_stable.sh`::
# [gasser_m@tux50]
# cd /afs/psi.ch/software/linux/dist/scientific/53/
# ls -l */stable
lrwxr-xr-x 1 gasser_m ait 8 Sep 18 11:25 cluster/stable -> 20090316
lrwxr-xr-x 1 gasser_m ait 8 Sep 18 11:23 enhanced/stable -> 20090316
lrwxr-xr-x 1 gasser_m ait 18 Sep 18 11:24 kernel/stable -> 2.6.18-128.1.1.el5
lrwxr-xr-x 1 gasser_m ait 8 Sep 18 11:24 nonfree/stable -> 20090316
lrwxr-xr-x 1 gasser_m ait 8 Sep 18 11:22 others/stable -> 20090316
lrwxr-xr-x 1 gasser_m ait 8 Sep 18 11:21 psi/stable -> 20090821
lrwxr-xr-x 1 gasser_m ait 8 Sep 18 11:22 update.i386/stable -> 20090820
lrwxr-xr-x 1 gasser_m ait 8 Sep 18 11:21 update.x86_64/stable -> 20090820
# ./release_stable.sh
### begin ./release_stable.sh ###
Sourcing configuration file ./dist-config
TOP_DIR is /afs/psi.ch/software/linux/dist/scientific/53
Running ./release_stable.sh ...
Latest snapshot in psi:
/afs/psi.ch/software/linux/dist/scientific/53/psi/testing -> 20090916
Latest snapshot in others:
/afs/psi.ch/software/linux/dist/scientific/53/others/testing -> 20090916
Latest snapshot in update.i386:
/afs/psi.ch/software/linux/dist/scientific/53/update.i386/testing -> 20090916
Latest snapshot in update.x86_64:
/afs/psi.ch/software/linux/dist/scientific/53/update.x86_64/testing -> 20090916
Latest snapshot in enhanced:
/afs/psi.ch/software/linux/dist/scientific/53/enhanced/testing -> 20090916
Latest snapshot in kernel:
/afs/psi.ch/software/linux/dist/scientific/53/kernel/testing -> 2.6.18-128.7.1.el5
Latest snapshot in nonfree:
/afs/psi.ch/software/linux/dist/scientific/53/nonfree/testing -> 20090916
Latest snapshot in cluster:
/afs/psi.ch/software/linux/dist/scientific/53/cluster/testing -> 20090916
Relink stable to the latest snapshots (y/n)?
After `release_stable.sh`::
# ls -l */stable
+21
View File
@@ -0,0 +1,21 @@
Repair the RPM DB
=================
Introduction
------------
Sometimes the RPM database gets corrupted, and any command that
manipulates or queries RPMS might fail.
In that case it can be necessary to remove the corrupted RPM database
files and to reinitialize and rebuild the database.
Procedure
---------
Run::
# cd /var/lib/rpm
# rm -f __db.*
# rpmdb --initdb
# rpmdb --rebuilddb
@@ -0,0 +1,120 @@
Installation of SAP Citrix Client and Prerequisites
===================================================
SL6 32 bit
----------
First tests with rebuild of SRPMS of pcsc-lite and pcsc-lite-libs
version 1.5.2-7 without hal support.
Install pcsc-lite 1.5.2 (SmartCard daemon)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Run::
# yum install pcsc-lite
# yum install pcsc-lite-libs
Install Omnikey SmartCard driver
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Run::
# yum --enablerepo beta install omnikey-usb-3121-driver # todo: move the driver to the stable repo
# /etc/init.d/pcscd restart
or for debug mode::
# /usr/sbin/pcscd -df
Install Citrix ICA Client
~~~~~~~~~~~~~~~~~~~~~~~~~
Run::
# yum install openmotif libXp
# yum install ICAClient
SL6 64 bit
----------
Install pcsc-lite-1.5.3
~~~~~~~~~~~~~~~~~~~~~~~
The following packages have to be present/installed.
- alsa-lib, alsa-lib.i686, gtk2-devel, gtk2-devel.i386, glibc, glibc.i686, glibc-devel, glibc-devel.i686
- libgcc, libgcc.i686, libusb, libusb.i686, libusb-devel, libusb-devel.i686
- libXpm, libXpm.i686, libXaw, libXaw.i686, nspluginwrapper, nspluginwrapper.i686
- openmotif, openmotif.i686, openmotif-devel, openmotif-devel.i686
- libusb1, libusb1.i686, libusb1-devel, libusb1-devel.i686
Remove the preinstalled pcsc-lite packages::
# rpm -e --nodeps pcsc-lite pcsc-lite-libs pcsc-lite-openct
Download, unpack pcsc-lite-1.5.3 sources, run configure and build the
daemon::
# wget --no-check-certificate https://alioth.debian.org/frs/download.php/3017/pcsc-lite-1.5.3.tar.bz2
# tar xvf pcsc-lite-1.5.3.tar.bz2
# cd pcsc-lite-1.5.3
# ./configure --enable-usbdropdir=/usr/lib/pcsc/drivers --disable-libhal --prefix=/usr CFLAGS=-m32 CXXFLAGS=-m32 LDFLAGS=-m32
# make
# make check
# make install
Install Omnikey SmartCard driver
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Run::
# yum --enablerepo beta install omnikey-usb-3121-driver # todo: move the driver to the stable repo
#todo: dependency pcsc-lite-libs???
# /etc/init.d/pcscd restart
or for debug mode::
# /usr/sbin/pcscd -df
Install Citrix ICA Client
~~~~~~~~~~~~~~~~~~~~~~~~~
Run::
# yum install openmotif libXp
# yum install ICAClient
Start SAP Session
-----------------
Plug in the Omnikey 3121 USB SmartCard reader, and plug in the
SmartCard.
Start the Citrix Receiver (E.g. in KDE: Applications --> Internet -->
Citrix Receiver) and login to the terminal server tsadlm01, use your
Windows login and password.
Then run the APPGATE_Start application by clicking the icon.
Open the connection to the server acc1.caz.admin.ch using the method
certificate and keep the autoselected certificate, read from the
SmartCard reader. Click ok.
Enter your password for the SmartCard token and click ok.
Internet Explorer starts and opens two websites, one from HP and one
from the BIT. Close them or leave them open, as you like.
Switch to the cmd.exe console of the terminal server and press any
key. UltraLogon will start and open the SAP main window, where you
select your session.
+282
View File
@@ -0,0 +1,282 @@
Update SL57 and later i386 and x86_64
=====================================
Introduction
------------
Keep the following order:
- Get the latest security updates from the SL mirror for the
"update.$ARCH" repository.
- Build kernel modules.
- Get updates for the "psi" repository.
- Get updates for the "other" repository.
- Get updates for the "nonfree" repository.
- Create new snapshots.
- Release "unstable" from new snapshots.
- Create a new PSI version.
- Release "testing".
- Release "stable".
Update The Different Repositories
---------------------------------
Get the Latest Security Update RPMS From SL Mirror
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Get the latest security update RPMS from the local SL57 mirror for
both architectures, i386 and x86_64, by invoking
`rsync_security_updates_to_all_directories.sh` on tux50.
They will be copied to the corresponding `.../update.${ARCH}/all/`
directories. This will take some time::
# cd /afs/psi.ch/software/linux/dist/scientific/57/scripts/
# nohup ./rsync_security_updates_to_all_directories.sh > \
# UPDATE_LOG/$(date +%Y-%m-%d)-rsync_security_updates_to_all_directories.log 2>&1 &
Then, check for errors in the log file::
# grep -i error UPDATE_LOG/2009-12-21-rsync_security_updates_to_all_directories.log
...
Finally, run `update_symlinks_in_rpms_all.sh` to keep all symlinks in
the directory
`/afs/psi.ch/software/linux/dist/scientific/57/RPMS_all/` up to date.
The script removes dead links and creates new links to the new RPMS.
This step is not strictly necessary for running PSI updates; it is
simply convenient to have a directory listing all RPMS of a
distribution::
# ./update_symlinks_in_RPMS_all.sh
Build Kernel Modules
~~~~~~~~~~~~~~~~~~~~
First install the latest kernel and kernel-devel packages on the
respective build systems.
Then build the RPMS on the corresponding build systems using the
scripts below.
32 bit::
# [gasser_m@tukan50-32]
# cd /afs/psi.ch/software/linux/dist/scientific/57/scripts
# sh build_kernel_modules.sh 2.6.18-194.3.1.el5 > \
# UPDATE_LOG/20100630-build_kernel_modules-2.6.18-194.3.1.el5.i686.log 2>&1
# check-set-of-built-kernel-modules.bash /tmp/rpms_for_2.6.18-194.3.1.el5 \
# KERNEL_MODULES_TO_BUILD/sl5.i386.kms-to-be-built
# sh build-gpfs.sl5x.i386.sh
64 bit::
# [gasser_m@tux50-64]
# cd /afs/psi.ch/software/linux/dist/scientific/57/scripts
# sh build_kernel_modules.sh 2.6.18-194.3.1.el5 > \
# UPDATE_LOG/20100630-build_kernel_modules-2.6.18-194.3.1.el5.x86_64.log 2>&1
# check-set-of-built-kernel-modules.bash /tmp/rpms_for_2.6.18-194.3.1.el5 \
# KERNEL_MODULES_TO_BUILD/sl5.x86_64.kms-to-be-built
# sh build-gpfs.sl5x.x86_64.sh
Finally, from both build systems, copy the built kernel modules to the
psi repository::
# cp -av `cat /tmp/rpms_for_2.6.18-194.3.1.el5` /afs/psi.ch/software/linux/dist/scientific/57/psi/all/
Get Updates For The Psi Repository
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Get Updates For The Other Repository
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Get Updates For The Nonfree Repository
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Create New Snapshots
--------------------
The script `create_new_snapshots.sh` will pick out the latest version
of each RPM found in the active repositories.
It will create a subdirectory (snapshot) in each repository and name
it after the current date using the format YYYYMMDD.
Then it will place a symbolic link into this subdirectory for each
latest RPM found in the `all` subdirectory of the currently processed
repository.
To invoke it run the following command::
# nohup ./create_new_snapshots.sh alldirs > UPDATE_LOG/$(date +%Y-%m-%d)-create_new_snapshots.log 2>&1 &
Release Unstable, Create A New PSI Version And Release The Other SLP Snapshots
------------------------------------------------------------------------------
Release Unstable
~~~~~~~~~~~~~~~~
The "unstable" distribution is where active development of SLP occurs.
Generally, this distribution is run by developers and those who like
to live on the edge.
The command `release_unstable.sh` will update the respective symlinks
`.../unstable` to the latest snapshots.
Because `release_unstable.sh` is interactive, you should not redirect
the output to a file, as you won't be able to see the questions asked.
Before `release_unstable.sh`::
# [gasser_m@tux50]
# cd /afs/psi.ch/software/linux/dist/scientific/57/
# ls -l */unstable
# ./release_unstable.sh
After `release_unstable.sh`::
# ls -l */unstable
As soon as an unstable distribution has become testing a new unstable
can be generated which again points to the new latest snapshots.
Create A New PSI Version
~~~~~~~~~~~~~~~~~~~~~~~~
Run::
# cd /afs/psi.ch/software/linux/dist/scientific/57/scripts
# sh create_new_psi_version.sh
A new PSI version consists of symbolic links which point to the same
target snapshots as the latest unstable snapshots. As soon as it has
been created, the psi-version-info.txt file has to be updated to
activate the PSI auto-update again for the hosts which are set to
unstable::
# ./create_file_psiversion-info.sh
Eventually, test the unstable release.
Release Testing
~~~~~~~~~~~~~~~
The "testing" distribution contains packages that haven't been
accepted into a "stable" release yet, but they are in the queue for
that. The main advantage of using this distribution is that it has
more recent versions of software.
The command `release_testing.sh` will update the respective symlinks
`.../testing` to the latest unstable snapshots.
If the puppet environment DesktopSL5Unstable was modified and the
changes should be made in DesktopSL5Testing, too, run
`rsync_puppet_env_testing_with_unstable.sh` before
`release_testing.sh`.
Because `release_testing.sh` is interactive, you should not redirect
the output to a file, as you won't be able to see the questions asked::
# [gasser_m@tux50]
# cd /afs/psi.ch/software/linux/dist/scientific/57/scripts
# ./rsync_puppet_env_testing_with_unstable.sh
# ./rsync_kickstart_dir_DesktopTesting_with_DesktopUnstable.sh
# ./release_testing.sh
# ./create_file_psiversion-info.sh
Release Stable
~~~~~~~~~~~~~~
To activate the PSI auto-update again for testing hosts, the
psi-version-info.txt file has to be updated.
The "stable" distribution, formerly known as "current", contains the
latest officially released distribution of SLP.
This is the production release of SLP, the one which we primarily
recommend using.
The command `release_stable.sh` will update the respective symlinks
`.../stable` to the latest testing snapshots.
The procedure is analogous to the procedure described in the "Release
Testing" section::
# ./rsync_puppet_env_stable_with_testing.sh
# ./rsync_kickstart_dir_DesktopStable_with_DesktopTesting.sh
# ./release_stable.sh
# ./create_file_psiversion-info.sh
### begin ./release_stable.sh ###
Sourcing configuration file ./dist-config
TOP_DIR is /afs/psi.ch/software/linux/dist/scientific/57
Running ./release_stable.sh ...
Latest snapshot in psi:
/afs/psi.ch/software/linux/dist/scientific/57/psi/testing -> 20090916
Latest snapshot in others:
/afs/psi.ch/software/linux/dist/scientific/57/others/testing -> 20090916
Latest snapshot in update.i386:
/afs/psi.ch/software/linux/dist/scientific/57/update.i386/testing -> 20090916
Latest snapshot in update.x86_64:
/afs/psi.ch/software/linux/dist/scientific/57/update.x86_64/testing -> 20090916
Latest snapshot in kernel:
/afs/psi.ch/software/linux/dist/scientific/57/kernel/testing -> 2.6.18-128.7.1.el5
Latest snapshot in nonfree:
/afs/psi.ch/software/linux/dist/scientific/57/nonfree/testing -> 20090916
Latest snapshot in cluster:
/afs/psi.ch/software/linux/dist/scientific/57/cluster/testing -> 20090916
Relink stable to the latest snapshots (y/n)?
Before `release_stable.sh`::
# [gasser_m@tux50]
# cd /afs/psi.ch/software/linux/dist/scientific/57/
# ls -l */stable
lrwxr-xr-x 1 gasser_m ait 8 Sep 18 11:25 cluster/stable -> 20090316
lrwxr-xr-x 1 gasser_m ait 8 Sep 18 11:23 enhanced/stable -> 20090316
lrwxr-xr-x 1 gasser_m ait 18 Sep 18 11:24 kernel/stable -> 2.6.18-128.1.1.el5
lrwxr-xr-x 1 gasser_m ait 8 Sep 18 11:24 nonfree/stable -> 20090316
lrwxr-xr-x 1 gasser_m ait 8 Sep 18 11:22 others/stable -> 20090316
lrwxr-xr-x 1 gasser_m ait 8 Sep 18 11:21 psi/stable -> 20090821
lrwxr-xr-x 1 gasser_m ait 8 Sep 18 11:22 update.i386/stable -> 20090820
lrwxr-xr-x 1 gasser_m ait 8 Sep 18 11:21 update.x86_64/stable -> 20090820
After `release_stable.sh`::
# ls -l */stable
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,46 @@
VPN Client on Linux
===================
References
----------
- vpnc manpage
Requirements
------------
- VPN Client Package
Procedure
---------
Installation
~~~~~~~~~~~~
Login as localadmin (Green PC) or root (Red PC) and install the VPN
related packages::
# yum install vpnclient-psi
# yum install vpnc
Configuration
~~~~~~~~~~~~~
For Users
.........
You don't have to configure anything manually, the configuration was
done by installing the `vpnclient-psi` package.
For Packager
............
The default configuration files are `/etc/vpnc/default.conf` and
`/etc/vpnc.conf`.
The Cisco VPN client config file `vpn-psi.pcf` for PSI is provided by
Tobias Marx. The command `/usr/share/doc/vpnc-0.5.3/pcf2vpnc` is used
to convert the `.pcf` to `.conf`::
# perl /usr/share/doc/vpnc-0.5.3/pcf2vpnc vpn-psi.pcf vpn-psi.conf
+34
View File
@@ -0,0 +1,34 @@
Monitoring and Reporting
========================
Cron jobs
---------
Cron jobs send mail on error, usually to `linuxmaster@psi.ch`, a
distribution list.
Nagios/Icinga
-------------
Ganglia
-------
Other
-----
Some systems dump information in
``/afs/psi.ch/service/linux/statistics/rawdata``. Others save it to
``/afs/psi.ch/intranet/Controls/tmp/sysreport/PSI``.
The PXE server `pxeserv01` regularly runs a script,
`/afs/psi.ch/service/linux/tftpboot/tools/script/pxe_script`, which copies
`/var/log/tftp.log` to `/afs/psi.ch/service/linux/tftpboot/tools/log/`,
compresses it, and produces statistics, which are saved as `pxeStatistik-$(date
+%Y%m%d).txt`.
The script is currently run as user `vojisavljevic` and the run is configured in
that user's crontab.
+8
View File
@@ -0,0 +1,8 @@
Puppet
======
There have been and still are several instances of Puppet at PSI.
- ``pxeserv01`` (obsolete, no puppetmaster running)
- ``psi-puppet3`` (used by Controls-IT)
- ``psi-puppet4``
+19
View File
@@ -0,0 +1,19 @@
Services
========
Login cluster
-------------
The login cluster allows users to run interactive programs. The
cluster consists of several nodes `llcN.psi.ch`, where N is 1, 2,
or 3. There is also a load-balancer, `llclb1.psi.ch`, which uses IPVS
with a round robin policy to distribute incoming connections.
Jump hosts
----------
Many servers can only be accessed via SSH through one of the two jump
hosts, `wmgt01.psi.ch` and `wmgt02.psi.ch`. These two systems require
two-factor authentication (currently using a Cryptocard dongle) and
offer a very restricted environment.
+10
View File
@@ -0,0 +1,10 @@
Software
========
.. toctree::
:maxdepth: 1
software/repositories
software/packaging
software/modules
+26
View File
@@ -0,0 +1,26 @@
Modules
=======
The Scientific Computing team provides a number of programs using Modules. The
Modules configuration files and the corresponding software are stored on AFS.
To use Modules it is necessary to create a symlink::
ln -snf /afs/psi.ch/sys/psi.x86_64.slp6/ /opt/psi
where ``x86_64.slp6`` describes the architecture and OS of the system the
modules should be used on. It is the output of ``fs sysname``. Every session
needs to ``source /opt/psi/config/profile.bash`` as well.
The available modules can be listed by running::
module avail
Note that the modules are hierarchical, i.e. only the modules that can actually
be loaded are listed. Some modules require that other modules are loaded first,
e.g. a certain version of gcc, and are listed only after that has been done.
Modules are loaded by running::
module load gcc/4.7.3
+35
View File
@@ -0,0 +1,35 @@
Packaging
=========
Currently, the two most important custom packages are OpenAFS and GPFS
(see below), but there are many others. The sources and spec files
for RPMs are stored on AFS in
`/afs/psi.ch/project/linux/src/scientific/RPMS/` and
`/afs/psi.ch/project/linux/src/RPMS/`. They should be - and partially
have been - moved to Gitlab, group `linux-packages`.
Build hosts
-----------
OpenAFS
-------
GPFS
----
The source files for GPFS as well as certain tools for building the
packages are stored in
`/afs/psi.ch/project/linux/src/scientific/RPMS/gpfs/`. The
`README.build` file in that directory contains instructions on how to
build GPFS for a given combination of GPFS version and kernel version.
The files `default-version.${release}.${architecture}` contain the
GPFS Release and supported versions for the specific `${release}` and
`${architecture}`, e.g.::
GPFSVERSION="3.5.0-24 4.1.1-4"
GPFSRELEASE=7
@@ -0,0 +1,245 @@
Repositories
============
A number of repositories are currently mirrored on AFS under
``/afs/psi.ch/software/mirror`` (``$MIRROR``), in particular the following
versions of Scientific Linux, both 32 and 64 bit: 5.7, 6.0, 6.4, and 6.x.
The repositories are accessible via HTTP at http://linux.web.psi.ch/.
They are stored in ``$MIRROR/scientific``, each subdirectory of which
is a mount point for a separate AFS volume. This is done because
smaller AFS volumes are easier to handle (e.g. when moving them
between fileservers).
Each mirror, e.g. ``$MIRROR/scientific/60/epelp/`` contains a
subdirectory ``all``, which contains the packages that we actually
want in our YUM repositories.
Repository management as well as package building can be done on the
various ``tux*.psi.ch`` servers.
Further documentation on repository management can be found in
``/afs/psi.ch/project/linux/doc``. This documentation will eventually
be included in this document.
The directory ``$MIRROR/scientific/scripts`` contains scripts for
various tasks related to managing the Scientific Linux repositories.
It is a working copy of a `git
repository <http://git.psi.ch/linux-dist/sl-scripts>`_.
Tools
-----
There are several scripts that are used to maintain/update the various
repositories.
``sync_updates.sh``
~~~~~~~~~~~~~~~~~~~
This script copies new packages from the SL mirror to the PSI distribution
directories, and runs ``createrepo --update`` to update the repository metadata.
Information on the directories is read from a file ``dist-config`` in the
current working directory.
``update_symlinks``
~~~~~~~~~~~~~~~~~~~
Updates
-------
To release updates to the existing repositories, the steps below have to be
performed. The value of ``DIST_DIR`` is the base directory of the distribution
to be updated, i.e. ``/afs/psi.ch/software/linux/dist/scientific/xx/``, where
``xx`` is one of ``57``, ``60``, or ``64``.
1. Update add-on repositories
2. cd "${DIST_DIR}/scripts"
3. ./sync_updates.sh
4. ./update_symlinks.sh
5. ./create_snapshots.sh alldirs
6. ./release_unstable.sh
7. ./incr_version.sh
8. ./update_version_info.sh
9. test new unstable release
10. ./release.sh testing
11. ./update_version-info.sh
12. test testing release
13. ./release.sh stable
14. ./update_version-info.sh
The sections below describe each step in detail. The value of ``PRJ_DIR`` is
``/afs/psi.ch/project/linux/``.
Kernel updates
~~~~~~~~~~~~~~
If there is a new kernel, some of the modules need to be rebuilt. For SL 5.7
there is a script in the ``scripts`` subdirectory taking care of this:
``build_kernel_modules.sh``.
Update add-on repositories
~~~~~~~~~~~~~~~~~~~~~~~~~~
GPFS
,,,,
rsync --archive --verbose ${PRJ_DIR}/dist/slp6/RPMS/*/kmod-gpfs* "${DIST_DIR}/psi/all"
rsync --archive --verbose ${PRJ_DIR}/dist/slp6/RPMS/*/gpfs* "${DIST_DIR}/nonfree/all"
Notes:
- Don't know why the kernel modules for GPFS are in 'psi' but the GPFS user-land
software is in 'nonfree'.
- Hans-Christian is responsible for building RPMs
OpenAFS
,,,,,,,
Note: OpenAFS version on tux50 and tux50-64 is 1.4.x. On all other systems it is 1.6.x.
Build OpenAFS
+++++++++++++
On tux50, tux50-64, tux60, tux60-64, tux70.
Either::
rsync --verbose ${PRJ_DIR}/dist/slp6/RPMS/*/kernel-module-openafs-* "${DIST_DIR}/psi/all"
or: copy dedicated versions
Notes: Achim is responsible for building OpenAFS RPMs.
Checklist
+++++++++
New version available? If yes, build binaries:
========= ===== ========= ========
OS New? Compiled Copied?
--------- ----- --------- --------
tux50 no
tux50-64 no
tux60 yes yes yes
tux60-64 yes yes yes
tux70-64 yes yes yes
========= ===== ========= ========
New kernel or version available? If yes, install kernel, build module:
========= ===== ========= ========
OS New? Compiled Copied?
--------- ----- --------- --------
tux50 no - -
tux50-64 no - -
tux60 yes yes yes
tux60-64 yes yes yes
tux70-64 yes yes yes
========= ===== ========= ========
ZFS
,,,
ZFS kernel modules are available for EL7 only!
Sync from master repository
+++++++++++++++++++++++++++
Run on tux70-64 as normal user::
reposync --repoid=zfs --norepopath --download_path /afs/psi.ch/software/mirror/zfsonlinux/7/x86_64
Build kmod
++++++++++
Build on tux70-64 as root.
Boot into the right (newest) kernel! This is the kernel we are going to build
the modules for. Run::
# set some variables
$ ZFS_VERS='x.y.z'
$ ZFS_REL='r'
$ ZFS_REPO_DIR='/afs/psi.ch/software/linux/dist/scientificlinux/7x/x86_64/zfs/'
$ DIST='el7'
$ ARCH='x86_64'
$ cd /usr/src/spl-$ZFS_VERS
$ ./configure
$ make rpm-utils rpm-kmod
# Install the spl packages, they are required to build zfs.
$ yum localinstall \
kmod-spl-devel-$ZFS_VERS-$ZFS_REL.$DIST.$ARCH.rpm \
kmod-spl-devel-kernel-$ZFS_VERS-$ZFS_REL.$DIST.$ARCH.rpm \
kmod-spl-kernel-$ZFS_VERS-$ZFS_REL.$DIST.$ARCH.rpm \
spl-$ZFS_VERS-$ZFS_REL.$DIST.$ARCH.rpm
$ cd ../zfs-$ZFS_VERS
$ ./configure
$ make rpm-utils rpm-kmod
# you need an AFS-token to copy the files!
$ klog.openafs USERNAME
# for a new ZFS version do
$ cp -v spl-$ZFS_VERS/*.x86_64.rpm "$ZFS_REPO_DIR"
$ cp -v zfs-$ZFS_VERS/*.x86_64.rpm "$ZFS_REPO_DIR"
# to copy kernel modules only, do
$ cp -v spl-$ZFS_VERS/kmod-*-$(uname -r)*.x86_64.rpm "$ZFS_REPO_DIR"
$ cp -v zfs-$ZFS_VERS/kmod-*-$(uname -r)*.x86_64.rpm "$ZFS_REPO_DIR"
# update repo
$ cd "$ZFS_REPO_DIR"
$ createrepo --update .
See also: `<http://zfsonlinux.org/generic-rpm.html>`_
Checklist
+++++++++
If there is a new version: Are the RPMs of the new versions of spl-x.y.z and
zfs-x.y.z in ``/afs/psi.ch/software/linux/dist/scientificlinux/7x/x86_64/zfs/``?
Are the kmod RPMs for the newest kernel in
``/afs/psi.ch/software/linux/dist/scientificlinux/7x/x86_64/zfs/``?
Flash Player
,,,,,,,,,,,,
Download
++++++++
Current Flash-Player must be downloaded from `Adobe
<https://get.adobe.com/de/flashplayer/otherversions/>`_ and copied to
``${DIST_DIR}/nonfree/all``.
Checklist
+++++++++
Is the newest version installed in
``/afs/psi.ch/software/linux/dist/scientific/xx/nonfree/all/`` (``xx`` in
``57``, ``60``, ``64``)?
NVidia
,,,,,,
syslog-ng
,,,,,,,,,
openntpd
,,,,,,,,
+10
View File
@@ -0,0 +1,10 @@
Storage
=======
Currently, data is generally stored on AFS. In particular:
- ``/afs/psi.ch/software/mirror``: Mirrors of external repositories and
software, e.g. CentOS, EPEL.
- ``/afs/psi.ch/software/linux/kickstart``: Kickstart files and related
tools/packages.
- ``/afs/psi.ch/software/linux/dist/``: YUM repositories used at PSI.
+190
View File
@@ -0,0 +1,190 @@
@ECHO OFF
REM Command file for Sphinx documentation
REM
REM Usage: make TARGET - run "make help" for the list of targets.
REM Environment variables read below: SPHINXBUILD, SPHINXOPTS, PAPER.
REM Default to "sphinx-build" unless the caller already set SPHINXBUILD.
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
REM All builder output goes below this directory.
set BUILDDIR=_build
REM Options for the regular builders: doctree cache in BUILDDIR/doctrees,
REM the caller-supplied SPHINXOPTS, and "." as the source directory.
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
REM Options for the gettext builder: the same, but without the -d option.
set I18NSPHINXOPTS=%SPHINXOPTS% .
REM If PAPER is set, prepend -D latex_paper_size=PAPER to both option sets.
if NOT "%PAPER%" == "" (
set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
)
REM help: with no argument, or with "help", print the list of supported
REM targets and finish. The first test jumps to the help label that sits
REM inside the parenthesised block of the second test.
if "%1" == "" goto help
if "%1" == "help" (
:help
echo.Please use `make ^<target^>` where ^<target^> is one of
echo. html to make standalone HTML files
echo. dirhtml to make HTML files named index.html in directories
echo. singlehtml to make a single large HTML file
echo. pickle to make pickle files
echo. json to make JSON files
echo. htmlhelp to make HTML files and a HTML help project
echo. qthelp to make HTML files and a qthelp project
echo. devhelp to make HTML files and a Devhelp project
echo. epub to make an epub
echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
echo. text to make text files
echo. man to make manual pages
echo. texinfo to make Texinfo files
echo. gettext to make PO message catalogs
echo. changes to make an overview over all changed/added/deprecated items
echo. linkcheck to check all external links for integrity
echo. doctest to run all doctests embedded in the documentation if enabled
goto end
)
REM clean: remove every subdirectory of BUILDDIR recursively, then delete
REM the remaining files below BUILDDIR. BUILDDIR itself is kept.
if "%1" == "clean" (
for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
del /q /s %BUILDDIR%\*
goto end
)
REM html: run the html builder into BUILDDIR/html.
REM Exits with code 1 if sphinx-build reports an error level of 1 or higher.
if "%1" == "html" (
%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/html.
goto end
)
REM dirhtml: run the dirhtml builder into BUILDDIR/dirhtml.
REM Exits with code 1 if sphinx-build reports an error level of 1 or higher.
if "%1" == "dirhtml" (
%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
goto end
)
REM singlehtml: run the singlehtml builder into BUILDDIR/singlehtml.
REM Exits with code 1 if sphinx-build reports an error level of 1 or higher.
if "%1" == "singlehtml" (
%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
goto end
)
REM pickle: run the pickle builder into BUILDDIR/pickle.
REM Exits with code 1 if sphinx-build reports an error level of 1 or higher.
if "%1" == "pickle" (
%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the pickle files.
goto end
)
REM json: run the json builder into BUILDDIR/json.
REM Exits with code 1 if sphinx-build reports an error level of 1 or higher.
if "%1" == "json" (
%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the JSON files.
goto end
)
REM htmlhelp: run the htmlhelp builder into BUILDDIR/htmlhelp.
REM Exits with code 1 if sphinx-build reports an error level of 1 or higher.
REM The final message is one echo command continued over two lines by the caret.
if "%1" == "htmlhelp" (
%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
goto end
)
REM qthelp: run the qthelp builder into BUILDDIR/qthelp, then print the
REM follow-up commands for turning the generated project into a help collection.
REM Exits with code 1 if sphinx-build reports an error level of 1 or higher.
REM The collection file passed to assistant has the extension .qhc; the hint
REM previously printed .ghc, which names a file that is never generated.
if "%1" == "qthelp" (
%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
echo.^> qcollectiongenerator %BUILDDIR%\qthelp\LinuxInfrastructure.qhcp
echo.To view the help file:
echo.^> assistant -collectionFile %BUILDDIR%\qthelp\LinuxInfrastructure.qhc
goto end
)
REM devhelp: run the devhelp builder into BUILDDIR/devhelp.
REM Exits with code 1 if sphinx-build reports an error level of 1 or higher.
if "%1" == "devhelp" (
%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished.
goto end
)
REM epub: run the epub builder into BUILDDIR/epub.
REM Exits with code 1 if sphinx-build reports an error level of 1 or higher.
if "%1" == "epub" (
%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The epub file is in %BUILDDIR%/epub.
goto end
)
REM latex: run the latex builder into BUILDDIR/latex. The paper size comes
REM from the PAPER variable, which the setup section folds into ALLSPHINXOPTS.
REM Exits with code 1 if sphinx-build reports an error level of 1 or higher.
if "%1" == "latex" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
if errorlevel 1 exit /b 1
echo.
echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
goto end
)
REM text: run the text builder into BUILDDIR/text.
REM Exits with code 1 if sphinx-build reports an error level of 1 or higher.
if "%1" == "text" (
%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The text files are in %BUILDDIR%/text.
goto end
)
REM man: run the man builder into BUILDDIR/man.
REM Exits with code 1 if sphinx-build reports an error level of 1 or higher.
if "%1" == "man" (
%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The manual pages are in %BUILDDIR%/man.
goto end
)
REM texinfo: run the texinfo builder into BUILDDIR/texinfo.
REM Exits with code 1 if sphinx-build reports an error level of 1 or higher.
if "%1" == "texinfo" (
%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
goto end
)
REM gettext: run the gettext builder into BUILDDIR/locale.
REM Unlike the other targets this one passes I18NSPHINXOPTS, which carries
REM no -d doctrees option.
REM Exits with code 1 if sphinx-build reports an error level of 1 or higher.
if "%1" == "gettext" (
%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
goto end
)
REM changes: run the changes builder into BUILDDIR/changes.
REM Exits with code 1 if sphinx-build reports an error level of 1 or higher.
if "%1" == "changes" (
%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
if errorlevel 1 exit /b 1
echo.
echo.The overview file is in %BUILDDIR%/changes.
goto end
)
REM linkcheck: run the linkcheck builder into BUILDDIR/linkcheck.
REM Exits with code 1 if sphinx-build reports an error level of 1 or higher.
REM The final message is one echo command continued over two lines by the caret.
if "%1" == "linkcheck" (
%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
if errorlevel 1 exit /b 1
echo.
echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
goto end
)
REM doctest: run the doctest builder into BUILDDIR/doctest.
REM Exits with code 1 if sphinx-build reports an error level of 1 or higher.
REM The final message is one echo command continued over two lines by the caret.
if "%1" == "doctest" (
%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
if errorlevel 1 exit /b 1
echo.
echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
goto end
)
REM Common exit point: every handled target jumps here, and an argument
REM that matches no target falls through to here without doing anything.
:end
+10
View File
@@ -0,0 +1,10 @@
Management Tools
================
.. toctree::
:maxdepth: 2
mgmt-tools/sysdb
mgmt-tools/bob
mgmt-tools/sphinx
mgmt-tools/puppetdb
+99
View File
@@ -0,0 +1,99 @@
bob - a CLI sysdb client
========================
``bob`` is a command-line client for sysdb. It supports adding, updating, and
retrieving information about environments, nodes, MAC addresses and attributes.
It can authenticate using Kerberos credentials or username/password pairs.
Installation and setup
----------------------
For production use of bob there will be RPMs for bob and its dependencies.
For the moment the steps are the following::
yum -y install python-setuptools python-requests-kerberos
git clone git@git.psi.ch:linux-infra/bob.git
cd bob
python setup.py install
Configuration
-------------
``bob`` takes a number of arguments, one of which is the base URL of the sysdb
instance. It can be passed via ``--url BASEURL`` on the command-line, or using
the environment variable ``PSI_BOB_URL``. Therefore it is useful to include the
following in your ``.bash_profile``::
export PSI_BOB_URL=https://boot00.psi.ch/
Development
-----------
For development, the easiest way to use bob is the following::
pip install --user click
git clone XXX
cd YYY
pip install --user --editable .
This will place a script ``bob`` in ``~/.local/bin``, which will reference the
code in the working directory of the repository clone. This way, local changes
in the source are effective immediately. It might be a good idea to use
``virtualenv`` to avoid installing dependencies system-wide.
In addition to the ``PSI_BOB_URL`` it can be handy to also have the following
in your ``.bash_profile``::
alias bobtest='PSI_BOB_URL=https://boot00-test.psi.ch/ bob'
alias bobdev='PSI_BOB_URL=http://localhost:5000/ bob'
During development sysdb usually listens on ``localhost:5000`` and doesn't use
SSL, explaining the second alias. It also doesn't support authentication,
instead expecting to find the username in the ``REMOTE_USER`` header. The latter
can be set using the ``--fake-user`` option in bob or using the ``-H`` option in
cURL::
bobdev --fake-user kaminski_k node set-attr foo.psi.ch ipxe_installer=rhel72server
curl -X PUT -H REMOTE_USER:talamo_i http://localhost:5000/sysdb/v1/
Attributes
----------
The following attributes can be set for a node:
+------------------------+------------------+-----------+------------------------------------------------------------------------------+
| attribute name | sample value | mandatory | meaning |
+========================+==================+===========+==============================================================================+
| ``ipxe_installer`` | rhel73installer | yes | the installation entry to use at installation time via IPXE |
+------------------------+------------------+-----------+------------------------------------------------------------------------------+
| ``network`` | static | yes | network setup of the installed system. can be ``static`` or ``dhcp`` |
+------------------------+------------------+-----------+------------------------------------------------------------------------------+
| ``puppet_env`` | prod | yes | puppet environment |
+------------------------+------------------+-----------+------------------------------------------------------------------------------+
| ``puppet_role`` | ``role::server`` | yes | puppet role of the node. String should start with ``role::`` |
+------------------------+------------------+-----------+------------------------------------------------------------------------------+
| ``puppet_group`` | computing | no | defines the group for the hiera values. If not set takes the ``default`` one |
+------------------------+------------------+-----------+------------------------------------------------------------------------------+
root password
~~~~~~~~~~~~~
The root password can be set with the attribute ``rootpw``. The value
of the attribute must be the password hash, and can be generated with the command
``openssl passwd -1 "this is the password"``
partitions
~~~~~~~~~~
Partition schema can be passed as a url using the ``partitions`` attribute.
It must be a publicly accessible url and its content will be copied into the
kickstart section related to the partition.
The usage of this attribute is generally discouraged and is provided to cover
only the cases that are not actually already covered by the installation system,
eg. the software raid configuration.
+52
View File
@@ -0,0 +1,52 @@
puppetdb
========
Authentication
--------------
We need a client certificate (including the key) accepted by the Puppet server
for authentication. On Puppet-enabled nodes the node's certificate works, ie
- Certificate: ``/etc/puppetlabs/puppet/ssl/certs/$(hostname -f).pem``
- Key: ``/etc/puppetlabs/puppet/ssl/private_keys/$(hostname -f).pem``
These can be supplied to ``curl(1)`` using the ``--key``, ``--cert``, and
``--cacert`` options::
curl --cacert /etc/puppetlabs/puppet/ssl/certs/ca.pem \
--key /etc/puppetlabs/puppet/ssl/private_keys/$(hostname -f).pem \
--cert /etc/puppetlabs/puppet/ssl/certs/$(hostname -f).pem \
https://puppet00.psi.ch:8080/pdb/query/v4/nodes
Queries
-------
There are several API endpoints, eg. ``/pdb/query/v4`` (note that there is no
``/`` at the end), or ``/pdb/query/v4/nodes``.
Examples
--------
First, let's define a function to simplify the queries::
function pdb {
local pql=$1
curl --cacert /etc/puppetlabs/puppet/ssl/certs/ca.pem \
--key /etc/puppetlabs/puppet/ssl/private_keys/$(hostname -f).pem \
--cert /etc/puppetlabs/puppet/ssl/certs/$(hostname -f).pem \
-H content-type:application/json --data "{ \"query\": \"$pql\" }" \
https://puppet00.psi.ch:8080/pdb/query/v4 | json_reformat
}
List all nodes::
pdb "nodes[certname] { order by certname }"
List environments and times of the last Puppet run::
pdb "reports[certname,environment,start_time,end_time] { order by certname }"
+40
View File
@@ -0,0 +1,40 @@
Sphinx
======
`Sphinx <http://www.sphinx-doc.org/en/stable/>`_ is a tool for generating
documentation from `reStructuredText
<http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html>`_.
It can generate various formats, including HTML and PDF.
Installing Sphinx
-----------------
On RHEL 7 it is enough to install the ``python-sphinx`` package::
yum install python-sphinx
Editing
-------
On Linux with the ``inotify-tools`` it is easy to have the documentation recompiled
automatically while editing. Just run the following in the base directory of the
documentation::
inotifywait --monitor --recursive --event close_write --exclude '_build' . | while read l; do make html;done
On OSX you can install `fswatch <https://github.com/emcrisostomo/fswatch>`_ and then run the following::
fswatch --exclude=_build --monitor=fsevents_monitor --recursive --event Updated --event Created --event Renamed .| while read l; do make html ;done
An even better alternative is `sphinx-autobuild
<https://pypi.python.org/pypi/sphinx-autobuild>`_, which can be installed using
pip::
pip install sphinx-autobuild
It can then be called like this::
sphinx-autobuild /path/to/sphinx/repo /path/to/sphinx/repo/_build/html/
+20
View File
@@ -0,0 +1,20 @@
sysdb
=====
sysdb is a simple database holding information relevant to system deployment,
including
#. a MAC to FQDN mapping
#. whether to boot from local disk or install
#. if installation is requested, which distribution/release to install
#. which environment a system belongs to, and who can manage the environment
#. arbitrary key-value pairs which can be used to store additional information
like the Puppet role of a system.
sysdb runs on the boot server, ie. ``boot00.psi.ch`` (production) and
``boot00-test.psi.ch`` (testing).
+8
View File
@@ -0,0 +1,8 @@
More
====
.. toctree::
:maxdepth: 2
more/network
+9
View File
@@ -0,0 +1,9 @@
Network
-------
The production infrastructure systems, eg. ``puppet00.psi.ch``, are not
reachable from all networks. One example would be the DMZ.
To allow the deployment and configuration in such networks, firewall changes are
necessary. Firewall changes can be requested through a Service Now Change
Request or Change Task.
+9
View File
@@ -0,0 +1,9 @@
Operations
==========
.. toctree::
:maxdepth: 2
operations/certificates
operations/deployment
operations/version-control
+68
View File
@@ -0,0 +1,68 @@
Managing SSL certificates
=========================
We use QuoVadis certificates from Switch.
Requesting certificates
-----------------------
First create a certificate signing request (CSR) like this, replacing ``$FQDN``
and ``$ALIASES``::
cat >$FQDN.cnf <<EOF
FQDN = $FQDN
ORGNAME = Paul-Scherrer-Institut (PSI)
# subjectAltName entries: to add DNS aliases to the CSR, delete
# the '#' character in the ALTNAMES line, and change the subsequent
# 'DNS:' entries accordingly. Please note: all DNS names must
# resolve to the same IP address as the FQDN.
ALTNAMES = DNS:\$FQDN $ALIASES
# --- no modifications required below ---
[ req ]
default_bits = 2048
default_md = sha256
prompt = no
encrypt_key = no
distinguished_name = dn
req_extensions = req_ext
[ dn ]
C = CH
O = \$ORGNAME
CN = \$FQDN
OU = AIT
[ req_ext ]
subjectAltName = \$ALTNAMES
EOF
/usr/bin/openssl req -new -config $FQDN.cnf -keyout $FQDN.key -out $FQDN.csr
Finally, `submit the CSR <https://www.switch.ch/pki/manage/request/>`_.
This procedure is described in more detail at the `SWITCH website
<https://www.switch.ch/pki/manage/request/howto/>`_.
SWITCH will send an email including instructions on how to download the
certificate.
Renewing certificates
---------------------
Using the same configuration file as above, generate a new private key and CSR,
and submit the CSR as before.
Revoke certificates
-------------------
When receiving the first mail from SWITCH as described in `Requesting
certificates`_ one has to create an account to download the certificate. Using
this account, certificates can be revoked at the `QuoVadis website
<https://tl.quovadisglobal.com/subscriber/>`_.
+41
View File
@@ -0,0 +1,41 @@
Deployment
==========
This section describes how to (re-)deploy Puppet-managed Linux systems. The general process is:
1. Register the system with sysdb (only for new systems).
2. Tell sysdb to perform an installation on the next boot.
3. Reboot the system and trigger a PXE boot (usually by pressing F12 during
POST).
On some systems it is not possible to use PXE boot, either because the network
card doesn't support it, has a bug, or doesn't work with iPXE, or because the
system is in a network from which TFTP requests to the boot server are not
allowed. In those cases it is possible to either boot iPXE from a USB stick or
DVD, or to simply boot the RHEL installer from DVD and point it at the Kickstart
file generated by sysdb.
Registration with sysdb
~~~~~~~~~~~~~~~~~~~~~~~
The easiest way to interact with sysdb is to use bob, a command-line client
written in Python. Alternatively it is possible to use the web API directly.
First, add the new node::
bob node add $FQDN $ENV local
To be able to PXE boot we need to configure at least one MAC address for the new
node::
bob node add-mac $FQDN 00:50:56:aa:fe:9b
Finally we need to configure the installer to use, and the Puppet-related
parameters::
bob node set-attr $FQDN ipxe_installer=rhel72server
bob node set-attr $FQDN puppet_env=prod
bob node set-attr $FQDN puppet_role=role::server
@@ -0,0 +1,85 @@
Version Control
===============
Setting up Git
--------------
Initialize Git to use your username and PSI email address, for example::
git config --global user.name kaminski_k
git config --global user.email kai.kaminski@psi.ch
Gitlab server
-------------
We use the `PSI-internal Gitlab server <https://git.psi.ch>`_ to host our
repositories.
Groups
------
Currently, all of our repositories are in the repository group ``linux-infra``.
Setting up a repository
-----------------------
Repositories containing code are configured along the following lines:
1. Only fast-forward merges are allowed, no merge commits.
2. Merge requests require at least one approval (by someone other than the
requester).
Workflow
--------
Changes to repositories containing **only** documentation can simply be pushed
to the repository in question.
Changes to repositories containing code, SPEC files, or anything else that is
not pure documentation, need to be reviewed - even if the change in question
only changes documentation.
To implement a change, follow these steps:
1. Create an issue on GitLab.
2. Create a feature branch with name starting with
`issue-n-description`.
3. Assign the issue to yourself when you start working on it.
4. Commit the change to the feature branch and push it.
5. Create a merge request mentioning the issue in the commit message
or in the merge request (see `this
<http://doc.gitlab.com/ee/customization/issue_closing.html>`_ for
the syntax).
6. Wait for **someone else** to approve and apply the merge request.
Commit messages
~~~~~~~~~~~~~~~
A commit message follows the well-known `50/72 format
<http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html>`_: 50
characters for the first line of the commit message, followed by a blank line,
which is then followed by zero or more paragraphs with a maximal line-length of
72 characters.
If the commit fixes an issue recorded on Gitlab (which it should), the commit
message must start with ``Fix #N:``, where ``N`` is the number of the issue.
This will automatically close the issue when the commit is merged into master.
Code reviews
~~~~~~~~~~~~
During a code review be sure to check for at least the following:
1. adherence to the guidelines described in this document, e.g. PEP8-compliance
for Python code
2. solid implementation: edge cases and failures are considered and handled
appropriately, etc. Non-obvious bits are explained.
3. documentation has been written or changed as appropriate
+16
View File
@@ -0,0 +1,16 @@
Puppet
======
Puppet documentation goes here.
Contents:
.. toctree::
:maxdepth: 2
puppet/general
puppet/client
puppet/puppet-master
puppet/hiera
puppet/modules
puppet/development
+33
View File
@@ -0,0 +1,33 @@
Puppet client
-------------
Puppet client run is executed once daily between 5AM and 8AM
(``puppet_client::exec_time`` in hiera data).
The puppet client is executed via the ``pli-puppet-run`` systemd timer: ::
[root@lxdev04 ~]# systemctl list-timers
NEXT LEFT LAST PASSED UNIT ACTIVATES
Wed 2017-10-25 15:55:54 CEST 6h left Tue 2017-10-24 15:55:54 CEST 17h ago systemd-tmpfiles-clean.timer systemd-tmpfiles-clean.service
Thu 2017-10-26 07:51:00 CEST 22h left Wed 2017-10-25 07:51:16 CEST 1h 5min ago pli-puppet-run.timer pli-puppet-run.service
2 timers listed.
Pass --all to see loaded but inactive timers, too.
Puppet execution can be disabled for a certain amount of time with the
``/opt/pli/libexec/pli-puppet-disable`` command: ::
[root@lxdev04 ~]# /opt/pli/libexec/pli-puppet-disable
puppet currently not disabled
[root@lxdev04 ~]# /opt/pli/libexec/pli-puppet-disable '1 week'
[root@lxdev04 ~]# /opt/pli/libexec/pli-puppet-disable
Puppet disabled until: Wed Nov 1 08:00:05 CET 2017
[root@lxdev04 ~]# /opt/pli/libexec/pli-puppet-disable stop
Stopping
[root@lxdev04 ~]# /opt/pli/libexec/pli-puppet-disable
puppet currently not disabled
[root@lxdev04 ~]#
The disabling time has to be in the ``date`` format (see date(1)).
+20
View File
@@ -0,0 +1,20 @@
``grub2``
=========
This module provides a defined type to configure kernel arguments in grub2.
Defined Types
-------------
``grub2::kernel_arg``
~~~~~~~~~~~~~~~~~~~~~
Modify a kernel argument.
Parameters:
- ``action``: one of 'add' and 'remove'
- ``value``: specifies the value for arguments of the form ``arg=value``, e.g.
``crashkernel=auto``.
+156
View File
@@ -0,0 +1,156 @@
``logrotate``
=============
This module configures ``logrotate``. Further information and examples can be
found in the following link:
* https://git.psi.ch/linux-infra/voxpupuli-logrotate
Main class ``logrotate`` can be called, but usually calling just a rule ``logrotate::rule``
should be enough to automatically configure ``logrotate`` in the host. It will use the
default values for this class, which is placing all the default ``rules`` from the O.S.
to the ``/etc/logrotate.d`` directory.
Parameters
----------
All available parameters are::
namevar - The String name of the rule.
path - The path String to the logfile(s) to be rotated.
ensure - The desired state of the logrotate rule as a String. Valid
values are 'absent' and 'present' (default: 'present').
compress - A Boolean value specifying whether the rotated logs should
be compressed (optional).
compresscmd - The command String that should be executed to compress the
rotated logs (optional).
compressext - The extension String to be appended to the rotated log files
after they have been compressed (optional).
compressoptions - A String of command line options to be passed to the
compression program specified in `compresscmd` (optional).
copy - A Boolean specifying whether logrotate should just take a
copy of the log file and not touch the original (optional).
copytruncate - A Boolean specifying whether logrotate should truncate the
original log file after taking a copy (optional).
create - A Boolean specifying whether logrotate should create a new
log file immediately after rotation (optional).
create_mode - An octal mode String logrotate should apply to the newly
created log file if create => true (optional).
create_owner - A username String that logrotate should set the owner of the
newly created log file to if create => true (optional).
create_group - A String group name that logrotate should apply to the newly
created log file if create => true (optional).
dateext - A Boolean specifying whether rotated log files should be
archived by adding a date extension rather than just a number
(optional).
dateformat - The format String to be used for `dateext` (optional).
Valid specifiers are '%Y', '%m', '%d' and '%s'.
dateyesterday - A Boolean specifying whether to use yesterday's date instead
of today's date to create the `dateext` extension (optional).
delaycompress - A Boolean specifying whether compression of the rotated
log file should be delayed until the next logrotate run
(optional).
extension - Log files with this extension String are allowed to keep it
after rotation (optional).
ifempty - A Boolean specifying whether the log file should be rotated
even if it is empty (optional).
mail - The email address String that logs that are about to be
rotated out of existence are emailed to (optional).
mailfirst - A Boolean that when used with `mail` has logrotate email the
just rotated file rather than the about to expire file
(optional).
maillast - A Boolean that when used with `mail` has logrotate email the
about to expire file rather than the just rotated file
(optional).
maxage - The Integer maximum number of days that a rotated log file
can stay on the system (optional).
minsize - The String minimum size a log file must be to be rotated,
but not before the scheduled rotation time (optional).
The default units are bytes, append k, M or G for kilobytes,
megabytes and gigabytes respectively.
maxsize - The String maximum size a log file may be to be rotated;
When maxsize is used, both the size and timestamp of a log
file are considered for rotation.
The default units are bytes, append k, M or G for kilobytes,
megabytes and gigabytes respectively.
missingok - A Boolean specifying whether logrotate should ignore missing
log files or issue an error (optional).
olddir - A String path to a directory that rotated logs should be
moved to (optional).
postrotate - A command String that should be executed by /bin/sh after
the log file is rotated (optional).
prerotate - A command String that should be executed by /bin/sh before
the log file is rotated and only if it will be rotated
(optional).
firstaction - A command String that should be executed by /bin/sh once
before all log files that match the wildcard pattern are
rotated (optional).
lastaction - A command String that should be executed by /bin/sh once
after all the log files that match the wildcard pattern are
rotated (optional).
rotate - The Integer number of rotated log files to keep on disk
(optional).
rotate_every - How often the log files should be rotated as a String.
Valid values are 'hour', 'day', 'week', 'month' and 'year'
(optional). Please note, older versions of logrotate do not
support yearly log rotation.
size - The String size a log file has to reach before it will be
rotated (optional). The default units are bytes, append k,
M or G for kilobytes, megabytes or gigabytes respectively.
sharedscripts - A Boolean specifying whether logrotate should run the
postrotate and prerotate scripts for each matching file or
just once (optional).
shred - A Boolean specifying whether logs should be deleted with
shred instead of unlink (optional).
shredcycles - The Integer number of times shred should overwrite log files
before unlinking them (optional).
start - The Integer number to be used as the base for the extensions
appended to the rotated log files (optional).
su_owner - A username String that logrotate should use to rotate a
log file set instead of using the default if
su => true (optional).
su_group - A String group name that logrotate should use to rotate a
log file set instead of using the default if
su => true (optional).
uncompresscmd - The String command to be used to uncompress log files
(optional).
Examples
--------
Configure ``logrotate.conf``::
logrotate::conf { '/etc/logrotate.conf':
rotate => 10,
rotate_every => 'week',
ifempty => true,
dateext => true,
}
Rule for rotating ``messages``::
logrotate::rule { 'messages':
path => '/var/log/messages',
rotate => 5,
rotate_every => 'week',
postrotate => '/usr/bin/killall -HUP syslogd',
}
Rule for rotating ``slurmd`` client::
logrotate::rule { 'slurmd':
path => '/var/log/slurm/slurmd.log',
rotate => $rotate,
rotate_every => $rotate_every,
compress => true,
create => true,
create_mode => '0600',
create_owner => 'slurm',
create_group => 'slurm',
missingok => true,
ifempty => false,
sharedscripts => true,
postrotate => '/bin/systemctl reload slurmd.service > /dev/null 2>/dev/null || true';
}
+43
View File
@@ -0,0 +1,43 @@
``selinux``
===========
This module configures :doc:`/selinux`. It allows configuring the SELinux mode
as well as setting SELinux booleans.
Parameters
----------
``selinux_mode``
~~~~~~~~~~~~~~~~
Acceptable values are ``enforcing``, ``permissive``, and ``disabled``. The
module configures this mode for the next reboot. It also sets it right away
unless the mode to be configured is ``disabled`` or SELinux is currently
disabled. In the former case the current mode is set to ``permissive``. In the
latter case nothing is done before the next reboot.
Defined Types
-------------
``set_sebool``
~~~~~~~~~~~~~~
Sets the SELinux boolean specified as the resource title unless SELinux is
disabled, in which case it does nothing.
Parameters:
- ``enable``
If true, the boolean is set to ``on``. If false, the boolean is set to
``off``.
Examples
--------
Allow httpd to access user's home directories::
selinux::set_sebool {'httpd_enable_homedirs': enable => true }
+24
View File
@@ -0,0 +1,24 @@
``sudo``
========
This module installs and configures ``sudo(8)``.
Parameters
----------
``allow_sudoers_d``
~~~~~~~~~~~~~~~~~~~
This boolean determines whether the files in ``/etc/sudoers.d/`` are included in
the sudo configuration.
Defined types
-------------
``sudo::rules``
~~~~~~~~~~~~~~~
This defined type accepts a parameter ``rules``, which is an array of strings to
be added to ``/etc/sudoers``.
+18
View File
@@ -0,0 +1,18 @@
``sysctl``
===========
This module configures ``sysctl``. Further information and examples can be
found in the official page:
* https://forge.puppet.com/tpdownes/sysctl
The sysctl module is called automatically, so sysctl tuning can be done
by setting the ``sysctl::values`` variable in hiera, e.g.::
sysctl::values:
net.ipv4.tcp_slow_start_after_idle:
value: '0'
net.core.rmem_max:
value: '83886080'
net.core.wmem_max:
value: '83886080'
+77
View File
@@ -0,0 +1,77 @@
``systemd``
===========
This module provides defined types to create systemd units. It automatically
takes care of reloading systemd when necessary, automatically generates correct
names for ``.mount`` units, etc.
The parameters of defined types are usually similar to the systemd option names
that they represent. Camel case (as used by systemd) is transformed to the
lowercase-with-underscore style usually used with Puppet, e.g. ``OnCalendar`` in
systemd becomes ``on_calendar``.
Parameters
----------
``default_target``
~~~~~~~~~~~~~~~~~~
The systemd default target, i.e. usually one of ``multi-user.target`` or
``graphical.target``.
Defined Types
-------------
All units support the following parameters for the ``[Unit]`` or ``[Install]``
sections:
- ``description``
- ``wanted_by``
``systemd::timer``
~~~~~~~~~~~~~~~~~~
Creates a timer unit.
Parameters:
- ``on_calendar``
- ``unit``
``systemd::mount``
~~~~~~~~~~~~~~~~~~
Creates a mount unit. The name of the unit file has to be (an encoded form of)
the mountpoint, which is automatically generated. See ``systemd.mount(5)`` for
details.
Parameters:
- ``what``
- ``where``
- ``options``
``systemd::service``
~~~~~~~~~~~~~~~~~~~~
- ``type`` (default: ``simple``)
- ``exec_start``
Examples
--------
Defining a timer unit
~~~~~~~~~~~~~~~~~~~~~
Timer units integrate nicely with the remaining services. They can be managed
through ``systemctl(1)`` and their log messages can easily be accessed through
``journalctl -u xxxxx.timer``.
A timer unit needs a corresponding service unit which executes the actual
command.
@@ -0,0 +1,24 @@
``updatedb``
============
This module manages the configuration for ``mlocate`` aka ``updatedb(8)``.
Parameters
----------
``enable``
~~~~~~~~~~
A boolean indicating whether ``mlocate`` - ie the ``updatedb(8)`` cron job -
should be enabled or not.
Defined Types
-------------
``updatedb::exclude``
~~~~~~~~~~~~~~~~~~~~~
Takes a list of directories and makes sure that ``updatedb(8)`` ignores them
when indexing.
+32
View File
@@ -0,0 +1,32 @@
``utils``
=========
This module contains a number of utility functions used at PSI. It is the
PSI-specific analogue to Puppetlabs' ``stdlib``.
Functions
---------
``check_os``
~~~~~~~~~~~~
Accepts an arbitrary number of arguments, each of which is a string of the form
``$osfamily $majorversion`` as reported by ``facter(8)``. If the facts of the
client system **do not** match any of the arguments, the function aborts the
Puppet run.
``rand_time``
~~~~~~~~~~~~~
This function takes a parameter specifying a time window and returns a specific
time within this window based on the MD5 of the hostname. This allows scheduling
cron jobs etc. uniformly across a fleet of systems within a given window, while
keeping the time on each system predictable.
The format for the argument specifying the time window is ``HH:MM--HH:MM``. The
window must not include midnight.
The format of the return value is ``HH:MM``.
+52
View File
@@ -0,0 +1,52 @@
Development Environments
========================
The puppet server gives a limited set of users access to the
environment area, where the prod and preprod environments are already
present, so that they can create environments for developing new
puppet code.
This area is accessible via sftp and can be mounted on users'
workstations via sshfs.
The typical workflow would be:
#. user mounts the environment area (``puppet00:/envs`` on the puppet master) via sshfs on ``~/puppetenv``;
#. user creates a directory for the new environment (``mkdir ~/puppetenv/issue_x``);
#. after a few seconds ``issue_x`` will be populated with a copy of the content of the ``preprod`` environment with a proper git branch named ``issue_x``;
#. user edits files in that directory;
#. user runs the command ``puppet agent -t --environment=issue_x`` on some testing node;
#. user iterates on steps number 4 and 5 until the code is ready;
#. user makes the proper add/commit into the directory and finally pushes the code;
#. from the git server web interface user triggers a merge request of the ``issue_x`` into the ``preprod`` branch.
Please note that ssh to the puppet server has to be done via ``wmgt`` nodes.
Environment names
-----------------
The name of the environment:
- must be at least 4 characters long;
- can include lower case letters, digits and the underscore character;
- must start with a letter.
If the name is not compliant with these rules, the directory will be automatically removed.
SSHFS mount
-----------
Generally refer to your operating system instructions on how to
properly configure sshfs.
One caveat is that your local user can be different from the remote (puppet master)
user. In this case a file mapping your local username to the remote user id should be passed to the sshfs client.
The file should be in the form of::
<local username>:<remote uid>
And should then be passed in the command like::
sshfs -o idmap=file,uidfile=/Users/talamoig/uidmap,nomap=ignore talamo_i@puppet00:/ ~/puppetenvs
+96
View File
@@ -0,0 +1,96 @@
General
=======
`Puppet <https://puppetlabs.com>`_ is the *configuration management system* used to configure the hosts.
Introduction
------------
Puppet is used in master mode and the general idea is to make large
use of independent and (relatively) small puppet
modules and composing profiles and roles based on them to create
classes that are assigned to the hosts. With independence
of puppet modules we mean that each puppet module targets a single
functionality (eg. `web server`, `afs client`) and this
is chosen to keep the code of the single module smaller, more
coherent and easier to debug.
Furthermore the system makes use of some puppet modules from the `puppet forge
<https://forge.puppet.com>`_. These puppet modules are not accessed directly but
through their PSI mirror, inside of the `Linux-infra group <https://git.psi.ch/linux-infra>`_.
Code and data are kept separated, using puppet modules
for code and hiera for data. Secure data are safely managed
inside hiera using `hiera eyaml <https://github.com/TomPoulton/hiera-eyaml>`_.
For each host, the following elements determine how it will be configured:
- the puppet environment (that will determine the puppet role/profile code base)
- the sysdb data environment (that will determine the hiera code base)
- the puppet group (that will determine what files will be considered in the hiera code base)
- the role
All these elements are configured inside sysdb as attributes and are accessed
by the puppet master via the ENC.
Here you can get a general overview:
.. following image generated from https://docs.google.com/drawings/d/16AXZd5PF-HgW379Cxgvwzvc6MTl_34LVFHzP5Fi8RdQ/edit
.. image:: puppet_workflow.jpg
Environments
------------
We use Puppet environments for two purposes:
- roll out changes to a small subset of all systems first
- module development
The following environments exist:
- ``prod``
The most stable, and most systems are attached to it. All changes to ``prod``
have to go through ``preprod`` first.
- ``preprod``
- Development environments. These are private to a single developer,
can have arbitrary names like ``ganglia-issue-21`` or
``kaminski_k-log_client``, and are used for developing and testing
changes. Generally, only individual systems are attached to these
environments.
puppet master
-------------
The puppet master will make use of the `ENC
<https://docs.puppet.com/guides/external_nodes.html>`_ for getting
two pieces of information:
- the environment;
- the role.
The environment is used to determine the directory inside
`/etc/puppetlabs/code/environments` in which to look for the code.
The role is a class-name inside the specific environment that will be
used to generate the node catalog.
Assuming for example the following result from the ENC for a specific node: ::
---
environment: production
classes:
- role::log_server
the puppet master will look for a puppet class named
``role::log_server`` in the file
`/etc/puppetlabs/code/environments/production/modules/psi/manifests/role/log_server.pp`.
+201
View File
@@ -0,0 +1,201 @@
Hiera
=====
Look `here <https://docs.puppet.com/hiera/3.1/>`_ for a general Hiera
introduction.
The current hierarchy has four levels (first will be considered first
during value lookup):
- nodes (FQDN)
- group (``puppet_group`` attribute in sysdb)
- sysdb environments
- common
and values can be stored as classical YAML values or with `encrypted yaml
<https://github.com/TomPoulton/hiera-eyaml>`_ for secrets.
The filesystem structure is as follows::
1. ``%{::sysdb_env}/%{::group}/%{::fqdn}``
2. ``%{::sysdb_env}/%{::group}``
3. ``%{::sysdb_env}/%{::sysdb_env}``
4. ``%{::environment}/data/common``
The ``%{variable}`` notation is hiera specific and each path represents a ``.yaml``
file.
Hiera repositories
------------------
Hiera data are organized in different repositories.
Sysdb environments data
^^^^^^^^^^^^^^^^^^^^^^^
Each sysdb environment has a dedicated hiera repository, called ``data-<sysdbenv>``,
eg. `data-hpc <https://git.psi.ch/linux-infra/data-hpc>`_
and `data-sls <https://git.psi.ch/linux-infra/data-sls>`_.
The first three levels of the filesystem structure shown before are actually the
files inside this kind of repositories.
Any change to the repo will automatically trigger a redeployment of the new version
of its content on the puppet master within a few seconds from the push.
This choice has been made to allow groups to change their hiera data independently of
the linux infrastructure admins. Furthermore there is no way to influence other sysdb
environments data.
Common data
^^^^^^^^^^^
The last element in the hierarchy (``common.yaml``) is instead defined inside the main puppet repository
(the one containing also the real puppet code). It is important to notice that the version
of the ``common.yaml`` used for a specific host will depend on the puppet environment it
is running on, while the sysdb environment data are the same, whatever the puppet
environment of the host.
The common part is kept under the control of the linux infrastructure admins
since a change on this can have an impact on a much larger set of hosts and all the changes
on this file are discussed and approved through a longer process.
Example
-------
Assuming two sysdb environments ``hpc`` and ``sls``, as well as:
- group ``merlin4`` in ``hpc`` with ``merlinc10`` and ``merlinc11`` in it;
- group ``merlin5`` in ``hpc`` with ``merlin-c001`` and ``merlin-c002`` in it;
- group ``mx`` in ``sls`` with ``mxcn-1`` and ``mxcn-2`` in it;
- host ``xbl-gateway`` in no explicit group (will take the implicit ``default``)
the Hiera structure would look like this::
data/hpc/merlin4/merlinc10.psi.ch.yaml
data/hpc/merlin4/merlinc11.psi.ch.yaml
data/hpc/merlin4.yaml
data/hpc/merlin5/merlin-c001.psi.ch.yaml
data/hpc/merlin5/merlin-c002.psi.ch.yaml
data/hpc/merlin5.yaml
data/hpc.yaml
data/sls/mx/mxcn-1.psi.ch.yaml
data/sls/mx/mxcn-2.psi.ch.yaml
data/sls/mx.yaml
data/sls/default/xbl-gateway.psi.ch.yaml
data/sls.yaml
code/environments/{prod,preprod}/common.yaml
While the output of bob would be something like (some unneeded attributes have been removed)::
merlinc10.psi.ch hpc local puppet_group=merlin4
merlinc11.psi.ch hpc local puppet_group=merlin4
merlin-c001.psi.ch hpc local puppet_group=merlin5
merlin-c002.psi.ch hpc local puppet_group=merlin5
mxcn-1.psi.ch sls local puppet_group=mx
mxcn-2.psi.ch sls local puppet_group=mx
xbl-gateway.psi.ch sls local
Secret values
-------------
Secrets and clear-text values can be mixed inside the same yaml file, eg.::
ntp_client::servers:
- pstime1.psi.ch
- pstime2.psi.ch
- pstime3.psi.ch
secret_key: ENC[PKCS7,MIIBiQYJKoZIhvcNAQcDoIIBejCCAXYCAQAxggEhMIIBHQIBADAFMAACAQEwDQYJKoZIhvcNAQEBBQAEggEAY/9V1S0VAMrRX1B4V06AgsbHPHdONFCQ4RiWfTrhV02rL5gSL4LAdqOuvGPY8YZZv8Mp06/FARlvP1aOfEx7avqSBy11IoUGkeajKZFzJV3OJsfhso4wroQ4JmfBaVKICnQZwCdpke+PHPRkwTgHcjmY2FeBnhvOlrGiQMQU3JzCjLePOa7UvlIIin3xOU/TdetzhfvoNGRhsz7+XRPD+mTT8efJ+OslJmqU7hEqMbs9CmhPJWqsjsQUp8jsM10Dk2Rv4v+zYeJd1ZLRGK3Z56G4NrlLyYua+/yyPbUP4+1bEuisDg9bfQHp3R491/kN0W558oQ+85rsRVXCp1Hb6TBMBgkqhkiG9w0BBwEwHQYJYIZIAWUDBAEqBBB2x9awGQnxAJsxIHA9OiM2gCBFvgxIR4SJZPrrQ/UlhKU39yYSkEmuKE/ou+yeIe5AMA==]
The encrypted values can be decrypted transparently from Hiera (on a host having the proper hiera key)::
[root]# hiera secret_key
this is a secret value
You can edit secure data inside any yaml file with the command
``/opt/puppetlabs/puppet/bin/eyaml edit common.yaml``. In this case secure data
will appear in clear-text inside the editor.
Encrypting data with the public key
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The eyaml public key is::
-----BEGIN CERTIFICATE-----
MIIC2TCCAcGgAwIBAgIBATANBgkqhkiG9w0BAQUFADAAMCAXDTE2MTAyNDE0NTY1
N1oYDzIwNjYxMDEyMTQ1NjU3WjAAMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB
CgKCAQEA2eykSgS7VJEXrWYkQMV48ZkUVcHMbCEo2gZXD4vIJsOdJu77F7tA53Ay
NxdKnJTftsj+R7yFP9Z2XllA9Our0Ypphj40rNstRg5O4IoSkAqitJchlfGL9jZ3
CB4dJqFitzOkxxCWZjQpjBd3dMJc6U3us6IDWohCjYqyjMZIVwU5EflzJKV4haEy
Y9qHkVt938RM9UohEvia5/1lZxuZQmDpYqCw9gmBK/dVKZ7abZGkujTKAg5cjD/X
vuexLMCGrjnPdrsblwBh+yfu6cEo9nfvfj6EA0FxPHIvQ3fv1yJZ+90OA9eUJnqQ
ED66OGPATAJIqhWlgb8a760xPQFQQQIDAQABo1wwWjAPBgNVHRMBAf8EBTADAQH/
MB0GA1UdDgQWBBSF05r9TYDiAmkdguCVcDzmYR8Q6TAoBgNVHSMEITAfgBSF05r9
TYDiAmkdguCVcDzmYR8Q6aEEpAIwAIIBATANBgkqhkiG9w0BAQUFAAOCAQEAWAER
CTGsOFUkCfvqke75PmIkxKBp/2eJbavWzPkbA/mwAGS4lQc5oyS8FMkUFxATo1k/
WIb2B3WJIMHfCzMNxTlQLjJiSyvWAlEBHDW4H2XekzKSbj96l+/nirmOq3QkEKTK
omexF5zYSPkBVA/S2m2wae3g2kubH1p42+REKQUvt1+xaecHBYD6eXzBWChnMMnq
FbXoayTibn0p9Roo8HClGGJpjPZUTMf+VGUqKWPfvaKl48Y0yrc/4BzZT6Sbzeou
ZSiHwa62rTV7ia7m2SILZU5b65JUVkFH/2r6qkxCr0Ep+oaxSNXtAXLCbnXmdOeK
B40J8ePbbmmGE24+zQ==
-----END CERTIFICATE-----
Assuming the public key is saved in a file named ``/home/someone/eyaml_key.pub`` and that
`hiera-eyaml <https://github.com/TomPoulton/hiera-eyaml>`_ is properly installed,
a string can be encrypted with::
eyaml encrypt --pkcs7-public-key=/home/someone/eyaml_key.pub -s secret_string
While a complete file can be encrypted with::
eyaml encrypt --pkcs7-public-key=/home/someone/eyaml_key.pub -f secret_file
Example: Encrypting password
----------------------------
Steps:
Install locally (local = server/desktop from where you will encrypt the password) hiera-eyaml (https://github.com/voxpupuli/hiera-eyaml/tree/command-refactor)
Create locally a *keys* directory::
#> mkdir -p ~/eyaml/keys
Copy *puppet00:/etc/puppetlabs/keys/eyaml/public_key.pkcs7.pem* to the above folder. Alternatively, you can copy the above public key, which should be the same::
#> scp root@puppet00:/etc/puppetlabs/keys/eyaml/public_key.pkcs7.pem ~/eyaml/keys
Go to the *eyaml* directory::
#> cd ~/eyaml
Hash your password using *openssl* as follows. It will generate a hashed password::
#> openssl passwd -1
Password: <input_password>
Verifying - Password: <input_password>
<output_hashed_password>
Encrypt your hashed password with *eyaml* and copy the exact output (either the string or the block) to your hiera (YAML) file::
#> eyaml encrypt -l 'root::password' -s '<output_hashed_password>'
root::password: ENC[PKCS7,MIIBmxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
OR
root::password: >
ENC[PKCS7,MIIBmxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxx]
+259
View File
@@ -0,0 +1,259 @@
Modules
=======
The repository for the Puppet role/profile module is
`<https://git.psi.ch/linux-infra/puppet>`_.
The general roles structure is::
role::generic_server
role::generic_desktop
...
role::daas::compute
role::daas::login
...
role::sls::console
role::sls::boot_server
So we have some roles that are generic PSI-wide (eg. ``generic_server``), while
other roles are specific to certain projects and have a dedicated namespace.
For the profiles section we have the following::
profile::ssh_client
profile::afs_client
profile::log_client
profile::mysql_server
For profiles maybe we will not need namespace areas dedicated to
specific projects, since profiles should be generic enough to be
reusable.
How to write modules
--------------------
The structure of a module depends on the type of module to some extent.
Currently, we distinguish three kinds of modules:
- roles
- profiles
- components
Parameter validation
~~~~~~~~~~~~~~~~~~~~
The first thing every module must do is parameter and environment validation. In
particular, a module should check that
- its arguments have the correct type
- it supports the OS of the client system
A typical module could start like this::
class profile::logging (
$forward_to,
$persistent_journal,
)
{
validate_array($forward_to)
validate_bool($persistent_journal)
check_os('RedHat 7', 'RedHat 8')
}
This would make sure that ``$forward_to`` is an array, ``$persistent_journal``
is a boolean, and that the client runs RHEL 7 or (a hypothetical) RHEL 8.
Arguments should be checked first, in the order that they are passed.
Checking the OS will ease porting efforts to newer releases of RHEL, other
distributions (e.g. Ubuntu), or other operating systems (e.g. BSD), should the
need arise.
Hiera queries
~~~~~~~~~~~~~
Only profiles and roles query Hiera. Components should take all their inputs as
parameters or facts.
In profiles, Hiera queries must generally be done as default arguments to
parameters, **not** inside the module's body::
class profile::logging (
$forward_to=hiera('...'),
$persistent_journal=hiera('...'),
)
{
The reason is that this allows a role to enforce certain parameters and disable
the corresponding Hiera query.
Layout
~~~~~~
Roles and profiles are usually implemented in a single file, e.g.
``psi/manifests/profile/logging.pp``. Components on the other hand follow the
standard Puppet layout, i.e.
``auditd/manifests/{init,install,config,service}.pp``.
Files and templates
~~~~~~~~~~~~~~~~~~~
Every file or template should be used by only one class and its path inside the
module should reflect this. Eg. if the template ``sshd_config.erb`` is used by
the ``profile::ssh_server`` module, it will be placed inside the
``templates/profile/ssh_server`` directory.
Furthermore, on top of every file managed by puppet, a header like the
following should be present: ::
########################################################################
#
# THIS FILE IS MANAGED BY PUPPET - DO NOT MODIFY!
#
# profile::ssh_server
# sshd_config.erb
#
########################################################################
The last two lines should be:
- the puppet class using the file;
- the name of the file/template.
Debugging templates
~~~~~~~~~~~~~~~~~~~
You can use the ``erb`` tool to test the variable interpolation. One easy way is to prepare a file with the variable values and pipe it together with the template through erb. Define the variables in a file ``test-vars.erb`` like in this example::
<%
@partitions = {'a' => 'aa', 'b' => 'bb', 'c' => 'cc'}
@group_whitelist = ['groupA', 'groupB']
@port = 8000
%>
and then use a command line like the following to pipe it through ``erb``::
erb <(cat /tmp/test-vars.erb /tmp/my-template.erb)
The output will contain the variable substituted template. If you want to check your
template for syntax errors, you can just use the following command::
erb -P -x -T '-' jupyterhub_config.py.erb | ruby -c
Roles
-----
.. toctree::
:maxdepth: 1
roles/base
roles/bootpc
roles/console
roles/daq_buffer
roles/dcache_t3_pools
roles/desktop
roles/ganglia_server
roles/grafana
roles/hpc/ces
roles/hpc/cn
roles/hpc/database
roles/hpc/ui
roles/hpc/server
roles/influxdb
roles/jupyterserver
roles/log_server
roles/login_server
roles/media_station
roles/nomachine_proxy
roles/reverse_proxy
roles/server
roles/slurm_client
roles/slurm_compute
roles/slurm_server
roles/softioc
roles/web_server
roles/workstation
roles/zookeeper
Profiles
--------
.. toctree::
:maxdepth: 1
profiles/aaa
profiles/afs_client
profiles/autofs
profiles/custom_timers
profiles/epics
profiles/filecopy
profiles/files
profiles/ganglia_client
profiles/ganglia_server
profiles/gnome
profiles/gpfs
profiles/grafana
profiles/icewm
profiles/icinga/client
profiles/icinga/nrpe
profiles/icinga/checks/gpfs
profiles/icinga/checks/nvidia
profiles/icinga/checks/puppet_client
profiles/icinga/checks/service
profiles/icinga/checks/slurm
profiles/icinga/checks/hp/smart_array
profiles/infiniband
profiles/jupyterhub
profiles/kdump_client
profiles/local_accounts
profiles/log_client
profiles/log_server
profiles/mkresource/files
profiles/mounter
profiles/mta
profiles/multipath
profiles/nomachine
profiles/nomachine/desktop
profiles/nomachine/license
profiles/nomachine/repository
profiles/nomachine/service
profiles/nomachine/terminal
profiles/nomachine/workstation
profiles/networking
profiles/nfs_server
profiles/ntp_client
profiles/nvidia
profiles/platform
profiles/platform/hewlett_packard
profiles/pmodules
profiles/print_client
profiles/puppet_client
profiles/serial_console
profiles/ssh_client
profiles/ssh_server
profiles/sysinfo
profiles/telegraf
profiles/web_server
profiles/yum_client
Components
----------
.. toctree::
:maxdepth: 1
components/grub2
components/logrotate
components/selinux
components/sudo
components/systemd
components/sysctl
components/updatedb
components/utils
+336
View File
@@ -0,0 +1,336 @@
``profile::aaa``
=====================
This module manages configuration related to authentication, authorization, and
auditing. In particular, it
- performs the Active Directory join (optional)
- configures login restrictions (e.g. :manpage:`pam_access(8)`)
- adds sudo rules for administrators
Parameters
----------
=============================== ======== ================================================
**Name** **Type** **Default**
------------------------------- -------- ------------------------------------------------
admins list hiera_array('aaa::admins')
allow_sudoers_d bool hiera('aaa::allow_sudoers_d')
bastions list hiera('aaa::bastions')
cache_creds bool hiera('aaa::cache_creds')
create_homes bool hiera('aaa::create_homes')
enable_ad bool hiera('aaa::enable_ad')
enable_eaccounts bool hiera('aaa::enable_eaccounts')
enable_slurm bool false
enable_ssh_allow bool hiera('aaa::enable_ssh_allow')
enable_tier3_ldap bool hiera('aaa::enable_tier3_ldap', false)
ldap_enabled bool hiera('aaa::ldap_enabled')
offline_creds_expiration string hiera('aaa::offline_creds_expiration')
override_homedir string hiera('aaa::override_homedir', undef)
ssh_authorized_keys hash hiera_hash('aaa::sshkeys', {})
sssd_debuglevel int hiera('aaa::sssd_debuglevel')
sudo_rules (Hiera only) list hiera_array('aaa::sudo_rules', [])
support_afs bool hiera('aaa::support_afs')
use_bastions bool hiera('aaa::use_bastions', undef)
users list hiera_array('aaa::users', [])
=============================== ======== ================================================
``admins``
~~~~~~~~~~
A list of user and/or group names. The users as well as the members of the groups
can log in on the system and have full root privileges via sudo.
Group names must be prefixed with ``%``.
Example::
aaa::admins:
- 'markushin'
- 'gsell'
- 'barabas'
- '%unx-linux_support'
Also see `users`_.
``allow_sudoers_d``
~~~~~~~~~~~~~~~~~~~
This boolean controls whether the files in ``/etc/sudoers.d`` are taken
into account by sudo.
``bastions``
~~~~~~~~~~~~
A list of FQDNs. If `use_bastions`_ is true, then root logins are only allowed
from the hosts on this list. *Note*: If the list is empty, login is unrestricted
again!
Example::
aaa::bastions:
- 'gpfs-node1.psi.ch'
- 'gpfs-node2.psi.ch'
- 'gpfs-node3.psi.ch'
- 'wmgt01.psi.ch'
- 'wmgt02.psi.ch'
``create_homes``
~~~~~~~~~~~~~~~~
This boolean controls whether home directories are created on login when
necessary. This only works if the underlying filesystem allows it. In
particular, it does not work on AFS.
Also see `override_homedir`_.
``cache_creds``
~~~~~~~~~~~~~~~~~~~~~
This boolean controls whether sssd caches credentials.
Also see `offline_creds_expiration`_.
``enable_ad``
~~~~~~~~~~~~~
Determines whether the system will be configured for Active Directory
authentication.
``enable_eaccounts``
~~~~~~~~~~~~~~~~~~~~
Setting this to ``true`` enables the Active Directory OU containing the
e-accounts.
``enable_slurm``
~~~~~~~~~~~~~~~~
When set to ``true``, we allow slurm users to log in on the computing nodes where
their jobs are running. Otherwise, users are not allowed to log in on the computing
nodes.
To enable this behaviour, someone needs to call directly the ``profile::aaa`` class
and enable this feature, but also needs to change the call of the ``role::base``
class by setting ``include_aaa`` to ``false``. For example::
class {
'role::base':
include_aaa => false;
'profile::aaa':
support_afs => hiera('base::enable_afs'),
enable_slurm => true;
'slurm::compute':
cluster => $cluster;
}
``enable_ssh_allow``
~~~~~~~~~~~~~~~~~~~~
Allow users to manage access to the system bypassing Puppet by adding usernames
to ``/etc/security/ssh.allow.user``. This file is never touched by Puppet.
Documentation can be found in :manpage:`pam_listfile(8)` or
``/etc/security/ssh.allow.README`` on the target system.
``enable_tier3_ldap``
~~~~~~~~~~~~~~~~~~~~~
When set to ``true``, sssd.conf is configured to support only OpenLDAP for
the Tier3. Other domains (Active Directory D.PSI.CH and LDAP SLS.PSI.CH) are
not included in the configuration file.
``ldap_enabled``
~~~~~~~~~~~~~~~~
This boolean controls whether LDAP (AD) is used for user information and
authentication.
In this case the following ports have to be accessible from the host to the
domain controllers:
- 389/TCP
- 389/UDP
- 88/TCP
- 88/UDP
- 464/UDP
``offline_creds_expiration``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This integer controls for how many days cached credentials are valid after the
last successful online login. A value of 0 means that there is no limit.
Also see `cache_creds`_.
``override_homedir``
~~~~~~~~~~~~~~~~~~~~
This string defines the home directory path to use as expected by sssd's
``override_homedir`` option. See :manpage:`sssd.conf(5)` for details.
Example::
aaa::override_homedir: '/home/%u'
``sshkeys``
~~~~~~~~~~~
A hash of hashes, each of which contains the parameters as accepted by the
`ssh_authorized_key
<https://docs.puppet.com/puppet/latest/types/ssh_authorized_key.html>`_ resource
type.
Example::
aaa::sshkeys:
'root@ra':
user: 'root'
type: 'ssh-dss'
key: 'AAAAB3Nz...'
Keep in mind that this only works if the user's home directory is accessible to
root. In particular, it doesn't work if the home is on AFS.
``sssd_debuglevel``
~~~~~~~~~~~~~~~~~~~
This integer configures sssd's ``debug_level`` option (see
:manpage:`sssd.conf(5)`). It is rarely necessary to use this setting, as the
debug level can be changed at runtime using :manpage:`sss_debuglevel(8)`.
``sudo_rules``
~~~~~~~~~~~~~~
A list of strings taken from Hiera containing additional sudo rules.
``support_afs``
~~~~~~~~~~~~~~~
Configure PAM and related tools which are necessary for running AFS.
``use_bastions``
~~~~~~~~~~~~~~~~
This parameter determines whether root logins are only possible from the hosts
listed in `bastions`_. Normally the value is taken from the network property of
the same name, but this parameter allows overriding the network setting through
Hiera or in Puppet manifests.
``users``
~~~~~~~~~
A list of user and/or group names. The users as well as the members of the
groups can log in on the system.
Group names must be prefixed with ``%``.
Also see `admins`_.
``ssh_authorized_key``
~~~~~~~~~~~~~~~~~~~~~~
A hash containing SSH public keys as expected by the ``ssh_authorized_key``
Puppet resource.
Examples
--------
The base role includes this profile by default. If you need to customize the
profile, do it like this::
class role::some_role () {
class {'role::base':
include_aaa => false,
...
}
class {'profile::aaa':
override_homedir => '/localhomes/%u',
create_homes => true,
...
}
...
Implementation Notes
--------------------
User/Group Enumeration
~~~~~~~~~~~~~~~~~~~~~~
User/group enumeration happens when using the ``{set,get,end}pwent`` and
``{set,get,end}grent`` APIs to enumerate all users or groups. These are used by
``getent passwd`` and ``getent group`` for example, when not providing a
specific user or group to query.
Enumeration would potentially be very slow and put a high load on the AD domain
controllers if it were to return all users/groups from AD. To prevent this, we
disable enumeration for AD accounts using the ``enumerate`` setting in
:manpage:`sssd.conf(5)`. This means that enumeration using the APIs/programs
above only returns local users and groups.
The ``min_id`` issue
~~~~~~~~~~~~~~~~~~~~
There are Unix groups in AD at PSI with very low GIDs, the lowest being 101
(``unx-fkt``). This leads to problems, because :manpage:`sssd(8)` ignores users
and groups with GIDs lower than ``min_id`` (from :manpage:`sssd.conf(5)`), which
has a value of 500 by default.
Therefore we set ``min_id`` to 100. One consequence is that there is overlap
between the IDs considered by sssd when querying AD, and the ID range for system
users and groups (``[SYS_]ID_{MIN,MAX}``) as defined in
:manpage:`login.defs(5)`.
When creating system groups, :manpage:`groupadd` scans **all** GIDs from
``SYS_GID_MAX`` to ``SYS_GID_MIN``. This causes a number of requests to AD,
which is usually not a problem but can be, if those take a long time to
complete. This used to happen without the ``ad_enabled_domains`` setting for
example. ``sssd`` would query ``d.ethz.ch`` in addition to ``d.psi.ch``, which
would time out after a few seconds.
PAM details
~~~~~~~~~~~
There have been discussions to move :manpage:`pam_access` to the ``auth``
section (from the ``account`` section) to prevent information leakage for
accounts that can't even log in: an attacker would get a different error message
depending on whether the password was wrong or whether :manpage:`pam_access`
didn't allow access.
The problem is that when using Kerberos authentication, sshd does not use PAM
for authentication, circumventing :manpage:`pam_access`. Hence, just moving the
latter from ``account`` to ``auth`` does not work.
Kerberos ticket/AFS token renewal
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Kerberos tickets and AFS tokens are automatically renewed for as long as
possible. This is done by running a :manpage:`krenew` for every session in the
background.
@@ -0,0 +1,51 @@
``profile::afs_client``
============================
This module installs and configures the OpenAFS client.
On systems using AFS it is necessary to have the AFS kernel module for the
running kernel installed. This module installs the AFS kernel module for
**every** installed kernel (usually up to 3). Therefore it is recommended to run
Puppet after (kernel) updates.
Note: Simply including this profile is usually **not** enough to enable AFS on a
system. It is also necessary to enable AFS support when including
``profile::aaa``. The latter makes sure that AD is used for authentication and
configures PAM to automatically create and renew AFS tokens.
Parameters
----------
=============================== ======== ================================================
**Name** **Type** **Default**
------------------------------- -------- ------------------------------------------------
enable_dynroot bool hiera('afs_client::enable_dynroot')
min_cache_size int hiera('afs_client::min_cache_size')
mountpoint string hiera('afs_client::mountpoint')
root_volume string hiera('afs_client::root_volume')
=============================== ======== ================================================
``enable_dynroot``
~~~~~~~~~~~~~~~~~~
``min_cache_size``
~~~~~~~~~~~~~~~~~~
The minimum size for the local AFS cache. If the cache is smaller than this,
Puppet will try to resize the logical volume ``lv_openafs``, otherwise Puppet will
leave it alone.
``mountpoint``
~~~~~~~~~~~~~~
Defines the mountpoint for the AFS filesystem.
``root_volume``
~~~~~~~~~~~~~~~
The name of the root volume to mount.
+23
View File
@@ -0,0 +1,23 @@
``profile::autofs``
===================
This profile enables the ``autofs`` service. For normal automounts this is not
necessary, as they can be implemented using systemd, but this module can enable
the autofs ``-hosts`` map.
Parameters
----------
=============================== ======== ================================================
**Name** **Type** **Default**
------------------------------- -------- ------------------------------------------------
install_auto_master bool hiera('autofs::install_auto_master', true)
=============================== ======== ================================================
``autofs::install_auto_master``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If this variable is true, the profile will create an ``/etc/auto.master`` file
enabling the ``-hosts`` map on ``/net``.
@@ -0,0 +1,40 @@
``profile::custom_timers``
==========================
This profile is a defined type to define systemd timers together with the corresponding service.
The resources are defined using the ``systemd::service`` and ``system::service``.
The service is defined as type simple and it has a dependency (``Wants`` and ``After``) on
the ``network-online.target``.
The defined type is used from ``role::base`` to create the resources
defined in the ``base::timers`` hiera hash.
Parameters
----------
=============================== ======== ================================================
**Name** **Type** **Default**
------------------------------- -------- ------------------------------------------------
description String
command String
on_calendar String
=============================== ======== ================================================
``description``
~~~~~~~~~~~~~~~
Human-oriented description of the service and timer.
``command``
~~~~~~~~~~~
Path to the command to invoke into the service. It should be an absolute path
and a check is performed against this.
``on_calendar``
~~~~~~~~~~~~~~~
The definition of the timer execution time. See `OnCalendar` on systemd.timer(5).
+20
View File
@@ -0,0 +1,20 @@
``profile::epics``
==================
Install and configure Epics.
Parameters
----------
============ ======== ===========
**Name** **Type** **Default**
------------ -------- -----------
epics_func string -
============ ======== ===========
``epics_func``
~~~~~~~~~~~~~~
A command written to ``/etc/epics_func.conf``, followed by ``>/dev/null``.
+37
View File
@@ -0,0 +1,37 @@
``profile::filecopy``
=====================
This module allows copying files to the host using a git repository as
the source.
The purpose of this module is to distribute files that are not
created/configured with other modules, usually files very specific
eg. scripts for very specific environments.
With this module it is possible to specify only single files, so no
directory or recursion is possible.
The source files are taken from the master branch of a git repo that
should be accessible without authentication. Or anyway accessible from
the host where the files have to be copied (eg. via a proper ssh key).
A section like the following in hiera will copy to the file ``/tmp/test1``
the content of
``https://git.psi.ch/talamo_i/copy-file-test/raw/master/abc``::
filecopy::files:
'/tmp/test1':
repo: 'talamo_i/copy-file-test'
path: 'abc'
mode: '0600'
owner: 'talamo_i'
You can additionally specify the group owner of the file::
group: 'apache'
This module is included by default and ``filecopy::files`` is empty by
default.
To copy files just define properly the ``filecopy::files`` variable.
+27
View File
@@ -0,0 +1,27 @@
``profile::files``
==================
This profile creates symlinks.
Parameters
----------
=============================== ======== ================================================
**Name** **Type** **Default**
------------------------------- -------- ------------------------------------------------
symlinks hash hiera('files::symlinks')
=============================== ======== ================================================
``symlinks``
~~~~~~~~~~~~
A hash specifying symlinks to be created. The keys of the hash are the absolute
pathnames of the symlinks, the values of the hash are the corresponding symlink
targets.
Example::
files::symlinks:
'/opt/foo': '/var/lib/foo'
@@ -0,0 +1,36 @@
``profile::ganglia_client``
============================
This module installs and configures Ganglia's gmond.
Parameters
----------
=============================== ======== ================================================
**Name** **Type** **Default**
------------------------------- -------- ------------------------------------------------
grid string hiera('ganglia::grid')
cluster string hiera('ganglia_client::cluster')
n/a hash hiera('ganglia::grid_${GRID}')
=============================== ======== ================================================
``grid``
~~~~~~~~
The name of the Ganglia grid the client is a part of.
``cluster``
~~~~~~~~~~~
The name of the Ganglia cluster within the grid the client is a part of.
``grid_${GRID}``
~~~~~~~~~~~~~~~~
The definition of the grid. See the :doc:`ganglia_server documentation
<../profiles/ganglia_server>` for details.
@@ -0,0 +1,104 @@
``profile::ganglia_server``
===========================
This module configures the following Ganglia server components:
- the web interface (using Apache)
- one gmond for each cluster to be monitored by this server
The latter is a little unusual. Normally, the gmonds collecting the data for
each cluster, which are then queried by gmetad, are part of the cluster. The
implementation chosen by this module makes it easier to deal with firewalls.
Parameters
----------
=============================== ======== ================================================
**Name** **Type** **Default**
------------------------------- -------- ------------------------------------------------
grid string hiera('ganglia::grid')
grid_${GRID} hash hiera('ganglia::grid_${GRID}')
=============================== ======== ================================================
``grid``
~~~~~~~~
This string identifies the grid (ie the collection of clusters) this server
monitors. The definition of the grid is taken from the Hiera value
``grid_${GRID}``.
``grid_${GRID}``
~~~~~~~~~~~~~~~~
The definition of the Ganglia grid this server is responsible for. It is a hash
containing the following keys:
``name``
,,,,,,,,
The name of the grid as used in the web interface.
``ui``
,,,,,,
The FQDN of the server hosting the web UI and collecting the data for the grid.
A system with role ``ganglia_server`` will compare its own FQDN to this value
and fail if they are not equal. Clients will need this value to know where
they should send their metrics.
``clusters``
,,,,,,,,,,,,
A hash of cluster definitions. The keys are the IDs of the clusters, ie. what a
client expects to find in ``ganglia::cluster``. The values are hashes containing
the following attributes:
``name``
........
The user-friendly name of the cluster to be used in the web UI.
``port``
........
The port used within the cluster. The first cluster should use 8649 (the Ganglia
standard port), the next cluster should use 8650, and so on.
Examples
--------
Suppose we have a Ganglia grid for Swissfel (sysdb_env == swissfel).
Then we could put the following in Hiera.
In ``swissfel.yaml`` we would define a grid, say ``sfel``, and set the grid for
all systems in the ``swissfel`` environment to ``sfel``::
ganglia::grid: 'sfel'
ganglia::grid_sfel:
name: 'SwissFEL'
ui: 'gmeta00.psi.ch'
clusters:
'sf-daqbuf':
name: 'DAQ Buffers'
port: 8649
'sf-athos':
name: 'Athos Beamline Systems'
port: 8650
In the ``swissfel/daqbuf.yaml`` we would then set the cluster to ``sf-daqbuf``::
ganglia::cluster: 'sf-daqbuf'
In ``swissfel/athos.yaml`` on the other hand, we would set the cluster to
``sf-athos``::
ganglia::cluster: 'sf-athos'
+46
View File
@@ -0,0 +1,46 @@
``profile::gnome``
==================
This module installs and configures the Gnome desktop environment.
Parameters
----------
=============================== ======== ================================================
**Name** **Type** **Default**
------------------------------- -------- ------------------------------------------------
banner_message string
enable_update_notifications bool
favorites list
keyboard_layouts list
=============================== ======== ================================================
``banner_message``
~~~~~~~~~~~~~~~~~~
The message displayed above the login prompt.
``enable_update_notifications``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Whether or not ``gnome-software`` is started at the beginning of a session to
notify the user about available updates.
``favorites``
~~~~~~~~~~~~~
The applications/directories/etc to be displayed in the *Favorites* menu and/or
the Gnome dock. Each member of the list must be a string containing the full
name (not path!) of the ``.desktop`` file, eg. ``gnome-terminal.desktop`` for
the Gnome Terminal.
``keyboard_layouts``
~~~~~~~~~~~~~~~~~~~~
A list of the keyboard mappings which should be available on the login screen.
The first mapping in the list is the default.

Some files were not shown because too many files have changed in this diff Show More