# Copyright Red Hat 2021, Jake Hunsaker <jhunsake@redhat.com>

# This file is part of the sos project: https://github.com/sosreport/sos
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions of
# version 2 of the GNU General Public License.
#
# See the LICENSE file in the source distribution for further information.

import os

from shlex import quote

from sos.collector.clusters import Cluster
from sos.utilities import is_executable


class ocp(Cluster):
    """
    This profile is for use with OpenShift Container Platform (v4) clusters
    instead of the kubernetes profile.

    This profile will favor using the `oc` transport type, which means it
    will leverage a locally installed `oc` binary. This is also how node
    enumeration is done. To instead use SSH to connect to the nodes, use the
    '--transport=control_persist' option.

    Thus, a functional `oc` binary for the user executing sos collect is
    required. Functional meaning that the user can run `oc` commands with
    clusterAdmin privileges.

    If this requires the use of a secondary configuration file, specify that
    path with the 'kubeconfig' cluster option. This config file will also be
    used on a single master node to perform API collections if the `with-api`
    option is enabled (default disabled). If no `kubeconfig` option is given,
    but `with-api` is enabled, the cluster profile will attempt to use a
    well-known default kubeconfig file if it is available on the host.

    Alternatively, provide a clusterAdmin access token either via the 'token'
    cluster option or, preferably, the SOSOCPTOKEN environment variable.

    By default, this profile will enumerate only master nodes within the
    cluster; this may be changed by overriding the 'role' cluster option.
    To collect from all nodes in the cluster regardless of role, use the form
    -c ocp.role=''.

    Filtering nodes by a label applied to that node is also possible via the
    label cluster option, though be aware that this is _combined_ with the
    role option mentioned above.

    To avoid redundant collections of OCP API information (e.g. 'oc get'
    commands), this profile will attempt to enable the API collections on
    only a single master node. If none of the master nodes have a functional
    'oc' binary available *and* the --no-local option is used, no API data
    will be collected.
    """

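    # Example invocations (illustrative; option names come from option_list
    # below, all values here are hypothetical):
    #
    #   sos collect --cluster-type=ocp -c ocp.role='master:worker'
    #   SOSOCPTOKEN=<token> sos collect --cluster-type=ocp \
    #       -c ocp.kubeconfig=/etc/kubernetes/kubeconfig -c ocp.with-api=on
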
    cluster_name = 'OpenShift Container Platform v4'
    packages = ('openshift-hyperkube', 'openshift-clients')

    api_collect_enabled = False
    token = None
    project = 'sos-collect-tmp'
    oc_cluster_admin = None
    _oc_cmd = ''

    option_list = [
        ('label', '', 'Colon delimited list of labels to select nodes with'),
        ('role', 'master', 'Colon delimited list of roles to filter on'),
        ('kubeconfig', '', 'Path to the kubeconfig file'),
        ('token', '', 'Service account token to use for oc authorization'),
        ('with-api', False, 'Collect OCP API data from a master node'),
        ('api-url', '', 'Alternate API URL of an external control-plane'),
    ]

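    # Illustrative: when sos runs from a container on the primary node and
    # the kubeconfig option is set, oc_cmd may resolve to something like
    # '/host/usr/bin/oc --kubeconfig /etc/kubernetes/kubeconfig', assuming
    # 'which oc' resolves to /usr/bin/oc under the host sysroot.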
    @property
    def oc_cmd(self):
        if not self._oc_cmd:
            self._oc_cmd = 'oc'
            if self.primary.host.in_container():
                _oc_path = self.primary.run_command(
                    'which oc', chroot=self.primary.host.sysroot
                )
                if _oc_path['status'] == 0:
                    self._oc_cmd = os.path.join(
                        self.primary.host.sysroot,
                        _oc_path['output'].strip().lstrip('/')
                    )
                else:
                    self.log_warn(
                        "Unable to determine PATH for 'oc' command, "
                        "node enumeration may fail."
                    )
                    self.log_debug(
                        f"Locating 'oc' failed: {_oc_path['output']}")
            if self.get_option('kubeconfig'):
                self._oc_cmd += " --kubeconfig " \
                                f"{self.get_option('kubeconfig')}"
            self.log_debug(f"oc base command set to {self._oc_cmd}")
        return self._oc_cmd

    def fmt_oc_cmd(self, cmd):
        """Format the oc command to optionally include the kubeconfig file
        if one is specified
        """
        return f"{self.oc_cmd} {cmd}"

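    # The login attempt below effectively runs (token and URL values are
    # hypothetical):
    #   oc login --insecure-skip-tls-verify=True --token=<token> \
    #       https://api.example.lab:6443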
    def _attempt_oc_login(self):
        """Attempt to log in to the API via the oc command using a provided
        token
        """
        _res = self.exec_primary_cmd(
            self.fmt_oc_cmd("login --insecure-skip-tls-verify=True "
                            f"--token={self.token} "
                            f"{self.get_option('api-url')}")
        )
        return _res['status'] == 0

    def check_enabled(self):
        if super().check_enabled():
            return True
        self.token = self.get_option('token') or os.getenv('SOSOCPTOKEN',
                                                            None)
        if self.token:
            self._attempt_oc_login()
        _who = self.fmt_oc_cmd('whoami')
        return self.exec_primary_cmd(_who)['status'] == 0

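    # setup() below only proceeds over the 'oc' transport and then, roughly:
    #   1. checks for cluster-admin rights via "oc auth can-i '*' '*'"
    #   2. creates the temporary project via "oc new-project sos-collect-tmp"
    #   3. labels it for privileged pods (see _label_sos_project)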
    def setup(self):
        """Create the project that we will be executing in for any nodes'
        collection via a container image
        """
        if self.set_transport_type() != 'oc':
            return None

        out = self.exec_primary_cmd(self.fmt_oc_cmd("auth can-i '*' '*'"))
        self.oc_cluster_admin = out['status'] == 0
        if not self.oc_cluster_admin:
            self.log_debug("Check for cluster-admin privileges returned "
                           "false, cannot create project in OCP cluster")
            raise Exception("Insufficient permissions to create temporary "
                            "collection project.\nAborting...")

        self.log_info(f"Creating new temporary project '{self.project}'")
        ret = self.exec_primary_cmd(
            self.fmt_oc_cmd(f"new-project {self.project}")
        )
        if ret['status'] == 0:
            self._label_sos_project()
            return True

        self.log_debug(f"Failed to create project: {ret['output']}")
        raise Exception("Failed to create temporary project for collection. "
                        "\nAborting...")

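    # Each label is applied with a command of the form (shown here for the
    # default project name):
    #   oc label namespace sos-collect-tmp \
    #       pod-security.kubernetes.io/enforce=privileged --overwrite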
    def _label_sos_project(self):
        """Add pertinent labels to the temporary project we've created so
        that our privileged containers can properly run.
        """
        labels = [
            "security.openshift.io/scc.podSecurityLabelSync=false",
            "pod-security.kubernetes.io/enforce=privileged"
        ]
        for label in labels:
            ret = self.exec_primary_cmd(
                self.fmt_oc_cmd(
                    f"label namespace {self.project} {label} --overwrite"
                )
            )
            if ret['status'] != 0:
                raise Exception(
                    f"Error applying namespace labels: {ret['output']}"
                )

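    # Teardown below is equivalent to running, for the default project name:
    #   oc delete project sos-collect-tmp
    #   oc wait namespace/sos-collect-tmp --for=delete --timeout=30s
    #   oc project default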
    def cleanup(self):
        """Remove the project we created to execute within
        """
        if self.project:
            try:
                ret = self.exec_primary_cmd(
                    self.fmt_oc_cmd(f"delete project {self.project}"),
                    timeout=30
                )
                if ret['status'] != 0:
                    self.log_error(
                        f"Error deleting temporary project: {ret['output']}"
                    )
                ret = self.exec_primary_cmd(
                    self.fmt_oc_cmd(
                        f"wait namespace/{self.project} --for=delete "
                        "--timeout=30s"
                    )
                )
                if ret['status'] != 0:
                    self.log_error(
                        "Error waiting for temporary project to be deleted: "
                        f"{ret['output']}"
                    )
            except Exception as err:
                self.log_error(
                    "Failed attempting to remove temporary project "
                    f"'{self.project}': {err}\n"
                    "Please manually remove the temporary project."
                )
            # don't leave the config on a non-existing project
            self.exec_primary_cmd(self.fmt_oc_cmd("project default"))
            self.project = None
        return True

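    # A sketch of the input _build_dict() expects, i.e. the split output of
    # 'oc get nodes -o wide' (node names and versions are hypothetical):
    #
    #   NAME       STATUS   ROLES    AGE   VERSION           ...
    #   master-0   Ready    master   10d   v1.25.4+18eadca   ...
    #   worker-0   Ready    worker   10d   v1.25.4+18eadca   ...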
    def _build_dict(self, nodelist):
        """From the output of get_nodes(), construct an easier-to-reference
        dict of nodes that will be used in determining labels, primary
        status, etc...

        :param nodelist: The split output of `oc get nodes`
        :type nodelist: ``list``

        :returns: A dict of nodes with `get nodes` columns as keys
        :rtype: ``dict``
        """
        nodes = {}
        if 'NAME' in nodelist[0]:
            # get the index of the fields
            statline = nodelist.pop(0).split()
            idx = {}
            for state in ['status', 'roles', 'version', 'os-image']:
                try:
                    idx[state] = statline.index(state.upper())
                except Exception:
                    # label is not available, which is not fatal for our dict
                    # construction here
                    pass
            for node in nodelist:
                _node = node.split()
                nodes[_node[0]] = {}
                for column, value in idx.items():
                    nodes[_node[0]][column] = _node[value]
        return nodes

    def set_transport_type(self):
        if self.opts.transport != 'auto':
            return self.opts.transport
        if is_executable('oc', sysroot=self.primary.host.sysroot):
            return 'oc'
        self.log_info("Local installation of 'oc' not found or is not "
                      "correctly configured. Will use ControlPersist.")
        self.ui_log.warning(
            "Preferred transport 'oc' not available, will fall back to SSH."
        )
        if not self.opts.batch:
            input("Press ENTER to continue connecting with SSH, or Ctrl+C "
                  "to abort.")
        return 'control_persist'

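    # Node enumeration runs 'oc get nodes -o wide', optionally with a label
    # selector; e.g. '-c ocp.label=region=primary:zone=a' (hypothetical
    # labels) becomes: oc get nodes -o wide -l 'region=primary,zone=a'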
    def get_nodes(self):
        nodes = []
        self.node_dict = {}
        cmd = 'get nodes -o wide'
        if self.get_option('label'):
            labels = ','.join(self.get_option('label').split(':'))
            cmd += f" -l {quote(labels)}"
        res = self.exec_primary_cmd(self.fmt_oc_cmd(cmd))
        if res['status'] == 0:
            if self.get_option('role') == 'master':
                self.log_warn("NOTE: By default, only master nodes are "
                              "listed.\nTo collect from all/more nodes, "
                              "override the role option with "
                              "'-c ocp.role=role1:role2'")
            roles = list(self.get_option('role').split(':'))
            self.node_dict = self._build_dict(res['output'].splitlines())
            for node_name, node in self.node_dict.items():
                if roles:
                    for role in roles:
                        if role in node['roles']:
                            nodes.append(node_name)
                            break
                else:
                    nodes.append(node_name)
        else:
            msg = "'oc' command failed"
            if 'Missing or incomplete' in res['output']:
                msg = ("'oc' failed due to missing kubeconfig on primary "
                       "node. Specify one via '-c ocp.kubeconfig=<path>'")
            raise Exception(msg)
        return nodes

    def set_node_label(self, node):
        if node.address not in self.node_dict:
            return ''
        for label in ['master', 'worker']:
            if label in self.node_dict[node.address]['roles']:
                return label
        return ''

    def check_node_is_primary(self, node):
        if node.address not in self.node_dict:
            return False
        return 'master' in self.node_dict[node.address]['roles']

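    # Resulting plugin options appended to the node depend on the node's sos
    # version (values shown for use_api=True):
    #   sos >= 4.2-16: 'openshift.with-api=on'
    #   older sos:     'openshift.no-oc=off'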
    def _toggle_api_opt(self, node, use_api):
        """In earlier versions of sos, the openshift plugin option that is
        used to toggle the API collections was called `no-oc` rather than
        `with-api`. This older plugin option had the inverse logic of the
        current `with-api` option.

        Use this to toggle the correct plugin option given the node's sos
        version. Note that the use of version 4.2 here is tied to the RHEL
        release (the only usecase for this cluster profile) rather than
        the upstream version, given the backports for that downstream.

        :param node: The node being inspected for API collections
        :type node: ``SoSNode``

        :param use_api: Should this node enable API collections?
        :type use_api: ``bool``
        """
        if node.check_sos_version('4.2-16'):
            _opt = 'with-api'
            _val = 'on' if use_api else 'off'
        else:
            _opt = 'no-oc'
            _val = 'off' if use_api else 'on'
        node.plugopts.append(f"openshift.{_opt}={_val}")

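    # For the node that will perform API collections, access is attempted in
    # this order: an already-working 'oc whoami', then a provided token
    # (passed via SOSOCPTOKEN), then an existing kubeconfig file on the node.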
    def set_primary_options(self, node):
        node.enable_plugins.append('openshift')
        if not self.get_option('with-api'):
            self._toggle_api_opt(node, False)
            return
        if self.api_collect_enabled:
            # a primary has already been enabled for API collection, disable
            # it among others
            self._toggle_api_opt(node, False)
        else:
            # running in a container, so reference the /host mount point
            master_kube = (
                '/host/etc/kubernetes/static-pod-resources/'
                'kube-apiserver-certs/secrets/node-kubeconfigs/'
                'localhost.kubeconfig'
            )
            _optconfig = self.get_option('kubeconfig')
            if _optconfig and not _optconfig.startswith('/host'):
                _optconfig = '/host/' + _optconfig
            _kubeconfig = _optconfig or master_kube
            _oc_cmd = 'oc'
            if node.host.containerized:
                _oc_cmd = '/host/bin/oc'
                # when run from a container, the oc command does not inherit
                # the default config, so if it's present then pass it here to
                # detect a functional oc command. This is sidestepped in sos
                # report by being able to chroot the `oc` execution, which we
                # cannot do remotely
                if node.file_exists('/root/.kube/config', need_root=True):
                    _oc_cmd += ' --kubeconfig /host/root/.kube/config'
            can_oc = node.run_command(f"{_oc_cmd} whoami",
                                      use_container=node.host.containerized,
                                      # container is available only to root
                                      # and if rhel, need to run sos as root
                                      # anyways, which will run oc as root
                                      need_root=True)
            if can_oc['status'] == 0:
                # the primary node can already access the API
                self._toggle_api_opt(node, True)
                self.api_collect_enabled = True
            elif self.token:
                node.sos_env_vars['SOSOCPTOKEN'] = self.token
                self._toggle_api_opt(node, True)
                self.api_collect_enabled = True
            elif node.file_exists(_kubeconfig):
                # if the file exists, then the openshift sos plugin will use
                # it if the with-api option is turned on
                if _kubeconfig != master_kube:
                    node.plugopts.append(
                        f"openshift.kubeconfig={_kubeconfig}"
                    )
                self._toggle_api_opt(node, True)
                self.api_collect_enabled = True
            if self.api_collect_enabled:
                msg = (f"API collections will be performed on "
                       f"{node.address}\nNote: API collections may extend "
                       "runtime by 10s of minutes\n")
                self.soslog.info(msg)
                self.ui_log.info(msg)

    def set_node_options(self, node):
        # don't attempt OC API collections on non-primary nodes
        self._toggle_api_opt(node, False)

# vim: set et ts=4 sw=4 :