439 lines
17 KiB
Python
439 lines
17 KiB
Python
# This file is part of the sos project: https://github.com/sosreport/sos
|
|
#
|
|
# This copyrighted material is made available to anyone wishing to use,
|
|
# modify, copy, or redistribute it subject to the terms and conditions of
|
|
# version 2 of the GNU General Public License.
|
|
#
|
|
# See the LICENSE file in the source distribution for further information.
|
|
|
|
from fnmatch import translate
|
|
import os
|
|
import re
|
|
from sos.report.plugins import Plugin, RedHatPlugin, PluginOpt
|
|
|
|
|
|
class Openshift(Plugin, RedHatPlugin):
|
|
"""This is the plugin for OCP 4.x collections. While this product is still
|
|
built ontop of kubernetes, there is enough difference in the collection
|
|
requirements and approach to warrant a separate plugin as opposed to
|
|
further extending the kubernetes plugin (or the OCP 3.x extensions included
|
|
in the Red Hat version of the kube plugin).
|
|
|
|
This plugin may collect OCP API information when the `with-api` option is
|
|
enabled. This option is disabled by default.
|
|
|
|
When enabled, this plugin will collect cluster information and inspect the
|
|
default namespaces/projects that are created during deployment - i.e. the
|
|
namespaces of the cluster projects matching openshift.* and kube.*. At the
|
|
time of this plugin's creation that number of default projects is already
|
|
north of 50; hence this plugin is expected to take a long time in both the
|
|
setup() and collect() phases. End-user projects may also be collected from
|
|
when those projects are included in the `add-namespaces` or
|
|
`only-namespaces` options.
|
|
|
|
It is expected to need to perform an `oc login` command in order for this
|
|
plugin to be able to correctly capture information, as system root is not
|
|
considered cluster root on the cluster nodes in order to access the API.
|
|
|
|
Users will need to either:
|
|
|
|
1) Accept the use of a well-known stock kubeconfig file provided via a
|
|
static pod resource for the kube-apiserver
|
|
2) Provide the bearer token via the `-k openshift.token` option
|
|
3) Provide the bearer token via the `SOSOCPTOKEN` environment variable
|
|
4) Otherwise ensure that the root user can successfully run `oc` and
|
|
get proper output prior to running this plugin
|
|
|
|
|
|
It is highly suggested that option #1 be used first, as this uses well
|
|
known configurations and requires the least information from the user. If
|
|
using a token, it is recommended to use option #3 as this will prevent
|
|
the token from being recorded in output saved to the archive. Option #2 may
|
|
be used if this is considered an acceptable risk. It is not recommended to
|
|
rely on option #4, though it will provide the functionality needed.
|
|
"""
|
|
|
|
short_desc = 'Openshift Container Platform 4.x'
|
|
|
|
plugin_name = "openshift"
|
|
plugin_timeout = 900
|
|
profiles = ('openshift',)
|
|
packages = ('openshift-hyperkube', 'openshift-kubelet')
|
|
|
|
master_localhost_kubeconfig = (
|
|
'/etc/kubernetes/static-pod-resources/'
|
|
'kube-apiserver-certs/secrets/node-kubeconfigs/localhost.kubeconfig'
|
|
)
|
|
|
|
oc_cmd = "oc get "
|
|
|
|
option_list = [
|
|
PluginOpt('token', default=None, val_type=str,
|
|
desc='admin token to allow API queries'),
|
|
PluginOpt('kubeconfig', default=None, val_type=str,
|
|
desc='Path to a locally available kubeconfig file'),
|
|
PluginOpt('host', default='https://localhost:6443',
|
|
desc='host address to use for oc login, including port'),
|
|
PluginOpt('with-api', default=False,
|
|
desc='collect output from the OCP API'),
|
|
PluginOpt('podlogs', default=True, desc='collect logs from each pod'),
|
|
PluginOpt('podlogs-filter', default='', val_type=str,
|
|
desc='only collect logs from pods matching this pattern'),
|
|
PluginOpt('only-namespaces', default='', val_type=str,
|
|
desc='colon-delimited list of namespaces to collect from'),
|
|
PluginOpt('add-namespaces', default='', val_type=str,
|
|
desc=('colon-delimited list of namespaces to add to the '
|
|
'default collection list'))
|
|
]
|
|
|
|
def _check_oc_function(self):
|
|
"""Check to see if we can run `oc` commands"""
|
|
return self.exec_cmd('oc whoami')['status'] == 0
|
|
|
|
def _check_localhost_kubeconfig(self):
|
|
"""Check if the localhost.kubeconfig exists with system:admin user"""
|
|
return self.path_exists(self.get_option('kubeconfig'))
|
|
|
|
def _check_oc_logged_in(self):
|
|
"""See if we're logged in to the API service, and if not attempt to do
|
|
so using provided plugin options
|
|
"""
|
|
if self._check_oc_function():
|
|
return True
|
|
|
|
if self.get_option('kubeconfig') is None:
|
|
# If admin doesn't add the kubeconfig
|
|
# use default localhost.kubeconfig
|
|
self.set_option(
|
|
'kubeconfig',
|
|
self.master_localhost_kubeconfig
|
|
)
|
|
|
|
# Check first if we can use the localhost.kubeconfig before
|
|
# using token. We don't want to use 'host' option due we use
|
|
# cluster url from kubeconfig. Default is localhost.
|
|
if self._check_localhost_kubeconfig():
|
|
self.set_default_cmd_environment({
|
|
'KUBECONFIG': self.get_option('kubeconfig')
|
|
})
|
|
|
|
oc_res = self.exec_cmd(
|
|
"oc login -u system:admin "
|
|
"--insecure-skip-tls-verify=True"
|
|
)
|
|
if oc_res['status'] == 0 and self._check_oc_function():
|
|
return True
|
|
|
|
self._log_warn(
|
|
"The login command failed with status: "
|
|
f"{oc_res['status']} and error: {oc_res['output']}"
|
|
)
|
|
return False
|
|
|
|
# If kubeconfig is not defined, check if token is provided.
|
|
token = self.get_option('token') or os.getenv('SOSOCPTOKEN', None)
|
|
|
|
if token:
|
|
oc_res = self.exec_cmd(f"oc login {self.get_option('host')} "
|
|
f"--token={token} "
|
|
"--insecure-skip-tls-verify=True")
|
|
if oc_res['status'] == 0:
|
|
if self._check_oc_function():
|
|
return True
|
|
|
|
self._log_warn("Attempt to login to OCP API failed, will not run "
|
|
"or collect `oc` commands")
|
|
return False
|
|
|
|
self._log_warn("Not logged in to OCP API, and no login token provided."
|
|
" Will not collect `oc` commands")
|
|
return False
|
|
|
|
def _setup_namespace_regexes(self):
|
|
"""Combine a set of regexes for collection with any namespaces passed
|
|
to sos via the -k openshift.add-namespaces option. Note that this does
|
|
allow for end users to specify namespace regexes of their own.
|
|
"""
|
|
|
|
if self.get_option('only-namespaces'):
|
|
return list(self.get_option('only-namespaces').split(':'))
|
|
|
|
collect_regexes = [
|
|
'openshift.*',
|
|
'kube.*'
|
|
]
|
|
|
|
if self.get_option('add-namespaces'):
|
|
for nsp in self.get_option('add-namespaces').split(':'):
|
|
collect_regexes.append(nsp)
|
|
|
|
return collect_regexes
|
|
|
|
def _reduce_namespace_list(self, nsps):
|
|
"""Reduce the namespace listing returned to just the ones we want to
|
|
collect from. By default, as requested by OCP support personnel, this
|
|
must include all 'openshift' prefixed namespaces
|
|
|
|
:param nsps list: Namespace names from oc output
|
|
"""
|
|
|
|
def _match_namespace(namespace, regexes):
|
|
"""Match a particular namespace for inclusion (or not) in the
|
|
collection phases
|
|
|
|
:param namespace str: The name of a namespace
|
|
"""
|
|
|
|
for regex in regexes:
|
|
if re.match(regex, namespace):
|
|
return True
|
|
return False
|
|
|
|
regexes = self._setup_namespace_regexes()
|
|
|
|
return list({n for n in nsps if _match_namespace(n, regexes)})
|
|
|
|
def setup(self):
|
|
"""The setup() phase of this plugin will iterate through all default
|
|
projects (namespaces), and/or those specified via the `add-namespaces`
|
|
and `only-namespaces` plugin options. Both of these options accept
|
|
shell-style regexes.
|
|
|
|
Cluster-wide information, that is information that is not tied to a
|
|
specific namespace, will be saved in the top-level plugin directory.
|
|
Each namespace will have it's own subdir within the `namespaces` subdir
|
|
to aide in organization. From there, each namespace subdir will have a
|
|
subsequent subdir for each type of API resource the plugin collects.
|
|
|
|
In contrast with the `kubernetes` plugin, this plugin will collect
|
|
logs from all pods within each namespace, as well as the previous pod's
|
|
logs, by default. The `-k openshift.podlogs-filter` option can be used
|
|
to greatly reduce the amount of collected information.
|
|
"""
|
|
|
|
# Capture the kubelet journal, but don't use it as a service which
|
|
# would simultaneously enable this and the kubernetes plugin
|
|
self.add_journal('kubelet')
|
|
self.add_service_status('kubelet')
|
|
self.add_forbidden_path([
|
|
'/etc/kubernetes/*.crt',
|
|
'/etc/kubernetes/*.key',
|
|
])
|
|
self.add_copy_spec('/etc/kubernetes/*')
|
|
|
|
# see if we run `oc` commands
|
|
if self.get_option('with-api'):
|
|
can_run_oc = self._check_oc_logged_in()
|
|
else:
|
|
can_run_oc = False
|
|
|
|
if can_run_oc:
|
|
# with an out-of-the-box install, setup time alone has been known
|
|
# to take over 5 minutes. Print a notification message so that
|
|
# users don't prematurely think sos has hung during setup
|
|
self._log_warn(
|
|
'Note that the Openshift Container Platform plugin can be '
|
|
'expected in most configurations to take 5+ minutes in both '
|
|
'the setup and collection phases'
|
|
)
|
|
|
|
oc_nsps = []
|
|
|
|
# get 'global' or cluster-level information
|
|
self.add_cmd_output([
|
|
'oc cluster-info',
|
|
'oc get -A pv',
|
|
'oc get -A csr',
|
|
'oc status',
|
|
'oc version'
|
|
])
|
|
|
|
# get non-namespaces api resources
|
|
self.collect_cluster_resources()
|
|
|
|
# get all namespaces, as data collection will be organized by that
|
|
_nm_res = self.collect_cmd_output(f"{self.oc_cmd} namespaces")
|
|
if _nm_res['status'] == 0:
|
|
nsps = [
|
|
n.split()[0] for n in _nm_res['output'].splitlines()[1:]
|
|
]
|
|
oc_nsps = self._reduce_namespace_list(nsps)
|
|
|
|
# collect each namespace individually
|
|
for namespace in oc_nsps:
|
|
self.collect_from_namespace(namespace)
|
|
|
|
def collect_cluster_resources(self):
|
|
"""Collect cluster-level (non-namespaced) resources from the API
|
|
"""
|
|
global_resources = [
|
|
'clusternetworks',
|
|
'clusteroperators',
|
|
'clusterversions',
|
|
'componentstatuses',
|
|
'configs',
|
|
'containerruntimeconfigs',
|
|
'controllerconfigs',
|
|
'dnses',
|
|
'hostsubnets',
|
|
'infrastructures',
|
|
'machineconfigpools',
|
|
'machineconfigs',
|
|
'netnamespaces',
|
|
'networks',
|
|
'nodes',
|
|
'proxies',
|
|
'storageclasses'
|
|
]
|
|
|
|
for resource in global_resources:
|
|
_subdir = f"cluster_resources/{resource}"
|
|
_tag = [f"ocp_{resource}"]
|
|
_res = self.collect_cmd_output(f"{self.oc_cmd} {resource}",
|
|
subdir=_subdir, tags=_tag)
|
|
if _res['status'] == 0:
|
|
for _res_name in _res['output'].splitlines()[1:]:
|
|
self.add_cmd_output(
|
|
f"oc describe {resource} {_res_name.split()[0]}",
|
|
subdir=_subdir
|
|
)
|
|
|
|
def collect_from_namespace(self, namespace):
|
|
"""Run through the collection routines for an individual namespace.
|
|
This collection should include all requested resources that exist
|
|
within that namesapce
|
|
|
|
:param namespace str: The name of the namespace
|
|
"""
|
|
|
|
# define the list of resources to collect
|
|
resources = [
|
|
'buildconfigs',
|
|
'builds',
|
|
'catalogsourceconfigs',
|
|
'catalogsources',
|
|
'clusterserviceversions',
|
|
'configmaps',
|
|
'daemonsets',
|
|
'deploymentconfigs',
|
|
'deployments',
|
|
'events',
|
|
'horizontalpodautoscalers',
|
|
'imagestreams',
|
|
'ingresscontrollers',
|
|
'ingresses',
|
|
'installplans',
|
|
'limitranges',
|
|
'machines',
|
|
'machinesets',
|
|
'mcoconfigs',
|
|
'net-attach-def',
|
|
'operatorgroups',
|
|
'operatorsources',
|
|
'pods',
|
|
'pvc',
|
|
'resourcequotas',
|
|
'routes',
|
|
'secrets',
|
|
'services',
|
|
'statefulsets',
|
|
'subscriptions'
|
|
|
|
]
|
|
|
|
# save to namespace-specific subdirs to keep the plugin dir organized
|
|
subdir = f"namespaces/{namespace}"
|
|
|
|
# namespace-specific non-resource collections
|
|
self.add_cmd_output(f"oc describe namespace {namespace}",
|
|
subdir=subdir)
|
|
|
|
for res in resources:
|
|
_subdir = f"{subdir}/{res}"
|
|
_tags = [
|
|
f"ocp_{res}",
|
|
f"ocp_{namespace}_{res}",
|
|
namespace
|
|
]
|
|
_get_cmd = f"{self.oc_cmd} --namespace={namespace} {res}"
|
|
# get the 'normal' output first
|
|
_res_out = self.collect_cmd_output(
|
|
_get_cmd,
|
|
subdir=_subdir,
|
|
tags=_tags
|
|
)
|
|
|
|
# then get specific detail on each instance of the resource
|
|
if _res_out['status'] == 0:
|
|
_instances = _res_out['output'].splitlines()[1:]
|
|
for _instance in _instances:
|
|
_instance_name = _instance.split()[0]
|
|
self.add_cmd_output(
|
|
f"{_get_cmd} {_instance_name} -o yaml",
|
|
subdir=_subdir,
|
|
suggest_filename=f"{_instance_name}.yaml"
|
|
)
|
|
# check for podlogs here as a slight optimization to re-running
|
|
# 'oc get pods' on all namespaces
|
|
if res == 'pods' and _instances and self.get_option('podlogs'):
|
|
pod_list = [p.split()[0] for p in _instances]
|
|
self.collect_podlogs(namespace, pod_list)
|
|
|
|
def collect_podlogs(self, namespace, pod_list):
|
|
"""For any namespace that has active pods in it, collect the current
|
|
and previous pod's logs
|
|
|
|
:param pod_list list: A list of pod names
|
|
"""
|
|
_log_dir = f"namespaces/{namespace}/pods/podlogs"
|
|
|
|
if self.get_option('podlogs-filter'):
|
|
# this allows shell-style regex which is more commonly known by
|
|
# sysadmins than python-style regex
|
|
regex = translate(self.get_option('podlogs-filter'))
|
|
else:
|
|
regex = None
|
|
|
|
for pod in pod_list:
|
|
if regex and not re.match(regex, pod):
|
|
continue
|
|
_log_cmd = f"oc logs --namespace={namespace} {pod}"
|
|
self.add_cmd_output([
|
|
_log_cmd,
|
|
_log_cmd + " -p"
|
|
], subdir=_log_dir)
|
|
|
|
def postproc(self):
|
|
|
|
# clear any certificate output
|
|
self.do_cmd_private_sub('oc ')
|
|
self.do_file_private_sub('/etc/kubernetes/*')
|
|
|
|
# clear the certificate data from /etc/kubernetes that does not have
|
|
# the certificate banners that the _private_sub() methods look for
|
|
_fields = [
|
|
'.*.crt',
|
|
'client-certificate-data',
|
|
'client-key-data',
|
|
'certificate-authority-data',
|
|
'.*.key',
|
|
'token',
|
|
'.*token.*.value' # don't blind match `.*token.*` and lose names
|
|
]
|
|
|
|
regex = fr'(\s*({"|".join(_fields)}):)(.*)'
|
|
|
|
self.do_path_regex_sub('/etc/kubernetes/*', regex, r'\1 *******')
|
|
# scrub secret content
|
|
self.do_cmd_output_sub('secrets', regex, r'\1 *******')
|
|
|
|
# `oc describe` output can include url-encoded file content. For the
|
|
# most part this is not important as the majority of these instances
|
|
# are the contents of bash scripts. However, a select few can contain
|
|
# actual data, so just scrub everything that matches the describe
|
|
# format for this content
|
|
regex = r'(?P<var>(.*\\n)?Source:\s(.*),)((.*?))\n'
|
|
self.do_cmd_output_sub('oc describe', regex, r'\g<var> *******\n')
|
|
|
|
# vim: set et ts=4 sw=4 :
|