# server/usr/lib/python3/dist-packages/cloudinit/cmd/status.py

#!/usr/bin/env python3

# Copyright (C) 2017 Canonical Ltd.
#
# This file is part of cloud-init. See LICENSE file for license information.

"""Define 'status' utility and handler as part of cloud-init command line."""

import argparse
import enum
import json
import os
import sys
from copy import deepcopy
from time import gmtime, sleep, strftime
from typing import Any, Dict, List, NamedTuple, Optional, Tuple

from cloudinit import safeyaml, subp
from cloudinit.cmd.devel import read_cfg_paths
from cloudinit.distros import uses_systemd
from cloudinit.helpers import Paths
from cloudinit.util import get_cmdline, load_json, load_text_file

CLOUDINIT_DISABLED_FILE = "/etc/cloud/cloud-init.disabled"


@enum.unique
class RunningStatus(enum.Enum):
    """Enum representing user-visible cloud-init application status.

    The member values are the strings translate_status() uses to build the
    reported ``status`` and ``extended_status``.
    """

    # Not disabled, not running, and no stage start/finish time recorded.
    NOT_STARTED = "not started"
    # status.json exists but result.json does not (see is_running()).
    RUNNING = "running"
    # Not running and at least one stage recorded a start or finish time.
    DONE = "done"
    # The boot status code is one of DISABLED_BOOT_CODES.
    DISABLED = "disabled"


@enum.unique
class ConditionStatus(enum.Enum):
    """Enum representing user-visible cloud-init condition status.

    get_status_details() starts from PEACHY and switches to ERROR when any
    stage reported errors (or a systemd unit failure is detected), or to
    DEGRADED when only recoverable errors were reported.
    """

    ERROR = "error"  # cloud-init exited abnormally
    DEGRADED = "degraded"  # we have warnings
    PEACHY = "healthy"  # internal names can be fun, right?


@enum.unique
class EnabledStatus(enum.Enum):
    """Enum representing user-visible cloud-init boot status codes.

    get_bootstatus() picks exactly one of these members; the comments below
    summarize the condition it checks for each.
    """

    # "<run_dir>/disabled" exists.
    DISABLED_BY_GENERATOR = "disabled-by-generator"
    # "cloud-init=disabled" is present on the kernel command line.
    DISABLED_BY_KERNEL_CMDLINE = "disabled-by-kernel-command-line"
    # The disable marker file exists.
    DISABLED_BY_MARKER_FILE = "disabled-by-marker-file"
    # "cloud-init=disabled" found in the KERNEL_CMDLINE environment variable
    # or in the output of `systemctl show-environment`.
    DISABLED_BY_ENV_VARIABLE = "disabled-by-environment-variable"
    # "<run_dir>/enabled" exists.
    ENABLED_BY_GENERATOR = "enabled-by-generator"
    # "cloud-init=enabled" is present on the kernel command line.
    ENABLED_BY_KERNEL_CMDLINE = "enabled-by-kernel-command-line"
    # uses_systemd() returned a false value.
    ENABLED_BY_SYSVINIT = "enabled-by-sysvinit"
    # None of the checks above matched.
    UNKNOWN = "unknown"


# Boot status codes that mean cloud-init is disabled. Used to map a boot
# status to RunningStatus.DISABLED and to answer is_cloud_init_enabled().
DISABLED_BOOT_CODES = frozenset(
    [
        EnabledStatus.DISABLED_BY_GENERATOR,
        EnabledStatus.DISABLED_BY_KERNEL_CMDLINE,
        EnabledStatus.DISABLED_BY_MARKER_FILE,
        EnabledStatus.DISABLED_BY_ENV_VARIABLE,
    ]
)


class StatusDetails(NamedTuple):
    """Snapshot of cloud-init status as assembled by get_status_details()."""

    # Whether cloud-init is not started, running, done or disabled.
    running_status: RunningStatus
    # Health of the run: healthy, degraded or error.
    condition_status: ConditionStatus
    # How cloud-init was enabled or disabled for this boot.
    boot_status_code: EnabledStatus
    # Human readable detail: the datasource, the running stage, or the
    # reason reported for the boot status.
    description: str
    # Error messages collected from every stage in status.json.
    errors: List[str]
    # Recoverable error messages from all stages, grouped by error type.
    recoverable_errors: Dict[str, List[str]]
    # Latest stage start/finish time rendered with
    # "%a, %d %b %Y %H:%M:%S %z" via gmtime(), or "" when none was recorded.
    last_update: str
    # Lowercased datasource name without the "datasource" prefix.
    datasource: Optional[str]
    # The "v1" section of status.json with its "datasource" key removed.
    v1: Dict[str, Any]


# Template for the extra lines printed by `status --long` in tabular format.
# The {last_update} value is either empty or a complete "last_update: ..."
# line including its trailing newline, which is why it shares a line with
# "detail:". The errors and recoverable_errors values carry their own leading
# whitespace or newline.
TABULAR_LONG_TMPL = """\
extended_status: {extended_status}
boot_status_code: {boot_code}
{last_update}detail: {description}
errors:{errors}
recoverable_errors:{recoverable_errors}"""


def query_systemctl(
    systemctl_args: List[str],
    *,
    wait: bool,
) -> str:
    """Run ``systemctl`` with the given arguments and return its output.

    @param systemctl_args: Arguments appended after the ``systemctl`` command.
    @param wait: When True, a failing call is retried every 0.25 seconds
        until it succeeds; when False the failure is raised to the caller.

    @returns: The command's stdout with surrounding whitespace stripped.
    @raises: subp.ProcessExecutionError if the call fails and wait is False.
    """
    while True:
        command = ["systemctl", *systemctl_args]
        try:
            result = subp.subp(command)
        except subp.ProcessExecutionError:
            if wait:
                # Caller asked to block: pause briefly and try again.
                sleep(0.25)
                continue
            raise
        return result.stdout.strip()


def get_parser(parser=None):
    """Build or extend an arg parser for status utility.

    @param parser: Optional existing ArgumentParser instance representing the
        status subcommand which will be extended to support the args of
        this utility.

    @returns: ArgumentParser with proper argument configuration.
    """
    if parser is None:
        parser = argparse.ArgumentParser(
            prog="status", description="Report run status of cloud init"
        )
    parser.add_argument(
        "--format",
        type=str,
        choices=["json", "tabular", "yaml"],
        default="tabular",
        # This is the status subcommand, not cloud-id.
        help="Specify output format for cloud-init status (default: tabular)",
    )
    parser.add_argument(
        "-l",
        "--long",
        action="store_true",
        default=False,
        help=(
            "Report long format of statuses including run stage name and"
            " error messages"
        ),
    )
    parser.add_argument(
        "-w",
        "--wait",
        action="store_true",
        default=False,
        help="Block waiting on cloud-init to complete",
    )
    return parser


def translate_status(
    running: RunningStatus, condition: ConditionStatus
) -> Tuple[str, str]:
    """Map running and condition status onto the strings shown to users.

    Returns (status, extended_status). ``status`` stays coarse for backwards
    compatibility, while ``extended_status`` also carries the condition.
    """
    running_text = running.value

    # Any error condition overrides the running state in the short status.
    if condition == ConditionStatus.ERROR:
        return "error", f"{condition.value} - {running_text}"

    # Only "done" and "running" have a degraded variant; every other
    # combination reports the plain running state twice.
    has_degraded_variant = running in (
        RunningStatus.DONE,
        RunningStatus.RUNNING,
    )
    if condition == ConditionStatus.DEGRADED and has_degraded_variant:
        extended_text = f"{condition.value} {running_text}"
    else:
        extended_text = running_text
    return running_text, extended_text


def print_status(args, details: StatusDetails):
    """Print status out to the CLI.

    @param args: Parsed command line namespace. ``format`` selects tabular,
        json or yaml output and ``long`` adds the extended tabular report.
    @param details: The StatusDetails to report.

    Everything is written to stdout. A ``format`` value other than the three
    handled here prints nothing.
    """
    status, extended_status = translate_status(
        details.running_status, details.condition_status
    )
    # The keys of details.v1 are merged last, so on a name clash they take
    # precedence over the computed keys listed before them.
    details_dict: Dict[str, Any] = {
        "datasource": details.datasource,
        "boot_status_code": details.boot_status_code.value,
        "status": status,
        "extended_status": extended_status,
        "detail": details.description,
        "errors": details.errors,
        "recoverable_errors": details.recoverable_errors,
        "last_update": details.last_update,
        **details.v1,
    }
    if args.format == "tabular":
        # For backwards compatibility, don't report degraded status here,
        # extended_status key reports the complete status (includes degraded)
        print(f"status: {details_dict['status']}")
        if args.long:
            # The last_update line is dropped entirely when no time is known.
            if details_dict.get("last_update"):
                last_update = f"last_update: {details_dict['last_update']}\n"
            else:
                last_update = ""
            # One "- message" bullet per error, or an inline empty list.
            errors_output = (
                "\n\t- " + "\n\t- ".join(details_dict["errors"])
                if details_dict["errors"]
                else " []"
            )
            # Recoverable errors are grouped under their type; embedded
            # newlines are flattened so each message stays on one bullet.
            recoverable_errors_output = (
                "\n"
                + "\n".join(
                    [
                        f"{k}:\n\t- "
                        + "\n\t- ".join([i.replace("\n", " ") for i in v])
                        for k, v in details_dict["recoverable_errors"].items()
                    ]
                )
                if details_dict["recoverable_errors"]
                else " {}"
            )
            print(
                TABULAR_LONG_TMPL.format(
                    extended_status=details_dict["extended_status"],
                    boot_code=details_dict["boot_status_code"],
                    description=details_dict["detail"],
                    last_update=last_update,
                    errors=errors_output,
                    recoverable_errors=recoverable_errors_output,
                )
            )
    elif args.format == "json":
        print(
            json.dumps(  # Pretty, sorted json
                details_dict, indent=2, sort_keys=True, separators=(",", ": ")
            )
        )
    elif args.format == "yaml":
        print(safeyaml.dumps(details_dict))


def handle_status_args(name, args) -> int:
    """Handle calls to 'cloud-init status' as a subcommand.

    @param name: Name of the invoked subcommand; not used by this handler.
    @param args: Parsed command line namespace. ``wait`` makes the call block
        until cloud-init is neither "not started" nor "running", and
        ``format`` selects the output style.

    @returns: Exit code: 1 when the condition is an error, 2 when it is
        degraded (recoverable errors), otherwise 0.
    """
    # Read configured paths
    paths = read_cfg_paths()
    details = get_status_details(paths, args.wait)
    if args.wait:
        while details.running_status in (
            RunningStatus.NOT_STARTED,
            RunningStatus.RUNNING,
        ):
            if args.format == "tabular":
                # Progress indicator while blocking.
                sys.stdout.write(".")
                sys.stdout.flush()
            # Pause before polling again so that the loop exits as soon as a
            # final status is observed and the status that gets printed is
            # the most recently read one.
            sleep(0.25)
            details = get_status_details(paths, args.wait)

    print_status(args, details)

    # Hard error
    if details.condition_status == ConditionStatus.ERROR:
        return 1
    # Recoverable error
    elif details.condition_status == ConditionStatus.DEGRADED:
        return 2
    return 0


def _disabled_via_environment(wait) -> bool:
    """Tell whether `systemctl show-environment` lists cloud-init=disabled.

    A failing systemctl call is treated as "not disabled".
    """
    try:
        environment = query_systemctl(["show-environment"], wait=wait)
    except subp.ProcessExecutionError:
        return False
    return "cloud-init=disabled" in environment


def get_bootstatus(disable_file, paths, wait) -> Tuple[EnabledStatus, str]:
    """Report cloud-init's current boot status.

    The checks run in a fixed order and the first match wins.

    @param disable_file: The path to the cloud-init disable file.
    @param paths: An initialized cloudinit.helpers.Paths object.
    @param wait: If user has indicated to wait for cloud-init to complete.
    @returns: A tuple containing (code, reason) about cloud-init's status and
    why.
    """
    kernel_args = get_cmdline().split()

    def _run_marker_exists(marker_name):
        # Marker files are looked up in cloud-init's run directory.
        return os.path.exists(os.path.join(paths.run_dir, marker_name))

    if not uses_systemd():
        return (
            EnabledStatus.ENABLED_BY_SYSVINIT,
            "Cloud-init enabled on sysvinit",
        )

    if "cloud-init=enabled" in kernel_args:
        return (
            EnabledStatus.ENABLED_BY_KERNEL_CMDLINE,
            "Cloud-init enabled by kernel command line cloud-init=enabled",
        )

    if os.path.exists(disable_file):
        return (
            EnabledStatus.DISABLED_BY_MARKER_FILE,
            f"Cloud-init disabled by {disable_file}",
        )

    if "cloud-init=disabled" in kernel_args:
        return (
            EnabledStatus.DISABLED_BY_KERNEL_CMDLINE,
            "Cloud-init disabled by kernel parameter cloud-init=disabled",
        )

    # Check the cheap process environment first; systemd is only queried
    # when that does not already say "disabled".
    disabled_by_env = "cloud-init=disabled" in os.environ.get(
        "KERNEL_CMDLINE", ""
    )
    if not disabled_by_env:
        disabled_by_env = uses_systemd() and _disabled_via_environment(
            wait=wait
        )
    if disabled_by_env:
        return (
            EnabledStatus.DISABLED_BY_ENV_VARIABLE,
            "Cloud-init disabled by environment variable "
            "KERNEL_CMDLINE=cloud-init=disabled",
        )

    if _run_marker_exists("disabled"):
        return (
            EnabledStatus.DISABLED_BY_GENERATOR,
            "Cloud-init disabled by cloud-init-generator",
        )

    if _run_marker_exists("enabled"):
        return (
            EnabledStatus.ENABLED_BY_GENERATOR,
            "Cloud-init enabled by systemd cloud-init-generator",
        )

    return (
        EnabledStatus.UNKNOWN,
        "Systemd generator may not have run yet.",
    )


def is_cloud_init_enabled() -> bool:
    """Return True unless the boot status says cloud-init is disabled."""
    boot_code = get_status_details(read_cfg_paths()).boot_status_code
    return boot_code not in DISABLED_BOOT_CODES


def systemd_failed(wait: bool) -> bool:
    """Return if systemd units report a cloud-init error.

    The cloud-init services are inspected one at a time. A service that is
    active and exited normally moves the scan on to the next one; anything
    else ends the scan with a verdict.

    @param wait: Passed on to query_systemctl(); when True a failing
        systemctl call is retried instead of raising.
    @returns: True when a unit is neither enabled nor static, or when a unit
        that is not active reports a failed state. False otherwise,
        including when systemd cannot be queried.
    """
    for service in [
        "cloud-final.service",
        "cloud-config.service",
        "cloud-init.service",
        "cloud-init-local.service",
    ]:
        try:
            stdout = query_systemctl(
                [
                    "show",
                    "--property=ActiveState,UnitFileState,SubState,MainPID",
                    service,
                ],
                wait=wait,
            )
        except subp.ProcessExecutionError as e:
            # Systemd isn't ready, assume the same state
            print(
                "Failed to get status from systemd. "
                "Cloud-init status may be inaccurate. "
                f"Error from systemctl: {e.stderr}",
                file=sys.stderr,
            )
            return False
        # Parse "Property=value" lines. Only the first "=" separates the
        # name from the value, so a value that itself contains "=" (or a
        # line without one) no longer breaks the parsing.
        states = {}
        for line in stdout.splitlines():
            key, _, value = line.partition("=")
            states[key.strip()] = value.strip()
        if not (
            states["UnitFileState"].startswith("enabled")
            or states["UnitFileState"] == "static"
        ):
            # Individual services should not get disabled
            return True
        elif states["ActiveState"] == "active":
            if states["SubState"] == "exited":
                # Service exited normally, nothing interesting from systemd
                continue
            elif states["SubState"] == "running" and states["MainPID"] == "0":
                # Service is active, substate still reports running due to
                # daemon or background process spawned by CGroup/slice still
                # running. MainPID being set back to 0 means control of the
                # service/unit has exited in this case and
                # "the process is no longer around".
                return False
        elif (
            states["ActiveState"] == "failed" or states["SubState"] == "failed"
        ):
            return True
        # If we made it here, our unit is enabled and it hasn't exited
        # normally or exited with failure, so it is still running.
        return False
    # All services exited normally or aren't enabled, so don't report
    # any particular status based on systemd.
    return False


def is_running(status_file, result_file) -> bool:
    """Return True if cloud-init is running.

    Running means the status file exists while the result file does not.
    """
    if not os.path.exists(status_file):
        return False
    return not os.path.exists(result_file)


def get_running_status(
    status_file, result_file, boot_status_code, latest_event
) -> RunningStatus:
    """Work out cloud-init's running status.

    Disabled wins over running, which wins over done; "not started" is what
    remains when no stage event has been recorded.
    """
    if boot_status_code in DISABLED_BOOT_CODES:
        return RunningStatus.DISABLED
    if is_running(status_file, result_file):
        return RunningStatus.RUNNING
    # A positive timestamp means at least one stage started or finished.
    if latest_event > 0:
        return RunningStatus.DONE
    return RunningStatus.NOT_STARTED


def get_datasource(status_v1) -> str:
    """Extract the short datasource name from status.json's v1 data.

    Only the first space-separated word is kept; it is lowercased and the
    text "datasource" is removed, so "DataSourceEc2" becomes "ec2". A missing
    or empty value is returned unchanged.
    """
    raw_value = status_v1.get("datasource", "")
    if not raw_value:
        return raw_value
    first_word = raw_value.split(" ", 1)[0]
    return first_word.lower().replace("datasource", "")


def get_description(status_v1, boot_description):
    """Pick the most specific description of the current status.

    Preference order: the recorded datasource, then the stage currently
    being run, and finally the supplied boot_description.
    """
    source = status_v1.get("datasource")
    if source:
        return source

    stage = status_v1.get("stage")
    if stage:
        return f"Running in stage: {stage}"

    return boot_description


def get_latest_event(status_v1):
    """Find the most recent start/finished time across all stages.

    Entries of status_v1 that are not dicts are skipped, and missing or
    empty timestamps count as 0. Returns 0 when nothing was recorded.
    """
    timestamps = [0]
    for entry in status_v1.values():
        if not isinstance(entry, dict):
            continue
        timestamps.append(entry.get("start") or 0)
        timestamps.append(entry.get("finished") or 0)
    return max(timestamps)


def get_errors(status_v1) -> Tuple[List, Dict]:
    """Collect errors and recoverable_errors from every stage in status_v1.

    Stages are visited in sorted key order. Entries that are not dicts are
    ignored. Recoverable errors from all stages are merged per error type.
    """
    all_errors = []
    merged_recoverable = {}
    for stage_name in sorted(status_v1):
        stage = status_v1[stage_name]
        if not isinstance(stage, dict):
            continue
        all_errors += stage.get("errors", [])

        for kind, messages in stage.get("recoverable_errors", {}).items():
            if kind in merged_recoverable:
                merged_recoverable[kind].extend(messages)
            else:
                # Copy on first sight so that later extends never modify the
                # list that belongs to status_v1.
                merged_recoverable[kind] = deepcopy(messages)
    return all_errors, merged_recoverable


def get_status_details(
    paths: Optional[Paths] = None, wait: bool = False
) -> StatusDetails:
    """Gather cloud-init's current status into a StatusDetails tuple.

    @param paths: An initialized cloudinit.helpers.paths object. When not
        given, the configured paths are read.
    @param wait: If user has indicated to wait for cloud-init to complete.

    Values are obtained from parsing paths.run_dir/status.json, combined with
    the boot status and, while cloud-init is running on systemd, the state of
    its systemd units.
    """
    if not paths:
        paths = read_cfg_paths()
    run_dir = paths.run_dir
    status_file = os.path.join(run_dir, "status.json")
    result_file = os.path.join(run_dir, "result.json")

    boot_status_code, boot_description = get_bootstatus(
        CLOUDINIT_DISABLED_FILE, paths, wait
    )

    # Without a status file every derived value falls back to its default.
    status_v1 = {}
    if os.path.exists(status_file):
        status_v1 = load_json(load_text_file(status_file)).get("v1", {})

    datasource = get_datasource(status_v1)
    description = get_description(status_v1, boot_description)

    latest_event = get_latest_event(status_v1)
    if latest_event:
        last_update = strftime(
            "%a, %d %b %Y %H:%M:%S %z", gmtime(latest_event)
        )
    else:
        last_update = ""

    # Errors outrank recoverable errors when choosing the condition.
    errors, recoverable_errors = get_errors(status_v1)
    if errors:
        condition_status = ConditionStatus.ERROR
    elif recoverable_errors:
        condition_status = ConditionStatus.DEGRADED
    else:
        condition_status = ConditionStatus.PEACHY

    running_status = get_running_status(
        status_file, result_file, boot_status_code, latest_event
    )

    # The status files may still say "running" although a systemd unit has
    # failed; in that case report the run as finished with an error.
    if running_status == RunningStatus.RUNNING and uses_systemd():
        if systemd_failed(wait=wait):
            running_status = RunningStatus.DONE
            condition_status = ConditionStatus.ERROR
            description = "Failed due to systemd unit failure"
            errors.append(
                "Failed due to systemd unit failure. Ensure all cloud-init "
                "services are enabled, and check 'systemctl' or 'journalctl' "
                "for more information."
            )

    # this key is a duplicate
    status_v1.pop("datasource", None)

    return StatusDetails(
        running_status=running_status,
        condition_status=condition_status,
        boot_status_code=boot_status_code,
        description=description,
        errors=errors,
        recoverable_errors=recoverable_errors,
        last_update=last_update,
        datasource=datasource,
        v1=status_v1,
    )


def main():
    """Tool to report status of cloud-init.

    Parses the command line and exits with the status handler's return code.
    """
    cli_args = get_parser().parse_args()
    exit_code = handle_status_args("status", cli_args)
    sys.exit(exit_code)


# Run the status tool when this module is executed as a script.
if __name__ == "__main__":
    main()