450 lines
15 KiB
Python
450 lines
15 KiB
Python
# This file is part of cloud-init. See LICENSE file for license information.
|
|
|
|
import abc
|
|
import fcntl
|
|
import json
|
|
import logging
|
|
import os
|
|
import queue
|
|
import struct
|
|
import threading
|
|
import time
|
|
import uuid
|
|
from datetime import datetime, timezone
|
|
from threading import Event
|
|
from typing import Union
|
|
|
|
from cloudinit import performance, url_helper, util
|
|
from cloudinit.registry import DictRegistry
|
|
|
|
LOG = logging.getLogger(__name__)
|
|
|
|
|
|
class ReportException(Exception):
    """Raised for reporting-specific failures (e.g. malformed KVP records)."""
|
|
|
|
|
class ReportingHandler(abc.ABC):
    """Abstract base for event report handlers.

    Subclasses decide what happens to each event by implementing
    :meth:`~publish_event`; they may also override :meth:`flush` when
    delivery is asynchronous.
    """

    @abc.abstractmethod
    def publish_event(self, event):
        """Deliver a single event; must be provided by subclasses."""

    def flush(self):
        """Ensure ReportingHandler has published all events"""
|
|
|
|
class LogHandler(ReportingHandler):
    """Publishes events to the cloud-init log at the ``DEBUG`` log level.

    :param level: either a numeric logging level or a level name such as
        ``"DEBUG"``; unrecognized names fall back to ``logging.WARN``.
    """

    def __init__(self, level="DEBUG"):
        super().__init__()
        # Numeric levels are used as-is; names are resolved through the
        # logging module (e.g. "debug" -> logging.DEBUG). The broad
        # except also covers non-string inputs (no .upper()).
        if not isinstance(level, int):
            input_level = level
            try:
                level = getattr(logging, level.upper())
            except Exception:
                LOG.warning("invalid level '%s', using WARN", input_level)
                level = logging.WARN
        self.level = level

    def publish_event(self, event):
        """Log the event on a child logger named after its type and name."""
        logger = logging.getLogger(
            ".".join(["cloudinit", "reporting", event.event_type, event.name])
        )
        logger.log(self.level, event.as_string())
|
|
|
|
|
class PrintHandler(ReportingHandler):
    """Write each event's string form to standard output."""

    def publish_event(self, event):
        """Print the event."""
        print(event.as_string())
|
|
|
|
|
class WebHookHandler(ReportingHandler):
    """POST events as JSON to a configured endpoint.

    Events are queued and delivered from a background daemon thread so a
    slow or unreachable endpoint does not block the publishing caller.
    """

    def __init__(
        self,
        endpoint,
        consumer_key=None,
        token_key=None,
        token_secret=None,
        consumer_secret=None,
        timeout=None,
        retries=None,
    ):
        super(WebHookHandler, self).__init__()

        # When any OAuth credential is supplied, sign requests via the
        # OAuth helper; otherwise use plain readurl.
        if any([consumer_key, token_key, token_secret, consumer_secret]):
            oauth_helper = url_helper.OauthUrlHelper(
                consumer_key=consumer_key,
                token_key=token_key,
                token_secret=token_secret,
                consumer_secret=consumer_secret,
            )
            self.readurl = oauth_helper.readurl
        else:
            self.readurl = url_helper.readurl

        self.endpoint = endpoint
        self.timeout = timeout
        self.retries = retries
        self.ssl_details = util.fetch_ssl_details()

        # flush() sets this event so the worker knows the main thread is
        # waiting for the queue to drain.
        self.flush_requested = Event()
        self.queue: queue.Queue = queue.Queue()
        self.event_processor = threading.Thread(target=self.process_requests)
        self.event_processor.daemon = True
        self.event_processor.start()

    def process_requests(self):
        """Worker loop: POST queued events one at a time, forever."""
        consecutive_failed = 0
        while True:
            if self.flush_requested.is_set() and consecutive_failed > 2:
                # The main thread is blocked in flush() waiting for the
                # queue to drain; if recent posts keep failing, drop the
                # backlog instead of stalling cloud-init on repeated
                # timeouts.
                LOG.warning(
                    "Multiple consecutive failures in WebHookHandler. "
                    "Cancelling all queued events."
                )
                while not self.queue.empty():
                    self.queue.get_nowait()
                    self.queue.task_done()
                consecutive_failed = 0
            request = self.queue.get(block=True)
            url, payload, req_timeout, req_retries, ssl_details = request
            try:
                self.readurl(
                    url,
                    data=payload,
                    timeout=req_timeout,
                    retries=req_retries,
                    ssl_details=ssl_details,
                    log_req_resp=False,
                )
            except Exception as e:
                LOG.warning(
                    "Failed posting event: %s. This was caused by: %s",
                    payload,
                    e,
                )
                consecutive_failed += 1
            else:
                consecutive_failed = 0
            finally:
                self.queue.task_done()

    def publish_event(self, event):
        """Serialize *event* and enqueue it for the posting thread."""
        event_data = event.as_dict()
        LOG.debug(
            "Queuing POST to %s, data: %s",
            self.endpoint,
            event_data,
        )
        request = (
            self.endpoint,
            json.dumps(event_data),
            self.timeout,
            self.retries,
            self.ssl_details,
        )
        self.queue.put(request)

    def flush(self):
        """Block until queued events are posted (or dropped on failure)."""
        self.flush_requested.set()
        LOG.debug("WebHookHandler flushing remaining events")
        self.queue.join()
        self.flush_requested.clear()
|
|
|
|
|
class HyperVKvpReportingHandler(ReportingHandler):
    """
    Reports events to a Hyper-V host using Key-Value-Pair exchange protocol
    and can be used to obtain high level diagnostic information from the host.

    To use this facility, the KVP user-space daemon (hv_kvp_daemon) has to be
    running. It reads the kvp_file when the host requests the guest to
    enumerate the KVP's.

    This reporter collates all events for a module (origin|name) in a single
    json string in the dictionary.

    For more information, see
    https://technet.microsoft.com/en-us/library/dn798287.aspx#Linux%20guests
    """

    # Pool-file record layout: a fixed-size NUL-padded key field followed
    # by a fixed-size NUL-padded value field (see _encode_kvp_item).
    HV_KVP_EXCHANGE_MAX_VALUE_SIZE = 2048
    # The maximum value size expected in Azure
    HV_KVP_AZURE_MAX_VALUE_SIZE = 1024
    HV_KVP_EXCHANGE_MAX_KEY_SIZE = 512
    HV_KVP_RECORD_SIZE = (
        HV_KVP_EXCHANGE_MAX_KEY_SIZE + HV_KVP_EXCHANGE_MAX_VALUE_SIZE
    )
    EVENT_PREFIX = "CLOUD_INIT"
    MSG_KEY = "msg"
    RESULT_KEY = "result"
    DESC_IDX_KEY = "msg_i"
    # Compact separators minimize the serialized JSON so more of each
    # event fits under HV_KVP_AZURE_MAX_VALUE_SIZE.
    JSON_SEPARATORS = (",", ":")
    KVP_POOL_FILE_GUEST = "/var/lib/hyperv/.kvp_pool_1"
    # Class-level guard: the pool file is truncated at most once per
    # process (see _truncate_guest_pool_file).
    _already_truncated_pool_file = False

    def __init__(self, kvp_file_path=KVP_POOL_FILE_GUEST, event_types=None):
        """Set up the handler and start the background publish thread.

        :param kvp_file_path: path of the KVP pool file to append to.
        :param event_types: optional allow-list of event types to publish;
            when falsy, all event types are published.
        """
        super(HyperVKvpReportingHandler, self).__init__()
        self._kvp_file_path = kvp_file_path
        HyperVKvpReportingHandler._truncate_guest_pool_file(
            self._kvp_file_path
        )

        self._event_types = event_types
        self.q: queue.Queue = queue.Queue()
        self.incarnation_no = self._get_incarnation_no()
        self.event_key_prefix = "{0}|{1}".format(
            self.EVENT_PREFIX, self.incarnation_no
        )
        # Writing to the pool file can be slow, so events are queued and
        # appended from a daemon thread (see publish_event below).
        self.publish_thread = threading.Thread(
            target=self._publish_event_routine
        )
        self.publish_thread.daemon = True
        self.publish_thread.start()

    @classmethod
    def _truncate_guest_pool_file(cls, kvp_file):
        """
        Truncate the pool file if it has not been truncated since boot.
        This should be done exactly once for the file indicated by
        KVP_POOL_FILE_GUEST constant above. This method takes a filename
        so that we can use an arbitrary file during unit testing.
        Since KVP is a best-effort telemetry channel we only attempt to
        truncate the file once and only if the file has not been modified
        since boot. Additional truncation can lead to loss of existing
        KVPs.
        """
        if cls._already_truncated_pool_file:
            return
        # Approximate boot time as now minus system uptime.
        boot_time = time.time() - float(util.uptime())
        try:
            # Only truncate when the file predates this boot; opening in
            # "w" mode empties it.
            if os.path.getmtime(kvp_file) < boot_time:
                with open(kvp_file, "w"):
                    pass
        except (OSError, IOError) as e:
            LOG.warning("failed to truncate kvp pool file, %s", e)
        finally:
            # Mark done even on failure: best-effort, never retried.
            cls._already_truncated_pool_file = True

    def _get_incarnation_no(self):
        """
        use the time passed as the incarnation number.
        the incarnation number is the number which are used to
        distinguish the old data stored in kvp and the new data.
        """
        uptime_str = util.uptime()
        try:
            # time.time() - uptime == approximate boot timestamp.
            return int(time.time() - float(uptime_str))
        except ValueError:
            LOG.warning("uptime '%s' not in correct format.", uptime_str)
            return 0

    def _iterate_kvps(self, offset):
        """iterate the kvp file from the current offset."""
        with open(self._kvp_file_path, "rb") as f:
            # Hold an exclusive lock while reading fixed-size records;
            # closing the file also releases the lock if an error occurs.
            fcntl.flock(f, fcntl.LOCK_EX)
            f.seek(offset)
            record_data = f.read(self.HV_KVP_RECORD_SIZE)
            # A short read (fewer than HV_KVP_RECORD_SIZE bytes) ends
            # iteration: only whole records are yielded.
            while len(record_data) == self.HV_KVP_RECORD_SIZE:
                kvp_item = self._decode_kvp_item(record_data)
                yield kvp_item
                record_data = f.read(self.HV_KVP_RECORD_SIZE)
            fcntl.flock(f, fcntl.LOCK_UN)

    def _event_key(self, event):
        """
        the event key format is:
        CLOUD_INIT|<incarnation number>|<event_type>|<event_name>|<uuid>
        [|subevent_index]
        """
        # uuid4 keeps keys unique even for repeated events of one module.
        return "{0}|{1}|{2}|{3}".format(
            self.event_key_prefix, event.event_type, event.name, uuid.uuid4()
        )

    def _encode_kvp_item(self, key, value):
        """Pack key and value into one fixed-size, NUL-padded KVP record."""
        data = struct.pack(
            "%ds%ds"
            % (
                self.HV_KVP_EXCHANGE_MAX_KEY_SIZE,
                self.HV_KVP_EXCHANGE_MAX_VALUE_SIZE,
            ),
            key.encode("utf-8"),
            value.encode("utf-8"),
        )
        return data

    def _decode_kvp_item(self, record_data):
        """Decode one fixed-size record into a {"key", "value"} dict.

        Raises ReportException when record_data is not exactly one
        record long.
        """
        record_data_len = len(record_data)
        if record_data_len != self.HV_KVP_RECORD_SIZE:
            raise ReportException(
                "record_data len not correct {0} {1}.".format(
                    record_data_len, self.HV_KVP_RECORD_SIZE
                )
            )
        # Strip the NUL padding added by _encode_kvp_item.
        k = (
            record_data[0 : self.HV_KVP_EXCHANGE_MAX_KEY_SIZE]
            .decode("utf-8")
            .strip("\x00")
        )
        v = (
            record_data[
                self.HV_KVP_EXCHANGE_MAX_KEY_SIZE : self.HV_KVP_RECORD_SIZE
            ]
            .decode("utf-8")
            .strip("\x00")
        )

        return {"key": k, "value": v}

    def _append_kvp_item(self, record_data):
        """Append the encoded records to the pool file under a file lock."""
        with performance.Timed(f"Appending {self._kvp_file_path}"), open(
            self._kvp_file_path, "ab"
        ) as f:
            fcntl.flock(f, fcntl.LOCK_EX)
            for data in record_data:
                f.write(data)
            f.flush()
            fcntl.flock(f, fcntl.LOCK_UN)

    def _break_down(self, key, meta_data, description):
        """Split an oversized event into multiple indexed sub-records.

        Each sub-record carries the same metadata plus a slice of the
        description under MSG_KEY and its index under DESC_IDX_KEY; sub-keys
        are "<key>|<i>".
        """
        del meta_data[self.MSG_KEY]
        des_in_json = json.dumps(description)
        # Drop the surrounding quotes of the JSON-encoded string.
        des_in_json = des_in_json[1 : (len(des_in_json) - 1)]
        i = 0
        result_array = []
        message_place_holder = '"' + self.MSG_KEY + '":""'
        while True:
            meta_data[self.DESC_IDX_KEY] = i
            meta_data[self.MSG_KEY] = ""
            data_without_desc = json.dumps(
                meta_data, separators=self.JSON_SEPARATORS
            )
            # The "- 8" leaves slack beyond the empty-message template —
            # presumably for quoting/escaping overhead; TODO confirm.
            room_for_desc = (
                self.HV_KVP_AZURE_MAX_VALUE_SIZE - len(data_without_desc) - 8
            )
            # NOTE(review): if metadata alone exceeded the Azure limit,
            # room_for_desc would be <= 0 and this loop would not make
            # progress — verify metadata is always small enough.
            value = data_without_desc.replace(
                message_place_holder,
                '"{key}":"{desc}"'.format(
                    key=self.MSG_KEY, desc=des_in_json[:room_for_desc]
                ),
            )
            subkey = "{}|{}".format(key, i)
            result_array.append(self._encode_kvp_item(subkey, value))
            i += 1
            des_in_json = des_in_json[room_for_desc:]
            if not des_in_json:
                break
        return result_array

    def write_key(self, key: str, value: str) -> None:
        """Write KVP key-value.

        Values will be truncated as needed.
        """
        # Truncate to stay under the Azure per-value limit.
        if len(value) >= self.HV_KVP_AZURE_MAX_VALUE_SIZE:
            value = value[0 : self.HV_KVP_AZURE_MAX_VALUE_SIZE - 1]

        data = [self._encode_kvp_item(key, value)]

        try:
            self._append_kvp_item(data)
        except (OSError, IOError):
            # Best-effort telemetry: log and continue rather than raise.
            LOG.warning("failed posting kvp=%s value=%s", key, value)

    def _encode_event(self, event):
        """
        encode the event into kvp data bytes.
        if the event content reaches the maximum length of kvp value.
        then it would be cut to multiple slices.
        """
        key = self._event_key(event)
        meta_data = {
            "name": event.name,
            "type": event.event_type,
            # Timestamp rendered as a timezone-aware ISO-8601 UTC string.
            "ts": (
                datetime.fromtimestamp(
                    event.timestamp, timezone.utc
                ).isoformat()
            ),
        }
        # Only finish-type events carry a result attribute.
        if hasattr(event, self.RESULT_KEY):
            meta_data[self.RESULT_KEY] = event.result
        meta_data[self.MSG_KEY] = event.description
        value = json.dumps(meta_data, separators=self.JSON_SEPARATORS)
        # if it reaches the maximum length of kvp value,
        # break it down to slices.
        # this should be very corner case.
        if len(value) > self.HV_KVP_AZURE_MAX_VALUE_SIZE:
            return self._break_down(key, meta_data, event.description)
        else:
            data = self._encode_kvp_item(key, value)
            return [data]

    def _publish_event_routine(self):
        """Daemon-thread loop: drain the queue and append events to KVP."""
        while True:
            items_from_queue = 0
            try:
                event = self.q.get(block=True)
                items_from_queue += 1
                encoded_data = []
                # Batch everything currently queued into one file append.
                while event is not None:
                    encoded_data += self._encode_event(event)
                    try:
                        # get all the rest of the events in the queue
                        event = self.q.get(block=False)
                        items_from_queue += 1
                    except queue.Empty:
                        event = None
                try:
                    self._append_kvp_item(encoded_data)
                except (OSError, IOError) as e:
                    LOG.warning("failed posting events to kvp, %s", e)
                finally:
                    # Mark every dequeued item done so flush()'s q.join()
                    # can return even when the write failed.
                    for _ in range(items_from_queue):
                        self.q.task_done()
            # when main process exits, q.get() will throw EOFError
            # indicating we should exit this thread.
            except EOFError:
                return

    # since the saving to the kvp pool can be a time costing task
    # if the kvp pool already contains a chunk of data,
    # so defer it to another thread.
    def publish_event(self, event):
        """Queue the event for the publish thread, if its type is enabled."""
        if not self._event_types or event.event_type in self._event_types:
            self.q.put(event)

    def flush(self):
        """Block until all queued events have been processed."""
        LOG.debug("HyperVReportingHandler flushing remaining events")
        self.q.join()
|
|
|
|
|
# Type[ReportingHandler] doesn't work here because each class has different
# call args. Protocols in python 3.8 can probably make this simpler.
HandlerType = Union[
    ReportingHandler,
    LogHandler,
    PrintHandler,
    WebHookHandler,
    HyperVKvpReportingHandler,
]

# Registry mapping the config-facing handler names to their classes;
# consumers look handlers up by these string names.
available_handlers = DictRegistry()
available_handlers.register_item("log", LogHandler)
available_handlers.register_item("print", PrintHandler)
available_handlers.register_item("webhook", WebHookHandler)
available_handlers.register_item("hyperv", HyperVKvpReportingHandler)
|