Files
server/opt/psa/admin/sbin/modules/firewall/rules
2026-01-07 20:52:11 +01:00

446 lines
18 KiB
Python
Executable File

#!/usr/local/psa/bin/py3-python -IS
""" Safe firewall rules activation and feature checks. This is a 'safeact' replacement. """
import argparse
import atexit
import errno
import logging
import os
import select
import shutil
import signal
import stat
import subprocess
import sys
import textwrap
import time
from datetime import datetime
# Module-wide logger; level and format are configured later by set_up_logging().
log = logging.getLogger('rules')
# Service control helper, invoked below as: pleskrc <service> <action>.
PLESKRC_BIN = "/usr/local/psa/admin/sbin/pleskrc"
# All working files of this tool (scripts, pipe, flag) live in this directory.
VAR_D = "/usr/local/psa/var/modules/firewall"
""" extension var directory """
SCRIPT_NEW = os.path.join(VAR_D, "firewall-new.sh")
""" new set of firewall rules """
SCRIPT_ACTIVE = os.path.join(VAR_D, "firewall-active.sh")
""" previous (active) set of firewall rules """
SCRIPT_EMERGENCY = os.path.join(VAR_D, "firewall-emergency.sh")
""" emergency set of firewall rules - ones that disable firewall """
# Written to by --confirm, read by --activate while it waits for the token.
PIPE_PATH = os.path.join(VAR_D, "confirm.pipe")
""" interprocess communication named pipe (fifo) """
# Created by rollback(), removed at the start of activate(), checked by confirm().
ROLLBACK_FLAG = os.path.join(VAR_D, "rollback.flag")
""" "new firewall rules turned out to be bad" flag """
# Used when PHP_SAFEACT_CONFIRM_INTERVAL is unset or empty.
DEFAULT_CONFIRM_INTERVAL = 15
""" default confirmation timeout, in seconds """
MINIMAL_CONFIRM_INTERVAL = 5
""" minimal time the code will actually await confirmation token, in seconds """
# Lower bound applied to every subprocess timeout, even if the deadline has already passed.
MINIMAL_SCRIPT_TIMEOUT = 5
""" minimal time the code will allow a subprocess to execute, in seconds """
class ConfirmFailed(RuntimeError):
    """ Raised when rules activation could not be (or was not) confirmed. """
def set_up_logging(verbosity):
    """ Configure logging from the --verbose count and the PLESK_DEBUG environment variable.

    verbosity: number of -v flags (None is treated as 0). 0 maps to CRITICAL, each extra
    flag lowers the threshold by one level, 4 or more means DEBUG. A non-empty PLESK_DEBUG
    forces DEBUG regardless of the count.
    """
    count = verbosity or 0
    if count >= 4 or os.getenv('PLESK_DEBUG'):
        chosen = logging.DEBUG
    else:
        # Unknown counts fall back to the quietest level.
        ladder = dict(enumerate((logging.CRITICAL, logging.ERROR, logging.WARNING, logging.INFO)))
        chosen = ladder.get(count, logging.CRITICAL)
    logging.basicConfig(level=chosen, format='[%(asctime)s] %(levelname)8s %(message)s')
def parse_args():
    """ Build the command line parser and return the parsed arguments (argparse.Namespace).

    Exactly one of --activate, --confirm or --try-enable-features is required; the
    remaining options are grouped by the command they belong to.
    """
    # NOTE: the literal below is kept exactly as found in the file, hence the column-0 lines.
    env_help = f"""\
environment variables:
PHP_SAFEACT_TOKEN Activation token
PHP_SAFEACT_CONFIRM_INTERVAL Confirmation timeout (default: {DEFAULT_CONFIRM_INTERVAL})
(activation and rollback each take at most this time,
but system will actually wait for confirmation token
for at least {MINIMAL_CONFIRM_INTERVAL} seconds, which may
increase the effective timeout, which may be
additionally increased due to misbehaving child
processes by up to {3 * MINIMAL_SCRIPT_TIMEOUT} seconds)
PLESK_DEBUG Set logging verbosity to maximum
"""
    parser = argparse.ArgumentParser(
        description="Activate firewall rules or check its features safely",
        epilog=textwrap.dedent(env_help),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase logging verbosity, can be specified multiple times.")

    # The three operating modes; one and only one must be chosen.
    mode = parser.add_mutually_exclusive_group(required=True)
    mode.add_argument('--activate', action='store_true',
                      help="Activate new rules. Synchronous.")
    mode.add_argument('--confirm', action='store_true',
                      help="Commit activation of the new rules. Should be invoked from a new "
                           "SSH session or web/app server worker process to ensure an existing "
                           "network connection is not re-used.")
    mode.add_argument('--try-enable-features', action='store_true',
                      help="Probe iptables features support. This will both check and "
                           "try to enable the specified features.")

    activate_group = parser.add_argument_group("--activate arguments")
    activate_group.add_argument('--rules-file', type=argparse.FileType('r'), default='-',
                                metavar='PATH',
                                help="New rules script (default: %(default)s, i.e. STDIN)")

    confirm_group = parser.add_argument_group("--confirm arguments")
    confirm_group.add_argument('--wait', action='store_true',
                               help="Wait for the activation process to appear")

    probe_group = parser.add_argument_group("--try-enable-features arguments")
    probe_group.add_argument('--iptables', default='/usr/sbin/iptables',
                             help="iptables binary path (default: %(default)s)")
    probe_group.add_argument('--table', default='filter',
                             help="iptables table name (default: %(default)s)")
    probe_group.add_argument('--rule', default='-L',
                             help="iptables rule options (default: %(default)s), "
                                  "use the default to check table and/or binary availability")

    return parser.parse_args()
def get_token():
    """ Returns activation token string.

    The token is taken from the PHP_SAFEACT_TOKEN environment variable with surrounding
    whitespace stripped.

    Raises RuntimeError if the token is absent (or blank), or if it is too long to be
    written to the confirmation pipe in a single atomic write.
    """
    token = os.getenv('PHP_SAFEACT_TOKEN', '').strip()
    if not token:
        raise RuntimeError("Activation token is absent")
    # Writes to pipes are atomic only up to certain system-specific limit (at least 512).
    # That limit is in BYTES, so measure the encoded token rather than its character count:
    # a token with multi-byte characters may be short in characters yet too long on the wire.
    # os.fsencode() restores the bytes the environment variable was decoded from.
    size = len(os.fsencode(token))
    if size >= select.PIPE_BUF - 1:
        raise RuntimeError(f"Activation token is too long: {size} bytes")
    return token
def get_confirm_timeout():
    """ Returns confirmation timeout in seconds as int.

    Reads PHP_SAFEACT_CONFIRM_INTERVAL; when it is unset or empty the default interval
    is used. Raises ValueError if the value is not an integer or is not positive.
    """
    raw = os.getenv('PHP_SAFEACT_CONFIRM_INTERVAL')
    if not raw:
        return DEFAULT_CONFIRM_INTERVAL
    seconds = int(raw)
    if seconds <= 0:
        raise ValueError(f"Confirmation timeout is too small: {seconds}")
    return seconds
def rm_f(path):
    """ Remove a file like 'rm -f': a missing file is not an error, anything else is. """
    log.debug("rm -f %r", path)
    try:
        os.unlink(path)
    except FileNotFoundError:
        # Already gone - exactly what was asked for.
        pass
def verify_script_perms(path):
    """ Check that the script at path is safe to run.

    The path itself is examined (symlinks are not followed). Raises ValueError naming the
    first failed requirement: regular file, non-empty, owned by root, mode exactly 0700.
    Raises OSError if the path cannot be stat'ed.
    """
    log.debug("Checking %r script attributes", path)
    info = os.lstat(path)
    # Evaluated in order; the first failing requirement is reported.
    requirements = (
        (stat.S_ISREG(info.st_mode), f"{path}: The script is not a regular file"),
        (info.st_size != 0, f"{path}: The script is empty"),
        (info.st_uid == 0, f"{path}: The script is not owned by root"),
        (info.st_mode == (stat.S_IFREG | 0o700), f"{path}: The script has permissions other than 0700"),
    )
    for satisfied, complaint in requirements:
        if not satisfied:
            raise ValueError(complaint)
def try_restart_service(service, timeout):
    """ Ask pleskrc to 'try-restart' the service, i.e. restart it if it is already running.

    timeout is in seconds and is raised to MINIMAL_SCRIPT_TIMEOUT if smaller.
    Raises subprocess.CalledProcessError on non-zero exit and subprocess.TimeoutExpired
    when the command does not finish in time.
    """
    effective_timeout = max(timeout, MINIMAL_SCRIPT_TIMEOUT)
    log.debug("Trying to restart %r service with timeout=%s", service, effective_timeout)
    command = [PLESKRC_BIN, service, 'try-restart']
    subprocess.check_call(command, timeout=effective_timeout)
def is_service_running(service):
    """ Tell whether 'pleskrc <service> status' exits with code 0. """
    log.debug("Checking %r service status", service)
    status = subprocess.run([PLESKRC_BIN, service, 'status'])
    exited_cleanly = status.returncode == 0
    return exited_cleanly
def execute_rules_script(script, timeout):
    """ Run the rules script, giving it at least MINIMAL_SCRIPT_TIMEOUT seconds.

    The child inherits the current environment minus PHP_SAFEACT_TOKEN.
    Raises subprocess.CalledProcessError on non-zero exit and subprocess.TimeoutExpired
    when the script does not finish in time.
    """
    effective_timeout = max(timeout, MINIMAL_SCRIPT_TIMEOUT)
    # Hand the child a copy of our environment without the activation token.
    child_env = dict(os.environ)
    child_env.pop('PHP_SAFEACT_TOKEN', None)
    log.debug("Executing script %r with timeout=%s", script, effective_timeout)
    subprocess.check_call([script], timeout=effective_timeout, env=child_env)
def _restart_frontend_servers(cutoff_timestamp):
    """ Restarts the panel server and the frontend web server, within the remaining time. """
    # This is required to ensure that there are no outstanding connections to browser
    # and any new connections are allowed by firewall.
    try:
        try_restart_service('sw-cp-server', cutoff_timestamp - time.time())
        if is_service_running('nginx'):
            log.debug("Nginx looks to be the frontend web server")
            try_restart_service('nginx', cutoff_timestamp - time.time())
        else:
            log.debug("Apache looks to be the frontend web server")
            try_restart_service('apache', cutoff_timestamp - time.time())
    except subprocess.TimeoutExpired as ex:
        # A slow restart alone is not a reason to give up on the new rules.
        log.warning("%s. Will attempt to wait for confirmation anyway.", ex)
        log.debug("This exception happened at:", exc_info=sys.exc_info())


def _await_confirmation(cutoff_timestamp):
    """ Waits on the named pipe for the expected activation token, raises ConfirmFailed on timeout. """
    expected_token = get_token()
    # Always leave the confirming side a minimal window, even if the deadline is (almost) over.
    cutoff_timestamp = max(cutoff_timestamp, time.time() + MINIMAL_CONFIRM_INTERVAL)
    log.debug("Waiting for a matching activation token on %r until %s",
              PIPE_PATH, datetime.fromtimestamp(cutoff_timestamp))

    # Open w/o blocking to ensure open doesn't block w/o writers present.
    # The descriptor is read with os.read() rather than through a buffered file object:
    # select() only sees data still in the kernel, so lines already pulled into a
    # userspace buffer (several tokens arriving together) would never be noticed.
    pipe_fd = os.open(PIPE_PATH, os.O_RDONLY | os.O_NONBLOCK)
    try:
        # Also keep the pipe open for writing, otherwise after the first read select()
        # will immediately return with only EOF available to read
        # (this normally indicates absence of writers).
        with open(PIPE_PATH, 'wb'):
            pending = b''
            timeout = cutoff_timestamp - time.time()
            while timeout > 0 and select.select([pipe_fd], [], [], timeout)[0]:
                try:
                    pending += os.read(pipe_fd, select.PIPE_BUF)
                except BlockingIOError:
                    # Spurious wake-up: nothing to read after all.
                    pass
                # Everything before the last newline is complete lines; keep the rest for later.
                *lines, pending = pending.split(b'\n')
                for line in lines:
                    # Decode the same way the environment (source of the token) is decoded;
                    # this never fails, so garbage in the pipe is merely a non-matching token.
                    token = os.fsdecode(line).strip()
                    if token == expected_token:
                        log.info("Received matching activation token")
                        return
                    log.debug("Received non-matching activation token: %r", token)
                timeout = cutoff_timestamp - time.time()
    finally:
        os.close(pipe_fd)
    raise ConfirmFailed("Did not receive a matching activation token "
                        "before confirmation timeout")


def _set_as_active(script):
    """ Copies the script over SCRIPT_ACTIVE unless it already is SCRIPT_ACTIVE. """
    if script == SCRIPT_ACTIVE:
        log.debug("Rules from %r are already the active configuration", script)
        return
    log.debug("Setting %r as the active configuration %r", script, SCRIPT_ACTIVE)
    # Previously files were hardlinked, but we don't really need strict atomicity here
    # and hardlinks may cause issues if somebody decides to meddle with the files manually
    # (e.g. emergency may be hardlinked into active and may be updated due to copy into active)
    rm_f(SCRIPT_ACTIVE)
    log.debug("cp -Pa %r %r", script, SCRIPT_ACTIVE)
    shutil.copy2(script, SCRIPT_ACTIVE, follow_symlinks=False)


def apply_rules(script, cutoff_timestamp, confirm=True):
    """ Applies rules script and (optionally) waits for confirmation until cutoff_timestamp.
    On success links the script into active configuration.

    script: path of the rules script to execute.
    cutoff_timestamp: absolute deadline (seconds since the epoch) for the whole operation;
        subprocesses and the confirmation wait still get their minimal time after it.
    confirm: when true, restart the frontend servers and require a matching activation
        token on the named pipe before the script is made active.

    Raises ConfirmFailed if no matching token arrives in time; subprocess and OS errors
    from running the script or copying it propagate unchanged.
    """
    log.info("Trying to apply rules from %r until %s, %s confirmation",
             script, datetime.fromtimestamp(cutoff_timestamp), "with" if confirm else "without")
    execute_rules_script(script, cutoff_timestamp - time.time())

    if confirm:
        _restart_frontend_servers(cutoff_timestamp)
        _await_confirmation(cutoff_timestamp)

    _set_as_active(script)
def try_create_pipe(path, stale_timestamp):
    """ Creates a pipe if it doesn't exist, removes it if it is too old. Otherwise returns False.

    path: location of the named pipe.
    stale_timestamp: an existing file whose ctime is older than this (seconds since the
        epoch) is considered abandoned and is replaced.

    Returns True when this process created the pipe (an atexit handler is registered to
    remove it), False when somebody else currently owns it.
    """
    try:
        ctime = os.path.getctime(path)
        if ctime >= stale_timestamp:
            return False
        log.info("Removing stale named pipe %r created at %s", path, datetime.fromtimestamp(ctime))
        os.unlink(path)
    except OSError as ex:
        # Absent (or removed meanwhile) is fine - we are about to create it anyway.
        if ex.errno != errno.ENOENT:
            raise

    log.debug("Creating named pipe %r and setting up atexit handler", path)
    try:
        os.mkfifo(path, 0o600)
    except FileExistsError:
        # Another activation created the pipe between our check and mkfifo():
        # it owns the pipe now, so report "not created" and let the caller retry.
        log.debug("Named pipe %r was created concurrently by another process", path)
        return False

    @atexit.register
    def remove_pipe():
        log.debug("Removing named pipe %r on exit", path)
        rm_f(path)

    return True
def rollback():
    """ Roll back to some working configuration; always ends by raising ConfirmFailed on failure.

    First raises the rollback flag file, then re-applies the active script and waits for
    confirmation. If that fails for any reason, the emergency script is applied without
    confirmation and ConfirmFailed is raised describing what happened.
    """
    log.info("Rolling back to working configuration")
    log.debug("touch %r", ROLLBACK_FLAG)
    open(ROLLBACK_FLAG, 'wb').close()

    def restore_active():
        log.info("Trying to roll back from new to active configuration")
        deadline = time.time() + get_confirm_timeout()
        try:
            apply_rules(SCRIPT_ACTIVE, deadline)
        except ConfirmFailed as ex:
            raise ConfirmFailed(
                "Connectivity failure occurred with both the new and rollback (previous) firewall configurations, "
                "indicating that both configurations are faulty.") from ex

    try:
        restore_active()
    except Exception as ex:
        # Last resort: the emergency script needs no confirmation and no time budget.
        log.info("Trying to roll back from active to emergency configuration")
        apply_rules(SCRIPT_EMERGENCY, 0, confirm=False)
        raise ConfirmFailed(f"{ex} "
                            "As an emergency measure, "
                            "the firewall was disabled and a configuration without firewall rules was applied. "
                            "To resolve the issue, correct the firewall rules and re-enable the firewall.")
def activate(rules_file):
    """ Activates new rules supplied via rules_file.

    rules_file: open text file object with the new rules script (a file or STDIN).

    Waits for any previous activation to release the named pipe, stores the rules as
    SCRIPT_NEW, verifies the scripts, then applies the new rules and waits for
    confirmation. Any failure while applying triggers rollback() and is re-raised.

    Raises RuntimeError if a previous activation doesn't finish in time, ValueError if a
    script fails verification, and whatever apply_rules()/rollback() raise.
    """
    rm_f(ROLLBACK_FLAG)

    timeout = get_confirm_timeout()
    start_timestamp = time.time()
    cutoff_timestamp = start_timestamp + timeout
    # Assume other activations use the same timeout
    stale_timestamp = start_timestamp - 2.1 * timeout
    log.info("Activating with token=%r, timeout=%s", get_token(), timeout)

    log.debug("Setting up signal handlers to ensure cleanup")
    # Treat termination signals like SIGINT so that normal interpreter shutdown
    # (and with it the atexit handlers) still happens.
    for signum in (signal.SIGTERM, signal.SIGHUP, signal.SIGQUIT):
        signal.signal(signum, signal.getsignal(signal.SIGINT))

    log.debug("Trying to create named pipe %r, until %s, file older than %s is considered stale",
              PIPE_PATH,
              datetime.fromtimestamp(cutoff_timestamp),
              datetime.fromtimestamp(stale_timestamp))
    while time.time() < cutoff_timestamp:
        if try_create_pipe(PIPE_PATH, stale_timestamp):
            log.debug("Pipe created")
            break
        time.sleep(0.5)
    else:
        log.debug("Could not create pipe")
        raise RuntimeError("Previous rules activation didn't finish before confirmation timeout")

    log.info("Writing new rules from %r into %r", rules_file.name, SCRIPT_NEW)
    rm_f(SCRIPT_NEW)
    log.debug("cat > %r", SCRIPT_NEW)
    with os.fdopen(os.open(SCRIPT_NEW, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o700), 'w') as script_new:
        shutil.copyfileobj(rules_file, script_new)

    # The script we are about to execute must itself be sane: this catches empty input
    # and a path that turned out not to be a plain root-owned 0700 file.
    verify_script_perms(SCRIPT_NEW)
    # The active script is what rollback() would execute, so it must be sane as well.
    verify_script_perms(SCRIPT_ACTIVE)

    try:
        log.info("Trying to apply new configuration")
        apply_rules(SCRIPT_NEW, cutoff_timestamp)
        rm_f(SCRIPT_NEW)
    except Exception:
        rollback()
        raise
def confirm(wait=False):
    """ Confirm rules activation (new ones or during rollback) by sending the token.

    wait: when true, keep retrying until an activation process shows up, for up to twice
        the confirmation timeout plus subprocess allowances; otherwise try exactly once.

    Raises ConfirmFailed if the token could not be delivered in time or if the rollback
    flag exists after delivery.
    """
    token = get_token()

    allowance = 0
    if wait:
        interval = max(get_confirm_timeout(), MINIMAL_CONFIRM_INTERVAL)
        allowance = 2 * (interval + 3 * MINIMAL_SCRIPT_TIMEOUT)
    deadline = time.time() + allowance
    log.info("Confirming with token=%r until %s", token, datetime.fromtimestamp(deadline))

    # Open w/o creating the pipe/file if it doesn't exist ([Errno 2] No such file or directory)
    # Open w/o blocking if no readers are present ([Errno 6] No such device or address)
    open_flags = os.O_WRONLY | os.O_APPEND | os.O_NONBLOCK
    delivered = False
    while not delivered:
        try:
            with os.fdopen(os.open(PIPE_PATH, open_flags), 'w') as pipe:
                log.debug("Writing activation token to %r", PIPE_PATH)
                pipe.write(token + "\n")
            delivered = True
        except Exception as ex:
            if time.time() >= deadline:
                raise ConfirmFailed("Too late to confirm: no rules activation process") from ex
            log.debug(f"No activation process yet, continue to wait: {ex}")
            time.sleep(0.5)

    # The token may have been consumed by a rollback rather than by the new rules.
    if os.path.lexists(ROLLBACK_FLAG):
        raise ConfirmFailed("Too late to confirm: new rules were rolled back")
def try_enable_features(iptables, table, rule):
    """
    Checks if desired iptables features are enabled. Tries to enable them if not.

    On modern systems iptables is capable of dynamically loading required kernel
    modules. This is convenient, misleading and maybe even dangerous at the same time
    ( http://backstage.soundcloud.com/2012/08/shoot-yourself-in-the-foot-with-iptables-and-kmod-auto-loading/ ).

    Since we don't want to meddle with kernel modules for obvious reasons, we use
    iptables itself to check features support. As a side effect such checks may trigger
    kernel module loading. Checks are isolated in a separate temporary chain, that
    nobody refers to.

    This approach has an added advantage of checking whether real iptables rules would
    work, not some "support" per se. Practice shows that the latter may be misleading
    and result in bugs. Therefore if you're not sure <rule> works on a given system,
    just call this command with the given <rule>.

    <rule> is <rule-specification> in terms of iptables(8). Specifying <target> as part
    of it is not required and not particularly useful. <rule> can also be '-L' to check
    table and/or binary availability.

    iptables: path of the iptables binary; table: table name to operate on.
    Raises subprocess.CalledProcessError (or OSError) when the check fails.
    """
    if rule == '-L':
        # listing is "safe"
        log.info("Checking feature: iptables=%r, table=%r, rule=%r", iptables, table, rule)
        subprocess.check_call([iptables, '-t', table, rule, '-n'])
        return

    # everything else is isolated in a temporary chain
    chain = "plesk-fw-tmp-chain"
    log.info("Checking feature: iptables=%r, table=%r, rule=%r, chain=%r",
             iptables, table, rule, chain)

    def run_iptables(*args):
        subprocess.check_call([iptables, '-t', table, *args])

    def remove_chain():
        run_iptables('-F', chain)
        run_iptables('-Z', chain)
        run_iptables('-X', chain)

    try:
        remove_chain()
    except Exception as ex:
        # Failure is OK here - it means chain didn't exist
        log.debug("During initial %r chain removal: %s", chain, ex)

    run_iptables('-N', chain)
    try:
        run_iptables('-A', chain, *rule.split())
    except Exception:
        # Don't leave the temporary chain behind when the rule is rejected;
        # cleanup is best-effort so that the original failure is what gets reported.
        try:
            remove_chain()
        except Exception as ex:
            log.debug("During %r chain removal after failed check: %s", chain, ex)
        raise
    remove_chain()
def main():
    """ Parse the command line, set up logging and run the selected command. """
    options = parse_args()
    set_up_logging(options.verbose)
    log.debug("Options: %s", options)

    if options.activate:
        activate(options.rules_file)
        return
    if options.confirm:
        confirm(options.wait)
        return
    if options.try_enable_features:
        try_enable_features(options.iptables, options.table, options.rule)
if __name__ == '__main__':
    # Exit codes: 0 on success, 2 when interrupted, 1 on any other failure
    # (the failure message is also printed to stdout).
    try:
        main()
    except KeyboardInterrupt:
        log.debug("Interrupted:", exc_info=sys.exc_info())
        sys.exit(2)
    except Exception as failure:
        print(f"{failure}")
        log.error("%s", failure)
        log.debug("This exception happened at:", exc_info=sys.exc_info())
        sys.exit(1)
# vim: ft=python