#!/usr/local/psa/bin/py3-python -IS
|
|
""" Safe firewall rules activation and feature checks. This is a 'safeact' replacement. """
|
|
import argparse
|
|
import atexit
|
|
import errno
|
|
import logging
|
|
import os
|
|
import select
|
|
import shutil
|
|
import signal
|
|
import stat
|
|
import subprocess
|
|
import sys
|
|
import textwrap
|
|
import time
|
|
from datetime import datetime
|
|
|
|
|
|
# Logger shared by every function in this script.
log = logging.getLogger('rules')

# Plesk helper binary invoked to query ('status') and restart ('try-restart') services.
PLESKRC_BIN = "/usr/local/psa/admin/sbin/pleskrc"

# Extension var directory.
VAR_D = "/usr/local/psa/var/modules/firewall"

# New set of firewall rules.
SCRIPT_NEW = os.path.join(VAR_D, "firewall-new.sh")

# Previous (active) set of firewall rules.
SCRIPT_ACTIVE = os.path.join(VAR_D, "firewall-active.sh")

# Emergency set of firewall rules - ones that disable firewall.
SCRIPT_EMERGENCY = os.path.join(VAR_D, "firewall-emergency.sh")

# Interprocess communication named pipe (fifo).
PIPE_PATH = os.path.join(VAR_D, "confirm.pipe")

# "New firewall rules turned out to be bad" flag.
ROLLBACK_FLAG = os.path.join(VAR_D, "rollback.flag")

# Default confirmation timeout, in seconds.
DEFAULT_CONFIRM_INTERVAL = 15

# Minimal time the code will actually await confirmation token, in seconds.
MINIMAL_CONFIRM_INTERVAL = 5

# Minimal time the code will allow a subprocess to execute, in seconds.
MINIMAL_SCRIPT_TIMEOUT = 5
|
|
|
|
|
|
class ConfirmFailed(RuntimeError):
    """ Raised when a rules activation was not confirmed (in time) or was rolled back. """
|
|
|
|
|
|
def set_up_logging(verbosity):
    """ Configure root logging from the --verbose count and PLESK_DEBUG environment.

    verbosity -- number of times --verbose was given (None is treated as 0);
                 0..4 map to CRITICAL, ERROR, WARNING, INFO, DEBUG respectively

    A count of 4 or more, or a non-empty PLESK_DEBUG variable, forces DEBUG.
    Any other unrecognized count falls back to CRITICAL.
    """
    by_count = dict(enumerate((
        logging.CRITICAL,
        logging.ERROR,
        logging.WARNING,
        logging.INFO,
        logging.DEBUG,
    )))
    count = verbosity or 0

    if count >= 4 or os.getenv('PLESK_DEBUG'):
        chosen = logging.DEBUG
    else:
        chosen = by_count.get(count, logging.CRITICAL)

    logging.basicConfig(level=chosen, format='[%(asctime)s] %(levelname)8s %(message)s')
|
|
|
|
|
|
def parse_args():
    """ Build the command line parser and return the parsed arguments.

    Exactly one of --activate, --confirm or --try-enable-features must be given;
    the remaining options are grouped by the command they belong to.
    """
    # NOTE(review): any column alignment of the entries below was not recoverable
    # from the source this was restored from; re-align if desired.
    env_help = f"""\
        environment variables:
        PHP_SAFEACT_TOKEN Activation token
        PHP_SAFEACT_CONFIRM_INTERVAL Confirmation timeout (default: {DEFAULT_CONFIRM_INTERVAL})
        (activation and rollback each take at most this time,
        but system will actually wait for confirmation token
        for at least {MINIMAL_CONFIRM_INTERVAL} seconds, which may
        increase the effective timeout, which may be
        additionally increased due to misbehaving child
        processes by up to {3 * MINIMAL_SCRIPT_TIMEOUT} seconds)
        PLESK_DEBUG Set logging verbosity to maximum
        """
    cli = argparse.ArgumentParser(
        description="Activate firewall rules or check its features safely",
        epilog=textwrap.dedent(env_help),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument(
        '-v', '--verbose', action='count', default=0,
        help="Increase logging verbosity, can be specified multiple times.",
    )

    # The three commands exclude each other and one of them is mandatory.
    mode = cli.add_mutually_exclusive_group(required=True)
    mode.add_argument(
        '--activate', action='store_true',
        help="Activate new rules. Synchronous.",
    )
    mode.add_argument(
        '--confirm', action='store_true',
        help="Commit activation of the new rules. Should be invoked from a new "
             "SSH session or web/app server worker process to ensure an existing "
             "network connection is not re-used.",
    )
    mode.add_argument(
        '--try-enable-features', action='store_true',
        help="Probe iptables features support. This will both check and "
             "try to enable the specified features.",
    )

    activate_group = cli.add_argument_group("--activate arguments")
    activate_group.add_argument(
        '--rules-file', type=argparse.FileType('r'), default='-', metavar='PATH',
        help="New rules script (default: %(default)s, i.e. STDIN)",
    )

    confirm_group = cli.add_argument_group("--confirm arguments")
    confirm_group.add_argument(
        '--wait', action='store_true',
        help="Wait for the activation process to appear",
    )

    features_group = cli.add_argument_group("--try-enable-features arguments")
    features_group.add_argument(
        '--iptables', default='/usr/sbin/iptables',
        help="iptables binary path (default: %(default)s)",
    )
    features_group.add_argument(
        '--table', default='filter',
        help="iptables table name (default: %(default)s)",
    )
    features_group.add_argument(
        '--rule', default='-L',
        help="iptables rule options (default: %(default)s), "
             "use the default to check table and/or binary availability",
    )

    return cli.parse_args()
|
|
|
|
|
|
def get_token():
    """ Returns activation token string.

    The token is taken from the PHP_SAFEACT_TOKEN environment variable with
    surrounding whitespace removed.

    Raises RuntimeError if the token is absent (or blank), or if it is too long
    to be written to the confirmation pipe in a single atomic write.
    """
    token = os.getenv('PHP_SAFEACT_TOKEN', '').strip()
    if not token:
        raise RuntimeError("Activation token is absent")
    # Writes to pipes are atomic only up to certain system-specific limit (at least 512).
    # That limit is counted in bytes, not characters, so measure the encoded token:
    # a token with multi-byte characters may be much longer on the wire than len(token).
    size = len(os.fsencode(token))
    if size >= select.PIPE_BUF - 1:
        raise RuntimeError(f"Activation token is too long: {size} bytes")
    return token
|
|
|
|
|
|
def get_confirm_timeout():
    """ Returns confirmation timeout, in seconds, as int.

    Reads PHP_SAFEACT_CONFIRM_INTERVAL; falls back to DEFAULT_CONFIRM_INTERVAL
    when the variable is unset or empty.

    Raises ValueError if the value is not an integer or is not positive.
    """
    raw = os.getenv('PHP_SAFEACT_CONFIRM_INTERVAL')
    if not raw:
        return DEFAULT_CONFIRM_INTERVAL

    seconds = int(raw)
    if seconds <= 0:
        raise ValueError(f"Confirmation timeout is too small: {seconds}")
    return seconds
|
|
|
|
|
|
def rm_f(path):
    """ Removes the file at path like 'rm -f': a missing file is not an error.

    Any other OSError (e.g. permission problems) is propagated.
    """
    log.debug("rm -f %r", path)
    try:
        os.unlink(path)
    except FileNotFoundError:
        # Already gone - exactly what the caller wanted
        pass
|
|
|
|
|
|
def verify_script_perms(path):
    """ Checks that script file looks to be OK.

    The path itself is inspected (symlinks are not followed). It must be a
    non-empty regular file owned by root with mode exactly 0700.

    Raises ValueError describing the first check that fails.
    """
    log.debug("Checking %r script attributes", path)
    info = os.lstat(path)
    mode = info.st_mode

    # (failed?, reason) pairs in the order they are reported
    checks = (
        (not stat.S_ISREG(mode), "The script is not a regular file"),
        (info.st_size == 0, "The script is empty"),
        (info.st_uid != 0, "The script is not owned by root"),
        (mode != (stat.S_IFREG | 0o700), "The script has permissions other than 0700"),
    )
    for failed, reason in checks:
        if failed:
            raise ValueError(f"{path}: {reason}")
|
|
|
|
|
|
def try_restart_service(service, timeout):
    """ Restarts the service via pleskrc 'try-restart' (i.e. only if it is already running).

    service -- service name passed to pleskrc
    timeout -- seconds allowed for the restart, raised to MINIMAL_SCRIPT_TIMEOUT if smaller

    Raises subprocess.CalledProcessError on non-zero exit and
    subprocess.TimeoutExpired when the timeout elapses.
    """
    timeout = max(timeout, MINIMAL_SCRIPT_TIMEOUT)
    log.debug("Trying to restart %r service with timeout=%s", service, timeout)
    command = [PLESKRC_BIN, service, 'try-restart']
    subprocess.check_call(command, timeout=timeout)
|
|
|
|
|
|
def is_service_running(service, timeout=None):
    """ Returns whether the given service is running.

    service -- service name passed to pleskrc
    timeout -- seconds allowed for the status check; defaults to, and is never
               less than, MINIMAL_SCRIPT_TIMEOUT

    The check is time-boxed like every other child process of this script, so a
    hung status command cannot stall rules activation or rollback indefinitely.

    Raises subprocess.TimeoutExpired when the status check does not finish in time.
    """
    if timeout is None:
        timeout = MINIMAL_SCRIPT_TIMEOUT
    else:
        timeout = max(timeout, MINIMAL_SCRIPT_TIMEOUT)
    log.debug("Checking %r service status", service)
    result = subprocess.run([PLESKRC_BIN, service, 'status'], timeout=timeout)
    return result.returncode == 0
|
|
|
|
|
|
def execute_rules_script(script, timeout):
    """ Executes script within a given timeout.

    script  -- path of the executable rules script
    timeout -- seconds allowed, raised to MINIMAL_SCRIPT_TIMEOUT if smaller

    The child inherits the current environment except for PHP_SAFEACT_TOKEN.

    Raises subprocess.CalledProcessError on non-zero exit and
    subprocess.TimeoutExpired when the timeout elapses.
    """
    timeout = max(timeout, MINIMAL_SCRIPT_TIMEOUT)
    # Hand the script a copy of our environment without the activation token
    child_env = dict(os.environ)
    child_env.pop('PHP_SAFEACT_TOKEN', None)
    log.debug("Executing script %r with timeout=%s", script, timeout)
    subprocess.check_call([script], timeout=timeout, env=child_env)
|
|
|
|
|
|
def _wait_for_token(expected_token, cutoff_timestamp):
    """ Reads PIPE_PATH until expected_token arrives, raises ConfirmFailed at cutoff_timestamp. """
    # Open w/o blocking to ensure open doesn't block w/o writers present.
    # The pipe is read raw (unbuffered): select() only knows about data still in
    # the kernel, so a buffered reader that slurps two tokens at once and returns
    # only the first one would leave the second unseen until the timeout.
    with os.fdopen(os.open(PIPE_PATH, os.O_RDONLY | os.O_NONBLOCK), 'rb', buffering=0) as pipe:
        # Also keep the pipe open for writing, otherwise after the first read select()
        # will immediately return with only EOF available to read
        # (this normally indicates absence of writers).
        with open(PIPE_PATH, 'wb'):
            pending = b""
            while True:
                timeout = cutoff_timestamp - time.time()
                if timeout <= 0 or not select.select([pipe], [], [], timeout)[0]:
                    raise ConfirmFailed("Did not receive a matching activation token "
                                        "before confirmation timeout")

                chunk = pipe.read(select.PIPE_BUF)
                if not chunk:
                    # Nothing to read after all (a raw non-blocking read yields None)
                    continue

                # Everything before the last newline is complete tokens, the rest
                # is kept and prepended to whatever is read next.
                *lines, pending = (pending + chunk).split(b"\n")
                for line in lines:
                    # Decode the same way the environment (and so expected_token) is decoded
                    token = os.fsdecode(line).strip()
                    if token == expected_token:
                        log.info("Received matching activation token")
                        return
                    log.debug("Received non-matching activation token: %r", token)

                # Tolerate a writer that did not terminate the token with a newline
                if os.fsdecode(pending).strip() == expected_token:
                    log.info("Received matching activation token")
                    return


def apply_rules(script, cutoff_timestamp, confirm=True):
    """ Applies rules script and (optionally) waits for confirmation until cutoff_timestamp.
    On success copies the script into active configuration.

    script           -- path of the rules script to execute
    cutoff_timestamp -- UNIX timestamp by which the script must finish and the
                        confirmation token must arrive (child processes always get
                        at least MINIMAL_SCRIPT_TIMEOUT seconds and the token is
                        awaited for at least MINIMAL_CONFIRM_INTERVAL seconds)
    confirm          -- whether to restart the web frontends and wait for a matching
                        activation token on PIPE_PATH

    Raises ConfirmFailed if no matching token arrives in time. Errors from the
    rules script or service restarts (other than timeouts of the latter) propagate.
    """
    log.info("Trying to apply rules from %r until %s, %s confirmation",
             script, datetime.fromtimestamp(cutoff_timestamp), "with" if confirm else "without")

    execute_rules_script(script, cutoff_timestamp - time.time())

    if confirm:
        # This is required to ensure that there are no outstanding connections to browser
        # and any new connections are allowed by firewall.
        try:
            try_restart_service('sw-cp-server', cutoff_timestamp - time.time())
            if is_service_running('nginx'):
                log.debug("Nginx looks to be the frontend web server")
                try_restart_service('nginx', cutoff_timestamp - time.time())
            else:
                log.debug("Apache looks to be the frontend web server")
                try_restart_service('apache', cutoff_timestamp - time.time())
        except subprocess.TimeoutExpired as ex:
            log.warning("%s. Will attempt to wait for confirmation anyway.", ex)
            log.debug("This exception happened at:", exc_info=True)

        expected_token = get_token()
        # Restarts may have eaten the whole interval - still give confirmation a chance
        cutoff_timestamp = max(cutoff_timestamp, time.time() + MINIMAL_CONFIRM_INTERVAL)

        log.debug("Waiting for a matching activation token on %r until %s",
                  PIPE_PATH, datetime.fromtimestamp(cutoff_timestamp))
        _wait_for_token(expected_token, cutoff_timestamp)

    if script != SCRIPT_ACTIVE:
        log.debug("Setting %r as the active configuration %r", script, SCRIPT_ACTIVE)
        # Previously files were hardlinked, but we don't really need strict atomicity here
        # and hardlinks may cause issues if somebody decides to meddle with the files manually
        # (e.g. emergency may be hardlinked into active and may be updated due to copy into active)
        rm_f(SCRIPT_ACTIVE)
        log.debug("cp -Pa %r %r", script, SCRIPT_ACTIVE)
        shutil.copy2(script, SCRIPT_ACTIVE, follow_symlinks=False)
    else:
        log.debug("Rules from %r are already the active configuration", script)
|
|
|
|
|
|
def try_create_pipe(path, stale_timestamp):
    """ Creates a pipe if it doesn't exist, removes it if it is too old. Otherwise returns False.

    path            -- location of the named pipe (fifo)
    stale_timestamp -- UNIX timestamp; an existing pipe whose ctime is older than
                       this is considered abandoned and is replaced

    Returns True if this process created the pipe (an atexit handler is then
    registered to remove it), False if a pipe owned by somebody else exists,
    including the case when another process creates it concurrently.
    """
    try:
        ctime = os.path.getctime(path)
        if ctime < stale_timestamp:
            log.info("Removing stale named pipe %r created at %s", path, datetime.fromtimestamp(ctime))
            os.unlink(path)
        else:
            return False
    except OSError as ex:
        if ex.errno != errno.ENOENT:
            raise

    log.debug("Creating named pipe %r and setting up atexit handler", path)
    try:
        os.mkfifo(path, 0o600)
    except FileExistsError:
        # Lost the race: somebody created the pipe between the check above and
        # mkfifo(). Report it the same way as a pre-existing pipe so that the
        # caller may retry instead of failing outright.
        log.debug("Named pipe %r was concurrently created by another process", path)
        return False

    @atexit.register
    def remove_pipe():
        log.debug("Removing named pipe %r on exit", path)
        rm_f(path)

    return True
|
|
|
|
|
|
def rollback():
    """ Rolls back to some working configuration.

    Creates ROLLBACK_FLAG, then re-applies the active configuration (with
    confirmation). If that fails for any reason the emergency configuration is
    applied without confirmation.

    Raises ConfirmFailed only if the rollback to the active configuration failed
    and the emergency configuration had to be applied; errors from applying the
    emergency configuration itself propagate as is.
    """
    log.info("Rolling back to working configuration")

    log.debug("touch %r", ROLLBACK_FLAG)
    open(ROLLBACK_FLAG, 'wb').close()

    def restore_active():
        log.info("Trying to roll back from new to active configuration")
        deadline = time.time() + get_confirm_timeout()
        try:
            apply_rules(SCRIPT_ACTIVE, deadline)
        except ConfirmFailed as ex:
            raise ConfirmFailed(
                "Connectivity failure occurred with both the new and rollback (previous) firewall configurations, "
                "indicating that both configurations are faulty.") from ex

    try:
        restore_active()
    except Exception as ex:
        # Last resort, whatever went wrong above
        log.info("Trying to roll back from active to emergency configuration")
        apply_rules(SCRIPT_EMERGENCY, 0, confirm=False)
        raise ConfirmFailed(f"{ex} "
                            "As an emergency measure, "
                            "the firewall was disabled and a configuration without firewall rules was applied. "
                            "To resolve the issue, correct the firewall rules and re-enable the firewall.")
|
|
|
|
|
|
def activate(rules_file):
    """ Activates new rules supplied via rules_file.

    rules_file -- readable text file object with the new rules script

    Steps: clear ROLLBACK_FLAG, claim the confirmation pipe (waiting for a
    previous activation if needed), store the rules as SCRIPT_NEW, apply them
    and wait for confirmation. Any failure while applying triggers rollback()
    and is then re-raised.

    Raises RuntimeError if the pipe could not be claimed before the confirmation
    timeout elapsed.
    """
    rm_f(ROLLBACK_FLAG)

    timeout = get_confirm_timeout()
    started_at = time.time()
    deadline = started_at + timeout
    # Assume other activations use the same timeout
    stale_before = started_at - 2.1 * timeout

    log.info("Activating with token=%r, timeout=%s", get_token(), timeout)

    log.debug("Setting up signal handlers to ensure cleanup")
    # Handle these signals the way SIGINT is handled
    interrupt_handler = signal.getsignal(signal.SIGINT)
    for signum in (signal.SIGTERM, signal.SIGHUP, signal.SIGQUIT):
        signal.signal(signum, interrupt_handler)

    log.debug("Trying to create named pipe %r, until %s, file older than %s is considered stale",
              PIPE_PATH,
              datetime.fromtimestamp(deadline),
              datetime.fromtimestamp(stale_before))
    pipe_created = False
    while not pipe_created and time.time() < deadline:
        pipe_created = try_create_pipe(PIPE_PATH, stale_before)
        if not pipe_created:
            time.sleep(0.5)
    if not pipe_created:
        log.debug("Could not create pipe")
        raise RuntimeError("Previous rules activation didn't finish before confirmation timeout")
    log.debug("Pipe created")

    log.info("Writing new rules from %r into %r", rules_file.name, SCRIPT_NEW)
    rm_f(SCRIPT_NEW)
    log.debug("cat > %r", SCRIPT_NEW)
    new_fd = os.open(SCRIPT_NEW, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o700)
    with os.fdopen(new_fd, 'w') as script_new:
        shutil.copyfileobj(rules_file, script_new)

    # NOTE(review): this checks the active script, not the one just written - confirm intended
    verify_script_perms(SCRIPT_ACTIVE)

    try:
        log.info("Trying to apply new configuration")
        apply_rules(SCRIPT_NEW, deadline)
        rm_f(SCRIPT_NEW)
    except Exception:
        rollback()
        raise
|
|
|
|
|
|
def confirm(wait=False):
    """ Confirms rules activation (new ones or during rollback).

    wait -- keep retrying until an activation process shows up instead of
            failing on the first unsuccessful attempt

    Writes the activation token followed by a newline into PIPE_PATH.

    Raises ConfirmFailed if the token could not be written before the cutoff or
    if ROLLBACK_FLAG exists after it was written.
    """
    token = get_token()
    if wait:
        interval = max(get_confirm_timeout(), MINIMAL_CONFIRM_INTERVAL)
        grace = 2 * (interval + 3 * MINIMAL_SCRIPT_TIMEOUT)
    else:
        grace = 0
    cutoff_timestamp = time.time() + grace
    log.info("Confirming with token=%r until %s", token, datetime.fromtimestamp(cutoff_timestamp))

    # Open w/o creating the pipe/file if it doesn't exist ([Errno 2] No such file or directory)
    # Open w/o blocking if no readers are present ([Errno 6] No such device or address)
    open_flags = os.O_WRONLY | os.O_APPEND | os.O_NONBLOCK
    while True:
        try:
            with os.fdopen(os.open(PIPE_PATH, open_flags), 'w') as pipe:
                log.debug("Writing activation token to %r", PIPE_PATH)
                pipe.write(token + "\n")
        except Exception as ex:
            if time.time() >= cutoff_timestamp:
                raise ConfirmFailed("Too late to confirm: no rules activation process") from ex
            log.debug("No activation process yet, continue to wait: %s", ex)
            time.sleep(0.5)
        else:
            break

    if os.path.lexists(ROLLBACK_FLAG):
        raise ConfirmFailed("Too late to confirm: new rules were rolled back")
|
|
|
|
|
|
def try_enable_features(iptables, table, rule):
    """
    Checks if desired iptables features are enabled. Tries to enable them if not.

    On modern systems iptables is capable of dynamically loading required kernel
    modules. This is convenient, misleading and maybe even dangerous at the same time
    ( http://backstage.soundcloud.com/2012/08/shoot-yourself-in-the-foot-with-iptables-and-kmod-auto-loading/ ).
    Since we don't want to meddle with kernel modules for obvious reasons, we use
    iptables itself to check features support. As a side effect such checks may trigger
    kernel module loading. Checks are isolated in a separate temporary chain, that
    nobody refers to.

    This approach has an added advantage of checking whether real iptables rules would
    work, not some "support" per se. Practice shows that the latter may be misleading
    and result in bugs. Therefore if you're not sure <rule> works on a given system,
    just call this command with the given <rule>.

    <rule> is <rule-specification> in terms of iptables(8). Specifying <target> as part
    of it is not required and not particularly useful. <rule> can also be '-L' to check
    table and/or binary availability.

    Raises subprocess.CalledProcessError if iptables rejects the check. The temporary
    chain is removed (best effort) in that case as well.
    """
    if rule == '-L':
        # listing is "safe"
        log.info("Checking feature: iptables=%r, table=%r, rule=%r", iptables, table, rule)
        subprocess.check_call([iptables, '-t', table, rule, '-n'])
        return

    # everything else is isolated in a temporary chain
    chain = "plesk-fw-tmp-chain"
    log.info("Checking feature: iptables=%r, table=%r, rule=%r, chain=%r",
             iptables, table, rule, chain)

    def remove_chain():
        # flush rules, zero counters, delete the chain
        subprocess.check_call([iptables, '-t', table, '-F', chain])
        subprocess.check_call([iptables, '-t', table, '-Z', chain])
        subprocess.check_call([iptables, '-t', table, '-X', chain])

    def create_chain():
        subprocess.check_call([iptables, '-t', table, '-N', chain])

    def append_rule(rule_args):
        subprocess.check_call([iptables, '-t', table, '-A', chain] + rule_args)

    try:
        remove_chain()
    except Exception as ex:
        # Failure is OK here - it means chain didn't exist
        log.debug("During initial %r chain removal: %s", chain, ex)

    create_chain()
    try:
        append_rule(rule.split())
    except BaseException:
        # An unsupported rule is the expected failure here - don't leave the
        # temporary chain behind, but report the original error, not a cleanup one.
        try:
            remove_chain()
        except Exception as ex:
            log.debug("During %r chain removal after a failed check: %s", chain, ex)
        raise
    remove_chain()
|
|
|
|
|
|
def main():
    """ Parses the command line, sets up logging and runs the requested command. """
    options = parse_args()
    set_up_logging(options.verbose)
    log.debug("Options: %s", options)

    if options.activate:
        activate(options.rules_file)
        return
    if options.confirm:
        confirm(options.wait)
        return
    if options.try_enable_features:
        try_enable_features(options.iptables, options.table, options.rule)
|
|
|
|
|
|
if __name__ == '__main__':
    # Exit codes: 2 when interrupted, 1 on any other error (message goes to stdout)
    try:
        main()
    except KeyboardInterrupt:
        log.debug("Interrupted:", exc_info=True)
        sys.exit(2)
    except Exception as error:
        print(str(error))
        log.error("%s", error)
        log.debug("This exception happened at:", exc_info=True)
        sys.exit(1)
|
|
|
|
# vim: ft=python
|