server/usr/lib/python3/dist-packages/lazr/restfulclient/_browser.py

# Copyright 2008,2012 Canonical Ltd.

# This file is part of lazr.restfulclient.
#
# lazr.restfulclient is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# lazr.restfulclient is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with lazr.restfulclient.  If not, see
# <http://www.gnu.org/licenses/>.

"""Browser object to make requests of lazr.restful web services.

The `Browser` class does some massage of HTTP requests and responses,
and handles custom caches. It is not part of the public
lazr.restfulclient API. (But maybe it should be?)
"""

# Make classes defined in this module new-style on Python 2 even when
# they do not explicitly inherit from ``object`` (no effect on Python 3).
__metaclass__ = type
# Names exported by a star-import of this module.
__all__ = [
    "Browser",
    "RestfulHttp",
    "ssl_certificate_validation_disabled",
]

import atexit
import errno
import os
import re
import shutil
import sys
import tempfile
from hashlib import md5
from io import BytesIO
from json import dumps

# Import sleep directly into the module so we can monkey-patch it
# during a test.
from time import sleep

from httplib2 import Http, urlnorm

try:
    from httplib2 import proxy_info_from_environment
except ImportError:
    from httplib2 import ProxyInfo

    proxy_info_from_environment = ProxyInfo.from_environment

try:
    # Python 3.
    from urllib.parse import urlencode
except ImportError:
    from urllib import urlencode

from wadllib.application import Application

from lazr.restfulclient._json import DatetimeJSONEncoder
from lazr.restfulclient.errors import HTTPError, error_for
from lazr.uri import URI

# Text-type aliases that work on both Python 2 and Python 3.  On Python 2
# ``bytes`` is simply another name for ``str``, which is what the test
# below detects.
if bytes is str:
    # Python 2
    unicode_type = unicode  # noqa: F821
    str_types = basestring  # noqa: F821
else:
    # Python 3
    unicode_type = str
    str_types = str


# A drop-in replacement for httplib2's safename.  Substantially borrowed
# from httplib2, but its cache name format changed in 0.12.0 and we want to
# stick with the previous version.

# Matches a leading URL scheme such as ``http://`` in a bytes value.
re_url_scheme = re.compile(br"^\w+://")
# The same pattern, for matching against text values.
re_url_scheme_s = re.compile(r"^\w+://")
# Matches runs of ``?``, ``/``, ``:`` and ``|`` in a bytes value; each
# run is replaced by a single comma when building a cache filename.
re_slash = re.compile(br"[?/:|]+")


def safename(filename):
    """Return a filename suitable for the cache.

    Strips dangerous and common characters to create a filename we
    can use to store the cache in.

    :param filename: The cache key (normally a URL), as bytes or text.
    :return: A text string made of the sanitized (and possibly
        truncated) key, a comma, and the hexadecimal md5 digest of the
        full key.
    """
    try:
        if isinstance(filename, bytes):
            filename_match = filename.decode("utf-8")
        else:
            filename_match = filename

        if re_url_scheme_s.match(filename_match):
            # The key looks like a URL, so prefer its IDNA form.
            # ``filename_match`` is already the text form of the key,
            # whichever type we were given.
            filename = filename_match.encode("idna")
    except UnicodeError:
        # The key is not valid UTF-8 or cannot be IDNA-encoded; carry on
        # with the key as given.
        pass
    if isinstance(filename, unicode_type):
        filename = filename.encode("utf-8")
    # The digest is taken over the whole key, before anything is stripped
    # or truncated, so distinct keys still get distinct filenames.
    filemd5 = md5(filename).hexdigest()
    filename = re_url_scheme.sub(b"", filename)
    filename = re_slash.sub(b",", filename)

    # This is the part that we changed. In stock httplib2, the
    # filename is trimmed if it's longer than 200 characters, and then
    # a comma and a 32-character md5 sum are appended. This causes
    # problems on eCryptfs filesystems, where the maximum safe
    # filename length is closer to 143 characters.
    #
    # We take a (user-hackable) maximum filename length from
    # RestfulHttp and subtract 33 characters to make room for the comma
    # and the md5 sum.
    #
    # See:
    #  http://code.google.com/p/httplib2/issues/detail?id=92
    #  https://bugs.launchpad.net/bugs/344878
    #  https://bugs.launchpad.net/bugs/545197
    maximum_filename_length = RestfulHttp.maximum_cache_filename_length
    maximum_length_before_md5_sum = maximum_filename_length - 32 - 1
    if len(filename) > maximum_length_before_md5_sum:
        # The cut is made on bytes, so it can land in the middle of a
        # multi-byte UTF-8 sequence.  Drop any such partial character
        # instead of failing with UnicodeDecodeError; the md5 suffix
        # still keeps the name unique.
        prefix = filename[:maximum_length_before_md5_sum].decode(
            "utf-8", "ignore"
        )
    else:
        prefix = filename.decode("utf-8")
    return ",".join((prefix, filemd5))


def ssl_certificate_validation_disabled():
    """Report whether SSL certificate validation has been switched off.

    Testing servers sometimes present broken certificates.  Instead of
    failing on them, the ``LP_DISABLE_SSL_CERTIFICATE_VALIDATION``
    environment variable can be set to skip the check.

    :return: True when the variable is set to any non-empty value,
        False when it is unset or empty.
    """
    setting = os.environ.get("LP_DISABLE_SSL_CERTIFICATE_VALIDATION")
    return setting not in (None, "")


# Use the CA bundle at this well-known system path when it exists;
# otherwise fall back to the bundle that httplib2 exposes as ``CA_CERTS``.
if os.path.exists("/etc/ssl/certs/ca-certificates.crt"):
    SYSTEM_CA_CERTS = "/etc/ssl/certs/ca-certificates.crt"
else:
    from httplib2 import CA_CERTS as SYSTEM_CA_CERTS


class RestfulHttp(Http):
    """An ``Http`` subclass that authorizes requests and reads its cache.

    An optional authorizer object is given the chance to set up the
    session once and to authorize every outgoing request.  When the
    cache is a ``MultipleRepresentationCache``, cached header values can
    be looked up through ``_getCachedHeader``.
    """

    # Longest cache filename that safename() is allowed to produce.
    maximum_cache_filename_length = 143

    def __init__(
        self,
        authorizer=None,
        cache=None,
        timeout=None,
        proxy_info=proxy_info_from_environment,
    ):
        """Set up the connection and let the authorizer prepare it.

        :param authorizer: Optional object providing ``authorizeSession``
            and ``authorizeRequest``; None disables authorization.
        :param cache: Passed through to ``Http`` unchanged.
        :param timeout: Passed through to ``Http`` unchanged.
        :param proxy_info: Passed through to ``Http`` unchanged.
        """
        ssl_options = {
            "disable_ssl_certificate_validation": (
                ssl_certificate_validation_disabled()
            ),
            "ca_certs": SYSTEM_CA_CERTS,
        }
        super(RestfulHttp, self).__init__(
            cache, timeout, proxy_info, **ssl_options
        )
        self.authorizer = authorizer
        if authorizer is not None:
            authorizer.authorizeSession(self)

    def _request(
        self,
        conn,
        host,
        absolute_uri,
        request_uri,
        method,
        body,
        headers,
        redirections,
        cachekey,
    ):
        """Authorize an outgoing request, then hand it to ``Http``."""
        # An Authorization header may be left over from a previous
        # request that resulted in a redirect.  Resources protected by
        # OAuth or HTTP Digest must send a distinct Authorization header
        # with each request, to prevent playback attacks, so discard any
        # stale one and start again.
        headers.pop("authorization", None)
        authorizer = self.authorizer
        if authorizer is not None:
            authorizer.authorizeRequest(absolute_uri, method, body, headers)
        parent = super(RestfulHttp, self)
        return parent._request(
            conn,
            host,
            absolute_uri,
            request_uri,
            method,
            body,
            headers,
            redirections,
            cachekey,
        )

    def _getCachedHeader(self, uri, header):
        """Return the cached value of ``header`` for ``uri``, if any.

        Only a ``MultipleRepresentationCache`` can answer this; with any
        other cache the result is None.
        """
        cache = self.cache
        if not isinstance(cache, MultipleRepresentationCache):
            return None
        return cache._getCachedHeader(uri, header)


class AtomicFileCache(object):
    """A FileCache that can be shared by multiple processes.

    Values are first written to a temporary file inside the cache
    directory and then renamed onto their final name, so a reader never
    sees a partially written entry.

    Based on a patch found at
    <http://code.google.com/p/httplib2/issues/detail?id=125>.
    """

    # Prefix of the temporary files created by set().  Cache keys may not
    # map to a filename starting with this prefix.
    TEMPFILE_PREFIX = ".temp"

    def __init__(self, cache, safe=safename):
        """Construct an ``AtomicFileCache``.

        :param cache: The directory to use as a cache.  It is created if
            it does not exist yet.
        :param safe: A function that takes a key and returns a name that's
            safe to use as a filename.  The function must never return a
            string that begins with ``TEMPFILE_PREFIX``.  By default uses
            ``safename``.
        """
        self._cache_dir = os.path.normpath(cache)
        self._get_safe_name = safe
        try:
            os.makedirs(self._cache_dir)
        except OSError as e:
            # An already existing directory is fine; anything else is not.
            if e.errno != errno.EEXIST:
                raise

    def _get_key_path(self, key):
        """Return the path on disk where ``key`` is stored.

        :raises ValueError: If the safe name for ``key`` starts with
            ``TEMPFILE_PREFIX``.
        """
        safe_key = self._get_safe_name(key)
        if safe_key.startswith(self.TEMPFILE_PREFIX):
            # If the cache key starts with the tempfile prefix, then it's
            # possible that it will clash with a temporary file that we
            # create.
            raise ValueError(
                "Cache key cannot start with '%s'" % self.TEMPFILE_PREFIX
            )
        return os.path.join(self._cache_dir, safe_key)

    def get(self, key):
        """Get the value of ``key`` if set.

        This behaves slightly differently to ``FileCache`` in that if
        ``set()`` fails to store a key, this ``get()`` will behave as if that
        key were never set whereas ``FileCache`` returns the empty string.

        :param key: The key to retrieve.  Must be either bytes or unicode
            text.
        :return: The value of ``key`` if set, None otherwise.
        """
        cache_full_path = self._get_key_path(key)
        try:
            with open(cache_full_path, "rb") as f:
                return f.read()
        except (IOError, OSError) as e:
            # A missing file just means the key was never set.
            if e.errno != errno.ENOENT:
                raise
            return None

    def set(self, key, value):
        """Set ``key`` to ``value``.

        If anything goes wrong the temporary file is removed again, so
        failed writes do not leave ``TEMPFILE_PREFIX`` files behind.

        :param key: The key to set.  Must be either bytes or unicode text.
        :param value: The value to set ``key`` to.  Must be bytes.
        :raises ValueError: If the safe name for ``key`` starts with
            ``TEMPFILE_PREFIX``.
        """
        # Validate the key before touching the disk, so that a rejected
        # key does not leave an orphaned temporary file behind.
        cache_full_path = self._get_key_path(key)
        # Open a temporary file
        handle, path_name = tempfile.mkstemp(
            prefix=self.TEMPFILE_PREFIX, dir=self._cache_dir
        )
        stored = False
        try:
            # The context manager closes the file even if write() fails.
            with os.fdopen(handle, "wb") as f:
                f.write(value)
            # And rename atomically (on POSIX at least)
            if sys.platform == "win32" and os.path.exists(cache_full_path):
                os.unlink(cache_full_path)
            os.rename(path_name, cache_full_path)
            stored = True
        finally:
            if not stored:
                # Best-effort cleanup; the original error is what the
                # caller needs to see, not a failure to clean up.
                try:
                    os.remove(path_name)
                except OSError:
                    pass

    def delete(self, key):
        """Delete ``key`` from the cache.

        If ``key`` has not already been set then has no effect.

        :param key: The key to delete.  Must be either bytes or unicode text.
        """
        cache_full_path = self._get_key_path(key)
        try:
            os.remove(cache_full_path)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise


class MultipleRepresentationCache(AtomicFileCache):
    """A cache that can hold different representations of the same resource.

    If a resource has two representations with two media types,
    FileCache will only store the most recently fetched
    representation. This cache can keep track of multiple
    representations of the same resource.

    This class works on the assumption that outside calling code sets
    an instance's request_media_type attribute to the value of the
    'Accept' header before initiating the request.

    This class is very much not thread-safe, but FileCache isn't
    thread-safe anyway.
    """

    def __init__(self, cache):
        """Tell FileCache to call append_media_type when generating keys.

        :param cache: The directory to use as a cache.
        """
        super(MultipleRepresentationCache, self).__init__(
            cache, self.append_media_type
        )
        # Media type of the request in progress; None means keys are
        # used without a media-type suffix.
        self.request_media_type = None

    def append_media_type(self, key):
        """Append the request media type to the cache key.

        This ensures that representations of the same resource will be
        cached separately, so long as they're served as different
        media types.

        :param key: The cache key.
        :return: The filename-safe form of the (possibly suffixed) key.
        """
        if self.request_media_type is not None:
            key = key + "-" + self.request_media_type
        return safename(key)

    def _getCachedHeader(self, uri, header):
        """Retrieve a cached value for an HTTP header.

        Only the header section of the cached entry is searched; the
        lookup stops at the blank line that separates the headers from
        the entity body.

        :param uri: The URI whose cached entry should be inspected.
        :param header: The header name, compared case-sensitively against
            the start of each cached header line.
        :return: The header value as bytes with surrounding whitespace
            stripped, or None if there is no cached entry or the entry
            has no such header.
        """
        (scheme, authority, request_uri, cachekey) = urlnorm(uri)
        cached_value = self.get(cachekey)
        header_start = header + ":"
        if not isinstance(header_start, bytes):
            header_start = header_start.encode("utf-8")
        if cached_value is None:
            return None
        for line in BytesIO(cached_value):
            if not line.strip():
                # End of the header section.  Everything after this is
                # the body, which must not be mistaken for a header.
                break
            if line.startswith(header_start):
                return line[len(header_start) :].strip()
        return None


class Browser:
    """A class for making calls to lazr.restful web services."""

    # Sentinel returned instead of content when a conditional request
    # comes back as 304 with an empty body.
    NOT_MODIFIED = object()
    # Default number of times a request is retried after a 502 or 503.
    MAX_RETRIES = 6

    def __init__(
        self,
        service_root,
        credentials,
        cache=None,
        timeout=None,
        proxy_info=None,
        user_agent=None,
        max_retries=MAX_RETRIES,
    ):
        """Initialize, possibly creating a cache.

        If no cache is provided, a temporary directory will be used as
        a cache. The temporary directory will be automatically removed
        when the Python process exits.

        :param service_root: Object whose ``httpFactory`` method builds
            the underlying HTTP connection.
        :param credentials: Passed to ``httpFactory``.
        :param cache: A cache object, a directory name (which is wrapped
            in a ``MultipleRepresentationCache``), or None.
        :param timeout: Passed to ``httpFactory``.
        :param proxy_info: Passed to ``httpFactory``.
        :param user_agent: Value for the User-Agent header, or None to
            send none.
        :param max_retries: How many times to retry after a 502 or 503.
        """
        if cache is None:
            cache = tempfile.mkdtemp()
            atexit.register(shutil.rmtree, cache)
        if isinstance(cache, str_types):
            cache = MultipleRepresentationCache(cache)
        self._connection = service_root.httpFactory(
            credentials, cache, timeout, proxy_info
        )
        self.user_agent = user_agent
        self.max_retries = max_retries

    def _request_and_retry(self, url, method, body, headers):
        """Make a request, retrying while the server answers 502 or 503.

        At least one request is always made, even if ``max_retries`` is
        zero or negative.

        :return: The ``(response, content)`` pair of the last attempt.
        """
        retry_count = 0
        while True:
            response, content = self._connection.request(
                url, method=method, body=body, headers=headers
            )
            if (
                response.status not in (502, 503)
                or retry_count >= self.max_retries
            ):
                # Either the request succeeded or we gave up.
                return response, content
            # The server returned a 502 or 503. Sleep for 0, 1, 2,
            # 4, 8, 16, ... seconds and try again.
            sleep(int(2 ** (retry_count - 1)))
            retry_count += 1

    def _request(
        self,
        url,
        data=None,
        method="GET",
        media_type="application/json",
        extra_headers=None,
    ):
        """Create an authenticated request object.

        :param url: The URL to request.
        :param data: The request body, if any.
        :param method: The HTTP method.
        :param media_type: Value for the Accept header; also recorded on
            a ``MultipleRepresentationCache`` so that it caches per
            media type.
        :param extra_headers: Additional headers; these override the
            defaults.
        :return: A ``(response, content)`` pair.  ``content`` is
            ``NOT_MODIFIED`` when a conditional request yields an empty
            304 response.
        :raises ValueError: If ``url`` is the "redacted" marker.
        :raises HTTPError: For an empty 304 on an unconditional request,
            or whenever ``error_for`` returns an error for the response.
        """
        # If the user is trying to get data that has been redacted,
        # give a helpful message.
        if url == "tag:launchpad.net:2008:redacted":
            raise ValueError(
                "You tried to access a resource that you "
                "don't have the server-side permission to see."
            )

        # Add extra headers for the request.
        headers = {"Accept": media_type}
        if self.user_agent is not None:
            headers["User-Agent"] = self.user_agent
        if isinstance(self._connection.cache, MultipleRepresentationCache):
            self._connection.cache.request_media_type = media_type
        if extra_headers is not None:
            headers.update(extra_headers)
        response, content = self._request_and_retry(
            str(url), method=method, body=data, headers=headers
        )
        if response.status == 304:
            # The resource didn't change.
            if content == b"":
                if (
                    "If-None-Match" in headers
                    or "If-Modified-Since" in headers
                ):
                    # The caller made a conditional request, and the
                    # condition failed. Rather than send an empty
                    # representation, which might be misinterpreted,
                    # send a special object that will let the calling code know
                    # that the resource was not modified.
                    return response, self.NOT_MODIFIED
                else:
                    # The caller didn't make a conditional request,
                    # but the response code is 304 and there's no
                    # content. The only way to handle this is to raise
                    # an error.
                    #
                    # We don't use error_for() here because 304 is not
                    # normally considered an error condition.
                    raise HTTPError(response, content)
            else:
                # XXX leonardr 2010/04/12 bug=httplib2#97
                #
                # Why is this check here? Why would there ever be any
                # content when the response code is 304? It's because of
                # an httplib2 bug that sometimes sets a 304 response
                # code when caching retrieved documents. When the
                # cached document is retrieved, we get a 304 response
                # code and a full representation.
                #
                # Since the cache lookup succeeded, the 'real'
                # response code is 200. This code undoes the bad
                # behavior in httplib2.
                response.status = 200
            return response, content
        # Turn non-2xx responses into appropriate HTTPError subclasses.
        error = error_for(response, content)
        if error is not None:
            raise error
        return response, content

    def get(self, resource_or_uri, headers=None, return_response=False):
        """GET a representation of the given resource or URI.

        :param resource_or_uri: A URL (string or ``URI``), or an object
            whose ``get_method("get")`` can build the request URL.
        :param headers: Extra request headers, if any.
        :param return_response: If true, return ``(response, content)``
            instead of just the content.
        """
        if isinstance(resource_or_uri, (str_types, URI)):
            url = resource_or_uri
        else:
            method = resource_or_uri.get_method("get")
            url = method.build_request_url()
        response, content = self._request(url, extra_headers=headers)
        if return_response:
            return (response, content)
        return content

    def get_wadl_application(self, url):
        """GET a WADL representation of the resource at the requested url.

        :return: An ``Application`` built from the URL and the response
            content (encoded to UTF-8 bytes if it was text).
        """
        wadl_type = "application/vnd.sun.wadl+xml"
        response, content = self._request(url, media_type=wadl_type)
        url = str(url)
        if not isinstance(content, bytes):
            content = content.encode("utf-8")
        return Application(url, content)

    def post(self, url, method_name, **kws):
        """POST a request to the web service.

        :param url: The URL to post to.
        :param method_name: Sent as the ``ws.op`` form field.
        :param kws: Further form fields.
        :return: A ``(response, content)`` pair.
        """
        kws["ws.op"] = method_name
        data = urlencode(kws)
        return self._request(url, data, "POST")

    def put(self, url, representation, media_type, headers=None):
        """PUT the given representation to the URL.

        :param media_type: Sent as the Content-Type header.
        :param headers: Extra headers; these override Content-Type.
        :return: A ``(response, content)`` pair.
        """
        extra_headers = {"Content-Type": media_type}
        if headers is not None:
            extra_headers.update(headers)
        return self._request(
            url, representation, "PUT", extra_headers=extra_headers
        )

    def delete(self, url):
        """DELETE the resource at the given URL."""
        self._request(url, method="DELETE")
        return None

    def patch(self, url, representation, headers=None):
        """PATCH the object at url with the updated representation.

        The representation is serialized to JSON.  If the connection has
        a cached ETag for ``url`` (and is not ignoring ETags), it is sent
        as If-Match, unless the caller already supplied an If-Match
        header of their own.  The caller's ``headers`` dict is never
        modified.

        :param url: The URL of the object to modify.
        :param representation: The JSON-serializable changes to send.
        :param headers: Extra headers; these override Content-Type.
        :return: A ``(response, content)`` pair.
        """
        extra_headers = {"Content-Type": "application/json"}
        if headers is not None:
            extra_headers.update(headers)
        # httplib2 doesn't know about the PATCH method, so we need to
        # do some work ourselves. Pull any cached value of "ETag" out
        # and use it as the value for "If-Match".
        cached_etag = self._connection._getCachedHeader(str(url), "etag")
        if cached_etag is not None and not self._connection.ignore_etag:
            # Header names are case-insensitive, so look for an explicit
            # If-Match in any spelling before adding our own.
            caller_sent_if_match = any(
                name.lower() == "if-match" for name in extra_headers
            )
            if not caller_sent_if_match:
                # http://www.w3.org/1999/04/Editing/
                #
                # This must go into the headers that are actually sent,
                # not into the caller's (possibly None) dict.
                extra_headers["If-Match"] = cached_etag

        return self._request(
            url,
            dumps(representation, cls=DatetimeJSONEncoder),
            "PATCH",
            extra_headers=extra_headers,
        )