941 lines
34 KiB
Python
941 lines
34 KiB
Python
"""Abstract crash database interface."""
|
|
|
|
# Copyright (C) 2007 - 2009 Canonical Ltd.
|
|
# Author: Martin Pitt <martin.pitt@ubuntu.com>
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify it
|
|
# under the terms of the GNU General Public License as published by the
|
|
# Free Software Foundation; either version 2 of the License, or (at your
|
|
# option) any later version. See http://www.gnu.org/copyleft/gpl.html for
|
|
# the full text of the license.
|
|
|
|
# TODO: Address following pylint complaints
|
|
# pylint: disable=invalid-name
|
|
|
|
import functools
|
|
import os
|
|
import shutil
|
|
import sys
|
|
import urllib.error
|
|
import urllib.parse
|
|
import urllib.request
|
|
from typing import Any
|
|
|
|
from apport.packaging_impl import impl as packaging
|
|
|
|
|
|
def _u(string):
    """Decode UTF-8 bytes to str (ignoring errors); pass str through as-is."""
    if not isinstance(string, bytes):
        return string
    return string.decode("UTF-8", "ignore")
|
|
|
|
|
|
class CrashDatabase:
|
|
"""Crash database interface."""
|
|
|
|
# TODO: Check if some methods can be made private
|
|
# pylint: disable=too-many-public-methods
|
|
def __init__(self, auth_file, options):
|
|
"""Initialize crash database connection.
|
|
|
|
You need to specify an implementation specific file with the
|
|
authentication credentials for retracing access for download() and
|
|
update(). For upload() and get_comment_url() you can use None.
|
|
|
|
options is a dictionary with additional settings from crashdb.conf; see
|
|
get_crashdb() for details.
|
|
"""
|
|
self.auth_file = auth_file
|
|
self.options = options
|
|
self.duplicate_db = None
|
|
self.format_version = None
|
|
|
|
def get_bugpattern_baseurl(self):
|
|
"""Return the base URL for bug patterns.
|
|
|
|
See apport.report.Report.search_bug_patterns() for details. If this
|
|
function returns None, bug patterns are disabled.
|
|
"""
|
|
return self.options.get("bug_pattern_url")
|
|
|
|
def accepts(self, report):
|
|
"""Check if this report can be uploaded to this database.
|
|
|
|
Crash databases might limit the types of reports they get with e. g.
|
|
the "problem_types" option.
|
|
"""
|
|
if "problem_types" in self.options:
|
|
return report.get("ProblemType") in self.options["problem_types"]
|
|
|
|
return True
|
|
|
|
#
|
|
# API for duplicate detection
|
|
#
|
|
# Tests are in apport/crashdb_impl/memory.py.
|
|
|
|
def init_duplicate_db(self, path):
|
|
"""Initialize duplicate database.
|
|
|
|
path specifies an SQLite database. It will be created if it does not
|
|
exist yet.
|
|
"""
|
|
import sqlite3 as dbapi2 # pylint: disable=import-outside-toplevel
|
|
|
|
assert (
|
|
dbapi2.paramstyle == "qmark"
|
|
), "this module assumes qmark dbapi parameter style"
|
|
|
|
self.format_version = 3
|
|
|
|
init = (
|
|
not os.path.exists(path) or path == ":memory:" or os.path.getsize(path) == 0
|
|
)
|
|
self.duplicate_db = dbapi2.connect(path, timeout=7200)
|
|
|
|
if init:
|
|
cur = self.duplicate_db.cursor()
|
|
cur.execute("CREATE TABLE version (format INTEGER NOT NULL)")
|
|
cur.execute("INSERT INTO version VALUES (?)", [self.format_version])
|
|
|
|
cur.execute(
|
|
"""CREATE TABLE crashes (
|
|
signature VARCHAR(255) NOT NULL,
|
|
crash_id INTEGER NOT NULL,
|
|
fixed_version VARCHAR(50),
|
|
last_change TIMESTAMP,
|
|
CONSTRAINT crashes_pk PRIMARY KEY (crash_id))"""
|
|
)
|
|
|
|
cur.execute(
|
|
"""CREATE TABLE address_signatures (
|
|
signature VARCHAR(1000) NOT NULL,
|
|
crash_id INTEGER NOT NULL,
|
|
CONSTRAINT address_signatures_pk PRIMARY KEY (signature))"""
|
|
)
|
|
|
|
self.duplicate_db.commit()
|
|
|
|
# verify integrity
|
|
cur = self.duplicate_db.cursor()
|
|
cur.execute("PRAGMA integrity_check")
|
|
result = cur.fetchall()
|
|
if result != [("ok",)]:
|
|
raise SystemError(f"Corrupt duplicate db:{result}")
|
|
|
|
try:
|
|
cur.execute("SELECT format FROM version")
|
|
result = cur.fetchone()
|
|
except self.duplicate_db.OperationalError as error:
|
|
if "no such table" in str(error):
|
|
# first db format did not have version table yet
|
|
result = [0]
|
|
if result[0] > self.format_version:
|
|
raise SystemError(f"duplicate DB has unknown format {result[0]}")
|
|
if result[0] < self.format_version:
|
|
print(
|
|
f"duplicate db has format {result[0]},"
|
|
f" upgrading to {self.format_version}"
|
|
)
|
|
self._duplicate_db_upgrade(result[0])
|
|
|
|
    def check_duplicate(self, crash_id, report=None):
        # TODO: Split into smaller functions/methods
        # pylint: disable=too-many-branches,too-many-statements
        """Check whether a crash is already known.

        If the crash is new, it will be added to the duplicate database and the
        function returns None. If the crash is already known, the function
        returns a pair (crash_id, fixed_version), where fixed_version might be
        None if the crash is not fixed in the latest version yet. Depending on
        whether the version in report is smaller than/equal to the fixed
        version or larger, this calls close_duplicate() or mark_regression().

        If the report does not have a valid crash signature, this function does
        nothing and just returns None.

        By default, the report gets download()ed, but for performance reasons
        it can be explicitly passed to this function if it is already
        available.
        """
        assert self.duplicate_db, "init_duplicate_db() needs to be called before"

        if not report:
            report = self.download(crash_id)

        self._mark_dup_checked(crash_id, report)

        # a custom DuplicateSignature field takes precedence over the
        # computed crash signature
        if "DuplicateSignature" in report:
            sig = report["DuplicateSignature"]
        else:
            sig = report.crash_signature()
        existing = []
        if sig:
            # use real duplicate signature
            existing = self._duplicate_search_signature(sig, crash_id)

            if existing:
                # update status of existing master bugs
                for ex_id, _ in existing:
                    self._duplicate_db_sync_status(ex_id)
                # the sync may have removed or fixed masters; search again to
                # get the up-to-date candidate list
                existing = self._duplicate_search_signature(sig, crash_id)

        try:
            # "Package" is "<name> <version>"; only the version is needed here
            report_package_version = report["Package"].split()[1]
        except (KeyError, IndexError):
            # no Package field, or no version in it
            report_package_version = None

        # check the existing IDs whether there is one that is unfixed or not
        # older than the report's package version; if so, we have a duplicate.
        master_id = None
        master_ver = None
        for ex_id, ex_ver in existing:
            if (
                not ex_ver
                or not report_package_version
                or packaging.compare_versions(report_package_version, ex_ver) < 0
            ):
                master_id = ex_id
                master_ver = ex_ver
                break
        else:
            # if we did not find a new enough open master report,
            # we have a regression of the latest fix. Mark it so, and create a
            # new unfixed ID for it later on
            if existing:
                self.mark_regression(crash_id, existing[-1][0])

        # now query address signatures, they might turn up another duplicate
        # (not necessarily the same, due to Stacktraces sometimes being
        # slightly different)
        addr_sig = report.crash_signature_addresses()
        if addr_sig:
            addr_match = self._duplicate_search_address_signature(addr_sig)
            if addr_match and addr_match != master_id:
                if master_id is None:
                    # we have a duplicate only identified by address sig,
                    # close it
                    master_id = addr_match
                # our bug is a dupe of two different masters, one from
                # symbolic, the other from addr matching (see LP#943117);
                # make them all duplicates of each other, using the lower
                # number as master
                elif master_id < addr_match:
                    self.close_duplicate(report, addr_match, master_id)
                    self._duplicate_db_merge_id(addr_match, master_id)
                else:
                    self.close_duplicate(report, master_id, addr_match)
                    self._duplicate_db_merge_id(master_id, addr_match)
                    master_id = addr_match
                # no version tracking for address signatures yet
                master_ver = None

        if master_id is not None and master_id != crash_id:
            # remember the address signature for the master so future address
            # matches find it directly
            if addr_sig:
                self._duplicate_db_add_address_signature(addr_sig, master_id)
            self.close_duplicate(report, crash_id, master_id)
            return (master_id, master_ver)

        # no duplicate detected; create a new record for the ID
        # if we don't have one already
        if sig:
            cur = self.duplicate_db.cursor()
            cur.execute("SELECT count(*) FROM crashes WHERE crash_id == ?", [crash_id])
            count_id = cur.fetchone()[0]
            if count_id == 0:
                cur.execute(
                    "INSERT INTO crashes VALUES (?, ?, ?, CURRENT_TIMESTAMP)",
                    (_u(sig), crash_id, None),
                )
                self.duplicate_db.commit()
        if addr_sig:
            self._duplicate_db_add_address_signature(addr_sig, crash_id)

        return None
|
|
|
|
    def known(self, report):
        # TODO: Split into smaller functions/methods
        # pylint: disable=too-many-branches
        """Check if the crash db already knows about the crash signature.

        Check if the report has a DuplicateSignature, crash_signature(), or
        StacktraceAddressSignature, and ask the database whether the problem is
        already known. If so, return an URL where the user can check the status
        or subscribe (if available), or just return True if the report is known
        but there is no public URL. In that case the report will not be
        uploaded (i. e. upload() will not be called).

        Return None if the report does not have any signature or the crash
        database does not support checking for duplicates on the client side.

        The default implementation uses a text file format generated by
        duplicate_db_publish() at an URL specified by the "dupdb_url" option.
        Subclasses are free to override this with a custom implementation, such
        as a real database lookup.
        """
        if not self.options.get("dupdb_url"):
            return None

        # try the symbolic signature first, then the address signature
        for kind in ("sig", "address"):
            # get signature
            if kind == "sig":
                if "DuplicateSignature" in report:
                    sig = report["DuplicateSignature"]
                else:
                    sig = report.crash_signature()
            else:
                sig = report.crash_signature_addresses()

            if not sig:
                continue

            # build URL where the data should be
            h = self.duplicate_sig_hash(sig)
            if not h:
                return None

            # the hash is already quoted, but we really want to open the quoted
            # file names; as urlopen() unquotes, we need to double-quote here
            # again so that urlopen() sees the single-quoted file names
            url = os.path.join(
                self.options["dupdb_url"], kind, urllib.parse.quote_plus(h)
            )

            # read data file
            try:
                with urllib.request.urlopen(url) as dupdb_url:
                    contents = dupdb_url.read().decode("UTF-8")
                    if "<title>404 Not Found" in contents:
                        # presumably some servers deliver an HTML 404 page
                        # with HTTP status 200; treat it as "not found"
                        continue
            except (OSError, urllib.error.URLError):
                # does not exist, failed to load, etc.
                continue

            # now check if we find our signature; each line in the published
            # file is "<crash_id> <signature>"
            for line in contents.splitlines():
                try:
                    crash_id, s = line.split(None, 1)
                    crash_id = int(crash_id)
                except ValueError:
                    # malformed line; skip it
                    continue
                if s == sig:
                    result = self.get_id_url(report, crash_id)
                    if not result:
                        # if we can't have an URL, just report as "known"
                        result = "1"
                    return result

        return None
|
|
|
|
def duplicate_db_fixed(self, crash_id, version):
|
|
"""Mark given crash ID as fixed in the duplicate database.
|
|
|
|
version specifies the package version the crash was fixed in (None for
|
|
'still unfixed').
|
|
"""
|
|
assert self.duplicate_db, "init_duplicate_db() needs to be called before"
|
|
|
|
cur = self.duplicate_db.cursor()
|
|
n = cur.execute(
|
|
"UPDATE crashes "
|
|
"SET fixed_version = ?, last_change = CURRENT_TIMESTAMP "
|
|
"WHERE crash_id = ?",
|
|
(version, crash_id),
|
|
)
|
|
assert n.rowcount == 1
|
|
self.duplicate_db.commit()
|
|
|
|
def duplicate_db_remove(self, crash_id):
|
|
"""Remove crash from the duplicate database.
|
|
|
|
This happens when a report got rejected or manually duplicated.
|
|
"""
|
|
assert self.duplicate_db, "init_duplicate_db() needs to be called before"
|
|
|
|
cur = self.duplicate_db.cursor()
|
|
cur.execute("DELETE FROM crashes WHERE crash_id = ?", [crash_id])
|
|
cur.execute("DELETE FROM address_signatures WHERE crash_id = ?", [crash_id])
|
|
self.duplicate_db.commit()
|
|
|
|
def duplicate_db_change_master_id(self, old_id, new_id):
|
|
"""Change a crash ID."""
|
|
assert self.duplicate_db, "init_duplicate_db() needs to be called before"
|
|
|
|
cur = self.duplicate_db.cursor()
|
|
cur.execute(
|
|
"UPDATE crashes "
|
|
"SET crash_id = ?, last_change = CURRENT_TIMESTAMP "
|
|
"WHERE crash_id = ?",
|
|
[new_id, old_id],
|
|
)
|
|
cur.execute(
|
|
"UPDATE address_signatures SET crash_id = ? WHERE crash_id = ?",
|
|
[new_id, old_id],
|
|
)
|
|
self.duplicate_db.commit()
|
|
|
|
def duplicate_db_publish(self, publish_dir):
|
|
"""Create text files suitable for www publishing.
|
|
|
|
Create a number of text files in the given directory which Apport
|
|
clients can use to determine whether a problem is already reported to
|
|
the database, through the known() method. This directory is suitable
|
|
for publishing to the web.
|
|
|
|
The database is indexed by the first two fields of the duplicate or
|
|
crash signature, to avoid having to download the entire database every
|
|
time.
|
|
|
|
If the directory already exists, it will be updated. The new content is
|
|
built in a new directory which is the given one with ".new" appended,
|
|
then moved to the given name in an almost atomic way.
|
|
"""
|
|
# hard to change, pylint: disable=consider-using-with
|
|
assert self.duplicate_db, "init_duplicate_db() needs to be called before"
|
|
|
|
# first create the temporary new dir; if that fails, nothing has been
|
|
# changed and we fail early
|
|
out = f"{publish_dir}.new"
|
|
os.mkdir(out)
|
|
|
|
# crash addresses
|
|
addr_base = os.path.join(out, "address")
|
|
os.mkdir(addr_base)
|
|
cur_hash = None
|
|
cur_file = None
|
|
|
|
cur = self.duplicate_db.cursor()
|
|
|
|
cur.execute("SELECT * from address_signatures ORDER BY signature")
|
|
for sig, crash_id in cur.fetchall():
|
|
h = self.duplicate_sig_hash(sig)
|
|
if h is None:
|
|
# some entries can't be represented in a single line
|
|
continue
|
|
if h != cur_hash:
|
|
cur_hash = h
|
|
if cur_file:
|
|
cur_file.close()
|
|
cur_file = open(
|
|
os.path.join(addr_base, cur_hash), "w", encoding="utf-8"
|
|
)
|
|
|
|
cur_file.write(f"{crash_id} {sig}\n")
|
|
|
|
if cur_file:
|
|
cur_file.close()
|
|
|
|
# duplicate signatures
|
|
sig_base = os.path.join(out, "sig")
|
|
os.mkdir(sig_base)
|
|
cur_hash = None
|
|
cur_file = None
|
|
|
|
cur.execute("SELECT signature, crash_id from crashes ORDER BY signature")
|
|
for sig, crash_id in cur.fetchall():
|
|
h = self.duplicate_sig_hash(sig)
|
|
if h is None:
|
|
# some entries can't be represented in a single line
|
|
continue
|
|
if h != cur_hash:
|
|
cur_hash = h
|
|
if cur_file:
|
|
cur_file.close()
|
|
cur_file = open(os.path.join(sig_base, cur_hash), "wb")
|
|
|
|
cur_file.write(f"{crash_id} {sig}\n".encode("UTF-8"))
|
|
|
|
if cur_file:
|
|
cur_file.close()
|
|
|
|
# switch over tree; this is as atomic as we can be with directories
|
|
if os.path.exists(publish_dir):
|
|
os.rename(publish_dir, f"{publish_dir}.old")
|
|
os.rename(out, publish_dir)
|
|
if os.path.exists(f"{publish_dir}.old"):
|
|
shutil.rmtree(f"{publish_dir}.old")
|
|
|
|
def _duplicate_db_upgrade(self, cur_format):
|
|
"""Upgrade database to current format."""
|
|
# Format 3 added a primary key which can't be done as an upgrade in
|
|
# SQLite
|
|
if cur_format < 3:
|
|
raise SystemError("Cannot upgrade database from format earlier than 3")
|
|
|
|
cur = self.duplicate_db.cursor()
|
|
|
|
cur.execute("UPDATE version SET format = ?", (cur_format,))
|
|
self.duplicate_db.commit()
|
|
|
|
assert cur_format == self.format_version
|
|
|
|
def _duplicate_search_signature(self, sig, crash_id):
|
|
"""Look up signature in the duplicate db.
|
|
|
|
Return [(crash_id, fixed_version)] tuple list.
|
|
|
|
There might be several matches if a crash has been reintroduced in a
|
|
later version. The results are sorted so that the highest fixed version
|
|
comes first, and "unfixed" being the last result.
|
|
|
|
id is the bug we are looking to find a duplicate for. The result will
|
|
never contain id, to avoid marking a bug as a duplicate of itself if a
|
|
bug is reprocessed more than once.
|
|
"""
|
|
cur = self.duplicate_db.cursor()
|
|
cur.execute(
|
|
"SELECT crash_id, fixed_version FROM crashes "
|
|
"WHERE signature = ? AND crash_id <> ?",
|
|
[_u(sig), crash_id],
|
|
)
|
|
existing = cur.fetchall()
|
|
|
|
def cmp(x, y):
|
|
x = x[1]
|
|
y = y[1]
|
|
if x == y:
|
|
return 0
|
|
if not x and y is not None:
|
|
return 1
|
|
if not y:
|
|
return -1
|
|
return packaging.compare_versions(x, y)
|
|
|
|
existing.sort(key=functools.cmp_to_key(cmp))
|
|
|
|
return existing
|
|
|
|
def _duplicate_search_address_signature(self, sig):
|
|
"""Return ID for crash address signature.
|
|
|
|
Return None if signature is unknown.
|
|
"""
|
|
if not sig:
|
|
return None
|
|
|
|
cur = self.duplicate_db.cursor()
|
|
|
|
cur.execute(
|
|
"SELECT crash_id FROM address_signatures WHERE signature == ?", [sig]
|
|
)
|
|
existing_ids = cur.fetchall()
|
|
assert len(existing_ids) <= 1
|
|
if existing_ids:
|
|
return existing_ids[0][0]
|
|
return None
|
|
|
|
def duplicate_db_dump(self, with_timestamps=False):
|
|
"""Return the entire duplicate database as a dictionary.
|
|
|
|
The returned dictionary maps "signature" to (crash_id, fixed_version)
|
|
pairs.
|
|
|
|
If with_timestamps is True, then the map will contain triples
|
|
(crash_id, fixed_version, last_change) instead.
|
|
|
|
This is mainly useful for debugging and test suites.
|
|
"""
|
|
assert self.duplicate_db, "init_duplicate_db() needs to be called before"
|
|
|
|
dump = {}
|
|
cur = self.duplicate_db.cursor()
|
|
cur.execute("SELECT * FROM crashes")
|
|
for sig, crash_id, ver, last_change in cur:
|
|
if with_timestamps:
|
|
dump[sig] = (crash_id, ver, last_change)
|
|
else:
|
|
dump[sig] = (crash_id, ver)
|
|
return dump
|
|
|
|
def _duplicate_db_sync_status(self, crash_id):
|
|
"""Update the duplicate db to the reality of the report in the
|
|
crash db.
|
|
|
|
This uses get_fixed_version() to get the status of the given crash.
|
|
An invalid ID gets removed from the duplicate db, and a crash which got
|
|
fixed is marked as such in the database.
|
|
"""
|
|
assert self.duplicate_db, "init_duplicate_db() needs to be called before"
|
|
|
|
cur = self.duplicate_db.cursor()
|
|
cur.execute("SELECT fixed_version FROM crashes WHERE crash_id = ?", [crash_id])
|
|
db_fixed_version = cur.fetchone()
|
|
if not db_fixed_version:
|
|
return
|
|
db_fixed_version = db_fixed_version[0]
|
|
|
|
real_fixed_version = self.get_fixed_version(crash_id)
|
|
|
|
# crash got rejected
|
|
if real_fixed_version == "invalid":
|
|
print(f"DEBUG: bug {crash_id} was invalidated, removing from database")
|
|
self.duplicate_db_remove(crash_id)
|
|
return
|
|
|
|
# crash got fixed
|
|
if not db_fixed_version and real_fixed_version:
|
|
print(
|
|
f"DEBUG: bug {crash_id} got fixed"
|
|
f" in version {real_fixed_version}, updating database"
|
|
)
|
|
self.duplicate_db_fixed(crash_id, real_fixed_version)
|
|
return
|
|
|
|
# crash got reopened
|
|
if db_fixed_version and not real_fixed_version:
|
|
print(
|
|
f"DEBUG: bug {crash_id} got reopened,"
|
|
f" dropping fixed version {db_fixed_version} from database"
|
|
)
|
|
self.duplicate_db_fixed(crash_id, real_fixed_version)
|
|
return
|
|
|
|
def _duplicate_db_add_address_signature(self, sig, crash_id):
|
|
# consistency check
|
|
existing = self._duplicate_search_address_signature(sig)
|
|
if existing:
|
|
if existing != crash_id:
|
|
raise SystemError(
|
|
f"ID {crash_id} has signature {sig}, but database"
|
|
f" already has that signature for ID {existing}"
|
|
)
|
|
else:
|
|
cur = self.duplicate_db.cursor()
|
|
cur.execute(
|
|
"INSERT INTO address_signatures VALUES (?, ?)", (_u(sig), crash_id)
|
|
)
|
|
self.duplicate_db.commit()
|
|
|
|
def _duplicate_db_merge_id(self, dup, master):
|
|
"""Merge two crash IDs.
|
|
|
|
This is necessary when having to mark a bug as a duplicate if it
|
|
already is in the duplicate DB.
|
|
"""
|
|
assert self.duplicate_db, "init_duplicate_db() needs to be called before"
|
|
|
|
cur = self.duplicate_db.cursor()
|
|
cur.execute("DELETE FROM crashes WHERE crash_id = ?", [dup])
|
|
cur.execute(
|
|
"UPDATE address_signatures SET crash_id = ? WHERE crash_id = ?",
|
|
[master, dup],
|
|
)
|
|
self.duplicate_db.commit()
|
|
|
|
@staticmethod
|
|
def duplicate_sig_hash(sig):
|
|
"""Create a www/URL proof hash for a duplicate signature."""
|
|
# cannot hash multi-line custom duplicate signatures
|
|
if "\n" in sig:
|
|
return None
|
|
|
|
# custom DuplicateSignatures have a free format, split off first word
|
|
i = sig.split(" ", 1)[0]
|
|
# standard crash/address signatures use ':' as field separator, usually
|
|
# for ExecutableName:Signal
|
|
i = "_".join(i.split(":", 2)[:2])
|
|
# we manually quote '/' to make them nicer to read
|
|
i = i.replace("/", "_")
|
|
i = urllib.parse.quote_plus(i.encode("UTF-8"))
|
|
# avoid too long file names
|
|
i = i[:200]
|
|
return i
|
|
|
|
#
|
|
# Abstract functions that need to be implemented by subclasses
|
|
#
|
|
|
|
    def upload(self, report, progress_callback=None, user_message_callback=None):
        """Upload given problem report and return a handle for it.

        This should happen noninteractively.

        If the implementation supports it, and a function progress_callback is
        passed, that is called repeatedly with two arguments: the number of
        bytes already sent, and the total number of bytes to send. This can be
        used to provide a proper upload progress indication on frontends.

        Implementations ought to "assert self.accepts(report)". The UI logic
        already prevents uploading a report to a database which does not accept
        it, but for third-party users of the API this should still be checked.
        """
        # abstract: implemented by concrete crash database subclasses
        raise NotImplementedError(
            "this method must be implemented by a concrete subclass"
        )

    def get_comment_url(self, report, handle):
        """Return an URL that should be opened after report has been uploaded
        and upload() returned handle.

        Should return None if no URL should be opened (anonymous filing without
        user comments); in that case this function should do whichever
        interactive steps it wants to perform.
        """
        # abstract: implemented by concrete crash database subclasses
        raise NotImplementedError(
            "this method must be implemented by a concrete subclass"
        )

    def get_id_url(self, report, crash_id):
        """Return URL for a given report ID.

        The report is passed in case building the URL needs additional
        information from it, such as the SourcePackage name.

        Return None if URL is not available or cannot be determined.
        """
        # abstract: implemented by concrete crash database subclasses
        raise NotImplementedError(
            "this method must be implemented by a concrete subclass"
        )

    def download(self, crash_id):
        """Download the problem report from given ID and return a Report."""
        # abstract: implemented by concrete crash database subclasses
        raise NotImplementedError(
            "this method must be implemented by a concrete subclass"
        )

    def update(
        self,
        crash_id,
        report,
        comment,
        change_description=False,
        attachment_comment=None,
        key_filter=None,
    ):  # pylint: disable=too-many-arguments
        """Update the given report ID with all data from report.

        This creates a text comment with the "short" data (see
        ProblemReport.write_mime()), and creates attachments for all the
        bulk/binary data.

        If change_description is True, and the crash db implementation supports
        it, the short data will be put into the description instead (like in a
        new bug).

        comment will be added to the "short" data. If attachment_comment is
        given, it will be added to the attachment uploads.

        If key_filter is a list or set, then only those keys will be added.
        """
        # abstract: implemented by concrete crash database subclasses
        raise NotImplementedError(
            "this method must be implemented by a concrete subclass"
        )
|
|
|
|
def update_traces(self, crash_id, report, comment=""):
|
|
"""Update the given report ID for retracing results.
|
|
|
|
This updates Stacktrace, ThreadStacktrace, StacktraceTop,
|
|
and StacktraceSource. You can also supply an additional comment.
|
|
"""
|
|
self.update(
|
|
crash_id,
|
|
report,
|
|
comment,
|
|
key_filter=[
|
|
"Stacktrace",
|
|
"ThreadStacktrace",
|
|
"StacktraceSource",
|
|
"StacktraceTop",
|
|
],
|
|
)
|
|
|
|
    def get_distro_release(self, crash_id):
        """Get 'DistroRelease: <release>' from the report ID."""
        # abstract: implemented by concrete crash database subclasses
        raise NotImplementedError(
            "this method must be implemented by a concrete subclass"
        )

    def get_unretraced(self):
        """Return set of crash IDs which have not been retraced yet.

        This should only include crashes which match the current host
        architecture.
        """
        # abstract: implemented by concrete crash database subclasses
        raise NotImplementedError(
            "this method must be implemented by a concrete subclass"
        )

    def get_dup_unchecked(self):
        """Return set of crash IDs which need duplicate checking.

        This is mainly useful for crashes of scripting languages such as
        Python, since they do not need to be retraced. It should not return
        bugs that are covered by get_unretraced().
        """
        # abstract: implemented by concrete crash database subclasses
        raise NotImplementedError(
            "this method must be implemented by a concrete subclass"
        )

    def get_unfixed(self):
        """Return an ID set of all crashes which are not yet fixed.

        The list must not contain bugs which were rejected or duplicate.

        This function should make sure that the returned list is correct. If
        there are any errors with connecting to the crash database, it should
        raise an exception (preferably OSError).
        """
        # abstract: implemented by concrete crash database subclasses
        raise NotImplementedError(
            "this method must be implemented by a concrete subclass"
        )

    def get_fixed_version(self, crash_id):
        """Return the package version that fixes a given crash.

        Return None if the crash is not yet fixed, or an empty string if the
        crash is fixed, but it cannot be determined by which version. Return
        'invalid' if the crash report got invalidated, such as closed a
        duplicate or rejected.

        This function should make sure that the returned result is correct. If
        there are any errors with connecting to the crash database, it should
        raise an exception (preferably OSError).
        """
        # abstract: implemented by concrete crash database subclasses
        raise NotImplementedError(
            "this method must be implemented by a concrete subclass"
        )

    def get_affected_packages(self, crash_id):
        """Return list of affected source packages for given ID."""
        # abstract: implemented by concrete crash database subclasses
        raise NotImplementedError(
            "this method must be implemented by a concrete subclass"
        )

    def is_reporter(self, crash_id):
        """Check whether the user is the reporter of given ID."""
        # abstract: implemented by concrete crash database subclasses
        raise NotImplementedError(
            "this method must be implemented by a concrete subclass"
        )

    def can_update(self, crash_id):
        """Check whether the user is eligible to update a report.

        A user should add additional information to an existing ID if (s)he is
        the reporter or subscribed, the bug is open, not a duplicate, etc. The
        exact policy and checks should be done according to the particular
        implementation.
        """
        # abstract: implemented by concrete crash database subclasses
        raise NotImplementedError(
            "this method must be implemented by a concrete subclass"
        )

    def duplicate_of(self, crash_id):
        """Return master ID for a duplicate bug.

        If the bug is not a duplicate, return None.
        """
        # abstract: implemented by concrete crash database subclasses
        raise NotImplementedError(
            "this method must be implemented by a concrete subclass"
        )

    def close_duplicate(self, report, crash_id, master_id):
        """Mark a crash id as duplicate of given master ID.

        If master is None, id gets un-duplicated.
        """
        # abstract: implemented by concrete crash database subclasses
        raise NotImplementedError(
            "this method must be implemented by a concrete subclass"
        )

    def mark_regression(self, crash_id, master):
        """Mark a crash id as reintroducing an earlier crash which is
        already marked as fixed (having ID 'master').
        """
        # abstract: implemented by concrete crash database subclasses
        raise NotImplementedError(
            "this method must be implemented by a concrete subclass"
        )

    def mark_retraced(self, crash_id):
        """Mark crash id as retraced."""
        # abstract: implemented by concrete crash database subclasses
        raise NotImplementedError(
            "this method must be implemented by a concrete subclass"
        )

    def mark_retrace_failed(self, crash_id, invalid_msg=None):
        """Mark crash id as 'failed to retrace'.

        If invalid_msg is given, the bug should be closed as invalid with given
        message, otherwise just marked as a failed retrace.

        This can be a no-op if you are not interested in this.
        """
        # abstract: implemented by concrete crash database subclasses
        raise NotImplementedError(
            "this method must be implemented by a concrete subclass"
        )

    def _mark_dup_checked(self, crash_id, report):
        """Mark crash id as checked for being a duplicate.

        This is an internal method that should not be called from outside.
        """
        # abstract: implemented by concrete crash database subclasses
        raise NotImplementedError(
            "this method must be implemented by a concrete subclass"
        )
|
|
|
|
|
|
#
|
|
# factory
|
|
#
|
|
|
|
|
|
def get_crashdb(
    auth_file: str | None, name: str | None = None, conf: str | None = None
) -> CrashDatabase:
    """Return a CrashDatabase object for the given crash db name.

    This reads the configuration file 'conf'.

    If name is None, it defaults to the 'default' value in conf.

    If conf is None, it defaults to the environment variable
    APPORT_CRASHDB_CONF; if that does not exist, the hardcoded default is
    /etc/apport/crashdb.conf. This Python syntax file needs to specify:

    - A string variable 'default', giving a default value for 'name' if that is
      None.

    - A dictionary 'databases' which maps names to crash db configuration
      dictionaries. These need to have at least the key 'impl' (Python module
      in apport.crashdb_impl which contains a concrete 'CrashDatabase' class
      implementation for that crash db type). Other generally known options are
      'bug_pattern_url', 'dupdb_url', and 'problem_types'.
    """
    if conf is None:
        conf = os.environ.get("APPORT_CRASHDB_CONF", "/etc/apport/crashdb.conf")
    assert conf
    settings: dict[str, Any] = {}
    # the configuration is executable Python; it must only ever come from a
    # trusted, admin-controlled location
    with open(conf, encoding="utf-8") as conf_file:
        # legacy, pylint: disable=exec-used
        exec(compile(conf_file.read(), conf, "exec"), settings)

    # merge third-party database definitions from <conf>.d/*.conf
    confd_dir = f"{conf}.d"
    if os.path.isdir(confd_dir):
        for entry in os.listdir(confd_dir):
            entry_path = os.path.join(confd_dir, entry)
            if not (os.path.isfile(entry_path) and entry.endswith(".conf")):
                continue
            try:
                with open(entry_path, encoding="utf-8") as entry_file:
                    # legacy, pylint: disable=exec-used
                    exec(
                        compile(entry_file.read(), entry_path, "exec"),
                        settings["databases"],
                    )
            except Exception as error:  # pylint: disable=broad-except
                # ignore broken files
                sys.stderr.write(f"Invalid file {entry_path}: {error}\n")

    if not name:
        name = settings["default"]

    return load_crashdb(auth_file, settings["databases"][name])
|
|
|
|
|
|
def load_crashdb(auth_file, spec):
    """Instantiate a CrashDatabase from a DB specification.

    spec is a crash db configuration dictionary as described in get_crashdb();
    its 'impl' key names the apport.crashdb_impl module to load.
    """
    module = __import__(
        f"apport.crashdb_impl.{spec['impl']}", globals(), locals(), ["CrashDatabase"]
    )
    return module.CrashDatabase(auth_file, spec)
|