#!/usr/bin/python

# Collect information about a crash and create a report in the directory
# specified by apport.fileutils.report_dir.
# See https://wiki.ubuntu.com/Apport for details.
#
# Copyright (c) 2006 - 2011 Canonical Ltd.
# Author: Martin Pitt <martin.pitt@ubuntu.com>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.  See http://www.gnu.org/copyleft/gpl.html for
# the full text of the license.

import sys, os, os.path, subprocess, time, traceback, pwd, io
import signal, inspect, grp, fcntl

import apport, apport.fileutils

#################################################################
#
# functions
#
#################################################################


def check_lock():
    '''Abort if another instance of apport is already running.

    This avoids bringing down the system to its knees if there is a series of
    crashes.'''

    # create a lock file
    lockfile = os.path.join(apport.fileutils.report_dir, '.lock')
    try:
        fd = os.open(lockfile, os.O_WRONLY | os.O_CREAT | os.O_NOFOLLOW)
    except OSError as e:
        error_log('cannot create lock file (uid %i): %s' % (os.getuid(), str(e)))
        sys.exit(1)

    try:
        fcntl.lockf(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        error_log('another apport instance is already running, aborting')
        sys.exit(1)


def drop_privileges(pid, partial=False):
    '''Change user and group to match the given target process.'''

    stat = None
    try:
        stat = os.stat('/proc/%s/stat' % pid)
    except OSError as e:
        raise ValueError('Invalid process ID: ' + str(e))

    if partial:
        effective_gid = os.getegid()
        effective_uid = os.geteuid()
    else:
        effective_gid = stat.st_gid
        effective_uid = stat.st_uid

    os.setregid(stat.st_gid, effective_gid)
    os.setreuid(stat.st_uid, effective_uid)
    assert os.getegid() == effective_gid
    assert os.getgid() == stat.st_gid
    assert os.geteuid() == effective_uid
    assert os.getuid() == stat.st_uid


def init_error_log():
    '''Open a suitable error log if sys.stderr is not a tty.'''

    if not os.isatty(2):
        log = os.environ.get('APPORT_LOG_FILE', '/var/log/apport.log')
        try:
            f = os.open(log, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o600)
            try:
                admgid = grp.getgrnam('adm')[2]
                os.chown(log, -1, admgid)
                os.chmod(log, 0o640)
            except KeyError:
                pass  # if group adm doesn't exist, just leave it as root
        except OSError:  # on a permission error, don't touch stderr
            return
        os.dup2(f, 1)
        os.dup2(f, 2)
        sys.stderr = os.fdopen(2, 'wb')
        if sys.version_info.major >= 3:
            sys.stderr = io.TextIOWrapper(sys.stderr)
        sys.stdout = sys.stderr


def error_log(msg):
    '''Output something to the error log.'''

    apport.error('apport (pid %s) %s: %s', os.getpid(), time.asctime(), msg)


def _log_signal_handler(sgn, frame):
    '''Internal apport signal handler. Just log the signal handler and exit.'''

    # reset handler so that we do not get stuck in loops
    signal.signal(sgn, signal.SIG_IGN)
    try:
        error_log('Got signal %i, aborting; frame:' % sgn)
        for s in inspect.stack():
            error_log(str(s))
    except:
        pass
    sys.exit(1)


def setup_signals():
    '''Install a signal handler for all crash-like signals, so that apport is
    not called on itself when apport crashed.'''

    signal.signal(signal.SIGILL, _log_signal_handler)
    signal.signal(signal.SIGABRT, _log_signal_handler)
    signal.signal(signal.SIGFPE, _log_signal_handler)
    signal.signal(signal.SIGSEGV, _log_signal_handler)
    signal.signal(signal.SIGPIPE, _log_signal_handler)
    signal.signal(signal.SIGBUS, _log_signal_handler)


def write_user_coredump(pid, cwd, limit, from_report=None):
    '''Write the core into the current directory if ulimit requests it.'''

    # three cases:
    # limit == 0: do not write anything
    # limit < 0: unlimited, write out everything
    # limit nonzero: crashed process' core size ulimit in bytes

    if limit == 0:
        return

    core_path = os.path.join(cwd, 'core')
    try:
        with open('/proc/sys/kernel/core_uses_pid') as f:
            if f.read().strip() != '0':
                core_path += '.' + str(pid)
        core_file = os.open(core_path, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
    except (OSError, IOError):
        return

    error_log('writing core dump to %s (limit: %s)' % (core_path, str(limit)))

    written = 0

    # Priming read
    if from_report:
        r = apport.Report()
        with open(from_report, 'rb') as f:
            r.load(f)
        core_size = len(r['CoreDump'])
        if limit > 0 and core_size > limit:
            error_log('aborting core dump writing, size %i exceeds current limit' % core_size)
            os.close(core_file)
            os.unlink(core_path)
            return
        error_log('writing core dump %s of size %i' % (core_path, core_size))
        os.write(core_file, r['CoreDump'])
    else:
        # read from stdin
        block = os.read(0, 1048576)

        while True:
            size = len(block)
            if size == 0:
                break
            written += size
            if limit > 0 and written > limit:
                error_log('aborting core dump writing, size exceeds current limit %i' % limit)
                os.close(core_file)
                os.unlink(core_path)
                return
            if os.write(core_file, block) != size:
                error_log('aborting core dump writing, could not write')
                os.close(core_file)
                os.unlink(core_path)
                return
            block = os.read(0, 1048576)

    os.close(core_file)
    return core_path


def usable_ram():
    '''Return how many bytes of RAM is currently available that can be
    allocated without causing major thrashing.'''

    # abuse our excellent RFC822 parser to parse /proc/meminfo
    r = apport.Report()
    with open('/proc/meminfo', 'rb') as f:
        r.load(f)

    memfree = int(r['MemFree'].split()[0])
    cached = int(r['Cached'].split()[0])
    writeback = int(r['Writeback'].split()[0])

    return (memfree + cached - writeback) * 1024


def is_closing_session(pid, uid):
    '''Check if pid is in a closing user session.

    During that, crashes are common as the session D-BUS and X.org are going
    away, etc. These crash reports are mostly noise, so should be ignored.
    '''
    with open('/proc/%s/environ' % pid) as e:
        env = e.read().split('\0')
    for e in env:
        if e.startswith('DBUS_SESSION_BUS_ADDRESS='):
            dbus_addr = e.split('=', 1)[1]
            break
    else:
        error_log('is_closing_session(): no DBUS_SESSION_BUS_ADDRESS in environment')
        return False

    orig_uid = os.geteuid()
    os.setresuid(uid, uid, -1)
    try:
        gdbus = subprocess.Popen(['/usr/bin/gdbus', 'call', '-e', '-d',
                                  'org.gnome.SessionManager', '-o', '/org/gnome/SessionManager', '-m',
                                  'org.gnome.SessionManager.IsSessionRunning'], stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE, env={'DBUS_SESSION_BUS_ADDRESS': dbus_addr})
        (out, err) = gdbus.communicate()
        if err:
            error_log('gdbus call error: ' + err.decode('UTF-8'))
    except OSError as e:
        error_log('gdbus call failed, cannot determine running session: ' + str(e))
        return False
    finally:
        os.setresuid(orig_uid, orig_uid, -1)
    error_log('debug: session gdbus call: ' + out.decode('UTF-8'))
    if out.startswith(b'(false,'):
        return True

    return False


#################################################################
#
# main
#
#################################################################

if len(sys.argv) not in (4, 5):
    try:
        print('Usage: %s <pid> <signal number> <core file ulimit> [global pid]' % sys.argv[0])
        print('The core dump is read from stdin.')
    except IOError:
        # sys.stderr might not actually exist, expecially not when being called
        # from the kernel
        pass
    sys.exit(1)

init_error_log()

# Check if we received a valid global PID (kernel >= 3.12). If we do,
# then compare it with the local PID. If they don't match, it's an
# indication that the crash originated from another PID namespace. In that
# case, attempt to forward the crash to apport in that namespace. If
# apport can't be found, then simply log an entry in the host error log
# and exit 0.
if len(sys.argv) == 5 and sys.argv[4].isdigit() and sys.argv[4] != sys.argv[1]:
    if os.path.exists('/proc/%s/root/%s' % (sys.argv[4], __file__)):
        error_log('pid %s (host pid %s) crashed in a container with apport '
                  'support, forwarding' % (sys.argv[1], sys.argv[4]))
        sys.stderr.flush()
        os.execv('/usr/sbin/chroot', ('chroot', '/proc/%s/root/' % sys.argv[4],
                                      __file__, sys.argv[1], sys.argv[2],
                                      sys.argv[3]))
    else:
        error_log('pid %s crashed in a container without apport support' % sys.argv[4])
        sys.exit(0)

check_lock()

try:
    setup_signals()

    (pid, signum, core_ulimit) = sys.argv[1:4]

    # drop our process priority level to not disturb userspace so much
    try:
        os.nice(10)
    except OSError:
        pass  # *shrug*, we tried

    # Partially drop privs to gain proper os.access() checks
    drop_privileges(pid, True)

    # try to find the core dump file; if path is relative, prepend cwd of
    # crashed process
    cwd = os.readlink('/proc/' + pid + '/cwd')

    error_log('called for pid %s, signal %s, core limit %s' % (pid, signum, core_ulimit))

    try:
        core_ulimit = int(core_ulimit)
    except ValueError:
        error_log('core limit is invalid, disabling core files')
        core_ulimit = 0
    # clamp core_ulimit to a sensible size, for -1 the kernel reports something
    # absurdly big
    if core_ulimit > 9223372036854775807:
        error_log('ignoring implausibly big core limit, treating as unlimited')
        core_ulimit = -1
    # ulimit specifies blocks, which are kB
    if core_ulimit > 0:
        core_ulimit *= 1024

    # ignore SIGQUIT (it's usually deliberately generated by users)
    if signum == str(signal.SIGQUIT):
        drop_privileges(pid)
        write_user_coredump(pid, cwd, core_ulimit)
        sys.exit(0)

    try:
        pidstat = os.stat('/proc/%s/stat' % pid)
    except OSError:
        error_log('Invalid PID')
        sys.exit(1)

    # check if the executable was modified after the process started (e. g.
    # package got upgraded in between)
    exe_mtime = os.stat('/proc/%s/exe' % pid).st_mtime
    process_start = os.lstat('/proc/%s/cmdline' % pid).st_mtime
    if not os.path.exists(os.readlink('/proc/%s/exe' % pid)) or exe_mtime > process_start:
        error_log('executable was modified after program start, ignoring')
        sys.exit(1)

    info = apport.Report('Crash')
    info['Signal'] = signum
    if sys.version_info.major < 3:
        info['CoreDump'] = (sys.stdin, True, usable_ram() * 3 / 4, True)
    else:
        # read binary data from stdio
        info['CoreDump'] = (sys.stdin.detach(), True, usable_ram() * 3 / 4, True)

    # We already need this here to figure out the ExecutableName (for scripts,
    # etc).
    info.add_proc_info(pid)

    if 'ExecutablePath' not in info:
        error_log('could not determine ExecutablePath, aborting')
        sys.exit(1)

    subject = info['ExecutablePath'].replace('/', '_')
    base = '%s.%s.%s.hanging' % (subject, str(pidstat.st_uid), pid)
    hanging = os.path.join(apport.fileutils.report_dir, base)

    if os.path.exists(hanging):
        if (os.stat('/proc/uptime').st_ctime < os.stat(hanging).st_mtime):
            info['ProblemType'] = 'Hang'
        os.unlink(hanging)

    if 'InterpreterPath' in info:
        error_log('script: %s, interpreted by %s (command line "%s")' %
                  (info['ExecutablePath'], info['InterpreterPath'],
                   info['ProcCmdline']))
    else:
        error_log('executable: %s (command line "%s")' %
                  (info['ExecutablePath'], info['ProcCmdline']))

    # ignore non-package binaries (unless configured otherwise)
    if not apport.fileutils.likely_packaged(info['ExecutablePath']):
        if not apport.fileutils.get_config('main', 'unpackaged', False, bool=True):
            error_log('executable does not belong to a package, ignoring')
            # check if the user wants a core dump
            drop_privileges(pid)
            write_user_coredump(pid, cwd, core_ulimit)
            sys.exit(1)

    # ignore SIGXCPU and SIGXFSZ since this indicates some external
    # influence changing soft RLIMIT values when running programs.
    if signum in [str(signal.SIGXCPU), str(signal.SIGXFSZ)]:
        error_log('Ignoring signal %s (caused by exceeding soft RLIMIT)' % signum)
        drop_privileges(pid)
        write_user_coredump(pid, cwd, core_ulimit)
        sys.exit(0)

    # ignore blacklisted binaries
    if info.check_ignored():
        error_log('executable version is blacklisted, ignoring')
        sys.exit(1)

    if is_closing_session(pid, pidstat.st_uid):
        error_log('happens for shutting down session, ignoring')
        sys.exit(1)

    crash_counter = 0

    # Create crash report file descriptor for writing the report into
    # report_dir
    try:
        report = '%s/%s.%i.crash' % (apport.fileutils.report_dir, info['ExecutablePath'].replace('/', '_'), pidstat.st_uid)
        if os.path.exists(report):
            if apport.fileutils.seen_report(report):
                # do not flood the logs and the user with repeated crashes
                with open(report, 'rb') as f:
                    crash_counter = apport.fileutils.get_recent_crashes(f)
                crash_counter += 1
                if crash_counter > 1:
                    drop_privileges(pid)
                    write_user_coredump(pid, cwd, core_ulimit)
                    error_log('this executable already crashed %i times, ignoring' % crash_counter)
                    sys.exit(1)
                # remove the old file, so that we can create the new one with
                # os.O_CREAT|os.O_EXCL
                os.unlink(report)
            else:
                error_log('apport: report %s already exists and unseen, doing nothing to avoid disk usage DoS' % report)
                drop_privileges(pid)
                write_user_coredump(pid, cwd, core_ulimit)
                sys.exit(1)
        reportfile = os.fdopen(os.open(report, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0), 'wb')
        assert reportfile.fileno() > sys.stderr.fileno()

        # Make sure the crash reporting daemon can read this report
        try:
            gid = pwd.getpwnam('whoopsie').pw_gid
            os.chown(report, pidstat.st_uid, gid)
        except (OSError, KeyError):
            os.chown(report, pidstat.st_uid, pidstat.st_gid)
    except (OSError, IOError) as e:
        error_log('Could not create report file: %s' % str(e))
        sys.exit(1)

    # Totally drop privs before writing out the reportfile.
    drop_privileges(pid)

    info.add_user_info()
    info.add_os_info()

    if crash_counter > 0:
        info['CrashCounter'] = '%i' % crash_counter

    try:
        info.write(reportfile)
        if reportfile != sys.stderr:
            # Ensure that the file gets written to disk in the event of an
            # Upstart crash.
            if info.get('ExecutablePath', '') == '/sbin/init':
                reportfile.flush()
                os.fsync(reportfile.fileno())
                parent_directory = os.path.dirname(report)
                try:
                    fd = os.open(parent_directory, os.O_RDONLY)
                    os.fsync(fd)
                finally:
                    os.close(fd)
            reportfile.close()
    except IOError:
        if reportfile != sys.stderr:
            os.unlink(report)
        raise
    if report:
        os.chmod(report, 0o640)
    if reportfile != sys.stderr:
        error_log('wrote report %s' % report)

    # Check if the user wants a core file. We need to create that from the
    # written report, as we can only read stdin once and write_user_coredump()
    # might abort reading from stdin and remove the written core file when
    # core_ulimit is > 0 and smaller than the core size.
    write_user_coredump(pid, cwd, core_ulimit, from_report=report)

except (SystemExit, KeyboardInterrupt):
    raise
except Exception as e:
    error_log('Unhandled exception:')
    traceback.print_exc()
    error_log('pid: %i, uid: %i, gid: %i, euid: %i, egid: %i' % (
              os.getpid(), os.getuid(), os.getgid(), os.geteuid(), os.getegid()))
    error_log('environment: %s' % str(os.environ))
