From e27e30cae0468903473641efe3853c12d9294ac3 Mon Sep 17 00:00:00 2001 From: "Frank Ch. Eigler" Date: Mon, 28 Oct 2019 13:29:26 -0400 Subject: debuginfod 2/2: server side Add the server to the debuginfod/ subdirectory. This is a highly multithreaded c++11 program (still buildable on rhel7's gcc 4.8, which is only partly c++11 compliant). Includes an initial suite of tests, man pages, and a sample systemd service. Signed-off-by: Frank Ch. Eigler Signed-off-by: Aaron Merey --- config/ChangeLog | 7 + config/Makefile.am | 5 +- config/debuginfod.service | 15 + config/debuginfod.sysconfig | 14 + config/elfutils.spec.in | 100 +- config/eu.am | 10 + configure.ac | 36 +- debuginfod/ChangeLog | 6 + debuginfod/Makefile.am | 8 +- debuginfod/debuginfod.cxx | 2514 ++++++++++++++++++++ doc/Makefile.am | 3 +- doc/debuginfod.8 | 369 +++ tests/ChangeLog | 7 + tests/Makefile.am | 30 +- .../debuginfod-rpms/fedora30/hello2-1.0-2.src.rpm | Bin 0 -> 8087 bytes .../fedora30/hello2-1.0-2.x86_64.rpm | Bin 0 -> 10448 bytes .../fedora30/hello2-debuginfo-1.0-2.x86_64.rpm | Bin 0 -> 11316 bytes .../fedora30/hello2-debugsource-1.0-2.x86_64.rpm | Bin 0 -> 7308 bytes .../fedora30/hello2-two-1.0-2.x86_64.rpm | Bin 0 -> 10380 bytes .../fedora30/hello2-two-debuginfo-1.0-2.x86_64.rpm | Bin 0 -> 10888 bytes tests/debuginfod-rpms/hello2.spec. 
| 57 + tests/debuginfod-rpms/rhel6/hello2-1.0-2.i686.rpm | Bin 0 -> 4112 bytes tests/debuginfod-rpms/rhel6/hello2-1.0-2.src.rpm | Bin 0 -> 3816 bytes .../rhel6/hello2-debuginfo-1.0-2.i686.rpm | Bin 0 -> 6060 bytes .../rhel6/hello2-two-1.0-2.i686.rpm | Bin 0 -> 4052 bytes tests/debuginfod-rpms/rhel7/hello2-1.0-2.src.rpm | Bin 0 -> 3819 bytes .../debuginfod-rpms/rhel7/hello2-1.0-2.x86_64.rpm | Bin 0 -> 5156 bytes .../rhel7/hello2-debuginfo-1.0-2.x86_64.rpm | Bin 0 -> 6936 bytes .../rhel7/hello2-two-1.0-2.x86_64.rpm | Bin 0 -> 5092 bytes tests/debuginfod_build_id_find.c | 62 + tests/run-debuginfod-find.sh | 230 ++ 31 files changed, 3446 insertions(+), 27 deletions(-) create mode 100644 config/debuginfod.service create mode 100644 config/debuginfod.sysconfig create mode 100644 debuginfod/debuginfod.cxx create mode 100644 doc/debuginfod.8 create mode 100644 tests/debuginfod-rpms/fedora30/hello2-1.0-2.src.rpm create mode 100644 tests/debuginfod-rpms/fedora30/hello2-1.0-2.x86_64.rpm create mode 100644 tests/debuginfod-rpms/fedora30/hello2-debuginfo-1.0-2.x86_64.rpm create mode 100644 tests/debuginfod-rpms/fedora30/hello2-debugsource-1.0-2.x86_64.rpm create mode 100644 tests/debuginfod-rpms/fedora30/hello2-two-1.0-2.x86_64.rpm create mode 100644 tests/debuginfod-rpms/fedora30/hello2-two-debuginfo-1.0-2.x86_64.rpm create mode 100644 tests/debuginfod-rpms/hello2.spec. 
create mode 100644 tests/debuginfod-rpms/rhel6/hello2-1.0-2.i686.rpm create mode 100644 tests/debuginfod-rpms/rhel6/hello2-1.0-2.src.rpm create mode 100644 tests/debuginfod-rpms/rhel6/hello2-debuginfo-1.0-2.i686.rpm create mode 100644 tests/debuginfod-rpms/rhel6/hello2-two-1.0-2.i686.rpm create mode 100644 tests/debuginfod-rpms/rhel7/hello2-1.0-2.src.rpm create mode 100644 tests/debuginfod-rpms/rhel7/hello2-1.0-2.x86_64.rpm create mode 100644 tests/debuginfod-rpms/rhel7/hello2-debuginfo-1.0-2.x86_64.rpm create mode 100644 tests/debuginfod-rpms/rhel7/hello2-two-1.0-2.x86_64.rpm create mode 100644 tests/debuginfod_build_id_find.c create mode 100755 tests/run-debuginfod-find.sh diff --git a/config/ChangeLog b/config/ChangeLog index b641d0d5..73643f91 100644 --- a/config/ChangeLog +++ b/config/ChangeLog @@ -1,3 +1,10 @@ +2019-10-28 Frank Ch. Eigler + + * eu.am (AM_CXXFLAGS): Clone & amend AM_CFLAGS for c++11 code. + * debuginfod.service, debuginfod.sysconfig: New files: systemd. + * Makefile.am: Install them. + * elfutils.spec.in: Add debuginfod and debuginfod-client subrpms. + 2019-08-29 Mark Wielaard * elfutils.spec.in (%description devel): Remove libebl text. diff --git a/config/Makefile.am b/config/Makefile.am index 10bd8d31..55e895ac 100644 --- a/config/Makefile.am +++ b/config/Makefile.am @@ -28,8 +28,9 @@ ## the GNU Lesser General Public License along with this program. If ## not, see . 
## -EXTRA_DIST = elfutils.spec.in known-dwarf.awk 10-default-yama-scope.conf - libelf.pc.in libdw.pc.in libdebuginfod.pc.in +EXTRA_DIST = elfutils.spec.in known-dwarf.awk 10-default-yama-scope.conf \ + libelf.pc.in libdw.pc.in libdebuginfod.pc.in \ + debuginfod.service debuginfod.sysconfig pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = libelf.pc libdw.pc libdebuginfod.pc diff --git a/config/debuginfod.service b/config/debuginfod.service new file mode 100644 index 00000000..d8ef072b --- /dev/null +++ b/config/debuginfod.service @@ -0,0 +1,15 @@ +[Unit] +Description=elfutils debuginfo-over-http server +Documentation=http://elfutils.org/ +After=network.target + +[Service] +EnvironmentFile=/etc/sysconfig/debuginfod +User=debuginfod +Group=debuginfod +#CacheDirectory=debuginfod +ExecStart=/usr/bin/debuginfod -d /var/cache/debuginfod/debuginfod.sqlite -p $DEBUGINFOD_PORT $DEBUGINFOD_VERBOSE $DEBUGINFOD_PRAGMAS $DEBUGINFOD_PATHS +TimeoutStopSec=10 + +[Install] +WantedBy=multi-user.target diff --git a/config/debuginfod.sysconfig b/config/debuginfod.sysconfig new file mode 100644 index 00000000..c56bcf3f --- /dev/null +++ b/config/debuginfod.sysconfig @@ -0,0 +1,14 @@ +# +DEBUGINFOD_PORT="8002" +#DEBUGINFOD_VERBOSE="-v" + +# some common places to find trustworthy ELF/DWARF files and RPMs +DEBUGINFOD_PATHS="-t43200 -F -R /usr/lib/debug /usr/bin /usr/libexec /usr/sbin /usr/lib /usr/lib64 /var/cache/yum /var/cache/dnf" + +# prefer reliability/durability over performance +#DEBUGINFOD_PRAGMAS="-D 'pragma synchronous=full;'" + +# upstream debuginfods +#DEBUGINFOD_URLS="http://secondhost:8002 http://thirdhost:8002" +#DEBUGINFOD_TIMEOUT="5" +#DEBUGINFOD_CACHE_DIR="" diff --git a/config/elfutils.spec.in b/config/elfutils.spec.in index 6771d13b..3cd15ce3 100644 --- a/config/elfutils.spec.in +++ b/config/elfutils.spec.in @@ -12,6 +12,11 @@ Requires: elfutils-libelf = %{version}-%{release} Requires: glibc >= 2.7 Requires: libstdc++ Requires: default-yama-scope +%if 0%{?rhel} >= 8 
|| 0%{?fedora} >= 20 +Recommends: elfutils-debuginfod-client +%else +Requires: elfutils-debuginfod-client +%endif # ExcludeArch: xxx @@ -23,10 +28,20 @@ BuildRequires: flex >= 2.5.4a BuildRequires: bzip2 BuildRequires: m4 BuildRequires: gettext -BuildRequires: zlib-devel +BuildRequires: pkgconfig(zlib) +%if 0%{?rhel} == 7 BuildRequires: bzip2-devel -BuildRequires: xz-devel +%else +BuildRequires: pkgconfig(bzip2) +%endif +BuildRequires: pkgconfig(liblzma) BuildRequires: gcc-c++ +BuildRequires: pkgconfig(libmicrohttpd) >= 0.9.33 +BuildRequires: pkgconfig(libcurl) >= 7.29.0 +BuildRequires: pkgconfig(sqlite3) >= 3.7.17 +BuildRequires: pkgconfig(libarchive) >= 3.1.2 +# for the run-debuginfod-find.sh test case in %check for /usr/sbin/ss +BuildRequires: iproute %define _gnu %{nil} %define _programprefix eu- @@ -116,18 +131,53 @@ interprocess services, communication and introspection (like synchronisation, signaling, debugging, tracing and profiling) of processes. +%package debuginfod-client +Summary: Libraries and command-line frontend for HTTP ELF/DWARF file server addressed by build-id. +License: GPLv3+ and (GPLv2+ or LGPLv3+) + +%package debuginfod-client-devel +Summary: Libraries and headers to build debuginfod client applications. +License: GPLv2+ or LGPLv3+ + +%package debuginfod +Summary: HTTP ELF/DWARF file server addressed by build-id. +License: GPLv3+ +BuildRequires: systemd +Requires(post): systemd +Requires(preun): systemd +Requires(postun): systemd +Requires: shadow-utils +Requires: /usr/bin/rpm2cpio + +%description debuginfod-client +The elfutils-debuginfod-client package contains shared libraries +dynamically loaded from -ldw, which use a debuginfod service +to look up debuginfo and associated data. Also includes a +command-line frontend. + +%description debuginfod-client-devel +The elfutils-debuginfod-client-devel package contains the libraries +to create applications to use the debuginfod service. 
+ +%description debuginfod +The elfutils-debuginfod package contains the debuginfod binary +and control files for a service that can provide ELF/DWARF +files to remote clients, based on build-id identification. +The ELF/DWARF file searching functions in libdwfl can query +such servers to download those files on demand. + %prep %setup -q %build -%configure --program-prefix=%{_programprefix} +%configure --program-prefix=%{_programprefix} --enable-debuginfod make %install rm -rf ${RPM_BUILD_ROOT} mkdir -p ${RPM_BUILD_ROOT}%{_prefix} -%makeinstall +%make_install chmod +x ${RPM_BUILD_ROOT}%{_prefix}/%{_lib}/lib*.so* @@ -140,6 +190,11 @@ chmod +x ${RPM_BUILD_ROOT}%{_prefix}/%{_lib}/lib*.so* install -Dm0644 config/10-default-yama-scope.conf ${RPM_BUILD_ROOT}%{_sysctldir}/10-default-yama-scope.conf +install -Dm0644 config/debuginfod.service ${RPM_BUILD_ROOT}%{_unitdir}/debuginfod.service +install -Dm0644 config/debuginfod.sysconfig ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig/debuginfod +mkdir -p ${RPM_BUILD_ROOT}%{_localstatedir}/cache/debuginfod +touch ${RPM_BUILD_ROOT}%{_localstatedir}/cache/debuginfod/debuginfod.sqlite + %check make check @@ -225,6 +280,43 @@ rm -rf ${RPM_BUILD_ROOT} %files default-yama-scope %{_sysctldir}/10-default-yama-scope.conf +%files debuginfod-client +%defattr(-,root,root) +%{_libdir}/libdebuginfod-%{version}.so +%{_bindir}/debuginfod-find +%{_mandir}/man1/debuginfod-find.1* + +%files debuginfod-client-devel +%defattr(-,root,root) +%{_libdir}/pkgconfig/libdebuginfod.pc +%{_mandir}/man3/debuginfod_*.3* +%{_includedir}/elfutils/debuginfod.h +%{_libdir}/libdebuginfod.so* + +%files debuginfod +%defattr(-,root,root) +%{_bindir}/debuginfod +%config(noreplace) %verify(not md5 size mtime) %{_sysconfdir}/sysconfig/debuginfod +%{_unitdir}/debuginfod.service +%{_sysconfdir}/sysconfig/debuginfod +%{_mandir}/man8/debuginfod.8* + +%dir %attr(0700,debuginfod,debuginfod) %{_localstatedir}/cache/debuginfod +%verify(not md5 size mtime) 
%attr(0600,debuginfod,debuginfod) %{_localstatedir}/cache/debuginfod/debuginfod.sqlite + +%pre debuginfod +getent group debuginfod >/dev/null || groupadd -r debuginfod +getent passwd debuginfod >/dev/null || \ + useradd -r -g debuginfod -d /var/cache/debuginfod -s /sbin/nologin \ + -c "elfutils debuginfo server" debuginfod +exit 0 + +%post debuginfod +%systemd_post debuginfod.service + +%postun debuginfod +%systemd_postun_with_restart debuginfod.service + %changelog * Tue Aug 13 2019 Mark Wielaard 0.177-1 - elfclassify: New tool to analyze ELF objects. diff --git a/config/eu.am b/config/eu.am index 82acda3a..6c3c444f 100644 --- a/config/eu.am +++ b/config/eu.am @@ -79,6 +79,16 @@ AM_CFLAGS = -std=gnu99 -Wall -Wshadow -Wformat=2 \ $(if $($(*F)_no_Wpacked_not_aligned),-Wno-packed-not-aligned,) \ $($(*F)_CFLAGS) +AM_CXXFLAGS = -std=c++11 -Wall -Wshadow \ + -Wtrampolines \ + $(LOGICAL_OP_WARNING) $(DUPLICATED_COND_WARNING) \ + $(NULL_DEREFERENCE_WARNING) $(IMPLICIT_FALLTHROUGH_WARNING) \ + $(if $($(*F)_no_Werror),,-Werror) \ + $(if $($(*F)_no_Wunused),,-Wunused -Wextra) \ + $(if $($(*F)_no_Wstack_usage),,$(STACK_USAGE_WARNING)) \ + $(if $($(*F)_no_Wpacked_not_aligned),-Wno-packed-not-aligned,) \ + $($(*F)_CXXFLAGS) + COMPILE.os = $(filter-out -fprofile-arcs -ftest-coverage, $(COMPILE)) DEFS.os = -DPIC -DSHARED diff --git a/configure.ac b/configure.ac index 8a3ed3af..5deec336 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ dnl Process this file with autoconf to produce a configure script. dnl Configure input file for elfutils. -*-autoconf-*- dnl -dnl Copyright (C) 1996-2018 Red Hat, Inc. +dnl Copyright (C) 1996-2019 Red Hat, Inc. dnl dnl This file is part of elfutils. 
dnl @@ -88,8 +88,6 @@ AS_IF([test "$use_locks" = yes], AH_TEMPLATE([USE_LOCKS], [Defined if libraries should be thread-safe.]) AC_PROG_CC -AC_PROG_CXX -AX_CXX_COMPILE_STDCXX(11, noext, optional) AC_PROG_RANLIB AC_PROG_YACC AM_PROG_LEX @@ -676,17 +674,25 @@ fi # Look for libmicrohttpd, libcurl, libarchive, sqlite for debuginfo server # minimum versions as per rhel7. Single --enable-* option arranges to build -# both client libs and server process. - -PKG_PROG_PKG_CONFIG -AC_ARG_ENABLE([debuginfod], AC_HELP_STRING([--enable-debuginfod], [Build debuginfo server and client solib])) -AS_IF([test "x$enable_debuginfod" = "xyes"], [ - AC_DEFINE([ENABLE_DEBUGINFOD],[1],[Build debuginfo-server]) - PKG_CHECK_MODULES([libmicrohttpd],[libmicrohttpd >= 0.9.33]) - PKG_CHECK_MODULES([libcurl],[libcurl >= 7.29.0]) - PKG_CHECK_MODULES([sqlite3],[sqlite3 >= 3.7.17]) - PKG_CHECK_MODULES([libarchive],[libarchive >= 3.1.2]) -], [enable_debuginfod="no"]) +# both client and server. +AC_ARG_ENABLE([debuginfod],AC_HELP_STRING([--enable-debuginfod], [Build debuginfod server and client])) +AC_PROG_CXX +AX_CXX_COMPILE_STDCXX(11, noext, optional) +AS_IF([test "x$enable_debuginfod" != "xno"], [ + AC_MSG_NOTICE([checking debuginfod dependencies, disable to skip]) + enable_debuginfod=yes # presume success + PKG_PROG_PKG_CONFIG + if test "x$ac_cv_prog_ac_ct_CXX" = "x"; then enable_debuginfod=no; fi + PKG_CHECK_MODULES([libmicrohttpd],[libmicrohttpd >= 0.9.33],[],[enable_debuginfod=no]) + PKG_CHECK_MODULES([libcurl],[libcurl >= 7.29.0],[],[enable_debuginfod=no]) + PKG_CHECK_MODULES([sqlite3],[sqlite3 >= 3.7.17],[],[enable_debuginfod=no]) + PKG_CHECK_MODULES([libarchive],[libarchive >= 3.1.2],[],[enable_debuginfod=no]) + if test "x$enable_debuginfod" = "xno"; then + AC_MSG_ERROR([C++ compiler or dependencies not found, use --disable-debuginfod to disable.]) + fi +]) + +AS_IF([test "x$enable_debuginfod" != "xno"],AC_DEFINE([ENABLE_DEBUGINFOD],[1],[Build debuginfod])) AM_CONDITIONAL([DEBUGINFOD],[test 
"x$enable_debuginfod" = "xyes"]) @@ -719,7 +725,7 @@ AC_MSG_NOTICE([ Deterministic archives by default : ${default_ar_deterministic} Native language support : ${USE_NLS} Extra Valgrind annotations : ${use_vg_annotations} - Debuginfo client/server support : ${enable_debuginfod} + Debuginfod client/server support : ${enable_debuginfod} EXTRA TEST FEATURES (used with make check) have bunzip2 installed (required) : ${HAVE_BUNZIP2} diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog index 1a31cf6f..b5679a2f 100644 --- a/debuginfod/ChangeLog +++ b/debuginfod/ChangeLog @@ -1,3 +1,9 @@ +2019-10-28 Frank Ch. Eigler + + * debuginfod.cxx: New file: debuginfod server. + * debuginfod.8: New file: man page. + * Makefile.am: Build it. + 2019-10-28 Aaron Merey * debuginfod-client.c: New file: debuginfod client library. diff --git a/debuginfod/Makefile.am b/debuginfod/Makefile.am index a8ee4594..ec0f49f1 100644 --- a/debuginfod/Makefile.am +++ b/debuginfod/Makefile.am @@ -31,7 +31,9 @@ ## include $(top_srcdir)/config/eu.am AM_CPPFLAGS += -I$(srcdir) -I$(srcdir)/../libelf -I$(srcdir)/../libebl \ - -I$(srcdir)/../libdw -I$(srcdir)/../libdwelf + -I$(srcdir)/../libdw -I$(srcdir)/../libdwelf \ + $(libmicrohttpd_CFLAGS) $(libcurl_CFLAGS) $(sqlite3_CFLAGS) \ + $(libarchive_CFLAGS) VERSION = 1 # Disable eu- prefixing for artifacts (binaries & man pages) in this @@ -55,7 +57,9 @@ libeu = ../lib/libeu.a AM_LDFLAGS = -Wl,-rpath-link,../libelf:../libdw:. 
-bin_PROGRAMS = debuginfod-find +bin_PROGRAMS = debuginfod debuginfod-find +debuginfod_SOURCES = debuginfod.cxx +debuginfod_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(libmicrohttpd_LIBS) $(libcurl_LIBS) $(sqlite3_LIBS) $(libarchive_LIBS) -lpthread -ldl debuginfod_find_SOURCES = debuginfod-find.c debuginfod_find_LDADD = $(libeu) $(libdebuginfod) diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx new file mode 100644 index 00000000..a87ec4d0 --- /dev/null +++ b/debuginfod/debuginfod.cxx @@ -0,0 +1,2514 @@ +/* Debuginfo-over-http server. + Copyright (C) 2019 Red Hat, Inc. + This file is part of elfutils. + + This file is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + elfutils is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + + +/* cargo-cult from libdwfl linux-kernel-modules.c */ +/* In case we have a bad fts we include this before config.h because it + can't handle _FILE_OFFSET_BITS. + Everything we need here is fine if its declarations just come first. + Also, include sys/types.h before fts. On some systems fts.h is not self + contained. 
*/ +#ifdef BAD_FTS + #include + #include +#endif + +#ifdef HAVE_CONFIG_H + #include "config.h" +#endif + +extern "C" { +#include "printversion.h" +} + +#include "debuginfod.h" +#include + +#include +#ifdef __GNUC__ +#undef __attribute__ /* glibc bug - rhbz 1763325 */ +#endif + +#include +#include +#include +// #include // not until it supports C++ << better +#include +#include +#include +#include +#include +#include +#include +#include + + +/* If fts.h is included before config.h, its indirect inclusions may not + give us the right LFS aliases of these functions, so map them manually. */ +#ifdef BAD_FTS + #ifdef _FILE_OFFSET_BITS + #define open open64 + #define fopen fopen64 + #endif +#else + #include + #include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +// #include // on rhel7 gcc 4.8, not competent +#include +// #include +using namespace std; + +#include +#include + +#include +#include +#include +#include +#include + +#ifdef __linux__ +#include +#endif + +#ifdef __linux__ +#define tid() syscall(SYS_gettid) +#else +#define tid() pthread_self() +#endif + + +// Roll this identifier for every sqlite schema incompatiblity. 
+#define BUILDIDS "buildids9" + +#if SQLITE_VERSION_NUMBER >= 3008000 +#define WITHOUT_ROWID "without rowid" +#else +#define WITHOUT_ROWID "" +#endif + +static const char DEBUGINFOD_SQLITE_DDL[] = + "pragma foreign_keys = on;\n" + "pragma synchronous = 0;\n" // disable fsync()s - this cache is disposable across a machine crash + "pragma journal_mode = wal;\n" // https://sqlite.org/wal.html + "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file + "pragma journal_size_limit = 0;\n" // limit steady state file (between grooming, which also =truncate's) + "pragma auto_vacuum = incremental;\n" // https://sqlite.org/pragma.html + "pragma busy_timeout = 1000;\n" // https://sqlite.org/pragma.html + // NB: all these are overridable with -D option + + // Normalization table for interning file names + "create table if not exists " BUILDIDS "_files (\n" + " id integer primary key not null,\n" + " name text unique not null\n" + " );\n" + // Normalization table for interning buildids + "create table if not exists " BUILDIDS "_buildids (\n" + " id integer primary key not null,\n" + " hex text unique not null);\n" + // Track the completion of scanning of a given file & sourcetype at given time + "create table if not exists " BUILDIDS "_file_mtime_scanned (\n" + " mtime integer not null,\n" + " file integer not null,\n" + " size integer not null,\n" // in bytes + " sourcetype text(1) not null\n" + " check (sourcetype IN ('F', 'R')),\n" + " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n" + " primary key (file, mtime, sourcetype)\n" + " ) " WITHOUT_ROWID ";\n" + "create table if not exists " BUILDIDS "_f_de (\n" + " buildid integer not null,\n" + " debuginfo_p integer not null,\n" + " executable_p integer not null,\n" + " file integer not null,\n" + " mtime integer not null,\n" + " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n" + " foreign key (buildid) references " 
BUILDIDS "_buildids(id) on update cascade on delete cascade,\n" + " primary key (buildid, file, mtime)\n" + " ) " WITHOUT_ROWID ";\n" + "create table if not exists " BUILDIDS "_f_s (\n" + " buildid integer not null,\n" + " artifactsrc integer not null,\n" + " file integer not null,\n" // NB: not necessarily entered into _mtime_scanned + " mtime integer not null,\n" + " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n" + " foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n" + " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n" + " primary key (buildid, artifactsrc, file, mtime)\n" + " ) " WITHOUT_ROWID ";\n" + "create table if not exists " BUILDIDS "_r_de (\n" + " buildid integer not null,\n" + " debuginfo_p integer not null,\n" + " executable_p integer not null,\n" + " file integer not null,\n" + " mtime integer not null,\n" + " content integer not null,\n" + " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n" + " foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n" + " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n" + " primary key (buildid, debuginfo_p, executable_p, file, content, mtime)\n" + " ) " WITHOUT_ROWID ";\n" + "create table if not exists " BUILDIDS "_r_sref (\n" // outgoing dwarf sourcefile references from rpm + " buildid integer not null,\n" + " artifactsrc integer not null,\n" + " foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n" + " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n" + " primary key (buildid, artifactsrc)\n" + " ) " WITHOUT_ROWID ";\n" + "create table if not exists " BUILDIDS "_r_sdef (\n" // rpm contents that may satisfy sref + " file integer not null,\n" + " mtime 
integer not null,\n" + " content integer not null,\n" + " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n" + " foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n" + " primary key (content, file, mtime)\n" + " ) " WITHOUT_ROWID ";\n" + // create views to glue together some of the above tables, for webapi D queries + "create view if not exists " BUILDIDS "_query_d as \n" + "select\n" + " b.hex as buildid, n.mtime, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1\n" + " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_f_de n\n" + " where b.id = n.buildid and f0.id = n.file and n.debuginfo_p = 1\n" + "union all select\n" + " b.hex as buildid, n.mtime, 'R' as sourcetype, f0.name as source0, n.mtime as mtime, f1.name as source1\n" + " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_r_de n\n" + " where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.debuginfo_p = 1\n" + ";" + // ... and for E queries + "create view if not exists " BUILDIDS "_query_e as \n" + "select\n" + " b.hex as buildid, n.mtime, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1\n" + " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_f_de n\n" + " where b.id = n.buildid and f0.id = n.file and n.executable_p = 1\n" + "union all select\n" + " b.hex as buildid, n.mtime, 'R' as sourcetype, f0.name as source0, n.mtime as mtime, f1.name as source1\n" + " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_r_de n\n" + " where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.executable_p = 1\n" + ";" + // ... 
and for S queries + "create view if not exists " BUILDIDS "_query_s as \n" + "select\n" + " b.hex as buildid, fs.name as artifactsrc, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1, null as source0ref\n" + " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files fs, " BUILDIDS "_f_s n\n" + " where b.id = n.buildid and f0.id = n.file and fs.id = n.artifactsrc\n" + "union all select\n" + " b.hex as buildid, f1.name as artifactsrc, 'R' as sourcetype, f0.name as source0, sd.mtime as mtime, f1.name as source1, fsref.name as source0ref\n" + " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_files fsref, " + " " BUILDIDS "_r_sdef sd, " BUILDIDS "_r_sref sr, " BUILDIDS "_r_de sde\n" + " where b.id = sr.buildid and f0.id = sd.file and fsref.id = sde.file and f1.id = sd.content\n" + " and sr.artifactsrc = sd.content and sde.buildid = sr.buildid\n" + ";" + // and for startup overview counts + "drop view if exists " BUILDIDS "_stats;\n" + "create view if not exists " BUILDIDS "_stats as\n" + " select 'file d/e' as label,count(*) as quantity from " BUILDIDS "_f_de\n" + "union all select 'file s',count(*) from " BUILDIDS "_f_s\n" + "union all select 'rpm d/e',count(*) from " BUILDIDS "_r_de\n" + "union all select 'rpm sref',count(*) from " BUILDIDS "_r_sref\n" + "union all select 'rpm sdef',count(*) from " BUILDIDS "_r_sdef\n" + "union all select 'buildids',count(*) from " BUILDIDS "_buildids\n" + "union all select 'filenames',count(*) from " BUILDIDS "_files\n" + "union all select 'files scanned (#)',count(*) from " BUILDIDS "_file_mtime_scanned\n" + "union all select 'files scanned (mb)',coalesce(sum(size)/1024/1024,0) from " BUILDIDS "_file_mtime_scanned\n" +#if SQLITE_VERSION_NUMBER >= 3016000 + "union all select 'index db size (mb)',page_count*page_size/1024/1024 as size FROM pragma_page_count(), pragma_page_size()\n" +#endif + ";\n" + +// schema change history & garbage collection +// +// 
XXX: we could have migration queries here to bring prior-schema +// data over instead of just dropping it. +// +// buildids9: widen the mtime_scanned table + "" // <<< we are here +// buildids8: slim the sref table + "drop table if exists buildids8_f_de;\n" + "drop table if exists buildids8_f_s;\n" + "drop table if exists buildids8_r_de;\n" + "drop table if exists buildids8_r_sref;\n" + "drop table if exists buildids8_r_sdef;\n" + "drop table if exists buildids8_file_mtime_scanned;\n" + "drop table if exists buildids8_files;\n" + "drop table if exists buildids8_buildids;\n" +// buildids7: separate _norm table into dense subtype tables + "drop table if exists buildids7_f_de;\n" + "drop table if exists buildids7_f_s;\n" + "drop table if exists buildids7_r_de;\n" + "drop table if exists buildids7_r_sref;\n" + "drop table if exists buildids7_r_sdef;\n" + "drop table if exists buildids7_file_mtime_scanned;\n" + "drop table if exists buildids7_files;\n" + "drop table if exists buildids7_buildids;\n" +// buildids6: drop bolo/rfolo again, represent sources / rpmcontents in main table + "drop table if exists buildids6_norm;\n" + "drop table if exists buildids6_files;\n" + "drop table if exists buildids6_buildids;\n" + "drop view if exists buildids6;\n" +// buildids5: redefine srcfile1 column to be '.'-less (for rpms) + "drop table if exists buildids5_norm;\n" + "drop table if exists buildids5_files;\n" + "drop table if exists buildids5_buildids;\n" + "drop table if exists buildids5_bolo;\n" + "drop table if exists buildids5_rfolo;\n" + "drop view if exists buildids5;\n" +// buildids4: introduce rpmfile RFOLO + "drop table if exists buildids4_norm;\n" + "drop table if exists buildids4_files;\n" + "drop table if exists buildids4_buildids;\n" + "drop table if exists buildids4_bolo;\n" + "drop table if exists buildids4_rfolo;\n" + "drop view if exists buildids4;\n" +// buildids3*: split out srcfile BOLO + "drop table if exists buildids3_norm;\n" + "drop table if exists 
buildids3_files;\n" + "drop table if exists buildids3_buildids;\n" + "drop table if exists buildids3_bolo;\n" + "drop view if exists buildids3;\n" +// buildids2: normalized buildid and filenames into interning tables; + "drop table if exists buildids2_norm;\n" + "drop table if exists buildids2_files;\n" + "drop table if exists buildids2_buildids;\n" + "drop view if exists buildids2;\n" + // buildids1: made buildid and artifacttype NULLable, to represent cached-negative +// lookups from sources, e.g. files or rpms that contain no buildid-indexable content + "drop table if exists buildids1;\n" +// buildids: original + "drop table if exists buildids;\n" + ; + +static const char DEBUGINFOD_SQLITE_CLEANUP_DDL[] = + "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file + ; + + + + +/* Name and version of program. */ +/* ARGP_PROGRAM_VERSION_HOOK_DEF = print_version; */ // not this simple for C++ + +/* Bug report address. */ +ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT; + +/* Definitions of arguments for argp functions. */ +static const struct argp_option options[] = + { + { NULL, 0, NULL, 0, "Scanners:", 1 }, + { "scan-file-dir", 'F', NULL, 0, "Enable ELF/DWARF file scanning threads.", 0 }, + { "scan-rpm-dir", 'R', NULL, 0, "Enable RPM scanning threads.", 0 }, + // "source-oci-imageregistry" ... 
+ + { NULL, 0, NULL, 0, "Options:", 2 }, + { "rescan-time", 't', "SECONDS", 0, "Number of seconds to wait between rescans, 0=disable.", 0 }, + { "groom-time", 'g', "SECONDS", 0, "Number of seconds to wait between database grooming, 0=disable.", 0 }, + { "maxigroom", 'G', NULL, 0, "Run a complete database groom/shrink pass at startup.", 0 }, + { "concurrency", 'c', "NUM", 0, "Limit scanning thread concurrency to NUM.", 0 }, + { "include", 'I', "REGEX", 0, "Include files matching REGEX, default=all.", 0 }, + { "exclude", 'X', "REGEX", 0, "Exclude files matching REGEX, default=none.", 0 }, + { "port", 'p', "NUM", 0, "HTTP port to listen on, default 8002.", 0 }, + { "database", 'd', "FILE", 0, "Path to sqlite database.", 0 }, + { "ddl", 'D', "SQL", 0, "Apply extra sqlite ddl/pragma to connection.", 0 }, + { "verbose", 'v', NULL, 0, "Increase verbosity.", 0 }, + + { NULL, 0, NULL, 0, NULL, 0 } + }; + +/* Short description of program. */ +static const char doc[] = "Serve debuginfo-related content across HTTP from files under PATHs."; + +/* Strings for arguments in help texts. */ +static const char args_doc[] = "[PATH ...]"; + +/* Prototype for option handler. */ +static error_t parse_opt (int key, char *arg, struct argp_state *state); + +/* Data structure to communicate with argp functions. 
*/
+static struct argp argp =
+  {
+   options, parse_opt, args_doc, doc, NULL, NULL, NULL
+  };
+
+
+/* Global configuration and state.  Most of these are filled in by
+   parse_opt() below from the command line.  */
+static string db_path;                          // -d: path to the sqlite database
+static sqlite3 *db;                             // sqlite connection handle
+static unsigned verbose;                        // -v: incremented once per occurrence
+// NOTE(review): presumably set from signal handlers defined later in the file.
+static volatile sig_atomic_t interrupted = 0;
+static volatile sig_atomic_t sigusr1 = 0;
+static volatile sig_atomic_t sigusr2 = 0;
+static unsigned http_port = 8002;               // -p
+static unsigned rescan_s = 300;                 // -t: seconds between rescans
+static unsigned groom_s = 86400;                // -g: seconds between grooming passes
+static unsigned maxigroom = false;              // -G
+static unsigned concurrency = std::thread::hardware_concurrency() ?: 1; // -c
+static set<string> source_paths;                // positional PATH arguments
+static bool scan_files = false;                 // -F
+static bool scan_rpms = false;                  // -R
+static vector<string> extra_ddl;                // -D: one entry per occurrence
+static regex_t file_include_regex;              // -I
+static regex_t file_exclude_regex;              // -X
+
+
+/* Parse ARG as a non-negative decimal integer no larger than MAX and
+   return it.  Malformed, negative or out-of-range input is reported
+   via argp_failure() (status 1, EINVAL) using WHAT as the message.
+   This replaces a bare atoi(), which silently turned garbage into 0
+   and negative numbers into huge unsigned values.  */
+static unsigned
+parse_unsigned_arg (struct argp_state *state, const char *arg,
+                    unsigned long max, const char *what)
+{
+  char *end = NULL;
+  errno = 0;
+  long value = strtol (arg, &end, 10);
+  if (errno != 0 || end == arg || *end != '\0'
+      || value < 0 || (unsigned long) value > max)
+    {
+      argp_failure (state, 1, EINVAL, "%s", what);
+      return 0; // reached only if argp_failure() did not exit
+    }
+  return (unsigned) value;
+}
+
+
+/* Handle program arguments.  Called by argp once per option KEY (with
+   its argument ARG, if any) and once per positional argument; stores
+   the result in the globals above.  Returns 0 when the key was
+   consumed, ARGP_ERR_UNKNOWN otherwise.  */
+static error_t
+parse_opt (int key, char *arg,
+	   struct argp_state *state)
+{
+  // Largest value representable in the unsigned globals.
+  const unsigned long unsigned_max = ~0U;
+  int rc;
+  switch (key)
+    {
+    case 'v': verbose ++; break;
+    case 'd': db_path = string(arg); break;
+    case 'p':
+      http_port = parse_unsigned_arg (state, arg, 65535, "port number");
+      break;
+    case 'F': scan_files = true; break;
+    case 'R': scan_rpms = true; break;
+    case 'D': extra_ddl.push_back(string(arg)); break;
+    case 't':
+      rescan_s = parse_unsigned_arg (state, arg, unsigned_max, "rescan time");
+      break;
+    case 'g':
+      groom_s = parse_unsigned_arg (state, arg, unsigned_max, "groom time");
+      break;
+    case 'G':
+      maxigroom = true;
+      break;
+    case 'c':
+      concurrency = parse_unsigned_arg (state, arg, unsigned_max, "concurrency");
+      if (concurrency < 1) concurrency = 1; // always keep at least one scanner
+      break;
+    case 'I':
+      // NB: no problem with unconditional free here - an earlier failed regcomp would exit program
+      regfree (&file_include_regex);
+      rc = regcomp (&file_include_regex, arg, REG_EXTENDED|REG_NOSUB);
+      if (rc != 0)
+        argp_failure(state, 1, EINVAL, "regular expression");
+      break;
+    case 'X':
+      regfree (&file_exclude_regex);
+      rc = regcomp (&file_exclude_regex, arg, REG_EXTENDED|REG_NOSUB);
+      if (rc != 0)
+        argp_failure(state, 1, EINVAL, "regular expression");
+      break;
+    case ARGP_KEY_ARG:
+      source_paths.insert(string(arg));
+      break;
+      // case 'h': argp_state_help (state, stderr, ARGP_HELP_LONG|ARGP_HELP_EXIT_OK);
+    default: return ARGP_ERR_UNKNOWN;
+    }
+
+  return 0;
+}
+
+
+////////////////////////////////////////////////////////////////////////
+
+
+// represent errors that may get reported to an ostream and/or a libmicrohttpd connection
+
+struct reportable_exception
+{
+  int code;        // HTTP status code used by mhd_send_response()
+  string message;  // human-readable text, also used as the HTTP body
+
+  reportable_exception(int c, const string& m): code(c), message(m) {}
+  reportable_exception(const string& m): code(503), message(m) {}
+  reportable_exception(): code(503), message() {}
+
+  void report(ostream& o) const; // defined under obatched() class below
+
+  // Queue this error on connection C as a response whose body is a copy
+  // of the message and whose status is CODE.  Returns the result of
+  // MHD_queue_response(), or MHD_NO if no response object could be created.
+  int mhd_send_response(MHD_Connection* c) const {
+    MHD_Response* r = MHD_create_response_from_buffer (message.size(),
+                                                       (void*) message.c_str(),
+                                                       MHD_RESPMEM_MUST_COPY);
+    if (r == NULL) // allocation failure: nothing to queue or destroy
+      return MHD_NO;
+    int rc = MHD_queue_response (c, code, r);
+    MHD_destroy_response (r);
+    return rc;
+  }
+};
+
+
+// The subclasses below only format a message; they all go through the
+// single-string constructor and therefore carry code 503.
+
+// An sqlite3 result code RC plus context MSG.
+struct sqlite_exception: public reportable_exception
+{
+  sqlite_exception(int rc, const string& msg):
+    reportable_exception(string("sqlite3 error: ") + msg + ": " + string(sqlite3_errstr(rc) ?: "?")) {}
+};
+
+// An errno-style value RC plus context MSG.
+struct libc_exception: public reportable_exception
+{
+  libc_exception(int rc, const string& msg):
+    reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {}
+};
+
+
+// A libarchive failure; with an archive handle A, its error string is appended.
+struct archive_exception: public reportable_exception
+{
+  archive_exception(const string& msg):
+    reportable_exception(string("libarchive error: ") + msg) {}
+  archive_exception(struct archive* a, const string& msg):
+    reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {}
+};
+
+
+// A libelf error code RC (as understood by elf_errmsg) plus context MSG.
+struct elfutils_exception: public reportable_exception
+{
+  elfutils_exception(int rc, const string& msg):
+    reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {}
+};
+
+
+////////////////////////////////////////////////////////////////////////
+
+// a c++ counting-semaphore class ...
since we're c++11 not c++20 + +class semaphore +{ +public: + semaphore (unsigned c=1): count(c) {} + inline void notify () { + unique_lock lock(mtx); + count++; + cv.notify_one(); + } + inline void wait() { + unique_lock lock(mtx); + while (count == 0) + cv.wait(lock); + count--; + } +private: + mutex mtx; + condition_variable cv; + unsigned count; +}; + + +class semaphore_borrower +{ +public: + semaphore_borrower(semaphore* s): sem(s) { sem->wait(); } + ~semaphore_borrower() { sem->notify(); } +private: + semaphore* sem; +}; + + +//////////////////////////////////////////////////////////////////////// + + +// Print a standard timestamp. +static ostream& +timestamp (ostream &o) +{ + char datebuf[80]; + char *now2 = NULL; + time_t now_t = time(NULL); + struct tm *now = gmtime (&now_t); + if (now) + { + (void) strftime (datebuf, sizeof (datebuf), "%c", now); + now2 = datebuf; + } + + return o << "[" << (now2 ? now2 : "") << "] " + << "(" << getpid () << "/" << tid() << "): "; +} + + +// A little class that impersonates an ostream to the extent that it can +// take << streaming operations. It batches up the bits into an internal +// stringstream until it is destroyed; then flushes to the original ostream. 
+// It adds a timestamp +class obatched +{ +private: + ostream& o; + stringstream stro; + static mutex lock; +public: + obatched(ostream& oo, bool timestamp_p = true): o(oo) + { + if (timestamp_p) + timestamp(stro); + } + ~obatched() + { + unique_lock do_not_cross_the_streams(obatched::lock); + o << stro.str(); + o.flush(); + } + operator ostream& () { return stro; } + template ostream& operator << (const T& t) { stro << t; return stro; } +}; +mutex obatched::lock; // just the one, since cout/cerr iostreams are not thread-safe + + +void reportable_exception::report(ostream& o) const { + obatched(o) << message << endl; +} + + +//////////////////////////////////////////////////////////////////////// + + +// RAII style sqlite prepared-statement holder that matches { } block lifetime + +struct sqlite_ps +{ +private: + sqlite3* db; + const string nickname; + const string sql; + sqlite3_stmt *pp; + + sqlite_ps(const sqlite_ps&); // make uncopyable + sqlite_ps& operator=(const sqlite_ps &); // make unassignable + +public: + sqlite_ps (sqlite3* d, const string& n, const string& s): db(d), nickname(n), sql(s) { + if (verbose > 4) + obatched(clog) << nickname << " prep " << sql << endl; + int rc = sqlite3_prepare_v2 (db, sql.c_str(), -1 /* to \0 */, & this->pp, NULL); + if (rc != SQLITE_OK) + throw sqlite_exception(rc, "prepare " + sql); + } + + sqlite_ps& reset() + { + sqlite3_reset(this->pp); + return *this; + } + + sqlite_ps& bind(int parameter, const string& str) + { + if (verbose > 4) + obatched(clog) << nickname << " bind " << parameter << "=" << str << endl; + int rc = sqlite3_bind_text (this->pp, parameter, str.c_str(), -1, SQLITE_TRANSIENT); + if (rc != SQLITE_OK) + throw sqlite_exception(rc, "sqlite3 bind"); + return *this; + } + + sqlite_ps& bind(int parameter, int64_t value) + { + if (verbose > 4) + obatched(clog) << nickname << " bind " << parameter << "=" << value << endl; + int rc = sqlite3_bind_int64 (this->pp, parameter, value); + if (rc != SQLITE_OK) + 
throw sqlite_exception(rc, "sqlite3 bind"); + return *this; + } + + sqlite_ps& bind(int parameter) + { + if (verbose > 4) + obatched(clog) << nickname << " bind " << parameter << "=" << "NULL" << endl; + int rc = sqlite3_bind_null (this->pp, parameter); + if (rc != SQLITE_OK) + throw sqlite_exception(rc, "sqlite3 bind"); + return *this; + } + + + void step_ok_done() { + int rc = sqlite3_step (this->pp); + if (verbose > 4) + obatched(clog) << nickname << " step-ok-done(" << sqlite3_errstr(rc) << ") " << sql << endl; + if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW) + throw sqlite_exception(rc, "sqlite3 step"); + (void) sqlite3_reset (this->pp); + } + + + int step() { + int rc = sqlite3_step (this->pp); + if (verbose > 4) + obatched(clog) << nickname << " step(" << sqlite3_errstr(rc) << ") " << sql << endl; + return rc; + } + + + + ~sqlite_ps () { sqlite3_finalize (this->pp); } + operator sqlite3_stmt* () { return this->pp; } +}; + + +//////////////////////////////////////////////////////////////////////// + +// RAII style templated autocloser + +template +struct defer_dtor +{ +public: + typedef Ignore (*dtor_fn) (Payload); + +private: + Payload p; + dtor_fn fn; + +public: + defer_dtor(Payload _p, dtor_fn _fn): p(_p), fn(_fn) {} + ~defer_dtor() { (void) (*fn)(p); } + +private: + defer_dtor(const defer_dtor&); // make uncopyable + defer_dtor& operator=(const defer_dtor &); // make unassignable +}; + + + +//////////////////////////////////////////////////////////////////////// + + + + + +static string +conninfo (struct MHD_Connection * conn) +{ + char hostname[256]; // RFC1035 + char servname[256]; + int sts = -1; + + if (conn == 0) + return "internal"; + + /* Look up client address data. */ + const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn, + MHD_CONNECTION_INFO_CLIENT_ADDRESS); + struct sockaddr *so = u ? 
u->client_addr : 0; + + if (so && so->sa_family == AF_INET) { + sts = getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), servname, + sizeof (servname), NI_NUMERICHOST | NI_NUMERICSERV); + } else if (so && so->sa_family == AF_INET6) { + sts = getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), + servname, sizeof (servname), NI_NUMERICHOST | NI_NUMERICSERV); + } + if (sts != 0) { + hostname[0] = servname[0] = '\0'; + } + + return string(hostname) + string(":") + string(servname); +} + + + +//////////////////////////////////////////////////////////////////////// + +static void +add_mhd_last_modified (struct MHD_Response *resp, time_t mtime) +{ + struct tm *now = gmtime (&mtime); + if (now != NULL) + { + char datebuf[80]; + size_t rc = strftime (datebuf, sizeof (datebuf), "%a, %d %b %Y %T GMT", now); + if (rc > 0 && rc < sizeof (datebuf)) + (void) MHD_add_response_header (resp, "Last-Modified", datebuf); + } + + (void) MHD_add_response_header (resp, "Cache-Control", "public"); +} + + + +static struct MHD_Response* +handle_buildid_f_match (int64_t b_mtime, + const string& b_source0, + int *result_fd) +{ + int fd = open(b_source0.c_str(), O_RDONLY); + if (fd < 0) + { + if (verbose) + obatched(clog) << "cannot open " << b_source0 << endl; + // if still missing, a periodic groom pass will delete this buildid record + return 0; + } + + // NB: use manual close(2) in error case instead of defer_dtor, because + // in the normal case, we want to hand the fd over to libmicrohttpd for + // file transfer. 
+ + struct stat s; + int rc = fstat(fd, &s); + if (rc < 0) + { + if (verbose) + clog << "cannot fstat " << b_source0 << endl; + close(fd); + return 0; + } + + if ((int64_t) s.st_mtime != b_mtime) + { + if (verbose) + obatched(clog) << "mtime mismatch for " << b_source0 << endl; + close(fd); + return 0; + } + + struct MHD_Response* r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd); + if (r == 0) + { + if (verbose) + obatched(clog) << "cannot create fd-response for " << b_source0 << endl; + close(fd); + } + else + { + add_mhd_last_modified (r, s.st_mtime); + if (verbose > 1) + obatched(clog) << "serving file " << b_source0 << endl; + /* libmicrohttpd will close it. */ + if (result_fd) + *result_fd = fd; + } + + return r; +} + + +// quote all questionable characters of str for safe passage through a sh -c expansion. +static string +shell_escape(const string& str) +{ + string y; + for (auto&& x : str) + { + if (! isalnum(x) && x != '/') + y += "\\"; + y += x; + } + return y; +} + + +static struct MHD_Response* +handle_buildid_r_match (int64_t b_mtime, + const string& b_source0, + const string& b_source1, + int *result_fd) +{ + struct stat fs; + int rc = stat (b_source0.c_str(), &fs); + if (rc != 0) + throw libc_exception (errno, string("stat ") + b_source0); + + if ((int64_t) fs.st_mtime != b_mtime) + { + if (verbose) + obatched(clog) << "mtime mismatch for " << b_source0 << endl; + return 0; + } + + string popen_cmd = string("rpm2cpio " + shell_escape(b_source0)); + FILE* fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC? 
+ if (fp == NULL) + throw libc_exception (errno, string("popen ") + popen_cmd); + defer_dtor fp_closer (fp, pclose); + + struct archive *a; + a = archive_read_new(); + if (a == NULL) + throw archive_exception("cannot create archive reader"); + defer_dtor archive_closer (a, archive_read_free); + + rc = archive_read_support_format_cpio(a); + if (rc != ARCHIVE_OK) + throw archive_exception(a, "cannot select cpio format"); + rc = archive_read_support_filter_all(a); + if (rc != ARCHIVE_OK) + throw archive_exception(a, "cannot select all filters"); + + rc = archive_read_open_FILE (a, fp); + if (rc != ARCHIVE_OK) + throw archive_exception(a, "cannot open archive from rpm2cpio pipe"); + + while(1) // parse cpio archive entries + { + struct archive_entry *e; + rc = archive_read_next_header (a, &e); + if (rc != ARCHIVE_OK) + break; + + if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely + continue; + + string fn = archive_entry_pathname (e); + if (fn != string(".")+b_source1) + continue; + + // extract this file to a temporary file + char tmppath[PATH_MAX] = "/tmp/debuginfod.XXXXXX"; // XXX: $TMP_DIR etc. + int fd = mkstemp (tmppath); + if (fd < 0) + throw libc_exception (errno, "cannot create temporary file"); + unlink (tmppath); // unlink now so OS will release the file as soon as we close the fd + + rc = archive_read_data_into_fd (a, fd); + if (rc != ARCHIVE_OK) + { + close (fd); + throw archive_exception(a, "cannot extract file"); + } + + struct MHD_Response* r = MHD_create_response_from_fd (archive_entry_size(e), fd); + if (r == 0) + { + if (verbose) + obatched(clog) << "cannot create fd-response for " << b_source0 << endl; + close(fd); + break; // assume no chance of better luck around another iteration + } + else + { + add_mhd_last_modified (r, archive_entry_mtime(e)); + if (verbose > 1) + obatched(clog) << "serving rpm " << b_source0 << " file " << b_source1 << endl; + /* libmicrohttpd will close it. 
*/ + if (result_fd) + *result_fd = fd; + return r; + } + } + + // XXX: rpm/file not found: delete this R entry? + return 0; +} + + +static struct MHD_Response* +handle_buildid_match (int64_t b_mtime, + const string& b_stype, + const string& b_source0, + const string& b_source1, + int *result_fd) +{ + if (b_stype == "F") + return handle_buildid_f_match(b_mtime, b_source0, result_fd); + else if (b_stype == "R") + return handle_buildid_r_match(b_mtime, b_source0, b_source1, result_fd); + else + return 0; +} + + + +static struct MHD_Response* handle_buildid (const string& buildid /* unsafe */, + const string& artifacttype /* unsafe */, + const string& suffix /* unsafe */, + int *result_fd + ) +{ + // validate artifacttype + string atype_code; + if (artifacttype == "debuginfo") atype_code = "D"; + else if (artifacttype == "executable") atype_code = "E"; + else if (artifacttype == "source") atype_code = "S"; + else throw reportable_exception("invalid artifacttype"); + + if (atype_code == "S" && suffix == "") + throw reportable_exception("invalid source suffix"); + + // validate buildid + if ((buildid.size() < 2) || // not empty + (buildid.size() % 2) || // even number + (buildid.find_first_not_of("0123456789abcdef") != string::npos)) // pure tasty lowercase hex + throw reportable_exception("invalid buildid"); + + if (verbose > 1) + obatched(clog) << "searching for buildid=" << buildid << " artifacttype=" << artifacttype + << " suffix=" << suffix << endl; + + sqlite_ps *pp = 0; + + if (atype_code == "D") + { + pp = new sqlite_ps (db, "mhd-query-d", + "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_d where buildid = ? " + "order by mtime desc"); + pp->reset(); + pp->bind(1, buildid); + } + else if (atype_code == "E") + { + pp = new sqlite_ps (db, "mhd-query-e", + "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_e where buildid = ? 
" + "order by mtime desc"); + pp->reset(); + pp->bind(1, buildid); + } + else if (atype_code == "S") + { + pp = new sqlite_ps (db, "mhd-query-s", + "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_s where buildid = ? and artifactsrc = ? " + "order by sharedprefix(source0,source0ref) desc, mtime desc"); + pp->reset(); + pp->bind(1, buildid); + pp->bind(2, suffix); + } + unique_ptr ps_closer(pp); // release pp if exception or return + + // consume all the rows + while (1) + { + int rc = pp->step(); + if (rc == SQLITE_DONE) break; + if (rc != SQLITE_ROW) + throw sqlite_exception(rc, "step"); + + int64_t b_mtime = sqlite3_column_int64 (*pp, 0); + string b_stype = string((const char*) sqlite3_column_text (*pp, 1) ?: ""); /* by DDL may not be NULL */ + string b_source0 = string((const char*) sqlite3_column_text (*pp, 2) ?: ""); /* may be NULL */ + string b_source1 = string((const char*) sqlite3_column_text (*pp, 3) ?: ""); /* may be NULL */ + + if (verbose > 1) + obatched(clog) << "found mtime=" << b_mtime << " stype=" << b_stype + << " source0=" << b_source0 << " source1=" << b_source1 << endl; + + // Try accessing the located match. + // XXX: in case of multiple matches, attempt them in parallel? + auto r = handle_buildid_match (b_mtime, b_stype, b_source0, b_source1, result_fd); + if (r) + return r; + } + + // We couldn't find it in the database. Last ditch effort + // is to defer to other debuginfo servers. 
+ int fd = -1; + if (artifacttype == "debuginfo") + fd = debuginfod_find_debuginfo ((const unsigned char*) buildid.c_str(), 0, + NULL); + else if (artifacttype == "executable") + fd = debuginfod_find_executable ((const unsigned char*) buildid.c_str(), 0, + NULL); + else if (artifacttype == "source") + fd = debuginfod_find_source ((const unsigned char*) buildid.c_str(), 0, + suffix.c_str(), NULL); + if (fd >= 0) + { + struct stat s; + int rc = fstat (fd, &s); + if (rc == 0) + { + auto r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd); + if (r) + { + add_mhd_last_modified (r, s.st_mtime); + if (verbose > 1) + obatched(clog) << "serving file from upstream debuginfod/cache" << endl; + if (result_fd) + *result_fd = fd; + return r; // NB: don't close fd; libmicrohttpd will + } + } + close (fd); + } + else if (fd != -ENOSYS) // no DEBUGINFOD_URLS configured + throw libc_exception(-fd, "upstream debuginfod query failed"); + + throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found"); +} + + +//////////////////////////////////////////////////////////////////////// + + +static struct MHD_Response* +handle_metrics () +{ + throw reportable_exception("not yet implemented 2"); +} + + +//////////////////////////////////////////////////////////////////////// + + +/* libmicrohttpd callback */ +static int +handler_cb (void * /*cls*/, + struct MHD_Connection *connection, + const char *url, + const char *method, + const char * /*version*/, + const char * /*upload_data*/, + size_t * /*upload_data_size*/, + void ** /*con_cls*/) +{ + struct MHD_Response *r = NULL; + string url_copy = url; + + if (verbose) + obatched(clog) << conninfo(connection) << " " << method << " " << url << endl; + + try + { + if (string(method) != "GET") + throw reportable_exception(400, "we support GET only"); + + /* Start decoding the URL. 
*/ + size_t slash1 = url_copy.find('/', 1); + string url1 = url_copy.substr(0, slash1); // ok even if slash1 not found + + if (slash1 != string::npos && url1 == "/buildid") + { + size_t slash2 = url_copy.find('/', slash1+1); + if (slash2 == string::npos) + throw reportable_exception("/buildid/ webapi error, need buildid"); + + string buildid = url_copy.substr(slash1+1, slash2-slash1-1); + + size_t slash3 = url_copy.find('/', slash2+1); + string artifacttype, suffix; + if (slash3 == string::npos) + { + artifacttype = url_copy.substr(slash2+1); + suffix = ""; + } + else + { + artifacttype = url_copy.substr(slash2+1, slash3-slash2-1); + suffix = url_copy.substr(slash3); // include the slash in the suffix + } + + r = handle_buildid(buildid, artifacttype, suffix, 0); // NB: don't care about result-fd + } + else if (url1 == "/metrics") + r = handle_metrics(); + else + throw reportable_exception("webapi error, unrecognized /operation"); + + if (r == 0) + throw reportable_exception("internal error, missing response"); + + int rc = MHD_queue_response (connection, MHD_HTTP_OK, r); + MHD_destroy_response (r); + return rc; + } + catch (const reportable_exception& e) + { + e.report(clog); + return e.mhd_send_response (connection); + } +} + + +//////////////////////////////////////////////////////////////////////// +// borrowed originally from src/nm.c get_local_names() + +static void +dwarf_extract_source_paths (Elf *elf, set& debug_sourcefiles) + noexcept // no exceptions - so we can simplify the altdbg resource release at end +{ + Dwarf* dbg = dwarf_begin_elf (elf, DWARF_C_READ, NULL); + if (dbg == NULL) + return; + + Dwarf* altdbg = NULL; + int altdbg_fd = -1; + + // DWZ handling: if we have an unsatisfied debug-alt-link, add an + // empty string into the outgoing sourcefiles set, so the caller + // should know that our data is incomplete. 
+ const char *alt_name_p; + const void *alt_build_id; // elfutils-owned memory + ssize_t sz = dwelf_dwarf_gnu_debugaltlink (dbg, &alt_name_p, &alt_build_id); + if (sz > 0) // got one! + { + string buildid; + unsigned char* build_id_bytes = (unsigned char*) alt_build_id; + for (ssize_t idx=0; idx> 4]; + buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf]; + } + + if (verbose > 3) + obatched(clog) << "Need altdebug buildid=" << buildid << endl; + + // but is it unsatisfied the normal elfutils ways? + Dwarf* alt = dwarf_getalt (dbg); + if (alt == NULL) + { + // Yup, unsatisfied the normal way. Maybe we can satisfy it + // from our own debuginfod database. + int alt_fd; + struct MHD_Response *r = 0; + try + { + r = handle_buildid (buildid, "debuginfo", "", &alt_fd); + } + catch (const reportable_exception& e) + { + // swallow exceptions + } + + // NB: this is not actually recursive! This invokes the web-query + // path, which cannot get back into the scan code paths. + if (r) + { + // Found it! + altdbg_fd = dup(alt_fd); // ok if this fails, downstream failures ok + alt = altdbg = dwarf_begin (altdbg_fd, DWARF_C_READ); + // NB: must close this dwarf and this fd at the bottom of the function! + MHD_destroy_response (r); // will close alt_fd + if (alt) + dwarf_setalt (dbg, alt); + } + } + else + { + // NB: dwarf_setalt(alt) inappropriate - already done! + // NB: altdbg will stay 0 so nothing tries to redundantly dealloc. 
+ } + + if (alt) + { + if (verbose > 3) + obatched(clog) << "Resolved altdebug buildid=" << buildid << endl; + } + else // (alt == NULL) - signal possible presence of poor debuginfo + { + debug_sourcefiles.insert(""); + if (verbose > 3) + obatched(clog) << "Unresolved altdebug buildid=" << buildid << endl; + } + } + + Dwarf_Off offset = 0; + Dwarf_Off old_offset; + size_t hsize; + + while (dwarf_nextcu (dbg, old_offset = offset, &offset, &hsize, NULL, NULL, NULL) == 0) + { + Dwarf_Die cudie_mem; + Dwarf_Die *cudie = dwarf_offdie (dbg, old_offset + hsize, &cudie_mem); + + if (cudie == NULL) + continue; + if (dwarf_tag (cudie) != DW_TAG_compile_unit) + continue; + + const char *cuname = dwarf_diename(cudie) ?: "unknown"; + + Dwarf_Files *files; + size_t nfiles; + if (dwarf_getsrcfiles (cudie, &files, &nfiles) != 0) + continue; + + // extract DW_AT_comp_dir to resolve relative file names + const char *comp_dir = ""; + const char *const *dirs; + size_t ndirs; + if (dwarf_getsrcdirs (files, &dirs, &ndirs) == 0 && + dirs[0] != NULL) + comp_dir = dirs[0]; + if (comp_dir == NULL) + comp_dir = ""; + + if (verbose > 3) + obatched(clog) << "searching for sources for cu=" << cuname << " comp_dir=" << comp_dir + << " #files=" << nfiles << " #dirs=" << ndirs << endl; + + if (comp_dir[0] == '\0' && cuname[0] != '/') + { + // This is a common symptom for dwz-compressed debug files, + // where the altdebug file cannot be resolved. 
+ if (verbose > 3) + obatched(clog) << "skipping cu=" << cuname << " due to empty comp_dir" << endl; + continue; + } + + for (size_t f = 1; f < nfiles; f++) + { + const char *hat = dwarf_filesrc (files, f, NULL, NULL); + if (hat == NULL) + continue; + + if (string(hat) == "") // gcc intrinsics, don't bother record + continue; + + string waldo; + if (hat[0] == '/') // absolute + waldo = (string (hat)); + else if (comp_dir[0] != '\0') // comp_dir relative + waldo = (string (comp_dir) + string("/") + string (hat)); + else + { + obatched(clog) << "skipping hat=" << hat << " due to empty comp_dir" << endl; + continue; + } + + // NB: this is the 'waldo' that a dbginfo client will have + // to supply for us to give them the file The comp_dir + // prefixing is a definite complication. Otherwise we'd + // have to return a setof comp_dirs (one per CU!) with + // corresponding filesrc[] names, instead of one absolute + // resoved set. Maybe we'll have to do that anyway. XXX + + if (verbose > 4) + obatched(clog) << waldo + << (debug_sourcefiles.find(waldo)==debug_sourcefiles.end() ? " new" : " dup") << endl; + + debug_sourcefiles.insert (waldo); + } + } + + dwarf_end(dbg); + if (altdbg) + dwarf_end(altdbg); + if (altdbg_fd >= 0) + close(altdbg_fd); +} + + + +static void +elf_classify (int fd, bool &executable_p, bool &debuginfo_p, string &buildid, set& debug_sourcefiles) +{ + Elf *elf = elf_begin (fd, ELF_C_READ_MMAP_PRIVATE, NULL); + if (elf == NULL) + return; + + try // catch our types of errors and clean up the Elf* object + { + if (elf_kind (elf) != ELF_K_ELF) + { + elf_end (elf); + return; + } + + GElf_Ehdr ehdr_storage; + GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_storage); + if (ehdr == NULL) + { + elf_end (elf); + return; + } + auto elf_type = ehdr->e_type; + + const void *build_id; // elfutils-owned memory + ssize_t sz = dwelf_elf_gnu_build_id (elf, & build_id); + if (sz <= 0) + { + // It's not a diagnostic-worthy error for an elf file to lack build-id. 
+ // It might just be very old. + elf_end (elf); + return; + } + + // build_id is a raw byte array; convert to hexadecimal *lowercase* + unsigned char* build_id_bytes = (unsigned char*) build_id; + for (ssize_t idx=0; idx> 4]; + buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf]; + } + + // now decide whether it's an executable - namely, any allocatable section has + // PROGBITS; + if (elf_type == ET_EXEC || elf_type == ET_DYN) + { + size_t shnum; + int rc = elf_getshdrnum (elf, &shnum); + if (rc < 0) + throw elfutils_exception(rc, "getshdrnum"); + + executable_p = false; + for (size_t sc = 0; sc < shnum; sc++) + { + Elf_Scn *scn = elf_getscn (elf, sc); + if (scn == NULL) + continue; + + GElf_Shdr shdr_mem; + GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem); + if (shdr == NULL) + continue; + + // allocated (loadable / vm-addr-assigned) section with available content? + if ((shdr->sh_type == SHT_PROGBITS) && (shdr->sh_flags & SHF_ALLOC)) + { + if (verbose > 4) + obatched(clog) << "executable due to SHF_ALLOC SHT_PROGBITS sc=" << sc << endl; + executable_p = true; + break; // no need to keep looking for others + } + } // iterate over sections + } // executable_p classification + + // now decide whether it's a debuginfo - namely, if it has any .debug* or .zdebug* sections + // logic mostly stolen from fweimer@redhat.com's elfclassify drafts + size_t shstrndx; + int rc = elf_getshdrstrndx (elf, &shstrndx); + if (rc < 0) + throw elfutils_exception(rc, "getshdrstrndx"); + + Elf_Scn *scn = NULL; + while (true) + { + scn = elf_nextscn (elf, scn); + if (scn == NULL) + break; + GElf_Shdr shdr_storage; + GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage); + if (shdr == NULL) + break; + const char *section_name = elf_strptr (elf, shstrndx, shdr->sh_name); + if (section_name == NULL) + break; + if (strncmp(section_name, ".debug_line", 11) == 0 || + strncmp(section_name, ".zdebug_line", 12) == 0) + { + debuginfo_p = true; + dwarf_extract_source_paths (elf, 
debug_sourcefiles); + break; // expecting only one .*debug_line, so no need to look for others + } + else if (strncmp(section_name, ".debug_", 7) == 0 || + strncmp(section_name, ".zdebug_", 8) == 0) + { + debuginfo_p = true; + // NB: don't break; need to parse .debug_line for sources + } + } + } + catch (const reportable_exception& e) + { + e.report(clog); + } + elf_end (elf); +} + + +static semaphore* scan_concurrency_sem = 0; // used to implement -c load limiting + + +static void +scan_source_file_path (const string& dir) +{ + obatched(clog) << "fts/file traversing " << dir << endl; + + struct timeval tv_start, tv_end; + gettimeofday (&tv_start, NULL); + + sqlite_ps ps_upsert_buildids (db, "file-buildids-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);"); + sqlite_ps ps_upsert_files (db, "file-files-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, ?);"); + sqlite_ps ps_upsert_de (db, "file-de-upsert", + "insert or ignore into " BUILDIDS "_f_de " + "(buildid, debuginfo_p, executable_p, file, mtime) " + "values ((select id from " BUILDIDS "_buildids where hex = ?)," + " ?,?," + " (select id from " BUILDIDS "_files where name = ?), ?);"); + sqlite_ps ps_upsert_s (db, "file-s-upsert", + "insert or ignore into " BUILDIDS "_f_s " + "(buildid, artifactsrc, file, mtime) " + "values ((select id from " BUILDIDS "_buildids where hex = ?)," + " (select id from " BUILDIDS "_files where name = ?)," + " (select id from " BUILDIDS "_files where name = ?)," + " ?);"); + sqlite_ps ps_query (db, "file-negativehit-find", + "select 1 from " BUILDIDS "_file_mtime_scanned where sourcetype = 'F' and file = (select id from " BUILDIDS "_files where name = ?) 
and mtime = ?;"); + sqlite_ps ps_scan_done (db, "file-scanned", + "insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)" + "values ('F', (select id from " BUILDIDS "_files where name = ?), ?, ?);"); + + + char * const dirs[] = { (char*) dir.c_str(), NULL }; + + unsigned fts_scanned=0, fts_regex=0, fts_cached=0, fts_debuginfo=0, fts_executable=0, fts_sourcefiles=0; + + FTS *fts = fts_open (dirs, + FTS_PHYSICAL /* don't follow symlinks */ + | FTS_XDEV /* don't cross devices/mountpoints */ + | FTS_NOCHDIR /* multithreaded */, + NULL); + if (fts == NULL) + { + obatched(cerr) << "cannot fts_open " << dir << endl; + return; + } + + FTSENT *f; + while ((f = fts_read (fts)) != NULL) + { + semaphore_borrower handle_one_file (scan_concurrency_sem); + + fts_scanned ++; + if (interrupted) + break; + + if (verbose > 2) + obatched(clog) << "fts/file traversing " << f->fts_path << endl; + + try + { + /* Found a file. Convert it to an absolute path, so + the buildid database does not have relative path + names that are unresolvable from a subsequent run + in a different cwd. */ + char *rp = realpath(f->fts_path, NULL); + if (rp == NULL) + continue; // ignore dangling symlink or such + string rps = string(rp); + free (rp); + + bool ri = !regexec (&file_include_regex, rps.c_str(), 0, 0, 0); + bool rx = !regexec (&file_exclude_regex, rps.c_str(), 0, 0, 0); + if (!ri || rx) + { + if (verbose > 3) + obatched(clog) << "fts/file skipped by regex " << (!ri ? "I" : "") << (rx ? "X" : "") << endl; + fts_regex ++; + continue; + } + + switch (f->fts_info) + { + case FTS_D: + break; + + case FTS_DP: + break; + + case FTS_F: + { + /* See if we know of it already. 
*/ + int rc = ps_query + .reset() + .bind(1, rps) + .bind(2, f->fts_statp->st_mtime) + .step(); + ps_query.reset(); + if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results) + // no need to recheck a file/version we already know + // specifically, no need to elf-begin a file we already determined is non-elf + // (so is stored with buildid=NULL) + { + fts_cached ++; + continue; + } + + bool executable_p = false, debuginfo_p = false; // E and/or D + string buildid; + set sourcefiles; + + int fd = open (rps.c_str(), O_RDONLY); + try + { + if (fd >= 0) + elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles); + else + throw libc_exception(errno, string("open ") + rps); + } + + // NB: we catch exceptions here too, so that we can + // cache the corrupt-elf case (!executable_p && + // !debuginfo_p) just below, just as if we had an + // EPERM error from open(2). + catch (const reportable_exception& e) + { + e.report(clog); + } + + if (fd >= 0) + close (fd); + + // register this file name in the interning table + ps_upsert_files + .reset() + .bind(1, rps) + .step_ok_done(); + + if (buildid == "") + { + // no point storing an elf file without buildid + executable_p = false; + debuginfo_p = false; + } + else + { + // register this build-id in the interning table + ps_upsert_buildids + .reset() + .bind(1, buildid) + .step_ok_done(); + } + + if (executable_p) + fts_executable ++; + if (debuginfo_p) + fts_debuginfo ++; + if (executable_p || debuginfo_p) + { + ps_upsert_de + .reset() + .bind(1, buildid) + .bind(2, debuginfo_p ? 1 : 0) + .bind(3, executable_p ? 
1 : 0) + .bind(4, rps) + .bind(5, f->fts_statp->st_mtime) + .step_ok_done(); + } + + if (sourcefiles.size() && buildid != "") + { + fts_sourcefiles += sourcefiles.size(); + + for (auto&& dwarfsrc : sourcefiles) + { + char *srp = realpath(dwarfsrc.c_str(), NULL); + if (srp == NULL) // also if DWZ unresolved dwarfsrc="" + continue; // unresolvable files are not a serious problem + // throw libc_exception(errno, "fts/file realpath " + srcpath); + string srps = string(srp); + free (srp); + + struct stat sfs; + rc = stat(srps.c_str(), &sfs); + if (rc != 0) + continue; + + if (verbose > 2) + obatched(clog) << "recorded buildid=" << buildid << " file=" << srps + << " mtime=" << sfs.st_mtime + << " as source " << dwarfsrc << endl; + + ps_upsert_files + .reset() + .bind(1, srps) + .step_ok_done(); + + // register the dwarfsrc name in the interning table too + ps_upsert_files + .reset() + .bind(1, dwarfsrc) + .step_ok_done(); + + ps_upsert_s + .reset() + .bind(1, buildid) + .bind(2, dwarfsrc) + .bind(3, srps) + .bind(4, sfs.st_mtime) + .step_ok_done(); + } + } + + ps_scan_done + .reset() + .bind(1, rps) + .bind(2, f->fts_statp->st_mtime) + .bind(3, f->fts_statp->st_size) + .step_ok_done(); + + if (verbose > 2) + obatched(clog) << "recorded buildid=" << buildid << " file=" << rps + << " mtime=" << f->fts_statp->st_mtime << " atype=" + << (executable_p ? "E" : "") + << (debuginfo_p ? 
"D" : "") << endl; + } + break; + + case FTS_ERR: + case FTS_NS: + throw libc_exception(f->fts_errno, string("fts/file traversal ") + string(f->fts_path)); + + default: + case FTS_SL: /* NB: don't enter symbolic links into the database */ + break; + } + + if ((verbose && f->fts_info == FTS_DP) || + (verbose > 1 && f->fts_info == FTS_F)) + obatched(clog) << "fts/file traversing " << rps << ", scanned=" << fts_scanned + << ", regex-skipped=" << fts_regex + << ", cached=" << fts_cached << ", debuginfo=" << fts_debuginfo + << ", executable=" << fts_executable << ", source=" << fts_sourcefiles << endl; + } + catch (const reportable_exception& e) + { + e.report(clog); + } + } + fts_close (fts); + + gettimeofday (&tv_end, NULL); + double deltas = (tv_end.tv_sec - tv_start.tv_sec) + (tv_end.tv_usec - tv_start.tv_usec)*0.000001; + + obatched(clog) << "fts/file traversed " << dir << " in " << deltas << "s, scanned=" << fts_scanned + << ", regex-skipped=" << fts_regex + << ", cached=" << fts_cached << ", debuginfo=" << fts_debuginfo + << ", executable=" << fts_executable << ", source=" << fts_sourcefiles << endl; +} + + +static void* +thread_main_scan_source_file_path (void* arg) +{ + string dir = string((const char*) arg); + + unsigned rescan_timer = 0; + sig_atomic_t forced_rescan_count = 0; + while (! interrupted) + { + try + { + if (rescan_timer == 0) + scan_source_file_path (dir); + else if (sigusr1 != forced_rescan_count) + { + forced_rescan_count = sigusr1; + scan_source_file_path (dir); + } + } + catch (const sqlite_exception& e) + { + obatched(cerr) << e.message << endl; + } + sleep (1); + rescan_timer ++; + if (rescan_s) + rescan_timer %= rescan_s; + } + + return 0; +} + + +//////////////////////////////////////////////////////////////////////// + + + + +// Analyze given *.rpm file of given age; record buildids / exec/debuginfo-ness of its +// constituent files with given upsert statements. 
+static void +rpm_classify (const string& rps, sqlite_ps& ps_upsert_buildids, sqlite_ps& ps_upsert_files, + sqlite_ps& ps_upsert_de, sqlite_ps& ps_upsert_sref, sqlite_ps& ps_upsert_sdef, + time_t mtime, + unsigned& fts_executable, unsigned& fts_debuginfo, unsigned& fts_sref, unsigned& fts_sdef, + bool& fts_sref_complete_p) +{ + string popen_cmd = string("rpm2cpio " + shell_escape(rps)); + FILE* fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC? + if (fp == NULL) + throw libc_exception (errno, string("popen ") + popen_cmd); + defer_dtor fp_closer (fp, pclose); + + struct archive *a; + a = archive_read_new(); + if (a == NULL) + throw archive_exception("cannot create archive reader"); + defer_dtor archive_closer (a, archive_read_free); + + int rc = archive_read_support_format_cpio(a); + if (rc != ARCHIVE_OK) + throw archive_exception(a, "cannot select cpio format"); + rc = archive_read_support_filter_all(a); + if (rc != ARCHIVE_OK) + throw archive_exception(a, "cannot select all filters"); + + rc = archive_read_open_FILE (a, fp); + if (rc != ARCHIVE_OK) + throw archive_exception(a, "cannot open archive from rpm2cpio pipe"); + + if (verbose > 3) + obatched(clog) << "rpm2cpio|libarchive scanning " << rps << endl; + + while(1) // parse cpio archive entries + { + try + { + struct archive_entry *e; + rc = archive_read_next_header (a, &e); + if (rc != ARCHIVE_OK) + break; + + if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely + continue; + + string fn = archive_entry_pathname (e); + if (fn.size() > 1 && fn[0] == '.') + fn = fn.substr(1); // trim off the leading '.' 
+ + if (verbose > 3) + obatched(clog) << "rpm2cpio|libarchive checking " << fn << endl; + + // extract this file to a temporary file + const char *tmpdir_env = getenv ("TMPDIR") ?: "/tmp"; + char* tmppath = NULL; + rc = asprintf (&tmppath, "%s/debuginfod.XXXXXX", tmpdir_env); + if (rc < 0) + throw libc_exception (ENOMEM, "cannot allocate tmppath"); + defer_dtor tmmpath_freer (tmppath, free); + int fd = mkstemp (tmppath); + if (fd < 0) + throw libc_exception (errno, "cannot create temporary file"); + unlink (tmppath); // unlink now so OS will release the file as soon as we close the fd + defer_dtor minifd_closer (fd, close); + + rc = archive_read_data_into_fd (a, fd); + if (rc != ARCHIVE_OK) + throw archive_exception(a, "cannot extract file"); + + // finally ... time to run elf_classify on this bad boy and update the database + bool executable_p = false, debuginfo_p = false; + string buildid; + set sourcefiles; + elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles); + // NB: might throw + + if (buildid != "") // intern buildid + { + ps_upsert_buildids + .reset() + .bind(1, buildid) + .step_ok_done(); + } + + ps_upsert_files // register this rpm constituent file name in interning table + .reset() + .bind(1, fn) + .step_ok_done(); + + if (sourcefiles.size() > 0) // sref records needed + { + // NB: we intern each source file once. Once raw, as it + // appears in the DWARF file list coming back from + // elf_classify() - because it'll end up in the + // _norm.artifactsrc column. We don't also put another + // version with a '.' at the front, even though that's + // how rpm/cpio packs names, because we hide that from + // the database for storage efficiency. 
+ + for (auto&& s : sourcefiles) + { + if (s == "") + { + fts_sref_complete_p = false; + continue; + } + + ps_upsert_files + .reset() + .bind(1, s) + .step_ok_done(); + + ps_upsert_sref + .reset() + .bind(1, buildid) + .bind(2, s) + .step_ok_done(); + + fts_sref ++; + } + } + + if (executable_p) + fts_executable ++; + if (debuginfo_p) + fts_debuginfo ++; + + if (executable_p || debuginfo_p) + { + ps_upsert_de + .reset() + .bind(1, buildid) + .bind(2, debuginfo_p ? 1 : 0) + .bind(3, executable_p ? 1 : 0) + .bind(4, rps) + .bind(5, mtime) + .bind(6, fn) + .step_ok_done(); + } + else // potential source - sdef record + { + fts_sdef ++; + ps_upsert_sdef + .reset() + .bind(1, rps) + .bind(2, mtime) + .bind(3, fn) + .step_ok_done(); + } + + if ((verbose > 2) && (executable_p || debuginfo_p)) + obatched(clog) << "recorded buildid=" << buildid << " rpm=" << rps << " file=" << fn + << " mtime=" << mtime << " atype=" + << (executable_p ? "E" : "") + << (debuginfo_p ? "D" : "") + << " sourcefiles=" << sourcefiles.size() << endl; + + } + catch (const reportable_exception& e) + { + e.report(clog); + } + } +} + + + +// scan for *.rpm files +static void +scan_source_rpm_path (const string& dir) +{ + obatched(clog) << "fts/rpm traversing " << dir << endl; + + sqlite_ps ps_upsert_buildids (db, "rpm-buildid-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);"); + sqlite_ps ps_upsert_files (db, "rpm-file-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, ?);"); + sqlite_ps ps_upsert_de (db, "rpm-de-insert", + "insert or ignore into " BUILDIDS "_r_de (buildid, debuginfo_p, executable_p, file, mtime, content) values (" + "(select id from " BUILDIDS "_buildids where hex = ?), ?, ?, " + "(select id from " BUILDIDS "_files where name = ?), ?, " + "(select id from " BUILDIDS "_files where name = ?));"); + sqlite_ps ps_upsert_sref (db, "rpm-sref-insert", + "insert or ignore into " BUILDIDS "_r_sref (buildid, artifactsrc) values (" + "(select id from " 
BUILDIDS "_buildids where hex = ?), " + "(select id from " BUILDIDS "_files where name = ?));"); + sqlite_ps ps_upsert_sdef (db, "rpm-sdef-insert", + "insert or ignore into " BUILDIDS "_r_sdef (file, mtime, content) values (" + "(select id from " BUILDIDS "_files where name = ?), ?," + "(select id from " BUILDIDS "_files where name = ?));"); + sqlite_ps ps_query (db, "rpm-negativehit-query", + "select 1 from " BUILDIDS "_file_mtime_scanned where " + "sourcetype = 'R' and file = (select id from " BUILDIDS "_files where name = ?) and mtime = ?;"); + sqlite_ps ps_scan_done (db, "rpm-scanned", + "insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)" + "values ('R', (select id from " BUILDIDS "_files where name = ?), ?, ?);"); + + char * const dirs[] = { (char*) dir.c_str(), NULL }; + + struct timeval tv_start, tv_end; + gettimeofday (&tv_start, NULL); + unsigned fts_scanned=0, fts_regex=0, fts_cached=0, fts_debuginfo=0; + unsigned fts_executable=0, fts_rpm = 0, fts_sref=0, fts_sdef=0; + + FTS *fts = fts_open (dirs, + FTS_PHYSICAL /* don't follow symlinks */ + | FTS_XDEV /* don't cross devices/mountpoints */ + | FTS_NOCHDIR /* multithreaded */, + NULL); + if (fts == NULL) + { + obatched(cerr) << "cannot fts_open " << dir << endl; + return; + } + + FTSENT *f; + while ((f = fts_read (fts)) != NULL) + { + semaphore_borrower handle_one_file (scan_concurrency_sem); + + fts_scanned ++; + if (interrupted) + break; + + if (verbose > 2) + obatched(clog) << "fts/rpm traversing " << f->fts_path << endl; + + try + { + /* Found a file. Convert it to an absolute path, so + the buildid database does not have relative path + names that are unresolvable from a subsequent run + in a different cwd. 
*/ + char *rp = realpath(f->fts_path, NULL); + if (rp == NULL) + continue; // ignore dangling symlink or such + string rps = string(rp); + free (rp); + + bool ri = !regexec (&file_include_regex, rps.c_str(), 0, 0, 0); + bool rx = !regexec (&file_exclude_regex, rps.c_str(), 0, 0, 0); + if (!ri || rx) + { + if (verbose > 3) + obatched(clog) << "fts/rpm skipped by regex " << (!ri ? "I" : "") << (rx ? "X" : "") << endl; + fts_regex ++; + continue; + } + + switch (f->fts_info) + { + case FTS_D: + break; + + case FTS_DP: + break; + + case FTS_F: + { + // heuristic: reject if file name does not end with ".rpm" + // (alternative: try opening with librpm etc., caching) + string suffix = ".rpm"; + if (rps.size() < suffix.size() || + rps.substr(rps.size()-suffix.size()) != suffix) + continue; + fts_rpm ++; + + /* See if we know of it already. */ + int rc = ps_query + .reset() + .bind(1, rps) + .bind(2, f->fts_statp->st_mtime) + .step(); + ps_query.reset(); + if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results) + // no need to recheck a file/version we already know + // specifically, no need to parse this rpm again, since we already have + // it as a D or E or S record, + // (so is stored with buildid=NULL) + { + fts_cached ++; + continue; + } + + // intern the rpm file name + ps_upsert_files + .reset() + .bind(1, rps) + .step_ok_done(); + + // extract the rpm contents via popen("rpm2cpio") | libarchive | loop-of-elf_classify() + unsigned my_fts_executable = 0, my_fts_debuginfo = 0, my_fts_sref = 0, my_fts_sdef = 0; + bool my_fts_sref_complete_p = true; + try + { + rpm_classify (rps, + ps_upsert_buildids, ps_upsert_files, + ps_upsert_de, ps_upsert_sref, ps_upsert_sdef, // dalt + f->fts_statp->st_mtime, + my_fts_executable, my_fts_debuginfo, my_fts_sref, my_fts_sdef, + my_fts_sref_complete_p); + } + catch (const reportable_exception& e) + { + e.report(clog); + } + + if (verbose > 2) + obatched(clog) << "scanned rpm=" << rps + << " mtime=" << 
f->fts_statp->st_mtime + << " executables=" << my_fts_executable + << " debuginfos=" << my_fts_debuginfo + << " srefs=" << my_fts_sref + << " sdefs=" << my_fts_sdef + << endl; + + fts_executable += my_fts_executable; + fts_debuginfo += my_fts_debuginfo; + fts_sref += my_fts_sref; + fts_sdef += my_fts_sdef; + + if (my_fts_sref_complete_p) // leave incomplete? + ps_scan_done + .reset() + .bind(1, rps) + .bind(2, f->fts_statp->st_mtime) + .bind(3, f->fts_statp->st_size) + .step_ok_done(); + } + break; + + case FTS_ERR: + case FTS_NS: + throw libc_exception(f->fts_errno, string("fts/rpm traversal ") + string(f->fts_path)); + + default: + case FTS_SL: /* NB: don't enter symbolic links into the database */ + break; + } + + if ((verbose && f->fts_info == FTS_DP) || + (verbose > 1 && f->fts_info == FTS_F)) + obatched(clog) << "fts/rpm traversing " << rps << ", scanned=" << fts_scanned + << ", regex-skipped=" << fts_regex + << ", rpm=" << fts_rpm << ", cached=" << fts_cached << ", debuginfo=" << fts_debuginfo + << ", executable=" << fts_executable + << ", sourcerefs=" << fts_sref << ", sourcedefs=" << fts_sdef << endl; + } + catch (const reportable_exception& e) + { + e.report(clog); + } + } + fts_close (fts); + + gettimeofday (&tv_end, NULL); + double deltas = (tv_end.tv_sec - tv_start.tv_sec) + (tv_end.tv_usec - tv_start.tv_usec)*0.000001; + + obatched(clog) << "fts/rpm traversed " << dir << " in " << deltas << "s, scanned=" << fts_scanned + << ", regex-skipped=" << fts_regex + << ", rpm=" << fts_rpm << ", cached=" << fts_cached << ", debuginfo=" << fts_debuginfo + << ", executable=" << fts_executable + << ", sourcerefs=" << fts_sref << ", sourcedefs=" << fts_sdef << endl; +} + + + +static void* +thread_main_scan_source_rpm_path (void* arg) +{ + string dir = string((const char*) arg); + + unsigned rescan_timer = 0; + sig_atomic_t forced_rescan_count = 0; + while (! 
interrupted) + { + try + { + if (rescan_timer == 0) + scan_source_rpm_path (dir); + else if (sigusr1 != forced_rescan_count) + { + forced_rescan_count = sigusr1; + scan_source_rpm_path (dir); + } + } + catch (const sqlite_exception& e) + { + obatched(cerr) << e.message << endl; + } + sleep (1); + rescan_timer ++; + if (rescan_s) + rescan_timer %= rescan_s; + } + + return 0; +} + + +//////////////////////////////////////////////////////////////////////// + +static void +database_stats_report() +{ + sqlite_ps ps_query (db, "database-overview", + "select label,quantity from " BUILDIDS "_stats"); + + obatched(clog) << "database record counts:" << endl; + while (1) + { + int rc = sqlite3_step (ps_query); + if (rc == SQLITE_DONE) break; + if (rc != SQLITE_ROW) + throw sqlite_exception(rc, "step"); + + obatched(clog) + << right << setw(20) << ((const char*) sqlite3_column_text(ps_query, 0) ?: (const char*) "NULL") + << " " + << (sqlite3_column_text(ps_query, 1) ?: (const unsigned char*) "NULL") + << endl; + } +} + + +// Do a round of database grooming that might take many minutes to run. +void groom() +{ + obatched(clog) << "grooming database" << endl; + + struct timeval tv_start, tv_end; + gettimeofday (&tv_start, NULL); + + // scan for files that have disappeared + sqlite_ps files (db, "check old files", "select s.mtime, s.file, f.name from " + BUILDIDS "_file_mtime_scanned s, " BUILDIDS "_files f " + "where f.id = s.file"); + sqlite_ps files_del_f_de (db, "nuke f_de", "delete from " BUILDIDS "_f_de where file = ? and mtime = ?"); + sqlite_ps files_del_r_de (db, "nuke r_de", "delete from " BUILDIDS "_r_de where file = ? and mtime = ?"); + sqlite_ps files_del_scan (db, "nuke f_m_s", "delete from " BUILDIDS "_file_mtime_scanned " + "where file = ? 
and mtime = ?"); + files.reset(); + while(1) + { + int rc = files.step(); + if (rc != SQLITE_ROW) + break; + + int64_t mtime = sqlite3_column_int64 (files, 0); + int64_t fileid = sqlite3_column_int64 (files, 1); + const char* filename = ((const char*) sqlite3_column_text (files, 2) ?: ""); + struct stat s; + rc = stat(filename, &s); + if (rc < 0 || (mtime != (int64_t) s.st_mtime)) + { + if (verbose > 2) + obatched(clog) << "groom: forgetting file=" << filename << " mtime=" << mtime << endl; + files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done(); + files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done(); + files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done(); + } + } + files.reset(); + + // delete buildids with no references in _r_de or _f_de tables; + // cascades to _r_sref & _f_s records + sqlite_ps buildids_del (db, "nuke orphan buildids", + "delete from " BUILDIDS "_buildids " + "where not exists (select 1 from " BUILDIDS "_f_de d where " BUILDIDS "_buildids.id = d.buildid) " + "and not exists (select 1 from " BUILDIDS "_r_de d where " BUILDIDS "_buildids.id = d.buildid)"); + buildids_del.reset().step_ok_done(); + + // NB: "vacuum" is too heavy for even daily runs: it rewrites the entire db, so is done as maxigroom -G + sqlite_ps g1 (db, "incremental vacuum", "pragma incremental_vacuum"); + g1.reset().step_ok_done(); + sqlite_ps g2 (db, "optimize", "pragma optimize"); + g2.reset().step_ok_done(); + sqlite_ps g3 (db, "wal checkpoint", "pragma wal_checkpoint=truncate"); + g3.reset().step_ok_done(); + + database_stats_report(); + + gettimeofday (&tv_end, NULL); + double deltas = (tv_end.tv_sec - tv_start.tv_sec) + (tv_end.tv_usec - tv_start.tv_usec)*0.000001; + + obatched(clog) << "groomed database in " << deltas << "s" << endl; +} + + +static void* +thread_main_groom (void* /*arg*/) +{ + unsigned groom_timer = 0; + sig_atomic_t forced_groom_count = 0; + while (! 
interrupted)
+    {
+      try
+        {
+          if (groom_timer == 0)
+            groom ();
+          else if (sigusr2 != forced_groom_count)
+            {
+              forced_groom_count = sigusr2;
+              groom ();
+            }
+        }
+      catch (const sqlite_exception& e)
+        {
+          obatched(cerr) << e.message << endl;
+        }
+      sleep (1);
+      groom_timer ++;
+      if (groom_s)
+        groom_timer %= groom_s;
+    }
+
+  return 0;
+}
+
+
+////////////////////////////////////////////////////////////////////////
+
+
+static void
+signal_handler (int /* sig */)
+{
+  interrupted ++;
+
+  if (db)
+    sqlite3_interrupt (db);
+
+  // NB: don't do anything else in here
+}
+
+static void
+sigusr1_handler (int /* sig */)
+{
+  sigusr1 ++;
+  // NB: don't do anything else in here
+}
+
+static void
+sigusr2_handler (int /* sig */)
+{
+  sigusr2 ++;
+  // NB: don't do anything else in here
+}
+
+
+
+
+
+// A user-defined sqlite function, to score the sharedness of the
+// prefix of two strings.  This is used to compare candidate debuginfo
+// / source-rpm names, so that the closest match
+// (directory-topology-wise closest) is found.  This is important in
+// case the same sref (source file name) is in many -debuginfo or
+// -debugsource RPMs, such as when multiple versions/releases of the
+// same package are in the database.
+//
+// Result: the number of leading bytes the two text arguments have in
+// common; SQL NULL if either argument is not text; an error if the
+// argument count is not 2.
+
+static void sqlite3_sharedprefix_fn (sqlite3_context* c, int argc, sqlite3_value** argv)
+{
+  if (argc != 2)
+    sqlite3_result_error(c, "expect 2 string arguments", -1);
+  else if ((sqlite3_value_type(argv[0]) != SQLITE_TEXT) ||
+           (sqlite3_value_type(argv[1]) != SQLITE_TEXT))
+    sqlite3_result_null(c);
+  else
+    {
+      const unsigned char* a = sqlite3_value_text (argv[0]);
+      const unsigned char* b = sqlite3_value_text (argv[1]);
+      int i = 0;
+      // Stop at the first NUL.  Without this check, two identical
+      // strings would compare equal at their terminators and the loop
+      // would keep reading past the end of both buffers.  (*a == *b
+      // with *a != '\0' implies *b != '\0' too.)  The NULL tests cover
+      // sqlite3_value_text() failing to produce a string.
+      while (a != NULL && b != NULL && *a != '\0' && *a == *b)
+        {
+          a++;
+          b++;
+          i++;
+        }
+      sqlite3_result_int (c, i);
+    }
+}
+
+
+int
+main (int argc, char *argv[])
+{
+  (void) setlocale (LC_ALL, "");
+  (void) bindtextdomain (PACKAGE_TARNAME, LOCALEDIR);
+  (void) textdomain (PACKAGE_TARNAME);
+
+  /* Tell the library which version we are expecting.
*/ + elf_version (EV_CURRENT); + + /* Set computed default values. */ + db_path = string(getenv("HOME") ?: "/") + string("/.debuginfod.sqlite"); /* XDG? */ + int rc = regcomp (& file_include_regex, ".*", REG_EXTENDED|REG_NOSUB); // match everything + if (rc != 0) + error (EXIT_FAILURE, 0, "regcomp failure: %d", rc); + rc = regcomp (& file_exclude_regex, "^$", REG_EXTENDED|REG_NOSUB); // match nothing + if (rc != 0) + error (EXIT_FAILURE, 0, "regcomp failure: %d", rc); + + /* Parse and process arguments. */ + int remaining; + argp_program_version_hook = print_version; // this works + (void) argp_parse (&argp, argc, argv, ARGP_IN_ORDER, &remaining, NULL); + if (remaining != argc) + error (EXIT_FAILURE, 0, + "unexpected argument: %s", argv[remaining]); + + if (!scan_rpms && !scan_files && source_paths.size()>0) + obatched(clog) << "warning: without -F and/or -R, ignoring PATHs" << endl; + + (void) signal (SIGPIPE, SIG_IGN); // microhttpd can generate it incidentally, ignore + (void) signal (SIGINT, signal_handler); // ^C + (void) signal (SIGHUP, signal_handler); // EOF + (void) signal (SIGTERM, signal_handler); // systemd + (void) signal (SIGUSR1, sigusr1_handler); // end-user + (void) signal (SIGUSR2, sigusr2_handler); // end-user + + // do this before any threads start + scan_concurrency_sem = new semaphore(concurrency); + + /* Get database ready. 
*/ + rc = sqlite3_open_v2 (db_path.c_str(), &db, (SQLITE_OPEN_READWRITE + |SQLITE_OPEN_CREATE + |SQLITE_OPEN_FULLMUTEX), /* thread-safe */ + NULL); + if (rc == SQLITE_CORRUPT) + { + (void) unlink (db_path.c_str()); + error (EXIT_FAILURE, 0, + "cannot open %s, deleted database: %s", db_path.c_str(), sqlite3_errmsg(db)); + } + else if (rc) + { + error (EXIT_FAILURE, 0, + "cannot open %s, consider deleting database: %s", db_path.c_str(), sqlite3_errmsg(db)); + } + + obatched(clog) << "opened database " << db_path << endl; + obatched(clog) << "sqlite version " << sqlite3_version << endl; + + // add special string-prefix-similarity function used in rpm sref/sdef resolution + rc = sqlite3_create_function(db, "sharedprefix", 2, SQLITE_UTF8, NULL, + & sqlite3_sharedprefix_fn, NULL, NULL); + if (rc != SQLITE_OK) + error (EXIT_FAILURE, 0, + "cannot create sharedprefix( function: %s", sqlite3_errmsg(db)); + + if (verbose > 3) + obatched(clog) << "ddl: " << DEBUGINFOD_SQLITE_DDL << endl; + rc = sqlite3_exec (db, DEBUGINFOD_SQLITE_DDL, NULL, NULL, NULL); + if (rc != SQLITE_OK) + { + error (EXIT_FAILURE, 0, + "cannot run database schema ddl: %s", sqlite3_errmsg(db)); + } + + // Start httpd server threads. Separate pool for IPv4 and IPv6, in + // case the host only has one protocol stack. 
+ MHD_Daemon *d4 = MHD_start_daemon (MHD_USE_THREAD_PER_CONNECTION +#if MHD_VERSION >= 0x00095300 + | MHD_USE_INTERNAL_POLLING_THREAD +#else + | MHD_USE_SELECT_INTERNALLY +#endif + | MHD_USE_DEBUG, /* report errors to stderr */ + http_port, + NULL, NULL, /* default accept policy */ + handler_cb, NULL, /* handler callback */ + MHD_OPTION_END); + MHD_Daemon *d6 = MHD_start_daemon (MHD_USE_THREAD_PER_CONNECTION +#if MHD_VERSION >= 0x00095300 + | MHD_USE_INTERNAL_POLLING_THREAD +#else + | MHD_USE_SELECT_INTERNALLY +#endif + | MHD_USE_IPv6 + | MHD_USE_DEBUG, /* report errors to stderr */ + http_port, + NULL, NULL, /* default accept policy */ + handler_cb, NULL, /* handler callback */ + MHD_OPTION_END); + + if (d4 == NULL && d6 == NULL) // neither ipv4 nor ipv6? boo + { + sqlite3 *database = db; + db = 0; // for signal_handler not to freak + sqlite3_close (database); + error (EXIT_FAILURE, 0, "cannot start http server at port %d", http_port); + } + + obatched(clog) << "started http server on " + << (d4 != NULL ? "IPv4 " : "") + << (d6 != NULL ? "IPv6 " : "") + << "port=" << http_port << endl; + + // add maxigroom sql if -G given + if (maxigroom) + { + obatched(clog) << "maxigrooming database, please wait." << endl; + extra_ddl.push_back("create index if not exists " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);"); + extra_ddl.push_back("delete from " BUILDIDS "_r_sdef where not exists (select 1 from " BUILDIDS "_r_sref b where " BUILDIDS "_r_sdef.content = b.artifactsrc);"); + extra_ddl.push_back("drop index if exists " BUILDIDS "_r_sref_arc;"); + + // NB: we don't maxigroom the _files interning table. It'd require a temp index on all the + // tables that have file foreign-keys, which is a lot. + + // NB: with =delete, may take up 3x disk space total during vacuum process + // vs. =off (only 2x but may corrupt database if program dies mid-vacuum) + // vs. 
=wal (>3x observed, but safe)
+      extra_ddl.push_back("pragma journal_mode=delete;");
+      extra_ddl.push_back("vacuum;");
+      extra_ddl.push_back("pragma journal_mode=wal;");
+    }
+
+  // run extra -D sql if given
+  for (auto&& i: extra_ddl)
+    {
+      if (verbose > 1)
+        obatched(clog) << "extra ddl:\n" << i << endl;
+      rc = sqlite3_exec (db, i.c_str(), NULL, NULL, NULL);
+      if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW)
+        error (0, 0,
+               "warning: cannot run database extra ddl %s: %s", i.c_str(), sqlite3_errmsg(db));
+    }
+
+  if (maxigroom)
+    obatched(clog) << "maxigroomed database" << endl;
+
+
+  obatched(clog) << "search concurrency " << concurrency << endl;
+  obatched(clog) << "rescan time " << rescan_s << endl;
+  obatched(clog) << "groom time " << groom_s << endl;
+
+  vector<pthread_t> source_file_scanner_threads;
+  vector<pthread_t> source_rpm_scanner_threads;
+  pthread_t groom_thread;
+
+  // NB: pthread_create reports failure by returning a positive error
+  // number (it never returns -1), so test for any non-zero result.
+  rc = pthread_create (& groom_thread, NULL, thread_main_groom, NULL);
+  if (rc != 0)
+    error (0, 0, "warning: cannot spawn thread (%d) to groom database\n", rc);
+
+  // One scanner thread per PATH for -F; only successfully created
+  // threads are remembered for the later join.
+  if (scan_files) for (auto&& it : source_paths)
+    {
+      pthread_t pt;
+      rc = pthread_create (& pt, NULL, thread_main_scan_source_file_path, (void*) it.c_str());
+      if (rc != 0)
+        error (0, 0, "warning: cannot spawn thread (%d) to scan source files %s\n", rc, it.c_str());
+      else
+        source_file_scanner_threads.push_back(pt);
+    }
+
+  // Likewise one scanner thread per PATH for -R.
+  if (scan_rpms) for (auto&& it : source_paths)
+    {
+      pthread_t pt;
+      rc = pthread_create (& pt, NULL, thread_main_scan_source_rpm_path, (void*) it.c_str());
+      if (rc != 0)
+        error (0, 0, "warning: cannot spawn thread (%d) to scan source rpms %s\n", rc, it.c_str());
+      else
+        source_rpm_scanner_threads.push_back(pt);
+    }
+
+
+  const char* du = getenv(DEBUGINFOD_URLS_ENV_VAR);
+  if (du && du[0] != '\0') // set to non-empty string?
+    obatched(clog) << "upstream debuginfod servers: " << du << endl;
+
+  /* Trivial main loop! */
+  while (!
interrupted) + pause (); + + if (verbose) + obatched(clog) << "stopping" << endl; + + /* Stop all the web service threads. */ + if (d4) MHD_stop_daemon (d4); + if (d6) MHD_stop_daemon (d6); + + /* Join any source scanning threads. */ + for (auto&& it : source_file_scanner_threads) + pthread_join (it, NULL); + for (auto&& it : source_rpm_scanner_threads) + pthread_join (it, NULL); + pthread_join (groom_thread, NULL); + + /* With all threads known dead, we can clean up the global resources. */ + delete scan_concurrency_sem; + rc = sqlite3_exec (db, DEBUGINFOD_SQLITE_CLEANUP_DDL, NULL, NULL, NULL); + if (rc != SQLITE_OK) + { + error (0, 0, + "warning: cannot run database cleanup ddl: %s", sqlite3_errmsg(db)); + } + + // NB: no problem with unconditional free here - an earlier failed regcomp would exit program + (void) regfree (& file_include_regex); + (void) regfree (& file_exclude_regex); + + sqlite3 *database = db; + db = 0; // for signal_handler not to freak + (void) sqlite3_close (database); + + return 0; +} diff --git a/doc/Makefile.am b/doc/Makefile.am index 23102604..60e942cb 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -19,10 +19,11 @@ EXTRA_DIST = COPYING-GFDL README dist_man1_MANS=readelf.1 elfclassify.1 notrans_dist_man3_MANS=elf_update.3 elf_getdata.3 elf_clone.3 elf_begin.3 - +notrans_dist_man8_MANS= notrans_dist_man1_MANS= if DEBUGINFOD +notrans_dist_man8_MANS += debuginfod.8 notrans_dist_man3_MANS += debuginfod_find_debuginfo.3 debuginfod_find_source.3 debuginfod_find_executable.3 notrans_dist_man1_MANS += debuginfod-find.1 endif diff --git a/doc/debuginfod.8 b/doc/debuginfod.8 new file mode 100644 index 00000000..eb1d8910 --- /dev/null +++ b/doc/debuginfod.8 @@ -0,0 +1,369 @@ +'\"! tbl | nroff \-man +'\" t macro stdmacro + +.de SAMPLE +.br +.RS 0 +.nf +.nh +.. +.de ESAMPLE +.hy +.fi +.RE +.. + +.TH DEBUGINFOD 8 +.SH NAME +debuginfod \- debuginfo-related http file-server daemon + +.SH SYNOPSIS +.B debuginfod +[\fIOPTION\fP]... [\fIPATH\fP]... 
+ +.SH DESCRIPTION +\fBdebuginfod\fP serves debuginfo-related artifacts over HTTP. It +periodically scans a set of directories for ELF/DWARF files and their +associated source code, as well as RPM files containing the above, to +build an index by their buildid. This index is used when remote +clients use the HTTP webapi, to fetch these files by the same buildid. + +If a debuginfod cannot service a given buildid artifact request +itself, and it is configured with information about upstream +debuginfod servers, it queries them for the same information, just as +\fBdebuginfod-find\fP would. If successful, it locally caches then +relays the file content to the original requester. + +If the \fB\-F\fP option is given, each listed PATH creates a thread to +scan for matching ELF/DWARF/source files under the given physical +directory. Source files are matched with DWARF files based on the +AT_comp_dir (compilation directory) attributes inside it. Duplicate +directories are ignored. You may use a file name for a PATH, but +source code indexing may be incomplete; prefer using a directory that +contains the binaries. Caution: source files listed in the DWARF may +be a path \fIanywhere\fP in the file system, and debuginfod will +readily serve their content on demand. (Imagine a doctored DWARF file +that lists \fI/etc/passwd\fP as a source file.) If this is a concern, +audit your binaries with tools such as: + +.SAMPLE +% eu-readelf -wline BINARY | sed -n '/^Directory.table/,/^File.name.table/p' +or +% eu-readelf -wline BINARY | sed -n '/^Directory.table/,/^Line.number/p' +or even use debuginfod itself: +% debuginfod -vvv -d :memory: -F BINARY 2>&1 | grep 'recorded.*source' +^C +.ESAMPLE + +If the \fB\-R\fP option is given each listed PATH creates a thread to +scan for ELF/DWARF/source files contained in matching RPMs under the +given physical directory. Duplicate directories are ignored. 
You may +use a file name for a PATH, but source code indexing may be +incomplete; prefer using a directory that contains normal RPMs +alongside debuginfo/debugsource RPMs. Because of complications such +as DWZ-compressed debuginfo, it may require \fItwo\fP scan passes to +identify all source code. Source files for RPMs are only served +from other RPMs, so the caution for \-F does not apply. + +If no PATH is listed, or neither \-F nor \-R option is given, then +\fBdebuginfod\fP will simply serve content that it scanned into its +index in previous runs: the data is cumulative. + +File names must match extended regular expressions given by the \-I +option and not the \-X option (if any) in order to be considered. + + +.SH OPTIONS + +.TP +.B "\-F" +Activate ELF/DWARF file scanning threads. The default is off. + +.TP +.B "\-R" +Activate RPM file scanning threads. The default is off. + +.TP +.B "\-d FILE" "\-\-database=FILE" +Set the path of the sqlite database used to store the index. This +file is disposable in the sense that a later rescan will repopulate +data. It will contain absolute file path names, so it may not be +portable across machines. It may be frequently read/written, so it +should be on a fast filesystem. It should not be shared across +machines or users, to maximize sqlite locking performance. The +default database file is $HOME/.debuginfod.sqlite. + +.TP +.B "\-D SQL" "\-\-ddl=SQL" +Execute given sqlite statement after the database is opened and +initialized as extra DDL (SQL data definition language). This may be +useful to tune performance-related pragmas or indexes. May be +repeated. The default is nothing extra. + +.TP +.B "\-p NUM" "\-\-port=NUM" +Set the TCP port number on which debuginfod should listen, to service +HTTP requests. Both IPv4 and IPv6 sockets are opened, if possible. +The webapi is documented below. The default port number is 8002. 
+ +.TP +.B "\-I REGEX" "\-\-include=REGEX" "\-X REGEX" "\-\-exclude=REGEX" +Govern the inclusion and exclusion of file names under the search +paths. The regular expressions are interpreted as unanchored POSIX +extended REs, thus may include alternation. They are evaluated +against the full path of each file, based on its \fBrealpath(3)\fP +canonicalization. By default, all files are included and none are +excluded. A file that matches both include and exclude REGEX is +excluded. (The \fIcontents\fP of RPM files are not subject to +inclusion or exclusion filtering: they are all processed.) + +.TP +.B "\-t SECONDS" "\-\-rescan\-time=SECONDS" +Set the rescan time for the file and RPM directories. This is the +amount of time the scanning threads will wait after finishing a scan, +before doing it again. A rescan for unchanged files is fast (because +the index also stores the file mtimes). A time of zero is acceptable, +and means that only one initial scan should be performed. The default +rescan time is 300 seconds. Receiving a SIGUSR1 signal triggers a new +scan, independent of the rescan time (including if it was zero). + +.TP +.B "\-g SECONDS" "\-\-groom\-time=SECONDS" +Set the groom time for the index database. This is the amount of time +the grooming thread will wait after finishing a grooming pass before +doing it again. A groom operation quickly rescans all previously +scanned files, only to see if they are still present and current, so +it can deindex obsolete files. See also the \fIDATA MANAGEMENT\fP +section. The default groom time is 86400 seconds (1 day). A time of +zero is acceptable, and means that only one initial groom should be +performed. Receiving a SIGUSR2 signal triggers a new grooming pass, +independent of the groom time (including if it was zero). + +.TP +.B "\-G" +Run an extraordinary maximal-grooming pass at debuginfod startup. 
+This pass can take considerable time, because it tries to remove any +debuginfo-unrelated content from the RPM-related parts of the index. +It should not be run if any recent RPM-related indexing operations +were aborted early. It can take considerable space, because it +finishes up with an sqlite "vacuum" operation, which repacks the +database file by triplicating it temporarily. The default is not to +do maximal-grooming. See also the \fIDATA MANAGEMENT\fP section. + +.TP +.B "\-c NUM" "\-\-concurrency=NUM" +Set the concurrency limit for all the scanning threads. While many +threads may be spawned to cover all the given PATHs, only NUM may +concurrently do CPU-intensive operations like parsing an ELF file +or an RPM. The default is the number of processors on the system; +the minimum is 1. + +.TP +.B "\-v" +Increase verbosity of logging to the standard error file descriptor. +May be repeated to increase details. The default verbosity is 0. + +.SH WEBAPI + +.\" Much of the following text is duplicated with debuginfod-find.1 + +The debuginfod's webapi resembles ordinary file service, where a GET +request with a path containing a known buildid results in a file. +Unknown buildid / request combinations result in HTTP error codes. +This file service resemblance is intentional, so that an installation +can take advantage of standard HTTP management infrastructure. + +There are three requests. In each case, the buildid is encoded as a +lowercase hexadecimal string. 
For example, for a program \fI/bin/ls\fP, +look at the ELF note GNU_BUILD_ID: + +.SAMPLE +% readelf -n /bin/ls | grep -A4 build.id +Note section [ 4] '.note.gnu.buildid' of 36 bytes at offset 0x340: +Owner Data size Type +GNU 20 GNU_BUILD_ID +Build ID: 8713b9c3fb8a720137a4a08b325905c7aaf8429d +.ESAMPLE + +Then the hexadecimal BUILDID is simply: + +.SAMPLE +8713b9c3fb8a720137a4a08b325905c7aaf8429d +.ESAMPLE + +.SS /buildid/\fIBUILDID\fP/debuginfo + +If the given buildid is known to the server, this request will result +in a binary object that contains the customary \fB.*debug_*\fP +sections. This may be a split debuginfo file as created by +\fBstrip\fP, or it may be an original unstripped executable. + +.SS /buildid/\fIBUILDID\fP/executable + +If the given buildid is known to the server, this request will result +in a binary object that contains the normal executable segments. This +may be an executable stripped by \fBstrip\fP, or it may be an original +unstripped executable. \fBET_DYN\fP shared libraries are considered +to be a type of executable. + +.SS /buildid/\fIBUILDID\fP/source\fI/SOURCE/FILE\fP + +If the given buildid is known to the server, this request will result +in a binary object that contains the source file mentioned. The path +should be absolute. Relative path names commonly appear in the DWARF +file's source directory, but these paths are relative to +individual compilation unit AT_comp_dir paths, and yet an executable +is made up of multiple CUs. Therefore, to disambiguate, debuginfod +expects source queries to prefix relative path names with the CU +compilation-directory, followed by a mandatory "/". + +Note: contrary to RFC 3986, the client should not elide \fB../\fP or +\fB/./\fP or extraneous \fB///\fP sorts of path components in the +directory names, because if this is how those names appear in the +DWARF files, that is what debuginfod needs to see too. + +For example: +.TS +l l.
+#include <stdio.h> /buildid/BUILDID/source/usr/include/stdio.h +/path/to/foo.c /buildid/BUILDID/source/path/to/foo.c +\../bar/foo.c AT_comp_dir=/zoo/ /buildid/BUILDID/source/zoo//../bar/foo.c +.TE + +.SH DATA MANAGEMENT + +debuginfod stores its index in an sqlite database in a densely packed +set of interlinked tables. While the representation is as efficient +as we have been able to make it, it still takes a considerable amount +of data to record all debuginfo-related data of potentially a great +many files. This section offers some advice about the implications. + +As a general explanation for size, consider that when debuginfod indexes +ELF/DWARF files, it stores their names, their referenced source file +names, and their buildids. When indexing RPMs, it stores +every file name \fIof or in\fP an RPM, every buildid, plus every +source file name referenced from a DWARF file. (Indexing RPMs takes +more space because the source files often reside in separate +subpackages that may not be indexed in the same pass, so extra +metadata has to be kept.) + +Getting down to numbers, in the case of Fedora RPMs (essentially, +gzip-compressed cpio files), the sqlite index database tends to be +from 0.5% to 3% of their size. It's larger for binaries that are +assembled out of a great many source files, or packages that carry +much debuginfo-unrelated content. It may be even larger during the +indexing phase due to temporary sqlite write-ahead-logging files; +these are checkpointed (cleaned out and removed) at shutdown. It may +be helpful to apply tight \-I or \-X regular-expression constraints to +exclude files from scanning that you know have no debuginfo-relevant +content. + +As debuginfod runs, it periodically rescans its target directories, +and any new content found is added to the database. Old content, such +as data for files that have disappeared or that have been replaced +with newer versions, is removed at a periodic \fIgrooming\fP pass.
+This means that the sqlite files grow fast during initial indexing, +slowly during index rescans, and periodically shrink during grooming. +There is also an optional one-shot \fImaximal grooming\fP pass +available. It removes debuginfo-unrelated data from the +RPM content index such as file names found in RPMs ("rpm sdef" +records) that are not referred to as source files from any binaries +found in RPMs ("rpm sref" records). This can save considerable disk +space. However, it is slow and temporarily requires up to twice the +database size as free space. Worse: it may result in missing +source-code info if the RPM traversals were interrupted, so that not +all source file references were known. Use it rarely to polish a +complete index. + +You should ensure that ample disk space remains available. (The flood +of error messages on -ENOSPC is ugly and nagging. But, like for most +other errors, debuginfod will resume when resources permit.) If +necessary, debuginfod can be stopped, the database file moved or +removed, and debuginfod restarted. + +sqlite offers several performance-related options in the form of +pragmas. Some may be useful to fine-tune the defaults plus the +debuginfod extras. The \-D option may be useful to tell debuginfod to +execute the given bits of SQL after the basic schema creation +commands. For example, the "synchronous", "cache_size", +"auto_vacuum", "threads", "journal_mode" pragmas may be fun to tweak +via \-D, if you're searching for peak performance. The "optimize", +"wal_checkpoint" pragmas may be useful to run periodically, outside +debuginfod. The default settings are performance- rather than +reliability-oriented, so a hardware crash might corrupt the database. +In these cases, it may be necessary to manually delete the sqlite +database and start over. + +As debuginfod changes in the future, we may have no choice but to +change the database schema in an incompatible manner.
If this +happens, new versions of debuginfod will issue SQL statements to +\fIdrop\fP all prior schema & data, and start over. So, disk space +will not be wasted for retaining a no-longer-useable dataset. + +In summary, if your system can bear a 0.5%-3% index-to-RPM-dataset +size ratio, and slow growth afterwards, you should not need to +worry about disk space. If a system crash corrupts the database, +or you want to force debuginfod to reset and start over, simply +erase the sqlite file before restarting debuginfod. + + +.SH SECURITY + +debuginfod \fBdoes not\fP include any particular security features. +While it is robust with respect to inputs, some abuse is possible. It +forks a new thread for each incoming HTTP request, which could lead to +a denial-of-service in terms of RAM, CPU, disk I/O, or network I/O. +If this is a problem, users are advised to install debuginfod with a +HTTPS reverse-proxy front-end that enforces site policies for +firewalling, authentication, integrity, authorization, and load +control. + +When relaying queries to upstream debuginfods, debuginfod \fBdoes not\fP +include any particular security features. It trusts that the binaries +returned by the debuginfods are accurate. Therefore, the list of +servers should include only trustworthy ones. If accessed across HTTP +rather than HTTPS, the network should be trustworthy. Authentication +information through the internal \fIlibcurl\fP library is not currently +enabled. + + +.SH "ENVIRONMENT VARIABLES" + +.TP 21 +.B DEBUGINFOD_URLS +This environment variable contains a list of URL prefixes for trusted +debuginfod instances. Alternate URL prefixes are separated by space. +Avoid referential loops that cause a server to contact itself, directly +or indirectly - the results would be hilarious. + +.TP 21 +.B DEBUGINFOD_TIMEOUT +This environment variable governs the timeout for each debuginfod HTTP +connection. A server that fails to respond within this many seconds +is skipped. The default is 5. 
+ +.TP 21 +.B DEBUGINFOD_CACHE_PATH +This environment variable governs the location of the cache where +downloaded files are kept. It is cleaned periodically as this +program is reexecuted. The default is $HOME/.debuginfod_client_cache. +.\" XXX describe cache eviction policy + +.SH FILES +.LP +.PD .1v +.TP 20 +.B $HOME/.debuginfod.sqlite +Default database file. +.PD + +.TP 20 +.B $HOME/.debuginfod_client_cache +Default cache directory for content from upstream debuginfods. +.PD + + +.SH "SEE ALSO" +.I "debuginfod-find(1)" +.I "sqlite3(1)" +.I \%https://prometheus.io/docs/instrumenting/exporters/ diff --git a/tests/ChangeLog b/tests/ChangeLog index 369af37e..a5e57282 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,10 @@ +2019-10-28 Aaron Merey + Frank Ch. Eigler + + * run-debuginfod-find.sh, debuginfod_build_id_find.c: New test. + * testfile-debuginfod-*.rpm.bz2: New data files for test. + * Makefile.am: Run it. + 2019-11-14 Andreas Schwab * run-large-elf-file.sh: Skip if available memory cannot be diff --git a/tests/Makefile.am b/tests/Makefile.am index ad0855de..83d27a06 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1,6 +1,6 @@ ## Process this file with automake to create Makefile.in ## -## Copyright (C) 1996-2018 Red Hat, Inc. +## Copyright (C) 1996-2019 Red Hat, Inc. ## This file is part of elfutils. 
## ## This file is free software; you can redistribute it and/or modify @@ -190,6 +190,11 @@ check_PROGRAMS += $(asm_TESTS) TESTS += $(asm_TESTS) run-disasm-bpf.sh endif +if DEBUGINFOD +check_PROGRAMS += debuginfod_build_id_find +TESTS += run-debuginfod-find.sh +endif + EXTRA_DIST = run-arextract.sh run-arsymtest.sh run-ar.sh \ run-show-die-info.sh run-get-files.sh run-get-lines.sh \ run-next-files.sh run-next-lines.sh testfile-only-debug-line.bz2 \ @@ -440,7 +445,25 @@ EXTRA_DIST = run-arextract.sh run-arsymtest.sh run-ar.sh \ run-dwelf_elf_e_machine_string.sh \ run-elfclassify.sh run-elfclassify-self.sh \ run-disasm-riscv64.sh \ - testfile-riscv64-dis1.o.bz2 testfile-riscv64-dis1.expect.bz2 + testfile-riscv64-dis1.o.bz2 testfile-riscv64-dis1.expect.bz2 \ + run-debuginfod-find.sh \ + debuginfod-rpms/fedora30/hello2-1.0-2.src.rpm \ + debuginfod-rpms/fedora30/hello2-1.0-2.x86_64.rpm \ + debuginfod-rpms/fedora30/hello2-debuginfo-1.0-2.x86_64.rpm \ + debuginfod-rpms/fedora30/hello2-debugsource-1.0-2.x86_64.rpm \ + debuginfod-rpms/fedora30/hello2-two-1.0-2.x86_64.rpm \ + debuginfod-rpms/fedora30/hello2-two-debuginfo-1.0-2.x86_64.rpm \ + debuginfod-rpms/hello2.spec. 
\ + debuginfod-rpms/rhel6/hello2-1.0-2.i686.rpm \ + debuginfod-rpms/rhel6/hello2-1.0-2.src.rpm \ + debuginfod-rpms/rhel6/hello2-debuginfo-1.0-2.i686.rpm \ + debuginfod-rpms/rhel6/hello2-two-1.0-2.i686.rpm \ + debuginfod-rpms/rhel7/hello2-1.0-2.src.rpm \ + debuginfod-rpms/rhel7/hello2-1.0-2.x86_64.rpm \ + debuginfod-rpms/rhel7/hello2-debuginfo-1.0-2.x86_64.rpm \ + debuginfod-rpms/rhel7/hello2-two-1.0-2.x86_64.rpm + + if USE_VALGRIND valgrind_cmd='valgrind -q --leak-check=full --error-exitcode=1' @@ -474,7 +497,7 @@ TESTS_ENVIRONMENT = LC_ALL=C; LANG=C; VALGRIND_CMD=$(valgrind_cmd); \ export LC_ALL; export LANG; export VALGRIND_CMD; \ NM=$(NM); export NM; LOG_COMPILER = $(abs_srcdir)/test-wrapper.sh \ - $(abs_top_builddir)/libdw:$(abs_top_builddir)/backends:$(abs_top_builddir)/libelf:$(abs_top_builddir)/libasm + $(abs_top_builddir)/libdw:$(abs_top_builddir)/backends:$(abs_top_builddir)/libelf:$(abs_top_builddir)/libasm:$(abs_top_builddir)/debuginfod installcheck-local: $(MAKE) $(AM_MAKEFLAGS) \ @@ -610,6 +633,7 @@ unit_info_LDADD = $(libdw) next_cfi_LDADD = $(libelf) $(libdw) elfcopy_LDADD = $(libelf) addsections_LDADD = $(libelf) +debuginfod_build_id_find_LDADD = $(libelf) $(libdw) xlate_notes_LDADD = $(libelf) elfrdwrnop_LDADD = $(libelf) dwelf_elf_e_machine_string_LDADD = $(libelf) $(libdw) diff --git a/tests/debuginfod-rpms/fedora30/hello2-1.0-2.src.rpm b/tests/debuginfod-rpms/fedora30/hello2-1.0-2.src.rpm new file mode 100644 index 00000000..29a60999 Binary files /dev/null and b/tests/debuginfod-rpms/fedora30/hello2-1.0-2.src.rpm differ diff --git a/tests/debuginfod-rpms/fedora30/hello2-1.0-2.x86_64.rpm b/tests/debuginfod-rpms/fedora30/hello2-1.0-2.x86_64.rpm new file mode 100644 index 00000000..2757e01a Binary files /dev/null and b/tests/debuginfod-rpms/fedora30/hello2-1.0-2.x86_64.rpm differ diff --git a/tests/debuginfod-rpms/fedora30/hello2-debuginfo-1.0-2.x86_64.rpm b/tests/debuginfod-rpms/fedora30/hello2-debuginfo-1.0-2.x86_64.rpm new file mode 100644 index 
00000000..dc6e0f2f Binary files /dev/null and b/tests/debuginfod-rpms/fedora30/hello2-debuginfo-1.0-2.x86_64.rpm differ diff --git a/tests/debuginfod-rpms/fedora30/hello2-debugsource-1.0-2.x86_64.rpm b/tests/debuginfod-rpms/fedora30/hello2-debugsource-1.0-2.x86_64.rpm new file mode 100644 index 00000000..f036fa39 Binary files /dev/null and b/tests/debuginfod-rpms/fedora30/hello2-debugsource-1.0-2.x86_64.rpm differ diff --git a/tests/debuginfod-rpms/fedora30/hello2-two-1.0-2.x86_64.rpm b/tests/debuginfod-rpms/fedora30/hello2-two-1.0-2.x86_64.rpm new file mode 100644 index 00000000..e1c09dac Binary files /dev/null and b/tests/debuginfod-rpms/fedora30/hello2-two-1.0-2.x86_64.rpm differ diff --git a/tests/debuginfod-rpms/fedora30/hello2-two-debuginfo-1.0-2.x86_64.rpm b/tests/debuginfod-rpms/fedora30/hello2-two-debuginfo-1.0-2.x86_64.rpm new file mode 100644 index 00000000..b9a63dfa Binary files /dev/null and b/tests/debuginfod-rpms/fedora30/hello2-two-debuginfo-1.0-2.x86_64.rpm differ diff --git a/tests/debuginfod-rpms/hello2.spec. b/tests/debuginfod-rpms/hello2.spec. new file mode 100644 index 00000000..0690992f --- /dev/null +++ b/tests/debuginfod-rpms/hello2.spec. @@ -0,0 +1,57 @@ +Summary: hello2 -- double hello, world rpm +Name: hello2 +Version: 1.0 +Release: 2 +Group: Utilities +License: GPL +Distribution: RPM ^W Elfutils test suite. +Vendor: Red Hat Software +Packager: Red Hat Software +URL: http://www.redhat.com +BuildRequires: gcc make +Source0: hello-1.0.tar.gz + +%description +Simple rpm demonstration with an eye to consumption by debuginfod. + +%package two +Summary: hello2two +License: GPL + +%description two +Dittoish. 
+ +%prep +%setup -q -n hello-1.0 + +%build +gcc -g -O1 hello.c -o hello +gcc -g -O2 -D_FORTIFY_SOURCE=2 hello.c -o hello2 + +%install +rm -rf $RPM_BUILD_ROOT +mkdir -p $RPM_BUILD_ROOT/usr/local/bin +cp hello $RPM_BUILD_ROOT/usr/local/bin/ +cp hello2 $RPM_BUILD_ROOT/usr/local/bin/ + +%clean +rm -rf $RPM_BUILD_ROOT + +%files +%defattr(-,root,root) +%attr(0751,root,root) /usr/local/bin/hello + +%files two +%defattr(-,root,root) +%attr(0751,root,root) /usr/local/bin/hello2 + +%changelog +* Thu Nov 14 2019 Frank Ch. Eigler +- Added source code right here to make spec file self-contained. +- Dropped misc files not relevant to debuginfod testing. + +* Wed May 18 2016 Mark Wielaard +- Add hello2 for dwz testing support. + +* Tue Oct 20 1998 Jeff Johnson +- create. diff --git a/tests/debuginfod-rpms/rhel6/hello2-1.0-2.i686.rpm b/tests/debuginfod-rpms/rhel6/hello2-1.0-2.i686.rpm new file mode 100644 index 00000000..cb99fd6e Binary files /dev/null and b/tests/debuginfod-rpms/rhel6/hello2-1.0-2.i686.rpm differ diff --git a/tests/debuginfod-rpms/rhel6/hello2-1.0-2.src.rpm b/tests/debuginfod-rpms/rhel6/hello2-1.0-2.src.rpm new file mode 100644 index 00000000..603a9b1a Binary files /dev/null and b/tests/debuginfod-rpms/rhel6/hello2-1.0-2.src.rpm differ diff --git a/tests/debuginfod-rpms/rhel6/hello2-debuginfo-1.0-2.i686.rpm b/tests/debuginfod-rpms/rhel6/hello2-debuginfo-1.0-2.i686.rpm new file mode 100644 index 00000000..19f4360c Binary files /dev/null and b/tests/debuginfod-rpms/rhel6/hello2-debuginfo-1.0-2.i686.rpm differ diff --git a/tests/debuginfod-rpms/rhel6/hello2-two-1.0-2.i686.rpm b/tests/debuginfod-rpms/rhel6/hello2-two-1.0-2.i686.rpm new file mode 100644 index 00000000..58c77a1d Binary files /dev/null and b/tests/debuginfod-rpms/rhel6/hello2-two-1.0-2.i686.rpm differ diff --git a/tests/debuginfod-rpms/rhel7/hello2-1.0-2.src.rpm b/tests/debuginfod-rpms/rhel7/hello2-1.0-2.src.rpm new file mode 100644 index 00000000..5ca4d423 Binary files /dev/null and 
b/tests/debuginfod-rpms/rhel7/hello2-1.0-2.src.rpm differ diff --git a/tests/debuginfod-rpms/rhel7/hello2-1.0-2.x86_64.rpm b/tests/debuginfod-rpms/rhel7/hello2-1.0-2.x86_64.rpm new file mode 100644 index 00000000..0e6f6cc8 Binary files /dev/null and b/tests/debuginfod-rpms/rhel7/hello2-1.0-2.x86_64.rpm differ diff --git a/tests/debuginfod-rpms/rhel7/hello2-debuginfo-1.0-2.x86_64.rpm b/tests/debuginfod-rpms/rhel7/hello2-debuginfo-1.0-2.x86_64.rpm new file mode 100644 index 00000000..c1136f3a Binary files /dev/null and b/tests/debuginfod-rpms/rhel7/hello2-debuginfo-1.0-2.x86_64.rpm differ diff --git a/tests/debuginfod-rpms/rhel7/hello2-two-1.0-2.x86_64.rpm b/tests/debuginfod-rpms/rhel7/hello2-two-1.0-2.x86_64.rpm new file mode 100644 index 00000000..5e89afd2 Binary files /dev/null and b/tests/debuginfod-rpms/rhel7/hello2-two-1.0-2.x86_64.rpm differ diff --git a/tests/debuginfod_build_id_find.c b/tests/debuginfod_build_id_find.c new file mode 100644 index 00000000..753a20c2 --- /dev/null +++ b/tests/debuginfod_build_id_find.c @@ -0,0 +1,62 @@ +/* Test program for fetching debuginfo with debuginfo-server. + Copyright (C) 2019 Red Hat, Inc. + This file is part of elfutils. + + This file is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + elfutils is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ + + +#ifdef HAVE_CONFIG_H +# include +#endif +#include +#include ELFUTILS_HEADER(dwfl) +#include +#include +#include +#include +#include + +static const char *debuginfo_path = ""; +static const Dwfl_Callbacks cb = + { + NULL, + dwfl_standard_find_debuginfo, + NULL, + (char **)&debuginfo_path, + }; + +int +main (int argc __attribute__ ((unused)), char **argv) +{ + int expect_pass = strcmp(argv[3], "0"); + Dwarf_Addr bias = 0; + Dwfl *dwfl = dwfl_begin(&cb); + dwfl_report_begin(dwfl); + + /* Open an executable. */ + Dwfl_Module *mod = dwfl_report_offline(dwfl, argv[2], argv[2], -1); + + /* The corresponding debuginfo will not be found in debuginfo_path + (since it's empty), causing the server to be queried. */ + + Dwarf *res = dwfl_module_getdwarf(mod, &bias); + if (expect_pass) + assert(res); + else + assert(!res); + + dwfl_end (dwfl); + + return 0; +} diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh new file mode 100755 index 00000000..145c704a --- /dev/null +++ b/tests/run-debuginfod-find.sh @@ -0,0 +1,230 @@ +#!/bin/bash +# +# Copyright (C) 2019 Red Hat, Inc. +# This file is part of elfutils. +# +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# elfutils is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +set -x +. 
$srcdir/test-subr.sh # includes set -e + +DB=${PWD}/.debuginfod_tmp.sqlite +export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache + +# clean up trash if we were aborted early +trap 'kill $PID1 $PID2 || true; sleep 5; rm -rf F R ${PWD}/.client_cache*; exit_cleanup' 0 1 2 3 5 9 15 + +# find an unused port number +while true; do + PORT1=`expr '(' $RANDOM % 1000 ')' + 9000` + ss -atn | fgrep ":$PORT1" || break +done + +# We want to run debuginfod in the background. We also want to start +# it with the same check/installcheck-sensitive LD_LIBRARY_PATH stuff +# that the testrun alias sets. But: if we just use +# testrun .../debuginfod +# it runs in a subshell, with different pid, so not helpful. +# +# So we gather the LD_LIBRARY_PATH with this cunning trick: +ldpath=`testrun sh -c 'echo $LD_LIBRARY_PATH'` + +mkdir F R +# not tempfiles F R - they are directories which we clean up manually +env DEBUGINFOD_TEST_WEBAPI_SLEEP=3 LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod -F -R -vvvv -d $DB -p $PORT1 -t0 -g0 R F & +PID1=$! +sleep 3 +export DEBUGINFOD_URLS=http://localhost:$PORT1/ # or without trailing / + +# Be patient: when run on a busy machine, things might take a bit. +# And under valgrind debuginfod-find is really, really slow. +if [ "x$VALGRIND_CMD" = "x" ]; then + export DEBUGINFOD_TIMEOUT=60 +else + export DEBUGINFOD_TIMEOUT=300 +fi + +# We use -t0 and -g0 here to turn off time-based scanning & grooming. +# For testing purposes, we just sic SIGUSR1 / SIGUSR2 at the process. + +######################################################################## + +# Compile a simple program, strip its debuginfo and save the build-id. +# Also move the debuginfo into another directory so that elfutils +# cannot find it without debuginfod.
+echo "int main() { return 0; }" > ${PWD}/prog.c +tempfiles prog.c +gcc -g -o prog ${PWD}/prog.c + ${abs_top_builddir}/src/strip -g -f prog.debug ${PWD}/prog +BUILDID=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../src/readelf \ + -a prog | grep 'Build ID' | cut -d ' ' -f 7` + +mv prog F +mv prog.debug F +kill -USR1 $PID1 +sleep 3 # give enough time for scanning pass + +######################################################################## + +# Test whether elfutils, via the debuginfod client library dlopen hooks, +# is able to fetch debuginfo from the local debuginfod. +testrun ${abs_builddir}/debuginfod_build_id_find -e F/prog 1 + +######################################################################## + +# Test whether debuginfod-find is able to fetch those files. +rm -rf $DEBUGINFOD_CACHE_PATH # clean it from previous tests +filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo $BUILDID` +cmp $filename F/prog.debug + +filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find executable $BUILDID` +cmp $filename F/prog + +filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find source $BUILDID ${PWD}/prog.c` +cmp $filename ${PWD}/prog.c + +######################################################################## + +# Add artifacts to the search paths and test whether debuginfod finds them while already running. 
+ +# Build another, non-stripped binary +echo "int main() { return 0; }" > ${PWD}/prog2.c +tempfiles prog2.c +gcc -g -o prog2 ${PWD}/prog2.c +BUILDID2=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../src/readelf \ + -a prog2 | grep 'Build ID' | cut -d ' ' -f 7` + +mv prog2 F +kill -USR1 $PID1 +sleep 3 + +# Rerun same tests for the prog2 binary +filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo $BUILDID2` +cmp $filename F/prog2 +filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find executable $BUILDID2` +cmp $filename F/prog2 +filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find source $BUILDID2 ${PWD}/prog2.c` +cmp $filename ${PWD}/prog2.c + +cp -rp ${abs_srcdir}/debuginfod-rpms R +kill -USR1 $PID1 +sleep 10 +kill -USR1 $PID1 # two hits of SIGUSR1 may be needed to resolve .debug->dwz->srefs +sleep 10 + + +# Run a bank of queries against the debuginfod-rpms test cases + +rpm_test() { + __BUILDID=$1 + __SOURCEPATH=$2 + __SOURCESHA1=$3 + + filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find executable $__BUILDID` + buildid=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../src/readelf \ + -a $filename | grep 'Build ID' | cut -d ' ' -f 7` + test $__BUILDID = $buildid + + filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo $__BUILDID` + buildid=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../src/readelf \ + -a $filename | grep 'Build ID' | cut -d ' ' -f 7` + test $__BUILDID = $buildid + + filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find source $__BUILDID $__SOURCEPATH` + hash=`cat $filename | sha1sum | awk '{print $1}'` + test $__SOURCESHA1 = $hash +} + + +# common source file sha1 +SHA=f4a1a8062be998ae93b8f1cd744a398c6de6dbb1 +# fedora30 +rpm_test c36708a78618d597dee15d0dc989f093ca5f9120 /usr/src/debug/hello2-1.0-2.x86_64/hello.c $SHA +rpm_test 41a236eb667c362a1c4196018cc4581e09722b1b /usr/src/debug/hello2-1.0-2.x86_64/hello.c $SHA +# rhel7 +rpm_test 
bc1febfd03ca05e030f0d205f7659db29f8a4b30 /usr/src/debug/hello-1.0/hello.c $SHA +rpm_test f0aa15b8aba4f3c28cac3c2a73801fefa644a9f2 /usr/src/debug/hello-1.0/hello.c $SHA +# rhel6 +rpm_test bbbf92ebee5228310e398609c23c2d7d53f6e2f9 /usr/src/debug/hello-1.0/hello.c $SHA +rpm_test d44d42cbd7d915bc938c81333a21e355a6022fb7 /usr/src/debug/hello-1.0/hello.c $SHA + +RPM_BUILDID=d44d42cbd7d915bc938c81333a21e355a6022fb7 # in rhel6/ subdir, for a later test + + +######################################################################## + +# Drop some of the artifacts, run a groom cycle; confirm that +# debuginfod has forgotten them, but remembers others + +rm -r R/debuginfod-rpms/rhel6/* +kill -USR2 $PID1 # groom cycle +sleep 3 +rm -rf $DEBUGINFOD_CACHE_PATH # clean it from previous tests + +testrun ${abs_top_builddir}/debuginfod/debuginfod-find executable $RPM_BUILDID && false || true + +testrun ${abs_top_builddir}/debuginfod/debuginfod-find executable $BUILDID2 + +######################################################################## + +# Federation mode + +# find another unused port +while true; do + PORT2=`expr '(' $RANDOM % 1000 ')' + 9000` + ss -atn | fgrep ":$PORT2" || break +done + +export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache2 +mkdir -p $DEBUGINFOD_CACHE_PATH +# NB: inherits the DEBUGINFOD_URLS to the first server +env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../debuginfod/debuginfod -F -vvvv -d ${DB}_2 -p $PORT2 & +PID2=$! 
+sleep 3 + +# have clients contact the new server +export DEBUGINFOD_URLS=http://localhost:$PORT2 +testrun ${abs_builddir}/debuginfod_build_id_find -e F/prog 1 + +# test parallel queries in client +export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache3 +mkdir -p $DEBUGINFOD_CACHE_PATH +export DEBUGINFOD_URLS="BAD http://localhost:$PORT1 localhost:$PORT1 http://localhost:$PORT2 DNE" + +testrun ${abs_builddir}/debuginfod_build_id_find -e F/prog2 1 + + +######################################################################## + +# Run the tests again without the servers running. The target file should +# be found in the cache. + +kill -INT $PID1 $PID2 +sleep 5 +tempfiles .debuginfod_* + +testrun ${abs_builddir}/debuginfod_build_id_find -e F/prog2 1 + +######################################################################## + +# Trigger a cache clean and run the tests again. The clients should be unable to +# find the target. +echo 0 > $DEBUGINFOD_CACHE_PATH/cache_clean_interval_s +echo 0 > $DEBUGINFOD_CACHE_PATH/max_unused_age_s + +testrun ${abs_builddir}/debuginfod_build_id_find -e F/prog 1 + +testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo $BUILDID2 && false || true + +exit 0 -- cgit v1.2.3