author	Frank Ch. Eigler <fche@redhat.com>	2019-12-26 19:06:46 -0500
committer	Frank Ch. Eigler <fche@redhat.com>	2020-01-01 13:12:14 -0500
commit	53022c032943fa1c319537c7aa0dd955014916ee (patch)
tree	e5a8d37f78dabaff88fc9bbe31d05e4afd7f6db9
parent	b39082297d5bdefc19baa74573f46566fd253c09 (diff)
debuginfod: extracted-from-archive file cache
Add a facility to service webapi and dwz/altdebug requests that resolve to archives via a $TMPDIR file cache. This permits instantaneous dwz resolution during -debuginfo rpm scanning, and instantaneous service of duplicate webapi requests. The cache is limited both in number of entries and in storage space. Heuristics provide serviceable defaults.
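
As a rough illustration of the eviction policy being added, a minimal standalone sketch (hypothetical names throughout; not the patch's code):

// Sketch: an LRU of extracted files with twin budgets (file count and
// megabytes), mirroring the policy this commit adds to debuginfod.
#include <deque>
#include <string>
#include <cstdio>   // std::remove(path) deletes a file

struct cached_file { std::string key; std::string path; long mb; };

class file_lru
{
  std::deque<cached_file> lru;  // front = most recently used
  long max_files, max_mbs;
public:
  file_lru(long f, long m): max_files(f), max_mbs(m) {}

  void intern(const std::string& key, const std::string& path, long mb)
  {
    for (auto i = lru.begin(); i != lru.end(); ++i)
      if (i->key == key) // nuke any preexisting copy
        { std::remove(i->path.c_str()); lru.erase(i); break; }
    lru.push_front(cached_file{key, path, mb});

    long files = 0, mbs = 0; // accumulate from most recently used
    for (auto i = lru.begin(); i != lru.end(); ++i)
      {
        files++; mbs += i->mb;
        if (files > max_files || mbs > max_mbs) // budget exceeded:
          { // unlink and drop this entry and everything older
            for (auto j = i; j != lru.end(); ++j)
              std::remove(j->path.c_str());
            lru.erase(i, lru.end());
            break;
          }
      }
  }
};

int main()
{
  file_lru cache(2, 100);                      // at most 2 files / 100 MB
  cache.intern("a.rpm|/bin/a", "/tmp/x1", 60);
  cache.intern("b.rpm|/bin/b", "/tmp/x2", 60); // MB budget blown: x1 evicted
  return 0;
}

The real cache additionally re-opens a file on lookup() and moves it to the head of the LRU, which this sketch omits.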
-rw-r--r--config/ChangeLog4
-rw-r--r--config/debuginfod.service1
-rw-r--r--debuginfod/ChangeLog12
-rw-r--r--debuginfod/debuginfod.cxx234
-rw-r--r--doc/ChangeLog4
-rw-r--r--doc/debuginfod.810
-rw-r--r--tests/ChangeLog4
-rwxr-xr-xtests/run-debuginfod-find.sh9
8 files changed, 267 insertions, 11 deletions
diff --git a/config/ChangeLog b/config/ChangeLog
index cc4187bf..b56c2c15 100644
--- a/config/ChangeLog
+++ b/config/ChangeLog
@@ -1,3 +1,7 @@
+2019-12-26 Frank Ch. Eigler <fche@redhat.com>
+
+ * debuginfod.service: Set PrivateTmp=yes.
+
2019-12-22 Frank Ch. Eigler <fche@redhat.com>
* elfutils.spec.in (debuginfod): Add BuildRequire dpkg
diff --git a/config/debuginfod.service b/config/debuginfod.service
index d8ef072b..8fca343f 100644
--- a/config/debuginfod.service
+++ b/config/debuginfod.service
@@ -10,6 +10,7 @@ Group=debuginfod
#CacheDirectory=debuginfod
ExecStart=/usr/bin/debuginfod -d /var/cache/debuginfod/debuginfod.sqlite -p $DEBUGINFOD_PORT $DEBUGINFOD_VERBOSE $DEBUGINFOD_PRAGMAS $DEBUGINFOD_PATHS
TimeoutStopSec=10
+PrivateTmp=yes
[Install]
WantedBy=multi-user.target
diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 68102cea..9888868b 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -14,6 +14,18 @@
(thread_groom): Tweak metrics for consistency.
(main): Start 1 traversal and N scanner threads if needed.
+2019-12-26 Frank Ch. Eigler <fche@redhat.com>
+
+ * debuginfod.cxx (libarchive_fdcache): New class/facility to own a
+ cache of temporary files that were previously extracted from an
+ archive. If only it could store just unlinked fd's instead of
+ filenames.
+ (handle_buildid_r_match): Use it to answer dwz/altdebug and webapi
+ requests.
+ (groom): Clean it.
+ (main): Initialize the cache control parameters from heuristics.
+ Use a consistent tmpdir for these and tmp files elsewhere.
+
2019-12-22 Frank Ch. Eigler <fche@redhat.com>
* debuginfod.cxx (*_rpm_*): Rename to *_archive_* throughout.
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 05fbacc2..4482af71 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -52,6 +52,7 @@ extern "C" {
#include <signal.h>
#include <sys/stat.h>
#include <sys/time.h>
+#include <sys/vfs.h>
#include <unistd.h>
#include <fcntl.h>
#include <netdb.h>
@@ -76,6 +77,7 @@ extern "C" {
#include <string>
#include <iostream>
#include <iomanip>
+#include <deque>
#include <ostream>
#include <sstream>
#include <mutex>
@@ -349,7 +351,10 @@ static const struct argp_option options[] =
{ "database", 'd', "FILE", 0, "Path to sqlite database.", 0 },
{ "ddl", 'D', "SQL", 0, "Apply extra sqlite ddl/pragma to connection.", 0 },
{ "verbose", 'v', NULL, 0, "Increase verbosity.", 0 },
-
+#define ARGP_KEY_FDCACHE_FDS 0x1001
+ { "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", 0, "Maximum number of archive files to keep in fdcache.", 0 },
+#define ARGP_KEY_FDCACHE_MBS 0x1002
+ { "fdcache-mbs", ARGP_KEY_FDCACHE_MBS, "MB", 0, "Maximum total size of archive file fdcache.", 0 },
{ NULL, 0, NULL, 0, NULL, 0 }
};
@@ -378,7 +383,7 @@ static volatile sig_atomic_t sigusr2 = 0;
static unsigned http_port = 8002;
static unsigned rescan_s = 300;
static unsigned groom_s = 86400;
-static unsigned maxigroom = false;
+static bool maxigroom = false;
static unsigned concurrency = std::thread::hardware_concurrency() ?: 1;
static set<string> source_paths;
static bool scan_files = false;
@@ -387,6 +392,9 @@ static vector<string> extra_ddl;
static regex_t file_include_regex;
static regex_t file_exclude_regex;
static bool traverse_logical;
+static long fdcache_fds;
+static long fdcache_mbs;
+static string tmpdir;
static void set_metric(const string& key, int64_t value);
// static void inc_metric(const string& key);
@@ -451,6 +459,12 @@ parse_opt (int key, char *arg,
if (rc != 0)
argp_failure(state, 1, EINVAL, "regular expression");
break;
+ case ARGP_KEY_FDCACHE_FDS:
+ fdcache_fds = atol (arg);
+ break;
+ case ARGP_KEY_FDCACHE_MBS:
+ fdcache_mbs = atol (arg);
+ break;
case ARGP_KEY_ARG:
source_paths.insert(string(arg));
break;
@@ -743,8 +757,6 @@ private:
-
-
static string
conninfo (struct MHD_Connection * conn)
{
@@ -869,6 +881,148 @@ shell_escape(const string& str)
}
+// A map-like class that owns a cache of file descriptors (indexed by
+// file / content names).
+//
+// If only it could use fd's instead of file names ... but we can't
+// dup(2) to create independent descriptors for the same unlinked
+// files, so we would have to use some goofy Linux /proc/self/fd/%d
+// hack such as the following:
+
+#if 0
+int superdup(int fd)
+{
+#ifdef __linux__
+ char *fdpath = NULL;
+ int rc = asprintf(& fdpath, "/proc/self/fd/%d", fd);
+ int newfd;
+ if (rc >= 0)
+ newfd = open(fdpath, O_RDONLY);
+ else
+ newfd = -1;
+ free (fdpath);
+ return newfd;
+#else
+ return -1;
+#endif
+}
+#endif
+
+class libarchive_fdcache
+{
+private:
+ mutex fdcache_lock;
+
+ struct fdcache_entry
+ {
+ string archive;
+ string entry;
+ string fd; // NB: a file name, not a descriptor
+ long fd_size_mb; // rounded up megabytes
+ };
+ deque<fdcache_entry> lru; // @head: most recently used
+ long max_fds;
+ long max_mbs;
+
+public:
+ void intern(const string& a, const string& b, string fd, off_t sz)
+ {
+ {
+ unique_lock<mutex> lock(fdcache_lock);
+ for (auto i = lru.begin(); i < lru.end(); i++) // nuke preexisting copy
+ {
+ if (i->archive == a && i->entry == b)
+ {
+ unlink (i->fd.c_str());
+ lru.erase(i);
+ break; // must not continue iterating
+ }
+ }
+ long mb = ((sz+1023)/1024+1023)/1024;
+ fdcache_entry n = { a, b, fd, mb };
+ lru.push_front(n);
+ if (verbose > 3)
+ obatched(clog) << "fdcache interned a=" << a << " b=" << b << " fd=" << fd << " mb=" << mb << endl;
+ }
+
+ this->limit(max_fds, max_mbs); // age cache if required
+ }
+
+ int lookup(const string& a, const string& b)
+ {
+ unique_lock<mutex> lock(fdcache_lock);
+ for (auto i = lru.begin(); i < lru.end(); i++)
+ {
+ if (i->archive == a && i->entry == b)
+ { // found it; move it to head of lru
+ fdcache_entry n = *i;
+ lru.erase(i); // invalidates i, so no more iteration!
+ lru.push_front(n);
+
+ return open(n.fd.c_str(), O_RDONLY); // NB: no problem if open() fails; it just looks like a cache miss
+ }
+ }
+ return -1;
+ }
+
+ void clear(const string& a, const string& b)
+ {
+ unique_lock<mutex> lock(fdcache_lock);
+ for (auto i = lru.begin(); i < lru.end(); i++)
+ {
+ if (i->archive == a && i->entry == b)
+ { // found it; erase the entry and unlink its file
+ fdcache_entry n = *i;
+ lru.erase(i); // invalidates i, so no more iteration!
+ unlink (n.fd.c_str());
+ return;
+ }
+ }
+ }
+
+ void limit(long maxfds, long maxmbs)
+ {
+ if (verbose > 3 && (this->max_fds != maxfds || this->max_mbs != maxmbs))
+ obatched(clog) << "fdcache limited to maxfds=" << maxfds << " maxmbs=" << maxmbs << endl;
+
+ unique_lock<mutex> lock(fdcache_lock);
+ this->max_fds = maxfds;
+ this->max_mbs = maxmbs;
+
+ long total_fd = 0;
+ long total_mb = 0;
+ for (auto i = lru.begin(); i < lru.end(); i++)
+ {
+ // accumulate totals from most recently used one going backward
+ total_fd ++;
+ total_mb += i->fd_size_mb;
+ if (total_fd > max_fds || total_mb > max_mbs)
+ {
+ // found the cutoff point!
+
+ for (auto j = i; j < lru.end(); j++) // unlink all files from here to the end
+ {
+ if (verbose > 3)
+ obatched(clog) << "fdcache evicted a=" << j->archive << " b=" << j->entry
+ << " fd=" << j->fd << " mb=" << j->fd_size_mb << endl;
+ unlink (j->fd.c_str());
+ }
+
+ lru.erase(i, lru.end()); // erase this entry and all older ones
+ break;
+ }
+
+ }
+ }
+
+ ~libarchive_fdcache()
+ {
+ limit(0, 0);
+ }
+};
+static libarchive_fdcache fdcache;
+
+
static struct MHD_Response*
handle_buildid_r_match (int64_t b_mtime,
const string& b_source0,
@@ -887,6 +1041,41 @@ handle_buildid_r_match (int64_t b_mtime,
return 0;
}
+ int fd = fdcache.lookup(b_source0, b_source1);
+ while (fd >= 0) // got one!; NB: this is really an if() with a possible branch out to the end
+ {
+ rc = fstat(fd, &fs);
+ if (rc < 0) // disappeared?
+ {
+ if (verbose)
+ obatched(clog) << "cannot fstat fdcache " << b_source0 << endl;
+ close(fd);
+ fdcache.clear(b_source0, b_source1);
+ break; // branch out of if "loop", to try new libarchive fetch attempt
+ }
+
+ struct MHD_Response* r = MHD_create_response_from_fd (fs.st_size, fd);
+ if (r == 0)
+ {
+ if (verbose)
+ obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
+ close(fd);
+ break; // branch out of if "loop", to try new libarchive fetch attempt
+ }
+
+ inc_metric ("http_responses_total","result","archive fdcache");
+
+ MHD_add_response_header (r, "Content-Type", "application/octet-stream");
+ add_mhd_last_modified (r, fs.st_mtime);
+ if (verbose > 1)
+ obatched(clog) << "serving fdcache archive " << b_source0 << " file " << b_source1 << endl;
+ /* libmicrohttpd will close it. */
+ if (result_fd)
+ *result_fd = fd;
+ return r;
+ // NB: see, we never go around the 'loop' more than once
+ }
+
string archive_decoder = "/dev/null";
string archive_extension = "";
for (auto&& arch : scan_archives)
@@ -933,19 +1122,27 @@ handle_buildid_r_match (int64_t b_mtime,
continue;
// extract this file to a temporary file
- char tmppath[PATH_MAX] = "/tmp/debuginfod.XXXXXX"; // XXX: $TMP_DIR etc.
- int fd = mkstemp (tmppath);
+ char* tmppath = NULL;
+ rc = asprintf (&tmppath, "%s/debuginfod.XXXXXX", tmpdir.c_str());
+ if (rc < 0)
+ throw libc_exception (ENOMEM, "cannot allocate tmppath");
+ defer_dtor<void*,void> tmmpath_freer (tmppath, free);
+ fd = mkstemp (tmppath);
if (fd < 0)
throw libc_exception (errno, "cannot create temporary file");
- unlink (tmppath); // unlink now so OS will release the file as soon as we close the fd
rc = archive_read_data_into_fd (a, fd);
- if (rc != ARCHIVE_OK)
+ if (rc != ARCHIVE_OK) // e.g. ENOSPC!
{
close (fd);
+ unlink (tmppath);
throw archive_exception(a, "cannot extract file");
}
+ // NB: now we know we have a complete reusable file; make fdcache
+ // responsible for unlinking it later.
+ fdcache.intern(b_source0, b_source1, tmppath, archive_entry_size(e));
+
inc_metric ("http_responses_total","result",archive_extension + " archive");
struct MHD_Response* r = MHD_create_response_from_fd (archive_entry_size(e), fd);
if (r == 0)
@@ -1819,9 +2016,8 @@ archive_classify (const string& rps, string& archive_extension,
obatched(clog) << "libarchive checking " << fn << endl;
// extract this file to a temporary file
- const char *tmpdir_env = getenv ("TMPDIR") ?: "/tmp";
char* tmppath = NULL;
- rc = asprintf (&tmppath, "%s/debuginfod.XXXXXX", tmpdir_env);
+ rc = asprintf (&tmppath, "%s/debuginfod.XXXXXX", tmpdir.c_str());
if (rc < 0)
throw libc_exception (ENOMEM, "cannot allocate tmppath");
defer_dtor<void*,void> tmmpath_freer (tmppath, free);
@@ -2356,6 +2552,9 @@ void groom()
sqlite3_db_release_memory(db); // shrink the process if possible
+ fdcache.limit(0,0); // release the fdcache contents
+ fdcache.limit(fdcache_fds,fdcache_mbs); // restore status quo parameters
+
gettimeofday (&tv_end, NULL);
double deltas = (tv_end.tv_sec - tv_start.tv_sec) + (tv_end.tv_usec - tv_start.tv_usec)*0.000001;
@@ -2471,6 +2670,8 @@ main (int argc, char *argv[])
/* Tell the library which version we are expecting. */
elf_version (EV_CURRENT);
+ tmpdir = string(getenv("TMPDIR") ?: "/tmp");
+
/* Set computed default values. */
db_path = string(getenv("HOME") ?: "/") + string("/.debuginfod.sqlite"); /* XDG? */
int rc = regcomp (& file_include_regex, ".*", REG_EXTENDED|REG_NOSUB); // match everything
@@ -2480,6 +2681,15 @@ main (int argc, char *argv[])
if (rc != 0)
error (EXIT_FAILURE, 0, "regcomp failure: %d", rc);
+ // default parameters for fdcache are computed from system stats
+ struct statfs sfs;
+ rc = statfs(tmpdir.c_str(), &sfs);
+ if (rc < 0)
+ fdcache_mbs = 1024; // 1 gigabyte
+ else
+ fdcache_mbs = sfs.f_bavail * sfs.f_bsize / 1024 / 1024 / 4; // 25% of free space
+ fdcache_fds = concurrency * 2;
+
/* Parse and process arguments. */
int remaining;
argp_program_version_hook = print_version; // this works
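
For reference, the default-sizing heuristic above extracted into a standalone sketch (GNU C++, since it leans on the same `?:` extension the server code uses; output formatting is hypothetical):

// Sketch: how the fdcache defaults are derived from system stats.
#include <sys/vfs.h>   // statfs
#include <thread>
#include <cstdio>
#include <cstdlib>

int main()
{
  const char* tmpdir = getenv("TMPDIR") ?: "/tmp";
  struct statfs sfs;
  long fdcache_mbs;
  if (statfs(tmpdir, &sfs) < 0)
    fdcache_mbs = 1024;  // fallback: 1 gigabyte
  else                   // 25% of the free space on $TMPDIR
    fdcache_mbs = (long) sfs.f_bavail * sfs.f_bsize / 1024 / 1024 / 4;
  long fdcache_fds = (std::thread::hardware_concurrency() ?: 1) * 2;
  printf("fdcache-mbs=%ld fdcache-fds=%ld\n", fdcache_mbs, fdcache_fds);
  return 0;
}

With, say, 8 hardware threads and 40 GiB free under $TMPDIR, that works out to fdcache-fds=16 and fdcache-mbs=10240.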
@@ -2491,6 +2701,8 @@ main (int argc, char *argv[])
if (scan_archives.size()==0 && !scan_files && source_paths.size()>0)
obatched(clog) << "warning: without -F -R -U, ignoring PATHs" << endl;
+ fdcache.limit(fdcache_fds, fdcache_mbs);
+
(void) signal (SIGPIPE, SIG_IGN); // microhttpd can generate it incidentally, ignore
(void) signal (SIGINT, signal_handler); // ^C
(void) signal (SIGHUP, signal_handler); // EOF
@@ -2608,6 +2820,8 @@ main (int argc, char *argv[])
obatched(clog) << "search concurrency " << concurrency << endl;
obatched(clog) << "rescan time " << rescan_s << endl;
+ obatched(clog) << "fdcache fds " << fdcache_fds << endl;
+ obatched(clog) << "fdcache mbs " << fdcache_mbs << endl;
obatched(clog) << "groom time " << groom_s << endl;
if (scan_archives.size()>0)
{
diff --git a/doc/ChangeLog b/doc/ChangeLog
index 24f62af9..7117c1a2 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -3,6 +3,10 @@
* debuginfod.8: Rework sections dealing with traversal/scanning,
explaining new threading model.
+2019-12-26 Frank Ch. Eigler <fche@redhat.com>
+
+ * debuginfod.8: Document --fdcache-fds and --fdcache-mbs opts.
+
2019-12-22 Frank Ch. Eigler <fche@redhat.com>
* debuginfod.8: Add -U (DEB) flag, generalize RPM to "archive".
diff --git a/doc/debuginfod.8 b/doc/debuginfod.8
index d2917285..084d62aa 100644
--- a/doc/debuginfod.8
+++ b/doc/debuginfod.8
@@ -176,6 +176,16 @@ loops in the symbolic directory tree might lead to \fIinfinite
traversal\fP.
.TP
+.B "\-\-fdcache-fds=NUM" "\-\-fdcache-mbs=MB"
+Configure limits on a cache that keeps recently extracted files from
+archives. Up to NUM files and up to a total of MB megabytes will be
+kept extracted, in order to avoid having to decompress their archives
+again. The default NUM and MB values depend on the concurrency of the
+system, and on the available disk space of the $TMPDIR or \fB/tmp\fP
+filesystem, since that is where the extracted files are kept. For
+example, \fB\-\-fdcache-fds=32 \-\-fdcache-mbs=512\fP caps the cache at
+32 files and 512 megabytes. Grooming cleans this cache.
+
+.TP
.B "\-v"
Increase verbosity of logging to the standard error file descriptor.
May be repeated to increase details. The default verbosity is 0.
diff --git a/tests/ChangeLog b/tests/ChangeLog
index b087e60a..0b329a23 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -2,6 +2,10 @@
* run-debuginfod-find.sh: Adjust to new work-queue metrics.
+2019-12-26 Frank Ch. Eigler <fche@redhat.com>
+
+ * run-debuginfod-find.sh: Test --fdcache* options.
+
2019-12-22 Frank Ch. Eigler <fche@redhat.com>
* debuginfod-debs/*: New test files, based on
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 277e9719..3bcd84f9 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -87,7 +87,7 @@ wait_ready()
fi
}
-env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 R F L &
+env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 R F L &
PID1=$!
# Server must become ready
wait_ready $PORT1 'ready' 1
@@ -213,6 +213,12 @@ archive_test() {
-a $filename | grep 'Build ID' | cut -d ' ' -f 7`
test $__BUILDID = $buildid
+ # run again to verify that the fdcache is being used
+ filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find executable $__BUILDID`
+ buildid=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../src/readelf \
+ -a $filename | grep 'Build ID' | cut -d ' ' -f 7`
+ test $__BUILDID = $buildid
+
filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo $__BUILDID`
buildid=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../src/readelf \
-a $filename | grep 'Build ID' | cut -d ' ' -f 7`
@@ -323,6 +329,7 @@ if type curl 2>/dev/null; then
curl http://127.0.0.1:$PORT1/metrics
curl http://127.0.0.1:$PORT2/metrics
curl http://127.0.0.1:$PORT1/metrics | grep -q 'http_responses_total.*result.*error'
+ curl http://127.0.0.1:$PORT1/metrics | grep -q 'http_responses_total.*result.*fdcache'
curl http://127.0.0.1:$PORT2/metrics | grep -q 'http_responses_total.*result.*upstream'
fi