author    Frank Ch. Eigler <fche@redhat.com>  2023-04-11 23:35:25 -0400
committer Frank Ch. Eigler <fche@redhat.com>  2023-10-18 14:59:32 -0400
commit    8dd1108b6779b8efd5e4037e0653de7702839cc8 (patch)
tree      dc3b099a6eaa9a6723d77c9f8072a4ff6bd3f5c1
parent    4e45a08aee42958298a3fad6043cbf96243d13a5 (diff)

PR29472: debuginfod: add metadata query webapi, C api, client (branch: upstream/users/fche/try-pr29472)
This patch extends the debuginfod API with a "metadata query" operation.
It allows clients to request an enumeration of file names known to
debuginfod servers, returning a JSON response that includes the matching
buildids. This lets clients later download debuginfo for a range of
versions of the same named binaries, in case they need to do prospective
work (like systemtap-based live-patching). It also lets server operators
implement prefetch triggering operations for popular but slow debuginfo
slivers like kernel vdso.debug files on Fedora.

Implementation requires a modern enough json-c library, namely 0.11,
which dates from 2014. Without that, the debuginfod client/server bits
will refuse to build.

    % debuginfod-find metadata file /bin/ls
    % debuginfod-find metadata glob "/usr/local/bin/c*"

Documentation and testing are included.

Signed-off-by: Ryan Goldberg <rgoldber@redhat.com>
Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
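For illustration only (not part of the commit): a minimal sketch of how a
client program might drive the new C entry point, based on the
debuginfod_find_metadata declaration added to debuginfod.h.in below. The
installed header path and link flag are assumptions; on success the call
returns a file descriptor onto the cached JSON result and, if requested,
its path.

    /* Hypothetical example, not from this patch. Assumed build:
       gcc metadata-example.c -ldebuginfod */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>
    #include <elfutils/debuginfod.h>

    int
    main (void)
    {
      debuginfod_client *c = debuginfod_begin ();
      if (c == NULL)
        return 1;

      char value[] = "/bin/ls";  /* filename to enumerate; use key "glob" for patterns */
      char *path = NULL;

      /* Returns a file descriptor onto the cached JSON document, or -errno. */
      int fd = debuginfod_find_metadata (c, "file", value, &path);
      if (fd >= 0)
        {
          printf ("metadata JSON cached at %s\n", path);
          free (path);
          close (fd);
        }
      else
        fprintf (stderr, "query failed: %s\n", strerror (-fd));

      debuginfod_end (c);
      return 0;
    }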
-rw-r--r--ChangeLog6
-rw-r--r--NEWS2
-rw-r--r--config/elfutils.spec.in4
-rw-r--r--configure.ac7
-rw-r--r--debuginfod/ChangeLog23
-rw-r--r--debuginfod/Makefile.am8
-rw-r--r--debuginfod/debuginfod-client.c1291
-rw-r--r--debuginfod/debuginfod-find.c123
-rw-r--r--debuginfod/debuginfod.cxx373
-rw-r--r--debuginfod/debuginfod.h.in27
-rw-r--r--debuginfod/libdebuginfod.map3
-rw-r--r--doc/ChangeLog7
-rw-r--r--doc/debuginfod-client-config.78
-rw-r--r--doc/debuginfod-find.159
-rw-r--r--doc/debuginfod.818
-rw-r--r--tests/ChangeLog6
-rw-r--r--tests/Makefile.am12
-rwxr-xr-xtests/debuginfod-subr.sh1
-rwxr-xr-xtests/run-debuginfod-find-metadata.sh112
19 files changed, 1566 insertions, 524 deletions
diff --git a/ChangeLog b/ChangeLog
index b3b1a8eb..72205e97 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2023-04-12 Ryan Goldberg <rgoldberg@redhat.com>, Frank Ch. Eigler <fche@redhat.com>
+
+ PR29472: debuginfod metadata query
+ * NEWS: Mention this.
+ * configure.ac: Look for json-c library.
+
2023-08-14 Ryan Goldberg <rgoldber@redhat.com>
* configure.ac (ENABLE_IMA_VERIFICATION): Look for librpm, libimaevm and libcrypto
diff --git a/NEWS b/NEWS
index 53c717eb..281bbb6d 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,8 @@ debuginfod: Schema change (reindexing required, sorry!) for a 60%
part of the sqlite index; also, more deliberate sqlite
-wal management during scanning using the
--scan-checkpoint option.
+
+debuginfod: New API for metadata queries: file name -> buildid.
Version 0.189 "Don't deflate!"
diff --git a/config/elfutils.spec.in b/config/elfutils.spec.in
index 2e962bb4..8b5a08b0 100644
--- a/config/elfutils.spec.in
+++ b/config/elfutils.spec.in
@@ -31,6 +31,8 @@ BuildRequires: pkgconfig(libmicrohttpd) >= 0.9.33
BuildRequires: pkgconfig(libcurl) >= 7.29.0
BuildRequires: pkgconfig(sqlite3) >= 3.7.17
BuildRequires: pkgconfig(libarchive) >= 3.1.2
+# For debuginfod metadata query
+BuildRequires: pkgconfig(json-c) >= 0.11
# For tests need to bunzip2 test files.
BuildRequires: bzip2
@@ -42,6 +44,8 @@ BuildRequires: bsdtar
BuildRequires: curl
# For run-debuginfod-response-headers.sh test case
BuildRequires: socat
+# For run-debuginfod-find-metadata.sh
+BuildRequires: jq
# For debuginfod rpm IMA verification
BuildRequires: rpm-devel
diff --git a/configure.ac b/configure.ac
index bedd99e3..52257cf6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -851,9 +851,6 @@ AS_IF([test "x$enable_libdebuginfod" != "xno"], [
enable_libdebuginfod=yes # presume success
PKG_PROG_PKG_CONFIG
PKG_CHECK_MODULES([libcurl],[libcurl >= 7.29.0],[],[enable_libdebuginfod=no])
- if test "x$enable_libdebuginfod" = "xno"; then
- AC_MSG_ERROR([dependencies not found, use --disable-libdebuginfod to disable or --enable-libdebuginfod=dummy to build a (bootstrap) dummy library.])
- fi
else
AC_MSG_NOTICE([building (bootstrap) dummy libdebuginfo library])
fi
@@ -886,9 +883,7 @@ AS_IF([test "x$enable_debuginfod" != "xno"], [
PKG_CHECK_MODULES([oldlibmicrohttpd],[libmicrohttpd < 0.9.51],[old_libmicrohttpd=yes],[old_libmicrohttpd=no])
PKG_CHECK_MODULES([sqlite3],[sqlite3 >= 3.7.17],[],[enable_debuginfod=no])
PKG_CHECK_MODULES([libarchive],[libarchive >= 3.1.2],[],[enable_debuginfod=no])
- if test "x$enable_debuginfod" = "xno"; then
- AC_MSG_ERROR([dependencies not found, use --disable-debuginfod to disable.])
- fi
+ PKG_CHECK_MODULES([jsonc],[json-c >= 0.11],[],[enable_debuginfod=no])
])
AS_IF([test "x$enable_debuginfod" != "xno"],AC_DEFINE([ENABLE_DEBUGINFOD],[1],[Build debuginfod]))
diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index f4d98c2e..6cc8920c 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,26 @@
+2023-04-12 Ryan Goldberg <rgoldberg@redhat.com>, Frank Ch. Eigler <fche@redhat.com>
+
+ PR29472: debuginfod metadata query
+ * Makefile.am: Add json-c usage.
+ * debuginfod-client.c (debuginfod_find_metadata): New function.
+ (handle_data): New fields to hold metadata being received.
+ (debuginfod_clean_cache): Clean metadata too.
+ (header_callback): Simplify to realloc only.
+ (metadata_callback): New function.
+ (init_server_urls, init_handle, perform_queries, make_cache_path):
+ New refactored functions.
+ (debuginfod_query_server_by_buildid): Renamed, refactored. Update
+ callers.
+ * debuginfod-find.c (main): Handle metadata queries.
+ * debuginfod.cxx (DEBUGINFOD_SQLITE_DDL): Add an index or two.
+ (metadata_maxtime_s, parse_opt): New parameter for load control.
+ (add_client_federation_headers): New refactored function.
+ (handle_metadata): New function.
+ (handler_cb): Call it for /metadata URL. Trace it.
+ (groom): Tweak sqlite_ps object lifetimes.
+ * debuginfod.h.in (debuginfod_find_metadata): New decl.
+ * libdebuginfod.map: Export it under ELFUTILS_0.190.
+
2023-08-14 Ryan Goldberg <rgoldber@redhat.com>
* debuginfod.cxx (handle_buildid_r_match): Added extraction of the
diff --git a/debuginfod/Makefile.am b/debuginfod/Makefile.am
index fb9f1fbc..265d6d56 100644
--- a/debuginfod/Makefile.am
+++ b/debuginfod/Makefile.am
@@ -33,7 +33,7 @@ include $(top_srcdir)/config/eu.am
AM_CPPFLAGS += -I$(srcdir) -I$(srcdir)/../libelf -I$(srcdir)/../libebl \
-I$(srcdir)/../libdw -I$(srcdir)/../libdwelf \
$(libmicrohttpd_CFLAGS) $(libcurl_CFLAGS) $(sqlite3_CFLAGS) \
- $(libarchive_CFLAGS)
+ $(libarchive_CFLAGS) $(jsonc_CFLAGS)
# Disable eu- prefixing for artifacts (binaries & man pages) in this
# directory, since they do not conflict with binutils tools.
@@ -70,10 +70,10 @@ bin_PROGRAMS += debuginfod-find
endif
debuginfod_SOURCES = debuginfod.cxx
-debuginfod_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(argp_LDADD) $(fts_LIBS) $(libmicrohttpd_LIBS) $(sqlite3_LIBS) $(libarchive_LIBS) $(rpm_LIBS) -lpthread -ldl
+debuginfod_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(argp_LDADD) $(fts_LIBS) $(libmicrohttpd_LIBS) $(sqlite3_LIBS) $(libarchive_LIBS) $(rpm_LIBS) $(jsonc_LIBS) $(libcurl_LIBS) -lpthread -ldl
debuginfod_find_SOURCES = debuginfod-find.c
-debuginfod_find_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(argp_LDADD) $(fts_LIBS)
+debuginfod_find_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(argp_LDADD) $(fts_LIBS) $(jsonc_LIBS)
if LIBDEBUGINFOD
noinst_LIBRARIES = libdebuginfod.a
@@ -97,7 +97,7 @@ libdebuginfod_so_LIBS = libdebuginfod_pic.a
if DUMMY_LIBDEBUGINFOD
libdebuginfod_so_LDLIBS =
else
-libdebuginfod_so_LDLIBS = -lpthread $(libcurl_LIBS) $(fts_LIBS) $(libelf) $(imaevm_LIBS) $(crypto_LIBS)
+libdebuginfod_so_LDLIBS = -lpthread $(libcurl_LIBS) $(fts_LIBS) $(libelf) $(imaevm_LIBS) $(crypto_LIBS) $(jsonc_LIBS)
endif
$(LIBDEBUGINFOD_SONAME): $(srcdir)/libdebuginfod.map $(libdebuginfod_so_LIBS)
$(AM_V_CCLD)$(LINK) $(dso_LDFLAGS) -o $@ \
diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c
index 7163c887..92b2660d 100644
--- a/debuginfod/debuginfod-client.c
+++ b/debuginfod/debuginfod-client.c
@@ -1,5 +1,5 @@
/* Retrieve ELF / DWARF / source files from the debuginfod.
- Copyright (C) 2019-2021 Red Hat, Inc.
+ Copyright (C) 2019-2023 Red Hat, Inc.
Copyright (C) 2021, 2022 Mark J. Wielaard <mark@klomp.org>
This file is part of elfutils.
@@ -60,6 +60,8 @@ int debuginfod_find_source (debuginfod_client *c, const unsigned char *b,
int debuginfod_find_section (debuginfod_client *c, const unsigned char *b,
int s, const char *scn, char **p)
{ return -ENOSYS; }
+int debuginfod_find_metadata (debuginfod_client *c,
+ const char *k, char *v, char **p) { return -ENOSYS; }
void debuginfod_set_progressfn(debuginfod_client *c,
debuginfod_progressfn_t fn) { }
void debuginfod_set_verbose_fd(debuginfod_client *c, int fd) { }
@@ -93,6 +95,7 @@ void debuginfod_end (debuginfod_client *c) { }
#include <sys/utsname.h>
#include <curl/curl.h>
#include <fnmatch.h>
+#include <json-c/json.h>
/* If fts.h is included before config.h, its indirect inclusions may not
give us the right LFS aliases of these functions, so map them manually. */
@@ -238,6 +241,11 @@ static const char *cache_miss_filename = "cache_miss_s";
static const char *cache_max_unused_age_filename = "max_unused_age_s";
static const long cache_default_max_unused_age_s = 604800; /* 1 week */
+/* The metadata_retention_default_s file within the debuginfod cache
+ specifies how long metadata query results should be cached. */
+static const long metadata_retention_default_s = 3600; /* 1 hour */
+static const char *metadata_retention_filename = "metadata_retention_s";
+
/* Location of the cache of files downloaded from debuginfods.
The default parent directory is $HOME, or '/' if $HOME doesn't exist. */
static const char *cache_default_name = ".debuginfod_client_cache";
@@ -276,9 +284,17 @@ struct handle_data
to the cache. Used to ensure that a file is not downloaded from
multiple servers unnecessarily. */
CURL **target_handle;
+
+ /* The IMA policy in effect for this handle. */
+ ima_policy_t ima_policy;
+
/* Response http headers for this client handle, sent from the server */
char *response_data;
size_t response_data_size;
+
+ /* Response metadata values for this client handle, sent from the server */
+ char *metadata;
+ size_t metadata_size;
};
static size_t
@@ -407,7 +423,8 @@ debuginfod_clean_cache(debuginfod_client *c,
return -errno;
regex_t re;
- const char * pattern = ".*/[a-f0-9]+(/debuginfo|/executable|/source.*|)$"; /* include dirs */
+ const char * pattern = ".*/(metadata.*|[a-f0-9]+(/debuginfo|/executable|/source.*|))$"; /* include dirs */
+ /* NB: also matches .../section/ subdirs, so extracted section files also get cleaned. */
if (regcomp (&re, pattern, REG_EXTENDED | REG_NOSUB) != 0)
return -ENOMEM;
@@ -643,18 +660,9 @@ header_callback (char * buffer, size_t size, size_t numitems, void * userdata)
}
/* Temporary buffer for realloc */
char *temp = NULL;
- if (data->response_data == NULL)
- {
- temp = malloc(numitems);
- if (temp == NULL)
- return 0;
- }
- else
- {
- temp = realloc(data->response_data, data->response_data_size + numitems);
- if (temp == NULL)
- return 0;
- }
+ temp = realloc(data->response_data, data->response_data_size + numitems);
+ if (temp == NULL)
+ return 0;
memcpy(temp + data->response_data_size, buffer, numitems-1);
data->response_data = temp;
@@ -664,6 +672,360 @@ header_callback (char * buffer, size_t size, size_t numitems, void * userdata)
return numitems;
}
+
+static size_t
+metadata_callback (char * buffer, size_t size, size_t numitems, void * userdata)
+{
+ if (size != 1)
+ return 0;
+ /* Temporary buffer for realloc */
+ char *temp = NULL;
+ struct handle_data *data = (struct handle_data *) userdata;
+ temp = realloc(data->metadata, data->metadata_size + numitems + 1);
+ if (temp == NULL)
+ return 0;
+
+ memcpy(temp + data->metadata_size, buffer, numitems);
+ data->metadata = temp;
+ data->metadata_size += numitems;
+ data->metadata[data->metadata_size] = '\0';
+ return numitems;
+}
+
+
+/* This function takes a copy of DEBUGINFOD_URLS, server_urls, and
+ * separates it into an array of urls to query, each with a
+ * corresponding IMA policy. The url_subdir is either 'buildid' or
+ * 'metadata', corresponding to the query type. Returns 0 on success
+ * and a negative POSIX error on failure.
+ */
+int
+init_server_urls(char* url_subdir, char *server_urls, char ***server_url_list, ima_policy_t **url_ima_policies,
+ int *num_urls, int vfd)
+{
+ /* Initialize the memory to zero */
+ char *strtok_saveptr;
+ ima_policy_t verification_mode = permissive; // The default mode
+ char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr);
+ /* Count number of URLs. */
+ int n = 0;
+ assert(0 == strcmp(url_subdir, "buildid") || 0 == strcmp(url_subdir, "metadata"));
+
+ while (server_url != NULL)
+ {
+ int r;
+ char *tmp_url;
+ if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/')
+ r = asprintf(&tmp_url, "%s%s", server_url, url_subdir);
+ else
+ r = asprintf(&tmp_url, "%s/%s", server_url, url_subdir);
+
+ if (r == -1)
+ {
+ return -ENOMEM;
+ }
+
+ // When we encounter a (well-formed) token of the form ima:foo, we update the policy
+ // under which results from that server will be IMA verified.
+ if(startswith(server_url, "ima:"))
+ {
+#ifdef ENABLE_IMA_VERIFICATION
+ ima_policy_t m = ima_policy_str2enum(server_url + strlen("ima:"));
+ if(m != undefined) verification_mode = m;
+#else
+ if (vfd >= 0)
+ dprintf(vfd, "IMA signature verification is not enabled, treating %s as ima:ignore\n", server_url);
+#endif
+ goto continue_next_url;
+ }
+
+ /* PR 27983: If the url is a duplicate, skip it. */
+ int url_index;
+ for (url_index = 0; url_index < n; ++url_index)
+ {
+ if(strcmp(tmp_url, (*server_url_list)[url_index]) == 0)
+ {
+ url_index = -1;
+ break;
+ }
+ }
+ if (url_index == -1)
+ {
+ if (vfd >= 0)
+ dprintf(vfd, "duplicate url: %s, skipping\n", tmp_url);
+ free(tmp_url);
+ }
+ else
+ {
+ /* Have unique URL, save it, along with its IMA verification tag. */
+ n ++;
+ if (NULL == (*server_url_list = reallocarray(*server_url_list, n, sizeof(char*)))
+ || NULL == (*url_ima_policies = reallocarray(*url_ima_policies, n, sizeof(ima_policy_t))))
+ {
+ free (tmp_url);
+ return -ENOMEM;
+ }
+ (*server_url_list)[n-1] = tmp_url;
+ if(NULL != url_ima_policies) (*url_ima_policies)[n-1] = verification_mode;
+ }
+
+ continue_next_url:
+ server_url = strtok_r(NULL, url_delim, &strtok_saveptr);
+ }
+ *num_urls = n;
+ return 0;
+}
+
+/* Some boilerplate for checking curl_easy_setopt. */
+#define curl_easy_setopt_ck(H,O,P) do { \
+ CURLcode curl_res = curl_easy_setopt (H,O,P); \
+ if (curl_res != CURLE_OK) \
+ { \
+ if (vfd >= 0) \
+ dprintf (vfd, \
+ "Bad curl_easy_setopt: %s\n", \
+ curl_easy_strerror(curl_res)); \
+ return -EINVAL; \
+ } \
+ } while (0)
+
+
+/*
+ * This function initializes a CURL handle. It takes optional callbacks for the write
+ * function and the header function, which if defined will use userdata of type struct handle_data*.
+ * Specifically the data[i] within an array of struct handle_data's.
+ * Returns 0 on success and a negative POSIX error on failure.
+ */
+int
+init_handle(debuginfod_client *client,
+ size_t (*w_callback)(char *buffer,size_t size,size_t nitems,void *userdata),
+ size_t (*h_callback)(char *buffer,size_t size,size_t nitems,void *userdata),
+ struct handle_data *data, int i, long timeout,
+ int vfd)
+{
+ data->handle = curl_easy_init();
+ if (data->handle == NULL)
+ {
+ return -ENETUNREACH;
+ }
+
+ if (vfd >= 0)
+ dprintf (vfd, "url %d %s\n", i, data->url);
+
+ /* Only allow http:// + https:// + file:// so we aren't being
+ redirected to some unsupported protocol. */
+#if CURL_AT_LEAST_VERSION(7, 85, 0)
+ curl_easy_setopt_ck(data->handle, CURLOPT_PROTOCOLS_STR, "http,https,file");
+#else
+ curl_easy_setopt_ck(data->handle, CURLOPT_PROTOCOLS,
+ (CURLPROTO_HTTP | CURLPROTO_HTTPS | CURLPROTO_FILE));
+#endif
+ curl_easy_setopt_ck(data->handle, CURLOPT_URL, data->url);
+ if (vfd >= 0)
+ curl_easy_setopt_ck(data->handle, CURLOPT_ERRORBUFFER,
+ data->errbuf);
+ if(w_callback) {
+ curl_easy_setopt_ck(data->handle,
+ CURLOPT_WRITEFUNCTION, w_callback);
+ curl_easy_setopt_ck(data->handle, CURLOPT_WRITEDATA, data);
+ }
+ if (timeout > 0)
+ {
+ /* Make sure there is at least some progress,
+ try to get at least 100K per timeout seconds. */
+ curl_easy_setopt_ck (data->handle, CURLOPT_LOW_SPEED_TIME,
+ timeout);
+ curl_easy_setopt_ck (data->handle, CURLOPT_LOW_SPEED_LIMIT,
+ 100 * 1024L);
+ }
+ curl_easy_setopt_ck(data->handle, CURLOPT_FILETIME, (long) 1);
+ curl_easy_setopt_ck(data->handle, CURLOPT_FOLLOWLOCATION, (long) 1);
+ curl_easy_setopt_ck(data->handle, CURLOPT_FAILONERROR, (long) 1);
+ curl_easy_setopt_ck(data->handle, CURLOPT_NOSIGNAL, (long) 1);
+ if(h_callback){
+ curl_easy_setopt_ck(data->handle,
+ CURLOPT_HEADERFUNCTION, h_callback);
+ curl_easy_setopt_ck(data->handle, CURLOPT_HEADERDATA, data);
+ }
+ #if LIBCURL_VERSION_NUM >= 0x072a00 /* 7.42.0 */
+ curl_easy_setopt_ck(data->handle, CURLOPT_PATH_AS_IS, (long) 1);
+ #else
+ /* On old curl; no big deal, canonicalization here is almost the
+ same, except perhaps for ? # type decorations at the tail. */
+ #endif
+ curl_easy_setopt_ck(data->handle, CURLOPT_AUTOREFERER, (long) 1);
+ curl_easy_setopt_ck(data->handle, CURLOPT_ACCEPT_ENCODING, "");
+ curl_easy_setopt_ck(data->handle, CURLOPT_HTTPHEADER, client->headers);
+
+ return 0;
+}
+
+
+/*
+ * This function busy-waits on one or more curl queries to complete. This can
+ * be controlled via only_one which, if true, will find the first winner and exit
+ * once found. If positive, maxtime and maxsize dictate the maximum allowed wait time
+ * and download size respectively. Returns 0 on success and a negative POSIX error on failure.
+ */
+int
+perform_queries(CURLM *curlm, CURL **target_handle, struct handle_data *data, debuginfod_client *c,
+ int num_urls, long maxtime, long maxsize, bool only_one, int vfd)
+{
+ int still_running = -1;
+ long loops = 0;
+ int committed_to = -1;
+ bool verbose_reported = false;
+ struct timespec start_time, cur_time;
+ if (c->winning_headers != NULL)
+ {
+ free (c->winning_headers);
+ c->winning_headers = NULL;
+ }
+ if ( maxtime > 0 && clock_gettime(CLOCK_MONOTONIC_RAW, &start_time) == -1)
+ {
+ return -errno;
+ }
+ long delta = 0;
+ do
+ {
+ /* Check to see how long querying is taking. */
+ if (maxtime > 0)
+ {
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time) == -1)
+ return -errno;
+ delta = cur_time.tv_sec - start_time.tv_sec;
+ if ( delta > maxtime)
+ {
+ dprintf(vfd, "Timeout with max time=%lds and transfer time=%lds\n", maxtime, delta );
+ return -ETIME;
+ }
+ }
+ /* Wait 1 second, the minimum DEBUGINFOD_TIMEOUT. */
+ curl_multi_wait(curlm, NULL, 0, 1000, NULL);
+ CURLMcode curlm_res = curl_multi_perform(curlm, &still_running);
+
+ if(only_one)
+ {
+ /* If the target file has been found, abort the other queries. */
+ if (target_handle && *target_handle != NULL)
+ {
+ for (int i = 0; i < num_urls; i++)
+ if (data[i].handle != *target_handle)
+ curl_multi_remove_handle(curlm, data[i].handle);
+ else
+ {
+ committed_to = i;
+ if (c->winning_headers == NULL)
+ {
+ c->winning_headers = data[committed_to].response_data;
+ if (vfd >= 0 && c->winning_headers != NULL)
+ dprintf(vfd, "\n%s", c->winning_headers);
+ data[committed_to].response_data = NULL;
+ data[committed_to].response_data_size = 0;
+ }
+ }
+ }
+
+ if (vfd >= 0 && !verbose_reported && committed_to >= 0)
+ {
+ bool pnl = (c->default_progressfn_printed_p && vfd == STDERR_FILENO);
+ dprintf (vfd, "%scommitted to url %d\n", pnl ? "\n" : "",
+ committed_to);
+ if (pnl)
+ c->default_progressfn_printed_p = 0;
+ verbose_reported = true;
+ }
+ }
+
+ if (curlm_res != CURLM_OK)
+ {
+ switch (curlm_res)
+ {
+ case CURLM_CALL_MULTI_PERFORM: continue;
+ case CURLM_OUT_OF_MEMORY: return -ENOMEM;
+ default: return -ENETUNREACH;
+ }
+ }
+
+ long dl_size = -1;
+ if(only_one && target_handle)
+ { // Only bother with progress functions if we're retrieving exactly 1 file
+ if (*target_handle && (c->progressfn || maxsize > 0))
+ {
+ /* Get size of file being downloaded. NB: If going through
+ deflate-compressing proxies, this number is likely to be
+ unavailable, so -1 may show. */
+ CURLcode curl_res;
+#if CURL_AT_LEAST_VERSION(7, 55, 0)
+ curl_off_t cl;
+ curl_res = curl_easy_getinfo(*target_handle,
+ CURLINFO_CONTENT_LENGTH_DOWNLOAD_T,
+ &cl);
+ if (curl_res == CURLE_OK && cl >= 0)
+ dl_size = (cl > LONG_MAX ? LONG_MAX : (long)cl);
+#else
+ double cl;
+ curl_res = curl_easy_getinfo(*target_handle,
+ CURLINFO_CONTENT_LENGTH_DOWNLOAD,
+ &cl);
+ if (curl_res == CURLE_OK && cl >= 0)
+ dl_size = (cl >= (double)(LONG_MAX+1UL) ? LONG_MAX : (long)cl);
+#endif
+ /* If Content-Length is -1, try to get the size from
+ X-Debuginfod-Size */
+ if (dl_size == -1 && c->winning_headers != NULL)
+ {
+ long xdl;
+ char *hdr = strcasestr(c->winning_headers, "x-debuginfod-size");
+ size_t off = strlen("x-debuginfod-size:");
+
+ if (hdr != NULL && sscanf(hdr + off, "%ld", &xdl) == 1)
+ dl_size = xdl;
+ }
+ }
+
+ if (c->progressfn) /* inform/check progress callback */
+ {
+ loops ++;
+ long pa = loops; /* default param for progress callback */
+ if (*target_handle) /* we've committed to a server; report its download progress */
+ {
+ CURLcode curl_res;
+#if CURL_AT_LEAST_VERSION(7, 55, 0)
+ curl_off_t dl;
+ curl_res = curl_easy_getinfo(*target_handle,
+ CURLINFO_SIZE_DOWNLOAD_T,
+ &dl);
+ if (curl_res == 0 && dl >= 0)
+ pa = (dl > LONG_MAX ? LONG_MAX : (long)dl);
+#else
+ double dl;
+ curl_res = curl_easy_getinfo(*target_handle,
+ CURLINFO_SIZE_DOWNLOAD,
+ &dl);
+ if (curl_res == 0)
+ pa = (dl >= (double)(LONG_MAX+1UL) ? LONG_MAX : (long)dl);
+#endif
+
+ }
+
+ if ((*c->progressfn) (c, pa, dl_size == -1 ? 0 : dl_size))
+ break;
+ }
+ }
+ /* Check to see if we are downloading something which exceeds maxsize, if set.*/
+ if (target_handle && *target_handle && dl_size > maxsize && maxsize > 0)
+ {
+ if (vfd >=0)
+ dprintf(vfd, "Content-Length too large.\n");
+ return -EFBIG;
+ }
+ } while (still_running);
+
+ return 0;
+}
+
+
/* Copy SRC to DEST, s,/,#,g */
static void
@@ -915,6 +1277,7 @@ cache_find_section (const char *scn_name, const char *target_cache_dir,
return rc;
}
+
/* Validate an IMA file signature.
* returns 0 on signature validity, EINVAL on signature invalidity, ENOSYS on undefined imaevm machinery,
* ENOKEY on key issues and -errno on error
@@ -1083,6 +1446,84 @@ debuginfod_validate_imasig (debuginfod_client *c, const char* tmp_path, int fd)
return rc;
}
+
+/* Helper function to create client cache directory.
+ $XDG_CACHE_HOME takes priority over $HOME/.cache.
+ $DEBUGINFOD_CACHE_PATH takes priority over $HOME/.cache and $XDG_CACHE_HOME.
+
+ Return resulting path name or NULL on error. Caller must free resulting string.
+ */
+static char *
+make_cache_path(void)
+{
+ char* cache_path = NULL;
+ int rc = 0;
+ /* Determine location of the cache. The path specified by the debuginfod
+ cache environment variable takes priority. */
+ char *cache_var = getenv(DEBUGINFOD_CACHE_PATH_ENV_VAR);
+ if (cache_var != NULL && strlen (cache_var) > 0)
+ xalloc_str (cache_path, "%s", cache_var);
+ else
+ {
+ /* If a cache already exists in $HOME ('/' if $HOME isn't set), then use
+ that. Otherwise use the XDG cache directory naming format. */
+ xalloc_str (cache_path, "%s/%s", getenv ("HOME") ?: "/", cache_default_name);
+
+ struct stat st;
+ if (stat (cache_path, &st) < 0)
+ {
+ char cachedir[PATH_MAX];
+ char *xdg = getenv ("XDG_CACHE_HOME");
+
+ if (xdg != NULL && strlen (xdg) > 0)
+ snprintf (cachedir, PATH_MAX, "%s", xdg);
+ else
+ snprintf (cachedir, PATH_MAX, "%s/.cache", getenv ("HOME") ?: "/");
+
+ /* Create XDG cache directory if it doesn't exist. */
+ if (stat (cachedir, &st) == 0)
+ {
+ if (! S_ISDIR (st.st_mode))
+ {
+ rc = -EEXIST;
+ goto out1;
+ }
+ }
+ else
+ {
+ rc = mkdir (cachedir, 0700);
+
+ /* Also check for EEXIST and S_ISDIR in case another client just
+ happened to create the cache. */
+ if (rc < 0
+ && (errno != EEXIST
+ || stat (cachedir, &st) != 0
+ || ! S_ISDIR (st.st_mode)))
+ {
+ rc = -errno;
+ goto out1;
+ }
+ }
+
+ free (cache_path);
+ xalloc_str (cache_path, "%s/%s", cachedir, cache_xdg_name);
+ }
+ }
+
+ goto out;
+
+ out1:
+ (void) rc;
+ free (cache_path);
+ cache_path = NULL;
+
+ out:
+ if (cache_path != NULL)
+ (void) mkdir (cache_path, 0700); // failures with this mkdir would be caught later too
+ return cache_path;
+}
+
+
/* Query each of the server URLs found in $DEBUGINFOD_URLS for the file
with the specified build-id and type (debuginfo, executable, source or
section). If type is source, then type_arg should be a filename. If
@@ -1091,7 +1532,7 @@ debuginfod_validate_imasig (debuginfod_client *c, const char* tmp_path, int fd)
for the target if successful, otherwise return an error code.
*/
static int
-debuginfod_query_server (debuginfod_client *c,
+debuginfod_query_server_by_buildid (debuginfod_client *c,
const unsigned char *build_id,
int build_id_len,
const char *type,
@@ -1112,7 +1553,7 @@ debuginfod_query_server (debuginfod_client *c,
char suffix[PATH_MAX + 1]; /* +1 for zero terminator. */
char build_id_bytes[MAX_BUILD_ID_BYTES * 2 + 1];
int vfd = c->verbose_fd;
- int rc;
+ int rc, r;
c->progressfn_cancel = false;
@@ -1237,70 +1678,22 @@ debuginfod_query_server (debuginfod_client *c,
dprintf (vfd, "suffix %s\n", suffix);
/* set paths needed to perform the query
-
- example format
+ example format:
cache_path: $HOME/.cache
target_cache_dir: $HOME/.cache/0123abcd
target_cache_path: $HOME/.cache/0123abcd/debuginfo
target_cache_path: $HOME/.cache/0123abcd/source#PATH#TO#SOURCE ?
-
- $XDG_CACHE_HOME takes priority over $HOME/.cache.
- $DEBUGINFOD_CACHE_PATH takes priority over $HOME/.cache and $XDG_CACHE_HOME.
*/
- /* Determine location of the cache. The path specified by the debuginfod
- cache environment variable takes priority. */
- char *cache_var = getenv(DEBUGINFOD_CACHE_PATH_ENV_VAR);
- if (cache_var != NULL && strlen (cache_var) > 0)
- xalloc_str (cache_path, "%s", cache_var);
- else
+ cache_path = make_cache_path();
+ if (!cache_path)
{
- /* If a cache already exists in $HOME ('/' if $HOME isn't set), then use
- that. Otherwise use the XDG cache directory naming format. */
- xalloc_str (cache_path, "%s/%s", getenv ("HOME") ?: "/", cache_default_name);
-
- struct stat st;
- if (stat (cache_path, &st) < 0)
- {
- char cachedir[PATH_MAX];
- char *xdg = getenv ("XDG_CACHE_HOME");
-
- if (xdg != NULL && strlen (xdg) > 0)
- snprintf (cachedir, PATH_MAX, "%s", xdg);
- else
- snprintf (cachedir, PATH_MAX, "%s/.cache", getenv ("HOME") ?: "/");
-
- /* Create XDG cache directory if it doesn't exist. */
- if (stat (cachedir, &st) == 0)
- {
- if (! S_ISDIR (st.st_mode))
- {
- rc = -EEXIST;
- goto out;
- }
- }
- else
- {
- rc = mkdir (cachedir, 0700);
-
- /* Also check for EEXIST and S_ISDIR in case another client just
- happened to create the cache. */
- if (rc < 0
- && (errno != EEXIST
- || stat (cachedir, &st) != 0
- || ! S_ISDIR (st.st_mode)))
- {
- rc = -errno;
- goto out;
- }
- }
-
- free (cache_path);
- xalloc_str (cache_path, "%s/%s", cachedir, cache_xdg_name);
- }
+ rc = -ENOMEM;
+ goto out;
}
-
xalloc_str (target_cache_dir, "%s/%s", cache_path, build_id_bytes);
+ (void) mkdir (target_cache_dir, 0700); // failures with this mkdir would be caught later too
+
if (section != NULL)
xalloc_str (target_cache_path, "%s/%s-%s", target_cache_dir, type, suffix);
else
@@ -1435,77 +1828,15 @@ debuginfod_query_server (debuginfod_client *c,
goto out0;
}
- /* Initialize the memory to zero */
- char *strtok_saveptr;
char **server_url_list = NULL;
ima_policy_t* url_ima_policies = NULL;
- char* server_url;
- /* Count number of URLs. */
- int num_urls = 0;
-
- ima_policy_t verification_mode = permissive; // The default mode
- for(server_url = strtok_r(server_urls, url_delim, &strtok_saveptr);
- server_url != NULL; server_url = strtok_r(NULL, url_delim, &strtok_saveptr))
- {
- // When we encounted a (well-formed) token off the form ima:foo, we update the policy
- // under which results from that server will be ima verified
- if(startswith(server_url, "ima:"))
- {
-#ifdef ENABLE_IMA_VERIFICATION
- ima_policy_t m = ima_policy_str2enum(server_url + strlen("ima:"));
- if(m != undefined) verification_mode = m;
-#else
- if (vfd >= 0)
- dprintf(vfd, "IMA signature verification is not enabled, skipping %s\n", server_url);
-#endif
- continue; // Not a url, just a mode change so keep going
- }
-
- /* PR 27983: If the url is already set to be used use, skip it */
- char *slashbuildid;
- if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/')
- slashbuildid = "buildid";
- else
- slashbuildid = "/buildid";
-
- char *tmp_url;
- if (asprintf(&tmp_url, "%s%s", server_url, slashbuildid) == -1)
- {
- rc = -ENOMEM;
- goto out1;
- }
- int url_index;
- for (url_index = 0; url_index < num_urls; ++url_index)
- {
- if(strcmp(tmp_url, server_url_list[url_index]) == 0)
- {
- url_index = -1;
- break;
- }
- }
- if (url_index == -1)
- {
- if (vfd >= 0)
- dprintf(vfd, "duplicate url: %s, skipping\n", tmp_url);
- free(tmp_url);
- }
- else
- {
- num_urls++;
- if (NULL == (server_url_list = reallocarray(server_url_list, num_urls, sizeof(char*)))
-#ifdef ENABLE_IMA_VERIFICATION
- || NULL == (url_ima_policies = reallocarray(url_ima_policies, num_urls, sizeof(ima_policy_t)))
-#endif
- )
- {
- free (tmp_url);
- rc = -ENOMEM;
- goto out1;
- }
- server_url_list[num_urls-1] = tmp_url;
- if(NULL != url_ima_policies) url_ima_policies[num_urls-1] = verification_mode;
- }
- }
+ char *server_url;
+ int num_urls;
+ r = init_server_urls("buildid", server_urls, &server_url_list, &url_ima_policies, &num_urls, vfd);
+ if(0 != r){
+ rc = r;
+ goto out1;
+ }
int retry_limit = default_retry_limit;
const char* retry_limit_envvar = getenv(DEBUGINFOD_RETRY_LIMIT_ENV_VAR);
@@ -1573,21 +1904,11 @@ debuginfod_query_server (debuginfod_client *c,
if ((server_url = server_url_list[i]) == NULL)
break;
if (vfd >= 0)
-#ifdef ENABLE_IMA_VERIFICATION
- dprintf (vfd, "init server %d %s [IMA verification policy: %s]\n", i, server_url, ima_policy_enum2str(url_ima_policies[i]));
-#else
dprintf (vfd, "init server %d %s\n", i, server_url);
-#endif
data[i].fd = fd;
data[i].target_handle = &target_handle;
- data[i].handle = curl_easy_init();
- if (data[i].handle == NULL)
- {
- if (filename) curl_free (escaped_string);
- rc = -ENETUNREACH;
- goto out2;
- }
+ data[i].ima_policy = url_ima_policies[i];
data[i].client = c;
if (filename) /* must start with / */
@@ -1601,240 +1922,29 @@ debuginfod_query_server (debuginfod_client *c,
build_id_bytes, type, section);
else
snprintf(data[i].url, PATH_MAX, "%s/%s/%s", server_url, build_id_bytes, type);
- if (vfd >= 0)
- dprintf (vfd, "url %d %s\n", i, data[i].url);
-
- /* Some boilerplate for checking curl_easy_setopt. */
-#define curl_easy_setopt_ck(H,O,P) do { \
- CURLcode curl_res = curl_easy_setopt (H,O,P); \
- if (curl_res != CURLE_OK) \
- { \
- if (vfd >= 0) \
- dprintf (vfd, \
- "Bad curl_easy_setopt: %s\n", \
- curl_easy_strerror(curl_res)); \
- rc = -EINVAL; \
- goto out2; \
- } \
- } while (0)
- /* Only allow http:// + https:// + file:// so we aren't being
- redirected to some unsupported protocol. */
-#if CURL_AT_LEAST_VERSION(7, 85, 0)
- curl_easy_setopt_ck(data[i].handle, CURLOPT_PROTOCOLS_STR,
- "http,https,file");
-#else
- curl_easy_setopt_ck(data[i].handle, CURLOPT_PROTOCOLS,
- (CURLPROTO_HTTP | CURLPROTO_HTTPS | CURLPROTO_FILE));
-#endif
- curl_easy_setopt_ck(data[i].handle, CURLOPT_URL, data[i].url);
- if (vfd >= 0)
- curl_easy_setopt_ck(data[i].handle, CURLOPT_ERRORBUFFER,
- data[i].errbuf);
- curl_easy_setopt_ck(data[i].handle,
- CURLOPT_WRITEFUNCTION,
- debuginfod_write_callback);
- curl_easy_setopt_ck(data[i].handle, CURLOPT_WRITEDATA, (void*)&data[i]);
- if (timeout > 0)
- {
- /* Make sure there is at least some progress,
- try to get at least 100K per timeout seconds. */
- curl_easy_setopt_ck (data[i].handle, CURLOPT_LOW_SPEED_TIME,
- timeout);
- curl_easy_setopt_ck (data[i].handle, CURLOPT_LOW_SPEED_LIMIT,
- 100 * 1024L);
- }
- curl_easy_setopt_ck(data[i].handle, CURLOPT_FILETIME, (long) 1);
- curl_easy_setopt_ck(data[i].handle, CURLOPT_FOLLOWLOCATION, (long) 1);
- curl_easy_setopt_ck(data[i].handle, CURLOPT_FAILONERROR, (long) 1);
- curl_easy_setopt_ck(data[i].handle, CURLOPT_NOSIGNAL, (long) 1);
- curl_easy_setopt_ck(data[i].handle, CURLOPT_HEADERFUNCTION,
- header_callback);
- curl_easy_setopt_ck(data[i].handle, CURLOPT_HEADERDATA,
- (void *) &(data[i]));
-#if LIBCURL_VERSION_NUM >= 0x072a00 /* 7.42.0 */
- curl_easy_setopt_ck(data[i].handle, CURLOPT_PATH_AS_IS, (long) 1);
-#else
- /* On old curl; no big deal, canonicalization here is almost the
- same, except perhaps for ? # type decorations at the tail. */
-#endif
- curl_easy_setopt_ck(data[i].handle, CURLOPT_AUTOREFERER, (long) 1);
- curl_easy_setopt_ck(data[i].handle, CURLOPT_ACCEPT_ENCODING, "");
- curl_easy_setopt_ck(data[i].handle, CURLOPT_HTTPHEADER, c->headers);
+ r = init_handle(c, debuginfod_write_callback, header_callback, &data[i], i, timeout, vfd);
+ if(0 != r){
+ rc = r;
+ if(filename) curl_free (escaped_string);
+ goto out2;
+ }
curl_multi_add_handle(curlm, data[i].handle);
}
if (filename) curl_free(escaped_string);
+
/* Query servers in parallel. */
if (vfd >= 0)
dprintf (vfd, "query %d urls in parallel\n", num_urls);
- int still_running;
- long loops = 0;
- int committed_to = -1;
- bool verbose_reported = false;
- struct timespec start_time, cur_time;
- free (c->winning_headers);
- c->winning_headers = NULL;
- if ( maxtime > 0 && clock_gettime(CLOCK_MONOTONIC_RAW, &start_time) == -1)
+ r = perform_queries(curlm, &target_handle, data, c, num_urls, maxtime, maxsize, true, vfd);
+ if (0 != r)
{
- rc = -errno;
+ rc = r;
goto out2;
}
- long delta = 0;
- do
- {
- /* Check to see how long querying is taking. */
- if (maxtime > 0)
- {
- if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time) == -1)
- {
- rc = -errno;
- goto out2;
- }
- delta = cur_time.tv_sec - start_time.tv_sec;
- if ( delta > maxtime)
- {
- dprintf(vfd, "Timeout with max time=%lds and transfer time=%lds\n", maxtime, delta );
- rc = -ETIME;
- goto out2;
- }
- }
- /* Wait 1 second, the minimum DEBUGINFOD_TIMEOUT. */
- curl_multi_wait(curlm, NULL, 0, 1000, NULL);
- CURLMcode curlm_res = curl_multi_perform(curlm, &still_running);
-
- /* If the target file has been found, abort the other queries. */
- if (target_handle != NULL)
- {
- for (int i = 0; i < num_urls; i++)
- if (data[i].handle != target_handle)
- curl_multi_remove_handle(curlm, data[i].handle);
- else
- {
- committed_to = i;
- if (c->winning_headers == NULL)
- {
- c->winning_headers = data[committed_to].response_data;
- data[committed_to].response_data = NULL;
- data[committed_to].response_data_size = 0;
- }
-
- }
- }
-
- if (vfd >= 0 && !verbose_reported && committed_to >= 0)
- {
- bool pnl = (c->default_progressfn_printed_p && vfd == STDERR_FILENO);
- dprintf (vfd, "%scommitted to url %d\n", pnl ? "\n" : "",
- committed_to);
- if (pnl)
- c->default_progressfn_printed_p = 0;
- verbose_reported = true;
- }
-
- if (curlm_res != CURLM_OK)
- {
- switch (curlm_res)
- {
- case CURLM_CALL_MULTI_PERFORM: continue;
- case CURLM_OUT_OF_MEMORY: rc = -ENOMEM; break;
- default: rc = -ENETUNREACH; break;
- }
- goto out2;
- }
-
- long dl_size = -1;
- if (target_handle && (c->progressfn || maxsize > 0))
- {
- /* Get size of file being downloaded. NB: If going through
- deflate-compressing proxies, this number is likely to be
- unavailable, so -1 may show. */
- CURLcode curl_res;
-#if CURL_AT_LEAST_VERSION(7, 55, 0)
- curl_off_t cl;
- curl_res = curl_easy_getinfo(target_handle,
- CURLINFO_CONTENT_LENGTH_DOWNLOAD_T,
- &cl);
- if (curl_res == CURLE_OK && cl >= 0)
- dl_size = (cl > LONG_MAX ? LONG_MAX : (long)cl);
-#else
- double cl;
- curl_res = curl_easy_getinfo(target_handle,
- CURLINFO_CONTENT_LENGTH_DOWNLOAD,
- &cl);
- if (curl_res == CURLE_OK && cl >= 0)
- dl_size = (cl >= (double)(LONG_MAX+1UL) ? LONG_MAX : (long)cl);
-#endif
- /* If Content-Length is -1, try to get the size from
- X-Debuginfod-Size */
- if (dl_size == -1 && c->winning_headers != NULL)
- {
- long xdl;
- char *hdr = strcasestr(c->winning_headers, "x-debuginfod-size");
- size_t off = strlen("x-debuginfod-size:");
-
- if (hdr != NULL && sscanf(hdr + off, "%ld", &xdl) == 1)
- dl_size = xdl;
- }
- }
-
- if (c->progressfn) /* inform/check progress callback */
- {
- loops ++;
- long pa = loops; /* default param for progress callback */
- if (target_handle) /* we've committed to a server; report its download progress */
- {
- /* PR30809: Check actual size of cached file. This same
- fd is shared by all the multi-curl handles (but only
- one will end up writing to it). Another way could be
- to tabulate totals in debuginfod_write_callback(). */
- struct stat cached;
- int statrc = fstat(fd, &cached);
- if (statrc == 0)
- pa = (long) cached.st_size;
- else
- {
- /* Otherwise, query libcurl for its tabulated total.
- However, that counts http body length, not
- decoded/decompressed content length, so does not
- measure quite the same thing as dl. */
- CURLcode curl_res;
-#if CURL_AT_LEAST_VERSION(7, 55, 0)
- curl_off_t dl;
- curl_res = curl_easy_getinfo(target_handle,
- CURLINFO_SIZE_DOWNLOAD_T,
- &dl);
- if (curl_res == 0 && dl >= 0)
- pa = (dl > LONG_MAX ? LONG_MAX : (long)dl);
-#else
- double dl;
- curl_res = curl_easy_getinfo(target_handle,
- CURLINFO_SIZE_DOWNLOAD,
- &dl);
- if (curl_res == 0)
- pa = (dl >= (double)(LONG_MAX+1UL) ? LONG_MAX : (long)dl);
-#endif
- }
- }
-
- if ((*c->progressfn) (c, pa, dl_size == -1 ? 0 : dl_size))
- {
- c->progressfn_cancel = true;
- break;
- }
- }
-
- /* Check to see if we are downloading something which exceeds maxsize, if set.*/
- if (target_handle && dl_size > maxsize && maxsize > 0)
- {
- if (vfd >=0)
- dprintf(vfd, "Content-Length too large.\n");
- rc = -EFBIG;
- goto out2;
- }
- } while (still_running);
/* Check whether a query was successful. If so, assign its handle
to verified_handle. */
@@ -1959,6 +2069,37 @@ debuginfod_query_server (debuginfod_client *c,
}
} while (num_msg > 0);
+ /* Perform IMA verification if appropriate. */
+ int committed_to = -1;
+ for (int i=0; i<num_urls; i++)
+ if (target_handle == data[i].handle)
+ committed_to = i;
+ if (committed_to >= 0 && ignore != data[committed_to].ima_policy)
+ {
+ int result = debuginfod_validate_imasig(c, target_cache_tmppath, fd);
+ if(0 == result)
+ {
+ if (vfd >= 0) dprintf (vfd, "valid signature\n");
+ }
+ else if(EINVAL == result || enforcing == data[committed_to].ima_policy)
+ {
+ // All invalid signatures are rejected.
+ // Additionally, in enforcing mode any non-valid signature is rejected, so
+ // reaching this case means we reject the file. Note - this covers not just
+ // invalid signatures but also signatures that cannot be validated.
+ if (vfd >= 0) dprintf (vfd, "error: invalid or missing signature (%d)\n", result);
+ rc = -EPERM;
+ goto out2;
+ }
+ else
+ {
+ // By default we are permissive, so since the signature isn't invalid we
+ // give it the benefit of the doubt
+ if (vfd >= 0) dprintf (vfd, "warning: invalid or missing signature (%d)\n", result);
+ }
+ }
+
+
/* Create an empty file in the cache if the query fails with ENOENT and
it wasn't cancelled early. */
if (rc == -ENOENT && !c->progressfn_cancel)
@@ -1986,6 +2127,7 @@ debuginfod_query_server (debuginfod_client *c,
curl_multi_remove_handle(curlm, data[i].handle); /* ok to repeat */
curl_easy_cleanup (data[i].handle);
free(data[i].response_data);
+ data[i].response_data = NULL;
}
free(c->winning_headers);
c->winning_headers = NULL;
@@ -2023,31 +2165,6 @@ debuginfod_query_server (debuginfod_client *c,
/* PR27571: make cache files casually unwriteable; dirs are already 0700 */
(void) fchmod(fd, 0400);
- if(NULL != url_ima_policies && ignore != url_ima_policies[committed_to])
- {
- int result = debuginfod_validate_imasig(c, target_cache_tmppath, fd);
- if(0 == result)
- {
- if (vfd >= 0) dprintf (vfd, "valid signature\n");
- }
- else if(EINVAL == result || enforcing == url_ima_policies[committed_to])
- {
- // All invalid signatures are rejected.
- // Additionally in enforcing mode any non-valid signature is rejected, so by reaching
- // this case we do so since we know it is not valid. Note - this not just invalid signatures
- // but also signatures that cannot be validated
- if (vfd >= 0) dprintf (vfd, "error: invalid or missing signature (%d)\n", result);
- rc = -EPERM;
- goto out2;
- }
- else
- {
- // By default we are permissive, so since the signature isn't invalid we
- // give it the benefit of the doubt
- if (vfd >= 0) dprintf (vfd, "warning: invalid or missing signature (%d)\n", result);
- }
- }
-
/* rename tmp->real */
rc = rename (target_cache_tmppath, target_cache_path);
if (rc < 0)
@@ -2222,7 +2339,7 @@ debuginfod_find_debuginfo (debuginfod_client *client,
const unsigned char *build_id, int build_id_len,
char **path)
{
- return debuginfod_query_server(client, build_id, build_id_len,
+ return debuginfod_query_server_by_buildid(client, build_id, build_id_len,
"debuginfo", NULL, path);
}
@@ -2233,7 +2350,7 @@ debuginfod_find_executable(debuginfod_client *client,
const unsigned char *build_id, int build_id_len,
char **path)
{
- return debuginfod_query_server(client, build_id, build_id_len,
+ return debuginfod_query_server_by_buildid(client, build_id, build_id_len,
"executable", NULL, path);
}
@@ -2242,7 +2359,7 @@ int debuginfod_find_source(debuginfod_client *client,
const unsigned char *build_id, int build_id_len,
const char *filename, char **path)
{
- return debuginfod_query_server(client, build_id, build_id_len,
+ return debuginfod_query_server_by_buildid(client, build_id, build_id_len,
"source", filename, path);
}
@@ -2251,8 +2368,8 @@ debuginfod_find_section (debuginfod_client *client,
const unsigned char *build_id, int build_id_len,
const char *section, char **path)
{
- int rc = debuginfod_query_server(client, build_id, build_id_len,
- "section", section, path);
+ int rc = debuginfod_query_server_by_buildid(client, build_id, build_id_len,
+ "section", section, path);
if (rc != -EINVAL)
return rc;
@@ -2301,6 +2418,376 @@ debuginfod_find_section (debuginfod_client *client,
return rc;
}
+
+int debuginfod_find_metadata (debuginfod_client *client,
+ const char* key, char* value, char **path)
+{
+ (void) client;
+ (void) key;
+ (void) value;
+ (void) path;
+
+ char *server_urls = NULL;
+ char *urls_envvar = NULL;
+ char *cache_path = NULL;
+ char *target_cache_dir = NULL;
+ char *target_cache_path = NULL;
+ char *target_cache_tmppath = NULL;
+ char *target_file_name = NULL;
+ char *key_and_value = NULL;
+ int rc = 0, r;
+ int vfd = client->verbose_fd;
+ struct handle_data *data = NULL;
+
+ json_object *json_metadata = json_object_new_object();
+ json_bool json_metadata_complete = true;
+ json_object *json_metadata_arr = json_object_new_array();
+ if(NULL == json_metadata) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ json_object_object_add(json_metadata, "results",
+ json_metadata_arr ?: json_object_new_array() /* Empty array */);
+
+ if(NULL == value || NULL == key){
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (vfd >= 0)
+ dprintf (vfd, "debuginfod_find_metadata %s %s\n", key, value);
+
+ /* Without a queryable URL, we can stop here. */
+ urls_envvar = getenv(DEBUGINFOD_URLS_ENV_VAR);
+ if (vfd >= 0)
+ dprintf (vfd, "server urls \"%s\"\n",
+ urls_envvar != NULL ? urls_envvar : "");
+ if (urls_envvar == NULL || urls_envvar[0] == '\0')
+ {
+ rc = -ENOSYS;
+ goto out;
+ }
+
+ /* set paths needed to perform the query
+ example format:
+ cache_path: $HOME/.cache
+ target_cache_dir: $HOME/.cache/metadata
+ target_cache_path: $HOME/.cache/metadata/KEYENCODED_VALUEENCODED
+ target_cache_path: $HOME/.cache/metadata/KEYENCODED_VALUEENCODED.XXXXXX
+ */
+
+ // libcurl > 7.62ish has curl_url_set()/etc. to construct these things more properly.
+ // curl_easy_escape() is older
+ {
+ CURL *c = curl_easy_init();
+ if (!c)
+ {
+ rc = -ENOMEM;
+ goto out;
+ }
+ char *key_escaped = curl_easy_escape(c, key, 0);
+ char *value_escaped = curl_easy_escape(c, value, 0);
+
+ // fallback to unescaped values in unlikely case of error
+ xalloc_str (key_and_value, "key=%s&value=%s", key_escaped ?: key, value_escaped ?: value);
+ xalloc_str (target_file_name, "%s_%s", key_escaped ?: key, value_escaped ?: value);
+ curl_free(value_escaped);
+ curl_free(key_escaped);
+ curl_easy_cleanup(c);
+ }
+
+ /* Check if we have a recent result already in the cache. */
+ cache_path = make_cache_path();
+ if (! cache_path)
+ {
+ rc = -ENOMEM;
+ goto out;
+ }
+ xalloc_str (target_cache_dir, "%s/metadata", cache_path);
+ (void) mkdir (target_cache_dir, 0700);
+ xalloc_str (target_cache_path, "%s/%s", target_cache_dir, target_file_name);
+ xalloc_str (target_cache_tmppath, "%s/%s.XXXXXX", target_cache_dir, target_file_name);
+
+ int fd = open(target_cache_path, O_RDONLY);
+ if (fd >= 0)
+ {
+ struct stat st;
+ int metadata_retention = 0;
+ time_t now = time(NULL);
+ char *metadata_retention_path = 0;
+
+ xalloc_str (metadata_retention_path, "%s/%s", cache_path, metadata_retention_filename);
+ if (metadata_retention_path)
+ {
+ rc = debuginfod_config_cache(client, metadata_retention_path,
+ metadata_retention_default_s, &st);
+ free (metadata_retention_path);
+ if (rc < 0)
+ rc = 0;
+ }
+ else
+ rc = 0;
+ metadata_retention = rc;
+
+ if (fstat(fd, &st) != 0)
+ {
+ rc = -errno;
+ close (fd);
+ goto out;
+ }
+
+ if (metadata_retention > 0 && (now - st.st_mtime <= metadata_retention))
+ {
+ if (client && client->verbose_fd >= 0)
+ dprintf (client->verbose_fd, "cached metadata %s", target_file_name);
+
+ if (path != NULL)
+ {
+ *path = target_cache_path; // pass over the pointer
+ target_cache_path = NULL; // prevent free() in our own cleanup
+ }
+
+ /* Success!!!! */
+ rc = fd;
+ goto out;
+ }
+
+ /* We don't have to clear the likely-expired cached object here
+ by unlinking. We will shortly make a new request and save
+ results right on top. Erasing here could trigger a TOCTOU
+ race with another thread just finishing a query and passing
+ its results back.
+ */
+ // (void) unlink (target_cache_path);
+
+ close (fd);
+ }
+
+ /* No valid cached metadata found: time to make the queries. */
+
+ /* Clear the client of previous URLs. */
+ free (client->url);
+ client->url = NULL;
+
+ long maxtime = 0;
+ const char *maxtime_envvar;
+ maxtime_envvar = getenv(DEBUGINFOD_MAXTIME_ENV_VAR);
+ if (maxtime_envvar != NULL)
+ maxtime = atol (maxtime_envvar);
+ if (maxtime && vfd >= 0)
+ dprintf(vfd, "using max time %lds\n", maxtime);
+
+ long timeout = default_timeout;
+ const char* timeout_envvar = getenv(DEBUGINFOD_TIMEOUT_ENV_VAR);
+ if (timeout_envvar != NULL)
+ timeout = atoi (timeout_envvar);
+ if (vfd >= 0)
+ dprintf (vfd, "using timeout %ld\n", timeout);
+
+ add_default_headers(client);
+
+ /* make a copy of the envvar so it can be safely modified. */
+ server_urls = strdup(urls_envvar);
+ if (server_urls == NULL)
+ {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /* Thereafter, goto out1 on error. */
+
+ char **server_url_list = NULL;
+ ima_policy_t* url_ima_policies = NULL;
+ char *server_url;
+ int num_urls = 0;
+ r = init_server_urls("metadata", server_urls, &server_url_list, &url_ima_policies, &num_urls, vfd);
+ if(0 != r){
+ rc = r;
+ goto out1;
+ }
+
+ CURLM *curlm = client->server_mhandle;
+ assert (curlm != NULL);
+
+ CURL *target_handle = NULL;
+ data = malloc(sizeof(struct handle_data) * num_urls);
+ if (data == NULL)
+ {
+ rc = -ENOMEM;
+ goto out1;
+ }
+
+ /* thereafter, goto out2 on error. */
+
+ /* Initialize handle_data */
+ for (int i = 0; i < num_urls; i++)
+ {
+ if ((server_url = server_url_list[i]) == NULL)
+ break;
+ if (vfd >= 0)
+ dprintf (vfd, "init server %d %s\n", i, server_url);
+
+ data[i].errbuf[0] = '\0';
+ data[i].target_handle = &target_handle;
+ data[i].client = client;
+ data[i].metadata = NULL;
+ data[i].metadata_size = 0;
+ data[i].response_data = NULL;
+ data[i].response_data_size = 0;
+
+ snprintf(data[i].url, PATH_MAX, "%s?%s", server_url, key_and_value);
+
+ r = init_handle(client, metadata_callback, header_callback, &data[i], i, timeout, vfd);
+ if(0 != r){
+ rc = r;
+ goto out2;
+ }
+ curl_multi_add_handle(curlm, data[i].handle);
+ }
+
+ /* Query servers */
+ if (vfd >= 0)
+ dprintf (vfd, "Starting %d queries\n",num_urls);
+ r = perform_queries(curlm, NULL, data, client, num_urls, maxtime, 0, false, vfd);
+ if (0 != r) {
+ rc = r;
+ goto out2;
+ }
+
+ /* NOTE: We don't check the return codes of the curl messages since
+ a metadata query failing silently is just fine. We want to know what's
+ available from servers which can be connected with no issues.
+ If running with additional verbosity, the failure will be noted in stderr */
+
+ /* Build the new json array from all the upstream data, and
+ clean up while at it.
+ */
+ for (int i = 0; i < num_urls; i++)
+ {
+ curl_multi_remove_handle(curlm, data[i].handle); /* ok to repeat */
+ curl_easy_cleanup (data[i].handle);
+ free (data[i].response_data);
+
+ if (NULL == data[i].metadata)
+ {
+ if (vfd >= 0)
+ dprintf (vfd, "Query to %s failed with error message:\n\t\"%s\"\n",
+ data[i].url, data[i].errbuf);
+ json_metadata_complete = false;
+ continue;
+ }
+
+ json_object *upstream_metadata = json_tokener_parse(data[i].metadata);
+ json_object *upstream_complete;
+ json_object *upstream_metadata_arr;
+ if(NULL == upstream_metadata ||
+ !json_object_object_get_ex(upstream_metadata, "results", &upstream_metadata_arr) ||
+ !json_object_object_get_ex(upstream_metadata, "complete", &upstream_complete)) continue;
+ json_metadata_complete &= json_object_get_boolean(upstream_complete);
+ // Combine the upstream metadata into the json array
+ for (int j = 0, n = json_object_array_length(upstream_metadata_arr); j < n; j++) {
+ json_object *entry = json_object_array_get_idx(upstream_metadata_arr, j);
+ json_object_get(entry); // increment reference count
+ json_object_array_add(json_metadata_arr, entry);
+ }
+ json_object_put(upstream_metadata);
+
+ free (data[i].metadata);
+ }
+
+ /* Because of race with cache cleanup / rmdir, try to mkdir/mkstemp up to twice. */
+ for (int i=0; i<2; i++) {
+ /* (re)create target directory in cache */
+ (void) mkdir(target_cache_dir, 0700); /* files will be 0400 later */
+
+ /* NB: write to a temporary file first, to avoid race condition of
+ multiple clients checking the cache, while a partially-written or empty
+ file is in there, being written from libcurl. */
+ fd = mkstemp (target_cache_tmppath);
+ if (fd >= 0) break;
+ }
+ if (fd < 0) /* Still failed after two iterations. */
+ {
+ rc = -errno;
+ goto out1;
+ }
+
+
+ /* Plop the complete json_metadata object into the cache. */
+ json_object_object_add(json_metadata, "complete", json_object_new_boolean(json_metadata_complete));
+ const char* json_string = json_object_to_json_string_ext(json_metadata, JSON_C_TO_STRING_PRETTY);
+ if (json_string == NULL)
+ {
+ rc = -ENOMEM;
+ goto out1;
+ }
+ ssize_t res = write_retry (fd, json_string, strlen(json_string));
+ (void) lseek(fd, 0, SEEK_SET); // rewind file so client can read it from the top
+
+ /* NB: json_string is auto deleted when json_metadata object is nuked */
+ if (res < 0 || (size_t) res != strlen(json_string))
+ {
+ rc = -EIO;
+ goto out1;
+ }
+ /* PR27571: make cache files casually unwriteable; dirs are already 0700 */
+ (void) fchmod(fd, 0400);
+
+ /* rename tmp->real */
+ rc = rename (target_cache_tmppath, target_cache_path);
+ if (rc < 0)
+ {
+ rc = -errno;
+ goto out1;
+ /* Perhaps we need not give up right away; could retry or something ... */
+ }
+
+ /* don't close fd - we're returning it */
+ /* don't unlink the tmppath; it's already been renamed. */
+ if (path != NULL)
+ *path = strdup(target_cache_path);
+
+ rc = fd;
+ goto out1;
+
+/* error exits */
+out2:
+ /* remove all handles from multi */
+ for (int i = 0; i < num_urls; i++)
+ {
+ if (data[i].handle != NULL)
+ {
+ curl_multi_remove_handle(curlm, data[i].handle); /* ok to repeat */
+ curl_easy_cleanup (data[i].handle);
+ free (data[i].response_data);
+ free (data[i].metadata);
+ }
+ }
+
+out1:
+ free(data);
+
+ for (int i = 0; i < num_urls; ++i)
+ free(server_url_list[i]);
+ free(server_url_list);
+ free(url_ima_policies);
+
+out:
+ free (server_urls);
+ json_object_put(json_metadata);
+ /* Reset sent headers */
+ curl_slist_free_all (client->headers);
+ client->headers = NULL;
+ client->user_agent_set_p = 0;
+
+ free (target_cache_dir);
+ free (target_cache_path);
+ free (target_cache_tmppath);
+ free (key_and_value);
+ free(target_file_name);
+ free (cache_path);
+
+ return rc;
+}
+
+
/* Add an outgoing HTTP header. */
int debuginfod_add_http_header (debuginfod_client *client, const char* header)
{
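For illustration only (not part of the commit): the client code above merges
each server's response into a single JSON document with "results" and
"complete" members. A hedged sketch of walking the cached file with the same
json-c calls the patch uses follows; the per-entry "buildid" member name is
an assumption taken from the commit message rather than spelled out in this
hunk.

    /* Hypothetical example, not from this patch. The "buildid" member
       name is assumed from the commit message. */
    #include <stdio.h>
    #include <json-c/json.h>

    static void
    print_buildids (const char *cache_path)
    {
      json_object *md = json_object_from_file (cache_path);
      if (md == NULL)
        return;

      json_object *results;
      if (json_object_object_get_ex (md, "results", &results))
        for (int j = 0, n = json_object_array_length (results); j < n; j++)
          {
            json_object *entry = json_object_array_get_idx (results, j);
            json_object *buildid;
            if (json_object_object_get_ex (entry, "buildid", &buildid))
              printf ("%s\n", json_object_get_string (buildid));
          }

      json_object *complete;
      if (json_object_object_get_ex (md, "complete", &complete))
        fprintf (stderr, "result set %s\n",
                 json_object_get_boolean (complete) ? "complete" : "incomplete");

      json_object_put (md); /* releases the whole tree */
    }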
diff --git a/debuginfod/debuginfod-find.c b/debuginfod/debuginfod-find.c
index 30731098..b9b52455 100644
--- a/debuginfod/debuginfod-find.c
+++ b/debuginfod/debuginfod-find.c
@@ -1,6 +1,6 @@
/* Command-line frontend for retrieving ELF / DWARF / source files
from the debuginfod.
- Copyright (C) 2019-2020 Red Hat, Inc.
+ Copyright (C) 2019-2023 Red Hat, Inc.
This file is part of elfutils.
This file is free software; you can redistribute it and/or modify
@@ -30,7 +30,7 @@
#include <fcntl.h>
#include <gelf.h>
#include <libdwelf.h>
-
+#include <json-c/json.h>
/* Name and version of program. */
ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
@@ -49,9 +49,10 @@ static const char args_doc[] = N_("debuginfo BUILDID\n"
"executable PATH\n"
"source BUILDID /FILENAME\n"
"source PATH /FILENAME\n"
- "section BUILDID SECTION-NAME\n"
- "section PATH SECTION-NAME\n");
-
+ "section BUILDID SECTION-NAME\n"
+ "section PATH SECTION-NAME\n"
+ "metadata (glob|file|KEY) (GLOB|FILENAME|VALUE)\n"
+ );
/* Definitions of arguments for argp functions. */
static const struct argp_option options[] =
@@ -145,49 +146,60 @@ main(int argc, char** argv)
/* If we were passed an ELF file name in the BUILDID slot, look in there. */
unsigned char* build_id = (unsigned char*) argv[remaining+1];
int build_id_len = 0; /* assume text */
-
- int any_non_hex = 0;
- int i;
- for (i = 0; build_id[i] != '\0'; i++)
- if ((build_id[i] >= '0' && build_id[i] <= '9') ||
- (build_id[i] >= 'a' && build_id[i] <= 'f'))
- ;
- else
- any_non_hex = 1;
-
- int fd = -1;
Elf* elf = NULL;
- if (any_non_hex) /* raw build-id */
- {
- fd = open ((char*) build_id, O_RDONLY);
- if (fd < 0)
- fprintf (stderr, "Cannot open %s: %s\n", build_id, strerror(errno));
- }
- if (fd >= 0)
- {
- elf = dwelf_elf_begin (fd);
- if (elf == NULL)
- fprintf (stderr, "Cannot open as ELF file %s: %s\n", build_id,
- elf_errmsg (-1));
- }
- if (elf != NULL)
+
+ /* Process optional buildid given via ELF file name, for some query types only. */
+ if (strcmp(argv[remaining], "debuginfo") == 0
+ || strcmp(argv[remaining], "executable") == 0
+ || strcmp(argv[remaining], "source") == 0
+ || strcmp(argv[remaining], "section") == 0)
{
- const void *extracted_build_id;
- ssize_t s = dwelf_elf_gnu_build_id(elf, &extracted_build_id);
- if (s > 0)
+ int any_non_hex = 0;
+ int i;
+ for (i = 0; build_id[i] != '\0'; i++)
+ if ((build_id[i] >= '0' && build_id[i] <= '9') ||
+ (build_id[i] >= 'a' && build_id[i] <= 'f'))
+ ;
+ else
+ any_non_hex = 1;
+
+ int fd = -1;
+ if (any_non_hex) /* raw build-id */
{
- /* Success: replace the build_id pointer/len with the binary blob
- that elfutils is keeping for us. It'll remain valid until elf_end(). */
- build_id = (unsigned char*) extracted_build_id;
- build_id_len = s;
+ fd = open ((char*) build_id, O_RDONLY);
+ if (fd < 0)
+ fprintf (stderr, "Cannot open %s: %s\n", build_id, strerror(errno));
+ }
+ if (fd >= 0)
+ {
+ elf = dwelf_elf_begin (fd);
+ if (elf == NULL)
+ fprintf (stderr, "Cannot open as ELF file %s: %s\n", build_id,
+ elf_errmsg (-1));
+ }
+ if (elf != NULL)
+ {
+ const void *extracted_build_id;
+ ssize_t s = dwelf_elf_gnu_build_id(elf, &extracted_build_id);
+ if (s > 0)
+ {
+ /* Success: replace the build_id pointer/len with the binary blob
+ that elfutils is keeping for us. It'll remain valid until elf_end(). */
+ build_id = (unsigned char*) extracted_build_id;
+ build_id_len = s;
+ }
+ else
+ fprintf (stderr, "Cannot extract build-id from %s: %s\n", build_id, elf_errmsg(-1));
}
- else
- fprintf (stderr, "Cannot extract build-id from %s: %s\n", build_id, elf_errmsg(-1));
}
char *cache_name;
int rc = 0;
+ /* By default the stdout output is the path of the cached file.
+ Some requests (e.g. metadata queries) may instead choose to emit different
+ output, in that case a stringified JSON object. */
+ bool print_cached_file = true;
/* Check whether FILETYPE is valid and call the appropriate
debuginfod_find_* function. If FILETYPE is "source"
then ensure a FILENAME was also supplied as an argument. */
@@ -221,6 +233,35 @@ main(int argc, char** argv)
rc = debuginfod_find_section(client, build_id, build_id_len,
argv[remaining+2], &cache_name);
}
+ else if (strcmp(argv[remaining], "metadata") == 0) /* no buildid! */
+ {
+ if (remaining+2 == argc)
+ {
+ fprintf(stderr, "Require KEY and VALUE for \"metadata\"\n");
+ return 1;
+ }
+
+ rc = debuginfod_find_metadata (client, argv[remaining+1], argv[remaining+2],
+ &cache_name);
+      /* We output a pretty-printed JSON object, not the regular debuginfod-find cached file path.  */
+ print_cached_file = false;
+ json_object *metadata = json_object_from_file(cache_name);
+ if(metadata)
+ {
+ printf("%s\n", json_object_to_json_string_ext(metadata,
+ JSON_C_TO_STRING_PRETTY
+#ifdef JSON_C_TO_STRING_NOSLASHESCAPE /* json-c 0.15 */
+ | JSON_C_TO_STRING_NOSLASHESCAPE
+#endif
+ ));
+ json_object_put(metadata);
+ }
+ else
+ {
+ fprintf(stderr, "%s does not contain a valid JSON format object\n", cache_name);
+ return 1;
+ }
+ }
else
{
argp_help (&argp, stderr, ARGP_HELP_USAGE, argv[0]);
@@ -240,8 +281,6 @@ main(int argc, char** argv)
debuginfod_end (client);
if (elf)
elf_end(elf);
- if (fd >= 0)
- close (fd);
if (rc < 0)
{
@@ -251,7 +290,7 @@ main(int argc, char** argv)
else
close (rc);
- printf("%s\n", cache_name);
+ if(print_cached_file) printf("%s\n", cache_name);
free (cache_name);
return 0;
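
For reference, the json-c load-and-pretty-print pattern used by
debuginfod-find above, in isolation (a minimal sketch; the input file
name is hypothetical):

    /* Sketch only: mirrors the json_object_from_file /
       json_object_to_json_string_ext usage in debuginfod-find.c.
       Build with: gcc demo.c $(pkg-config --cflags --libs json-c)  */
    #include <stdio.h>
    #include <json-c/json.h>

    int
    main (int argc, char **argv)
    {
      const char *file = (argc > 1) ? argv[1] : "metadata.json"; /* hypothetical */
      json_object *obj = json_object_from_file (file);
      if (obj == NULL)
        {
          fprintf (stderr, "%s does not contain a valid JSON object\n", file);
          return 1;
        }
      printf ("%s\n", json_object_to_json_string_ext (obj, JSON_C_TO_STRING_PRETTY));
      json_object_put (obj); /* release our reference */
      return 0;
    }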
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 8c329858..39e0dc61 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -1,5 +1,5 @@
/* Debuginfo-over-http server.
- Copyright (C) 2019-2021 Red Hat, Inc.
+ Copyright (C) 2019-2023 Red Hat, Inc.
Copyright (C) 2021, 2022 Mark J. Wielaard <mark@klomp.org>
This file is part of elfutils.
@@ -68,6 +68,7 @@ extern "C" {
#include <unistd.h>
#include <fcntl.h>
#include <netdb.h>
+#include <fnmatch.h>
/* If fts.h is included before config.h, its indirect inclusions may not
@@ -135,6 +136,8 @@ using namespace std;
#define tid() pthread_self()
#endif
+#include <json-c/json.h>
+
inline bool
string_endswith(const string& haystack, const string& needle)
@@ -206,7 +209,7 @@ static const char DEBUGINFOD_SQLITE_DDL[] =
" foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
" primary key (buildid, file, mtime)\n"
" ) " WITHOUT_ROWID ";\n"
- // Index for faster delete by file identifier
+ // Index for faster delete by file identifier and metadata searches
"create index if not exists " BUILDIDS "_f_de_idx on " BUILDIDS "_f_de (file, mtime);\n"
"create table if not exists " BUILDIDS "_f_s (\n"
" buildid integer not null,\n"
@@ -232,6 +235,8 @@ static const char DEBUGINFOD_SQLITE_DDL[] =
" ) " WITHOUT_ROWID ";\n"
// Index for faster delete by archive file identifier
"create index if not exists " BUILDIDS "_r_de_idx on " BUILDIDS "_r_de (file, mtime);\n"
+ // Index for metadata searches
+ "create index if not exists " BUILDIDS "_r_de_idx2 on " BUILDIDS "_r_de (content);\n"
"create table if not exists " BUILDIDS "_r_sref (\n" // outgoing dwarf sourcefile references from rpm
" buildid integer not null,\n"
" artifactsrc integer not null,\n"
@@ -440,6 +445,9 @@ static const struct argp_option options[] =
{ "scan-checkpoint", ARGP_SCAN_CHECKPOINT, "NUM", 0, "Number of files scanned before a WAL checkpoint.", 0 },
#define ARGP_KEY_KOJI_SIGCACHE 0x100B
{ "koji-sigcache", ARGP_KEY_KOJI_SIGCACHE, NULL, 0, "Do a koji specific mapping of rpm paths to get IMA signatures.", 0 },
+#define ARGP_KEY_METADATA_MAXTIME 0x100C
+ { "metadata-maxtime", ARGP_KEY_METADATA_MAXTIME, "SECONDS", 0,
+ "Number of seconds to limit metadata query run time, 0=unlimited.", 0 },
{ NULL, 0, NULL, 0, NULL, 0 },
};
@@ -496,6 +504,7 @@ static string tmpdir;
static bool passive_p = false;
static long scan_checkpoint = 256;
static bool requires_koji_sigcache_mapping = false;
+static unsigned metadata_maxtime_s = 5;
static void set_metric(const string& key, double value);
// static void inc_metric(const string& key);
@@ -700,7 +709,10 @@ parse_opt (int key, char *arg,
case ARGP_SCAN_CHECKPOINT:
scan_checkpoint = atol (arg);
if (scan_checkpoint < 0)
- argp_failure(state, 1, EINVAL, "scan checkpoint");
+ argp_failure(state, 1, EINVAL, "scan checkpoint");
+ break;
+ case ARGP_KEY_METADATA_MAXTIME:
+ metadata_maxtime_s = (unsigned) atoi(arg);
break;
case ARGP_KEY_KOJI_SIGCACHE:
requires_koji_sigcache_mapping = true;
@@ -2332,6 +2344,58 @@ handle_buildid_r_match (bool internal_req_p,
return r;
}
+void
+add_client_federation_headers(debuginfod_client *client, MHD_Connection* conn){
+ // Transcribe incoming User-Agent:
+ string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
+ string ua_complete = string("User-Agent: ") + ua;
+ debuginfod_add_http_header (client, ua_complete.c_str());
+
+ // Compute larger XFF:, for avoiding info loss during
+ // federation, and for future cyclicity detection.
+ string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
+ if (xff != "")
+ xff += string(", "); // comma separated list
+
+ unsigned int xff_count = 0;
+ for (auto&& i : xff){
+ if (i == ',') xff_count++;
+ }
+
+ // if X-Forwarded-For: exceeds N hops,
+ // do not delegate a local lookup miss to upstream debuginfods.
+ if (xff_count >= forwarded_ttl_limit)
+    throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found, --forwarded-ttl-limit reached \
+and will not query the upstream servers");
+
+ // Compute the client's numeric IP address only - so can't merge with conninfo()
+ const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
+ MHD_CONNECTION_INFO_CLIENT_ADDRESS);
+ struct sockaddr *so = u ? u->client_addr : 0;
+ char hostname[256] = ""; // RFC1035
+ if (so && so->sa_family == AF_INET) {
+ (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0,
+ NI_NUMERICHOST);
+ } else if (so && so->sa_family == AF_INET6) {
+ struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so;
+ if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) {
+ struct sockaddr_in addr4;
+ memset (&addr4, 0, sizeof(addr4));
+ addr4.sin_family = AF_INET;
+ addr4.sin_port = addr6->sin6_port;
+ memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr));
+ (void) getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4),
+ hostname, sizeof (hostname), NULL, 0,
+ NI_NUMERICHOST);
+ } else {
+ (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0,
+ NI_NUMERICHOST);
+ }
+ }
+
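+  // For example (illustrative addresses): a request from client 5.6.7.8
+  // that already carries "X-Forwarded-For: 1.2.3.4" is relayed upstream
+  // with "X-Forwarded-For: 1.2.3.4, 5.6.7.8".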
+ string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname);
+ debuginfod_add_http_header (client, xff_complete.c_str());
+}
static struct MHD_Response*
handle_buildid_match (bool internal_req_p,
@@ -2565,58 +2629,8 @@ handle_buildid (MHD_Connection* conn,
debuginfod_set_progressfn (client, & debuginfod_find_progress);
if (conn)
- {
- // Transcribe incoming User-Agent:
- string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
- string ua_complete = string("User-Agent: ") + ua;
- debuginfod_add_http_header (client, ua_complete.c_str());
-
- // Compute larger XFF:, for avoiding info loss during
- // federation, and for future cyclicity detection.
- string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
- if (xff != "")
- xff += string(", "); // comma separated list
-
- unsigned int xff_count = 0;
- for (auto&& i : xff){
- if (i == ',') xff_count++;
- }
+ add_client_federation_headers(client, conn);
- // if X-Forwarded-For: exceeds N hops,
- // do not delegate a local lookup miss to upstream debuginfods.
- if (xff_count >= forwarded_ttl_limit)
- throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found, --forwared-ttl-limit reached \
-and will not query the upstream servers");
-
- // Compute the client's numeric IP address only - so can't merge with conninfo()
- const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
- MHD_CONNECTION_INFO_CLIENT_ADDRESS);
- struct sockaddr *so = u ? u->client_addr : 0;
- char hostname[256] = ""; // RFC1035
- if (so && so->sa_family == AF_INET) {
- (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0,
- NI_NUMERICHOST);
- } else if (so && so->sa_family == AF_INET6) {
- struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so;
- if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) {
- struct sockaddr_in addr4;
- memset (&addr4, 0, sizeof(addr4));
- addr4.sin_family = AF_INET;
- addr4.sin_port = addr6->sin6_port;
- memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr));
- (void) getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4),
- hostname, sizeof (hostname), NULL, 0,
- NI_NUMERICHOST);
- } else {
- (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0,
- NI_NUMERICHOST);
- }
- }
-
- string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname);
- debuginfod_add_http_header (client, xff_complete.c_str());
- }
-
if (artifacttype == "debuginfo")
fd = debuginfod_find_debuginfo (client,
(const unsigned char*) buildid.c_str(),
@@ -2633,7 +2647,7 @@ and will not query the upstream servers");
fd = debuginfod_find_section (client,
(const unsigned char*) buildid.c_str(),
0, section.c_str(), NULL);
-
+
if (fd >= 0)
{
if (conn != 0)
@@ -2827,6 +2841,223 @@ handle_metrics (off_t* size)
return r;
}
+
+static struct MHD_Response*
+handle_metadata (MHD_Connection* conn,
+ string key, string value, off_t* size)
+{
+ MHD_Response* r;
+ sqlite3 *thisdb = dbq;
+
+  // Query locally for matching executable (e) and debuginfo (d) files
+ string op;
+ if (key == "glob")
+ op = "glob";
+ else if (key == "file")
+ op = "=";
+ else
+ throw reportable_exception("/metadata webapi error, unsupported key");
+
+ // Since PR30378, the file names are segmented into two tables. We
+ // could do a glob/= search over the _files_v view that combines
+ // them, but that means that the entire _files_v thing has to be
+ // materialized & scanned to do the query. Slow! Instead, we can
+ // segment the incoming file/glob pattern into dirname / basename
+ // parts, and apply them to the corresponding table. This is done
+  // by splitting the value at the last "/"; if absent, the dirname is
+  // empty and the basename is the whole value, the same convention as
+  // is used in register_file_name().
+
+ string dirname, basename;
+ size_t slash = value.rfind('/');
+ if (slash == std::string::npos) {
+ dirname = "";
+ basename = value;
+ } else {
+ dirname = value.substr(0, slash);
+ basename = value.substr(slash+1);
+ }
+
+ // NB: further optimization is possible: replacing the 'glob' op
+ // with simple equality, if the corresponding value segment lacks
+ // metacharacters. sqlite may or may not be smart enough to do so,
+ // so we help out.
+ string metacharacters = "[]*?";
+ string dop = (op == "glob" && dirname.find_first_of(metacharacters) == string::npos) ? "=" : op;
+ string bop = (op == "glob" && basename.find_first_of(metacharacters) == string::npos) ? "=" : op;
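+  // For example, value "/usr/bin/ls" splits into dirname "/usr/bin" and
+  // basename "ls"; neither contains a metacharacter, so dop and bop both
+  // collapse to "=".  For value "/usr/l?cal/*", both remain "glob".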
+
+ string sql = string(
+ // explicit query r_de and f_de once here, rather than the query_d and query_e
+ // separately, because they scan the same tables, so we'd double the work
+ "select d1.executable_p, d1.debuginfo_p, 0 as source_p, "
+ " b1.hex, f1d.name || '/' || f1b.name as file, a1.name as archive "
+ "from " BUILDIDS "_r_de d1, " BUILDIDS "_files f1, " BUILDIDS "_fileparts f1b, " BUILDIDS "_fileparts f1d, "
+ BUILDIDS "_buildids b1, " BUILDIDS "_files_v a1 "
+ "where f1.id = d1.content and a1.id = d1.file and d1.buildid = b1.id "
+ " and f1d.name " + dop + " ? and f1b.name " + bop + " ? and f1.dirname = f1d.id and f1.basename = f1b.id "
+ "union all \n"
+ "select d2.executable_p, d2.debuginfo_p, 0, "
+ " b2.hex, f2d.name || '/' || f2b.name, NULL "
+ "from " BUILDIDS "_f_de d2, " BUILDIDS "_files f2, " BUILDIDS "_fileparts f2b, " BUILDIDS "_fileparts f2d, "
+ BUILDIDS "_buildids b2 "
+ "where f2.id = d2.file and d2.buildid = b2.id "
+ " and f2d.name " + dop + " ? and f2b.name " + bop + " ? "
+ " and f2.dirname = f2d.id and f2.basename = f2b.id");
+
+ // NB: we could query source file names too, thusly:
+ //
+ // select * from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f1, " BUILDIDS "_r_sref sr
+ // where b.id = sr.buildid and f1.id = sr.artifactsrc and f1.name " + op + "?"
+ // UNION ALL something with BUILDIDS "_f_s"
+ //
+ // But the first part of this query cannot run fast without the same index temp-created
+ // during "maxigroom":
+ // create index " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);
+ // and unfortunately this index is HUGE. It's similar to the size of the _r_sref
+ // table, which is already the largest part of a debuginfod index. Adding that index
+ // would nearly double the .sqlite db size.
+
+ sqlite_ps *pp = new sqlite_ps (thisdb, "mhd-query-meta-glob", sql);
+ pp->reset();
+ pp->bind(1, dirname);
+ pp->bind(2, basename);
+ pp->bind(3, dirname);
+ pp->bind(4, basename);
+ unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return
+
+ json_object *metadata = json_object_new_object();
+ if (!metadata) throw libc_exception(ENOMEM, "json allocation");
+ defer_dtor<json_object*,int> metadata_d(metadata, json_object_put);
+ json_object *metadata_arr = json_object_new_array();
+ if (!metadata_arr) throw libc_exception(ENOMEM, "json allocation");
+ json_object_object_add(metadata, "results", metadata_arr);
+ // consume all the rows
+ struct timespec ts_start;
+ clock_gettime (CLOCK_MONOTONIC, &ts_start);
+
+ int rc;
+ bool metadata_complete = true;
+ while (SQLITE_DONE != (rc = pp->step()))
+ {
+ // break out of loop if we have searched too long
+ struct timespec ts_end;
+ clock_gettime (CLOCK_MONOTONIC, &ts_end);
+ double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
+ if (metadata_maxtime_s > 0 && deltas > metadata_maxtime_s){
+ metadata_complete = false;
+ break;
+ }
+
+ if (rc != SQLITE_ROW) throw sqlite_exception(rc, "step");
+
+ int m_executable_p = sqlite3_column_int (*pp, 0);
+ int m_debuginfo_p = sqlite3_column_int (*pp, 1);
+ int m_source_p = sqlite3_column_int (*pp, 2);
+ string m_buildid = (const char*) sqlite3_column_text (*pp, 3) ?: ""; // should always be non-null
+ string m_file = (const char*) sqlite3_column_text (*pp, 4) ?: "";
+ string m_archive = (const char*) sqlite3_column_text (*pp, 5) ?: "";
+
+ // Confirm that m_file matches in the fnmatch(FNM_PATHNAME)
+ // sense, since sqlite's GLOB operator is a looser filter.
+ if (key == "glob" && fnmatch(value.c_str(), m_file.c_str(), FNM_PATHNAME) != 0)
+ continue;
+
+ auto add_metadata = [metadata_arr, m_buildid, m_file, m_archive](const string& type) {
+ json_object* entry = json_object_new_object();
+ if (NULL == entry) throw libc_exception (ENOMEM, "cannot allocate json");
+ defer_dtor<json_object*,int> entry_d(entry, json_object_put);
+
+ auto add_entry_metadata = [entry](const char* k, string v) {
+ json_object* s;
+ if(v != "") {
+ s = json_object_new_string(v.c_str());
+ if (NULL == s) throw libc_exception (ENOMEM, "cannot allocate json");
+ json_object_object_add(entry, k, s);
+ }
+ };
+
+ add_entry_metadata("type", type.c_str());
+ add_entry_metadata("buildid", m_buildid);
+ add_entry_metadata("file", m_file);
+ if (m_archive != "") add_entry_metadata("archive", m_archive);
+ if (verbose > 3)
+ obatched(clog) << "metadata found local "
+ << json_object_to_json_string_ext(entry,
+ JSON_C_TO_STRING_PRETTY)
+ << endl;
+
+ // Increase ref count to switch its ownership
+ json_object_array_add(metadata_arr, json_object_get(entry));
+ };
+
+ if (m_executable_p) add_metadata("executable");
+ if (m_debuginfo_p) add_metadata("debuginfo");
+ if (m_source_p) add_metadata("source");
+ }
+ pp->reset();
+
+ unsigned num_local_results = json_object_array_length(metadata_arr);
+
+ // Query upstream as well
+ debuginfod_client *client = debuginfod_pool_begin();
+ if (client != NULL)
+ {
+ add_client_federation_headers(client, conn);
+
+ int upstream_metadata_fd;
+ char *upstream_metadata_file = NULL;
+ upstream_metadata_fd = debuginfod_find_metadata(client, key.c_str(), (char*)value.c_str(),
+ &upstream_metadata_file);
+ if (upstream_metadata_fd >= 0) {
+ /* json-c >= 0.13 has json_object_from_fd(). */
+ json_object *upstream_metadata_json = json_object_from_file(upstream_metadata_file);
+ free (upstream_metadata_file);
+ json_object *upstream_metadata_json_arr;
+ json_object *upstream_complete;
+ if (NULL != upstream_metadata_json &&
+ json_object_object_get_ex(upstream_metadata_json, "results", &upstream_metadata_json_arr) &&
+ json_object_object_get_ex(upstream_metadata_json, "complete", &upstream_complete))
+ {
+ metadata_complete &= json_object_get_boolean(upstream_complete);
+ for (int i = 0, n = json_object_array_length(upstream_metadata_json_arr); i < n; i++) {
+ json_object *entry = json_object_array_get_idx(upstream_metadata_json_arr, i);
+ if (verbose > 3)
+ obatched(clog) << "metadata found remote "
+ << json_object_to_json_string_ext(entry,
+ JSON_C_TO_STRING_PRETTY)
+ << endl;
+
+ json_object_get(entry); // increment reference count
+ json_object_array_add(metadata_arr, entry);
+ }
+ json_object_put(upstream_metadata_json);
+ }
+ close(upstream_metadata_fd);
+ }
+ debuginfod_pool_end (client);
+ }
+
+ unsigned num_total_results = json_object_array_length(metadata_arr);
+
+ if (verbose > 2)
+ obatched(clog) << "metadata found local=" << num_local_results
+ << " remote=" << (num_total_results-num_local_results)
+ << " total=" << num_total_results
+ << endl;
+
+ json_object_object_add(metadata, "complete", json_object_new_boolean(metadata_complete));
+ const char* metadata_str = json_object_to_json_string(metadata);
+ if (!metadata_str)
+ throw libc_exception (ENOMEM, "cannot allocate json");
+ r = MHD_create_response_from_buffer (strlen(metadata_str),
+ (void*) metadata_str,
+ MHD_RESPMEM_MUST_COPY);
+ *size = strlen(metadata_str);
+ if (r)
+ add_mhd_response_header(r, "Content-Type", "application/json");
+ return r;
+}
+
+
static struct MHD_Response*
handle_root (off_t* size)
{
@@ -2893,6 +3124,7 @@ handler_cb (void * /*cls*/,
clock_gettime (CLOCK_MONOTONIC, &ts_start);
double afteryou = 0.0;
string artifacttype, suffix;
+ string urlargs; // for logging
try
{
@@ -2961,6 +3193,19 @@ handler_cb (void * /*cls*/,
inc_metric("http_requests_total", "type", artifacttype);
r = handle_metrics(& http_size);
}
+ else if (url1 == "/metadata")
+ {
+ tmp_inc_metric m ("thread_busy", "role", "http-metadata");
+ const char* key = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "key");
+ const char* value = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "value");
+ if (NULL == value || NULL == key)
+ throw reportable_exception("/metadata webapi error, need key and value");
+
+ urlargs = string("?key=") + string(key) + string("&value=") + string(value); // apprx., for logging
+ artifacttype = "metadata";
+ inc_metric("http_requests_total", "type", artifacttype);
+ r = handle_metadata(connection, key, value, &http_size);
+ }
else if (url1 == "/")
{
artifacttype = "/";
@@ -2997,7 +3242,7 @@ handler_cb (void * /*cls*/,
// afteryou: delay waiting for other client's identical query to complete
// deltas: total latency, including afteryou waiting
obatched(clog) << conninfo(connection)
- << ' ' << method << ' ' << url
+ << ' ' << method << ' ' << url << urlargs
<< ' ' << http_code << ' ' << http_size
<< ' ' << (int)(afteryou*1000) << '+' << (int)((deltas-afteryou)*1000) << "ms"
<< endl;
@@ -3352,6 +3597,7 @@ register_file_name(sqlite_ps& ps_upsert_fileparts,
dirname = name.substr(0, slash);
basename = name.substr(slash+1);
}
+ // NB: see also handle_metadata()
// intern the two substrings
ps_upsert_fileparts
@@ -4318,12 +4564,13 @@ void groom()
if (interrupted) return;
// NB: "vacuum" is too heavy for even daily runs: it rewrites the entire db, so is done as maxigroom -G
- sqlite_ps g1 (db, "incremental vacuum", "pragma incremental_vacuum");
- g1.reset().step_ok_done();
- sqlite_ps g2 (db, "optimize", "pragma optimize");
- g2.reset().step_ok_done();
- sqlite_ps g3 (db, "wal checkpoint", "pragma wal_checkpoint=truncate");
- g3.reset().step_ok_done();
+ { sqlite_ps g (db, "incremental vacuum", "pragma incremental_vacuum"); g.reset().step_ok_done(); }
+ // https://www.sqlite.org/lang_analyze.html#approx
+ { sqlite_ps g (db, "analyze setup", "pragma analysis_limit = 1000;\n"); g.reset().step_ok_done(); }
+ { sqlite_ps g (db, "analyze", "analyze"); g.reset().step_ok_done(); }
+ { sqlite_ps g (db, "analyze reload", "analyze sqlite_schema"); g.reset().step_ok_done(); }
+ { sqlite_ps g (db, "optimize", "pragma optimize"); g.reset().step_ok_done(); }
+ { sqlite_ps g (db, "wal checkpoint", "pragma wal_checkpoint=truncate"); g.reset().step_ok_done(); }
database_stats_report();
@@ -4695,6 +4942,8 @@ main (int argc, char *argv[])
if (maxigroom)
{
obatched(clog) << "maxigrooming database, please wait." << endl;
+ // NB: this index alone can nearly double the database size!
+ // NB: this index would be necessary to run source-file metadata searches fast
extra_ddl.push_back("create index if not exists " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);");
extra_ddl.push_back("delete from " BUILDIDS "_r_sdef where not exists (select 1 from " BUILDIDS "_r_sref b where " BUILDIDS "_r_sdef.content = b.artifactsrc);");
extra_ddl.push_back("drop index if exists " BUILDIDS "_r_sref_arc;");
diff --git a/debuginfod/debuginfod.h.in b/debuginfod/debuginfod.h.in
index 73f633f0..3936b17b 100644
--- a/debuginfod/debuginfod.h.in
+++ b/debuginfod/debuginfod.h.in
@@ -63,9 +63,9 @@ debuginfod_client *debuginfod_begin (void);
it is a binary blob of given length.
If successful, return a file descriptor to the target, otherwise
- return a posix error code. If successful, set *path to a
- strdup'd copy of the name of the same file in the cache.
- Caller must free() it later. */
+ return a negative POSIX error code. If successful, set *path to a
+ strdup'd copy of the name of the same file in the cache. Caller
+ must free() it later. */
int debuginfod_find_debuginfo (debuginfod_client *client,
const unsigned char *build_id,
@@ -89,6 +89,27 @@ int debuginfod_find_section (debuginfod_client *client,
const char *section,
char **path);
+/* Query the urls contained in $DEBUGINFOD_URLS for metadata
+ with given query key/value.
+
+ If successful, return a file descriptor to the JSON document
+ describing matches, otherwise return a negative POSIX error code. If
+ successful, set *path to a strdup'd copy of the name of the same
+ file in the cache. Caller must free() it later.
+
+   key can be one of 'file' or 'glob', querying for value as an exact
+   file name or as a glob pattern, respectively.
+
+   The JSON document will be of the form {results: [{...}, ...], complete: <bool>},
+   where results is an array of JSON objects containing metadata, and
+   complete is true iff every server in the federation responded with
+   complete results (as opposed to one or more failing to respond or
+   reporting an error).  */
+int debuginfod_find_metadata (debuginfod_client *client,
+ const char *key,
+ char* value,
+ char **path);
+
typedef int (*debuginfod_progressfn_t)(debuginfod_client *c, long a, long b);
void debuginfod_set_progressfn(debuginfod_client *c,
debuginfod_progressfn_t fn);
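
For illustration, a minimal caller of the new entry point (a sketch,
assuming a server is reachable via $DEBUGINFOD_URLS; the glob pattern
is arbitrary):

    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include <elfutils/debuginfod.h>

    int
    main (void)
    {
      debuginfod_client *client = debuginfod_begin ();
      if (client == NULL)
        return 1;
      char value[] = "/usr/bin/*";        /* arbitrary example pattern */
      char *path = NULL;
      int fd = debuginfod_find_metadata (client, "glob", value, &path);
      if (fd >= 0)
        {
          printf ("JSON results cached at %s\n", path); /* path is strdup'd */
          free (path);
          close (fd);
        }
      debuginfod_end (client);
      return fd >= 0 ? 0 : 1;
    }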
diff --git a/debuginfod/libdebuginfod.map b/debuginfod/libdebuginfod.map
index 6334373f..355a89fd 100644
--- a/debuginfod/libdebuginfod.map
+++ b/debuginfod/libdebuginfod.map
@@ -22,3 +22,6 @@ ELFUTILS_0.188 {
debuginfod_get_headers;
debuginfod_find_section;
} ELFUTILS_0.183;
+ELFUTILS_0.190 {
+ debuginfod_find_metadata;
+} ELFUTILS_0.188;
diff --git a/doc/ChangeLog b/doc/ChangeLog
index 914f8f64..d141e2fc 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,10 @@
+2023-04-12 Ryan Goldberg <rgoldberg@redhat.com>, Frank Ch. Eigler <fche@redhat.com>
+
+ PR29472: debuginfod metadata query
+ * debuginfod-find.1: Document metadata query.
+ * debuginfod-client-config.7: Document metadata cache control setting.
+ * debuginfod.8: Document new option and webapi.
+
2023-08-14 Ryan Goldberg <rgoldber@redhat.com>
* debuginfod-client-config.7: Document DEBUGINFOD_IMA_CERT_PATH,
diff --git a/doc/debuginfod-client-config.7 b/doc/debuginfod-client-config.7
index 11d24658..2d834cfe 100644
--- a/doc/debuginfod-client-config.7
+++ b/doc/debuginfod-client-config.7
@@ -168,3 +168,11 @@ are short-circuited (returning an immediate failure instead of sending
a new query to servers). This accelerates queries that probably would
still fail. The default is 600, 10 minutes. 0 means "forget
immediately".
+
+.TP
+.B metadata_retention_s
+This control file sets how long to remember the results of a metadata
+query. New queries for the same artifacts within this time window are
+short-circuited (repeating the same results). This accelerates
+queries that would probably have the same results. The
+default is 3600, 1 hour. 0 means "do not retain".
diff --git a/doc/debuginfod-find.1 b/doc/debuginfod-find.1
index 89a70672..711bfdd1 100644
--- a/doc/debuginfod-find.1
+++ b/doc/debuginfod-find.1
@@ -29,6 +29,8 @@ debuginfod-find \- request debuginfo-related data
.B debuginfod-find [\fIOPTION\fP]... source \fIBUILDID\fP \fI/FILENAME\fP
.br
.B debuginfod-find [\fIOPTION\fP]... source \fIPATH\fP \fI/FILENAME\fP
+.br
+.B debuginfod-find [\fIOPTION\fP]... metadata \fIKEY\fP \fIVALUE\fP
.SH DESCRIPTION
\fBdebuginfod-find\fP queries one or more \fBdebuginfod\fP servers for
@@ -119,6 +121,63 @@ l l.
\../bar/foo.c AT_comp_dir=/zoo/ source BUILDID /zoo//../bar/foo.c
.TE
+.SS metadata \fIKEY\fP \fIVALUE\fP
+
+All designated debuginfod servers are queried for metadata about files
+in their index. Different search keys may be supported by different
+servers.
+
+.TS
+l l l .
+KEY VALUE DESCRIPTION
+
+\fBfile\fP path exact match \fIpath\fP, including in archives
+\fBglob\fP pattern glob match \fIpattern\fP, including in archives
+.TE
+
+The resulting output will look something like the following:
+
+.nf
+{
+  "results":[
+    {
+      "type":"executable",
+      "buildid":"f0aa15b8aba4f3c28cac3c2a73801fefa644a9f2",
+      "file":"/usr/local/bin/hello",
+      "archive":"/opt/elfutils/tests/test-2290642/R/rhel7/hello2-1.0-2.x86_64.rpm"
+    },
+    {
+      "type":"executable",
+      "buildid":"bc1febfd03ca05e030f0d205f7659db29f8a4b30",
+      "file":"hello2"
+    }
+  ],
+  "complete":true
+}
+.fi
+
+The results of the search are output to \fBstdout\fP as a JSON object
+containing an array of objects, supplying metadata about each match, as
+well as a boolean value corresponding to the completeness of the result.
+The result is considered complete if all of the queries to upstream servers
+returned complete results and the local query succeeded. This metadata report
+may be cached. It may be incomplete and may contain duplicates.
+Additional JSON object fields may be present.
+
+.TS
+l l l .
+NAME TYPE DESCRIPTION
+
+\fBbuildid\fP string hexadecimal buildid associated with the file
+\fBtype\fP string one of \fBdebuginfo\fP or \fBexecutable\fP
+\fBfile\fP string matched file name, outside or inside the archive
+\fBarchive\fP string archive containing matched file name, if any
+.TE
+
+Note that \fBtype\fP cannot be \fBsource\fP: performing such a search
+quickly enough would require additional indexing in the database, which
+would nearly double its size.
+
+The search always combines both files and archives in the results;
+at this time further granularity is not available.
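+.PP
+The JSON output is convenient to post-process; for example, the test
+suite counts the matches with \fBjq\fP(1):
+
+.nf
+debuginfod-find metadata glob "/usr/bin/*" | jq '.results | length'
+.fi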
+
.SH "OPTIONS"
.TP
diff --git a/doc/debuginfod.8 b/doc/debuginfod.8
index 3e738687..26f1472a 100644
--- a/doc/debuginfod.8
+++ b/doc/debuginfod.8
@@ -134,6 +134,14 @@ service load. Archive pattern options must still be given, so
debuginfod can recognize file name extensions for unpacking.
.TP
+.B "\-\-metadata\-maxtime=SECONDS"
+Impose a limit on the runtime of metadata webapi queries. These
+queries, especially broad "glob" wildcards, can take a large amount of
+time and produce large results. Public-facing servers may need to
+throttle them. The default limit is 5 seconds. Set it to 0 to disable
+the limit.
+
+.TP
.B "\-D SQL" "\-\-ddl=SQL"
Execute given sqlite statement after the database is opened and
initialized as extra DDL (SQL data definition language). This may be
@@ -403,6 +411,16 @@ The exact set of metrics and their meanings may change in future
versions. Caution: configuration information (path names, versions)
may be disclosed.
+.SS /metadata?key=\fIKEY\fP&value=\fIVALUE\fP
+
+This endpoint triggers a search of the files in the index plus any
+upstream federated servers, based on the given key and value. If
+successful, the result is an application/json textual object, listing
+metadata for the matched files. See \fIdebuginfod-find(1)\fP for
+documentation of the common key/value search parameters, and the
+resulting data schema.
+
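+For example (assuming a server listening on the default port 8002):
+
+.nf
+curl 'http://localhost:8002/metadata?key=glob&value=/usr/bin/*'
+.fi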
+
.SH DATA MANAGEMENT
debuginfod stores its index in an sqlite database in a densely packed
diff --git a/tests/ChangeLog b/tests/ChangeLog
index e52470c0..d934b13e 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,9 @@
+2023-04-12 Ryan Goldberg <rgoldberg@redhat.com>, Frank Ch. Eigler <fche@redhat.com>
+
+ PR29472: debuginfod metadata query
+ * run-debuginfod-find-metadata.sh: New test.
+ * Makefile.am: Run it, dist it.
+
2023-08-14 Ryan Goldberg <rgoldber@redhat.com>
* run-debuginfod-ima-verification.sh: New test.
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 130cc992..ef5b6bb5 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -261,12 +261,13 @@ TESTS += run-debuginfod-dlopen.sh \
run-debuginfod-federation-sqlite.sh \
run-debuginfod-federation-link.sh \
run-debuginfod-percent-escape.sh \
- run-debuginfod-x-forwarded-for.sh \
- run-debuginfod-response-headers.sh \
- run-debuginfod-extraction-passive.sh \
+ run-debuginfod-x-forwarded-for.sh \
+ run-debuginfod-response-headers.sh \
+ run-debuginfod-extraction-passive.sh \
run-debuginfod-webapi-concurrency.sh \
run-debuginfod-section.sh \
- run-debuginfod-IXr.sh
+ run-debuginfod-IXr.sh \
+ run-debuginfod-find-metadata.sh
endif
if !OLD_LIBMICROHTTPD
# Will crash on too old libmicrohttpd
@@ -591,7 +592,8 @@ EXTRA_DIST = run-arextract.sh run-arsymtest.sh run-ar.sh \
run-debuginfod-webapi-concurrency.sh \
run-debuginfod-section.sh \
run-debuginfod-IXr.sh \
- run-debuginfod-ima-verification.sh \
+ run-debuginfod-ima-verification.sh \
+ run-debuginfod-find-metadata.sh \
debuginfod-rpms/fedora30/hello2-1.0-2.src.rpm \
debuginfod-rpms/fedora30/hello2-1.0-2.x86_64.rpm \
debuginfod-rpms/fedora30/hello2-debuginfo-1.0-2.x86_64.rpm \
diff --git a/tests/debuginfod-subr.sh b/tests/debuginfod-subr.sh
index 108dff74..1ccf6420 100755
--- a/tests/debuginfod-subr.sh
+++ b/tests/debuginfod-subr.sh
@@ -26,6 +26,7 @@ type curl 2>/dev/null || (echo "need curl"; exit 77)
type rpm2cpio 2>/dev/null || (echo "need rpm2cpio"; exit 77)
type cpio 2>/dev/null || (echo "need cpio"; exit 77)
type bzcat 2>/dev/null || (echo "need bzcat"; exit 77)
+type ss 2>/dev/null || (echo "need ss"; exit 77)
bsdtar --version | grep -q zstd && zstd=true || zstd=false
echo "zstd=$zstd bsdtar=`bsdtar --version`"
diff --git a/tests/run-debuginfod-find-metadata.sh b/tests/run-debuginfod-find-metadata.sh
new file mode 100755
index 00000000..d518f028
--- /dev/null
+++ b/tests/run-debuginfod-find-metadata.sh
@@ -0,0 +1,112 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2022 Red Hat, Inc.
+# This file is part of elfutils.
+#
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# elfutils is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+. $srcdir/debuginfod-subr.sh
+
+# for test case debugging, uncomment:
+set -x
+# unset VALGRIND_CMD
+
+type curl 2>/dev/null || { echo "need curl"; exit 77; }
+type jq 2>/dev/null || { echo "need jq"; exit 77; }
+
+pkg-config json-c libcurl || { echo "one or more libraries are missing (libjson-c, libcurl)"; exit 77; }
+
+DB=${PWD}/.debuginfod_tmp.sqlite
+export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache
+tempfiles $DB ${DB}_2
+
+# This variable is essential and ensures no time-race for claiming ports occurs
+# set base to a unique multiple of 100 not used in any other 'run-debuginfod-*' test
+base=13100
+get_ports
+mkdir R D
+cp -rvp ${abs_srcdir}/debuginfod-rpms/rhel7 R
+cp -rvp ${abs_srcdir}/debuginfod-debs/*deb D
+
+env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${VALGRIND_CMD} ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -R \
+ -d $DB -p $PORT1 -t0 -g0 R > vlog$PORT1 2>&1 &
+PID1=$!
+tempfiles vlog$PORT1
+errfiles vlog$PORT1
+
+wait_ready $PORT1 'ready' 1
+wait_ready $PORT1 'thread_work_total{role="traverse"}' 1
+wait_ready $PORT1 'thread_work_pending{role="scan"}' 0
+wait_ready $PORT1 'thread_busy{role="scan"}' 0
+
+env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS="http://127.0.0.1:$PORT1 https://bad/url.web" ${VALGRIND_CMD} ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -U \
+ -d ${DB}_2 -p $PORT2 -t0 -g0 D > vlog$PORT2 2>&1 &
+PID2=$!
+tempfiles vlog$PORT2
+errfiles vlog$PORT2
+
+wait_ready $PORT2 'ready' 1
+wait_ready $PORT2 'thread_work_total{role="traverse"}' 1
+wait_ready $PORT2 'thread_work_pending{role="scan"}' 0
+wait_ready $PORT2 'thread_busy{role="scan"}' 0
+
+# have clients contact the new server
+export DEBUGINFOD_URLS=http://127.0.0.1:$PORT2
+
+tempfiles json.txt
+# Check that we find correct number of files, both via local and federated links
+RESULTJ=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../debuginfod/debuginfod-find metadata glob "/u?r/bin/*"`
+echo $RESULTJ
+N_FOUND=`echo $RESULTJ | jq '.results | length'`
+test $N_FOUND -eq 1
+RESULTJ=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../debuginfod/debuginfod-find metadata glob "/usr/lo?al/bin/*"`
+echo $RESULTJ
+N_FOUND=`echo $RESULTJ | jq '.results | length'`
+test $N_FOUND -eq 2
+
+
+# Query via the webapi as well
+curl http://127.0.0.1:$PORT2'/metadata?key=glob&value=/usr/bin/*hi*'
+test `curl -s http://127.0.0.1:$PORT2'/metadata?key=glob&value=/usr/bin/*hi*' | jq '.results[0].buildid == "f17a29b5a25bd4960531d82aa6b07c8abe84fa66"'` = 'true'
+test `curl -s http://127.0.0.1:$PORT2'/metadata?key=glob&value=/usr/bin/*hi*' | jq '.results[0].file == "/usr/bin/hithere"'` = 'true'
+test `curl -s http://127.0.0.1:$PORT2'/metadata?key=glob&value=/usr/bin/*hi*' | jq '.results[0].archive | test(".*hithere.*deb")'` = 'true'
+# Note we query the upstream server too, since the downstream will have an incomplete result due to the badurl
+test `curl -s http://127.0.0.1:$PORT1'/metadata?key=glob&value=/usr/bin/*hi*' | jq '.complete == true'` = 'true'
+test `curl -s http://127.0.0.1:$PORT2'/metadata?key=glob&value=/usr/bin/*hi*' | jq '.complete == false'` = 'true'
+
+# An empty results array is returned on server error or if the file does not exist
+RESULTJ=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../debuginfod/debuginfod-find metadata file "/this/isnt/there"`
+echo $RESULTJ
+test `echo $RESULTJ | jq ".results == [ ]" ` = 'true'
+
+kill $PID1
+kill $PID2
+wait $PID1
+wait $PID2
+PID1=0
+PID2=0
+
+# check it's still in cache
+RESULTJ=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../debuginfod/debuginfod-find metadata file "/usr/bin/hithere"`
+echo $RESULTJ
+test `echo $RESULTJ | jq ".results == [ ]" ` = 'true'
+
+# invalidate cache, retry previously successful query to now-dead servers
+echo 0 > $DEBUGINFOD_CACHE_PATH/metadata_retention_s
+RESULTJ=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../debuginfod/debuginfod-find metadata glob "/u?r/bin/*"`
+echo $RESULTJ
+test `echo $RESULTJ | jq ".results == [ ]" ` = 'true'
+test `echo $RESULTJ | jq ".complete == false" ` = 'true'
+
+exit 0