summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFrank Ch. Eigler <fche@redhat.com>2019-11-07 04:00:16 -0500
committerMark Wielaard <mark@klomp.org>2019-11-23 00:31:26 +0100
commit8d5a05a7f006968653f813529be1df253a881040 (patch)
tree1ea5957305aa8116e39cdbc8f8e0e83e2ad31274
parent0587c28365ccb119f860c1b3ca115a6105baadc8 (diff)
debuginfod 5: add /metrics endpoint
This webapi extension allows admins to hook up debuginfod to a prometheus-compatible monitoring system for general situational statistics. The metrics are simple enough that local curl requests can give a user a sense of what's going on. The metrics are documented as unstable with respect to future versions.
-rw-r--r--debuginfod/ChangeLog11
-rw-r--r--debuginfod/debuginfod.cxx230
-rw-r--r--doc/debuginfod.813
-rw-r--r--tests/ChangeLog5
-rwxr-xr-xtests/run-debuginfod-find.sh13
5 files changed, 214 insertions, 58 deletions
diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 2870b6df..47efb9b4 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,14 @@
+2019-11-07 Frank Ch. Eigler <fche@redhat.com>
+
+ * debuginfod.cxx: Add /metrics endpoint. Add numerous
+ calls to new functions inc_metric/set_metric to populate
+ threadsafe map containing stats. Add http content-type
+ response headers throughout.
+ (thread_main_*): Simplify counter/timer flow.
+ (main): Reorder web service shutdown to leave http running
+ as long as possible.
+ * debuginfod.8: Document it, add security caution.
+
2019-11-06 Frank Ch. Eigler <fche@redhat.com>
* debuginfod.cxx: Add new -L (symlink-following) mode.
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 8c8f2658..e2535cef 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -72,6 +72,7 @@ extern "C" {
#include <cstring>
#include <vector>
#include <set>
+#include <map>
#include <string>
#include <iostream>
#include <iomanip>
@@ -377,6 +378,13 @@ static regex_t file_exclude_regex;
static int test_webapi_sleep; /* testing only */
static bool traverse_logical;
+static void set_metric(const string& key, int64_t value);
+// static void inc_metric(const string& key);
+static void set_metric(const string& metric,
+ const string& lname, const string& lvalue,
+ int64_t value);
+static void inc_metric(const string& metric,
+ const string& lname, const string& lvalue);
/* Handle program arguments. */
static error_t
@@ -454,6 +462,7 @@ struct reportable_exception
MHD_Response* r = MHD_create_response_from_buffer (message.size(),
(void*) message.c_str(),
MHD_RESPMEM_MUST_COPY);
+ MHD_add_response_header (r, "Content-Type", "text/plain");
int rc = MHD_queue_response (c, code, r);
MHD_destroy_response (r);
return rc;
@@ -543,8 +552,8 @@ timestamp (ostream &o)
now2 = datebuf;
}
- return o << "[" << (now2 ? now2 : "") << "] "
- << "(" << getpid () << "/" << tid() << "): ";
+ return o << "[" << (now2 ? now2 : "") << "] "
+ << "(" << getpid () << "/" << tid() << "): ";
}
@@ -784,6 +793,7 @@ handle_buildid_f_match (int64_t b_mtime,
return 0;
}
+ inc_metric ("http_responses_total","result","file");
struct MHD_Response* r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
if (r == 0)
{
@@ -793,6 +803,7 @@ handle_buildid_f_match (int64_t b_mtime,
}
else
{
+ MHD_add_response_header (r, "Content-Type", "application/octet-stream");
add_mhd_last_modified (r, s.st_mtime);
if (verbose > 1)
obatched(clog) << "serving file " << b_source0 << endl;
@@ -889,6 +900,7 @@ handle_buildid_r_match (int64_t b_mtime,
throw archive_exception(a, "cannot extract file");
}
+ inc_metric ("http_responses_total","result","rpm");
struct MHD_Response* r = MHD_create_response_from_fd (archive_entry_size(e), fd);
if (r == 0)
{
@@ -899,6 +911,7 @@ handle_buildid_r_match (int64_t b_mtime,
}
else
{
+ MHD_add_response_header (r, "Content-Type", "application/octet-stream");
add_mhd_last_modified (r, archive_entry_mtime(e));
if (verbose > 1)
obatched(clog) << "serving rpm " << b_source0 << " file " << b_source1 << endl;
@@ -1034,6 +1047,7 @@ static struct MHD_Response* handle_buildid (const string& buildid /* unsafe */,
suffix.c_str(), NULL);
if (fd >= 0)
{
+ inc_metric ("http_responses_total","result","upstream");
struct stat s;
int rc = fstat (fd, &s);
if (rc == 0)
@@ -1041,6 +1055,7 @@ static struct MHD_Response* handle_buildid (const string& buildid /* unsafe */,
auto r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
if (r)
{
+ MHD_add_response_header (r, "Content-Type", "application/octet-stream");
add_mhd_last_modified (r, s.st_mtime);
if (verbose > 1)
obatched(clog) << "serving file from upstream debuginfod/cache" << endl;
@@ -1060,11 +1075,84 @@ static struct MHD_Response* handle_buildid (const string& buildid /* unsafe */,
////////////////////////////////////////////////////////////////////////
+static map<string,int64_t> metrics; // arbitrary data for /metrics query
+// NB: store int64_t since all our metrics are integers; prometheus accepts double
+static mutex metrics_lock;
+
+// utility function for assembling prometheus-compatible
+// name="escaped-value" strings
+// https://prometheus.io/docs/instrumenting/exposition_formats/
+static string
+metric_label(const string& name, const string& value)
+{
+ string x = name + "=\"";
+ for (auto&& c : value)
+ switch(c)
+ {
+ case '\\': x += "\\\\"; break;
+ case '\"': x += "\\\""; break;
+ case '\n': x += "\\n"; break;
+ default: x += c; break;
+ }
+ x += "\"";
+ return x;
+}
+
+
+// add prometheus-format metric name + label tuple (if any) + value
+
+static void
+set_metric(const string& metric, int64_t value)
+{
+ unique_lock<mutex> lock(metrics_lock);
+ metrics[metric] = value;
+}
+#if 0 /* unused */
+static void
+inc_metric(const string& metric)
+{
+ unique_lock<mutex> lock(metrics_lock);
+ metrics[metric] ++;
+}
+#endif
+static void
+set_metric(const string& metric,
+ const string& lname, const string& lvalue,
+ int64_t value)
+{
+ string key = (metric + "{" + metric_label(lname, lvalue) + "}");
+ unique_lock<mutex> lock(metrics_lock);
+ metrics[key] = value;
+}
+
+static void
+inc_metric(const string& metric,
+ const string& lname, const string& lvalue)
+{
+ string key = (metric + "{" + metric_label(lname, lvalue) + "}");
+ unique_lock<mutex> lock(metrics_lock);
+ metrics[key] ++;
+}
+
+
+// and more for higher arity labels if needed
+
static struct MHD_Response*
handle_metrics ()
{
- throw reportable_exception("not yet implemented 2");
+ stringstream o;
+ {
+ unique_lock<mutex> lock(metrics_lock);
+ for (auto&& i : metrics)
+ o << i.first << " " << i.second << endl;
+ }
+ const string& os = o.str();
+ MHD_Response* r = MHD_create_response_from_buffer (os.size(),
+ (void*) os.c_str(),
+ MHD_RESPMEM_MUST_COPY);
+ MHD_add_response_header (r, "Content-Type", "text/plain");
+ return r;
}
@@ -1121,10 +1209,14 @@ handler_cb (void * /*cls*/,
suffix = url_copy.substr(slash3); // include the slash in the suffix
}
+ inc_metric("http_requests_total", "type", artifacttype);
r = handle_buildid(buildid, artifacttype, suffix, 0); // NB: don't care about result-fd
}
else if (url1 == "/metrics")
- r = handle_metrics();
+ {
+ inc_metric("http_requests_total", "type", "metrics");
+ r = handle_metrics();
+ }
else
throw reportable_exception("webapi error, unrecognized /operation");
@@ -1137,6 +1229,7 @@ handler_cb (void * /*cls*/,
}
catch (const reportable_exception& e)
{
+ inc_metric("http_responses_total","result","error");
e.report(clog);
return e.mhd_send_response (connection);
}
@@ -1548,6 +1641,7 @@ scan_source_file_path (const string& dir)
elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
else
throw libc_exception(errno, string("open ") + rps);
+ inc_metric ("scanned_total","source","file");
}
// NB: we catch exceptions here too, so that we can
@@ -1699,26 +1793,33 @@ thread_main_scan_source_file_path (void* arg)
unsigned rescan_timer = 0;
sig_atomic_t forced_rescan_count = 0;
+ set_metric("thread_timer_max", "file", dir, rescan_s);
+ set_metric("thread_tid", "file", dir, tid());
while (! interrupted)
{
- try
- {
- if (rescan_timer == 0)
- scan_source_file_path (dir);
- else if (sigusr1 != forced_rescan_count)
- {
- forced_rescan_count = sigusr1;
- scan_source_file_path (dir);
- }
- }
- catch (const sqlite_exception& e)
+ set_metric("thread_timer", "file", dir, rescan_timer);
+ set_metric("thread_forced_total", "file", dir, forced_rescan_count);
+ if (rescan_s && rescan_timer > rescan_s)
+ rescan_timer = 0;
+ if (sigusr1 != forced_rescan_count)
{
- obatched(cerr) << e.message << endl;
+ forced_rescan_count = sigusr1;
+ rescan_timer = 0;
}
+ if (rescan_timer == 0)
+ try
+ {
+ set_metric("thread_working", "file", dir, time(NULL));
+ inc_metric("thread_work_total", "file", dir);
+ scan_source_file_path (dir);
+ set_metric("thread_working", "file", dir, 0);
+ }
+ catch (const sqlite_exception& e)
+ {
+ obatched(cerr) << e.message << endl;
+ }
sleep (1);
rescan_timer ++;
- if (rescan_s)
- rescan_timer %= rescan_s;
}
return 0;
@@ -2030,6 +2131,7 @@ scan_source_rpm_path (const string& dir)
f->fts_statp->st_mtime,
my_fts_executable, my_fts_debuginfo, my_fts_sref, my_fts_sdef,
my_fts_sref_complete_p);
+ inc_metric ("scanned_total","source","rpm");
}
catch (const reportable_exception& e)
{
@@ -2103,26 +2205,33 @@ thread_main_scan_source_rpm_path (void* arg)
unsigned rescan_timer = 0;
sig_atomic_t forced_rescan_count = 0;
+ set_metric("thread_timer_max", "rpm", dir, rescan_s);
+ set_metric("thread_tid", "rpm", dir, tid());
while (! interrupted)
{
- try
- {
- if (rescan_timer == 0)
- scan_source_rpm_path (dir);
- else if (sigusr1 != forced_rescan_count)
- {
- forced_rescan_count = sigusr1;
- scan_source_rpm_path (dir);
- }
- }
- catch (const sqlite_exception& e)
+ set_metric("thread_timer", "rpm", dir, rescan_timer);
+ set_metric("thread_forced_total", "rpm", dir, forced_rescan_count);
+ if (rescan_s && rescan_timer > rescan_s)
+ rescan_timer = 0;
+ if (sigusr1 != forced_rescan_count)
{
- obatched(cerr) << e.message << endl;
+ forced_rescan_count = sigusr1;
+ rescan_timer = 0;
}
+ if (rescan_timer == 0)
+ try
+ {
+ set_metric("thread_working", "rpm", dir, time(NULL));
+ inc_metric("thread_work_total", "rpm", dir);
+ scan_source_rpm_path (dir);
+ set_metric("thread_working", "rpm", dir, 0);
+ }
+ catch (const sqlite_exception& e)
+ {
+ obatched(cerr) << e.message << endl;
+ }
sleep (1);
rescan_timer ++;
- if (rescan_s)
- rescan_timer %= rescan_s;
}
return 0;
@@ -2150,6 +2259,10 @@ database_stats_report()
<< " "
<< (sqlite3_column_text(ps_query, 1) ?: (const unsigned char*) "NULL")
<< endl;
+
+ set_metric("groom", "statistic",
+ ((const char*) sqlite3_column_text(ps_query, 0) ?: (const char*) "NULL"),
+ (sqlite3_column_double(ps_query, 1)));
}
}
@@ -2223,26 +2336,33 @@ thread_main_groom (void* /*arg*/)
{
unsigned groom_timer = 0;
sig_atomic_t forced_groom_count = 0;
+ set_metric("thread_timer_max", "role", "groom", groom_s);
+ set_metric("thread_tid", "role", "groom", tid());
while (! interrupted)
{
- try
- {
- if (groom_timer == 0)
- groom ();
- else if (sigusr2 != forced_groom_count)
- {
- forced_groom_count = sigusr2;
- groom ();
- }
- }
- catch (const sqlite_exception& e)
+ set_metric("thread_timer", "role", "groom", groom_timer);
+ set_metric("thread_forced_total", "role", "groom", forced_groom_count);
+ if (groom_s && groom_timer > groom_s)
+ groom_timer = 0;
+ if (sigusr2 != forced_groom_count)
{
- obatched(cerr) << e.message << endl;
+ forced_groom_count = sigusr2;
+ groom_timer = 0;
}
+ if (groom_timer == 0)
+ try
+ {
+ set_metric("thread_working", "role", "groom", time(NULL));
+ inc_metric("thread_work_total", "role", "groom");
+ groom ();
+ set_metric("thread_working", "role", "groom", 0);
+ }
+ catch (const sqlite_exception& e)
+ {
+ obatched(cerr) << e.message << endl;
+ }
sleep (1);
groom_timer ++;
- if (groom_s)
- groom_timer %= groom_s;
}
return 0;
@@ -2466,6 +2586,9 @@ main (int argc, char *argv[])
obatched(clog) << "search concurrency " << concurrency << endl;
obatched(clog) << "rescan time " << rescan_s << endl;
obatched(clog) << "groom time " << groom_s << endl;
+ const char* du = getenv(DEBUGINFOD_URLS_ENV_VAR);
+ if (du && du[0] != '\0') // set to non-empty string?
+ obatched(clog) << "upstream debuginfod servers: " << du << endl;
vector<pthread_t> source_file_scanner_threads;
vector<pthread_t> source_rpm_scanner_threads;
@@ -2495,28 +2618,25 @@ main (int argc, char *argv[])
source_rpm_scanner_threads.push_back(pt);
}
-
- const char* du = getenv(DEBUGINFOD_URLS_ENV_VAR);
- if (du && du[0] != '\0') // set to non-empty string?
- obatched(clog) << "upstream debuginfod servers: " << du << endl;
-
/* Trivial main loop! */
+ set_metric("ready", 1);
while (! interrupted)
pause ();
+ set_metric("ready", 0);
if (verbose)
obatched(clog) << "stopping" << endl;
- /* Stop all the web service threads. */
- if (d4) MHD_stop_daemon (d4);
- if (d6) MHD_stop_daemon (d6);
-
/* Join any source scanning threads. */
for (auto&& it : source_file_scanner_threads)
pthread_join (it, NULL);
for (auto&& it : source_rpm_scanner_threads)
pthread_join (it, NULL);
pthread_join (groom_thread, NULL);
+
+ /* Stop all the web service threads. */
+ if (d4) MHD_stop_daemon (d4);
+ if (d6) MHD_stop_daemon (d6);
/* With all threads known dead, we can clean up the global resources. */
delete scan_concurrency_sem;
diff --git a/doc/debuginfod.8 b/doc/debuginfod.8
index 8783df58..210550e8 100644
--- a/doc/debuginfod.8
+++ b/doc/debuginfod.8
@@ -176,7 +176,7 @@ May be repeated to increase details. The default verbosity is 0.
.\" Much of the following text is duplicated with debuginfod-find.1
-The debuginfod's webapi resembles ordinary file service, where a GET
+debuginfod's webapi resembles ordinary file service, where a GET
request with a path containing a known buildid results in a file.
Unknown buildid / request combinations result in HTTP error codes.
This file service resemblance is intentional, so that an installation
@@ -239,6 +239,14 @@ l l.
\../bar/foo.c AT_comp_dir=/zoo/ /buildid/BUILDID/source/zoo//../bar/foo.c
.TE
+.SS /metrics
+
+This endpoint returns a Prometheus formatted text/plain dump of a
+variety of statistics about the operation of the debuginfod server.
+The exact set of metrics and their meanings may change in future
+versions. Caution: configuration information (path names, versions)
+may be disclosed.
+
.SH DATA MANAGEMENT
debuginfod stores its index in an sqlite database in a densely packed
@@ -325,7 +333,8 @@ a denial-of-service in terms of RAM, CPU, disk I/O, or network I/O.
If this is a problem, users are advised to install debuginfod with a
HTTPS reverse-proxy front-end that enforces site policies for
firewalling, authentication, integrity, authorization, and load
-control.
+control. The \fI/metrics\fP webapi endpoint is probably not
+appropriate for disclosure to the public.
When relaying queries to upstream debuginfods, debuginfod \fBdoes not\fP
include any particular security features. It trusts that the binaries
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 3ce7f01a..fbf1d252 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,8 @@
+2019-11-07 Frank Ch. Eigler <fche@redhat.com>
+
+ * run-debuginfod-find.sh: Test debuginfod metrics via curl.
+ Fix federated testing, asserted by metrics.
+
2019-11-06 Frank Ch. Eigler <fche@redhat.com>
* run-debuginfod-find.sh: Test debuginfod -L mode. Drop
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 4b31af10..5c66029c 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -203,7 +203,8 @@ sleep 3
# have clients contact the new server
export DEBUGINFOD_URLS=http://localhost:$PORT2
-testrun ${abs_builddir}/debuginfod_build_id_find -e F/prog 1
+rm -rf $DEBUGINFOD_CACHE_PATH
+testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo $BUILDID
# confirm that first server can't resolve symlinked info in L/ but second can
BUILDID=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../src/readelf \
@@ -224,6 +225,16 @@ export DEBUGINFOD_URLS="BAD http://localhost:$PORT1 localhost:$PORT1 http://loca
testrun ${abs_builddir}/debuginfod_build_id_find -e F/prog2 1
+########################################################################
+
+# Fetch some metrics, if curl program is installed
+if type curl 2>/dev/null; then
+ curl http://localhost:$PORT1/badapi
+ curl http://localhost:$PORT1/metrics
+ curl http://localhost:$PORT2/metrics
+ curl http://localhost:$PORT1/metrics | grep -q 'http_responses_total.*result.*error'
+ curl http://localhost:$PORT2/metrics | grep -q 'http_responses_total.*result.*upstream'
+fi
########################################################################