summaryrefslogtreecommitdiffstats
path: root/clangd/index/dex/dexp/Dexp.cpp
blob: 820dc66b0f06415ad26a26f8ca9667d04c1206f7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
//===--- Dexp.cpp - Dex EXPloration tool ------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a simple interactive tool which can be used to manually
// evaluate symbol search quality of Clangd index.
//
//===----------------------------------------------------------------------===//

#include "SourceCode.h"
#include "index/Serialization.h"
#include "index/dex/Dex.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/LineEditor/LineEditor.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Signals.h"

namespace clang {
namespace clangd {
namespace {

// Mandatory positional argument of the tool driver: where to find the index
// that main() opens before entering the REPL.
llvm::cl::opt<std::string> IndexPath{
    "index-path",
    llvm::cl::Positional,
    llvm::cl::Required,
    llvm::cl::desc("Path to the index"),
};

// Description of the tool; main() passes it as the overview text to
// llvm::cl::ParseCommandLineOptions when parsing the driver's own flags.
static const std::string Overview = R"(
This is an **experimental** interactive tool to process user-provided search
queries over given symbol collection obtained via clangd-indexer. The
tool can be used to evaluate search quality of existing index implementations
and manually construct non-trivial test cases.

Type "help" to get information about the available commands.
)";

// Invokes F exactly once and prints "<Name> took <duration>." to llvm::outs(),
// with the elapsed wall time truncated to whole milliseconds.
void reportTime(llvm::StringRef Name, llvm::function_ref<void()> F) {
  // Intervals must be measured with a monotonic clock: high_resolution_clock
  // is allowed to alias the system clock, which can jump while F is running.
  using Clock = std::chrono::steady_clock;
  const Clock::time_point Start = Clock::now();
  F();
  const auto Elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
      Clock::now() - Start);
  llvm::outs() << llvm::formatv("{0} took {1:ms+n}.\n", Name, Elapsed);
}

// Returns the IDs of all symbols in Index whose fully qualified name
// (Scope + Name) is exactly QualifiedName. A leading "::" on QualifiedName is
// accepted and ignored for the comparison.
std::vector<SymbolID> getSymbolIDsFromIndex(llvm::StringRef QualifiedName,
                                            const SymbolIndex *Index) {
  // FuzzyFind scopes are written without the leading "::" of the global
  // scope, so drop it before splitting the name.
  const bool HadGlobalQualifier = QualifiedName.consume_front("::");
  const auto ScopeAndName = splitQualifiedName(QualifiedName);

  FuzzyFindRequest Request;
  Request.Query = ScopeAndName.second;
  if (!HadGlobalQualifier && ScopeAndName.first.empty()) {
    // An unqualified name (e.g. "GlobalSymbol") is looked up in the global
    // scope.
    Request.Scopes = {""};
  } else {
    Request.Scopes = {ScopeAndName.first};
  }

  // The fuzzy query only narrows down candidates; keep exact matches only.
  std::vector<SymbolID> Matches;
  Index->fuzzyFind(Request, [&](const Symbol &Candidate) {
    const std::string CandidateName = (Candidate.Scope + Candidate.Name).str();
    if (QualifiedName == CandidateName)
      Matches.push_back(Candidate.ID);
  });
  return Matches;
}

// REPL commands inherit from Command and contain their options as members.
// Creating a Command populates parser options, parseAndRun() resets them.
// A Command object is therefore meant to be created, run once via
// parseAndRun(), and then destroyed.
class Command {
  // By resetting the parser options, we lost the standard -help flag.
  llvm::cl::opt<bool, false, llvm::cl::parser<bool>> Help{
      "help", llvm::cl::desc("Display available options"),
      llvm::cl::ValueDisallowed, llvm::cl::cat(llvm::cl::GeneralCategory)};
  // Executes the command. Only called by parseAndRun(), after the options were
  // parsed successfully and Index was set.
  virtual void run() = 0;

protected:
  // Index the command operates on. Null until parseAndRun() points it at the
  // index it was given, right before calling run().
  const SymbolIndex *Index = nullptr;

public:
  virtual ~Command() = default;
  // Parses Argv (Argv[0] is the command name) into the option members and, if
  // parsing succeeded and -help was not requested, runs the command against
  // Index and reports how long it took. Overview is handed to the option
  // parser as the command's description. Parse diagnostics are printed to
  // llvm::outs().
  virtual void parseAndRun(llvm::ArrayRef<const char *> Argv,
                           const char *Overview, const SymbolIndex &Index) {
    // Collect parse diagnostics so they can be dropped when -help is given.
    std::string ParseErrs;
    llvm::raw_string_ostream OS(ParseErrs);
    bool Ok = llvm::cl::ParseCommandLineOptions(Argv.size(), Argv.data(),
                                                Overview, &OS);
    if (Help.getNumOccurrences() > 0) {
      // Avoid printing parse errors in this case.
      // (Well, in theory. A bunch get printed to llvm::errs() regardless!)
      llvm::cl::PrintHelpMessage();
    } else {
      llvm::outs() << OS.str();
      if (Ok) {
        this->Index = &Index;
        reportTime(Argv[0], [&] { run(); });
      }
    }
    llvm::cl::ResetCommandLineParser(); // must do this before opts are
                                        // destroyed.
  }
};

// FIXME(kbobyrev): Ideas for more commands:
// * load/swap/reload index: this would make it possible to get rid of llvm::cl
//   usages in the tool driver and actually use llvm::cl library in the REPL.
// * show posting list density histogram (or dump data somewhere so that the
//   user could build one)
// * show number of tokens of each kind
// * print out tokens with the most dense posting lists
// * print out tokens with least dense posting lists

class FuzzyFind : public Command {
  llvm::cl::opt<std::string> Query{
      "query",
      llvm::cl::Positional,
      llvm::cl::Required,
      llvm::cl::desc("Query string to be fuzzy-matched"),
  };
  llvm::cl::opt<std::string> Scopes{
      "scopes",
      llvm::cl::desc("Allowed symbol scopes (comma-separated list)"),
  };
  llvm::cl::opt<unsigned> Limit{
      "limit",
      llvm::cl::init(10),
      llvm::cl::desc("Max results to display"),
  };

  void run() override {
    FuzzyFindRequest Request;
    Request.Limit = Limit;
    Request.Query = Query;
    if (Scopes.getNumOccurrences() > 0) {
      llvm::SmallVector<llvm::StringRef, 8> Scopes;
      llvm::StringRef(this->Scopes).split(Scopes, ',');
      Request.Scopes = {Scopes.begin(), Scopes.end()};
    }
    Request.AnyScope = Request.Scopes.empty();
    // FIXME(kbobyrev): Print symbol final scores to see the distribution.
    static const auto OutputFormat = "{0,-4} | {1,-40} | {2,-25}\n";
    llvm::outs() << llvm::formatv(OutputFormat, "Rank", "Symbol ID",
                                  "Symbol Name");
    size_t Rank = 0;
    Index->fuzzyFind(Request, [&](const Symbol &Sym) {
      llvm::outs() << llvm::formatv(OutputFormat, Rank++, Sym.ID.str(),
                                    Sym.Scope + Sym.Name);
    });
  }
};

// "lookup" command: dumps, as YAML, every symbol that matches the given
// symbol ID or the given qualified name. The ID wins if both are provided.
class Lookup : public Command {
  llvm::cl::opt<std::string> ID{
      "id",
      llvm::cl::Positional,
      llvm::cl::desc("Symbol ID to look up (hex)"),
  };
  llvm::cl::opt<std::string> Name{
      "name",
      llvm::cl::desc("Qualified name to look up."),
  };

  void run() override {
    const bool HaveID = ID.getNumOccurrences() > 0;
    const bool HaveName = Name.getNumOccurrences() > 0;
    if (!HaveID && !HaveName) {
      llvm::outs()
          << "Missing required argument: please provide id or -name.\n";
      return;
    }

    // Resolve the argument into the set of symbol IDs to fetch.
    std::vector<SymbolID> IDs;
    if (!HaveID) {
      IDs = getSymbolIDsFromIndex(Name, Index);
    } else {
      auto SID = SymbolID::fromStr(ID);
      if (!SID) {
        llvm::outs() << llvm::toString(SID.takeError()) << "\n";
        return;
      }
      IDs.push_back(*SID);
    }

    LookupRequest Request;
    Request.IDs.insert(IDs.begin(), IDs.end());
    unsigned NumPrinted = 0;
    Index->lookup(Request, [&NumPrinted](const Symbol &Sym) {
      ++NumPrinted;
      llvm::outs() << toYAML(Sym);
    });
    if (NumPrinted == 0)
      llvm::outs() << "not found\n";
  }
};

// "refs" command: prints the references to one symbol, selected either by
// symbol ID or by qualified name, keeping only references located in files
// whose URI body matches the -filter regular expression.
class Refs : public Command {
  llvm::cl::opt<std::string> ID{
      "id",
      llvm::cl::Positional,
      llvm::cl::desc("Symbol ID of the symbol being queried (hex)."),
  };
  llvm::cl::opt<std::string> Name{
      "name",
      llvm::cl::desc("Qualified name of the symbol being queried."),
  };
  llvm::cl::opt<std::string> Filter{
      "filter",
      llvm::cl::init(".*"),
      llvm::cl::desc(
          "Print all results from files matching this regular expression."),
  };

  void run() override {
    if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
      llvm::outs()
          << "Missing required argument: please provide id or -name.\n";
      return;
    }
    // Resolve the argument into symbol IDs; an explicit ID takes precedence
    // over a name.
    std::vector<SymbolID> IDs;
    if (ID.getNumOccurrences()) {
      auto SID = SymbolID::fromStr(ID);
      if (!SID) {
        llvm::outs() << llvm::toString(SID.takeError()) << "\n";
        return;
      }
      IDs.push_back(*SID);
    } else {
      IDs = getSymbolIDsFromIndex(Name, Index);
      // Mixing references of unrelated symbols would be confusing, so refuse
      // names that resolve to more than one symbol.
      if (IDs.size() > 1) {
        llvm::outs() << llvm::formatv(
            "The name {0} is ambiguous, found {1} different "
            "symbols. Please use id flag to disambiguate.\n",
            Name, IDs.size());
        return;
      }
    }
    // Reject a malformed filter up front instead of running the query with an
    // unusable pattern.
    llvm::Regex RegexFilter(Filter);
    std::string RegexError;
    if (!RegexFilter.isValid(RegexError)) {
      llvm::outs() << llvm::formatv("Invalid filter regex: {0}\n", RegexError);
      return;
    }
    RefsRequest RefRequest;
    RefRequest.IDs.insert(IDs.begin(), IDs.end());
    Index->refs(RefRequest, [&RegexFilter](const Ref &R) {
      auto U = URI::parse(R.Location.FileURI);
      if (!U) {
        // llvm::toString() consumes the Error. Merely streaming the temporary
        // would leave it unchecked, which aborts in assertion-enabled builds
        // when it is destroyed.
        llvm::outs() << llvm::toString(U.takeError()) << "\n";
        return;
      }
      if (RegexFilter.match(U->body()))
        llvm::outs() << R << "\n";
    });
  }
};

// Table of the REPL commands: the word that selects the command, a one-line
// description shown by "help", and a factory creating a fresh Command object
// for a single invocation.
struct {
  const char *Name;
  const char *Description;
  std::function<std::unique_ptr<Command>()> Implementation;
} CommandInfo[] = {
    {"find", "Search for symbols with fuzzyFind",
     []() -> std::unique_ptr<Command> {
       return llvm::make_unique<FuzzyFind>();
     }},
    {"lookup", "Dump symbol details by ID or qualified name",
     []() -> std::unique_ptr<Command> { return llvm::make_unique<Lookup>(); }},
    {"refs", "Find references by ID or qualified name",
     []() -> std::unique_ptr<Command> { return llvm::make_unique<Refs>(); }},
};

// Loads the index stored at path Index via loadIndex(), requesting the Dex
// implementation, and returns whatever loadIndex() produced.
std::unique_ptr<SymbolIndex> openIndex(llvm::StringRef Index) {
  const bool UseDex = true;
  return loadIndex(Index, UseDex);
}

} // namespace
} // namespace clangd
} // namespace clang

int main(int argc, const char *argv[]) {
  using namespace clang::clangd;

  llvm::cl::ParseCommandLineOptions(argc, argv, Overview);
  llvm::cl::ResetCommandLineParser(); // We reuse it for REPL commands.
  llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);

  std::unique_ptr<SymbolIndex> Index;
  reportTime("Dex build", [&]() {
    Index = openIndex(IndexPath);
  });

  if (!Index) {
    llvm::outs() << "Failed to open the index.\n";
    return -1;
  }

  llvm::LineEditor LE("dexp");

  while (llvm::Optional<std::string> Request = LE.readLine()) {
    // Split on spaces and add required null-termination.
    std::replace(Request->begin(), Request->end(), ' ', '\0');
    llvm::SmallVector<llvm::StringRef, 8> Args;
    llvm::StringRef(*Request).split(Args, '\0', /*MaxSplit=*/-1,
                                    /*KeepEmpty=*/false);
    if (Args.empty())
      continue;
    if (Args.front() == "help") {
      llvm::outs() << "dexp - Index explorer\nCommands:\n";
      for (const auto &C : CommandInfo)
        llvm::outs() << llvm::formatv("{0,16} - {1}\n", C.Name, C.Description);
      llvm::outs() << "Get detailed command help with e.g. `find -help`.\n";
      continue;
    }
    llvm::SmallVector<const char *, 8> FakeArgv;
    for (llvm::StringRef S : Args)
      FakeArgv.push_back(S.data()); // Terminated by separator or end of string.

    bool Recognized = false;
    for (const auto &Cmd : CommandInfo) {
      if (Cmd.Name == Args.front()) {
        Recognized = true;
        Cmd.Implementation()->parseAndRun(FakeArgv, Cmd.Description, *Index);
        break;
      }
    }
    if (!Recognized)
      llvm::outs() << "Unknown command. Try 'help'.\n";
  }

  return 0;
}