summaryrefslogtreecommitdiffstats
path: root/clangd/index/Index.h
blob: 60aaebaeaf151c4d652ff1b2fdd1ee4798099e57 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
//===--- Symbol.h -----------------------------------------------*- C++-*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//

#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H

#include "../Context.h"
#include "clang/Index/IndexSymbol.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringExtras.h"
#include <array>
#include <string>

namespace clang {
namespace clangd {

struct SymbolLocation {
  // The absolute path of the source file where a symbol occurs.
  std::string FilePath;
  // The 0-based offset to the first character of the symbol from the beginning
  // of the source file.
  unsigned StartOffset;
  // The 0-based offset to the last character of the symbol from the beginning
  // of the source file.
  unsigned EndOffset;
};

// The class identifies a particular C++ symbol (class, function, method, etc).
//
// As USRs (Unified Symbol Resolution) could be large, especially for functions
// with long type arguments, SymbolID is using 160-bits SHA1(USR) values to
// guarantee the uniqueness of symbols while using a relatively small amount of
// memory (vs storing USRs directly).
//
// SymbolID can be used as key in the symbol indexes to lookup the symbol.
class SymbolID {
public:
  SymbolID() = default;
  SymbolID(llvm::StringRef USR);

  bool operator==(const SymbolID &Sym) const {
    return HashValue == Sym.HashValue;
  }

private:
  friend llvm::hash_code hash_value(const SymbolID &ID) {
    return hash_value(ArrayRef<uint8_t>(ID.HashValue));
  }
  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                                       const SymbolID &ID);
  friend void operator>>(llvm::StringRef Str, SymbolID &ID);

  std::array<uint8_t, 20> HashValue;
};

// Write SymbolID into the given stream. SymbolID is encoded as a 40-bytes
// hex string.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolID &ID);

// Construct SymbolID from a hex string.
// The HexStr is required to be a 40-bytes hex string, which is encoded from the
// "<<" operator.
void operator>>(llvm::StringRef HexStr, SymbolID &ID);

// The class presents a C++ symbol, e.g. class, function.
//
// FIXME: instead of having own copy fields for each symbol, we can share
// storage from SymbolSlab.
struct Symbol {
  // The ID of the symbol.
  SymbolID ID;
  // The unqualified name of the symbol, e.g. "bar" (for "n1::n2::bar").
  std::string Name;
  // The scope (e.g. namespace) of the symbol, e.g. "n1::n2" (for
  // "n1::n2::bar").
  std::string Scope;
  // The symbol information, like symbol kind.
  index::SymbolInfo SymInfo;
  // The location of the canonical declaration of the symbol.
  //
  // A C++ symbol could have multiple declarations and one definition (e.g.
  // a function is declared in ".h" file, and is defined in ".cc" file).
  //   * For classes, the canonical declaration is usually definition.
  //   * For non-inline functions, the canonical declaration is a declaration
  //     (not a definition), which is usually declared in ".h" file.
  SymbolLocation CanonicalDeclaration;

  // FIXME: add definition location of the symbol.
  // FIXME: add all occurrences support.
  // FIXME: add extra fields for index scoring signals.
  // FIXME: add code completion information.
};

// A symbol container that stores a set of symbols. The container will maintain
// the lifetime of the symbols.
//
// FIXME: Use a space-efficient implementation, a lot of Symbol fields could
// share the same storage.
class SymbolSlab {
public:
  using const_iterator = llvm::DenseMap<SymbolID, Symbol>::const_iterator;

  SymbolSlab() = default;

  const_iterator begin() const;
  const_iterator end() const;
  const_iterator find(const SymbolID &SymID) const;

  // Once called, no more symbols would be added to the SymbolSlab. This
  // operation is irreversible.
  void freeze();

  void insert(Symbol S);

private:
  bool Frozen = false;

  llvm::DenseMap<SymbolID, Symbol> Symbols;
};

struct FuzzyFindRequest {
  /// \brief A query string for the fuzzy find. This is matched against symbols'
  /// un-qualified identifiers and should not contain qualifiers like "::".
  std::string Query;
  /// \brief If this is non-empty, symbols must be in at least one of the scopes
  /// (e.g. namespaces) excluding nested scopes. For example, if a scope "xyz"
  /// is provided, the matched symbols must be defined in scope "xyz" but not
  /// "xyz::abc".
  ///
  /// A scope must be fully qualified without leading or trailing "::" e.g.
  /// "n1::n2". "" is interpreted as the global namespace, and "::" is invalid.
  std::vector<std::string> Scopes;
  /// \brief The maxinum number of candidates to return.
  size_t MaxCandidateCount = UINT_MAX;
};

/// \brief Interface for symbol indexes that can be used for searching or
/// matching symbols among a set of symbols based on names or unique IDs.
class SymbolIndex {
public:
  virtual ~SymbolIndex() = default;

  /// \brief Matches symbols in the index fuzzily and applies \p Callback on
  /// each matched symbol before returning.
  ///
  /// Returns true if the result list is complete, false if it was truncated due
  /// to MaxCandidateCount
  virtual bool
  fuzzyFind(const Context &Ctx, const FuzzyFindRequest &Req,
            std::function<void(const Symbol &)> Callback) const = 0;

  // FIXME: add interfaces for more index use cases:
  //  - Symbol getSymbolInfo(SymbolID);
  //  - getAllOccurrences(SymbolID);
};

} // namespace clangd
} // namespace clang

namespace llvm {

template <> struct DenseMapInfo<clang::clangd::SymbolID> {
  static inline clang::clangd::SymbolID getEmptyKey() {
    static clang::clangd::SymbolID EmptyKey("EMPTYKEY");
    return EmptyKey;
  }
  static inline clang::clangd::SymbolID getTombstoneKey() {
    static clang::clangd::SymbolID TombstoneKey("TOMBSTONEKEY");
    return TombstoneKey;
  }
  static unsigned getHashValue(const clang::clangd::SymbolID &Sym) {
    return hash_value(Sym);
  }
  static bool isEqual(const clang::clangd::SymbolID &LHS,
                      const clang::clangd::SymbolID &RHS) {
    return LHS == RHS;
  }
};

} // namespace llvm

#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H