summaryrefslogtreecommitdiffstats
path: root/clangd/index/Index.h
blob: b5044634d3cfaf9d1ccf33087778cc9a25ef4b1e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
//===--- Symbol.h -----------------------------------------------*- C++-*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//

#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H

#include "../Context.h"
#include "clang/Index/IndexSymbol.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include <array>
#include <string>

namespace clang {
namespace clangd {

struct SymbolLocation {
  // The absolute path of the source file where a symbol occurs.
  llvm::StringRef FilePath;
  // The 0-based offset to the first character of the symbol from the beginning
  // of the source file.
  unsigned StartOffset;
  // The 0-based offset to the last character of the symbol from the beginning
  // of the source file.
  unsigned EndOffset;
};

// The class identifies a particular C++ symbol (class, function, method, etc).
//
// As USRs (Unified Symbol Resolution) could be large, especially for functions
// with long type arguments, SymbolID is using 160-bits SHA1(USR) values to
// guarantee the uniqueness of symbols while using a relatively small amount of
// memory (vs storing USRs directly).
//
// SymbolID can be used as key in the symbol indexes to lookup the symbol.
class SymbolID {
public:
  SymbolID() = default;
  SymbolID(llvm::StringRef USR);

  bool operator==(const SymbolID &Sym) const {
    return HashValue == Sym.HashValue;
  }

private:
  friend llvm::hash_code hash_value(const SymbolID &ID) {
    // We already have a good hash, just return the first bytes.
    static_assert(sizeof(size_t) <= 20, "size_t longer than SHA1!");
    return *reinterpret_cast<const size_t *>(ID.HashValue.data());
  }
  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                                       const SymbolID &ID);
  friend void operator>>(llvm::StringRef Str, SymbolID &ID);

  std::array<uint8_t, 20> HashValue;
};

// Write SymbolID into the given stream. SymbolID is encoded as a 40-bytes
// hex string.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolID &ID);

// Construct SymbolID from a hex string.
// The HexStr is required to be a 40-bytes hex string, which is encoded from the
// "<<" operator.
void operator>>(llvm::StringRef HexStr, SymbolID &ID);

// The class presents a C++ symbol, e.g. class, function.
//
// WARNING: Symbols do not own much of their underlying data - typically strings
// are owned by a SymbolSlab. They should be treated as non-owning references.
// Copies are shallow.
// When adding new unowned data fields to Symbol, remember to update
// SymbolSlab::insert to copy them to the slab's storage.
struct Symbol {
  // The ID of the symbol.
  SymbolID ID;
  // The symbol information, like symbol kind.
  index::SymbolInfo SymInfo;
  // The unqualified name of the symbol, e.g. "bar" (for "n1::n2::bar").
  llvm::StringRef Name;
  // The scope (e.g. namespace) of the symbol, e.g. "n1::n2" (for
  // "n1::n2::bar").
  llvm::StringRef Scope;
  // The location of the canonical declaration of the symbol.
  //
  // A C++ symbol could have multiple declarations and one definition (e.g.
  // a function is declared in ".h" file, and is defined in ".cc" file).
  //   * For classes, the canonical declaration is usually definition.
  //   * For non-inline functions, the canonical declaration is a declaration
  //     (not a definition), which is usually declared in ".h" file.
  SymbolLocation CanonicalDeclaration;

  // FIXME: add definition location of the symbol.
  // FIXME: add all occurrences support.
  // FIXME: add extra fields for index scoring signals.
  // FIXME: add code completion information.
};

// A symbol container that stores a set of symbols. The container will maintain
// the lifetime of the symbols.
class SymbolSlab {
public:
  using const_iterator = llvm::DenseMap<SymbolID, Symbol>::const_iterator;

  SymbolSlab() = default;

  const_iterator begin() const;
  const_iterator end() const;
  const_iterator find(const SymbolID &SymID) const;

  // Once called, no more symbols would be added to the SymbolSlab. This
  // operation is irreversible.
  void freeze();

  // Adds the symbol to this slab.
  // This is a deep copy: underlying strings will be owned by the slab.
  void insert(const Symbol& S);

private:
  // Replaces S with a reference to the same string, owned by this slab.
  void intern(llvm::StringRef &S) {
    S = S.empty() ? llvm::StringRef() : Strings.insert(S).first->getKey();
  }

  bool Frozen = false;

  // Intern table for strings. Not StringPool as we don't refcount, just insert.
  // We use BumpPtrAllocator to avoid lots of tiny allocations for nodes.
  llvm::StringSet<llvm::BumpPtrAllocator> Strings;
  llvm::DenseMap<SymbolID, Symbol> Symbols;
};

struct FuzzyFindRequest {
  /// \brief A query string for the fuzzy find. This is matched against symbols'
  /// un-qualified identifiers and should not contain qualifiers like "::".
  std::string Query;
  /// \brief If this is non-empty, symbols must be in at least one of the scopes
  /// (e.g. namespaces) excluding nested scopes. For example, if a scope "xyz"
  /// is provided, the matched symbols must be defined in scope "xyz" but not
  /// "xyz::abc".
  ///
  /// A scope must be fully qualified without leading or trailing "::" e.g.
  /// "n1::n2". "" is interpreted as the global namespace, and "::" is invalid.
  std::vector<std::string> Scopes;
  /// \brief The maxinum number of candidates to return.
  size_t MaxCandidateCount = UINT_MAX;
};

/// \brief Interface for symbol indexes that can be used for searching or
/// matching symbols among a set of symbols based on names or unique IDs.
class SymbolIndex {
public:
  virtual ~SymbolIndex() = default;

  /// \brief Matches symbols in the index fuzzily and applies \p Callback on
  /// each matched symbol before returning.
  ///
  /// Returns true if the result list is complete, false if it was truncated due
  /// to MaxCandidateCount
  virtual bool
  fuzzyFind(const Context &Ctx, const FuzzyFindRequest &Req,
            std::function<void(const Symbol &)> Callback) const = 0;

  // FIXME: add interfaces for more index use cases:
  //  - Symbol getSymbolInfo(SymbolID);
  //  - getAllOccurrences(SymbolID);
};

} // namespace clangd
} // namespace clang

namespace llvm {

template <> struct DenseMapInfo<clang::clangd::SymbolID> {
  static inline clang::clangd::SymbolID getEmptyKey() {
    static clang::clangd::SymbolID EmptyKey("EMPTYKEY");
    return EmptyKey;
  }
  static inline clang::clangd::SymbolID getTombstoneKey() {
    static clang::clangd::SymbolID TombstoneKey("TOMBSTONEKEY");
    return TombstoneKey;
  }
  static unsigned getHashValue(const clang::clangd::SymbolID &Sym) {
    return hash_value(Sym);
  }
  static bool isEqual(const clang::clangd::SymbolID &LHS,
                      const clang::clangd::SymbolID &RHS) {
    return LHS == RHS;
  }
};

} // namespace llvm

#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H