summaryrefslogtreecommitdiffstats
path: root/clangd/FileDistance.h
blob: e7174bccb9ddb5bf9e5660e7b0698ef90fcd1c57 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
//===--- FileDistance.h - File proximity scoring -----------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This library measures the distance between file paths.
// It's used for ranking symbols, e.g. in code completion.
//  |foo/bar.h -> foo/bar.h| = 0.
//  |foo/bar.h -> foo/baz.h| < |foo/bar.h -> baz.h|.
// This is an edit-distance, where edits go up or down the directory tree.
// It's not symmetrical, the costs of going up and down may not match.
//
// Dealing with multiple sources:
// In practice we care about the distance from a source file, but files near
// its main-header and #included files are considered "close".
// So we start with a set of (anchor, cost) pairs, and call the distance to a
// path the minimum of `cost + |source -> path|`.
//
// We allow each source to limit the number of up-traversals paths may start
// with. Up-traversals may reach things that are not "semantically near".
//
// Symbol URI schemes:
// Symbol locations may be represented by URIs rather than file paths directly.
// In this case we want to perform distance computations in URI space rather
// than in file-space, without performing redundant conversions.
// Therefore we have a lookup structure that accepts URIs, so that intermediate
// calculations for the same scheme can be reused.
//
// Caveats:
// Assuming up and down traversals each have uniform costs is simplistic.
// Often there are "semantic roots" whose children are almost unrelated.
// (e.g. /usr/include/, or / in an umbrella repository). We ignore this.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H

#include "URI.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/StringSaver.h"
#include <memory>

namespace clang {
namespace clangd {

struct FileDistanceOptions {
  unsigned UpCost = 2;                    // |foo/bar.h -> foo|
  unsigned DownCost = 1;                  // |foo -> foo/bar.h|
  unsigned IncludeCost = 2;               // |foo.cc -> included_header.h|
  bool AllowDownTraversalFromRoot = true; // | / -> /a |
};

struct SourceParams {
  // Base cost for paths starting at this source.
  unsigned Cost = 0;
  // Limits the number of upwards traversals allowed from this source.
  unsigned MaxUpTraversals = std::numeric_limits<unsigned>::max();
};

// Supports lookups to find the minimum distance to a file from any source.
// This object should be reused, it memoizes intermediate computations.
class FileDistance {
public:
  static constexpr unsigned Unreachable = std::numeric_limits<unsigned>::max();
  static const llvm::hash_code RootHash;

  FileDistance(llvm::StringMap<SourceParams> Sources,
               const FileDistanceOptions &Opts = {});

  // Computes the minimum distance from any source to the file path.
  unsigned distance(llvm::StringRef Path);

private:
  // Costs computed so far. Always contains sources and their ancestors.
  // We store hash codes only. Collisions are rare and consequences aren't dire.
  llvm::DenseMap<llvm::hash_code, unsigned> Cache;
  FileDistanceOptions Opts;
};

// Supports lookups like FileDistance, but the lookup keys are URIs.
// We convert each of the sources to the scheme of the URI and do a FileDistance
// comparison on the bodies.
class URIDistance {
public:
  // \p Sources must contain absolute paths, not URIs.
  URIDistance(llvm::StringMap<SourceParams> Sources,
              const FileDistanceOptions &Opts = {})
      : Sources(Sources), Opts(Opts) {}

  // Computes the minimum distance from any source to the URI.
  // Only sources that can be mapped into the URI's scheme are considered.
  unsigned distance(llvm::StringRef URI);

private:
  // Returns the FileDistance for a URI scheme, creating it if needed.
  FileDistance &forScheme(llvm::StringRef Scheme);

  // We cache the results using the original strings so we can skip URI parsing.
  llvm::DenseMap<llvm::hash_code, unsigned> Cache;
  llvm::StringMap<SourceParams> Sources;
  llvm::StringMap<std::unique_ptr<FileDistance>> ByScheme;
  FileDistanceOptions Opts;
};

/// Support lookups like FileDistance, but the lookup keys are symbol scopes.
/// For example, a scope "na::nb::" is converted to "/na/nb".
class ScopeDistance {
public:
  /// QueryScopes[0] is the preferred scope.
  ScopeDistance(llvm::ArrayRef<std::string> QueryScopes);

  unsigned distance(llvm::StringRef SymbolScope);

private:
  FileDistance Distance;
};

} // namespace clangd
} // namespace clang

#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H