summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBen Langmuir <blangmuir@apple.com>2022-09-29 16:04:38 -0700
committerBen Langmuir <blangmuir@apple.com>2022-10-05 15:42:38 -0700
commit074fcec1eabfc992c46c95df215b1caf5cf58970 (patch)
tree7fcfcd5cb3de12e4584a39ca6c951e956de64acf
parentff7a2b60555a28754b9513b78c5a6b4678b6656f (diff)
[clang][deps] Canonicalize module map pathworking
When dep-scanning, canonicalize the module map path as much as we can. This avoids unnecessarily needing to build multiple versions of a module due to symlinks or case-insensitive file paths. Despite the name `tryGetRealPathName`, the previous implementation did not actually return the realpath most of the time, and indeed it would be incorrect to do so since the realpath could be outside the module directory, which would have broken finding headers relative to the module. Instead, use a canonicalization that is specific to the needs of modulemap files (canonicalize the directory separately from the filename). Differential Revision: https://reviews.llvm.org/D134923
-rw-r--r--clang/include/clang/Lex/ModuleMap.h9
-rw-r--r--clang/lib/Lex/HeaderSearch.cpp13
-rw-r--r--clang/lib/Lex/ModuleMap.cpp36
-rw-r--r--clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp13
-rw-r--r--clang/test/ClangScanDeps/modules-symlink-dir.c131
5 files changed, 185 insertions, 17 deletions
diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h
index 1084b49b3534..10c5dfc25d9c 100644
--- a/clang/include/clang/Lex/ModuleMap.h
+++ b/clang/include/clang/Lex/ModuleMap.h
@@ -622,6 +622,15 @@ public:
void setInferredModuleAllowedBy(Module *M, const FileEntry *ModMap);
+ /// Canonicalize \p Path in a manner suitable for a module map file. In
+ /// particular, this canonicalizes the parent directory separately from the
+ /// filename so that it does not affect header resolution relative to the
+ /// modulemap.
+ ///
+ /// \returns an error code if any filesystem operations failed. In this case
+ /// \p Path is not modified.
+ std::error_code canonicalizeModuleMapPath(SmallVectorImpl<char> &Path);
+
/// Get any module map files other than getModuleMapFileForUniquing(M)
/// that define submodules of a top-level module \p M. This is cheaper than
/// getting the module map file for each submodule individually, since the
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index 99596b1378ba..73f967883496 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -255,18 +255,11 @@ std::string HeaderSearch::getCachedModuleFileNameImpl(StringRef ModuleName,
//
// To avoid false-negatives, we form as canonical a path as we can, and map
// to lower-case in case we're on a case-insensitive file system.
- std::string Parent =
- std::string(llvm::sys::path::parent_path(ModuleMapPath));
- if (Parent.empty())
- Parent = ".";
- auto Dir = FileMgr.getDirectory(Parent);
- if (!Dir)
+ SmallString<128> CanonicalPath(ModuleMapPath);
+ if (getModuleMap().canonicalizeModuleMapPath(CanonicalPath))
return {};
- auto DirName = FileMgr.getCanonicalName(*Dir);
- auto FileName = llvm::sys::path::filename(ModuleMapPath);
- llvm::hash_code Hash =
- llvm::hash_combine(DirName.lower(), FileName.lower());
+ llvm::hash_code Hash = llvm::hash_combine(CanonicalPath.str().lower());
SmallString<128> HashStr;
llvm::APInt(64, size_t(Hash)).toStringUnsigned(HashStr, /*Radix*/36);
diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp
index dbb81dc0d147..cbd3303f3663 100644
--- a/clang/lib/Lex/ModuleMap.cpp
+++ b/clang/lib/Lex/ModuleMap.cpp
@@ -1303,6 +1303,42 @@ void ModuleMap::setInferredModuleAllowedBy(Module *M, const FileEntry *ModMap) {
InferredModuleAllowedBy[M] = ModMap;
}
+std::error_code
+ModuleMap::canonicalizeModuleMapPath(SmallVectorImpl<char> &Path) {
+ StringRef Dir = llvm::sys::path::parent_path({Path.data(), Path.size()});
+
+ // Do not canonicalize within the framework; the module map parser expects
+ // Modules/ not Versions/A/Modules.
+ if (llvm::sys::path::filename(Dir) == "Modules") {
+ StringRef Parent = llvm::sys::path::parent_path(Dir);
+ if (Parent.endswith(".framework"))
+ Dir = Parent;
+ }
+
+ FileManager &FM = SourceMgr.getFileManager();
+ auto DirEntry = FM.getDirectory(Dir.empty() ? "." : Dir);
+ if (!DirEntry)
+ return DirEntry.getError();
+
+ // Canonicalize the directory.
+ StringRef CanonicalDir = FM.getCanonicalName(*DirEntry);
+ if (CanonicalDir != Dir) {
+ bool Done = llvm::sys::path::replace_path_prefix(Path, Dir, CanonicalDir);
+ (void)Done;
+ assert(Done && "Path should always start with Dir");
+ }
+
+ // In theory, the filename component should also be canonicalized if it
+ // on a case-insensitive filesystem. However, the extra canonicalization is
+ // expensive and if clang looked up the filename it will always be lowercase.
+
+ // Remove ., remove redundant separators, and switch to native separators.
+ // This is needed for separators between CanonicalDir and the filename.
+ llvm::sys::path::remove_dots(Path);
+
+ return std::error_code();
+}
+
void ModuleMap::addAdditionalModuleMapFile(const Module *M,
const FileEntry *ModuleMap) {
AdditionalModMaps[M].insert(ModuleMap);
diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index ffb60f17e930..f38ed7be4ddd 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -406,15 +406,14 @@ ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
MD.ImplicitModulePCMPath = std::string(M->getASTFile()->getName());
MD.IsSystem = M->IsSystem;
- Optional<FileEntryRef> ModuleMap = MDC.ScanInstance.getPreprocessor()
- .getHeaderSearchInfo()
- .getModuleMap()
- .getModuleMapFileForUniquing(M);
+ ModuleMap &ModMapInfo =
+ MDC.ScanInstance.getPreprocessor().getHeaderSearchInfo().getModuleMap();
+
+ Optional<FileEntryRef> ModuleMap = ModMapInfo.getModuleMapFileForUniquing(M);
if (ModuleMap) {
- StringRef Path = ModuleMap->getFileEntry().tryGetRealPathName();
- if (Path.empty())
- Path = ModuleMap->getName();
+ SmallString<128> Path = ModuleMap->getNameAsRequested();
+ ModMapInfo.canonicalizeModuleMapPath(Path);
MD.ClangModuleMapFile = std::string(Path);
}
diff --git a/clang/test/ClangScanDeps/modules-symlink-dir.c b/clang/test/ClangScanDeps/modules-symlink-dir.c
new file mode 100644
index 000000000000..810b88942d2a
--- /dev/null
+++ b/clang/test/ClangScanDeps/modules-symlink-dir.c
@@ -0,0 +1,131 @@
+// Check that we canonicalize the module map path without changing the module
+// directory, which would break header lookup.
+
+// REQUIRES: shell
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json
+// RUN: ln -s module %t/symlink-to-module
+// RUN: ln -s ../actual.modulemap %t/module/module.modulemap
+// RUN: ln -s A %t/module/F.framework/Versions/Current
+// RUN: ln -s Versions/Current/Modules %t/module/F.framework/Modules
+// RUN: ln -s Versions/Current/Headers %t/module/F.framework/Headers
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 \
+// RUN: -format experimental-full -mode=preprocess-dependency-directives \
+// RUN: -optimize-args -module-files-dir %t/build > %t/deps.json
+
+// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s
+
+// Check the module commands actually build.
+// RUN: %deps-to-rsp %t/deps.json --module-name=Mod > %t/Mod.rsp
+// RUN: %deps-to-rsp %t/deps.json --module-name=F > %t/F.rsp
+// RUN: %clang @%t/Mod.rsp
+// RUN: %clang @%t/F.rsp
+
+// CHECK: "modules": [
+// CHECK: {
+// CHECK: "clang-module-deps": [],
+// CHECK: "clang-modulemap-file": "[[PREFIX]]/module/F.framework/Modules/module.modulemap"
+// CHECK: "command-line": [
+// CHECK-NOT: symlink-to-module
+// CHECK: "[[PREFIX]]/module/F.framework/Modules/module.modulemap"
+// CHECK-NOT: symlink-to-module
+// CHECK: ]
+// CHECK: "context-hash": "[[F_CONTEXT_HASH:[A-Z0-9]+]]"
+// CHECK: "name": "F"
+// CHECK-NEXT: }
+// CHECK-NEXT: {
+// CHECK: "clang-modulemap-file": "[[PREFIX]]/module/module.modulemap"
+// CHECK: "command-line": [
+// CHECK-NOT: symlink-to-module
+// CHECK: "[[PREFIX]]/module/module.modulemap"
+// CHECK-NOT: symlink-to-module
+// CHECK: ]
+// CHECK: "context-hash": "[[CONTEXT_HASH:[A-Z0-9]+]]"
+// CHECK: "name": "Mod"
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK: "translation-units": [
+// CHECK: "clang-module-deps": [
+// CHECK: {
+// CHECK: "context-hash": "[[CONTEXT_HASH]]"
+// CHECK: "module-name": "Mod"
+// CHECK: }
+// CHECK-NEXT: ],
+// CHECK: "command-line": [
+// CHECK: "-fmodule-map-file=[[PREFIX]]/module/module.modulemap"
+// CHECK: ]
+// CHECK: "clang-module-deps": [
+// CHECK: {
+// CHECK: "context-hash": "[[CONTEXT_HASH]]"
+// CHECK: "module-name": "Mod"
+// CHECK: }
+// CHECK-NEXT: ]
+// CHECK: "command-line": [
+// CHECK: "-fmodule-map-file=[[PREFIX]]/module/module.modulemap"
+// CHECK: ]
+// CHECK: "clang-module-deps": [
+// CHECK: {
+// CHECK: "context-hash": "[[F_CONTEXT_HASH]]"
+// CHECK: "module-name": "F"
+// CHECK: }
+// CHECK-NEXT: ]
+// CHECK: "command-line": [
+// CHECK: "-fmodule-map-file=[[PREFIX]]/module/F.framework/Modules/module.modulemap"
+// CHECK: ]
+// CHECK: "clang-module-deps": [
+// CHECK: {
+// CHECK: "context-hash": "[[F_CONTEXT_HASH]]"
+// CHECK: "module-name": "F"
+// CHECK: }
+// CHECK-NEXT: ]
+// CHECK: "command-line": [
+// CHECK: "-fmodule-map-file=[[PREFIX]]/module/F.framework/Modules/module.modulemap"
+// CHECK: ]
+
+//--- cdb.json.in
+[
+ {
+ "directory": "DIR",
+ "command": "clang -fsyntax-only DIR/tu1.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+ "file": "DIR/tu1.c"
+ },
+ {
+ "directory": "DIR",
+ "command": "clang -fsyntax-only DIR/tu2.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+ "file": "DIR/tu2.c"
+ },
+ {
+ "directory": "DIR",
+ "command": "clang -fsyntax-only -F DIR/symlink-to-module DIR/tu3.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+ "file": "DIR/tu3.c"
+ },
+ {
+ "directory": "DIR",
+ "command": "clang -fsyntax-only -F DIR/module DIR/tu3.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+ "file": "DIR/tu3.c"
+ },
+]
+
+//--- actual.modulemap
+module Mod { header "header.h" }
+
+//--- module/header.h
+
+//--- tu1.c
+#include "symlink-to-module/header.h"
+
+//--- tu2.c
+#include "module/header.h"
+
+//--- module/F.framework/Versions/A/Modules/module.modulemap
+framework module F {
+ umbrella header "F.h"
+}
+
+//--- module/F.framework/Versions/A/Headers/F.h
+
+//--- tu3.c
+#include "F/F.h"