teak-llvm/clang/lib/Tooling/Core/Replacement.cpp
Jonas Devlieghere fc51490baf Lift VFS from clang to llvm (NFC)
This patch moves the virtual file system form clang to llvm so it can be
used by more projects.

Concretely the patch:
 - Moves VirtualFileSystem.{h|cpp} from clang/Basic to llvm/Support.
 - Moves the corresponding unit test from clang to llvm.
 - Moves the vfs namespace from clang::vfs to llvm::vfs.
 - Formats the lines affected by this change, mostly this is the result of
   the added llvm namespace.

RFC on the mailing list:
http://lists.llvm.org/pipermail/llvm-dev/2018-October/126657.html

Differential revision: https://reviews.llvm.org/D52783

llvm-svn: 344140
2018-10-10 13:27:25 +00:00

629 lines
24 KiB
C++

//===- Replacement.cpp - Framework for clang refactoring tools ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Implements classes to support/store refactorings.
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/Core/Replacement.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
#include "clang/Rewrite/Core/RewriteBuffer.h"
#include "clang/Rewrite/Core/Rewriter.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <map>
#include <string>
#include <utility>
#include <vector>
using namespace clang;
using namespace tooling;
static const char * const InvalidLocation = "";
Replacement::Replacement() : FilePath(InvalidLocation) {}
Replacement::Replacement(StringRef FilePath, unsigned Offset, unsigned Length,
StringRef ReplacementText)
: FilePath(FilePath), ReplacementRange(Offset, Length),
ReplacementText(ReplacementText) {}
Replacement::Replacement(const SourceManager &Sources, SourceLocation Start,
unsigned Length, StringRef ReplacementText) {
setFromSourceLocation(Sources, Start, Length, ReplacementText);
}
Replacement::Replacement(const SourceManager &Sources,
const CharSourceRange &Range,
StringRef ReplacementText,
const LangOptions &LangOpts) {
setFromSourceRange(Sources, Range, ReplacementText, LangOpts);
}
bool Replacement::isApplicable() const {
return FilePath != InvalidLocation;
}
bool Replacement::apply(Rewriter &Rewrite) const {
SourceManager &SM = Rewrite.getSourceMgr();
const FileEntry *Entry = SM.getFileManager().getFile(FilePath);
if (!Entry)
return false;
FileID ID = SM.getOrCreateFileID(Entry, SrcMgr::C_User);
const SourceLocation Start =
SM.getLocForStartOfFile(ID).
getLocWithOffset(ReplacementRange.getOffset());
// ReplaceText returns false on success.
// ReplaceText only fails if the source location is not a file location, in
// which case we already returned false earlier.
bool RewriteSucceeded = !Rewrite.ReplaceText(
Start, ReplacementRange.getLength(), ReplacementText);
assert(RewriteSucceeded);
return RewriteSucceeded;
}
std::string Replacement::toString() const {
std::string Result;
llvm::raw_string_ostream Stream(Result);
Stream << FilePath << ": " << ReplacementRange.getOffset() << ":+"
<< ReplacementRange.getLength() << ":\"" << ReplacementText << "\"";
return Stream.str();
}
namespace clang {
namespace tooling {
bool operator<(const Replacement &LHS, const Replacement &RHS) {
if (LHS.getOffset() != RHS.getOffset())
return LHS.getOffset() < RHS.getOffset();
if (LHS.getLength() != RHS.getLength())
return LHS.getLength() < RHS.getLength();
if (LHS.getFilePath() != RHS.getFilePath())
return LHS.getFilePath() < RHS.getFilePath();
return LHS.getReplacementText() < RHS.getReplacementText();
}
bool operator==(const Replacement &LHS, const Replacement &RHS) {
return LHS.getOffset() == RHS.getOffset() &&
LHS.getLength() == RHS.getLength() &&
LHS.getFilePath() == RHS.getFilePath() &&
LHS.getReplacementText() == RHS.getReplacementText();
}
} // namespace tooling
} // namespace clang
void Replacement::setFromSourceLocation(const SourceManager &Sources,
SourceLocation Start, unsigned Length,
StringRef ReplacementText) {
const std::pair<FileID, unsigned> DecomposedLocation =
Sources.getDecomposedLoc(Start);
const FileEntry *Entry = Sources.getFileEntryForID(DecomposedLocation.first);
this->FilePath = Entry ? Entry->getName() : InvalidLocation;
this->ReplacementRange = Range(DecomposedLocation.second, Length);
this->ReplacementText = ReplacementText;
}
// FIXME: This should go into the Lexer, but we need to figure out how
// to handle ranges for refactoring in general first - there is no obvious
// good way how to integrate this into the Lexer yet.
static int getRangeSize(const SourceManager &Sources,
const CharSourceRange &Range,
const LangOptions &LangOpts) {
SourceLocation SpellingBegin = Sources.getSpellingLoc(Range.getBegin());
SourceLocation SpellingEnd = Sources.getSpellingLoc(Range.getEnd());
std::pair<FileID, unsigned> Start = Sources.getDecomposedLoc(SpellingBegin);
std::pair<FileID, unsigned> End = Sources.getDecomposedLoc(SpellingEnd);
if (Start.first != End.first) return -1;
if (Range.isTokenRange())
End.second += Lexer::MeasureTokenLength(SpellingEnd, Sources, LangOpts);
return End.second - Start.second;
}
void Replacement::setFromSourceRange(const SourceManager &Sources,
const CharSourceRange &Range,
StringRef ReplacementText,
const LangOptions &LangOpts) {
setFromSourceLocation(Sources, Sources.getSpellingLoc(Range.getBegin()),
getRangeSize(Sources, Range, LangOpts),
ReplacementText);
}
Replacement
Replacements::getReplacementInChangedCode(const Replacement &R) const {
unsigned NewStart = getShiftedCodePosition(R.getOffset());
unsigned NewEnd = getShiftedCodePosition(R.getOffset() + R.getLength());
return Replacement(R.getFilePath(), NewStart, NewEnd - NewStart,
R.getReplacementText());
}
static std::string getReplacementErrString(replacement_error Err) {
switch (Err) {
case replacement_error::fail_to_apply:
return "Failed to apply a replacement.";
case replacement_error::wrong_file_path:
return "The new replacement's file path is different from the file path of "
"existing replacements";
case replacement_error::overlap_conflict:
return "The new replacement overlaps with an existing replacement.";
case replacement_error::insert_conflict:
return "The new insertion has the same insert location as an existing "
"replacement.";
}
llvm_unreachable("A value of replacement_error has no message.");
}
std::string ReplacementError::message() const {
std::string Message = getReplacementErrString(Err);
if (NewReplacement.hasValue())
Message += "\nNew replacement: " + NewReplacement->toString();
if (ExistingReplacement.hasValue())
Message += "\nExisting replacement: " + ExistingReplacement->toString();
return Message;
}
char ReplacementError::ID = 0;
Replacements Replacements::getCanonicalReplacements() const {
std::vector<Replacement> NewReplaces;
// Merge adjacent replacements.
for (const auto &R : Replaces) {
if (NewReplaces.empty()) {
NewReplaces.push_back(R);
continue;
}
auto &Prev = NewReplaces.back();
unsigned PrevEnd = Prev.getOffset() + Prev.getLength();
if (PrevEnd < R.getOffset()) {
NewReplaces.push_back(R);
} else {
assert(PrevEnd == R.getOffset() &&
"Existing replacements must not overlap.");
Replacement NewR(
R.getFilePath(), Prev.getOffset(), Prev.getLength() + R.getLength(),
(Prev.getReplacementText() + R.getReplacementText()).str());
Prev = NewR;
}
}
ReplacementsImpl NewReplacesImpl(NewReplaces.begin(), NewReplaces.end());
return Replacements(NewReplacesImpl.begin(), NewReplacesImpl.end());
}
// `R` and `Replaces` are order-independent if applying them in either order
// has the same effect, so we need to compare replacements associated to
// applying them in either order.
llvm::Expected<Replacements>
Replacements::mergeIfOrderIndependent(const Replacement &R) const {
Replacements Rs(R);
// A Replacements set containing a single replacement that is `R` referring to
// the code after the existing replacements `Replaces` are applied.
Replacements RsShiftedByReplaces(getReplacementInChangedCode(R));
// A Replacements set that is `Replaces` referring to the code after `R` is
// applied.
Replacements ReplacesShiftedByRs;
for (const auto &Replace : Replaces)
ReplacesShiftedByRs.Replaces.insert(
Rs.getReplacementInChangedCode(Replace));
// This is equivalent to applying `Replaces` first and then `R`.
auto MergeShiftedRs = merge(RsShiftedByReplaces);
// This is equivalent to applying `R` first and then `Replaces`.
auto MergeShiftedReplaces = Rs.merge(ReplacesShiftedByRs);
// Since empty or segmented replacements around existing replacements might be
// produced above, we need to compare replacements in canonical forms.
if (MergeShiftedRs.getCanonicalReplacements() ==
MergeShiftedReplaces.getCanonicalReplacements())
return MergeShiftedRs;
return llvm::make_error<ReplacementError>(replacement_error::overlap_conflict,
R, *Replaces.begin());
}
llvm::Error Replacements::add(const Replacement &R) {
// Check the file path.
if (!Replaces.empty() && R.getFilePath() != Replaces.begin()->getFilePath())
return llvm::make_error<ReplacementError>(
replacement_error::wrong_file_path, R, *Replaces.begin());
// Special-case header insertions.
if (R.getOffset() == std::numeric_limits<unsigned>::max()) {
Replaces.insert(R);
return llvm::Error::success();
}
// This replacement cannot conflict with replacements that end before
// this replacement starts or start after this replacement ends.
// We also know that there currently are no overlapping replacements.
// Thus, we know that all replacements that start after the end of the current
// replacement cannot overlap.
Replacement AtEnd(R.getFilePath(), R.getOffset() + R.getLength(), 0, "");
// Find the first entry that starts after or at the end of R. Note that
// entries that start at the end can still be conflicting if R is an
// insertion.
auto I = Replaces.lower_bound(AtEnd);
// If `I` starts at the same offset as `R`, `R` must be an insertion.
if (I != Replaces.end() && R.getOffset() == I->getOffset()) {
assert(R.getLength() == 0);
// `I` is also an insertion, `R` and `I` conflict.
if (I->getLength() == 0) {
// Check if two insertions are order-indepedent: if inserting them in
// either order produces the same text, they are order-independent.
if ((R.getReplacementText() + I->getReplacementText()).str() !=
(I->getReplacementText() + R.getReplacementText()).str())
return llvm::make_error<ReplacementError>(
replacement_error::insert_conflict, R, *I);
// If insertions are order-independent, we can merge them.
Replacement NewR(
R.getFilePath(), R.getOffset(), 0,
(R.getReplacementText() + I->getReplacementText()).str());
Replaces.erase(I);
Replaces.insert(std::move(NewR));
return llvm::Error::success();
}
// Insertion `R` is adjacent to a non-insertion replacement `I`, so they
// are order-independent. It is safe to assume that `R` will not conflict
// with any replacement before `I` since all replacements before `I` must
// either end before `R` or end at `R` but has length > 0 (if the
// replacement before `I` is an insertion at `R`, it would have been `I`
// since it is a lower bound of `AtEnd` and ordered before the current `I`
// in the set).
Replaces.insert(R);
return llvm::Error::success();
}
// `I` is the smallest iterator (after `R`) whose entry cannot overlap.
// If that is begin(), there are no overlaps.
if (I == Replaces.begin()) {
Replaces.insert(R);
return llvm::Error::success();
}
--I;
auto Overlap = [](const Replacement &R1, const Replacement &R2) -> bool {
return Range(R1.getOffset(), R1.getLength())
.overlapsWith(Range(R2.getOffset(), R2.getLength()));
};
// If the previous entry does not overlap, we know that entries before it
// can also not overlap.
if (!Overlap(R, *I)) {
// If `R` and `I` do not have the same offset, it is safe to add `R` since
// it must come after `I`. Otherwise:
// - If `R` is an insertion, `I` must not be an insertion since it would
// have come after `AtEnd`.
// - If `R` is not an insertion, `I` must be an insertion; otherwise, `R`
// and `I` would have overlapped.
// In either case, we can safely insert `R`.
Replaces.insert(R);
} else {
// `I` overlaps with `R`. We need to check `R` against all overlapping
// replacements to see if they are order-indepedent. If they are, merge `R`
// with them and replace them with the merged replacements.
auto MergeBegin = I;
auto MergeEnd = std::next(I);
while (I != Replaces.begin()) {
--I;
// If `I` doesn't overlap with `R`, don't merge it.
if (!Overlap(R, *I))
break;
MergeBegin = I;
}
Replacements OverlapReplaces(MergeBegin, MergeEnd);
llvm::Expected<Replacements> Merged =
OverlapReplaces.mergeIfOrderIndependent(R);
if (!Merged)
return Merged.takeError();
Replaces.erase(MergeBegin, MergeEnd);
Replaces.insert(Merged->begin(), Merged->end());
}
return llvm::Error::success();
}
namespace {
// Represents a merged replacement, i.e. a replacement consisting of multiple
// overlapping replacements from 'First' and 'Second' in mergeReplacements.
//
// Position projection:
// Offsets and lengths of the replacements can generally refer to two different
// coordinate spaces. Replacements from 'First' refer to the original text
// whereas replacements from 'Second' refer to the text after applying 'First'.
//
// MergedReplacement always operates in the coordinate space of the original
// text, i.e. transforms elements from 'Second' to take into account what was
// changed based on the elements from 'First'.
//
// We can correctly calculate this projection as we look at the replacements in
// order of strictly increasing offsets.
//
// Invariants:
// * We always merge elements from 'First' into elements from 'Second' and vice
// versa. Within each set, the replacements are non-overlapping.
// * We only extend to the right, i.e. merge elements with strictly increasing
// offsets.
class MergedReplacement {
public:
MergedReplacement(const Replacement &R, bool MergeSecond, int D)
: MergeSecond(MergeSecond), Delta(D), FilePath(R.getFilePath()),
Offset(R.getOffset() + (MergeSecond ? 0 : Delta)), Length(R.getLength()),
Text(R.getReplacementText()) {
Delta += MergeSecond ? 0 : Text.size() - Length;
DeltaFirst = MergeSecond ? Text.size() - Length : 0;
}
// Merges the next element 'R' into this merged element. As we always merge
// from 'First' into 'Second' or vice versa, the MergedReplacement knows what
// set the next element is coming from.
void merge(const Replacement &R) {
if (MergeSecond) {
unsigned REnd = R.getOffset() + Delta + R.getLength();
unsigned End = Offset + Text.size();
if (REnd > End) {
Length += REnd - End;
MergeSecond = false;
}
StringRef TextRef = Text;
StringRef Head = TextRef.substr(0, R.getOffset() + Delta - Offset);
StringRef Tail = TextRef.substr(REnd - Offset);
Text = (Head + R.getReplacementText() + Tail).str();
Delta += R.getReplacementText().size() - R.getLength();
} else {
unsigned End = Offset + Length;
StringRef RText = R.getReplacementText();
StringRef Tail = RText.substr(End - R.getOffset());
Text = (Text + Tail).str();
if (R.getOffset() + RText.size() > End) {
Length = R.getOffset() + R.getLength() - Offset;
MergeSecond = true;
} else {
Length += R.getLength() - RText.size();
}
DeltaFirst += RText.size() - R.getLength();
}
}
// Returns 'true' if 'R' starts strictly after the MergedReplacement and thus
// doesn't need to be merged.
bool endsBefore(const Replacement &R) const {
if (MergeSecond)
return Offset + Text.size() < R.getOffset() + Delta;
return Offset + Length < R.getOffset();
}
// Returns 'true' if an element from the second set should be merged next.
bool mergeSecond() const { return MergeSecond; }
int deltaFirst() const { return DeltaFirst; }
Replacement asReplacement() const { return {FilePath, Offset, Length, Text}; }
private:
bool MergeSecond;
// Amount of characters that elements from 'Second' need to be shifted by in
// order to refer to the original text.
int Delta;
// Sum of all deltas (text-length - length) of elements from 'First' merged
// into this element. This is used to update 'Delta' once the
// MergedReplacement is completed.
int DeltaFirst;
// Data of the actually merged replacement. FilePath and Offset aren't changed
// as the element is only extended to the right.
const StringRef FilePath;
const unsigned Offset;
unsigned Length;
std::string Text;
};
} // namespace
Replacements Replacements::merge(const Replacements &ReplacesToMerge) const {
if (empty() || ReplacesToMerge.empty())
return empty() ? ReplacesToMerge : *this;
auto &First = Replaces;
auto &Second = ReplacesToMerge.Replaces;
// Delta is the amount of characters that replacements from 'Second' need to
// be shifted so that their offsets refer to the original text.
int Delta = 0;
ReplacementsImpl Result;
// Iterate over both sets and always add the next element (smallest total
// Offset) from either 'First' or 'Second'. Merge that element with
// subsequent replacements as long as they overlap. See more details in the
// comment on MergedReplacement.
for (auto FirstI = First.begin(), SecondI = Second.begin();
FirstI != First.end() || SecondI != Second.end();) {
bool NextIsFirst = SecondI == Second.end() ||
(FirstI != First.end() &&
FirstI->getOffset() < SecondI->getOffset() + Delta);
MergedReplacement Merged(NextIsFirst ? *FirstI : *SecondI, NextIsFirst,
Delta);
++(NextIsFirst ? FirstI : SecondI);
while ((Merged.mergeSecond() && SecondI != Second.end()) ||
(!Merged.mergeSecond() && FirstI != First.end())) {
auto &I = Merged.mergeSecond() ? SecondI : FirstI;
if (Merged.endsBefore(*I))
break;
Merged.merge(*I);
++I;
}
Delta -= Merged.deltaFirst();
Result.insert(Merged.asReplacement());
}
return Replacements(Result.begin(), Result.end());
}
// Combines overlapping ranges in \p Ranges and sorts the combined ranges.
// Returns a set of non-overlapping and sorted ranges that is equivalent to
// \p Ranges.
static std::vector<Range> combineAndSortRanges(std::vector<Range> Ranges) {
llvm::sort(Ranges, [](const Range &LHS, const Range &RHS) {
if (LHS.getOffset() != RHS.getOffset())
return LHS.getOffset() < RHS.getOffset();
return LHS.getLength() < RHS.getLength();
});
std::vector<Range> Result;
for (const auto &R : Ranges) {
if (Result.empty() ||
Result.back().getOffset() + Result.back().getLength() < R.getOffset()) {
Result.push_back(R);
} else {
unsigned NewEnd =
std::max(Result.back().getOffset() + Result.back().getLength(),
R.getOffset() + R.getLength());
Result[Result.size() - 1] =
Range(Result.back().getOffset(), NewEnd - Result.back().getOffset());
}
}
return Result;
}
namespace clang {
namespace tooling {
std::vector<Range>
calculateRangesAfterReplacements(const Replacements &Replaces,
const std::vector<Range> &Ranges) {
// To calculate the new ranges,
// - Turn \p Ranges into Replacements at (offset, length) with an empty
// (unimportant) replacement text of length "length".
// - Merge with \p Replaces.
// - The new ranges will be the affected ranges of the merged replacements.
auto MergedRanges = combineAndSortRanges(Ranges);
if (Replaces.empty())
return MergedRanges;
tooling::Replacements FakeReplaces;
for (const auto &R : MergedRanges) {
auto Err = FakeReplaces.add(Replacement(Replaces.begin()->getFilePath(),
R.getOffset(), R.getLength(),
std::string(R.getLength(), ' ')));
assert(!Err &&
"Replacements must not conflict since ranges have been merged.");
llvm::consumeError(std::move(Err));
}
return FakeReplaces.merge(Replaces).getAffectedRanges();
}
} // namespace tooling
} // namespace clang
std::vector<Range> Replacements::getAffectedRanges() const {
std::vector<Range> ChangedRanges;
int Shift = 0;
for (const auto &R : Replaces) {
unsigned Offset = R.getOffset() + Shift;
unsigned Length = R.getReplacementText().size();
Shift += Length - R.getLength();
ChangedRanges.push_back(Range(Offset, Length));
}
return combineAndSortRanges(ChangedRanges);
}
unsigned Replacements::getShiftedCodePosition(unsigned Position) const {
unsigned Offset = 0;
for (const auto &R : Replaces) {
if (R.getOffset() + R.getLength() <= Position) {
Offset += R.getReplacementText().size() - R.getLength();
continue;
}
if (R.getOffset() < Position &&
R.getOffset() + R.getReplacementText().size() <= Position) {
Position = R.getOffset() + R.getReplacementText().size();
if (!R.getReplacementText().empty())
Position--;
}
break;
}
return Position + Offset;
}
namespace clang {
namespace tooling {
bool applyAllReplacements(const Replacements &Replaces, Rewriter &Rewrite) {
bool Result = true;
for (auto I = Replaces.rbegin(), E = Replaces.rend(); I != E; ++I) {
if (I->isApplicable()) {
Result = I->apply(Rewrite) && Result;
} else {
Result = false;
}
}
return Result;
}
llvm::Expected<std::string> applyAllReplacements(StringRef Code,
const Replacements &Replaces) {
if (Replaces.empty())
return Code.str();
IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
new llvm::vfs::InMemoryFileSystem);
FileManager Files(FileSystemOptions(), InMemoryFileSystem);
DiagnosticsEngine Diagnostics(
IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
new DiagnosticOptions);
SourceManager SourceMgr(Diagnostics, Files);
Rewriter Rewrite(SourceMgr, LangOptions());
InMemoryFileSystem->addFile(
"<stdin>", 0, llvm::MemoryBuffer::getMemBuffer(Code, "<stdin>"));
FileID ID = SourceMgr.createFileID(Files.getFile("<stdin>"), SourceLocation(),
clang::SrcMgr::C_User);
for (auto I = Replaces.rbegin(), E = Replaces.rend(); I != E; ++I) {
Replacement Replace("<stdin>", I->getOffset(), I->getLength(),
I->getReplacementText());
if (!Replace.apply(Rewrite))
return llvm::make_error<ReplacementError>(
replacement_error::fail_to_apply, Replace);
}
std::string Result;
llvm::raw_string_ostream OS(Result);
Rewrite.getEditBuffer(ID).write(OS);
OS.flush();
return Result;
}
std::map<std::string, Replacements> groupReplacementsByFile(
FileManager &FileMgr,
const std::map<std::string, Replacements> &FileToReplaces) {
std::map<std::string, Replacements> Result;
llvm::SmallPtrSet<const FileEntry *, 16> ProcessedFileEntries;
for (const auto &Entry : FileToReplaces) {
const FileEntry *FE = FileMgr.getFile(Entry.first);
if (!FE)
llvm::errs() << "File path " << Entry.first << " is invalid.\n";
else if (ProcessedFileEntries.insert(FE).second)
Result[Entry.first] = std::move(Entry.second);
}
return Result;
}
} // namespace tooling
} // namespace clang