blob: 9bb4022ca4067a0fb90f2416709c86e38034073b [file] [log] [blame]
//===--- CloneDetection.h - Finds code clones in an AST ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// /file
/// This file defines classes for searching and anlyzing source code clones.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_AST_CLONEDETECTION_H
#define LLVM_CLANG_AST_CLONEDETECTION_H
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/StringMap.h"
#include <vector>
namespace clang {
class Stmt;
class Decl;
class ASTContext;
class CompoundStmt;
/// \brief Identifies a list of statements.
///
/// Can either identify a single arbitrary Stmt object, a continuous sequence of
/// child statements inside a CompoundStmt or no statements at all.
class StmtSequence {
/// If this object identifies a sequence of statements inside a CompoundStmt,
/// S points to this CompoundStmt. If this object only identifies a single
/// Stmt, then S is a pointer to this Stmt.
const Stmt *S;
/// The related ASTContext for S.
ASTContext *Context;
/// If EndIndex is non-zero, then S is a CompoundStmt and this StmtSequence
/// instance is representing the CompoundStmt children inside the array
/// [StartIndex, EndIndex).
unsigned StartIndex;
unsigned EndIndex;
public:
/// \brief Constructs a StmtSequence holding multiple statements.
///
/// The resulting StmtSequence identifies a continuous sequence of statements
/// in the body of the given CompoundStmt. Which statements of the body should
/// be identified needs to be specified by providing a start and end index
/// that describe a non-empty sub-array in the body of the given CompoundStmt.
///
/// \param Stmt A CompoundStmt that contains all statements in its body.
/// \param Context The ASTContext for the given CompoundStmt.
/// \param StartIndex The inclusive start index in the children array of
/// \p Stmt
/// \param EndIndex The exclusive end index in the children array of \p Stmt.
StmtSequence(const CompoundStmt *Stmt, ASTContext &Context,
unsigned StartIndex, unsigned EndIndex);
/// \brief Constructs a StmtSequence holding a single statement.
///
/// \param Stmt An arbitrary Stmt.
/// \param Context The ASTContext for the given Stmt.
StmtSequence(const Stmt *Stmt, ASTContext &Context);
/// \brief Constructs an empty StmtSequence.
StmtSequence();
typedef const Stmt *const *iterator;
/// Returns an iterator pointing to the first statement in this sequence.
iterator begin() const;
/// Returns an iterator pointing behind the last statement in this sequence.
iterator end() const;
/// Returns the first statement in this sequence.
///
/// This method should only be called on a non-empty StmtSequence object.
const Stmt *front() const {
assert(!empty());
return begin()[0];
}
/// Returns the last statement in this sequence.
///
/// This method should only be called on a non-empty StmtSequence object.
const Stmt *back() const {
assert(!empty());
return begin()[size() - 1];
}
/// Returns the number of statements this object holds.
unsigned size() const {
if (holdsSequence())
return EndIndex - StartIndex;
if (S == nullptr)
return 0;
return 1;
}
/// Returns true if and only if this StmtSequence contains no statements.
bool empty() const { return size() == 0; }
/// Returns the related ASTContext for the stored Stmts.
ASTContext &getASTContext() const {
assert(Context);
return *Context;
}
/// Returns true if this objects holds a list of statements.
bool holdsSequence() const { return EndIndex != 0; }
/// Returns the start sourcelocation of the first statement in this sequence.
///
/// This method should only be called on a non-empty StmtSequence object.
SourceLocation getStartLoc() const;
/// Returns the end sourcelocation of the last statement in this sequence.
///
/// This method should only be called on a non-empty StmtSequence object.
SourceLocation getEndLoc() const;
bool operator==(const StmtSequence &Other) const {
return std::tie(S, StartIndex, EndIndex) ==
std::tie(Other.S, Other.StartIndex, Other.EndIndex);
}
bool operator!=(const StmtSequence &Other) const {
return std::tie(S, StartIndex, EndIndex) !=
std::tie(Other.S, Other.StartIndex, Other.EndIndex);
}
/// Returns true if and only if this sequence covers a source range that
/// contains the source range of the given sequence \p Other.
///
/// This method should only be called on a non-empty StmtSequence object
/// and passed a non-empty StmtSequence object.
bool contains(const StmtSequence &Other) const;
};
/// \brief Searches for clones in source code.
///
/// First, this class needs a translation unit which is passed via
/// \p analyzeTranslationUnit . It will then generate and store search data
/// for all statements inside the given translation unit.
/// Afterwards the generated data can be used to find code clones by calling
/// \p findClones .
///
/// This class only searches for clones in exectuable source code
/// (e.g. function bodies). Other clones (e.g. cloned comments or declarations)
/// are not supported.
class CloneDetector {
public:
typedef unsigned DataPiece;
/// Holds the data about a StmtSequence that is needed during the search for
/// code clones.
struct CloneSignature {
/// \brief Holds all relevant data of a StmtSequence.
///
/// If this variable is equal for two different StmtSequences, then they can
/// be considered clones of each other.
std::vector<DataPiece> Data;
/// \brief The complexity of the StmtSequence.
///
/// This scalar value serves as a simple way of filtering clones that are
/// too small to be reported. A greater value indicates that the related
/// StmtSequence is probably more interesting to the user.
unsigned Complexity;
/// \brief Creates an empty CloneSignature without any data.
CloneSignature() : Complexity(1) {}
CloneSignature(const std::vector<unsigned> &Data, unsigned Complexity)
: Data(Data), Complexity(Complexity) {}
/// \brief Adds the data from the given CloneSignature to this one.
void add(const CloneSignature &Other) {
Data.insert(Data.end(), Other.Data.begin(), Other.Data.end());
Complexity += Other.Complexity;
}
};
/// Holds group of StmtSequences that are clones of each other and the
/// complexity value (see CloneSignature::Complexity) that all stored
/// StmtSequences have in common.
struct CloneGroup {
std::vector<StmtSequence> Sequences;
unsigned Complexity;
CloneGroup(const StmtSequence &Seq, unsigned Complexity)
: Complexity(Complexity) {
Sequences.push_back(Seq);
}
/// \brief Returns false if and only if this group should be skipped when
/// searching for clones.
bool isValid() const {
// A clone group with only one member makes no sense, so we skip them.
return Sequences.size() > 1;
}
};
/// \brief Generates and stores search data for all statements in the body of
/// the given Decl.
void analyzeCodeBody(const Decl *D);
/// \brief Stores the CloneSignature to allow future querying.
void add(const StmtSequence &S, const CloneSignature &Signature);
/// \brief Searches the provided statements for clones.
///
/// \param Result Output parameter that is filled with a list of found
/// clone groups. Each group contains multiple StmtSequences
/// that were identified to be clones of each other.
/// \param MinGroupComplexity Only return clones which have at least this
/// complexity value.
void findClones(std::vector<CloneGroup> &Result, unsigned MinGroupComplexity);
private:
/// Stores all found clone groups including invalid groups with only a single
/// statement.
std::vector<CloneGroup> CloneGroups;
/// Maps search data to its related index in the \p CloneGroups vector.
llvm::StringMap<std::size_t> CloneGroupIndexes;
};
} // end namespace clang
#endif // LLVM_CLANG_AST_CLONEDETECTION_H