Reputation: 2652
I am developing a clang libTooling application that instruments (rewrites) C code. In order to prepare the code for a RecursiveASTVisitor (where the final code is rewritten), I need to pre-expand all the macro invocations in the C code. I am facing challenges with handling nested macros and function-like macros where a parameter is itself a macro (I think this could be considered a variant of macro nesting).
I implemented a partial solution by creating a MacroExpander (a clang::PPCallbacks subclass) that partially succeeds in expanding macros.
I added the following block of code to the logic that handles function-like macro parameter expansion (this update is also reflected in the included example code), showing exactly where I need help. Specifically, when a macro parameter itself needs expansion, I need to re-lex the parameter.
// Excerpt from the function-like branch of MacroExpands: gathers a textual
// spelling for every macro argument that still needs pre-expansion.
// NOTE: despite its name, expandedMacroArgs receives the *unexpanded*
// spelling of each such argument; nothing below performs the expansion.
std::vector<std::string> expandedMacroArgs;
for (auto argNum = 0u; argNum < Args->getNumMacroArguments(); ++argNum) {
// token points at the first token of argument argNum; the loop further down
// walks forward from it until an eof token is reached.
if (const auto* token = Args->getUnexpArgument(argNum)) {
// Check if macro argument is an unexpanded macro
if (Args->ArgNeedsPreexpansion(token, mPP)) {
// @BEGIN
// THIS IS WHERE I NEED HELP TO EXPAND argNum to its original expansion
// Copy the argument's tokens up to (not including) the eof token.
std::vector<clang::Token> tokens;
while (token->isNot(clang::tok::eof)) {
tokens.emplace_back(*token);
++token;
}
// Spell each copied token and join the spellings with single spaces.
const auto& expandedMacroArg = tokens
| std::views::transform([&](const auto& next) {
return mPP.getSpelling(next);
})
| std::views::join_with(std::string(" "))
| std::ranges::to<std::string>();
expandedMacroArgs.emplace_back(
expandedMacroArg);
// @END
}
}
}
Consider for example a pair of macros defined as:
#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
Here are a few simple examples of expected macro expansions:
MIN(1, 7) => (((1) <= (7)) ? (1) : (7))
MAX(5, 8) => (((5) > (8)) ? (5) : (8))
MIN(MAX(5, 8), 7) => ((((((5) > (8)) ? (5) : (8))) <= (7)) ? ((((5) > (8)) ? (5) : (8))) : (7))
The last example is nested, and can be obtained by substituting MAX(5, 8)
for a
in the MIN(a, b) macro. i.e:
MIN(MAX(5, 8), 7) => (((MAX(5, 8)) <= (7)) ? (MAX(5, 8)) : (7))
The code below includes a partially working MacroExpander class. This expander has the nesting and macro parameter expansion limitations described earlier. Additionally, I would like some guidance on how to correctly handle the token expansion logic. I used a brute force approach which seems overkill where I needed to handle token pasting ##
and stringizing #
operators in addition to spacing around keywords & punctuation. I am sure there has to be a better way to handle this.
The application (which I have working in Visual Studio, but it should be cross-platform) is coded as follows:
#include <cctype>
#include <format>
#include <map>
#include <memory>
#include <ranges>
#include <string>
#include <vector>
#include <range/v3/all.hpp>
#include <clang/Lex/PPCallbacks.h>
#include <clang/Lex/MacroArgs.h>
#include <clang/Lex/Preprocessor.h>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Tooling/Tooling.h>
#include <clang/Tooling/CommonOptionsParser.h>
#include <clang/Rewrite/Core/Rewriter.h>
using namespace llvm;
using namespace clang::driver;
using namespace clang::tooling;
namespace {
    // Option category with internal linkage; main() hands it to
    // CommonOptionsParser::create.
    llvm::cl::OptionCategory MyToolCategory{"my-tool options"};
}
namespace util {
namespace rv = ranges::views;
inline constexpr auto trim_front = rv::drop_while(::isspace);
inline constexpr auto trim_back = rv::reverse | trim_front | rv::reverse;
inline constexpr auto trim = trim_front | trim_back;
inline std::string trim_str(const std::string& s) {
return s | trim | ranges::to<std::string>;
}
inline std::vector<std::string> split (std::string& s, const char delim) {
return ranges::actions::split(s, delim);
}
}
/**
* The <code>MacroExpander</code> class overrides the
* <code>MacroExpands</code> method from
* <code>clang::PPCallbacks</code> to rewrite expanded macros
* back into the source before instrumentation.
*
* <p>As macros are expanded in the code, the
* <code>MacroExpands</code> function differentiates between
* function-like and object-like macros, handling their
* expansion differently. For function-like macros,
* it constructs a map of macro parameter names to their values
* and uses this map to replace the macro invocation in the
* source code with the expanded macro body, substituting
* parameter references with their corresponding values.
*/
class MacroExpander : public clang::PPCallbacks {
public:
/**
* Explicit constructor.
*
* <p>Change Preprocessor options via the returned reference returned by
* <code>mPP.getPreprocessorOpts</code>() if necessary.
*
* @param rPP [in] clang Preprocessor.
* @param rRewriter [in] non const rewriter reference.
*/
explicit MacroExpander(clang::Preprocessor &rPP, clang::Rewriter &rRewriter)
: mPP(rPP)
, mRewriter(rRewriter)
{}
/**
* Hook that gets called as macros are expanded in the C source.
*
* @param MacroNameTok [in] The token representing the macro name.
* @param MD [in] Macro Definition containing all the
* info about the type of macro and its
* parameters.
* @param Range [in] Source range of the macro.
* @param Args [in] Optional macro arguments.
*/
void MacroExpands(
const clang::Token& MacroNameTok,
const clang::MacroDefinition& MD, clang::SourceRange Range,
const clang::MacroArgs* Args) override {
// Get the macro name
const auto MacroName = mPP.getSpelling(MacroNameTok);
// Get the macro body
std::string MacroBody;
const auto MI = MD.getMacroInfo();
if (!MI->isFunctionLike()) {
// I think there is always just one token in these single
// objectLike macros that do not contain parameters.
for (const auto next : MI->tokens()) {
MacroBody += mPP.getSpelling(next);
}
#if defined MACRO_DEBUGGING
// Print the macro name and body
const auto debugString = std::format(
"Macro {} expands to: {}"
, MacroName
, MacroBody);
llvm::errs() << debugString << "\n";
#endif
// TODO make sure macro expansion nesting works.
mRewriter.ReplaceText(Range, MacroBody);
} else {
std::vector<std::string> expandedMacroArgs;
for (auto argNum = 0u; argNum < Args->getNumMacroArguments(); ++argNum) {
if (const auto* token = Args->getUnexpArgument(argNum)) {
// Check if macro argument is an unexpanded macro
if (Args->ArgNeedsPreexpansion(token, mPP)) {
// @BEGIN
// THIS IS WHERE I NEED HELP TO EXPAND argNum to its original expansion
std::vector<clang::Token> tokens;
while (token->isNot(clang::tok::eof)) {
tokens.emplace_back(*token);
++token;
}
const auto& expandedMacroArg = tokens
| std::views::transform([&](const auto& next) {
return mPP.getSpelling(next);
})
| std::views::join_with(std::string(" "))
| std::ranges::to<std::string>();
expandedMacroArgs.emplace_back(
expandedMacroArg);
// @END
}
}
}
// This is the complicated function like macro invocation.
// hack together a map of macro parameter names to their values,
// so we can substitute them in the macro body as we expand the tokens.
std::vector<std::string> macroArgNames;
for (const auto next : MI->params()) {
macroArgNames.emplace_back(next->getName().str());
}
std::vector<std::string> macroArgValues;
for (unsigned i = 0u, e = Args->getNumMacroArguments(); i != e; ++i) {
if (const auto next = Args->getUnexpArgument(i); next) {
// Handle address parameters using special case logic.
if (next->is(clang::tok::amp)) {
auto addressParam = mPP.getSpelling(*next);
auto specialToken = next + 1;
while (specialToken->isNot(clang::tok::eof)) {
addressParam += mPP.getSpelling(*specialToken);
++specialToken;
}
macroArgValues.emplace_back(addressParam);
} else if (next->is(clang::tok::identifier)) {
// check if parameter is unexpanded macro
if (mPP.getMacroInfo(next->getIdentifierInfo())) {
//auto bar = mPP.getMacroInfo(II->getReplacementToken(0);
//auto baz = mPP.getSpelling(bar);
}
macroArgValues.emplace_back(mPP.getSpelling(*next));
} else {
macroArgValues.emplace_back(mPP.getSpelling(*next));
}
}
}
// Make sure we have the same number of arguments as parameters.
if (macroArgValues.size() == macroArgNames.size()) {
std::map<std::string, std::string> macroParamInfo;
for (auto i = 0; i<macroArgNames.size(); ++i) {
macroParamInfo[macroArgNames[i]] = macroArgValues[i];
}
bool bStringizing = false;
auto& SM = mPP.getSourceManager();
for (const auto next : MI->tokens()) {
if (next.is(clang::tok::identifier)) {
// identifiers not found in our macro parameters
// need to be forwarded directly to the output unmodified.
const auto identifier = mPP.getSpelling(next);
if (const auto iter = macroParamInfo.find(
identifier); iter != macroParamInfo.cend()) {
MacroBody += iter->second;
} else {
MacroBody += identifier;
}
if (bStringizing) {
MacroBody += '"';
bStringizing = false;
} else {
MacroBody += " ";
}
} else if (next.is(clang::tok::hash)) {
// turn on bStringizing flag
MacroBody += '"';
bStringizing = true;
} else if (next.is(clang::tok::hashhash)) {
// do nothing (effectively) token pastes
MacroBody = util::trim_str(MacroBody);
} else if (next.isOneOf(
clang::tok::l_paren, clang::tok::r_paren,
clang::tok::l_brace, clang::tok::r_brace,
clang::tok::l_square, clang::tok::r_square,
clang::tok::colon, clang::tok::equal,
clang::tok::comma, clang::tok::semi)) {
// Punctuation
MacroBody = util::trim_str(MacroBody);
MacroBody += mPP.getSpelling(next);
} else {
MacroBody += mPP.getSpelling(next);
if (bStringizing) {
MacroBody += '"';
bStringizing = false;
} else {
MacroBody += " ";
}
}
}
#if defined (MACRO_DEBUGGING)
// Print the macro name and body
// For example if macro is "#define MAX(a, b) ...",
// commaSeparatedParams are "a, b".
const std::vector paramVec(MI->params().begin(), MI->params().end());
const auto& commaSeparatedParams = paramVec
| std::views::transform([&](const clang::IdentifierInfo* next) {
return next->getName().str();
})
| std::views::join_with(std::string(", "))
| std::ranges::to<std::string>();
// For example if macro usage is "MAX(1, 2) ...",
// commaSeparatedArgs are "1, 2".
std::vector<clang::Token> argTokens;
for (unsigned i = 0u, e = Args->getNumMacroArguments(); i != e; ++i) {
if (const auto next = Args->getUnexpArgument(i); next) {
argTokens.emplace_back(*next);
}
}
const auto& commaSeparatedArgs = argTokens
| std::views::transform([&](const auto& next) {
return mPP.getSpelling(next);
})
| std::views::join_with(std::string(", "))
| std::ranges::to<std::string>();
const auto debugString = std::format(
"Macro {}({}) expands to: {}"
, MacroName
, commaSeparatedArgs
, MacroBody);
llvm::errs() << debugString << "\n";
#endif
// TODO make sure macro expansion nesting works.
mRewriter.ReplaceText(Range, MacroBody );
}
}
}
private:
clang::Preprocessor& mPP;
clang::Rewriter& mRewriter;
};
class MyFrontendAction : public clang::ASTFrontendAction {
public:
[[nodiscard]] std::unique_ptr<clang::ASTConsumer> CreateASTConsumer(
clang::CompilerInstance &CI, llvm::StringRef file) override {
// Initialize the Rewriter, SourceManager and Language Options
// from the provided compiler instance.
mRewriter.setSourceMgr(CI.getSourceManager(), CI.getLangOpts());
CI.getPreprocessor().addPPCallbacks(std::make_unique<
MacroExpander>(CI.getPreprocessor(), mRewriter));
return std::make_unique<clang::ASTConsumer>();
}
//! Outputs rewritten source to console.
void EndSourceFileAction() override {
mRewriter.getEditBuffer(mRewriter.getSourceMgr().
getMainFileID()).write(llvm::outs());
}
private:
clang::Rewriter mRewriter;
};
/**
 * Tool entry point: parses the common libTooling command line options and
 * runs <code>MyFrontendAction</code> over the requested source files.
 *
 * @param argc [in] argument count.
 * @param argv [in] argument vector.
 * @return 1 when the command line cannot be parsed, otherwise the result
 *         of <code>ClangTool::run</code>.
 */
int main(int argc, const char **argv) {
    auto ExpectedParser = CommonOptionsParser::create(argc, argv, MyToolCategory);
    if (!ExpectedParser) {
        // llvm::toString consumes the llvm::Error. Merely streaming the
        // error leaves it unchecked, which aborts the program in LLVM builds
        // that detect unhandled errors.
        llvm::errs() << llvm::toString(ExpectedParser.takeError());
        return 1;
    }
    CommonOptionsParser &OptionsParser = ExpectedParser.get();
    ClangTool Tool(OptionsParser.getCompilations(), OptionsParser.getSourcePathList());
    return Tool.run(newFrontendActionFactory<MyFrontendAction>().get());
}
When running the application, pass 'test.c --' as arguments; the output appears on the console as shown further below. The test input (test.c) is:
#include <math.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdbool.h>
/* Object-like macros that each expand to a single token. */
#define TRUE 1
#define FALSE 0
#define SEMI ;
#define GT >
#define GE >=
#define LT <
#define LE <=
#define NE !
/* Function-like macros; every parameter use is parenthesized in the body. */
#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
/* Function-like macro whose body spans several lines and contains comments. */
#define MULTI_LINE_MACRO(a, b) (((a) > (b)) ? \
/*LHS*/pow((a), 2) : \
/*RHS*/pow((b), 3) )
#define KNOT_TO_MSEC(a) ((a)*0.514444)
/* Object-like macros with multi-token bodies; LAT_LIMIT refers to LON_LIMIT. */
#define STANDARD_GRAVITY (9.80665)
#define LON_LIMIT 180.0000241664
#define LAT_LIMIT (LON_LIMIT/2.0)
/* Variadic macro that forwards its arguments to printf. */
#define LOG(format, ...) printf(format, __VA_ARGS__)
/*
 * Reads `count` int arguments from the variadic argument list and prints
 * each one, together with its 1-based position, through the LOG macro.
 */
void
variadicFunctionTest(int count, ...) {
va_list args;
va_start(args, count);
for (int i = 0; i < count; i++) {
int num = va_arg(args, int);
/* Variadic macro invocation whose second argument spans several tokens. */
LOG("Argument %d: %d\n", i+1, num);
}
va_end(args);
}
/*
 * Collection of statements that invoke the macros defined above: object-like
 * macros used as operators and constants, function-like macros, and one
 * nested invocation (MIN inside MAX). The // comment before each statement
 * repeats the statement as written in the source.
 * NOTE(review): b, c, d, e, f and g are read without being initialized;
 * presumably this function only serves as input for the rewriting tool and
 * is not meant to be executed - confirm.
 */
void foo() {
int a, b, c, d, e, f, g;
int t = 0;
variadicFunctionTest(3, 1, 2, 3);
// double bar = MULTI_LINE_MACRO(1, 3);
double bar = MULTI_LINE_MACRO(1, 3);
// double bar = KNOT_TO_MSEC(123.0) / STANDARD_GRAVITY;
double foo = KNOT_TO_MSEC(123.0) / STANDARD_GRAVITY;
// a = b > c ? d : e;
a = b > c ? d : e;
// a = (b > c) ? (a LT b) ? c : d : TRUE;
a = (b > c) ? (a LT b) ? c : d : TRUE;
// a = b GT c ? d : e;
a = b GT c ? d : e;
// a = (b GT c) ? (a LT b) : FALSE ? d : e;
a = (b GT c) ? (a LT b) : FALSE ? d : e;
// a = MIN(1, 7);
a = MIN(1, 7);
// a = MAX( MIN(e, f), g);
a = MAX( MIN(e, f), g);
}
We do not need to see all the #include <..> macro expansions.
...
Macro va_arg expands to: __crt_va_arg
Macro __crt_va_arg(args, int) expands to: ((sizeof(int)> sizeof(__int64)||(sizeof(int)&(sizeof(int)- 1))!= 0)? * *(int * *)((args += sizeof(__int64))- sizeof(__int64)):*(int *)((args += sizeof(__int64))- sizeof(__int64)))
Macro LOG("Argument %d: %d\n", i) expands to: printf("Argument %d: %d\n" ,i)
Macro va_end expands to: __crt_va_end
Macro __crt_va_end(args) expands to: ((void)(args=(va_list)0))
Macro MULTI_LINE_MACRO(1, 3) expands to: (((1)>(3))? pow((1),2):pow((3),3))
Macro KNOT_TO_MSEC(123.0) expands to: ((123.0)* 0.514444)
Macro STANDARD_GRAVITY expands to: (9.80665)
Macro LT expands to: <
Macro TRUE expands to: 1
Macro GT expands to: >
Macro GT expands to: >
Macro LT expands to: <
Macro FALSE expands to: 0
Macro MIN(1, 7) expands to: (((1)<=(7))?(1):(7))
Macro MAX(MIN, g) expands to: (((MIN)>(g))?(MIN):(g))
Macro MIN(e, f) expands to: (((e)<=(f))?(e):(f))
#include <math.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdbool.h>
#define TRUE 1
#define FALSE 0
#define SEMI ;
#define GT >
#define GE >=
#define LT <
#define LE <=
#define NE !
#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#define MULTI_LINE_MACRO(a, b) (((a) > (b)) ? \
/*LHS*/pow((a), 2) : \
/*RHS*/pow((b), 3) )
#define KNOT_TO_MSEC(a) ((a)*0.514444)
#define STANDARD_GRAVITY (9.80665)
#define LON_LIMIT 180.0000241664
#define LAT_LIMIT (LON_LIMIT/2.0)
#define LOG(format, ...) printf(format, __VA_ARGS__)
void
variadicFunctionTest(int count, ...) {
va_list args;
__crt_va_start(args, count);
for (int i = 0; i < count; i++) {
int num = __crt_va_arg(args, int);
printf("Argument %d: %d\n" ,i);
}
__crt_va_end(args);
}
void foo() {
int a, b, c, d, e, f, g;
int t = 0;
variadicFunctionTest(3, 1, 2, 3);
// double bar = MULTI_LINE_MACRO(1, 3);
double bar = (((1)>(3))? pow((1),2):pow((3),3));
// double bar = KNOT_TO_MSEC(123.0) / STANDARD_GRAVITY;
double foo = ((123.0)* 0.514444) / (9.80665);
// a = b > c ? d : e;
a = b > c ? d : e;
// a = (b > c) ? (a LT b) ? c : d : TRUE;
a = (b > c) ? (a < b) ? c : d : 1;
// a = b GT c ? d : e;
a = b > c ? d : e;
// a = (b GT c) ? (a LT b) : FALSE ? d : e;
a = (b > c) ? (a < b) : 0 ? d : e;
// a = MIN(1, 7);
a = (((1)<=(7))?(1):(7));
// a = MAX( MIN(e, f), g);
a = (((MIN)>(g(((e)<=(f))?(e):(f))(g));
}
Upvotes: 0
Views: 131