IT博客汇
  • 首页
  • 精华
  • 技术
  • 设计
  • 资讯
  • 扯淡
  • 权利声明
  • 登录 注册

    Compiling C++ with the Clang API

    MaskRay发表于 2025-03-19 16:36:12
    love 0

    This post describes how to compile a single C++ source file to an object file with the Clang API. Here is the code. It behaves like a simplified clang executable that handles -c and -S.

    1
    # Quote the delimiter so the shell copies the C++ text verbatim; an unquoted
    # delimiter would run command substitution on the backticks in the source comments.
    cat > main.cc <<'eof'
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    #include <clang/CodeGen/CodeGenAction.h> // EmitObjAction
    #include <clang/Driver/Compilation.h>
    #include <clang/Driver/Driver.h>
    #include <clang/Frontend/CompilerInstance.h>
    #include <clang/Frontend/FrontendOptions.h>
    #include <llvm/Config/llvm-config.h> // LLVM_VERSION_MAJOR
    #include <llvm/Support/TargetSelect.h> // LLVMInitialize*
    #include <llvm/Support/VirtualFileSystem.h>

    using namespace clang;

    constexpr llvm::StringRef kTargetTriple = "x86_64-unknown-linux-gnu";

    namespace {
    // Diagnostic consumer that renders notes, warnings, and errors into `message`
    // as "file:line:col: level: text" lines instead of printing them.
    // Diagnostics that arrive without a SourceManager or with an invalid location
    // (for example driver or command-line errors) are rendered as "level: text".
    struct DiagsSaver : DiagnosticConsumer {
      std::string message;                   // accumulated diagnostic text
      llvm::raw_string_ostream os{message};  // stream that appends to `message`

      void HandleDiagnostic(DiagnosticsEngine::Level diagLevel, const Diagnostic &info) override {
        // Forward to the base class first so its warning/error counters stay accurate.
        DiagnosticConsumer::HandleDiagnostic(diagLevel, info);
        const char *level;
        switch (diagLevel) {
        default:
          // Ignored diagnostics and remarks are not recorded.
          return;
        case DiagnosticsEngine::Note:
          level = "note";
          break;
        case DiagnosticsEngine::Warning:
          level = "warning";
          break;
        case DiagnosticsEngine::Error:
        case DiagnosticsEngine::Fatal:
          level = "error";
          break;
        }

        llvm::SmallString<256> msg;
        info.FormatDiagnostic(msg);
        auto loc = info.getLocation();
        // getSourceManager() must not be called when no SourceManager is attached,
        // which is the case for diagnostics emitted while parsing the command line.
        if (!info.hasSourceManager() || loc.isInvalid()) {
          os << level << ": " << msg << '\n';
          return;
        }

        auto &sm = info.getSourceManager();
        // fileLoc is the location in the file where the (possibly macro-expanded)
        // token appears; this matches the line Clang reports for the diagnostic.
        auto fileLoc = sm.getFileLoc(loc);
        os << sm.getFilename(fileLoc) << ':' << sm.getSpellingLineNumber(fileLoc)
           << ':' << sm.getSpellingColumnNumber(fileLoc) << ": " << level << ": "
           << msg << '\n';
        if (loc.isMacroID()) {
          // Additionally report where the offending token is spelled inside the
          // macro's replacement list.
          loc = sm.getSpellingLoc(loc);
          os << sm.getFilename(loc) << ':' << sm.getSpellingLineNumber(loc) << ':'
             << sm.getSpellingColumnNumber(loc) << ": note: expanded from macro\n";
        }
      }
    };
    }

    static std::pair<bool, std::string> compile(int argc, char *argv[]) {
    auto fs = llvm::vfs::getRealFileSystem();
    DiagsSaver dc;
    std::vector<const char *> args{"clang"};
    args.insert(args.end(), argv + 1, argv + argc);
    auto diags = CompilerInstance::createDiagnostics(
    #if LLVM_VERSION_MAJOR >= 20
    *fs,
    #endif
    new DiagnosticOptions, &dc, false);
    driver::Driver d(args[0], kTargetTriple, *diags, "cc", fs);
    d.setCheckInputsExist(false);
    std::unique_ptr<driver::Compilation> comp(d.BuildCompilation(args));
    const auto &jobs = comp->getJobs();
    if (jobs.size() != 1)
    return {false, "only support one job"};
    const llvm::opt::ArgStringList &ccArgs = jobs.begin()->getArguments();

    auto invoc = std::make_unique<CompilerInvocation>();
    CompilerInvocation::CreateFromArgs(*invoc, ccArgs, *diags);
    auto ci = std::make_unique<CompilerInstance>();
    ci->setInvocation(std::move(invoc));
    ci->createDiagnostics(*fs, &dc, false);
    // Disable CompilerInstance::printDiagnosticStats, which might display "2 warnings generated."
    ci->getDiagnostics().getDiagnosticOptions().ShowCarets = false;
    ci->createFileManager(fs);
    ci->createSourceManager(ci->getFileManager());

    // Clang calls BuryPointer on the internal AST and CodeGen-related elements like TargetMachine.
    // This will cause memory leaks if `compile` is executed many times.
    ci->getCodeGenOpts().DisableFree = false;
    ci->getFrontendOpts().DisableFree = false;

    LLVMInitializeX86AsmParser();
    LLVMInitializeX86AsmPrinter();
    LLVMInitializeX86Target();
    LLVMInitializeX86TargetInfo();
    LLVMInitializeX86TargetMC();

    switch (ci->getFrontendOpts().ProgramAction) {
    case frontend::ActionKind::EmitObj: {
    EmitObjAction action;
    ci->ExecuteAction(action);
    } break;
    case frontend::ActionKind::EmitAssembly: {
    EmitAssemblyAction action;
    ci->ExecuteAction(action);
    } break;
    default:
    return {false, "unhandled action"};
    }
    return {true, std::move(dc.message)};
    }

    // Entry point: forwards the command line to compile(), writes the returned
    // diagnostics or error text to stderr, and exits non-zero on failure.
    int main(int argc, char *argv[]) {
      auto [ok, err] = compile(argc, argv);
      llvm::errs() << err;
      // Messages such as "only support one job" carry no trailing newline.
      if (!err.empty() && err.back() != '\n')
        llvm::errs() << '\n';
      return ok ? 0 : 1;
    }
    1
    eof

    Building the code with CMake

    Let's write a CMakeLists.txt that links against the needed Clang and LLVM libraries.

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    cat > CMakeLists.txt <<'eof'
    # The heredoc delimiter is quoted so the shell leaves ${...} for CMake to expand.
    # cmake_minimum_required() must precede project() so policies are set first.
    cmake_minimum_required(VERSION 3.16)
    project(cc)
    find_package(LLVM REQUIRED CONFIG)
    find_package(Clang REQUIRED CONFIG)

    add_executable(cc main.cc)
    # main.cc uses structured bindings, so require at least C++17.
    target_compile_features(cc PRIVATE cxx_std_17)
    target_include_directories(cc PRIVATE ${LLVM_INCLUDE_DIRS} ${CLANG_INCLUDE_DIRS})

    # Match the RTTI setting LLVM itself was built with.
    if(NOT LLVM_ENABLE_RTTI)
      target_compile_options(cc PRIVATE -fno-rtti)
    endif()

    # Link the monolithic clang-cpp shared library when available,
    # otherwise the individual Clang component libraries.
    if(CLANG_LINK_CLANG_DYLIB)
      target_link_libraries(cc PRIVATE clang-cpp)
    else()
      target_link_libraries(cc PRIVATE
        clangAST
        clangBasic
        clangCodeGen
        clangDriver
        clangFrontend
        clangLex
        clangParse
        clangSema
      )
    endif()

    # Same choice for LLVM: the single libLLVM, or the components the X86 backend needs.
    if(LLVM_LINK_LLVM_DYLIB)
      target_link_libraries(cc PRIVATE LLVM)
    else()
      target_link_libraries(cc PRIVATE LLVMOption LLVMSupport LLVMTarget
        LLVMX86AsmParser LLVMX86CodeGen LLVMX86Desc LLVMX86Info)
    endif()
    eof

    We need an LLVM and Clang installation that provides both lib/cmake/llvm/LLVMConfig.cmake and lib/cmake/clang/ClangConfig.cmake. You can grab these from system packages (dev versions may be required) or build LLVM yourself — I'll skip the detailed steps here. For a DIY build, use:

    1
    2
    3
    # cmake ... -DLLVM_ENABLE_PROJECTS='clang'

    ninja -C out/stable clang-cmake-exports clang

    No install step is needed. Next, create a build directory with the CMake configuration above:

    1
    2
    cmake -S. -Bout/debug -G Ninja -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_COMPILER=$HOME/Stable/bin/clang++ -DCMAKE_PREFIX_PATH="$HOME/llvm/out/stable"
    ninja -C out/debug

    I've set a prebuilt Clang as CMAKE_CXX_COMPILER — just a habit of mine. llvm-project isn't guaranteed to build warning-free with GCC, since GCC -Wall -Wextra has many false positives and LLVM developers avoid cluttering the codebase.

    1
    2
    3
    4
    5
    6
    7
    8
    9
    % echo 'void f() {}' > a.cc
    % out/debug/cc -S a.cc && head -n 5 a.s
    .file "a.cc"
    .text
    .globl _Z1fv # -- Begin function _Z1fv
    .p2align 4
    .type _Z1fv,@function
    % out/debug/cc -c a.cc && file a.o
    a.o: ELF 64-bit LSB relocatable, x86-64, version 1 (SYSV), not stripped

    Anonymous files

    The input source file and the output ELF file are stored in the filesystem. We could create a temporary file and delete it with a RAII class llvm::FileRemover:

    1
    2
    3
    std::error_code ec = llvm::sys::fs::createTemporaryFile("clang", "cc", fdIn, tempPath);
    llvm::raw_fd_stream osIn(fdIn, /*ShouldClose=*/true);
    llvm::FileRemover remover(tempPath);

    On Linux, we could utilize memfd_create to create a file in RAM with a volatile backing storage.

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    int fdIn = memfd_create("input", MFD_CLOEXEC);
    if (fdIn < 0)
    return {"", "failed to create input memfd"};
    int fdOut = memfd_create("output", MFD_CLOEXEC);
    if (fdOut < 0) {
    close(fdIn);
    return {"", "failed to create output memfd"};
    }

    std::string pathIn = "/proc/self/fd/" + std::to_string(fdIn);
    std::string pathOut = "/proc/self/fd/" + std::to_string(fdOut);

    // clang -c -xc++ /proc/self/fd/3 -o /proc/self/fd/4

    LLVMInitialize*

    To generate x86 code, we need a few LLVM X86 libraries defined by llvm/lib/Target/X86/**/CMakeLists.txt files.

    1
    2
    3
    4
    LLVMInitializeX86AsmPrinter();
    LLVMInitializeX86Target();
    LLVMInitializeX86TargetInfo();
    LLVMInitializeX86TargetMC();

    If inline assembly is used, we will also need the AsmParser library:

    1
    LLVMInitializeX86AsmParser();

    We could also call LLVMInitializeAll* functions instead, which initialize all supported targets (build-time LLVM_TARGETS_TO_BUILD).

    Here are some notes about the LLVMX86 libraries:

    • LLVMX86Info: llvm/lib/Target/X86/TargetInfo/
    • LLVMX86Desc: llvm/lib/Target/X86/MCTargetDesc/ (depends on LLVMX86Info)
    • LLVMX86AsmParser: llvm/lib/Target/X86/AsmParser (depends on LLVMX86Info and LLVMX86Desc)
    • LLVMX86CodeGen: llvm/lib/Target/X86/ (depends on LLVMX86Info and LLVMX86Desc)

    EmitAssembly and EmitObj

    The code supports two frontend actions, EmitAssembly (-S) and EmitObj (-c).

    You could also utilize the API in clang/include/clang/FrontendTool/Utils.h, but that would pull in another library clangFrontendTool (different from clangFrontend).

    Diagnostics

    The diagnostics system is quite complex. We have DiagnosticConsumer, DiagnosticsEngine, and DiagnosticOptions.

    1
    2
    3
    4
    5
    6
    DiagnosticsEngine
    ├─ DiagnosticIDs (defines diagnostics)
    ├─ SourceManager (provides locations)
    ├─ DiagnosticOptions (configures output)
    └─ DiagnosticConsumer (handles output)
       └─ Diagnostic (individual message)

    We define a simple DiagnosticConsumer that handles notes, warnings, errors, and fatal errors. When macro expansion comes into play, we report two key locations:

    • The physical location (fileLoc), where the expanded token triggers an issue — matching Clang's error line, and
    • The spelling location within the macro's replacement list (sm.getSpellingLoc(loc)).

    Although Clang also highlights intermediate locations for chained expansions, our simple approach offers a solid approximation.

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    % cat a.h
    #define FOO(x) x + 1
    % cat a.cc
    #include "a.h"
    #define BAR FOO
    void f() {
    int y = BAR("abc");
    }
    % out/debug/cc -c -Wall a.cc
    a.cc:4:11: warning: adding 'int' to a string does not append to the string
    ./a.h:1:18: note: expanded from macro
    a.cc:4:11: note: use array indexing to silence this warning
    ./a.h:1:18: note: expanded from macro
    a.cc:4:7: error: cannot initialize a variable of type 'int' with an rvalue of type 'const char *'
    % clang -c -Wall a.cc
    a.cc:4:11: warning: adding 'int' to a string does not append to the string [-Wstring-plus-int]
    4 | int y = BAR("abc");
    | ^~~~~~~~~~
    a.cc:2:13: note: expanded from macro 'BAR'
    2 | #define BAR FOO
    | ^
    ./a.h:1:18: note: expanded from macro 'FOO'
    1 | #define FOO(x) x + 1
    | ~~^~~
    a.cc:4:11: note: use array indexing to silence this warning
    a.cc:2:13: note: expanded from macro 'BAR'
    2 | #define BAR FOO
    | ^
    ./a.h:1:18: note: expanded from macro 'FOO'
    1 | #define FOO(x) x + 1
    | ^
    a.cc:4:7: error: cannot initialize a variable of type 'int' with an rvalue of type 'const char *'
    4 | int y = BAR("abc");
    | ^ ~~~~~~~~~~
    1 warning and 1 error generated.

    We call a convenience function CompilerInstance::ExecuteAction, which wraps lower-level API like BeginSource, Execute, and EndSource. However, it will print "1 warning and 1 error generated." unless we set ShowCarets to false.

    clang::createInvocation

    clang::createInvocation, renamed from createInvocationFromCommandLine in 2022, combines clang::Driver::BuildCompilation and clang::CompilerInvocation::CreateFromArgs. While it saves a few lines for certain tasks, it lacks the flexibility we need for our specific use cases.



沪ICP备19023445号-2号
友情链接