)]}'
{
  "commit": "630bff8a2248da1873f27060d17301b5a5606ebb",
  "tree": "b52eb68f1b6aefc18a1fce7d7d63d12aa62af817",
  "parents": [
    "383733ea8d15524517b0f1f15c8380c24f17407d"
  ],
  "author": {
    "name": "Shilei Tian",
    "email": "i@tianshilei.me",
    "time": "Tue Apr 28 00:33:42 2026 -0400"
  },
  "committer": {
    "name": "Shilei Tian",
    "email": "i@tianshilei.me",
    "time": "Tue Apr 28 21:15:00 2026 -0400"
  },
  "message": "[RFC][AMDGPU] Add AMDGPU_SUMMARY bitcode block for ThinLTO\n\nWith AMDGPU object linking, device functions are compiled separately from the\nkernels that call them. Without whole-program visibility, the compiler must be\nconservative about occupancy for every device function, leading to suboptimal\nresource usage. However, GPU kernels typically carry explicit occupancy control\nattributes that constrain the launch environment. ThinLTO is the natural place\nto propagate these kernel attributes to callees: the combined module summary\nindex contains a cross-TU call graph, allowing occupancy information to be\npropagated top-down from kernels to all reachable device functions. The backend\ncan then generate better code with the propagated constraints, achieving\nwhole-program awareness without the compile-time overhead of full LTO.\n\nThis patch introduces a dedicated AMDGPU_SUMMARY bitcode block that serializes\nper-function summary data alongside the standard module summary. The block is\nscoped to AMDGPU so that non-AMDGPU targets are completely unaffected. A\nfollow-up patch will add the ThinLTO propagation logic that reads these\nsummaries and applies conservative attribute bounds to device functions\nreachable from multiple kernels.\n",
  "tree_diff": [
    {
      "type": "modify",
      "old_id": "772ca82019278b73f95b5cf2de3aec8dff5a5125",
      "old_mode": 33188,
      "old_path": "llvm/include/llvm/Bitcode/BitcodeReader.h",
      "new_id": "7ff6f7de8e701fc5f92f222bd80e6906c9451edc",
      "new_mode": 33188,
      "new_path": "llvm/include/llvm/Bitcode/BitcodeReader.h"
    },
    {
      "type": "modify",
      "old_id": "9162754bbfe1a5991ce17630689f1b73fba4bc77",
      "old_mode": 33188,
      "old_path": "llvm/include/llvm/Bitcode/LLVMBitCodes.h",
      "new_id": "e543966662c930ca3da2aca639138ee7ae8735a3",
      "new_mode": 33188,
      "new_path": "llvm/include/llvm/Bitcode/LLVMBitCodes.h"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "8bade6df7dd4894bafabc22db6be15d55ddaea4f",
      "new_mode": 33188,
      "new_path": "llvm/include/llvm/Support/AMDGPUSummary.h"
    },
    {
      "type": "modify",
      "old_id": "911ec7501eb8b53ef4d77bebf1386041e006233f",
      "old_mode": 33188,
      "old_path": "llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp",
      "new_id": "9a164e0ce1a7bd937136f1b25d86e9a8fd586657",
      "new_mode": 33188,
      "new_path": "llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp"
    },
    {
      "type": "modify",
      "old_id": "fa7a3b214e463789a0014803beab9b8eb4af875d",
      "old_mode": 33188,
      "old_path": "llvm/lib/Bitcode/Reader/BitcodeReader.cpp",
      "new_id": "ddae213bff45c90c91b232c644f13273e9634032",
      "new_mode": 33188,
      "new_path": "llvm/lib/Bitcode/Reader/BitcodeReader.cpp"
    },
    {
      "type": "modify",
      "old_id": "7153b1a0000fda70f19e228d4b4415f32f3b2441",
      "old_mode": 33188,
      "old_path": "llvm/lib/Bitcode/Writer/BitcodeWriter.cpp",
      "new_id": "84de4e7575c48964749b2aaeea7bd9acf50c62da",
      "new_mode": 33188,
      "new_path": "llvm/lib/Bitcode/Writer/BitcodeWriter.cpp"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "f31a4845d23b182f26419f2e8d4a4acba282e001",
      "new_mode": 33188,
      "new_path": "llvm/test/ThinLTO/AMDGPU/amdgpu-summary-roundtrip.ll"
    }
  ]
}
