loader/asm: add some end branch annotations.

Add the annotations before enabling CET support: if CET support were
enabled first, it would be possible to build a loader that claims CET
support but does not actually provide it.
diff --git a/loader/unknown_ext_chain_gas.S b/loader/unknown_ext_chain_gas.S
index aca92ea..3a422b4 100644
--- a/loader/unknown_ext_chain_gas.S
+++ b/loader/unknown_ext_chain_gas.S
@@ -23,6 +23,8 @@
 # VkPhysicalDevice or a dispatchable object it can unwrap the object, possibly overwriting the wrapped physical device, and then
 # jump to the next function in the call chain
 
+#define _CET_ENDBR
+
 .intel_syntax noprefix
 .include "gen_defines.asm"
 
@@ -31,6 +33,7 @@
 .macro PhysDevExtTramp num
 .global vkPhysDevExtTramp\num
 vkPhysDevExtTramp\num:
+    _CET_ENDBR
     mov     rax, [rdi]
     mov     rdi, [rdi + PHYS_DEV_OFFSET_PHYS_DEV_TRAMP]
     jmp     [rax + (PHYS_DEV_OFFSET_INST_DISPATCH + (PTR_SIZE * \num))]
@@ -39,6 +42,7 @@
 .macro PhysDevExtTermin num
 .global vkPhysDevExtTermin\num
 vkPhysDevExtTermin\num:
+    _CET_ENDBR
     mov     rax, [rdi + ICD_TERM_OFFSET_PHYS_DEV_TERM]                          # Store the loader_icd_term* in rax
     cmp     qword ptr [rax + (DISPATCH_OFFSET_ICD_TERM + (PTR_SIZE * \num))], 0 # Check if the next function in the chain is NULL
     je      terminError\num                                                     # Go to the error section if it is NULL
@@ -60,6 +64,7 @@
 .macro DevExtTramp num
 .global vkdev_ext\num
 vkdev_ext\num:
+    _CET_ENDBR                                                                  # CET end-branch placeholder; _CET_ENDBR is defined empty in this file, so nothing is emitted yet
     mov     rax, [rdi]                                                          # Dereference the handle to get the dispatch table
     jmp     [rax + (EXT_OFFSET_DEVICE_DISPATCH + (PTR_SIZE * \num))]            # Jump to the appropriate call chain
 .endm
@@ -69,6 +74,7 @@
 .macro PhysDevExtTramp num
 .global vkPhysDevExtTramp\num
 vkPhysDevExtTramp\num:
+    _CET_ENDBR
     mov     eax, [esp + 4]                              # Load the wrapped VkPhysicalDevice into eax
     mov     ecx, [eax + PHYS_DEV_OFFSET_PHYS_DEV_TRAMP] # Load the unwrapped VkPhysicalDevice into ecx
     mov     [esp + 4], ecx                              # Overwrite the wrapped VkPhysicalDevice with the unwrapped one (on the stack)
@@ -79,6 +85,7 @@
 .macro PhysDevExtTermin num
 .global vkPhysDevExtTermin\num
 vkPhysDevExtTermin\num:
+    _CET_ENDBR
     mov     ecx, [esp + 4]                                                      # Move the wrapped VkPhysicalDevice into ecx
     mov     eax, [ecx + ICD_TERM_OFFSET_PHYS_DEV_TERM]                          # Store the loader_icd_term* in eax
     cmp     dword ptr [eax + (DISPATCH_OFFSET_ICD_TERM + (PTR_SIZE * \num))], 0 # Check if the next function in the chain is NULL
@@ -102,6 +109,7 @@
 .macro DevExtTramp num
 .global vkdev_ext\num
 vkdev_ext\num:
+    _CET_ENDBR                                                                  # CET end-branch placeholder (defined empty in this file). NOTE(review): eax below is the stack argument itself, while the 64-bit variant loads through the handle first - confirm no dereference is missing here
     mov     eax, [esp + 4]                                                      # Dereference the handle to get the dispatch table
     jmp     [eax + (EXT_OFFSET_DEVICE_DISPATCH + (PTR_SIZE * \num))]            # Jump to the appropriate call chain
 .endm