libipt, block: handle instructions overlapping sections

When adding image sections that overlap with existing image sections, we may end
up with instructions that are split between two (or more) image sections.  Since
memory reads stop at image section boundaries, instruction decode will fail with
-pte_bad_insn for those instructions.

Issue additional memory reads for the remaining bytes and try to decode the
instruction again until we are either able to decode it correctly or fail to
read more bytes.

Since isid and IP no longer suffice to read the instruction's memory, we provide
the instruction's size and raw bytes as we do in struct pt_insn.  We mark the
block as truncated to indicate that.  Such an instruction ends a block so only
the last instruction of a block may be truncated.

Change-Id: Ia78a49f28b2745be59e5e16dc4fc25a87f07ebb4
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
diff --git a/doc/howto_libipt.md b/doc/howto_libipt.md
index 41ed84b..235cfb5 100644
--- a/doc/howto_libipt.md
+++ b/doc/howto_libipt.md
@@ -918,6 +918,11 @@
 to source code using the debug information contained in or reachable via the
 binary file.
 
+In some cases, the last instruction in a block may cross image section
+boundaries.  This can happen when a code segment is split into more than one
+image section.  The block is marked truncated in this case and provides the raw
+bytes of the last instruction.
+
 The following example shows how instructions can be reconstructed from a block:
 
 ~~~{.c}
@@ -932,9 +937,14 @@
         <struct insn> insn;
         int size;
 
-        size = pt_iscache_read(iscache, raw, sizeof(raw), block->isid, ip);
-        if (size < 0)
-            break;
+        if (block->truncated && ((ninsn + 1) == block->ninsn)) {
+            memcpy(raw, block->raw, block->size);
+            size = block->size;
+        } else {
+            size = pt_iscache_read(iscache, raw, sizeof(raw), block->isid, ip);
+            if (size < 0)
+                break;
+        }
 
         errcode = <decode instruction>(&insn, raw, size, block->mode);
         if (errcode < 0)
diff --git a/doc/man/pt_blk_next.3.md b/doc/man/pt_blk_next.3.md
index f4697e0..e2ac6e6 100644
--- a/doc/man/pt_blk_next.3.md
+++ b/doc/man/pt_blk_next.3.md
@@ -95,6 +95,19 @@
     /** The number of instructions in this block. */
     uint16_t ninsn;
 
+    /** The raw bytes of the last instruction in this block in case the
+     * instruction does not fit entirely into this block's section.
+     *
+     * This field is only valid if \@truncated is set.
+     */
+    uint8_t raw[pt_max_insn_size];
+
+    /** The size of the last instruction in this block in bytes.
+     *
+     * This field is only valid if \@truncated is set.
+     */
+    uint8_t size;
+
     /** A collection of flags giving additional information about the
      * instructions in this block.
      *
@@ -133,6 +146,16 @@
 
     /** - tracing was stopped after this block. */
     uint32_t stopped:1;
+
+    /** - the last instruction in this block is truncated.
+     *
+     *    It starts in this block's section but continues in one or more
+     *    other sections depending on how fragmented the memory image is.
+     *
+     *    The raw bytes for the last instruction are provided in \@raw and
+     *    its size in \@size in this case.
+     */
+    uint32_t truncated:1;
 };
 ~~~
 
@@ -183,6 +206,18 @@
     other instructions can be determined by decoding and examining the previous
     instruction.
 
+raw
+:   If the last instruction of this block cannot be read entirely from this
+    block's section, this field provides the instruction's raw bytes.
+
+    It is only valid if the *truncated* flag is set.
+
+size
+:   If the last instruction of this block cannot be read entirely from this
+    block's section, this field provides the instruction's size in bytes.
+
+    It is only valid if the *truncated* flag is set.
+
 speculative
 :   A flag giving the speculative execution status of all instructions in the
     block.  If set, the instructions were executed speculatively.  Otherwise,
@@ -238,6 +273,15 @@
     block.  If set, this is the last block of instructions that retired before
     tracing was stopped due to a TraceStop condition.
 
+truncated
+:   A flag saying whether the last instruction in this block cannot be read
+    entirely from this block's section.  Some bytes need to be read from one or
+    more other sections.  This can happen when an image section is partially
+    overwritten by another image section.
+
+    If set, the last instruction's memory is provided in *raw* and its size in
+    *size*.
+
 
 # RETURN VALUE
 
diff --git a/libipt/include/intel-pt.h b/libipt/include/intel-pt.h
index e279d6b..d6ee2f6 100644
--- a/libipt/include/intel-pt.h
+++ b/libipt/include/intel-pt.h
@@ -2113,6 +2113,19 @@
 	/** The number of instructions in this block. */
 	uint16_t ninsn;
 
+	/** The raw bytes of the last instruction in this block in case the
+	 * instruction does not fit entirely into this block's section.
+	 *
+	 * This field is only valid if \@truncated is set.
+	 */
+	uint8_t raw[pt_max_insn_size];
+
+	/** The size of the last instruction in this block in bytes.
+	 *
+	 * This field is only valid if \@truncated is set.
+	 */
+	uint8_t size;
+
 	/** A collection of flags giving additional information about the
 	 * instructions in this block.
 	 *
@@ -2151,6 +2164,16 @@
 
 	/** - tracing was stopped after this block. */
 	uint32_t stopped:1;
+
+	/** - the last instruction in this block is truncated.
+	 *
+	 *    It starts in this block's section but continues in one or more
+	 *    other sections depending on how fragmented the memory image is.
+	 *
+	 *    The raw bytes for the last instruction are provided in \@raw and
+	 *    its size in \@size in this case.
+	 */
+	uint32_t truncated:1;
 };
 
 /** Allocate an Intel PT block decoder.
diff --git a/libipt/src/pt_block_decoder.c b/libipt/src/pt_block_decoder.c
index 7f2e994..df8e0cf 100644
--- a/libipt/src/pt_block_decoder.c
+++ b/libipt/src/pt_block_decoder.c
@@ -1298,11 +1298,17 @@
 	if (!ninsn)
 		return 0;
 
+	/* The truncated instruction must be last. */
+	if (block->truncated)
+		return 0;
+
+	memset(&insn, 0, sizeof(insn));
+	memset(&iext, 0, sizeof(iext));
+
 	insn.mode = decoder->mode;
 	insn.ip = decoder->ip;
 
-	status = pt_image_read(decoder->image, &insn.isid, insn.raw,
-			       sizeof(insn.raw), &decoder->asid, insn.ip);
+	status = pt_insn_decode(&insn, &iext, decoder->image, &decoder->asid);
 	if (status < 0)
 		return status;
 
@@ -1314,11 +1320,14 @@
 		block->isid = insn.isid;
 	}
 
-	insn.size = (uint8_t) status;
-
-	status = pt_ild_decode(&insn, &iext);
-	if (status < 0)
-		return status;
+	/* If we couldn't read @insn's memory in one chunk from @insn.isid, we
+	 * provide the memory in @block.
+	 */
+	if (insn.truncated) {
+		memcpy(block->raw, insn.raw, insn.size);
+		block->size = insn.size;
+		block->truncated = 1;
+	}
 
 	/* Log calls' return addresses for return compression. */
 	status = pt_blk_log_call(decoder, &insn, &iext);
@@ -1875,6 +1884,16 @@
 			break;
 		}
 
+		/* If the block was truncated, we have to decode its last
+		 * instruction each time.
+		 *
+		 * We could have skipped the above switch and size assignment in
+		 * this case but this is already a slow and hopefully infrequent
+		 * path.
+		 */
+		if (block->truncated)
+			bce.qualifier = ptbq_decode;
+
 		status = pt_bcache_add(bcache, insn.ip - laddr, bce);
 		if (status < 0)
 			return status;
@@ -1907,9 +1926,21 @@
 	 *
 	 *     We need to re-decode @insn in order to determine the start IP of
 	 *     the next block.
+	 *
+	 *   - if the block is truncated
+	 *
+	 *     We need to read the last instruction's memory from multiple
+	 *     sections and provide it to the user.
+	 *
+	 *     We could still use the block cache but then we'd have to handle
+	 *     this case for each qualifier.  Truncation is hopefully rare and
+	 *     having to read the memory for the instruction from multiple
+	 *     sections is already slow.  Let's rather keep things simple and
+	 *     route it through the decode flow, where we already have
+	 *     everything in place.
 	 */
 	if (insn.iclass == ptic_call ||
-	    !pt_blk_is_in_section(nip, section, laddr)) {
+	    !pt_blk_is_in_section(nip, section, laddr) || block->truncated) {
 
 		memset(&bce, 0, sizeof(bce));
 		bce.ninsn = 1;
@@ -2008,6 +2039,67 @@
 	return pt_bcache_add(bcache, insn.ip - laddr, bce);
 }
 
+/* Proceed at a potentially truncated instruction.
+ *
+ * We were not able to decode the instruction at @decoder->ip in @decoder's
+ * cached section.  This is typically caused by not having enough bytes.
+ *
+ * Try to decode the instruction again using the entire image.  If this succeeds
+ * we expect an instruction that is truncated in the section in which it starts.
+ * We provide its raw bytes and size in @block in this case and end the block.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_proceed_truncated(struct pt_block_decoder *decoder,
+				    struct pt_block *block)
+{
+	struct pt_insn_ext iext;
+	struct pt_insn insn;
+	int errcode;
+
+	if (!decoder || !block)
+		return -pte_internal;
+
+	memset(&iext, 0, sizeof(iext));
+	memset(&insn, 0, sizeof(insn));
+
+	insn.mode = decoder->mode;
+	insn.ip = decoder->ip;
+
+	errcode = pt_insn_decode(&insn, &iext, decoder->image, &decoder->asid);
+	if (errcode < 0)
+		return errcode;
+
+	/* We shouldn't use this function if the instruction isn't truncated. */
+	if (!insn.truncated)
+		return -pte_internal;
+
+	/* Provide the instruction in the block.  This ends the block. */
+	memcpy(block->raw, insn.raw, insn.size);
+	block->size = insn.size;
+	block->truncated = 1;
+
+	/* Log calls' return addresses for return compression. */
+	errcode = pt_blk_log_call(decoder, &insn, &iext);
+	if (errcode < 0)
+		return errcode;
+
+	/* Let's see if we can proceed to the next IP without trace.
+	 *
+	 * The truncated instruction ends the block but we still need to get the
+	 * next block's start IP.  On -pte_bad_query, we proceed with trace instead.
+	 */
+	errcode = pt_insn_next_ip(&decoder->ip, &insn, &iext);
+	if (errcode < 0) {
+		if (errcode != -pte_bad_query)
+			return errcode;
+
+		return pt_blk_proceed_with_trace(decoder, &insn, &iext);
+	}
+
+	return 0;
+}
+
 /* Proceed to the next decision point using the block cache.
  *
  * Tracing is enabled and we don't have an event pending.  We already set
@@ -2153,8 +2245,12 @@
 		insn.ip = decoder->ip;
 
 		status = pt_blk_decode_in_section(&insn, &iext, section, laddr);
-		if (status < 0)
-			return status;
+		if (status < 0) {
+			if (status != -pte_bad_insn)
+				return status;
+
+			return pt_blk_proceed_truncated(decoder, block);
+		}
 
 		/* Log calls' return addresses for return compression. */
 		status = pt_blk_log_call(decoder, &insn, &iext);