libipt, block: bound block cache filling recursion

We fill the block cache by proceeding one instruction and recursively calling
pt_blk_proceed_no_event_fill_cache() until we find a block cache entry or the
block fills up.  On the way back from the recursion, we fill in entries for the
instructions we skipped that didn't have a block cache entry, yet.

This recursion is bounded by the maximum allowed number of instructions in a
block.  This bound is too high and may lead to stack overflows for large
sequences of instructions.  Add an artificial lower bound to prevent the latter.

When we reach that bound, we insert a trampoline block cache entry to the next
instruction.  This may cause unnecessary extra block cache jumps, especially if
we stop close to a decision point.

We could try to replace those artificial trampoline entries with entries for the
real decision point once we reach it.  Given that we inserted the trampoline
after a long sequence of linear code, we will further insert a fair amount of
further trampolines on our way back due to block cache entry overflows (the
ninsn field).  Such a fixup would remove at most one hop for the long code
sequence.  It really only makes sense if we have other entry points much closer
to the decision point.  A quick hack didn't show any improvement so I'm not
doing it.  It remains an option should we find such cases in the future.

Change-Id: I04b81ba36500cebb7f5346daab899ead1826c5cd
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
diff --git a/libipt/src/pt_block_decoder.c b/libipt/src/pt_block_decoder.c
index 9188752..8eca947 100644
--- a/libipt/src/pt_block_decoder.c
+++ b/libipt/src/pt_block_decoder.c
@@ -1678,6 +1678,50 @@
 	return (begin <= ip && ip < end);
 }
 
+/* Insert a trampoline block cache entry.
+ *
+ * Add a trampoline block cache entry at @ip to continue at @nip, where @nip
+ * must be the next instruction after @ip.
+ *
+ * Both @ip and @nip must be section-relative
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static inline int pt_blk_add_trampoline(struct pt_block_cache *bcache,
+					uint64_t ip, uint64_t nip,
+					enum pt_exec_mode mode)
+{
+	struct pt_bcache_entry bce;
+	int64_t disp;
+
+	/* The displacement from @ip to @nip for the trampoline. */
+	disp = (int64_t) (nip - ip);
+
+	memset(&bce, 0, sizeof(bce));
+	bce.displacement = (int32_t) disp;
+	bce.ninsn = 1;
+	bce.mode = mode;
+	bce.qualifier = ptbq_again;
+
+	/* If we can't reach @nip without overflowing the displacement field, we
+	 * have to stop and re-decode the instruction at @ip.
+	 */
+	if ((int64_t) bce.displacement != disp) {
+
+		memset(&bce, 0, sizeof(bce));
+		bce.ninsn = 1;
+		bce.mode = mode;
+		bce.qualifier = ptbq_decode;
+	}
+
+	return pt_bcache_add(bcache, ip, bce);
+}
+
+enum {
+	/* The maximum number of steps when filling the block cache. */
+	bcache_fill_steps	= 0x400
+};
+
 /* Proceed to the next instruction and fill the block cache for @decoder->ip.
  *
  * Tracing is enabled and we don't have an event pending.  The current IP is not
@@ -1687,8 +1731,8 @@
  * further using the block cache.
  *
  * On our way back, add a block cache entry for the IP before proceeding.  Note
- * that the recursion is bounded by the maximum number of instructions in a
- * block.
+ * that the recursion is bounded by @steps and ultimately by the maximum number
+ * of instructions in a block.
  *
  * Returns zero on success, a negative error code otherwise.
  */
@@ -1696,7 +1740,7 @@
 					      struct pt_block *block,
 					      struct pt_block_cache *bcache,
 					      struct pt_section *section,
-					      uint64_t laddr)
+					      uint64_t laddr, size_t steps)
 {
 	struct pt_bcache_entry bce;
 	struct pt_insn_ext iext;
@@ -1705,6 +1749,9 @@
 	int64_t disp;
 	int status;
 
+	if (!decoder || !steps)
+		return -pte_internal;
+
 	/* Proceed one instruction by decoding and examining it.
 	 *
 	 * Note that we also return on a status of zero that indicates that the
@@ -1847,9 +1894,21 @@
 	 * the cache entry of the succeeding instruction.
 	 */
 	if (!pt_bce_is_valid(bce)) {
+		/* If we exceeded the maximum number of allowed steps, we insert
+		 * a trampoline to the next instruction.
+		 *
+		 * The next time we encounter the same code, we will use the
+		 * trampoline to jump directly to where we left off this time
+		 * and continue from there.
+		 */
+		steps -= 1;
+		if (!steps)
+			return pt_blk_add_trampoline(bcache, insn.ip - laddr,
+						     nip - laddr, insn.mode);
+
 		status = pt_blk_proceed_no_event_fill_cache(decoder, block,
 							    bcache, section,
-							    laddr);
+							    laddr, steps);
 		if (status < 0)
 			return status;
 
@@ -1896,27 +1955,9 @@
 	 * If one or both overflowed, let's try to insert a trampoline, i.e. we
 	 * try to reach @dip via a ptbq_again entry to @nip.
 	 */
-	if (!bce.ninsn || ((int64_t) bce.displacement != disp)) {
-		/* The displacement from @ip to @nip for the trampoline. */
-		disp = (int64_t) (nip - insn.ip);
-
-		memset(&bce, 0, sizeof(bce));
-		bce.displacement = (int32_t) disp;
-		bce.ninsn = 1;
-		bce.mode = insn.mode;
-		bce.qualifier = ptbq_again;
-
-		/* If we can't even reach @nip without overflowing the
-		 * displacement field, we have to stop and re-decode @insn.
-		 */
-		if ((int64_t) bce.displacement != disp) {
-
-			memset(&bce, 0, sizeof(bce));
-			bce.ninsn = 1;
-			bce.mode = insn.mode;
-			bce.qualifier = ptbq_decode;
-		}
-	}
+	if (!bce.ninsn || ((int64_t) bce.displacement != disp))
+		return pt_blk_add_trampoline(bcache, insn.ip - laddr,
+					     nip - laddr, insn.mode);
 
 	/* We're done.  Add the cache entry.
 	 *
@@ -2028,7 +2069,8 @@
 	if (!pt_bce_is_valid(bce))
 		return pt_blk_proceed_no_event_fill_cache(decoder, block,
 							  bcache, section,
-							  laddr);
+							  laddr,
+							  bcache_fill_steps);
 
 	/* We have a valid cache entry.  Let's first check if the way to the
 	 * decision point still fits into @block.
diff --git a/test/src/linear-fup-tip_pgd.ptt b/test/src/linear-fup-tip_pgd.ptt
new file mode 100644
index 0000000..5ca2ca9
--- /dev/null
+++ b/test/src/linear-fup-tip_pgd.ptt
@@ -0,0 +1,59 @@
+; Copyright (c) 2016, Intel Corporation
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are met:
+;
+;  * Redistributions of source code must retain the above copyright notice,
+;    this list of conditions and the following disclaimer.
+;  * Redistributions in binary form must reproduce the above copyright notice,
+;    this list of conditions and the following disclaimer in the documentation
+;    and/or other materials provided with the distribution.
+;  * Neither the name of Intel Corporation nor the names of its contributors
+;    may be used to endorse or promote products derived from this software
+;    without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+; ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+; POSSIBILITY OF SUCH DAMAGE.
+
+; Test a rather long linear trace.  To keep the test file small, we only check
+; the number of instructions.
+;
+;   opt:ptxed --quiet --stat
+;
+; Variant:  linear trace ends with disabled event.
+;
+
+org 0x100000
+bits 64
+
+; @pt p1: psb()
+; @pt p2: fup(3: %l0)
+; @pt p3: mode.exec(64bit)
+; @pt p4: psbend()
+l0:     times 100000 nop
+
+l1:     hlt
+; @pt p5: fup(2: %l1)
+; @pt p6: tip.pgd(0: %l1)
+
+
+; @pt .exp(ptdump)
+;%0p1  psb
+;%0p2  fup        3: %?l0
+;%0p3  mode.exec  cs.l
+;%0p4  psbend
+;%0p5  fup        2: %?l1.4
+;%0p6  tip.pgd    0: %?l1.0
+
+
+; @pt .exp(ptxed)
+;insn:  100000.
diff --git a/test/src/linear-tip.ptt b/test/src/linear-tip.ptt
new file mode 100644
index 0000000..7a17936
--- /dev/null
+++ b/test/src/linear-tip.ptt
@@ -0,0 +1,65 @@
+; Copyright (c) 2016, Intel Corporation
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are met:
+;
+;  * Redistributions of source code must retain the above copyright notice,
+;    this list of conditions and the following disclaimer.
+;  * Redistributions in binary form must reproduce the above copyright notice,
+;    this list of conditions and the following disclaimer in the documentation
+;    and/or other materials provided with the distribution.
+;  * Neither the name of Intel Corporation nor the names of its contributors
+;    may be used to endorse or promote products derived from this software
+;    without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+; ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+; POSSIBILITY OF SUCH DAMAGE.
+
+; Test a rather long linear trace.  To keep the test file small, we only check
+; the number of instructions.
+;
+;   opt:ptxed --quiet --stat
+;
+; Variant:  linear trace ends with an indirect branch.
+;
+
+org 0x100000
+bits 64
+
+; @pt p1: psb()
+; @pt p2: fup(3: %l0)
+; @pt p3: mode.exec(64bit)
+; @pt p4: psbend()
+l0:     times 50000 nop
+
+; @pt p5: tip(2: %l0)
+; @pt p6: tip(2: %l2)
+l1:     jmp rax
+
+; @pt p7: fup(1: %l2)
+; @pt p8: tip.pgd(0: %l2)
+l2:     hlt
+
+
+; @pt .exp(ptdump)
+;%0p1  psb
+;%0p2  fup        3: %?l0
+;%0p3  mode.exec  cs.l
+;%0p4  psbend
+;%0p5  tip        2: %?l0.4
+;%0p6  tip        2: %?l2.4
+;%0p7  fup        1: %?l2.2
+;%0p8  tip.pgd    0: %?l2.0
+
+
+; @pt .exp(ptxed)
+;insn:  100002.