libipt: sync from the current position

Currently, forward synchronization starts from the current position, whereas
backward synchronization starts from the last synchronization point.

This guarantees that repeated sync-backwards can be used to split the trace for
parallel decode even when the trace contains empty trace segments, which would
be skipped when reading ahead.

On the other hand, this does not allow forward and backward synchronization to
be combined in a meaningful way, and it does not support backtracking to the
beginning of the current trace segment in case of errors.

Change the behaviour of backward synchronization to start from the current
position and skip empty trace segments.  In order to reach the initial
synchronization point, multiple calls to pt_*_sync_backward() may be required.

pt_*_get_sync_offset() continues to provide the offset of the last successful
or attempted synchronization.  It is not updated when reading over
synchronization points (i.e. PSB packets).  It is updated during
synchronization, before reading the PSB+ header.

This fixes #19.

Change-Id: I6f939321b6621c6725a3a794cfdc5d5038dff90f
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
diff --git a/libipt/src/pt_packet_decoder.c b/libipt/src/pt_packet_decoder.c
index bbd6abd..a7cfed5 100644
--- a/libipt/src/pt_packet_decoder.c
+++ b/libipt/src/pt_packet_decoder.c
@@ -118,7 +118,7 @@
 	if (!decoder)
 		return -pte_invalid;
 
-	pos = decoder->sync;
+	pos = decoder->pos;
 	if (!pos)
 		pos = decoder->config.end;
 
diff --git a/libipt/src/pt_query_decoder.c b/libipt/src/pt_query_decoder.c
index 69b0db1..100fc8c 100644
--- a/libipt/src/pt_query_decoder.c
+++ b/libipt/src/pt_query_decoder.c
@@ -563,21 +563,58 @@
 
+/* Synchronize the query decoder backward from its current position.
+ *
+ * Searches backward for a synchronization point, starting at the decoder's
+ * current position or, if it has none, at the configured end (config.end).
+ * Synchronization points for which pt_qry_start() fails with -pte_eos or
+ * which leave the decoder at the starting position again are skipped.
+ *
+ * Returns the non-negative result of pt_qry_start() on success.
+ * Returns -pte_invalid if @decoder is NULL.
+ * Returns the negative error code of pt_sync_backward() or pt_qry_start()
+ * otherwise.
+ */
 int pt_qry_sync_backward(struct pt_query_decoder *decoder, uint64_t *ip)
 {
-	const uint8_t *pos, *sync;
+	const uint8_t *start, *sync;
 	int errcode;
 
 	if (!decoder)
 		return -pte_invalid;
 
-	pos = decoder->sync;
-	if (!pos)
-		pos = decoder->config.end;
+	start = decoder->pos;
+	if (!start)
+		start = decoder->config.end;
 
-	errcode = pt_sync_backward(&sync, pos, &decoder->config);
-	if (errcode < 0)
-		return errcode;
+	sync = start;
+	for (;;) {
+		errcode = pt_sync_backward(&sync, sync, &decoder->config);
+		if (errcode < 0)
+			return errcode;
 
-	return pt_qry_start(decoder, sync, ip);
+		errcode = pt_qry_start(decoder, sync, ip);
+		if (errcode < 0) {
+			/* Ignore incomplete trace segments at the end.  We need
+			 * a full PSB+ to start decoding.
+			 */
+			if (errcode == -pte_eos)
+				continue;
+
+			return errcode;
+		}
+
+		/* An empty trace segment in the middle of the trace might bring
+		 * us back to where we started.
+		 *
+		 * We're done when we reached a new position.
+		 */
+		if (decoder->pos != start)
+			break;
+	}
+
+	/* Pass the non-negative result of pt_qry_start() on to the caller
+	 * instead of discarding it.
+	 */
+	return errcode;
 }
 
 int pt_qry_sync_set(struct pt_query_decoder *decoder, uint64_t *ip,
diff --git a/libipt/test/src/ptunit-query.c b/libipt/test/src/ptunit-query.c
index d1da33c..2f11cd3 100644
--- a/libipt/test/src/ptunit-query.c
+++ b/libipt/test/src/ptunit-query.c
@@ -138,6 +138,306 @@
 	return ptu_passed();
 }
 
+static struct ptunit_result sync_backward(struct ptu_decoder_fixture *dfix)
+{
+	struct pt_query_decoder *decoder = &dfix->decoder;
+	struct pt_encoder *encoder = &dfix->encoder;
+	uint64_t sync[3], offset, ip;
+	int errcode;
+
+	/* Check that repeated pt_qry_sync_backward() calls iterate over the
+	 * three synchronization points recorded in sync[] in reverse order.
+	 */
+
+	errcode = pt_enc_get_offset(encoder, &sync[0]);
+	ptu_int_ge(errcode, 0);
+
+	pt_encode_psb(encoder);
+	pt_encode_mode_exec(encoder, ptem_64bit);
+	pt_encode_psbend(encoder);
+
+	errcode = pt_enc_get_offset(encoder, &sync[1]);
+	ptu_int_ge(errcode, 0);
+
+	pt_encode_psb(encoder);
+	pt_encode_mode_exec(encoder, ptem_64bit);
+	pt_encode_psbend(encoder);
+
+	errcode = pt_enc_get_offset(encoder, &sync[2]);
+	ptu_int_ge(errcode, 0);
+
+	pt_encode_psb(encoder);
+	pt_encode_mode_exec(encoder, ptem_64bit);
+	pt_encode_psbend(encoder);
+
+	/* Expect the offsets in reverse order (sync[2], sync[1], sync[0])
+	 * followed by -pte_eos once no earlier synchronization point is left.
+	 */
+
+	errcode = pt_qry_sync_backward(decoder, &ip);
+	ptu_int_ge(errcode, 0);
+
+	errcode = pt_qry_get_sync_offset(decoder, &offset);
+	ptu_int_eq(errcode, 0);
+	ptu_uint_eq(offset, sync[2]);
+
+	errcode = pt_qry_sync_backward(decoder, &ip);
+	ptu_int_ge(errcode, 0);
+
+	errcode = pt_qry_get_sync_offset(decoder, &offset);
+	ptu_int_eq(errcode, 0);
+	ptu_uint_eq(offset, sync[1]);
+
+	errcode = pt_qry_sync_backward(decoder, &ip);
+	ptu_int_ge(errcode, 0);
+
+	errcode = pt_qry_get_sync_offset(decoder, &offset);
+	ptu_int_eq(errcode, 0);
+	ptu_uint_eq(offset, sync[0]);
+
+	errcode = pt_qry_sync_backward(decoder, &ip);
+	ptu_int_eq(errcode, -pte_eos);
+
+	return ptu_passed();
+}
+
+static struct ptunit_result
+sync_backward_empty_end(struct ptu_decoder_fixture *dfix)
+{
+	struct pt_query_decoder *decoder = &dfix->decoder;
+	struct pt_encoder *encoder = &dfix->encoder;
+	uint64_t sync[3], offset, ip;
+	int errcode;
+
+	/* Check that repeated pt_qry_sync_backward() calls iterate over the
+	 * synchronization points in reverse order.
+	 *
+	 * The last PSB+ (at sync[2]) is empty and is expected to be skipped.
+	 */
+
+	errcode = pt_enc_get_offset(encoder, &sync[0]);
+	ptu_int_ge(errcode, 0);
+
+	pt_encode_psb(encoder);
+	pt_encode_mode_exec(encoder, ptem_64bit);
+	pt_encode_psbend(encoder);
+
+	errcode = pt_enc_get_offset(encoder, &sync[1]);
+	ptu_int_ge(errcode, 0);
+
+	pt_encode_psb(encoder);
+	pt_encode_mode_exec(encoder, ptem_64bit);
+	pt_encode_psbend(encoder);
+
+	errcode = pt_enc_get_offset(encoder, &sync[2]);
+	ptu_int_ge(errcode, 0);
+
+	pt_encode_psb(encoder);
+	pt_encode_psbend(encoder);
+
+	/* Expect sync[1] first since the empty PSB+ at sync[2] is skipped,
+	 * then sync[0], and finally -pte_eos.
+	 */
+
+	errcode = pt_qry_sync_backward(decoder, &ip);
+	ptu_int_ge(errcode, 0);
+
+	errcode = pt_qry_get_sync_offset(decoder, &offset);
+	ptu_int_eq(errcode, 0);
+	ptu_uint_eq(offset, sync[1]);
+
+	errcode = pt_qry_sync_backward(decoder, &ip);
+	ptu_int_ge(errcode, 0);
+
+	errcode = pt_qry_get_sync_offset(decoder, &offset);
+	ptu_int_eq(errcode, 0);
+	ptu_uint_eq(offset, sync[0]);
+
+	errcode = pt_qry_sync_backward(decoder, &ip);
+	ptu_int_eq(errcode, -pte_eos);
+
+	return ptu_passed();
+}
+
+static struct ptunit_result
+sync_backward_empty_mid(struct ptu_decoder_fixture *dfix)
+{
+	struct pt_query_decoder *decoder = &dfix->decoder;
+	struct pt_encoder *encoder = &dfix->encoder;
+	uint64_t sync[3], offset, ip;
+	int errcode;
+
+	/* Check that repeated pt_qry_sync_backward() calls iterate over the
+	 * synchronization points in reverse order.
+	 *
+	 * The middle PSB+ (at sync[1]) is empty and is expected to be skipped.
+	 */
+
+	errcode = pt_enc_get_offset(encoder, &sync[0]);
+	ptu_int_ge(errcode, 0);
+
+	pt_encode_psb(encoder);
+	pt_encode_mode_exec(encoder, ptem_64bit);
+	pt_encode_psbend(encoder);
+
+	errcode = pt_enc_get_offset(encoder, &sync[1]);
+	ptu_int_ge(errcode, 0);
+
+	pt_encode_psb(encoder);
+	pt_encode_psbend(encoder);
+
+	errcode = pt_enc_get_offset(encoder, &sync[2]);
+	ptu_int_ge(errcode, 0);
+
+	pt_encode_psb(encoder);
+	pt_encode_mode_exec(encoder, ptem_64bit);
+	pt_encode_psbend(encoder);
+
+	/* Expect sync[2], then sync[0] since the empty PSB+ at sync[1] is
+	 * skipped, and finally -pte_eos.
+	 */
+
+	errcode = pt_qry_sync_backward(decoder, &ip);
+	ptu_int_ge(errcode, 0);
+
+	errcode = pt_qry_get_sync_offset(decoder, &offset);
+	ptu_int_eq(errcode, 0);
+	ptu_uint_eq(offset, sync[2]);
+
+	errcode = pt_qry_sync_backward(decoder, &ip);
+	ptu_int_ge(errcode, 0);
+
+	errcode = pt_qry_get_sync_offset(decoder, &offset);
+	ptu_int_eq(errcode, 0);
+	ptu_uint_eq(offset, sync[0]);
+
+	errcode = pt_qry_sync_backward(decoder, &ip);
+	ptu_int_eq(errcode, -pte_eos);
+
+	return ptu_passed();
+}
+
+static struct ptunit_result
+sync_backward_empty_begin(struct ptu_decoder_fixture *dfix)
+{
+	struct pt_query_decoder *decoder = &dfix->decoder;
+	struct pt_encoder *encoder = &dfix->encoder;
+	uint64_t sync[3], offset, ip;
+	int errcode;
+
+	/* Check that repeated pt_qry_sync_backward() calls iterate over the
+	 * synchronization points in reverse order.
+	 *
+	 * The first PSB+ (at sync[0]) is empty and is expected to be skipped.
+	 */
+
+	errcode = pt_enc_get_offset(encoder, &sync[0]);
+	ptu_int_ge(errcode, 0);
+
+	pt_encode_psb(encoder);
+	pt_encode_psbend(encoder);
+
+	errcode = pt_enc_get_offset(encoder, &sync[1]);
+	ptu_int_ge(errcode, 0);
+
+	pt_encode_psb(encoder);
+	pt_encode_mode_exec(encoder, ptem_64bit);
+	pt_encode_psbend(encoder);
+
+	errcode = pt_enc_get_offset(encoder, &sync[2]);
+	ptu_int_ge(errcode, 0);
+
+	pt_encode_psb(encoder);
+	pt_encode_mode_exec(encoder, ptem_64bit);
+	pt_encode_psbend(encoder);
+
+	/* Expect sync[2], then sync[1], and then -pte_eos since the empty
+	 * PSB+ at sync[0] is skipped.
+	 */
+
+	errcode = pt_qry_sync_backward(decoder, &ip);
+	ptu_int_ge(errcode, 0);
+
+	errcode = pt_qry_get_sync_offset(decoder, &offset);
+	ptu_int_eq(errcode, 0);
+	ptu_uint_eq(offset, sync[2]);
+
+	errcode = pt_qry_sync_backward(decoder, &ip);
+	ptu_int_ge(errcode, 0);
+
+	errcode = pt_qry_get_sync_offset(decoder, &offset);
+	ptu_int_eq(errcode, 0);
+	ptu_uint_eq(offset, sync[1]);
+
+	errcode = pt_qry_sync_backward(decoder, &ip);
+	ptu_int_eq(errcode, -pte_eos);
+
+	return ptu_passed();
+}
+
+static struct ptunit_result
+decode_sync_backward(struct ptu_decoder_fixture *dfix)
+{
+	struct pt_query_decoder *decoder = &dfix->decoder;
+	struct pt_encoder *encoder = &dfix->encoder;
+	struct pt_event event;
+	uint64_t sync[2], offset, ip;
+	int errcode;
+
+	/* Check that, after decoding forward, pt_qry_sync_backward() first
+	 * re-syncs at the current segment and then finds the previous one.
+	 */
+
+	errcode = pt_enc_get_offset(encoder, &sync[0]);
+	ptu_int_ge(errcode, 0);
+
+	pt_encode_psb(encoder);
+	pt_encode_mode_exec(encoder, ptem_64bit);
+	pt_encode_psbend(encoder);
+
+	errcode = pt_enc_get_offset(encoder, &sync[1]);
+	ptu_int_ge(errcode, 0);
+
+	pt_encode_psb(encoder);
+	pt_encode_mode_exec(encoder, ptem_64bit);
+	pt_encode_psbend(encoder);
+
+
+	errcode = pt_qry_sync_forward(decoder, &ip);
+	ptu_int_ge(errcode, 0);
+
+	errcode = pt_qry_get_sync_offset(decoder, &offset);
+	ptu_int_eq(errcode, 0);
+	ptu_uint_eq(offset, sync[0]);
+
+	errcode = pt_qry_event(decoder, &event, sizeof(event));
+	ptu_int_ge(errcode, 0);
+	ptu_int_eq(event.type, ptev_exec_mode);
+
+	errcode = pt_qry_event(decoder, &event, sizeof(event));
+	ptu_int_ge(errcode, 0);
+	ptu_int_eq(event.type, ptev_exec_mode);
+
+	errcode = pt_qry_sync_backward(decoder, &ip);
+	ptu_int_ge(errcode, 0);
+
+	errcode = pt_qry_get_sync_offset(decoder, &offset);
+	ptu_int_eq(errcode, 0);
+	ptu_uint_eq(offset, sync[1]);
+
+	errcode = pt_qry_sync_backward(decoder, &ip);
+	ptu_int_ge(errcode, 0);
+
+	errcode = pt_qry_get_sync_offset(decoder, &offset);
+	ptu_int_eq(errcode, 0);
+	ptu_uint_eq(offset, sync[0]);
+
+	errcode = pt_qry_sync_backward(decoder, &ip);
+	ptu_int_eq(errcode, -pte_eos);
+
+	return ptu_passed();
+}
+
 static struct ptunit_result indir_null(struct ptu_decoder_fixture *dfix)
 {
 	struct pt_query_decoder *decoder = &dfix->decoder;
@@ -2112,6 +2412,12 @@
 	ptu_run_f(suite, cond_not_synced, dfix_raw);
 	ptu_run_f(suite, event_not_synced, dfix_raw);
 
+	ptu_run_f(suite, sync_backward, dfix_raw);
+	ptu_run_f(suite, sync_backward_empty_end, dfix_raw);
+	ptu_run_f(suite, sync_backward_empty_mid, dfix_raw);
+	ptu_run_f(suite, sync_backward_empty_begin, dfix_raw);
+	ptu_run_f(suite, decode_sync_backward, dfix_raw);
+
 	ptu_run_f(suite, indir_null, dfix_empty);
 	ptu_run_f(suite, indir_empty, dfix_empty);
 	ptu_run_fp(suite, indir, dfix_empty, pt_ipc_suppressed);