libipt: block decoder

Add a new API to iterate over blocks of instructions.

Change-Id: I3f01db2b3bde87fe6211af9ba7c077705f7892b3
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
diff --git a/doc/howto_libipt.md b/doc/howto_libipt.md
index 21dc4d7..41ed84b 100644
--- a/doc/howto_libipt.md
+++ b/doc/howto_libipt.md
@@ -56,6 +56,12 @@
   * *instruction flow*      This layer deals with the execution flow on the
                             instruction level.
 
+  * *block*                 This layer deals with the execution flow on the
+                            instruction level.
+
+                            It is faster than the instruction flow decoder but
+                            requires a small amount of post-processing.
+
 
 Each layer provides its own encoder or decoder struct plus a set of functions
 for allocating and freeing encoder or decoder objects and for synchronizing
@@ -67,6 +73,7 @@
   * *pkt*     Packet decoding (packet layer).
   * *qry*     Event (or query) layer.
   * *insn*    Instruction flow layer.
+  * *blk*     Block layer.
 
 
 Here is some generic example code for working with decoders:
@@ -799,6 +806,147 @@
 instruction.  The returned instruction is valid if its `iclass` field is set.
 
 
+## The Block Layer
+
+The block layer provides a simple API for iterating over blocks of sequential
+instructions in execution order.  The instructions in a block are sequential in
+the sense that no trace is required for reconstructing the instructions.  The IP
+of the first instruction is given in `struct pt_block` and the IP of other
+instructions in the block can be determined by decoding and examining the
+previous instruction.
+
+Start by configuring and allocating a `pt_block_decoder` as shown below:
+
+~~~{.c}
+    struct pt_block_decoder *decoder;
+    struct pt_config config;
+
+    memset(&config, 0, sizeof(config));
+    config.size = sizeof(config);
+    config.begin = <pt buffer begin>;
+    config.end = <pt buffer end>;
+    config.cpu = <cpu identifier>;
+    config.decode.callback = <decode function>;
+    config.decode.context = <decode context>;
+
+    decoder = pt_blk_alloc_decoder(&config);
+~~~
+
+An optional packet decode callback function may be specified in addition to the
+mandatory config fields.  If specified, the callback function will be called for
+packets the decoder does not know about.  The decoder will ignore the unknown
+packet except for its size in order to skip it.  If there is no decode callback
+specified, the decoder will abort with `-pte_bad_opc`.  In addition to the
+callback function pointer, an optional pointer to user-defined context
+information can be specified.  This context will be passed to the decode
+callback function.
+
+
+#### Synchronizing
+
+Before the decoder can be used, it needs to be synchronized onto the Intel PT
+packet stream.  To iterate over synchronization points in the Intel PT packet
+stream in forward or backward directions, the block decoder offers the following
+two synchronization functions respectively:
+
+    pt_blk_sync_forward()
+    pt_blk_sync_backward()
+
+
+To manually synchronize the decoder at a synchronization point (i.e. PSB packet)
+in the Intel PT packet stream, use the following function:
+
+    pt_blk_sync_set()
+
+
+The example below shows synchronization to the first synchronization point:
+
+~~~{.c}
+    struct pt_block_decoder *decoder;
+    int errcode;
+
+    errcode = pt_blk_sync_forward(decoder);
+    if (errcode < 0)
+        <handle error>(errcode);
+~~~
+
+The decoder will remember the last synchronization packet it decoded.
+Subsequent calls to `pt_blk_sync_forward` and `pt_blk_sync_backward` will use
+this as their starting point.
+
+You can get the current decoder position as offset into the Intel PT buffer via:
+
+    pt_blk_get_offset()
+
+
+You can get the position of the last synchronization point as offset into the
+Intel PT buffer via:
+
+    pt_blk_get_sync_offset()
+
+
+#### Iterating
+
+Once the decoder is synchronized, it can be used to iterate over blocks of
+instructions in execution flow order by repeated calls to `pt_blk_next()` as
+shown in the following example:
+
+~~~{.c}
+    struct pt_block_decoder *decoder;
+    int errcode;
+
+    for (;;) {
+        struct pt_block block;
+
+        errcode = pt_blk_next(decoder, &block, sizeof(block));
+
+        if (block.ninsn > 0)
+            <process block>(&block);
+
+        if (errcode < 0)
+            break;
+    }
+~~~
+
+A block contains enough information to reconstruct the instructions.  See
+`struct pt_block` in `intel-pt.h` for details.  Note that errors returned by
+`pt_blk_next()` apply after the last instruction in the provided block.
+
+It is recommended to use a traced image section cache so the image section
+identifier contained in a block can be used for reading the memory containing
+the instructions in the block.  This also allows mapping the instructions back
+to source code using the debug information contained in or reachable via the
+binary file.
+
+The following example shows how instructions can be reconstructed from a block:
+
+~~~{.c}
+    struct pt_image_section_cache *iscache;
+    struct pt_block *block;
+    uint16_t ninsn;
+    uint64_t ip;
+
+    ip = block->ip;
+    for (ninsn = 0; ninsn < block->ninsn; ++ninsn) {
+        uint8_t raw[pt_max_insn_size];
+        <struct insn> insn;
+        int errcode, size;
+
+        size = pt_iscache_read(iscache, raw, sizeof(raw), block->isid, ip);
+        if (size < 0)
+            break;
+
+        errcode = <decode instruction>(&insn, raw, size, block->mode);
+        if (errcode < 0)
+            break;
+
+        <process instruction>(&insn);
+
+        ip = <determine next ip>(&insn);
+    }
+~~~
+
+
 ## Parallel Decode
 
 Intel PT splits naturally into self-contained PSB segments that can be decoded
@@ -826,8 +974,8 @@
     }
 ~~~
 
-The individual trace segments can then be decoded using the query or instruction
-flow decoder as shown above in the previous examples.
+The individual trace segments can then be decoded using the query, instruction
+flow, or block decoder as shown above in the previous examples.
 
 When stitching decoded trace segments together, a sequence of linear (in the
 sense that it can be decoded without Intel PT) code has to be filled in.  Use
diff --git a/doc/man/CMakeLists.txt b/doc/man/CMakeLists.txt
index a8a9d18..3c94d3b 100644
--- a/doc/man/CMakeLists.txt
+++ b/doc/man/CMakeLists.txt
@@ -84,6 +84,11 @@
   pt_iscache_alloc
   pt_iscache_add_file
   pt_iscache_read
+  pt_blk_alloc_decoder
+  pt_blk_sync_forward
+  pt_blk_get_offset
+  pt_blk_get_image
+  pt_blk_next
 )
 
 foreach (function ${MAN3_FUNCTIONS})
@@ -101,6 +106,7 @@
 add_man_page_alias(3 pt_enc_get_config pt_pkt_get_config)
 add_man_page_alias(3 pt_enc_get_config pt_qry_get_config)
 add_man_page_alias(3 pt_enc_get_config pt_insn_get_config)
+add_man_page_alias(3 pt_enc_get_config pt_blk_get_config)
 add_man_page_alias(3 pt_pkt_alloc_decoder pt_pkt_free_decoder)
 add_man_page_alias(3 pt_pkt_sync_forward pt_pkt_sync_backward)
 add_man_page_alias(3 pt_pkt_sync_forward pt_pkt_sync_set)
@@ -113,6 +119,8 @@
 add_man_page_alias(3 pt_qry_time pt_qry_core_bus_ratio)
 add_man_page_alias(3 pt_qry_time pt_insn_time)
 add_man_page_alias(3 pt_qry_time pt_insn_core_bus_ratio)
+add_man_page_alias(3 pt_qry_time pt_blk_time)
+add_man_page_alias(3 pt_qry_time pt_blk_core_bus_ratio)
 add_man_page_alias(3 pt_image_alloc pt_image_free)
 add_man_page_alias(3 pt_image_alloc pt_image_name)
 add_man_page_alias(3 pt_image_add_file pt_image_copy)
@@ -123,8 +131,15 @@
 add_man_page_alias(3 pt_insn_sync_forward pt_insn_sync_set)
 add_man_page_alias(3 pt_insn_get_offset pt_insn_get_sync_offset)
 add_man_page_alias(3 pt_insn_get_image pt_insn_set_image)
+add_man_page_alias(3 pt_insn_get_image pt_blk_get_image)
+add_man_page_alias(3 pt_insn_get_image pt_blk_set_image)
 add_man_page_alias(3 pt_insn_next pt_insn)
 add_man_page_alias(3 pt_iscache_alloc pt_iscache_free)
 add_man_page_alias(3 pt_iscache_alloc pt_iscache_name)
+add_man_page_alias(3 pt_blk_alloc_decoder pt_blk_free_decoder)
+add_man_page_alias(3 pt_blk_sync_forward pt_blk_sync_backward)
+add_man_page_alias(3 pt_blk_sync_forward pt_blk_sync_set)
+add_man_page_alias(3 pt_blk_get_offset pt_blk_get_sync_offset)
+add_man_page_alias(3 pt_blk_next pt_block)
 
 add_custom_target(man ALL DEPENDS ${MAN_PAGES})
diff --git a/doc/man/pt_blk_alloc_decoder.3.md b/doc/man/pt_blk_alloc_decoder.3.md
new file mode 100644
index 0000000..0a98654
--- /dev/null
+++ b/doc/man/pt_blk_alloc_decoder.3.md
@@ -0,0 +1,98 @@
+% PT_BLK_ALLOC_DECODER(3)
+
+<!---
+ ! Copyright (c) 2016, Intel Corporation
+ !
+ ! Redistribution and use in source and binary forms, with or without
+ ! modification, are permitted provided that the following conditions are met:
+ !
+ !  * Redistributions of source code must retain the above copyright notice,
+ !    this list of conditions and the following disclaimer.
+ !  * Redistributions in binary form must reproduce the above copyright notice,
+ !    this list of conditions and the following disclaimer in the documentation
+ !    and/or other materials provided with the distribution.
+ !  * Neither the name of Intel Corporation nor the names of its contributors
+ !    may be used to endorse or promote products derived from this software
+ !    without specific prior written permission.
+ !
+ ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ ! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ ! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ! ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ ! LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ ! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ ! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ ! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ ! POSSIBILITY OF SUCH DAMAGE.
+ !-->
+
+# NAME
+
+pt_blk_alloc_decoder, pt_blk_free_decoder - allocate/free an Intel(R) Processor
+Trace block decoder
+
+
+# SYNOPSIS
+
+| **\#include `<intel-pt.h>`**
+|
+| **struct pt_block_decoder \***
+| **pt_blk_alloc_decoder(const struct pt_config \**config*);**
+|
+| **void pt_blk_free_decoder(struct pt_block_decoder \**decoder*);**
+
+Link with *-lipt*.
+
+
+# DESCRIPTION
+
+A block decoder decodes raw Intel Processor Trace (Intel PT) into a sequence of
+blocks of instructions described by the *pt_block* structure.  See
+**pt_blk_next**(3).
+
+**pt_blk_alloc_decoder**() allocates a new block decoder and returns a pointer
+to it.  The *config* argument points to a *pt_config* object.  See
+**pt_config**(3).  The *config* argument will not be referenced by the returned
+decoder but the trace buffer defined by the *config* argument's *begin* and
+*end* fields will.
+
+The returned block decoder needs to be synchronized onto the trace stream before
+it can be used.  To synchronize the decoder, use **pt_blk_sync_forward**(3),
+**pt_blk_sync_backward**(3), or **pt_blk_sync_set**(3).
+
+**pt_blk_free_decoder**() frees the Intel PT block decoder pointed to by
+*decoder*.  The *decoder* argument must be NULL or point to a decoder that has
+been allocated by a call to **pt_blk_alloc_decoder**().
+
+
+# RETURN VALUE
+
+**pt_blk_alloc_decoder**() returns a pointer to a *pt_block_decoder* object on
+success or NULL in case of an error.
+
+
+# EXAMPLE
+
+~~~{.c}
+    struct pt_block_decoder *decoder;
+    int errcode;
+
+    decoder = pt_blk_alloc_decoder(config);
+    if (!decoder)
+        return -pte_nomem;
+
+    errcode = decode(decoder);
+
+    pt_blk_free_decoder(decoder);
+    return errcode;
+~~~
+
+
+# SEE ALSO
+
+**pt_config**(3), **pt_blk_sync_forward**(3), **pt_blk_sync_backward**(3),
+**pt_blk_sync_set**(3), **pt_blk_get_offset**(3), **pt_blk_get_sync_offset**(3),
+**pt_blk_get_image**(3), **pt_blk_set_image**(3), **pt_blk_get_config**(3),
+**pt_blk_time**(3), **pt_blk_core_bus_ratio**(3), **pt_blk_next**(3)
diff --git a/doc/man/pt_blk_get_offset.3.md b/doc/man/pt_blk_get_offset.3.md
new file mode 100644
index 0000000..d6bc3d5
--- /dev/null
+++ b/doc/man/pt_blk_get_offset.3.md
@@ -0,0 +1,82 @@
+% PT_BLK_GET_OFFSET(3)
+
+<!---
+ ! Copyright (c) 2016, Intel Corporation
+ !
+ ! Redistribution and use in source and binary forms, with or without
+ ! modification, are permitted provided that the following conditions are met:
+ !
+ !  * Redistributions of source code must retain the above copyright notice,
+ !    this list of conditions and the following disclaimer.
+ !  * Redistributions in binary form must reproduce the above copyright notice,
+ !    this list of conditions and the following disclaimer in the documentation
+ !    and/or other materials provided with the distribution.
+ !  * Neither the name of Intel Corporation nor the names of its contributors
+ !    may be used to endorse or promote products derived from this software
+ !    without specific prior written permission.
+ !
+ ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ ! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ ! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ! ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ ! LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ ! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ ! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ ! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ ! POSSIBILITY OF SUCH DAMAGE.
+ !-->
+
+# NAME
+
+pt_blk_get_offset, pt_blk_get_sync_offset - get an Intel(R) Processor Trace
+block decoder's current/synchronization trace buffer offset
+
+
+# SYNOPSIS
+
+| **\#include `<intel-pt.h>`**
+|
+| **int pt_blk_get_offset(struct pt_block_decoder \**decoder*,**
+|                       **uint64_t \**offset*);**
+| **int pt_blk_get_sync_offset(struct pt_block_decoder \**decoder*,**
+|                            **uint64_t \**offset*);**
+
+Link with *-lipt*.
+
+
+# DESCRIPTION
+
+**pt_blk_get_offset**() provides *decoder*'s current position as offset in
+bytes from the beginning of *decoder*'s trace buffer in the unsigned integer
+variable pointed to by *offset*.
+
+**pt_blk_get_sync_offset**() provides *decoder*'s last synchronization point as
+offset in bytes from the beginning of *decoder*'s trace buffer in the unsigned
+integer variable pointed to by *offset*.
+
+
+# RETURN VALUE
+
+Both functions return zero on success or a negative *pt_error_code* enumeration
+constant in case of an error.
+
+
+# ERRORS
+
+pte_invalid
+:   The *decoder* or *offset* argument is NULL.
+
+pte_nosync
+:   *decoder* has not been synchronized onto the trace stream.  Use
+    **pt_blk_sync_forward**(3), **pt_blk_sync_backward**(3), or
+    **pt_blk_sync_set**(3) to synchronize *decoder*.
+
+
+# SEE ALSO
+
+**pt_blk_alloc_decoder**(3), **pt_blk_free_decoder**(3),
+**pt_blk_sync_forward**(3), **pt_blk_sync_backward**(3),
+**pt_blk_sync_set**(3), **pt_blk_get_config**(3), **pt_blk_time**(3),
+**pt_blk_core_bus_ratio**(3), **pt_blk_next**(3)
diff --git a/doc/man/pt_blk_next.3.md b/doc/man/pt_blk_next.3.md
new file mode 100644
index 0000000..f4697e0
--- /dev/null
+++ b/doc/man/pt_blk_next.3.md
@@ -0,0 +1,302 @@
+% PT_BLK_NEXT(3)
+
+<!---
+ ! Copyright (c) 2016, Intel Corporation
+ !
+ ! Redistribution and use in source and binary forms, with or without
+ ! modification, are permitted provided that the following conditions are met:
+ !
+ !  * Redistributions of source code must retain the above copyright notice,
+ !    this list of conditions and the following disclaimer.
+ !  * Redistributions in binary form must reproduce the above copyright notice,
+ !    this list of conditions and the following disclaimer in the documentation
+ !    and/or other materials provided with the distribution.
+ !  * Neither the name of Intel Corporation nor the names of its contributors
+ !    may be used to endorse or promote products derived from this software
+ !    without specific prior written permission.
+ !
+ ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ ! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ ! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ! ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ ! LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ ! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ ! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ ! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ ! POSSIBILITY OF SUCH DAMAGE.
+ !-->
+
+# NAME
+
+pt_blk_next, pt_block - iterate over blocks of traced instructions
+
+
+# SYNOPSIS
+
+| **\#include `<intel-pt.h>`**
+|
+| **struct pt_block;**
+|
+| **int pt_blk_next(struct pt_blk_decoder \**decoder*,**
+|                  **struct pt_blk \**blk*, size_t *size*);**
+|
+| **int pt_blk_next(struct pt_block_decoder \**decoder*,**
+|                 **struct pt_block \**block*, size_t *size*);**
+
+Link with *-lipt*.
+
+
+# DESCRIPTION
+
+**pt_blk_next**() provides the next block of instructions in execution order,
+which is described by the *pt_block* structure.
+
+The *size* argument must be set to *sizeof(struct pt_block)*.  The function will
+provide at most *size* bytes of the *pt_block* structure.  A newer decoder
+library may truncate an extended *pt_block* object to *size* bytes.
+
+An older decoder library may provide fewer *pt_block* fields.  Fields that are
+not provided will be zero-initialized.  For fields where zero is a valid value
+(e.g. for bit-fields), check the decoder library version to determine which
+fields are valid.  See **pt_library_version**(3).
+
+On success, the next block of instructions is provided in the *pt_block* object
+pointed to by the *block* argument.  The *pt_block* structure is declared as:
+
+~~~{.c}
+/** A block of instructions.
+ *
+ * Instructions in this block are executed sequentially but are not necessarily
+ * contiguous in memory.  Users are expected to follow direct branches.
+ */
+struct pt_block {
+    /** The IP of the first instruction in this block. */
+    uint64_t ip;
+
+    /** The IP of the last instruction in this block.
+     *
+     * This can be used for error-detection.
+     */
+    uint64_t end_ip;
+
+    /** The image section that contains the instructions in this block.
+     *
+     * A value of zero means that the section did not have an identifier.
+     * The section was not added via an image section cache or the memory
+     * was read via the read memory callback.
+     */
+    int isid;
+
+    /** The execution mode for all instructions in this block. */
+    enum pt_exec_mode mode;
+
+    /** The number of instructions in this block. */
+    uint16_t ninsn;
+
+    /** A collection of flags giving additional information about the
+     * instructions in this block.
+     *
+     * - all instructions in this block were executed speculatively.
+     */
+    uint32_t speculative:1;
+
+    /** - speculative execution was aborted after this block. */
+    uint32_t aborted:1;
+
+    /** - speculative execution was committed after this block. */
+    uint32_t committed:1;
+
+    /** - tracing was disabled after this block. */
+    uint32_t disabled:1;
+
+    /** - tracing was enabled at this block. */
+    uint32_t enabled:1;
+
+    /** - tracing was resumed at this block.
+     *
+     *    In addition to tracing being enabled, it continues from the IP
+     *    at which tracing had been disabled before.
+     *
+     *    If tracing was disabled at a call instruction, we assume that
+     *    tracing will be re-enabled after returning from the call at the
+     *    instruction following the call instruction.
+     */
+    uint32_t resumed:1;
+
+    /** - normal execution flow was interrupted after this block. */
+    uint32_t interrupted:1;
+
+    /** - tracing resumed at this block after an overflow. */
+    uint32_t resynced:1;
+
+    /** - tracing was stopped after this block. */
+    uint32_t stopped:1;
+};
+~~~
+
+The fields of the *pt_block* structure are described in more detail below:
+
+ip
+:   The virtual address of the first instruction in the block.  The address
+    should be interpreted in the current address space context.
+
+end_ip
+:   The virtual address of the last instruction in the block.  The address
+    should be interpreted in the current address space context.
+
+    This can be used for error detection.  Reconstruction of the instructions in
+    a block should end with the last instruction at *end_ip*.
+
+isid
+:   The image section identifier of the section from which the block of
+    instructions originated.  This will be zero unless the instructions came
+    from a section that was added via an image section cache.  See
+    **pt_image_add_cached**(3).
+
+    The image section identifier can be used for reading the memory containing
+    an instruction in order to decode it and for tracing an instruction back to
+    its binary file and from there to source code.
+
+mode
+:   The execution mode at which the instructions in the block were executed.
+    The *pt_exec_mode* enumeration is declared as:
+
+~~~{.c}
+/** An execution mode. */
+enum pt_exec_mode {
+    ptem_unknown,
+    ptem_16bit,
+    ptem_32bit,
+    ptem_64bit
+};
+~~~
+
+ninsn
+:   The number of instructions contained in this block.
+
+    The instructions are sequential in the sense that no trace is required for
+    reconstructing them.  They are not necessarily contiguous in memory.
+
+    The IP of the first instruction is given in the *ip* field and the IP of
+    other instructions can be determined by decoding and examining the previous
+    instruction.
+
+speculative
+:   A flag giving the speculative execution status of all instructions in the
+    block.  If set, the instructions were executed speculatively.  Otherwise,
+    the instructions were executed normally.
+
+aborted
+:   A flag saying whether speculative execution was aborted after the last
+    instruction in this block.  If set, speculative execution was aborted and
+    the effect of speculatively executed instructions prior to and including
+    this block was discarded.
+
+committed
+:   A flag saying whether the speculative execution state was committed after
+    the last instruction in this block.  If set, the effect of speculatively
+    executed instructions prior to and including this block was committed.
+
+disabled
+:   A flag saying whether tracing was disabled after the last instruction in
+    this block.  If set, tracing was disabled after the last instruction in this
+    block retired.
+
+enabled
+:   A flag saying whether tracing was enabled at the first instruction in this
+    block.  If set, this is the first block of instructions after tracing was
+    enabled.
+
+resumed
+:   A flag saying whether tracing was resumed at the first instruction in this
+    block.  If set, tracing was previously disabled at this block's IP before
+    executing the instruction at that IP and was then enabled at the same IP.
+
+    A typical example would be a system call or interrupt when tracing only user
+    space.  Tracing is disabled due to the context switch and is then resumed
+    from the next instruction after returning to user space.
+
+interrupted
+:   A flag saying whether normal execution flow was interrupted after the last
+    instruction in this block.  If set, the normal execution flow was
+    interrupted.
+
+    The next instruction, which is provided by another call to
+    **pt_blk_next**(), is the next instruction that retired after the
+    interrupt.  This is not necessarily the interrupt's destination.
+
+resynced
+:   A flag saying whether tracing resumed at the first instruction in this block
+    after an overflow.  If set, there was an internal buffer overflow and
+    packets were lost.  This was the first block of instructions to retire after
+    the overflow resolved.
+
+stopped
+:   A flag saying whether tracing was stopped after the last instruction in this
+    block.  If set, this is the last block of instructions that retired before
+    tracing was stopped due to a TraceStop condition.
+
+
+# RETURN VALUE
+
+**pt_blk_next**() returns zero or a positive value on success or a negative
+*pt_error_code* enumeration constant in case of an error.
+
+On success, a bit-vector of *pt_status_flag* enumeration constants is returned.
+The *pt_status_flag* enumeration is declared as:
+
+~~~{.c}
+/** Decoder status flags. */
+enum pt_status_flag {
+    /** There is an event pending. */
+    pts_event_pending    = 1 << 0,
+
+    /** The address has been suppressed. */
+    pts_ip_suppressed    = 1 << 1,
+
+    /** There is no more trace data available. */
+    pts_eos              = 1 << 2
+};
+~~~
+
+The *pts_eos* flag indicates that the information contained in the Intel PT
+stream has been consumed.  Further calls to **pt_blk_next**() will continue to
+provide blocks for instructions as long as the instructions' addresses can be
+determined without further trace.
+
+
+# ERRORS
+
+pte_invalid
+:   The *decoder* or *block* argument is NULL or the *size* argument is too
+    small.
+
+pte_eos
+:   Decode reached the end of the trace stream.
+
+pte_nosync
+:   The decoder has not been synchronized onto the trace stream.  Use
+    **pt_blk_sync_forward**(3), **pt_blk_sync_backward**(3), or
+    **pt_blk_sync_set**(3) to synchronize *decoder*.
+
+pte_bad_opc
+:   The decoder encountered an unsupported Intel PT packet opcode.
+
+pte_bad_packet
+:   The decoder encountered an unsupported Intel PT packet payload.
+
+pte_bad_query
+:   Execution flow reconstruction and trace got out of sync.
+
+    This typically means that, on its way to the virtual address of the next
+    event, the decoder encountered a conditional or indirect branch for which it
+    did not find guidance in the trace.
+
+
+# SEE ALSO
+
+**pt_blk_alloc_decoder**(3), **pt_blk_free_decoder**(3),
+**pt_blk_sync_forward**(3), **pt_blk_sync_backward**(3),
+**pt_blk_sync_set**(3), **pt_blk_time**(3), **pt_blk_core_bus_ratio**(3)
diff --git a/doc/man/pt_blk_sync_forward.3.md b/doc/man/pt_blk_sync_forward.3.md
new file mode 100644
index 0000000..b51e91d
--- /dev/null
+++ b/doc/man/pt_blk_sync_forward.3.md
@@ -0,0 +1,144 @@
+% PT_BLK_SYNC_FORWARD(3)
+
+<!---
+ ! Copyright (c) 2016, Intel Corporation
+ !
+ ! Redistribution and use in source and binary forms, with or without
+ ! modification, are permitted provided that the following conditions are met:
+ !
+ !  * Redistributions of source code must retain the above copyright notice,
+ !    this list of conditions and the following disclaimer.
+ !  * Redistributions in binary form must reproduce the above copyright notice,
+ !    this list of conditions and the following disclaimer in the documentation
+ !    and/or other materials provided with the distribution.
+ !  * Neither the name of Intel Corporation nor the names of its contributors
+ !    may be used to endorse or promote products derived from this software
+ !    without specific prior written permission.
+ !
+ ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ ! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ ! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ! ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ ! LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ ! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ ! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ ! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ ! POSSIBILITY OF SUCH DAMAGE.
+ !-->
+
+# NAME
+
+pt_blk_sync_forward, pt_blk_sync_backward, pt_blk_sync_set - synchronize an
+Intel(R) Processor Trace block decoder
+
+
+# SYNOPSIS
+
+| **\#include `<intel-pt.h>`**
+|
+| **int pt_blk_sync_forward(struct pt_block_decoder \**decoder*);**
+| **int pt_blk_sync_backward(struct pt_block_decoder \**decoder*);**
+| **int pt_blk_sync_set(struct pt_block_decoder \**decoder*,**
+|                     **uint64_t *offset*);**
+
+Link with *-lipt*.
+
+
+# DESCRIPTION
+
+These functions synchronize an Intel Processor Trace (Intel PT) block decoder
+pointed to by *decoder* onto the trace stream in *decoder*'s trace buffer.
+
+They search for a Packet Stream Boundary (PSB) packet in the trace stream and,
+if successful, set *decoder*'s current position and synchronization position to
+that packet and start processing packets.  For synchronization to be
+successful, there must be a full PSB+ header in the trace stream.
+
+**pt_blk_sync_forward**() searches in forward direction from *decoder*'s
+current position towards the end of the trace buffer.  If *decoder* has been
+newly allocated and has not been synchronized yet, the search starts from the
+beginning of the trace.
+
+**pt_blk_sync_backward**() searches in backward direction from *decoder*'s
+current position towards the beginning of the trace buffer.  If *decoder* has
+been newly allocated and has not been synchronized yet, the search starts from
+the end of the trace.
+
+**pt_blk_sync_set**() searches for a PSB packet at *offset* bytes from the
+beginning of *decoder*'s trace buffer.
+
+
+# RETURN VALUE
+
+All synchronization functions return zero or a positive value on success or a
+negative *pt_error_code* enumeration constant in case of an error.
+
+On success, a bit-vector of *pt_status_flag* enumeration constants is returned.
+The *pt_status_flag* enumeration is declared as:
+
+~~~{.c}
+/** Decoder status flags. */
+enum pt_status_flag {
+    /** There is an event pending. */
+    pts_event_pending    = 1 << 0,
+
+    /** The address has been suppressed. */
+    pts_ip_suppressed    = 1 << 1,
+
+    /** There is no more trace data available. */
+    pts_eos              = 1 << 2
+};
+~~~
+
+
+# ERRORS
+
+pte_invalid
+:   The *decoder* argument is NULL.
+
+pte_eos
+:   There is no (further) PSB+ header in the trace stream
+    (**pt_blk_sync_forward**() and **pt_blk_sync_backward**()) or at *offset*
+    bytes into the trace buffer (**pt_blk_sync_set**()).
+
+pte_nosync
+:   There is no PSB packet at *offset* bytes from the beginning of the trace
+    (**pt_blk_sync_set**() only).
+
+pte_bad_opc
+:   The decoder encountered an unsupported Intel PT packet opcode.
+
+pte_bad_packet
+:   The decoder encountered an unsupported Intel PT packet payload.
+
+
+# EXAMPLE
+
+The following example re-synchronizes an Intel PT block decoder after decode
+errors:
+
+~~~{.c}
+int foo(struct pt_block_decoder *decoder) {
+    for (;;) {
+        int errcode;
+
+        errcode = pt_blk_sync_forward(decoder);
+        if (errcode < 0)
+            return errcode;
+
+        do {
+            errcode = decode(decoder);
+        } while (errcode >= 0);
+    }
+}
+~~~
+
+
+# SEE ALSO
+
+**pt_blk_alloc_decoder**(3), **pt_blk_free_decoder**(3),
+**pt_blk_get_offset**(3), **pt_blk_get_sync_offset**(3),
+**pt_blk_get_config**(3), **pt_blk_time**(3), **pt_blk_core_bus_ratio**(3),
+**pt_blk_next**(3)
diff --git a/doc/man/pt_enc_get_config.3.md b/doc/man/pt_enc_get_config.3.md
index 40f352d..46aac15 100644
--- a/doc/man/pt_enc_get_config.3.md
+++ b/doc/man/pt_enc_get_config.3.md
@@ -30,8 +30,9 @@
 
 # NAME
 
-pt_enc_get_config, pt_pkt_get_config, pt_qry_get_config, pt_insn_get_config -
-get an Intel(R) Processor Trace encoder/decoder's configuration
+pt_enc_get_config, pt_pkt_get_config, pt_qry_get_config, pt_insn_get_config,
+pt_blk_get_config - get an Intel(R) Processor Trace encoder/decoder's
+configuration
 
 
 # SYNOPSIS
@@ -49,6 +50,9 @@
 |
 | **const struct pt_config \***
 | **pt_insn_get_config(const struct pt_insn_decoder \**decoder*);**
+|
+| **const struct pt_config \***
+| **pt_blk_get_config(const struct pt_block_decoder \**decoder*);**
 
 Link with *-lipt*.
 
@@ -69,4 +73,5 @@
 # SEE ALSO
 
 **pt_config**(3), **pt_alloc_encoder**(3), **pt_pkt_alloc_decoder**(3),
-**pt_qry_alloc_decoder**(3), **pt_insn_alloc_decoder**(3)
+**pt_qry_alloc_decoder**(3), **pt_insn_alloc_decoder**(3),
+**pt_blk_alloc_decoder**(3)
diff --git a/doc/man/pt_insn_get_image.3.md b/doc/man/pt_insn_get_image.3.md
index d6f2ca7..a0ed343 100644
--- a/doc/man/pt_insn_get_image.3.md
+++ b/doc/man/pt_insn_get_image.3.md
@@ -30,8 +30,9 @@
 
 # NAME
 
-pt_insn_get_image, pt_insn_set_image - get/set an Intel(R) Processor Trace
-instruction flow decoder's traced memory image descriptor
+pt_insn_get_image, pt_insn_set_image, pt_blk_get_image, pt_blk_set_image -
+get/set an Intel(R) Processor Trace instruction flow or block decoder's traced
+memory image descriptor
 
 
 # SYNOPSIS
@@ -39,33 +40,39 @@
 | **\#include `<intel-pt.h>`**
 |
 | **struct pt_image \*pt_insn_get_image(struct pt_insn_decoder \**decoder*);**
+| **struct pt_image \*pt_blk_get_image(struct pt_block_decoder \**decoder*);**
 |
 | **int pt_insn_set_image(struct pt_insn_decoder \**decoder*,**
 |                       **struct pt_image \**image*);**
+| **int pt_blk_set_image(struct pt_block_decoder \**decoder*,**
+|                      **struct pt_image \**image*);**
 
 Link with *-lipt*.
 
 
 # DESCRIPTION
 
-**pt_insn_get_image**() returns the traced memory image descriptor that
-*decoder* uses for reading instruction memory.  See **pt_image_alloc**(3).
-Every decoder comes with a default *pt_image* object that is initially empty and
-that will automatically be destroyed when the decoder is freed.
+**pt_insn_get_image**() and **pt_blk_get_image**() return the traced memory
+image descriptor that *decoder* uses for reading instruction memory.  See
+**pt_image_alloc**(3).  Every decoder comes with a default *pt_image* object
+that is initially empty and that will automatically be destroyed when the
+decoder is freed.
 
-**pt_insn_set_image**() sets the traced memory image descriptor that *decoder*
-uses for reading instruction memory.  If the *image* argument is NULL, sets
-*decoder*'s image to be its default image.  The user is responsible for freeing
-the *pt_image* object that *image* points to when it is no longer needed.
+**pt_insn_set_image**() and **pt_blk_set_image**() set the traced memory image
+descriptor that *decoder* uses for reading instruction memory.  If the *image*
+argument is NULL, sets *decoder*'s image to be its default image.  The user is
+responsible for freeing the *pt_image* object that *image* points to when it is
+no longer needed.
 
 
 # RETURN VALUE
 
-**pt_insn_get_image**() returns a pointer to *decoder*'s *pt_image* object.  The
-returned pointer is NULL if the *decoder* argument is NULL.
+**pt_insn_get_image**() and **pt_blk_get_image**() return a pointer to
+*decoder*'s *pt_image* object.  The returned pointer is NULL if the *decoder*
+argument is NULL.
 
-**pt_insn_set_image**() returns zero on success or a negative *pt_error_code*
-enumeration constant in case of an error.
+**pt_insn_set_image**() and **pt_blk_set_image**() return zero on success or a
+negative *pt_error_code* enumeration constant in case of an error.
 
 
 # ERRORS
@@ -82,4 +89,5 @@
 
 # SEE ALSO
 
-**pt_insn_alloc_decoder**(3), **pt_insn_free_decoder**(3), **pt_insn_next**(3)
+**pt_insn_alloc_decoder**(3), **pt_insn_free_decoder**(3), **pt_insn_next**(3),
+**pt_blk_alloc_decoder**(3), **pt_blk_free_decoder**(3), **pt_blk_next**(3)
diff --git a/doc/man/pt_qry_time.3.md b/doc/man/pt_qry_time.3.md
index 2598ffd..cea3c0d 100644
--- a/doc/man/pt_qry_time.3.md
+++ b/doc/man/pt_qry_time.3.md
@@ -30,8 +30,9 @@
 
 # NAME
 
-pt_qry_time, pt_qry_core_bus_ratio, pt_insn_time, pt_insn_core_bus_ratio - query
-an Intel(R) Processor Trace decoder for timing information
+pt_qry_time, pt_qry_core_bus_ratio, pt_insn_time, pt_insn_core_bus_ratio,
+pt_blk_time, pt_blk_core_bus_ratio - query an Intel(R) Processor Trace decoder
+for timing information
 
 
 # SYNOPSIS
@@ -47,16 +48,21 @@
 |                  **uint32_t \**lost_mtc*, uint32_t \**lost_cyc*);**
 | **int pt_insn_core_bus_ratio(struct pt_insn_decoder \**decoder*,**
 |                            **uint32_t \**cbr*);**
+|
+| **int pt_blk_time(struct pt_block_decoder \**decoder*, uint64_t \**time*,**
+|                 **uint32_t \**lost_mtc*, uint32_t \**lost_cyc*);**
+| **int pt_blk_core_bus_ratio(struct pt_block_decoder \**decoder*,**
+|                           **uint32_t \**cbr*);**
 
 Link with *-lipt*.
 
 
 # DESCRIPTION
 
-**pt_qry_time**() and **pt_insn_time**() provide the current estimated timestamp
-count (TSC) value in the unsigned integer variable pointed to by the *time*
-argument.  The returned value corresponds to what an **rdtsc** instruction would
-have returned.
+**pt_qry_time**(), **pt_insn_time**(), and **pt_blk_time**() provide the current
+estimated timestamp count (TSC) value in the unsigned integer variable pointed
+to by the *time* argument.  The returned value corresponds to what an **rdtsc**
+instruction would have returned.
 
 At configurable intervals, Intel PT contains the full, accurate TSC value.
 Between those intervals, the timestamp count is estimated using a collection of
@@ -81,8 +87,9 @@
 *lost_cyc* arguments respectively.  If one or both of the arguments is NULL, no
 information on lost packets is provided for the respective packet type.
 
-**pt_qry_core_bus_ratio**() and **pt_insn_core_bus_ratio**() give the last known
-core:bus ratio as provided by the Core Bus Ratio (CBR) Intel PT packet.
+**pt_qry_core_bus_ratio**(), **pt_insn_core_bus_ratio**(), and
+**pt_blk_core_bus_ratio**() give the last known core:bus ratio as provided by
+the Core Bus Ratio (CBR) Intel PT packet.
 
 
 # RETURN VALUE
@@ -94,8 +101,9 @@
 # ERRORS
 
 pte_invalid
-:   The *decoder* or *time* (**pt_qry_time**() and **pt_insn_time**()) or *cbr*
-    (**pt_qry_core_bus_ratio**() and **pt_insn_core_bus_ratio**()) argument is
+:   The *decoder* or *time* (**pt_qry_time**(), **pt_insn_time**(), and
+    **pt_blk_time**()) or *cbr* (**pt_qry_core_bus_ratio**(),
+    **pt_insn_core_bus_ratio**(), and **pt_blk_core_bus_ratio**()) argument is
     NULL.
 
 pte_no_time
@@ -114,10 +122,9 @@
 
 # NOTES
 
-Both the query and the instruction flow decoder read ahead.  The estimated
-timestamp count and core:bus ratios correspond to their current decode position,
-which may be ahead of the trace position that matches the last event or
-instruction.
+All decoders read ahead.  The estimated timestamp count and core:bus ratios
+correspond to their current decode position, which may be ahead of the trace
+position that matches the last event, instruction, or block.
 
 The query decoder also provides an estimated timestamp count in the *pt_event*
 structure.
@@ -127,4 +134,5 @@
 
 **pt_qry_alloc_decoder**(3), **pt_qry_free_decoder**(3),
 **pt_qry_cond_branch**(3), **pt_qry_indirect_branch**(3), **pt_qry_event**(3),
-**pt_insn_alloc_decoder**(3), **pt_insn_free_decoder**(3), **pt_insn_next**(3)
+**pt_insn_alloc_decoder**(3), **pt_insn_free_decoder**(3), **pt_insn_next**(3),
+**pt_blk_alloc_decoder**(3), **pt_blk_free_decoder**(3), **pt_blk_next**(3)
diff --git a/libipt/CMakeLists.txt b/libipt/CMakeLists.txt
index 86d0b6b..47aedc0 100644
--- a/libipt/CMakeLists.txt
+++ b/libipt/CMakeLists.txt
@@ -54,6 +54,7 @@
   src/pt_decoder_function.c
   src/pt_config.c
   src/pt_insn.c
+  src/pt_block_decoder.c
 )
 
 if (CMAKE_HOST_UNIX)
diff --git a/libipt/include/intel-pt.h b/libipt/include/intel-pt.h
index 4c8f68c..9131304 100644
--- a/libipt/include/intel-pt.h
+++ b/libipt/include/intel-pt.h
@@ -50,6 +50,7 @@
  * - Query decoder
  * - Traced image
  * - Instruction flow decoder
+ * - Block decoder
  */
 
 
@@ -58,6 +59,7 @@
 struct pt_packet_decoder;
 struct pt_query_decoder;
 struct pt_insn_decoder;
+struct pt_block_decoder;
 
 
 
@@ -2069,6 +2071,261 @@
 extern pt_export int pt_insn_next(struct pt_insn_decoder *decoder,
 				  struct pt_insn *insn, size_t size);
 
+
+
+/* Block decoder. */
+
+
+
+/** A block of instructions.
+ *
+ * Instructions in this block are executed sequentially but are not necessarily
+ * contiguous in memory.  Users are expected to follow direct branches.
+ */
+struct pt_block {
+	/** The IP of the first instruction in this block. */
+	uint64_t ip;
+
+	/** The IP of the last instruction in this block.
+	 *
+	 * This can be used for error-detection.
+	 */
+	uint64_t end_ip;
+
+	/** The image section that contains the instructions in this block.
+	 *
+	 * A value of zero means that the section did not have an identifier.
+	 * The section was not added via an image section cache or the memory
+	 * was read via the read memory callback.
+	 */
+	int isid;
+
+	/** The execution mode for all instructions in this block. */
+	enum pt_exec_mode mode;
+
+	/** The number of instructions in this block. */
+	uint16_t ninsn;
+
+	/** A collection of flags giving additional information about the
+	 * instructions in this block.
+	 *
+	 * - all instructions in this block were executed speculatively.
+	 */
+	uint32_t speculative:1;
+
+	/** - speculative execution was aborted after this block. */
+	uint32_t aborted:1;
+
+	/** - speculative execution was committed after this block. */
+	uint32_t committed:1;
+
+	/** - tracing was disabled after this block. */
+	uint32_t disabled:1;
+
+	/** - tracing was enabled at this block. */
+	uint32_t enabled:1;
+
+	/** - tracing was resumed at this block.
+	 *
+	 *    In addition to tracing being enabled, it continues from the IP
+	 *    at which tracing had been disabled before.
+	 *
+	 *    If tracing was disabled at a call instruction, we assume that
+	 *    tracing will be re-enabled after returning from the call at the
+	 *    instruction following the call instruction.
+	 */
+	uint32_t resumed:1;
+
+	/** - normal execution flow was interrupted after this block. */
+	uint32_t interrupted:1;
+
+	/** - tracing resumed at this block after an overflow. */
+	uint32_t resynced:1;
+
+	/** - tracing was stopped after this block. */
+	uint32_t stopped:1;
+};
+
+/** Allocate an Intel PT block decoder.
+ *
+ * The decoder will work on the buffer defined in \@config, it shall contain
+ * raw trace data and remain valid for the lifetime of the decoder.
+ *
+ * The decoder needs to be synchronized before it can be used.
+ */
+extern pt_export struct pt_block_decoder *
+pt_blk_alloc_decoder(const struct pt_config *config);
+
+/** Free an Intel PT block decoder.
+ *
+ * This will destroy the decoder's default image.
+ *
+ * The \@decoder must not be used after a successful return.
+ */
+extern pt_export void pt_blk_free_decoder(struct pt_block_decoder *decoder);
+
+/** Synchronize an Intel PT block decoder.
+ *
+ * Search for the next synchronization point in forward or backward direction.
+ *
+ * If \@decoder has not been synchronized, yet, the search is started at the
+ * beginning of the trace buffer in case of forward synchronization and at the
+ * end of the trace buffer in case of backward synchronization.
+ *
+ * Returns zero or a positive value on success, a negative error code otherwise.
+ *
+ * Returns -pte_bad_opc if an unknown packet is encountered.
+ * Returns -pte_bad_packet if an unknown packet payload is encountered.
+ * Returns -pte_eos if no further synchronization point is found.
+ * Returns -pte_invalid if \@decoder is NULL.
+ */
+extern pt_export int pt_blk_sync_forward(struct pt_block_decoder *decoder);
+extern pt_export int pt_blk_sync_backward(struct pt_block_decoder *decoder);
+
+/** Manually synchronize an Intel PT block decoder.
+ *
+ * Synchronize \@decoder on the syncpoint at \@offset.  There must be a PSB
+ * packet at \@offset.
+ *
+ * Returns zero or a positive value on success, a negative error code otherwise.
+ *
+ * Returns -pte_bad_opc if an unknown packet is encountered.
+ * Returns -pte_bad_packet if an unknown packet payload is encountered.
+ * Returns -pte_eos if \@offset lies outside of \@decoder's trace buffer.
+ * Returns -pte_eos if \@decoder reaches the end of its trace buffer.
+ * Returns -pte_invalid if \@decoder is NULL.
+ * Returns -pte_nosync if there is no syncpoint at \@offset.
+ */
+extern pt_export int pt_blk_sync_set(struct pt_block_decoder *decoder,
+				     uint64_t offset);
+
+/** Get the current decoder position.
+ *
+ * Fills the current \@decoder position into \@offset.
+ *
+ * This is useful for reporting errors.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ *
+ * Returns -pte_invalid if \@decoder or \@offset is NULL.
+ * Returns -pte_nosync if \@decoder is out of sync.
+ */
+extern pt_export int pt_blk_get_offset(struct pt_block_decoder *decoder,
+				       uint64_t *offset);
+
+/** Get the position of the last synchronization point.
+ *
+ * Fills the last synchronization position into \@offset.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ *
+ * Returns -pte_invalid if \@decoder or \@offset is NULL.
+ * Returns -pte_nosync if \@decoder is out of sync.
+ */
+extern pt_export int pt_blk_get_sync_offset(struct pt_block_decoder *decoder,
+					    uint64_t *offset);
+
+/** Get the traced image.
+ *
+ * The returned image may be modified as long as \@decoder is not running.
+ *
+ * Returns a pointer to the traced image \@decoder uses for reading memory.
+ * Returns NULL if \@decoder is NULL.
+ */
+extern pt_export struct pt_image *
+pt_blk_get_image(struct pt_block_decoder *decoder);
+
+/** Set the traced image.
+ *
+ * Sets the image that \@decoder uses for reading memory to \@image.  If \@image
+ * is NULL, sets the image to \@decoder's default image.
+ *
+ * Only one image can be active at any time.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ * Returns -pte_invalid if \@decoder is NULL.
+ */
+extern pt_export int pt_blk_set_image(struct pt_block_decoder *decoder,
+				      struct pt_image *image);
+
+/** Return a pointer to \@decoder's configuration.
+ *
+ * Returns a non-null pointer on success, NULL if \@decoder is NULL.
+ */
+extern pt_export const struct pt_config *
+pt_blk_get_config(const struct pt_block_decoder *decoder);
+
+/** Return the current time.
+ *
+ * On success, provides the time at \@decoder's current position in \@time.
+ * Since \@decoder is reading ahead until the next indirect branch or event,
+ * the value matches the time for that branch or event.
+ *
+ * The time is similar to what a rdtsc instruction would return.  Depending
+ * on the configuration, the time may not be fully accurate.  If TSC is not
+ * enabled, the time is relative to the last synchronization and can't be used
+ * to correlate with other TSC-based time sources.  In this case, -pte_no_time
+ * is returned and the relative time is provided in \@time.
+ *
+ * Some timing-related packets may need to be dropped (mostly due to missing
+ * calibration or incomplete configuration).  To get an idea about the quality
+ * of the estimated time, we record the number of dropped MTC and CYC packets.
+ *
+ * If \@lost_mtc is not NULL, set it to the number of lost MTC packets.
+ * If \@lost_cyc is not NULL, set it to the number of lost CYC packets.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ *
+ * Returns -pte_invalid if \@decoder or \@time is NULL.
+ * Returns -pte_no_time if there has not been a TSC packet.
+ */
+extern pt_export int pt_blk_time(struct pt_block_decoder *decoder,
+				 uint64_t *time, uint32_t *lost_mtc,
+				 uint32_t *lost_cyc);
+
+/** Return the current core bus ratio.
+ *
+ * On success, provides the core:bus ratio at \@decoder's current position
+ * in \@cbr.
+ * Since \@decoder is reading ahead until the next indirect branch or event,
+ * the value matches the core:bus ratio for that branch or event.
+ *
+ * The ratio is defined as core cycles per bus clock cycle.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ *
+ * Returns -pte_invalid if \@decoder or \@cbr is NULL.
+ * Returns -pte_no_cbr if there has not been a CBR packet.
+ */
+extern pt_export int pt_blk_core_bus_ratio(struct pt_block_decoder *decoder,
+					   uint32_t *cbr);
+
+/** Determine the next block of instructions.
+ *
+ * On success, provides the next block of instructions in execution order in
+ * \@block.
+ *
+ * The \@size argument must be set to sizeof(struct pt_block).
+ *
+ * Returns a non-negative pt_status_flag bit-vector on success, a negative error
+ * code otherwise.
+ *
+ * Returns pts_eos to indicate the end of the trace stream.  Subsequent calls
+ * to pt_blk_next() will continue to return pts_eos until trace is required
+ * to determine the next instruction.
+ *
+ * Returns -pte_bad_context if the decoder encountered an unexpected packet.
+ * Returns -pte_bad_opc if the decoder encountered unknown packets.
+ * Returns -pte_bad_packet if the decoder encountered unknown packet payloads.
+ * Returns -pte_bad_query if the decoder got out of sync.
+ * Returns -pte_eos if decoding reached the end of the Intel PT buffer.
+ * Returns -pte_invalid if \@decoder or \@block is NULL.
+ * Returns -pte_nomap if the memory at the instruction address can't be read.
+ * Returns -pte_nosync if \@decoder is out of sync.
+ */
+extern pt_export int pt_blk_next(struct pt_block_decoder *decoder,
+				 struct pt_block *block, size_t size);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/libipt/internal/include/pt_block_decoder.h b/libipt/internal/include/pt_block_decoder.h
new file mode 100644
index 0000000..9fc3939
--- /dev/null
+++ b/libipt/internal/include/pt_block_decoder.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef PT_BLOCK_DECODER_H
+#define PT_BLOCK_DECODER_H
+
+#include "pt_query_decoder.h"
+#include "pt_image.h"
+#include "pt_retstack.h"
+
+
+/* A block decoder.
+ *
+ * It decodes Intel(R) Processor Trace into a sequence of instruction blocks
+ * such that the instructions in each block can be decoded without further need
+ * of trace.
+ */
+struct pt_block_decoder {
+	/* The Intel(R) Processor Trace query decoder. */
+	struct pt_query_decoder query;
+
+	/* The default image. */
+	struct pt_image default_image;
+
+	/* The image. */
+	struct pt_image *image;
+
+	/* The current address space. */
+	struct pt_asid asid;
+
+	/* The current Intel(R) Processor Trace event. */
+	struct pt_event event;
+
+	/* The call/return stack for ret compression. */
+	struct pt_retstack retstack;
+
+	/* The start IP of the next block.
+	 *
+	 * If tracing is disabled, this is the IP at which we assume tracing to
+	 * be resumed.
+	 */
+	uint64_t ip;
+
+	/* The current execution mode. */
+	enum pt_exec_mode mode;
+
+	/* The status of the last successful decoder query.
+	 *
+	 * Errors are reported directly; the status is always a non-negative
+	 * pt_status_flag bit-vector.
+	 */
+	int status;
+
+	/* A collection of flags defining how to proceed flow reconstruction:
+	 *
+	 * - tracing is enabled.
+	 */
+	uint32_t enabled:1;
+
+	/* - process @event. */
+	uint32_t process_event:1;
+
+	/* - instructions are executed speculatively. */
+	uint32_t speculative:1;
+};
+
+
+/* Initialize a block decoder.
+ *
+ * Returns zero on success; a negative error code otherwise.
+ * Returns -pte_internal, if @decoder or @config is NULL.
+ */
+extern int pt_blk_decoder_init(struct pt_block_decoder *decoder,
+			       const struct pt_config *config);
+
+/* Finalize a block decoder. */
+extern void pt_blk_decoder_fini(struct pt_block_decoder *decoder);
+
+#endif /* PT_BLOCK_DECODER_H */
diff --git a/libipt/src/pt_block_decoder.c b/libipt/src/pt_block_decoder.c
new file mode 100644
index 0000000..f8b52e5
--- /dev/null
+++ b/libipt/src/pt_block_decoder.c
@@ -0,0 +1,1939 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "pt_block_decoder.h"
+#include "pt_insn.h"
+#include "pt_ild.h"
+
+#include "intel-pt.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+static int pt_blk_proceed(struct pt_block_decoder *, struct pt_block *);
+static int pt_blk_process_trailing_events(struct pt_block_decoder *,
+					  struct pt_block *);
+
+
+static void pt_blk_reset(struct pt_block_decoder *decoder)
+{
+	if (!decoder)
+		return;
+
+	decoder->mode = ptem_unknown;
+	decoder->ip = 0ull;
+	decoder->status = 0;
+	decoder->enabled = 0;
+	decoder->process_event = 0;
+	decoder->speculative = 0;
+
+	pt_retstack_init(&decoder->retstack);
+	pt_asid_init(&decoder->asid);
+}
+
+int pt_blk_decoder_init(struct pt_block_decoder *decoder,
+			const struct pt_config *config)
+{
+	int errcode;
+
+	if (!decoder)
+		return -pte_internal;
+
+	errcode = pt_qry_decoder_init(&decoder->query, config);
+	if (errcode < 0)
+		return errcode;
+
+	pt_image_init(&decoder->default_image, NULL);
+	decoder->image = &decoder->default_image;
+
+	pt_blk_reset(decoder);
+
+	return 0;
+}
+
+void pt_blk_decoder_fini(struct pt_block_decoder *decoder)
+{
+	if (!decoder)
+		return;
+
+	pt_image_fini(&decoder->default_image);
+	pt_qry_decoder_fini(&decoder->query);
+}
+
+struct pt_block_decoder *
+pt_blk_alloc_decoder(const struct pt_config *config)
+{
+	struct pt_block_decoder *decoder;
+	int errcode;
+
+	decoder = malloc(sizeof(*decoder));
+	if (!decoder)
+		return NULL;
+
+	errcode = pt_blk_decoder_init(decoder, config);
+	if (errcode < 0) {
+		free(decoder);
+		return NULL;
+	}
+
+	return decoder;
+}
+
+void pt_blk_free_decoder(struct pt_block_decoder *decoder)
+{
+	if (!decoder)
+		return;
+
+	pt_blk_decoder_fini(decoder);
+	free(decoder);
+}
+
+static int pt_blk_start(struct pt_block_decoder *decoder, int status)
+{
+	if (!decoder)
+		return -pte_internal;
+
+	if (status < 0)
+		return status;
+
+	decoder->status = status;
+	if (!(status & pts_ip_suppressed))
+		decoder->enabled = 1;
+
+	return 0;
+}
+
+static int pt_blk_sync_reset(struct pt_block_decoder *decoder)
+{
+	if (!decoder)
+		return -pte_internal;
+
+	pt_blk_reset(decoder);
+
+	return 0;
+}
+
+int pt_blk_sync_forward(struct pt_block_decoder *decoder)
+{
+	int errcode, status;
+
+	if (!decoder)
+		return -pte_invalid;
+
+	errcode = pt_blk_sync_reset(decoder);
+	if (errcode < 0)
+		return errcode;
+
+	status = pt_qry_sync_forward(&decoder->query, &decoder->ip);
+
+	return pt_blk_start(decoder, status);
+}
+
+int pt_blk_sync_backward(struct pt_block_decoder *decoder)
+{
+	int errcode, status;
+
+	if (!decoder)
+		return -pte_invalid;
+
+	errcode = pt_blk_sync_reset(decoder);
+	if (errcode < 0)
+		return errcode;
+
+	status = pt_qry_sync_backward(&decoder->query, &decoder->ip);
+
+	return pt_blk_start(decoder, status);
+}
+
+int pt_blk_sync_set(struct pt_block_decoder *decoder, uint64_t offset)
+{
+	int errcode, status;
+
+	if (!decoder)
+		return -pte_invalid;
+
+	errcode = pt_blk_sync_reset(decoder);
+	if (errcode < 0)
+		return errcode;
+
+	status = pt_qry_sync_set(&decoder->query, &decoder->ip, offset);
+
+	return pt_blk_start(decoder, status);
+}
+
+int pt_blk_get_offset(struct pt_block_decoder *decoder, uint64_t *offset)
+{
+	if (!decoder)
+		return -pte_invalid;
+
+	return pt_qry_get_offset(&decoder->query, offset);
+}
+
+int pt_blk_get_sync_offset(struct pt_block_decoder *decoder, uint64_t *offset)
+{
+	if (!decoder)
+		return -pte_invalid;
+
+	return pt_qry_get_sync_offset(&decoder->query, offset);
+}
+
+struct pt_image *pt_blk_get_image(struct pt_block_decoder *decoder)
+{
+	if (!decoder)
+		return NULL;
+
+	return decoder->image;
+}
+
+int pt_blk_set_image(struct pt_block_decoder *decoder, struct pt_image *image)
+{
+	if (!decoder)
+		return -pte_invalid;
+
+	if (!image)
+		image = &decoder->default_image;
+
+	decoder->image = image;
+	return 0;
+}
+
+const struct pt_config *
+pt_blk_get_config(const struct pt_block_decoder *decoder)
+{
+	if (!decoder)
+		return NULL;
+
+	return pt_qry_get_config(&decoder->query);
+}
+
+int pt_blk_time(struct pt_block_decoder *decoder, uint64_t *time,
+		uint32_t *lost_mtc, uint32_t *lost_cyc)
+{
+	if (!decoder || !time)
+		return -pte_invalid;
+
+	return pt_qry_time(&decoder->query, time, lost_mtc, lost_cyc);
+}
+
+int pt_blk_core_bus_ratio(struct pt_block_decoder *decoder, uint32_t *cbr)
+{
+	if (!decoder || !cbr)
+		return -pte_invalid;
+
+	return pt_qry_core_bus_ratio(&decoder->query, cbr);
+}
+
+/* Fetch the next pending event.
+ *
+ * Checks for pending events.  If an event is pending, fetches it (if not
+ * already in process).
+ *
+ * Returns zero if no event is pending.
+ * Returns a positive integer if an event is pending or in process.
+ * Returns a negative error code otherwise.
+ */
+static inline int pt_blk_fetch_event(struct pt_block_decoder *decoder)
+{
+	int status;
+
+	if (!decoder)
+		return -pte_internal;
+
+	if (decoder->process_event)
+		return 1;
+
+	if (!(decoder->status & pts_event_pending))
+		return 0;
+
+	status = pt_qry_event(&decoder->query, &decoder->event,
+			      sizeof(decoder->event));
+	if (status < 0)
+		return status;
+
+	decoder->process_event = 1;
+	decoder->status = status;
+
+	return 1;
+}
+
+static inline int pt_blk_block_is_empty(const struct pt_block *block)
+{
+	if (!block)
+		return 1;
+
+	return !block->ninsn;
+}
+
+static inline int block_to_user(struct pt_block *ublock, size_t size,
+				const struct pt_block *block)
+{
+	if (!ublock || !block)
+		return -pte_internal;
+
+	if (ublock == block)
+		return 0;
+
+	/* Zero out any unknown bytes; the offset must be counted in bytes. */
+	if (sizeof(*block) < size) {
+		memset((uint8_t *) ublock + sizeof(*block), 0,
+		       size - sizeof(*block));
+
+		size = sizeof(*block);
+	}
+
+	memcpy(ublock, block, size);
+
+	return 0;
+}
+
+static int pt_insn_false(const struct pt_insn *insn,
+			 const struct pt_insn_ext *iext)
+{
+	(void) insn;
+	(void) iext;
+
+	return 0;
+}
+
+/* Determine the next IP using trace.
+ *
+ * Tries to determine the IP of the next instruction using trace and provides it
+ * in @pip.
+ *
+ * Not requiring trace to determine the IP is treated as an internal error.
+ *
+ * Does not update the return compression stack for indirect calls.  This is
+ * expected to have been done, already, when trying to determine the next IP
+ * without using trace.
+ *
+ * Does not update @decoder->status.  The caller is expected to do that.
+ *
+ * Returns a non-negative pt_status_flag bit-vector on success, a negative error
+ * code otherwise.
+ * Returns -pte_internal if @pip, @decoder, @insn, or @iext are NULL.
+ * Returns -pte_internal if no trace is required.
+ */
+static int pt_blk_next_ip(uint64_t *pip, struct pt_block_decoder *decoder,
+			  const struct pt_insn *insn,
+			  const struct pt_insn_ext *iext)
+{
+	int status;
+
+	if (!pip || !decoder || !insn || !iext)
+		return -pte_internal;
+
+	/* We handle conditional branches and compressed returns directly in
+	 * the switch.
+	 *
+	 * All kinds of indirect branches are handled below the switch.
+	 */
+	switch (insn->iclass) {
+	case ptic_cond_jump: {
+		uint64_t ip;
+		int taken;
+
+		status = pt_qry_cond_branch(&decoder->query, &taken);
+		if (status < 0)
+			return status;
+
+		ip = insn->ip + insn->size;
+		if (taken)
+			ip += iext->variant.branch.displacement;
+
+		*pip = ip;
+		return status;
+	}
+
+	case ptic_return: {
+		int taken, errcode;
+
+		/* Check for a compressed return. */
+		status = pt_qry_cond_branch(&decoder->query, &taken);
+		if (status < 0) {
+			if (status != -pte_bad_query)
+				return status;
+
+			break;
+		}
+
+		/* A compressed return is indicated by a taken conditional
+		 * branch.
+		 */
+		if (!taken)
+			return -pte_bad_retcomp;
+
+		errcode = pt_retstack_pop(&decoder->retstack, pip);
+		if (errcode < 0)
+			return errcode;
+
+		return status;
+	}
+
+	case ptic_jump:
+	case ptic_call:
+		/* A direct jump or call wouldn't require trace. */
+		if (iext->variant.branch.is_direct)
+			return -pte_internal;
+
+		break;
+
+	case ptic_far_call:
+	case ptic_far_return:
+	case ptic_far_jump:
+		break;
+
+	case ptic_other:
+		return -pte_internal;
+
+	case ptic_error:
+		return -pte_bad_insn;
+	}
+
+	/* Process an indirect branch.
+	 *
+	 * This covers indirect jumps and calls, non-compressed returns, and all
+	 * flavors of far transfers.
+	 */
+	return pt_qry_indirect_branch(&decoder->query, pip);
+}
+
+/* Process an enabled event.
+ *
+ * If @block is not empty, the event is postponed: we return zero and leave the
+ * event pending.
+ *
+ * Otherwise, start the block at the event's IP and proceed from there.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_process_enabled(struct pt_block_decoder *decoder,
+				  struct pt_block *block,
+				  const struct pt_event *ev)
+{
+	if (!decoder || !block || !ev)
+		return -pte_internal;
+
+	/* An enabled event must not be a status update. */
+	if (ev->status_update)
+		return -pte_bad_context;
+
+	/* Without an IP we would not know where to start decoding. */
+	if (ev->ip_suppressed)
+		return -pte_noip;
+
+	/* Tracing must be disabled when we get here. */
+	if (decoder->enabled)
+		return -pte_bad_context;
+
+	/* Postpone the event if the block is already in progress. */
+	if (!pt_blk_block_is_empty(block))
+		return 0;
+
+	/* Tracing resumed if we continue at the preserved IP and the block
+	 * has not been marked enabled already.  Otherwise, it was enabled.
+	 */
+	if (block->enabled || ev->variant.enabled.ip != decoder->ip) {
+		block->enabled = 1;
+		block->resumed = 0;
+	} else {
+		block->resumed = 1;
+	}
+
+	/* A preceding disable is no longer relevant for this block. */
+	block->disabled = 0;
+	decoder->ip = ev->variant.enabled.ip;
+	block->ip = decoder->ip;
+	decoder->enabled = 1;
+	decoder->process_event = 0;
+
+	return pt_blk_proceed(decoder, block);
+}
+
+/* Apply a disabled event.
+ *
+ * Shared by the processing of proceed events and of trailing events.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_apply_disabled(struct pt_block_decoder *decoder,
+				 struct pt_block *block,
+				 const struct pt_event *ev)
+{
+	if (!decoder || !block || !ev)
+		return -pte_internal;
+
+	/* A disabled event must not be a status update and it requires
+	 * tracing to be enabled.
+	 */
+	if (ev->status_update || !decoder->enabled)
+		return -pte_bad_context;
+
+	/* Tracing is now disabled and the event has been consumed.
+	 * We leave @decoder->ip untouched.  It is the location at which we
+	 * expect tracing to resume; a subsequent enabled event at that IP is
+	 * reported as a resume.
+	 */
+	decoder->process_event = 0;
+	decoder->enabled = 0;
+
+	/* Indicate the disable in @block. */
+	block->disabled = 1;
+
+	return 0;
+}
+
+/* Process a disabled event at its event location.
+ *
+ * The event is applied in any case.  It ends a non-empty @block and is
+ * indicated in it.
+ *
+ * For an empty @block, e.g. after a series of enable and disable events on the
+ * same IP without any trace in between, we drop the indication and proceed.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_process_disabled(struct pt_block_decoder *decoder,
+				   struct pt_block *block,
+				   const struct pt_event *ev)
+{
+	int status;
+
+	if (!block)
+		return -pte_internal;
+
+	status = pt_blk_apply_disabled(decoder, block, ev);
+	if (status < 0)
+		return status;
+
+	/* A non-empty block ends with the disable. */
+	if (!pt_blk_block_is_empty(block))
+		return 0;
+
+	/* For an empty block, take back the indication and move on. */
+	block->disabled = 0;
+
+	return pt_blk_proceed(decoder, block);
+}
+
+/* Process a disabled event that trails a completed block.
+ *
+ * Applies the event, then continues with pt_blk_process_trailing_events().
+ *
+ * Returns a non-negative pt_status_flag bit-vector on success, a negative error
+ * code otherwise.
+ */
+static int pt_blk_process_trailing_disabled(struct pt_block_decoder *decoder,
+					    struct pt_block *block,
+					    const struct pt_event *ev)
+{
+	int status = pt_blk_apply_disabled(decoder, block, ev);
+
+	/* Do not look for further events if we failed to apply this one. */
+	if (status < 0)
+		return status;
+
+	return pt_blk_process_trailing_events(decoder, block);
+}
+
+/* Apply an asynchronous branch event.
+ *
+ * Shared by the processing of proceed events and of trailing events.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_apply_async_branch(struct pt_block_decoder *decoder,
+				     struct pt_block *block,
+				     const struct pt_event *ev)
+{
+	if (!decoder || !block || !ev)
+		return -pte_internal;
+
+	/* An asynchronous branch must not be a status update and it requires
+	 * tracing to be enabled.
+	 */
+	if (ev->status_update || !decoder->enabled)
+		return -pte_bad_context;
+
+	/* The asynchronous branch is indicated as an interrupt in @block.
+	 * This ends the block.
+	 */
+	block->interrupted = 1;
+
+	/* Continue at the branch destination in the next iteration.  The
+	 * event has been consumed.
+	 */
+	decoder->process_event = 0;
+	decoder->ip = ev->variant.async_branch.to;
+
+	return 0;
+}
+
+/* Process an asynchronous branch event at its source location.
+ *
+ * The branch ends a non-empty @block and is indicated in it as an interrupt.
+ * Decode continues from the branch destination in the next iteration.
+ *
+ * For an empty @block, e.g. when tracing just started, we do not indicate the
+ * interrupt.  We move the block's start IP to the branch destination and
+ * proceed, so it will look like tracing started at the destination instead of
+ * at the source of the asynchronous branch.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_process_async_branch(struct pt_block_decoder *decoder,
+				       struct pt_block *block,
+				       const struct pt_event *ev)
+{
+	int status;
+
+	if (!block)
+		return -pte_internal;
+
+	status = pt_blk_apply_async_branch(decoder, block, ev);
+	if (status < 0)
+		return status;
+
+	if (!pt_blk_block_is_empty(block))
+		return 0;
+
+	/* The block is empty so we may still move its start IP.  Undo the
+	 * interrupt indication set by pt_blk_apply_async_branch().
+	 */
+	block->ip = decoder->ip;
+	block->interrupted = 0;
+
+	return pt_blk_proceed(decoder, block);
+}
+
+/* Process an asynchronous branch event that trails a completed block.
+ *
+ * Applies the event, which marks @block as interrupted, then continues with
+ * pt_blk_process_trailing_events().
+ *
+ * Returns a non-negative pt_status_flag bit-vector on success, a negative error
+ * code otherwise.
+ */
+static int
+pt_blk_process_trailing_async_branch(struct pt_block_decoder *decoder,
+				     struct pt_block *block,
+				     const struct pt_event *ev)
+{
+	int status = pt_blk_apply_async_branch(decoder, block, ev);
+
+	/* Do not look for further events if we failed to apply this one. */
+	if (status < 0)
+		return status;
+
+	return pt_blk_process_trailing_events(decoder, block);
+}
+
+/* Apply a paging event.
+ *
+ * Shared by the processing of proceed events and of trailing events.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_apply_paging(struct pt_block_decoder *decoder,
+			       struct pt_block *block,
+			       const struct pt_event *ev)
+{
+	if (!decoder || !ev)
+		return -pte_internal;
+
+	/* The event only updates the address-space; @block is not used. */
+	(void) block;
+	decoder->process_event = 0;
+	decoder->asid.cr3 = ev->variant.paging.cr3;
+
+	return 0;
+}
+
+/* Process a paging event at its event location.
+ *
+ * Applies the CR3 update to @decoder, then proceeds with decoding.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_process_paging(struct pt_block_decoder *decoder,
+				 struct pt_block *block,
+				 const struct pt_event *ev)
+{
+	int status;
+
+	status = pt_blk_apply_paging(decoder, block, ev);
+	if (status >= 0)
+		status = pt_blk_proceed(decoder, block);
+
+	return status;
+}
+
+/* Process a paging event that trails a completed block.
+ *
+ * Applies the event, then continues with pt_blk_process_trailing_events().
+ *
+ * Returns a non-negative pt_status_flag bit-vector on success, a negative error
+ * code otherwise.
+ */
+static int pt_blk_process_trailing_paging(struct pt_block_decoder *decoder,
+					  struct pt_block *block,
+					  const struct pt_event *ev)
+{
+	int status = pt_blk_apply_paging(decoder, block, ev);
+
+	/* Do not look for further events if we failed to apply this one. */
+	if (status < 0)
+		return status;
+
+	return pt_blk_process_trailing_events(decoder, block);
+}
+
+/* Apply a vmcs event.
+ *
+ * Shared by the processing of proceed events and of trailing events.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_apply_vmcs(struct pt_block_decoder *decoder,
+			     struct pt_block *block,
+			     const struct pt_event *ev)
+{
+	if (!decoder || !ev)
+		return -pte_internal;
+
+	/* The event only updates the address-space; @block is not used. */
+	(void) block;
+	decoder->process_event = 0;
+	decoder->asid.vmcs = ev->variant.vmcs.base;
+
+	return 0;
+}
+
+/* Process a vmcs event at its event location.
+ *
+ * Applies the VMCS base update to @decoder, then proceeds with decoding.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_process_vmcs(struct pt_block_decoder *decoder,
+			       struct pt_block *block,
+			       const struct pt_event *ev)
+{
+	int status;
+
+	status = pt_blk_apply_vmcs(decoder, block, ev);
+	if (status >= 0)
+		status = pt_blk_proceed(decoder, block);
+
+	return status;
+}
+
+/* Process a vmcs event that trails a completed block.
+ *
+ * Applies the event, then continues with pt_blk_process_trailing_events().
+ *
+ * Returns a non-negative pt_status_flag bit-vector on success, a negative error
+ * code otherwise.
+ */
+static int pt_blk_process_trailing_vmcs(struct pt_block_decoder *decoder,
+					struct pt_block *block,
+					const struct pt_event *ev)
+{
+	int status = pt_blk_apply_vmcs(decoder, block, ev);
+
+	/* Do not look for further events if we failed to apply this one. */
+	if (status < 0)
+		return status;
+
+	return pt_blk_process_trailing_events(decoder, block);
+}
+
+/* Process an overflow event.
+ *
+ * An overflow ends a non-empty block; the event stays pending in this case.
+ * For an empty block, indicate the overflow via @block->resynced and proceed.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_process_overflow(struct pt_block_decoder *decoder,
+				   struct pt_block *block,
+				   const struct pt_event *ev)
+{
+	if (!decoder || !block || !ev)
+		return -pte_internal;
+
+	/* This event can't be a status update. */
+	if (ev->status_update)
+		return -pte_bad_context;
+
+	/* The overflow ends a non-empty block.  We will process the event in
+	 * the next iteration.
+	 */
+	if (!pt_blk_block_is_empty(block))
+		return 0;
+
+	/* If the IP is suppressed, the overflow resolved while tracing was
+	 * disabled.  Otherwise it resolved while tracing was enabled.
+	 */
+	if (ev->ip_suppressed) {
+		/* Tracing is disabled.  It doesn't make sense to preserve the
+		 * previous IP.  This will just be misleading.  Even if tracing
+		 * had been disabled before, as well, we might have missed the
+		 * re-enable in the overflow.
+		 */
+		decoder->enabled = 0;
+		decoder->ip = 0ull;
+
+		/* Indicate the overflow.  Since tracing is disabled, the block
+		 * will remain empty until tracing gets re-enabled again.
+		 *
+		 * When the block is eventually returned it will have the resync
+		 * and the enabled bit set to indicate that the overflow
+		 * resolved before tracing was enabled.
+		 */
+		block->resynced = 1;
+	} else {
+		/* Tracing is enabled and we're at the IP at which the overflow
+		 * resolved.
+		 */
+		decoder->enabled = 1;
+		decoder->ip = ev->variant.overflow.ip;
+
+		/* Indicate the overflow and set the start IP.  The block is
+		 * empty so we may still change it.
+		 *
+		 * We do not indicate a tracing enable if tracing had been
+		 * disabled before to distinguish this from the above case.
+		 */
+		block->resynced = 1;
+		block->ip = decoder->ip;
+	}
+
+	/* We don't know the TSX state.  Let's assume we execute normally.
+	 *
+	 * We also don't know the execution mode.  Let's keep what we have
+	 * in case we don't get an update before we have to decode the next
+	 * instruction.
+	 */
+	decoder->speculative = 0;
+	decoder->process_event = 0;
+
+	return pt_blk_proceed(decoder, block);
+}
+
+/* Apply an exec mode event.
+ *
+ * Shared by the processing of proceed events and of trailing events.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_apply_exec_mode(struct pt_block_decoder *decoder,
+				  struct pt_block *block,
+				  const struct pt_event *ev)
+{
+	enum pt_exec_mode updated;
+
+	if (!decoder || !block || !ev)
+		return -pte_internal;
+
+	/* A status update must match a known mode while tracing is enabled. */
+	updated = ev->variant.exec_mode.mode;
+	if (ev->status_update && decoder->enabled)
+		if (decoder->mode != ptem_unknown && decoder->mode != updated)
+			return -pte_bad_status_update;
+
+	decoder->process_event = 0;
+	decoder->mode = updated;
+
+	return 0;
+}
+
+/* Process an exec mode event at its event location.
+ *
+ * Applies the new execution mode.  This ends a non-empty @block.  An empty
+ * @block takes over the new mode and we proceed.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_process_exec_mode(struct pt_block_decoder *decoder,
+				    struct pt_block *block,
+				    const struct pt_event *ev)
+{
+	int status;
+
+	if (!decoder || !block)
+		return -pte_internal;
+
+	status = pt_blk_apply_exec_mode(decoder, block, ev);
+	if (status < 0)
+		return status;
+
+	/* A non-empty block ends with the execution mode change. */
+	if (!pt_blk_block_is_empty(block))
+		return 0;
+
+	/* An empty block may still adopt the new execution mode. */
+	block->mode = decoder->mode;
+
+	return pt_blk_proceed(decoder, block);
+}
+
+/* Process an exec mode event that trails a completed block.
+ *
+ * Applies the event, then continues with pt_blk_process_trailing_events().
+ *
+ * Returns a non-negative pt_status_flag bit-vector on success, a negative error
+ * code otherwise.
+ */
+static int pt_blk_process_trailing_exec_mode(struct pt_block_decoder *decoder,
+					     struct pt_block *block,
+					     const struct pt_event *ev)
+{
+	int status = pt_blk_apply_exec_mode(decoder, block, ev);
+
+	/* Do not look for further events if we failed to apply this one. */
+	if (status < 0)
+		return status;
+
+	return pt_blk_process_trailing_events(decoder, block);
+}
+
+/* Apply a tsx event.
+ *
+ * Abort or commit is only indicated while enabled and in a non-empty @block.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_apply_tsx(struct pt_block_decoder *decoder,
+			    struct pt_block *block,
+			    const struct pt_event *ev)
+{
+	if (!decoder || !block || !ev)
+		return -pte_internal;
+
+	decoder->process_event = 0;
+	decoder->speculative = ev->variant.tsx.speculative;
+
+	if (!decoder->enabled || pt_blk_block_is_empty(block))
+		return 0;
+
+	if (ev->variant.tsx.aborted)
+		block->aborted = 1;
+	else if (block->speculative && !ev->variant.tsx.speculative)
+		block->committed = 1;
+	return 0;
+}
+
+/* Process a tsx event at its event location.
+ *
+ * A change of the speculation mode ends a non-empty @block.  The commit or
+ * abort is indicated in the block that ends.
+ *
+ * While tracing is disabled or for an empty @block, e.g. if tracing was just
+ * enabled, we do not indicate the abort or commit.  An empty @block takes over
+ * the new speculation mode and we proceed.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_process_tsx(struct pt_block_decoder *decoder,
+			      struct pt_block *block,
+			      const struct pt_event *ev)
+{
+	int status;
+
+	if (!decoder || !block)
+		return -pte_internal;
+
+	status = pt_blk_apply_tsx(decoder, block, ev);
+	if (status < 0)
+		return status;
+
+	/* A non-empty block ends with the speculation mode change. */
+	if (!pt_blk_block_is_empty(block))
+		return 0;
+
+	/* An empty block may still adopt the new speculation mode. */
+	block->speculative = decoder->speculative;
+
+	return pt_blk_proceed(decoder, block);
+}
+
+/* Process a tsx event that trails a completed block.
+ *
+ * Applies the event, then continues with pt_blk_process_trailing_events().
+ *
+ * Returns a non-negative pt_status_flag bit-vector on success, a negative error
+ * code otherwise.
+ */
+static int pt_blk_process_trailing_tsx(struct pt_block_decoder *decoder,
+				       struct pt_block *block,
+				       const struct pt_event *ev)
+{
+	int status = pt_blk_apply_tsx(decoder, block, ev);
+
+	/* Do not look for further events if we failed to apply this one. */
+	if (status < 0)
+		return status;
+
+	return pt_blk_process_trailing_events(decoder, block);
+}
+
+/* Apply a stop event.
+ *
+ * Shared by the processing of proceed events and of trailing events.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_apply_stop(struct pt_block_decoder *decoder,
+			     struct pt_block *block,
+			     const struct pt_event *ev)
+{
+	if (!decoder || !block || !ev)
+		return -pte_internal;
+
+	/* Tracing is always disabled before it is stopped. */
+	if (decoder->enabled)
+		return -pte_bad_context;
+
+	/* A stop event must not be a status update. */
+	if (ev->status_update)
+		return -pte_bad_context;
+
+	/* Indicate the stop in @block. */
+	block->stopped = 1;
+
+	decoder->process_event = 0;
+
+	return 0;
+}
+
+/* Process a stop event.
+ *
+ * Applies the stop, which always follows a disabled event, and proceeds.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_process_stop(struct pt_block_decoder *decoder,
+			       struct pt_block *block,
+			       const struct pt_event *ev)
+{
+	int status;
+
+	status = pt_blk_apply_stop(decoder, block, ev);
+	if (status >= 0)
+		status = pt_blk_proceed(decoder, block);
+
+	return status;
+}
+
+/* Process a stop event that trails a completed block.
+ *
+ * Applies the stop, then continues with pt_blk_process_trailing_events().
+ *
+ * Returns a non-negative pt_status_flag bit-vector on success, a negative error
+ * code otherwise.
+ */
+static int pt_blk_process_trailing_stop(struct pt_block_decoder *decoder,
+					struct pt_block *block,
+					const struct pt_event *ev)
+{
+	int status = pt_blk_apply_stop(decoder, block, ev);
+
+	/* Do not look for further events if we failed to apply this one. */
+	if (status < 0)
+		return status;
+
+	return pt_blk_process_trailing_events(decoder, block);
+}
+
+/* Check if we can reach a particular IP from the current location.
+ *
+ * Try to proceed to @ip in at most @steps instructions without using trace.
+ * Do not update any internal state on our way and ignore errors.
+ *
+ * Returns non-zero if @ip was reached.
+ * Returns zero if @ip could not be reached within @steps instructions.
+ */
+static int pt_blk_ip_is_reachable(struct pt_block_decoder *decoder, uint64_t ip,
+				  size_t steps)
+{
+	struct pt_insn_ext iext;
+	struct pt_insn insn;
+
+	if (!decoder)
+		return 0;
+
+	memset(&insn, 0, sizeof(insn));
+	memset(&iext, 0, sizeof(iext));
+
+	/* We do not expect execution mode changes. */
+	insn.mode = decoder->mode;
+	insn.ip = decoder->ip;
+
+	for (; steps && (insn.ip != ip); --steps) {
+		int size, errcode;
+
+		/* If we can't read the memory for the instruction, we can't
+		 * reach it.
+		 */
+		size = pt_image_read(decoder->image, &insn.isid, insn.raw,
+				     sizeof(insn.raw), &decoder->asid, insn.ip);
+		if (size < 0)
+			return 0;
+
+		/* We initialize @insn.size to the maximal possible size.  It
+		 * will be set to the actual size during instruction decode.
+		 */
+		insn.size = (uint8_t) size;
+
+		errcode = pt_ild_decode(&insn, &iext);
+		if (errcode < 0)
+			return 0;
+
+		errcode = pt_insn_next_ip(&insn.ip, &insn, &iext);
+		if (errcode < 0)
+			return 0;
+	}
+	/* The loop also ends when we run out of @steps; check that case. */
+	return insn.ip == ip;
+}
+
+/* Proceed to the next IP using trace.
+ *
+ * The current block ends at @insn since we cannot proceed without trace.  Use
+ * trace to determine @decoder->ip, i.e. the start IP of the next block.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_proceed_with_trace(struct pt_block_decoder *decoder,
+				     const struct pt_insn *insn,
+				     const struct pt_insn_ext *iext)
+{
+	int flags;
+
+	if (!decoder)
+		return -pte_internal;
+
+	flags = pt_blk_next_ip(&decoder->ip, decoder, insn, iext);
+	if (flags < 0)
+		return flags;
+
+	/* Keep the query decoder's status flags; they tell us about upcoming
+	 * events.
+	 */
+	decoder->status = flags;
+
+	/* We cannot continue without an IP.
+	 *
+	 * Report a suppressed IP as an error.
+	 */
+	return (flags & pts_ip_suppressed) ? -pte_noip : 0;
+}
+
+/* Proceed by one instruction.
+ *
+ * Tries to decode the instruction at @decoder->ip and, on success, adds it to
+ * @block and provides it in @pinsn and @piext.
+ *
+ * The instruction will not be added if:
+ *
+ *   - the memory could not be read:  return error
+ *   - it could not be decoded:       return error
+ *   - @block is already full:        return zero
+ *   - @block would switch sections:  return zero
+ *
+ * Returns a positive integer if the instruction was added.
+ * Returns zero if the instruction didn't fit into @block.
+ * Returns a negative error code otherwise.
+ */
+static int pt_blk_proceed_one_insn(struct pt_block_decoder *decoder,
+				   struct pt_block *block,
+				   struct pt_insn *pinsn,
+				   struct pt_insn_ext *piext)
+{
+	struct pt_insn_ext iext;
+	struct pt_insn insn;
+	uint16_t ninsn;
+	int status;
+
+	if (!decoder || !block || !pinsn || !piext)
+		return -pte_internal;
+
+	/* There is no room in @block if the instruction count would wrap. */
+	ninsn = block->ninsn + 1;
+	if (!ninsn)
+		return 0;
+
+	insn.mode = decoder->mode;
+	insn.ip = decoder->ip;
+
+	status = pt_image_read(decoder->image, &insn.isid, insn.raw,
+			       sizeof(insn.raw), &decoder->asid, insn.ip);
+	if (status < 0)
+		return status;
+
+	/* We do not switch sections inside a block. */
+	if (insn.isid != block->isid) {
+		if (!pt_blk_block_is_empty(block))
+			return 0;
+
+		block->isid = insn.isid;
+	}
+
+	insn.size = (uint8_t) status;
+
+	status = pt_ild_decode(&insn, &iext);
+	if (status < 0)
+		return status;
+
+	/* Log calls' return addresses for return compression.
+	 *
+	 * Unless this is a call to the next instruction as is used for position
+	 * independent code.
+	 */
+	if ((insn.iclass == ptic_call) &&
+	    (!iext.variant.branch.is_direct ||
+	     iext.variant.branch.displacement)) {
+		status = pt_retstack_push(&decoder->retstack,
+					  insn.ip + insn.size);
+		if (status < 0)
+			return status;
+	}
+
+	/* We have a new instruction; account for it in @block. */
+	block->end_ip = insn.ip;
+	block->ninsn = ninsn;
+
+	*pinsn = insn;
+	*piext = iext;
+
+	return 1;
+}
+
+
+/* Proceed to the next instruction matching @predicate without using trace.
+ *
+ * Add instructions to @block until @predicate returns a positive integer for
+ * the instruction that was added last or until:
+ *
+ *   - @predicate returns an error:  return error
+ *   - @block is full:               return zero
+ *   - @block would switch sections: return zero
+ *   - we would need trace:          return -pte_bad_query
+ *
+ * The last instruction that was reached is provided in @insn and @iext.
+ *
+ * Update @decoder->ip to point to the last IP that was reached.  If we fail due
+ * to lack of trace or if we reach a desired instruction, this is @insn->ip;
+ * otherwise this is the next instruction's IP.
+ *
+ * Returns a positive integer if a suitable instruction was reached.
+ * Returns zero if no such instruction was reached.
+ * Returns a negative error code otherwise.
+ */
+static int pt_blk_proceed_to_insn(struct pt_block_decoder *decoder,
+				  struct pt_block *block,
+				  struct pt_insn *insn,
+				  struct pt_insn_ext *iext,
+				  int (*predicate)(const struct pt_insn *,
+						   const struct pt_insn_ext *))
+{
+	int status;
+
+	if (!decoder || !predicate)
+		return -pte_internal;
+
+	do {
+		status = pt_blk_proceed_one_insn(decoder, block, insn, iext);
+		if (status <= 0)
+			break;
+
+		/* Stop if this instruction matches (positive status) or if
+		 * @predicate reports an error (negative status).
+		 */
+		status = predicate(insn, iext);
+		if (status != 0)
+			break;
+
+		/* Try to determine the next IP without trace. */
+		status = pt_insn_next_ip(&decoder->ip, insn, iext);
+	} while (status >= 0);
+
+	return status;
+}
+
+/* Proceed to a particular IP without using trace.
+ *
+ * Add instructions to @block until @decoder->ip equals @ip or until:
+ *
+ *   - @block is full:               return zero
+ *   - @block would switch sections: return zero
+ *   - we would need trace:          return -pte_bad_query
+ *
+ * Provide the last instruction that was reached in @insn and @iext.  If we
+ * reached @ip, this is the instruction preceding it.
+ *
+ * Update @decoder->ip to point to the last IP that was reached.  If we fail due
+ * to lack of trace, this is @insn->ip; otherwise this is the next instruction's
+ * IP.
+ *
+ * Returns a positive integer if @ip was reached.
+ * Returns zero if no such instruction was reached.
+ * Returns a negative error code otherwise.
+ */
+static int pt_blk_proceed_to_ip(struct pt_block_decoder *decoder,
+				struct pt_block *block, struct pt_insn *insn,
+				struct pt_insn_ext *iext, uint64_t ip)
+{
+	int status;
+
+	if (!decoder)
+		return -pte_internal;
+
+	/* Step instruction by instruction until we reach @ip.
+	 *
+	 * We may not even have to decode a single instruction in some cases.
+	 */
+	while (decoder->ip != ip) {
+		status = pt_blk_proceed_one_insn(decoder, block, insn, iext);
+		if (status <= 0)
+			return status;
+
+		/* Try to determine the next IP without trace. */
+		status = pt_insn_next_ip(&decoder->ip, insn, iext);
+		if (status < 0)
+			return status;
+	}
+
+	return 1;
+}
+
+/* Proceed to the event location for a disabled event.
+ *
+ * A (synchronous) disabled event is pending.  Proceed towards its event
+ * location and indicate whether we were able to reach it.
+ *
+ * The last instruction that was reached is stored in @insn/@iext.
+ *
+ * Returns a positive integer if the event location was reached.
+ * Returns zero if the event location was not reached.
+ * Returns a negative error code otherwise.
+ */
+static int pt_blk_proceed_to_disabled(struct pt_block_decoder *decoder,
+				      struct pt_block *block,
+				      struct pt_insn *insn,
+				      struct pt_insn_ext *iext,
+				      const struct pt_event *ev)
+{
+	if (!decoder || !block || !ev)
+		return -pte_internal;
+
+	/* If the event provides an IP, that IP is the event location. */
+	if (!ev->ip_suppressed)
+		return pt_blk_proceed_to_ip(decoder, block, insn, iext,
+					    ev->variant.disabled.ip);
+
+	/* Without an IP, proceed to the next instruction for which
+	 * pt_insn_changes_cr3() holds.
+	 *
+	 * The event also binds to far branches and CPL-changing instructions.
+	 * Both require trace; our caller handles the resulting error.
+	 */
+	return pt_blk_proceed_to_insn(decoder, block, insn, iext,
+				      pt_insn_changes_cr3);
+}
+
+/* Proceed from an instruction at which we stopped previously.
+ *
+ * We stopped at @insn/@iext after decoding and accounting for the instruction
+ * but before determining the next IP.
+ *
+ * Determine the next IP, using trace if necessary, then proceed normally.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_proceed_from_insn(struct pt_block_decoder *decoder,
+				    struct pt_block *block,
+				    const struct pt_insn *insn,
+				    const struct pt_insn_ext *iext)
+{
+	int status;
+
+	if (!decoder)
+		return -pte_internal;
+
+	/* Try to determine the next IP without trace first. */
+	status = pt_insn_next_ip(&decoder->ip, insn, iext);
+	if (status >= 0)
+		return pt_blk_proceed(decoder, block);
+
+	/* Anything but a lack of trace is an error. */
+	if (status != -pte_bad_query)
+		return status;
+
+	return pt_blk_proceed_with_trace(decoder, insn, iext);
+}
+
+/* Try to work around erratum SKD022.
+ *
+ * If we get an asynchronous disable on VMLAUNCH or VMRESUME, the FUP that
+ * caused the disable to be asynchronous might have been bogus.
+ *
+ * Returns a positive integer if the erratum has been handled.
+ * Returns zero if the erratum does not apply or we fail to read or decode.
+ * Returns a negative error code otherwise.
+ */
+static int pt_blk_handle_erratum_skd022(struct pt_block_decoder *decoder,
+					struct pt_event *ev)
+{
+	struct pt_insn_ext iext;
+	struct pt_insn insn;
+	int size, errcode;
+
+	if (!decoder || !ev)
+		return -pte_internal;
+
+	insn.mode = decoder->mode;
+	insn.ip = ev->variant.async_disabled.at;
+
+	size = pt_image_read(decoder->image, &insn.isid, insn.raw,
+			     sizeof(insn.raw), &decoder->asid, insn.ip);
+	if (size < 0)
+		return 0;
+
+	/* We initialize @insn.size to the maximal possible size.  It will be
+	 * set to the actual size during instruction decode.
+	 */
+	insn.size = (uint8_t) size;
+
+	errcode = pt_ild_decode(&insn, &iext);
+	if (errcode < 0)
+		return 0;
+
+	switch (iext.iclass) {
+	default:
+		/* The erratum does not apply. */
+		return 0;
+
+	case PTI_INST_VMLAUNCH:
+	case PTI_INST_VMRESUME:
+		/* The erratum may apply.  We can't be sure without a lot more
+		 * analysis.  Let's assume it does.
+		 *
+		 * We turn the async disable into a sync disable.  Our caller
+		 * will restart event processing.
+		 */
+		ev->type = ptev_disabled;
+		ev->variant.disabled.ip = ev->variant.async_disabled.ip;
+
+		return 1;
+	}
+}
+
+/* Proceed to the next event.
+ *
+ * We have an event pending.  Proceed to the event location and either process
+ * the event and continue or postpone the event to the next block.
+ *
+ * On our way to the event location we may also be forced to postpone the event
+ * to the next block, e.g. if we overflow the number of instructions in the
+ * block or if we need trace in order to reach the event location.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_proceed_event(struct pt_block_decoder *decoder,
+				struct pt_block *block)
+{
+	struct pt_insn_ext iext;
+	struct pt_insn insn;
+	struct pt_event *ev;
+	uint64_t ip;
+	int status;
+
+	if (!decoder || !block)
+		return -pte_internal;
+
+	if (!decoder->process_event)
+		return -pte_internal;
+
+	status = 0;
+
+	ev = &decoder->event;
+	switch (ev->type) {
+	case ptev_enabled:
+		return pt_blk_process_enabled(decoder, block, ev);
+
+	case ptev_disabled:
+		status = pt_blk_proceed_to_disabled(decoder, block, &insn,
+						    &iext, ev);
+		if (status <= 0) {
+			/* A synchronous disable event also binds to the next
+			 * indirect or conditional branch, i.e. to any branch
+			 * that would have required trace.
+			 */
+			if (status != -pte_bad_query)
+				break;
+
+			/* The @decoder->ip still points to the indirect or
+			 * conditional branch instruction that caused us to
+			 * error out.  That's not where we expect tracing to
+			 * resume since the instruction already retired.
+			 *
+			 * For calls, a fair assumption is that tracing resumes
+			 * after returning from the called function.  For other
+			 * types of instructions, we simply don't know.
+			 */
+			switch (insn.iclass) {
+			case ptic_call:
+			case ptic_far_call:
+				decoder->ip = insn.ip + insn.size;
+				break;
+
+			default:
+				decoder->ip = 0ull;
+				break;
+			}
+		}
+
+		return pt_blk_process_disabled(decoder, block, ev);
+
+	case ptev_async_disabled:
+		status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
+					      ev->variant.async_disabled.at);
+		if (status <= 0)
+			break;
+
+		if (decoder->query.config.errata.skd022) {
+			status = pt_blk_handle_erratum_skd022(decoder, ev);
+			if (status != 0) {
+				if (status < 0)
+					break;
+				/* Restart with the now-synchronous disable. */
+				return pt_blk_proceed_event(decoder, block);
+			}
+		}
+
+		return pt_blk_process_disabled(decoder, block, ev);
+
+	case ptev_async_branch:
+		status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
+					      ev->variant.async_branch.from);
+		if (status <= 0)
+			break;
+
+		return pt_blk_process_async_branch(decoder, block, ev);
+
+	case ptev_paging:
+		if (!decoder->enabled)
+			return pt_blk_process_paging(decoder, block, ev);
+
+		status = pt_blk_proceed_to_insn(decoder, block, &insn, &iext,
+						pt_insn_binds_to_pip);
+		if (status <= 0)
+			break;
+
+		status = pt_blk_apply_paging(decoder, block, ev);
+		if (status < 0)
+			break;
+
+		return pt_blk_proceed_from_insn(decoder, block, &insn, &iext);
+
+	case ptev_async_paging:
+		if (!ev->ip_suppressed) {
+			ip = ev->variant.async_paging.ip;
+
+			status = pt_blk_proceed_to_ip(decoder, block, &insn,
+						      &iext, ip);
+			if (status <= 0)
+				break;
+		}
+
+		return pt_blk_process_paging(decoder, block, ev);
+
+	case ptev_vmcs:
+		if (!decoder->enabled)
+			return pt_blk_process_vmcs(decoder, block, ev);
+
+		status = pt_blk_proceed_to_insn(decoder, block, &insn, &iext,
+						pt_insn_binds_to_vmcs);
+		if (status <= 0)
+			break;
+
+		status = pt_blk_apply_vmcs(decoder, block, ev);
+		if (status < 0)
+			break;
+
+		return pt_blk_proceed_from_insn(decoder, block, &insn, &iext);
+
+	case ptev_async_vmcs:
+		if (!ev->ip_suppressed) {
+			ip = ev->variant.async_vmcs.ip;
+
+			status = pt_blk_proceed_to_ip(decoder, block, &insn,
+						      &iext, ip);
+			if (status <= 0)
+				break;
+		}
+
+		return pt_blk_process_vmcs(decoder, block, ev);
+
+	case ptev_overflow:
+		return pt_blk_process_overflow(decoder, block, ev);
+
+	case ptev_exec_mode:
+		if (!ev->ip_suppressed) {
+			ip = ev->variant.exec_mode.ip;
+
+			status = pt_blk_proceed_to_ip(decoder, block, &insn,
+						      &iext, ip);
+			if (status <= 0)
+				break;
+		}
+
+		return pt_blk_process_exec_mode(decoder, block, ev);
+
+	case ptev_tsx:
+		if (!ev->ip_suppressed) {
+			ip = ev->variant.tsx.ip;
+
+			status = pt_blk_proceed_to_ip(decoder, block, &insn,
+						      &iext, ip);
+			if (status <= 0)
+				break;
+		}
+
+		return pt_blk_process_tsx(decoder, block, ev);
+
+	case ptev_stop:
+		return pt_blk_process_stop(decoder, block, ev);
+	}
+	/* We get here if we could not process the event in this iteration. */
+	return status;
+}
+
+/* Advance to the next decision point.
+ *
+ * Called with tracing enabled and no event pending.  We walk instructions
+ * for as long as we can do so without trace and stop when we either:
+ *
+ *   - need trace in order to continue
+ *   - overflow the max number of instructions in a block
+ *
+ * We actually go one instruction past the block to obtain the start IP of the
+ * next block.  Only @decoder's internal state is updated by that step.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_proceed_no_event(struct pt_block_decoder *decoder,
+				   struct pt_block *block)
+{
+	struct pt_insn_ext iext;
+	struct pt_insn insn;
+	int errcode;
+
+	if (!decoder || !block)
+		return -pte_internal;
+
+	/* Overly conservative: we should not see a bad-query status before
+	 * at least one instruction has been decoded successfully.
+	 */
+	memset(&iext, 0, sizeof(iext));
+	memset(&insn, 0, sizeof(insn));
+
+	/* Walk as far as we get without trace. */
+	errcode = pt_blk_proceed_to_insn(decoder, block, &insn, &iext,
+					 pt_insn_false);
+	if (errcode >= 0)
+		return 0;
+
+	/* A bad-query status is our cue to continue with trace. */
+	if (errcode == -pte_bad_query)
+		return pt_blk_proceed_with_trace(decoder, &insn, &iext);
+
+	return errcode;
+}
+
+/* Proceed to the next event or decision point.
+ *
+ * A pending event takes precedence.  Without one, we walk instructions
+ * for as long as we can do so without trace.
+ *
+ * Returns zero on success, a negative error code otherwise.
+ */
+static int pt_blk_proceed(struct pt_block_decoder *decoder,
+			  struct pt_block *block)
+{
+	int pending;
+
+	pending = pt_blk_fetch_event(decoder);
+	if (pending < 0)
+		return pending;
+
+	if (pending > 0)
+		return pt_blk_proceed_event(decoder, block);
+
+	/* The end of the trace ends a non-empty block.
+	 *
+	 * When called again with an empty block, we proceed until we really
+	 * need trace.  For example, if tracing is currently disabled.
+	 */
+	if (decoder->status & pts_eos) {
+		if (!pt_blk_block_is_empty(block))
+			return 0;
+
+		if (!decoder->enabled)
+			return -pte_eos;
+	}
+
+	/* Disabled with trace left but no event: something is wrong. */
+	if (!decoder->enabled)
+		return -pte_no_enable;
+
+	return pt_blk_proceed_no_event(decoder, block);
+}
+
+/* Compute the status flags for the current decode state.
+ *
+ * Returns a pt_status_flag bit-vector or a negative error code.
+ */
+static int pt_blk_status(const struct pt_block_decoder *decoder)
+{
+	if (!decoder)
+		return -pte_internal;
+
+	/* Hold back end-of-trace while we're still processing an event. */
+	if (decoder->process_event)
+		return 0;
+
+	/* Forward end-of-trace indications. */
+	if (decoder->status & pts_eos)
+		return pts_eos;
+
+	return 0;
+}
+
+/* Try to work around erratum BDM64.
+ *
+ * The trace for a branch may be corrupted if a transaction abort follows
+ * immediately after it.  If we cannot reach the abort location from the
+ * current IP, we assume that this happened and move @decoder's IP to the
+ * abort location.
+ *
+ * Returns a positive integer if the erratum was handled.
+ * Returns zero if the erratum does not seem to apply.
+ * Returns a negative error code otherwise.
+ */
+static int pt_blk_handle_erratum_bdm64(struct pt_block_decoder *decoder,
+				       const struct pt_event *ev)
+{
+	if (!decoder || !ev)
+		return -pte_internal;
+
+	/* Only aborts are affected.
+	 *
+	 * For those, check whether we can reach the event location from
+	 * here.  If we can, we assume that the erratum did not hit.  We may
+	 * still be wrong but we have no means to tell.
+	 */
+	if (!ev->variant.tsx.aborted ||
+	    pt_blk_ip_is_reachable(decoder, ev->variant.tsx.ip, 0x1000))
+		return 0;
+
+	/* The event location is out of reach.  Either we stopped too early
+	 * or the erratum hit.
+	 *
+	 * We go with the latter and act as if the previous branch had taken
+	 * us to the event location.
+	 */
+	decoder->ip = ev->variant.tsx.ip;
+
+	return 1;
+}
+
+/* Process events that bind to the current decoder IP.
+ *
+ * We filled a block and proceeded to the next IP, which will become the start
+ * IP of the next block.  Process any pending events that bind to that IP so we
+ * can indicate their effect in the current block.
+ *
+ * Returns a non-negative pt_status_flag bit-vector on success, a negative error
+ * code otherwise.  This includes errors reported by erratum handling.
+ */
+static int pt_blk_process_trailing_events(struct pt_block_decoder *decoder,
+					  struct pt_block *block)
+{
+	struct pt_event *ev;
+	int event_pending, status;
+
+	if (!decoder)
+		return -pte_internal;
+
+	event_pending = pt_blk_fetch_event(decoder);
+	if (event_pending <= 0) {
+		if (event_pending < 0)
+			return event_pending;
+
+		return pt_blk_status(decoder);
+	}
+
+	ev = &decoder->event;
+	switch (ev->type) {
+	case ptev_enabled:
+	case ptev_disabled:
+	case ptev_paging:
+	case ptev_vmcs:
+	case ptev_overflow:
+		break;
+
+	case ptev_async_disabled:
+		if (decoder->ip != ev->variant.async_disabled.at)
+			break;
+
+		if (decoder->query.config.errata.skd022) {
+			status = pt_blk_handle_erratum_skd022(decoder, ev);
+			if (status != 0) {
+				if (status < 0)
+					return status;
+
+				return pt_blk_process_trailing_events(decoder,
+								      block);
+			}
+		}
+
+		return pt_blk_process_trailing_disabled(decoder, block, ev);
+
+	case ptev_async_branch:
+		if (decoder->ip != ev->variant.async_branch.from)
+			break;
+
+		return pt_blk_process_trailing_async_branch(decoder, block, ev);
+
+	case ptev_async_paging:
+		if (!ev->ip_suppressed &&
+		    decoder->ip != ev->variant.async_paging.ip)
+			break;
+
+		return pt_blk_process_trailing_paging(decoder, block, ev);
+
+	case ptev_async_vmcs:
+		if (!ev->ip_suppressed &&
+		    decoder->ip != ev->variant.async_vmcs.ip)
+			break;
+
+		return pt_blk_process_trailing_vmcs(decoder, block, ev);
+
+	case ptev_exec_mode:
+		if (!ev->ip_suppressed &&
+		    decoder->ip != ev->variant.exec_mode.ip)
+			break;
+
+		return pt_blk_process_trailing_exec_mode(decoder, block, ev);
+
+	case ptev_tsx:
+		if (!ev->ip_suppressed) {
+			if (decoder->query.config.errata.bdm64) {
+				status = pt_blk_handle_erratum_bdm64(decoder,
+								     ev);
+				if (status < 0)
+					return status;
+			}
+
+			if (decoder->ip != ev->variant.tsx.ip)
+				break;
+		}
+
+		return pt_blk_process_trailing_tsx(decoder, block, ev);
+
+	case ptev_stop:
+		return pt_blk_process_trailing_stop(decoder, block, ev);
+	}
+
+	return pt_blk_status(decoder);
+}
+
+/* Collect one block.
+ *
+ * Reset @block and fill it, starting from the current decode state and
+ * including the effect of events that bind to the IP following the block.
+ *
+ * Returns a non-negative pt_status_flag bit-vector on success, a negative error
+ * code otherwise.
+ */
+static int pt_blk_collect(struct pt_block_decoder *decoder,
+			  struct pt_block *block)
+{
+	int status;
+
+	if (!decoder || !block)
+		return -pte_internal;
+
+	/* Start from an all-zero block so error returns leave it defined. */
+	memset(block, 0, sizeof(*block));
+
+	/* Seed the block from the current decode state.
+	 *
+	 * That state is what the last pt_blk_next() or pt_blk_start() call
+	 * left behind.  Unless we stopped with tracing disabled, we already
+	 * proceeded to the start IP of the next block.
+	 *
+	 * Event processing may overwrite some of this later on.
+	 */
+	block->mode = decoder->mode;
+	block->ip = decoder->ip;
+	block->speculative = decoder->speculative ? 1 : 0;
+
+	/* Proceed one block. */
+	status = pt_blk_proceed(decoder, block);
+	if (status < 0)
+		return status;
+
+	/* Events that trigger on the current IP may still be pending.
+	 *
+	 * This IP lies outside of @block but events typically bind to the IP of
+	 * the last instruction that did not retire.
+	 */
+	return pt_blk_process_trailing_events(decoder, block);
+}
+
+int pt_blk_next(struct pt_block_decoder *decoder, struct pt_block *ublock,
+		size_t size)
+{
+	struct pt_block scratch, *target;
+	int status, errcode;
+
+	if (!decoder || !ublock)
+		return -pte_invalid;
+
+	/* Collect into a scratch block unless @ublock's size matches ours. */
+	target = &scratch;
+	if (size == sizeof(scratch))
+		target = ublock;
+
+	status = pt_blk_collect(decoder, target);
+	errcode = block_to_user(ublock, size, target);
+
+	return (errcode < 0) ? errcode : status;
+}