1.5ns per operation when using correct Barrier abstraction on stack-based writer.
diff --git a/example/main.go b/example/main.go
index 7eeeb11..19b060a 100644
--- a/example/main.go
+++ b/example/main.go
@@ -24,39 +24,39 @@
 
 	started := time.Now()
 	reader.Start()
-	// publish(written, read)
-	publish(disruptor.NewWriter(written, read, BufferSize))
+	publish(written, read)
+	// publish(disruptor.NewWriter(written, read, BufferSize))
 	reader.Stop()
 	finished := time.Now()
 	fmt.Println(Iterations, finished.Sub(started))
 }
 
-func publish(writer *disruptor.Writer) {
-	for sequence := disruptor.InitialSequenceValue; sequence <= Iterations; {
-		sequence = writer.Reserve()
-		ringBuffer[sequence&BufferMask] = sequence
-		writer.Commit(sequence)
-	}
-}
-
-// func publish(written, read *disruptor.Cursor) {
-// 	previous := disruptor.InitialSequenceValue
-// 	gate := disruptor.InitialSequenceValue
-
-// 	for previous <= Iterations {
-// 		next := previous + 1
-// 		wrap := next - BufferSize
-
-// 		for wrap > gate {
-// 			gate = read.Sequence
-// 		}
-
-// 		ringBuffer[next&BufferMask] = next
-// 		written.Sequence = next
-// 		previous = next
+// func publish(writer *disruptor.Writer) {
+// 	for sequence := disruptor.InitialSequenceValue; sequence <= Iterations; {
+// 		sequence = writer.Reserve()
+// 		ringBuffer[sequence&BufferMask] = sequence
+// 		writer.Commit(sequence)
 // 	}
 // }
 
+func publish(written *disruptor.Cursor, upstream disruptor.Barrier) { // writes consecutive sequence values into ringBuffer, storing each into written
+	previous := disruptor.InitialSequenceValue
+	gate := disruptor.InitialSequenceValue
+	// previous is the last sequence written; gate holds the last upstream.Read result. Loop until previous exceeds Iterations.
+	for previous <= Iterations {
+		next := previous + 1
+		wrap := next - BufferSize
+		// Spin (no sleep or yield) until gate reaches wrap. NOTE(review): presumably guards slots upstream has not consumed yet — confirm.
+		for wrap > gate {
+			gate = upstream.Read(next)
+		}
+		// Fill the slot first, then store next into written. NOTE(review): presumably Store is what exposes the slot to readers — confirm.
+		ringBuffer[next&BufferMask] = next
+		written.Store(next)
+		previous = next
+	}
+}
+
 type SampleConsumer struct{}
 
 func (this SampleConsumer) Consume(lower, upper int64) {