17 files changed, 5815 insertions, 0 deletions
diff --git a/core/state/snapshot/account.go b/core/state/snapshot/account.go
new file mode 100644
index 0000000..b92e942
--- /dev/null
+++ b/core/state/snapshot/account.go
@@ -0,0 +1,86 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snapshot
+
+import (
+	"bytes"
+	"math/big"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/rlp"
+)
+
+// Account is a modified version of a state.Account, where the root is replaced
+// with a byte slice. This format can be used to represent full-consensus format
+// or slim-snapshot format which replaces the empty root and code hash as nil
+// byte slice.
+type Account struct {
+	Nonce    uint64
+	Balance  *big.Int
+	Root     []byte
+	CodeHash []byte
+}
+
+// SlimAccount converts a state.Account content into a slim snapshot account
+func SlimAccount(nonce uint64, balance *big.Int, root common.Hash, codehash []byte) Account {
+	slim := Account{
+		Nonce:   nonce,
+		Balance: balance,
+	}
+	if root != emptyRoot {
+		slim.Root = root[:]
+	}
+	if !bytes.Equal(codehash, emptyCode[:]) {
+		slim.CodeHash = codehash
+	}
+	return slim
+}
+
+// SlimAccountRLP converts a state.Account content into a slim snapshot
+// version RLP encoded.
+func SlimAccountRLP(nonce uint64, balance *big.Int, root common.Hash, codehash []byte) []byte {
+	data, err := rlp.EncodeToBytes(SlimAccount(nonce, balance, root, codehash))
+	if err != nil {
+		panic(err)
+	}
+	return data
+}
+
+// FullAccount decodes the data on the 'slim RLP' format and return
+// the consensus format account.
+func FullAccount(data []byte) (Account, error) {
+	var account Account
+	if err := rlp.DecodeBytes(data, &account); err != nil {
+		return Account{}, err
+	}
+	if len(account.Root) == 0 {
+		account.Root = emptyRoot[:]
+	}
+	if len(account.CodeHash) == 0 {
+		account.CodeHash = emptyCode[:]
+	}
+	return account, nil
+}
+
+// FullAccountRLP converts data on the 'slim RLP' format into the full RLP-format.
+func FullAccountRLP(data []byte) ([]byte, error) {
+	account, err := FullAccount(data)
+	if err != nil {
+		return nil, err
+	}
+	return rlp.EncodeToBytes(account)
+}
diff --git a/core/state/snapshot/conversion.go b/core/state/snapshot/conversion.go
new file mode 100644
index 0000000..dee9ff0
--- /dev/null
+++ b/core/state/snapshot/conversion.go
@@ -0,0 +1,275 @@
+// Copyright 2020 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snapshot
+
+import (
+	"bytes"
+	"fmt"
+	"sync"
+	"time"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/ethdb/memorydb"
+	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum/go-ethereum/rlp"
+	"github.com/ethereum/go-ethereum/trie"
+)
+
+// trieKV represents a trie key-value pair
+type trieKV struct {
+	key   common.Hash
+	value []byte
+}
+
+type (
+	// trieGeneratorFn is the interface of trie generation which can
+	// be implemented by different trie algorithm.
+	trieGeneratorFn func(in chan (trieKV), out chan (common.Hash))
+
+	// leafCallbackFn is the callback invoked at the leaves of the trie,
+	// returns the subtrie root with the specified subtrie identifier.
+	leafCallbackFn func(hash common.Hash, stat *generateStats) common.Hash
+)
+
+// GenerateAccountTrieRoot takes an account iterator and reproduces the root hash.
+func GenerateAccountTrieRoot(it AccountIterator) (common.Hash, error) {
+	return generateTrieRoot(it, common.Hash{}, stdGenerate, nil, &generateStats{start: time.Now()}, true)
+}
+
+// GenerateStorageTrieRoot takes a storage iterator and reproduces the root hash.
+func GenerateStorageTrieRoot(account common.Hash, it StorageIterator) (common.Hash, error) {
+	return generateTrieRoot(it, account, stdGenerate, nil, &generateStats{start: time.Now()}, true)
+}
+
+// VerifyState takes the whole snapshot tree as the input, traverses all the accounts
+// as well as the corresponding storages and compares the re-computed hash with the
+// original one(state root and the storage root).
+func VerifyState(snaptree *Tree, root common.Hash) error {
+	acctIt, err := snaptree.AccountIterator(root, common.Hash{})
+	if err != nil {
+		return err
+	}
+	defer acctIt.Release()
+
+	got, err := generateTrieRoot(acctIt, common.Hash{}, stdGenerate, func(account common.Hash, stat *generateStats) common.Hash {
+		storageIt, err := snaptree.StorageIterator(root, account, common.Hash{})
+		if err != nil {
+			return common.Hash{}
+		}
+		defer storageIt.Release()
+
+		hash, err := generateTrieRoot(storageIt, account, stdGenerate, nil, stat, false)
+		if err != nil {
+			return common.Hash{}
+		}
+		return hash
+	}, &generateStats{start: time.Now()}, true)
+
+	if err != nil {
+		return err
+	}
+	if got != root {
+		return fmt.Errorf("state root hash mismatch: got %x, want %x", got, root)
+	}
+	return nil
+}
+
+// generateStats is a collection of statistics gathered by the trie generator
+// for logging purposes.
+type generateStats struct {
+	accounts   uint64
+	slots      uint64
+	curAccount common.Hash
+	curSlot    common.Hash
+	start      time.Time
+	lock       sync.RWMutex
+}
+
+// progress records the progress trie generator made recently.
+func (stat *generateStats) progress(accounts, slots uint64, curAccount common.Hash, curSlot common.Hash) {
+	stat.lock.Lock()
+	defer stat.lock.Unlock()
+
+	stat.accounts += accounts
+	stat.slots += slots
+	stat.curAccount = curAccount
+	stat.curSlot = curSlot
+}
+
+// report prints the cumulative progress statistic smartly.
+func (stat *generateStats) report() {
+	stat.lock.RLock()
+	defer stat.lock.RUnlock()
+
+	var ctx []interface{}
+	if stat.curSlot != (common.Hash{}) {
+		ctx = append(ctx, []interface{}{
+			"in", stat.curAccount,
+			"at", stat.curSlot,
+		}...)
+	} else {
+		ctx = append(ctx, []interface{}{"at", stat.curAccount}...)
+	}
+	// Add the usual measurements
+	ctx = append(ctx, []interface{}{"accounts", stat.accounts}...)
+	if stat.slots != 0 {
+		ctx = append(ctx, []interface{}{"slots", stat.slots}...)
+	}
+	ctx = append(ctx, []interface{}{"elapsed", common.PrettyDuration(time.Since(stat.start))}...)
+	log.Info("Generating trie hash from snapshot", ctx...)
+}
+
+// reportDone prints the last log when the whole generation is finished.
+func (stat *generateStats) reportDone() {
+	stat.lock.RLock()
+	defer stat.lock.RUnlock()
+
+	var ctx []interface{}
+	ctx = append(ctx, []interface{}{"accounts", stat.accounts}...)
+	if stat.slots != 0 {
+		ctx = append(ctx, []interface{}{"slots", stat.slots}...)
+	}
+	ctx = append(ctx, []interface{}{"elapsed", common.PrettyDuration(time.Since(stat.start))}...)
+	log.Info("Generated trie hash from snapshot", ctx...)
+}
+
+// generateTrieRoot generates the trie hash based on the snapshot iterator.
+// It can be used for generating account trie, storage trie or even the
+// whole state which connects the accounts and the corresponding storages.
+func generateTrieRoot(it Iterator, account common.Hash, generatorFn trieGeneratorFn, leafCallback leafCallbackFn, stats *generateStats, report bool) (common.Hash, error) {
+	var (
+		in      = make(chan trieKV)         // chan to pass leaves
+		out     = make(chan common.Hash, 1) // chan to collect result
+		stoplog = make(chan bool, 1)        // 1-size buffer, works when logging is not enabled
+		wg      sync.WaitGroup
+	)
+	// Spin up a go-routine for trie hash re-generation
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		generatorFn(in, out)
+	}()
+
+	// Spin up a go-routine for progress logging
+	if report && stats != nil {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+
+			timer := time.NewTimer(0)
+			defer timer.Stop()
+
+			for {
+				select {
+				case <-timer.C:
+					stats.report()
+					timer.Reset(time.Second * 8)
+				case success := <-stoplog:
+					if success {
+						stats.reportDone()
+					}
+					return
+				}
+			}
+		}()
+	}
+	// stop is a helper function to shutdown the background threads
+	// and return the re-generated trie hash.
+	stop := func(success bool) common.Hash {
+		close(in)
+		result := <-out
+		stoplog <- success
+		wg.Wait()
+		return result
+	}
+	var (
+		logged    = time.Now()
+		processed = uint64(0)
+		leaf      trieKV
+		last      common.Hash
+	)
+	// Start to feed leaves
+	for it.Next() {
+		if account == (common.Hash{}) {
+			var (
+				err      error
+				fullData []byte
+			)
+			if leafCallback == nil {
+				fullData, err = FullAccountRLP(it.(AccountIterator).Account())
+				if err != nil {
+					stop(false)
+					return common.Hash{}, err
+				}
+			} else {
+				account, err := FullAccount(it.(AccountIterator).Account())
+				if err != nil {
+					stop(false)
+					return common.Hash{}, err
+				}
+				// Apply the leaf callback. Normally the callback is used to traverse
+				// the storage trie and re-generate the subtrie root.
+				subroot := leafCallback(it.Hash(), stats)
+				if !bytes.Equal(account.Root, subroot.Bytes()) {
+					stop(false)
+					return common.Hash{}, fmt.Errorf("invalid subroot(%x), want %x, got %x", it.Hash(), account.Root, subroot)
+				}
+				fullData, err = rlp.EncodeToBytes(account)
+				if err != nil {
+					stop(false)
+					return common.Hash{}, err
+				}
+			}
+			leaf = trieKV{it.Hash(), fullData}
+		} else {
+			leaf = trieKV{it.Hash(), common.CopyBytes(it.(StorageIterator).Slot())}
+		}
+		in <- leaf
+
+		// Accumulate the generaation statistic if it's required.
+		processed++
+		if time.Since(logged) > 3*time.Second && stats != nil {
+			if account == (common.Hash{}) {
+				stats.progress(processed, 0, it.Hash(), common.Hash{})
+			} else {
+				stats.progress(0, processed, account, it.Hash())
+			}
+			logged, processed = time.Now(), 0
+		}
+		last = it.Hash()
+	}
+	// Commit the last part statistic.
+	if processed > 0 && stats != nil {
+		if account == (common.Hash{}) {
+			stats.progress(processed, 0, last, common.Hash{})
+		} else {
+			stats.progress(0, processed, account, last)
+		}
+	}
+	result := stop(true)
+	return result, nil
+}
+
+// stdGenerate is a very basic hexary trie builder which uses the same Trie
+// as the rest of geth, with no enhancements or optimizations
+func stdGenerate(in chan (trieKV), out chan (common.Hash)) {
+	t, _ := trie.New(common.Hash{}, trie.NewDatabase(memorydb.New()))
+	for leaf := range in {
+		t.TryUpdate(leaf.key[:], leaf.value)
+	}
+	out <- t.Hash()
+}
diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go
new file mode 100644
index 0000000..0aef6cf
--- /dev/null
+++ b/core/state/snapshot/difflayer.go
@@ -0,0 +1,553 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snapshot
+
+import (
+	"encoding/binary"
+	"fmt"
+	"math"
+	"math/rand"
+	"sort"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/rlp"
+	"github.com/steakknife/bloomfilter"
+)
+
+var (
+	// aggregatorMemoryLimit is the maximum size of the bottom-most diff layer
+	// that aggregates the writes from above until it's flushed into the disk
+	// layer.
+	//
+	// Note, bumping this up might drastically increase the size of the bloom
+	// filters that's stored in every diff layer. Don't do that without fully
+	// understanding all the implications.
+	aggregatorMemoryLimit = uint64(4 * 1024 * 1024)
+
+	// aggregatorItemLimit is an approximate number of items that will end up
+	// in the agregator layer before it's flushed out to disk. A plain account
+	// weighs around 14B (+hash), a storage slot 32B (+hash), a deleted slot
+	// 0B (+hash). Slots are mostly set/unset in lockstep, so thet average at
+	// 16B (+hash). All in all, the average entry seems to be 15+32=47B. Use a
+	// smaller number to be on the safe side.
+	aggregatorItemLimit = aggregatorMemoryLimit / 42
+
+	// bloomTargetError is the target false positive rate when the aggregator
+	// layer is at its fullest. The actual value will probably move around up
+	// and down from this number, it's mostly a ballpark figure.
+	//
+	// Note, dropping this down might drastically increase the size of the bloom
+	// filters that's stored in every diff layer. Don't do that without fully
+	// understanding all the implications.
+	bloomTargetError = 0.02
+
+	// bloomSize is the ideal bloom filter size given the maximum number of items
+	// it's expected to hold and the target false positive error rate.
+	bloomSize = math.Ceil(float64(aggregatorItemLimit) * math.Log(bloomTargetError) / math.Log(1/math.Pow(2, math.Log(2))))
+
+	// bloomFuncs is the ideal number of bits a single entry should set in the
+	// bloom filter to keep its size to a minimum (given it's size and maximum
+	// entry count).
+	bloomFuncs = math.Round((bloomSize / float64(aggregatorItemLimit)) * math.Log(2))
+
+	// the bloom offsets are runtime constants which determines which part of the
+	// the account/storage hash the hasher functions looks at, to determine the
+	// bloom key for an account/slot. This is randomized at init(), so that the
+	// global population of nodes do not all display the exact same behaviour with
+	// regards to bloom content
+	bloomDestructHasherOffset = 0
+	bloomAccountHasherOffset  = 0
+	bloomStorageHasherOffset  = 0
+)
+
+func init() {
+	// Init the bloom offsets in the range [0:24] (requires 8 bytes)
+	bloomDestructHasherOffset = rand.Intn(25)
+	bloomAccountHasherOffset = rand.Intn(25)
+	bloomStorageHasherOffset = rand.Intn(25)
+
+	// The destruct and account blooms must be different, as the storage slots
+	// will check for destruction too for every bloom miss. It should not collide
+	// with modified accounts.
+	for bloomAccountHasherOffset == bloomDestructHasherOffset {
+		bloomAccountHasherOffset = rand.Intn(25)
+	}
+}
+
+// diffLayer represents a collection of modifications made to a state snapshot
+// after running a block on top. It contains one sorted list for the account trie
+// and one-one list for each storage tries.
+//
+// The goal of a diff layer is to act as a journal, tracking recent modifications
+// made to the state, that have not yet graduated into a semi-immutable state.
+type diffLayer struct {
+	origin *diskLayer // Base disk layer to directly use on bloom misses
+	parent snapshot   // Parent snapshot modified by this one, never nil
+	memory uint64     // Approximate guess as to how much memory we use
+
+	root  common.Hash // Root hash to which this snapshot diff belongs to
+	stale uint32      // Signals that the layer became stale (state progressed)
+
+	// destructSet is a very special helper marker. If an account is marked as
+	// deleted, then it's recorded in this set. However it's allowed that an account
+	// is included here but still available in other sets(e.g. storageData). The
+	// reason is the diff layer includes all the changes in a *block*. It can
+	// happen that in the tx_1, account A is self-destructed while in the tx_2
+	// it's recreated. But we still need this marker to indicate the "old" A is
+	// deleted, all data in other set belongs to the "new" A.
+	destructSet map[common.Hash]struct{}               // Keyed markers for deleted (and potentially) recreated accounts
+	accountList []common.Hash                          // List of account for iteration. If it exists, it's sorted, otherwise it's nil
+	accountData map[common.Hash][]byte                 // Keyed accounts for direct retrival (nil means deleted)
+	storageList map[common.Hash][]common.Hash          // List of storage slots for iterated retrievals, one per account. Any existing lists are sorted if non-nil
+	storageData map[common.Hash]map[common.Hash][]byte // Keyed storage slots for direct retrival. one per account (nil means deleted)
+
+	diffed *bloomfilter.Filter // Bloom filter tracking all the diffed items up to the disk layer
+
+	lock sync.RWMutex
+}
+
+// destructBloomHasher is a wrapper around a common.Hash to satisfy the interface
+// API requirements of the bloom library used. It's used to convert a destruct
+// event into a 64 bit mini hash.
+type destructBloomHasher common.Hash
+
+func (h destructBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") }
+func (h destructBloomHasher) Sum(b []byte) []byte               { panic("not implemented") }
+func (h destructBloomHasher) Reset()                            { panic("not implemented") }
+func (h destructBloomHasher) BlockSize() int                    { panic("not implemented") }
+func (h destructBloomHasher) Size() int                         { return 8 }
+func (h destructBloomHasher) Sum64() uint64 {
+	return binary.BigEndian.Uint64(h[bloomDestructHasherOffset : bloomDestructHasherOffset+8])
+}
+
+// accountBloomHasher is a wrapper around a common.Hash to satisfy the interface
+// API requirements of the bloom library used. It's used to convert an account
+// hash into a 64 bit mini hash.
+type accountBloomHasher common.Hash
+
+func (h accountBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") }
+func (h accountBloomHasher) Sum(b []byte) []byte               { panic("not implemented") }
+func (h accountBloomHasher) Reset()                            { panic("not implemented") }
+func (h accountBloomHasher) BlockSize() int                    { panic("not implemented") }
+func (h accountBloomHasher) Size() int                         { return 8 }
+func (h accountBloomHasher) Sum64() uint64 {
+	return binary.BigEndian.Uint64(h[bloomAccountHasherOffset : bloomAccountHasherOffset+8])
+}
+
+// storageBloomHasher is a wrapper around a [2]common.Hash to satisfy the interface
+// API requirements of the bloom library used. It's used to convert an account
+// hash into a 64 bit mini hash.
+type storageBloomHasher [2]common.Hash
+
+func (h storageBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") }
+func (h storageBloomHasher) Sum(b []byte) []byte               { panic("not implemented") }
+func (h storageBloomHasher) Reset()                            { panic("not implemented") }
+func (h storageBloomHasher) BlockSize() int                    { panic("not implemented") }
+func (h storageBloomHasher) Size() int                         { return 8 }
+func (h storageBloomHasher) Sum64() uint64 {
+	return binary.BigEndian.Uint64(h[0][bloomStorageHasherOffset:bloomStorageHasherOffset+8]) ^
+		binary.BigEndian.Uint64(h[1][bloomStorageHasherOffset:bloomStorageHasherOffset+8])
+}
+
+// newDiffLayer creates a new diff on top of an existing snapshot, whether that's a low
+// level persistent database or a hierarchical diff already.
+func newDiffLayer(parent snapshot, root common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer {
+	// Create the new layer with some pre-allocated data segments
+	dl := &diffLayer{
+		parent:      parent,
+		root:        root,
+		destructSet: destructs,
+		accountData: accounts,
+		storageData: storage,
+		storageList: make(map[common.Hash][]common.Hash),
+	}
+	switch parent := parent.(type) {
+	case *diskLayer:
+		dl.rebloom(parent)
+	case *diffLayer:
+		dl.rebloom(parent.origin)
+	default:
+		panic("unknown parent type")
+	}
+	// Sanity check that accounts or storage slots are never nil
+	for accountHash, blob := range accounts {
+		if blob == nil {
+			panic(fmt.Sprintf("account %#x nil", accountHash))
+		}
+	}
+	for accountHash, slots := range storage {
+		if slots == nil {
+			panic(fmt.Sprintf("storage %#x nil", accountHash))
+		}
+	}
+	// Determine memory size and track the dirty writes
+	for _, data := range accounts {
+		dl.memory += uint64(common.HashLength + len(data))
+		snapshotDirtyAccountWriteMeter.Mark(int64(len(data)))
+	}
+	// Determine memory size and track the dirty writes
+	for _, slots := range storage {
+		for _, data := range slots {
+			dl.memory += uint64(common.HashLength + len(data))
+			snapshotDirtyStorageWriteMeter.Mark(int64(len(data)))
+		}
+	}
+	dl.memory += uint64(len(destructs) * common.HashLength)
+	return dl
+}
+
+// rebloom discards the layer's current bloom and rebuilds it from scratch based
+// on the parent's and the local diffs.
+func (dl *diffLayer) rebloom(origin *diskLayer) {
+	dl.lock.Lock()
+	defer dl.lock.Unlock()
+
+	defer func(start time.Time) {
+		snapshotBloomIndexTimer.Update(time.Since(start))
+	}(time.Now())
+
+	// Inject the new origin that triggered the rebloom
+	dl.origin = origin
+
+	// Retrieve the parent bloom or create a fresh empty one
+	if parent, ok := dl.parent.(*diffLayer); ok {
+		parent.lock.RLock()
+		dl.diffed, _ = parent.diffed.Copy()
+		parent.lock.RUnlock()
+	} else {
+		dl.diffed, _ = bloomfilter.New(uint64(bloomSize), uint64(bloomFuncs))
+	}
+	// Iterate over all the accounts and storage slots and index them
+	for hash := range dl.destructSet {
+		dl.diffed.Add(destructBloomHasher(hash))
+	}
+	for hash := range dl.accountData {
+		dl.diffed.Add(accountBloomHasher(hash))
+	}
+	for accountHash, slots := range dl.storageData {
+		for storageHash := range slots {
+			dl.diffed.Add(storageBloomHasher{accountHash, storageHash})
+		}
+	}
+	// Calculate the current false positive rate and update the error rate meter.
+	// This is a bit cheating because subsequent layers will overwrite it, but it
+	// should be fine, we're only interested in ballpark figures.
+	k := float64(dl.diffed.K())
+	n := float64(dl.diffed.N())
+	m := float64(dl.diffed.M())
+	snapshotBloomErrorGauge.Update(math.Pow(1.0-math.Exp((-k)*(n+0.5)/(m-1)), k))
+}
+
+// Root returns the root hash for which this snapshot was made.
+func (dl *diffLayer) Root() common.Hash {
+	return dl.root
+}
+
+// Parent returns the subsequent layer of a diff layer.
+func (dl *diffLayer) Parent() snapshot {
+	return dl.parent
+}
+
+// Stale return whether this layer has become stale (was flattened across) or if
+// it's still live.
+func (dl *diffLayer) Stale() bool {
+	return atomic.LoadUint32(&dl.stale) != 0
+}
+
+// Account directly retrieves the account associated with a particular hash in
+// the snapshot slim data format.
+func (dl *diffLayer) Account(hash common.Hash) (*Account, error) {
+	data, err := dl.AccountRLP(hash)
+	if err != nil {
+		return nil, err
+	}
+	if len(data) == 0 { // can be both nil and []byte{}
+		return nil, nil
+	}
+	account := new(Account)
+	if err := rlp.DecodeBytes(data, account); err != nil {
+		panic(err)
+	}
+	return account, nil
+}
+
+// AccountRLP directly retrieves the account RLP associated with a particular
+// hash in the snapshot slim data format.
+//
+// Note the returned account is not a copy, please don't modify it.
+func (dl *diffLayer) AccountRLP(hash common.Hash) ([]byte, error) {
+	// Check the bloom filter first whether there's even a point in reaching into
+	// all the maps in all the layers below
+	dl.lock.RLock()
+	hit := dl.diffed.Contains(accountBloomHasher(hash))
+	if !hit {
+		hit = dl.diffed.Contains(destructBloomHasher(hash))
+	}
+	dl.lock.RUnlock()
+
+	// If the bloom filter misses, don't even bother with traversing the memory
+	// diff layers, reach straight into the bottom persistent disk layer
+	if !hit {
+		snapshotBloomAccountMissMeter.Mark(1)
+		return dl.origin.AccountRLP(hash)
+	}
+	// The bloom filter hit, start poking in the internal maps
+	return dl.accountRLP(hash, 0)
+}
+
+// accountRLP is an internal version of AccountRLP that skips the bloom filter
+// checks and uses the internal maps to try and retrieve the data. It's meant
+// to be used if a higher layer's bloom filter hit already.
+func (dl *diffLayer) accountRLP(hash common.Hash, depth int) ([]byte, error) {
+	dl.lock.RLock()
+	defer dl.lock.RUnlock()
+
+	// If the layer was flattened into, consider it invalid (any live reference to
+	// the original should be marked as unusable).
+	if dl.Stale() {
+		return nil, ErrSnapshotStale
+	}
+	// If the account is known locally, return it
+	if data, ok := dl.accountData[hash]; ok {
+		snapshotDirtyAccountHitMeter.Mark(1)
+		snapshotDirtyAccountHitDepthHist.Update(int64(depth))
+		snapshotDirtyAccountReadMeter.Mark(int64(len(data)))
+		snapshotBloomAccountTrueHitMeter.Mark(1)
+		return data, nil
+	}
+	// If the account is known locally, but deleted, return it
+	if _, ok := dl.destructSet[hash]; ok {
+		snapshotDirtyAccountHitMeter.Mark(1)
+		snapshotDirtyAccountHitDepthHist.Update(int64(depth))
+		snapshotDirtyAccountInexMeter.Mark(1)
+		snapshotBloomAccountTrueHitMeter.Mark(1)
+		return nil, nil
+	}
+	// Account unknown to this diff, resolve from parent
+	if diff, ok := dl.parent.(*diffLayer); ok {
+		return diff.accountRLP(hash, depth+1)
+	}
+	// Failed to resolve through diff layers, mark a bloom error and use the disk
+	snapshotBloomAccountFalseHitMeter.Mark(1)
+	return dl.parent.AccountRLP(hash)
+}
+
+// Storage directly retrieves the storage data associated with a particular hash,
+// within a particular account. If the slot is unknown to this diff, it's parent
+// is consulted.
+//
+// Note the returned slot is not a copy, please don't modify it.
+func (dl *diffLayer) Storage(accountHash, storageHash common.Hash) ([]byte, error) {
+	// Check the bloom filter first whether there's even a point in reaching into
+	// all the maps in all the layers below
+	dl.lock.RLock()
+	hit := dl.diffed.Contains(storageBloomHasher{accountHash, storageHash})
+	if !hit {
+		hit = dl.diffed.Contains(destructBloomHasher(accountHash))
+	}
+	dl.lock.RUnlock()
+
+	// If the bloom filter misses, don't even bother with traversing the memory
+	// diff layers, reach straight into the bottom persistent disk layer
+	if !hit {
+		snapshotBloomStorageMissMeter.Mark(1)
+		return dl.origin.Storage(accountHash, storageHash)
+	}
+	// The bloom filter hit, start poking in the internal maps
+	return dl.storage(accountHash, storageHash, 0)
+}
+
+// storage is an internal version of Storage that skips the bloom filter checks
+// and uses the internal maps to try and retrieve the data. It's meant  to be
+// used if a higher layer's bloom filter hit already.
+func (dl *diffLayer) storage(accountHash, storageHash common.Hash, depth int) ([]byte, error) {
+	dl.lock.RLock()
+	defer dl.lock.RUnlock()
+
+	// If the layer was flattened into, consider it invalid (any live reference to
+	// the original should be marked as unusable).
+	if dl.Stale() {
+		return nil, ErrSnapshotStale
+	}
+	// If the account is known locally, try to resolve the slot locally
+	if storage, ok := dl.storageData[accountHash]; ok {
+		if data, ok := storage[storageHash]; ok {
+			snapshotDirtyStorageHitMeter.Mark(1)
+			snapshotDirtyStorageHitDepthHist.Update(int64(depth))
+			if n := len(data); n > 0 {
+				snapshotDirtyStorageReadMeter.Mark(int64(n))
+			} else {
+				snapshotDirtyStorageInexMeter.Mark(1)
+			}
+			snapshotBloomStorageTrueHitMeter.Mark(1)
+			return data, nil
+		}
+	}
+	// If the account is known locally, but deleted, return an empty slot
+	if _, ok := dl.destructSet[accountHash]; ok {
+		snapshotDirtyStorageHitMeter.Mark(1)
+		snapshotDirtyStorageHitDepthHist.Update(int64(depth))
+		snapshotDirtyStorageInexMeter.Mark(1)
+		snapshotBloomStorageTrueHitMeter.Mark(1)
+		return nil, nil
+	}
+	// Storage slot unknown to this diff, resolve from parent
+	if diff, ok := dl.parent.(*diffLayer); ok {
+		return diff.storage(accountHash, storageHash, depth+1)
+	}
+	// Failed to resolve through diff layers, mark a bloom error and use the disk
+	snapshotBloomStorageFalseHitMeter.Mark(1)
+	return dl.parent.Storage(accountHash, storageHash)
+}
+
+// Update creates a new layer on top of the existing snapshot diff tree with
+// the specified data items.
+func (dl *diffLayer) Update(blockRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer {
+	return newDiffLayer(dl, blockRoot, destructs, accounts, storage)
+}
+
+// flatten pushes all data from this point downwards, flattening everything into
+// a single diff at the bottom. Since usually the lowermost diff is the largest,
+// the flattening builds up from there in reverse.
+func (dl *diffLayer) flatten() snapshot {
+	// If the parent is not diff, we're the first in line, return unmodified
+	parent, ok := dl.parent.(*diffLayer)
+	if !ok {
+		return dl
+	}
+	// Parent is a diff, flatten it first (note, apart from weird corned cases,
+	// flatten will realistically only ever merge 1 layer, so there's no need to
+	// be smarter about grouping flattens together).
+	parent = parent.flatten().(*diffLayer)
+
+	parent.lock.Lock()
+	defer parent.lock.Unlock()
+
+	// Before actually writing all our data to the parent, first ensure that the
+	// parent hasn't been 'corrupted' by someone else already flattening into it
+	if atomic.SwapUint32(&parent.stale, 1) != 0 {
+		panic("parent diff layer is stale") // we've flattened into the same parent from two children, boo
+	}
+	// Overwrite all the updated accounts blindly, merge the sorted list
+	for hash := range dl.destructSet {
+		parent.destructSet[hash] = struct{}{}
+		delete(parent.accountData, hash)
+		delete(parent.storageData, hash)
+	}
+	for hash, data := range dl.accountData {
+		parent.accountData[hash] = data
+	}
+	// Overwrite all the updated storage slots (individually)
+	for accountHash, storage := range dl.storageData {
+		// If storage didn't exist (or was deleted) in the parent, overwrite blindly
+		if _, ok := parent.storageData[accountHash]; !ok {
+			parent.storageData[accountHash] = storage
+			continue
+		}
+		// Storage exists in both parent and child, merge the slots
+		comboData := parent.storageData[accountHash]
+		for storageHash, data := range storage {
+			comboData[storageHash] = data
+		}
+		parent.storageData[accountHash] = comboData
+	}
+	// Return the combo parent
+	return &diffLayer{
+		parent:      parent.parent,
+		origin:      parent.origin,
+		root:        dl.root,
+		destructSet: parent.destructSet,
+		accountData: parent.accountData,
+		storageData: parent.storageData,
+		storageList: make(map[common.Hash][]common.Hash),
+		diffed:      dl.diffed,
+		memory:      parent.memory + dl.memory,
+	}
+}
+
+// AccountList returns a sorted list of all accounts in this difflayer, including
+// the deleted ones.
+//
+// Note, the returned slice is not a copy, so do not modify it.
+func (dl *diffLayer) AccountList() []common.Hash {
+	// If an old list already exists, return it
+	dl.lock.RLock()
+	list := dl.accountList
+	dl.lock.RUnlock()
+
+	if list != nil {
+		return list
+	}
+	// No old sorted account list exists, generate a new one
+	dl.lock.Lock()
+	defer dl.lock.Unlock()
+
+	dl.accountList = make([]common.Hash, 0, len(dl.destructSet)+len(dl.accountData))
+	for hash := range dl.accountData {
+		dl.accountList = append(dl.accountList, hash)
+	}
+	for hash := range dl.destructSet {
+		if _, ok := dl.accountData[hash]; !ok {
+			dl.accountList = append(dl.accountList, hash)
+		}
+	}
+	sort.Sort(hashes(dl.accountList))
+	dl.memory += uint64(len(dl.accountList) * common.HashLength)
+	return dl.accountList
+}
+
+// StorageList returns a sorted list of all storage slot hashes in this difflayer
+// for the given account. If the whole storage is destructed in this layer, then
+// an additional flag *destructed = true* will be returned, otherwise the flag is
+// false. Besides, the returned list will include the hash of deleted storage slot.
+// Note a special case is an account is deleted in a prior tx but is recreated in
+// the following tx with some storage slots set. In this case the returned list is
+// not empty but the flag is true.
+//
+// Note, the returned slice is not a copy, so do not modify it.
+func (dl *diffLayer) StorageList(accountHash common.Hash) ([]common.Hash, bool) {
+	dl.lock.RLock()
+	_, destructed := dl.destructSet[accountHash]
+	if _, ok := dl.storageData[accountHash]; !ok {
+		// Account not tracked by this layer
+		dl.lock.RUnlock()
+		return nil, destructed
+	}
+	// If an old list already exists, return it
+	if list, exist := dl.storageList[accountHash]; exist {
+		dl.lock.RUnlock()
+		return list, destructed // the cached list can't be nil
+	}
+	dl.lock.RUnlock()
+
+	// No old sorted account list exists, generate a new one
+	dl.lock.Lock()
+	defer dl.lock.Unlock()
+
+	storageMap := dl.storageData[accountHash]
+	storageList := make([]common.Hash, 0, len(storageMap))
+	for k := range storageMap {
+		storageList = append(storageList, k)
+	}
+	sort.Sort(hashes(storageList))
+	dl.storageList[accountHash] = storageList
+	dl.memory += uint64(len(dl.storageList)*common.HashLength + common.HashLength)
+	return storageList, destructed
+}
diff --git a/core/state/snapshot/difflayer_test.go b/core/state/snapshot/difflayer_test.go
new file mode 100644
index 0000000..31636ee
--- /dev/null
+++ b/core/state/snapshot/difflayer_test.go
@@ -0,0 +1,400 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snapshot
+
+import (
+	"bytes"
+	"math/rand"
+	"testing"
+
+	"github.com/VictoriaMetrics/fastcache"
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/crypto"
+	"github.com/ethereum/go-ethereum/ethdb/memorydb"
+)
+
+func copyDestructs(destructs map[common.Hash]struct{}) map[common.Hash]struct{} {
+	copy := make(map[common.Hash]struct{})
+	for hash := range destructs {
+		copy[hash] = struct{}{}
+	}
+	return copy
+}
+
+func copyAccounts(accounts map[common.Hash][]byte) map[common.Hash][]byte {
+	copy := make(map[common.Hash][]byte)
+	for hash, blob := range accounts {
+		copy[hash] = blob
+	}
+	return copy
+}
+
+func copyStorage(storage map[common.Hash]map[common.Hash][]byte) map[common.Hash]map[common.Hash][]byte {
+	copy := make(map[common.Hash]map[common.Hash][]byte)
+	for accHash, slots := range storage {
+		copy[accHash] = make(map[common.Hash][]byte)
+		for slotHash, blob := range slots {
+			copy[accHash][slotHash] = blob
+		}
+	}
+	return copy
+}
+
+// TestMergeBasics tests some simple merges
+func TestMergeBasics(t *testing.T) {
+	var (
+		destructs = make(map[common.Hash]struct{})
+		accounts  = make(map[common.Hash][]byte)
+		storage   = make(map[common.Hash]map[common.Hash][]byte)
+	)
+	// Fill up a parent
+	for i := 0; i < 100; i++ {
+		h := randomHash()
+		data := randomAccount()
+
+		accounts[h] = data
+		if rand.Intn(4) == 0 {
+			destructs[h] = struct{}{}
+		}
+		if rand.Intn(2) == 0 {
+			accStorage := make(map[common.Hash][]byte)
+			value := make([]byte, 32)
+			rand.Read(value)
+			accStorage[randomHash()] = value
+			storage[h] = accStorage
+		}
+	}
+	// Add some (identical) layers on top
+	parent := newDiffLayer(emptyLayer(), common.Hash{}, copyDestructs(destructs), copyAccounts(accounts), copyStorage(storage))
+	child := newDiffLayer(parent, common.Hash{}, copyDestructs(destructs), copyAccounts(accounts), copyStorage(storage))
+	child = newDiffLayer(child, common.Hash{}, copyDestructs(destructs), copyAccounts(accounts), copyStorage(storage))
+	child = newDiffLayer(child, common.Hash{}, copyDestructs(destructs), copyAccounts(accounts), copyStorage(storage))
+	child = newDiffLayer(child, common.Hash{}, copyDestructs(destructs), copyAccounts(accounts), copyStorage(storage))
+	// And flatten
+	merged := (child.flatten()).(*diffLayer)
+
+	{ // Check account lists
+		if have, want := len(merged.accountList), 0; have != want {
+			t.Errorf("accountList wrong: have %v, want %v", have, want)
+		}
+		if have, want := len(merged.AccountList()), len(accounts); have != want {
+			t.Errorf("AccountList() wrong: have %v, want %v", have, want)
+		}
+		if have, want := len(merged.accountList), len(accounts); have != want {
+			t.Errorf("accountList [2] wrong: have %v, want %v", have, want)
+		}
+	}
+	{ // Check account drops
+		if have, want := len(merged.destructSet), len(destructs); have != want {
+			t.Errorf("accountDrop wrong: have %v, want %v", have, want)
+		}
+	}
+	{ // Check storage lists
+		i := 0
+		for aHash, sMap := range storage {
+			if have, want := len(merged.storageList), i; have != want {
+				t.Errorf("[1] storageList wrong: have %v, want %v", have, want)
+			}
+			list, _ := merged.StorageList(aHash)
+			if have, want := len(list), len(sMap); have != want {
+				t.Errorf("[2] StorageList() wrong: have %v, want %v", have, want)
+			}
+			if have, want := len(merged.storageList[aHash]), len(sMap); have != want {
+				t.Errorf("storageList wrong: have %v, want %v", have, want)
+			}
+			i++
+		}
+	}
+}
+
+// TestMergeDelete tests some deletion
+func TestMergeDelete(t *testing.T) {
+	var (
+		storage = make(map[common.Hash]map[common.Hash][]byte)
+	)
+	// Fill up a parent
+	h1 := common.HexToHash("0x01")
+	h2 := common.HexToHash("0x02")
+
+	flipDrops := func() map[common.Hash]struct{} {
+		return map[common.Hash]struct{}{
+			h2: {},
+		}
+	}
+	flipAccs := func() map[common.Hash][]byte {
+		return map[common.Hash][]byte{
+			h1: randomAccount(),
+		}
+	}
+	flopDrops := func() map[common.Hash]struct{} {
+		return map[common.Hash]struct{}{
+			h1: {},
+		}
+	}
+	flopAccs := func() map[common.Hash][]byte {
+		return map[common.Hash][]byte{
+			h2: randomAccount(),
+		}
+	}
+	// Add some flipAccs-flopping layers on top
+	parent := newDiffLayer(emptyLayer(), common.Hash{}, flipDrops(), flipAccs(), storage)
+	child := parent.Update(common.Hash{}, flopDrops(), flopAccs(), storage)
+	child = child.Update(common.Hash{}, flipDrops(), flipAccs(), storage)
+	child = child.Update(common.Hash{}, flopDrops(), flopAccs(), storage)
+	child = child.Update(common.Hash{}, flipDrops(), flipAccs(), storage)
+	child = child.Update(common.Hash{}, flopDrops(), flopAccs(), storage)
+	child = child.Update(common.Hash{}, flipDrops(), flipAccs(), storage)
+
+	if data, _ := child.Account(h1); data == nil {
+		t.Errorf("last diff layer: expected %x account to be non-nil", h1)
+	}
+	if data, _ := child.Account(h2); data != nil {
+		t.Errorf("last diff layer: expected %x account to be nil", h2)
+	}
+	if _, ok := child.destructSet[h1]; ok {
+		t.Errorf("last diff layer: expected %x drop to be missing", h1)
+	}
+	if _, ok := child.destructSet[h2]; !ok {
+		t.Errorf("last diff layer: expected %x drop to be present", h1)
+	}
+	// And flatten
+	merged := (child.flatten()).(*diffLayer)
+
+	if data, _ := merged.Account(h1); data == nil {
+		t.Errorf("merged layer: expected %x account to be non-nil", h1)
+	}
+	if data, _ := merged.Account(h2); data != nil {
+		t.Errorf("merged layer: expected %x account to be nil", h2)
+	}
+	if _, ok := merged.destructSet[h1]; !ok { // Note, drops stay alive until persisted to disk!
+		t.Errorf("merged diff layer: expected %x drop to be present", h1)
+	}
+	if _, ok := merged.destructSet[h2]; !ok { // Note, drops stay alive until persisted to disk!
+		t.Errorf("merged diff layer: expected %x drop to be present", h1)
+	}
+	// If we add more granular metering of memory, we can enable this again,
+	// but it's not implemented for now
+	//if have, want := merged.memory, child.memory; have != want {
+	//	t.Errorf("mem wrong: have %d, want %d", have, want)
+	//}
+}
+
+// This tests that if we create a new account, and set a slot, and then merge
+// it, the lists will be correct.
+func TestInsertAndMerge(t *testing.T) {
+	// Fill up a parent
+	var (
+		acc    = common.HexToHash("0x01")
+		slot   = common.HexToHash("0x02")
+		parent *diffLayer
+		child  *diffLayer
+	)
+	{
+		var (
+			destructs = make(map[common.Hash]struct{})
+			accounts  = make(map[common.Hash][]byte)
+			storage   = make(map[common.Hash]map[common.Hash][]byte)
+		)
+		parent = newDiffLayer(emptyLayer(), common.Hash{}, destructs, accounts, storage)
+	}
+	{
+		var (
+			destructs = make(map[common.Hash]struct{})
+			accounts  = make(map[common.Hash][]byte)
+			storage   = make(map[common.Hash]map[common.Hash][]byte)
+		)
+		accounts[acc] = randomAccount()
+		storage[acc] = make(map[common.Hash][]byte)
+		storage[acc][slot] = []byte{0x01}
+		child = newDiffLayer(parent, common.Hash{}, destructs, accounts, storage)
+	}
+	// And flatten
+	merged := (child.flatten()).(*diffLayer)
+	{ // Check that slot value is present
+		have, _ := merged.Storage(acc, slot)
+		if want := []byte{0x01}; !bytes.Equal(have, want) {
+			t.Errorf("merged slot value wrong: have %x, want %x", have, want)
+		}
+	}
+}
+
+func emptyLayer() *diskLayer {
+	return &diskLayer{
+		diskdb: memorydb.New(),
+		cache:  fastcache.New(500 * 1024),
+	}
+}
+
+// BenchmarkSearch checks how long it takes to find a non-existing key
+// BenchmarkSearch-6   	  200000	     10481 ns/op (1K per layer)
+// BenchmarkSearch-6   	  200000	     10760 ns/op (10K per layer)
+// BenchmarkSearch-6   	  100000	     17866 ns/op
+//
+// BenchmarkSearch-6   	  500000	      3723 ns/op (10k per layer, only top-level RLock()
+func BenchmarkSearch(b *testing.B) {
+	// First, we set up 128 diff layers, with 1K items each
+	fill := func(parent snapshot) *diffLayer {
+		var (
+			destructs = make(map[common.Hash]struct{})
+			accounts  = make(map[common.Hash][]byte)
+			storage   = make(map[common.Hash]map[common.Hash][]byte)
+		)
+		for i := 0; i < 10000; i++ {
+			accounts[randomHash()] = randomAccount()
+		}
+		return newDiffLayer(parent, common.Hash{}, destructs, accounts, storage)
+	}
+	var layer snapshot
+	layer = emptyLayer()
+	for i := 0; i < 128; i++ {
+		layer = fill(layer)
+	}
+	key := crypto.Keccak256Hash([]byte{0x13, 0x38})
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		layer.AccountRLP(key)
+	}
+}
+
+// BenchmarkSearchSlot checks how long it takes to find a non-existing key
+// - Number of layers: 128
+// - Each layers contains the account, with a couple of storage slots
+// BenchmarkSearchSlot-6   	  100000	     14554 ns/op
+// BenchmarkSearchSlot-6   	  100000	     22254 ns/op (when checking parent root using mutex)
+// BenchmarkSearchSlot-6   	  100000	     14551 ns/op (when checking parent number using atomic)
+// With bloom filter:
+// BenchmarkSearchSlot-6   	 3467835	       351 ns/op
+func BenchmarkSearchSlot(b *testing.B) {
+	// First, we set up 128 diff layers, with 1K items each
+	accountKey := crypto.Keccak256Hash([]byte{0x13, 0x37})
+	storageKey := crypto.Keccak256Hash([]byte{0x13, 0x37})
+	accountRLP := randomAccount()
+	fill := func(parent snapshot) *diffLayer {
+		var (
+			destructs = make(map[common.Hash]struct{})
+			accounts  = make(map[common.Hash][]byte)
+			storage   = make(map[common.Hash]map[common.Hash][]byte)
+		)
+		accounts[accountKey] = accountRLP
+
+		accStorage := make(map[common.Hash][]byte)
+		for i := 0; i < 5; i++ {
+			value := make([]byte, 32)
+			rand.Read(value)
+			accStorage[randomHash()] = value
+			storage[accountKey] = accStorage
+		}
+		return newDiffLayer(parent, common.Hash{}, destructs, accounts, storage)
+	}
+	var layer snapshot
+	layer = emptyLayer()
+	for i := 0; i < 128; i++ {
+		layer = fill(layer)
+	}
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		layer.Storage(accountKey, storageKey)
+	}
+}
+
+// With accountList and sorting
+// BenchmarkFlatten-6   	      50	  29890856 ns/op
+//
+// Without sorting and tracking accountlist
+// BenchmarkFlatten-6   	     300	   5511511 ns/op
+func BenchmarkFlatten(b *testing.B) {
+	fill := func(parent snapshot) *diffLayer {
+		var (
+			destructs = make(map[common.Hash]struct{})
+			accounts  = make(map[common.Hash][]byte)
+			storage   = make(map[common.Hash]map[common.Hash][]byte)
+		)
+		for i := 0; i < 100; i++ {
+			accountKey := randomHash()
+			accounts[accountKey] = randomAccount()
+
+			accStorage := make(map[common.Hash][]byte)
+			for i := 0; i < 20; i++ {
+				value := make([]byte, 32)
+				rand.Read(value)
+				accStorage[randomHash()] = value
+
+			}
+			storage[accountKey] = accStorage
+		}
+		return newDiffLayer(parent, common.Hash{}, destructs, accounts, storage)
+	}
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		b.StopTimer()
+		var layer snapshot
+		layer = emptyLayer()
+		for i := 1; i < 128; i++ {
+			layer = fill(layer)
+		}
+		b.StartTimer()
+
+		for i := 1; i < 128; i++ {
+			dl, ok := layer.(*diffLayer)
+			if !ok {
+				break
+			}
+			layer = dl.flatten()
+		}
+		b.StopTimer()
+	}
+}
+
+// This test writes ~324M of diff layers to disk, spread over
+// - 128 individual layers,
+// - each with 200 accounts
+// - containing 200 slots
+//
+// BenchmarkJournal-6   	       1	1471373923 ns/ops
+// BenchmarkJournal-6   	       1	1208083335 ns/op // bufio writer
+func BenchmarkJournal(b *testing.B) {
+	fill := func(parent snapshot) *diffLayer {
+		var (
+			destructs = make(map[common.Hash]struct{})
+			accounts  = make(map[common.Hash][]byte)
+			storage   = make(map[common.Hash]map[common.Hash][]byte)
+		)
+		for i := 0; i < 200; i++ {
+			accountKey := randomHash()
+			accounts[accountKey] = randomAccount()
+
+			accStorage := make(map[common.Hash][]byte)
+			for i := 0; i < 200; i++ {
+				value := make([]byte, 32)
+				rand.Read(value)
+				accStorage[randomHash()] = value
+
+			}
+			storage[accountKey] = accStorage
+		}
+		return newDiffLayer(parent, common.Hash{}, destructs, accounts, storage)
+	}
+	layer := snapshot(new(diskLayer))
+	for i := 1; i < 128; i++ {
+		layer = fill(layer)
+	}
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		layer.Journal(new(bytes.Buffer))
+	}
+}
diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go
new file mode 100644
index 0000000..496edaa
--- /dev/null
+++ b/core/state/snapshot/disklayer.go
@@ -0,0 +1,166 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snapshot
+
+import (
+	"bytes"
+	"sync"
+
+	"github.com/VictoriaMetrics/fastcache"
+	"github.com/ava-labs/coreth/core/rawdb"
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/ethdb"
+	"github.com/ethereum/go-ethereum/rlp"
+	"github.com/ethereum/go-ethereum/trie"
+)
+
+// diskLayer is a low level persistent snapshot built on top of a key-value store.
+type diskLayer struct {
+	diskdb ethdb.KeyValueStore // Key-value store containing the base snapshot
+	triedb *trie.Database      // Trie node cache for reconstuction purposes
+	cache  *fastcache.Cache    // Cache to avoid hitting the disk for direct access
+
+	root  common.Hash // Root hash of the base snapshot
+	stale bool        // Signals that the layer became stale (state progressed)
+
+	genMarker  []byte                    // Marker for the state that's indexed during initial layer generation
+	genPending chan struct{}             // Notification channel when generation is done (test synchronicity)
+	genAbort   chan chan *generatorStats // Notification channel to abort generating the snapshot in this layer
+
+	lock sync.RWMutex
+}
+
+// Root returns  root hash for which this snapshot was made.
+func (dl *diskLayer) Root() common.Hash {
+	return dl.root
+}
+
+// Parent always returns nil as there's no layer below the disk.
+func (dl *diskLayer) Parent() snapshot {
+	return nil
+}
+
+// Stale return whether this layer has become stale (was flattened across) or if
+// it's still live.
+func (dl *diskLayer) Stale() bool {
+	dl.lock.RLock()
+	defer dl.lock.RUnlock()
+
+	return dl.stale
+}
+
+// Account directly retrieves the account associated with a particular hash in
+// the snapshot slim data format.
+func (dl *diskLayer) Account(hash common.Hash) (*Account, error) {
+	data, err := dl.AccountRLP(hash)
+	if err != nil {
+		return nil, err
+	}
+	if len(data) == 0 { // can be both nil and []byte{}
+		return nil, nil
+	}
+	account := new(Account)
+	if err := rlp.DecodeBytes(data, account); err != nil {
+		panic(err)
+	}
+	return account, nil
+}
+
+// AccountRLP directly retrieves the account RLP associated with a particular
+// hash in the snapshot slim data format.
+func (dl *diskLayer) AccountRLP(hash common.Hash) ([]byte, error) {
+	dl.lock.RLock()
+	defer dl.lock.RUnlock()
+
+	// If the layer was flattened into, consider it invalid (any live reference to
+	// the original should be marked as unusable).
+	if dl.stale {
+		return nil, ErrSnapshotStale
+	}
+	// If the layer is being generated, ensure the requested hash has already been
+	// covered by the generator.
+	if dl.genMarker != nil && bytes.Compare(hash[:], dl.genMarker) > 0 {
+		return nil, ErrNotCoveredYet
+	}
+	// If we're in the disk layer, all diff layers missed
+	snapshotDirtyAccountMissMeter.Mark(1)
+
+	// Try to retrieve the account from the memory cache
+	if blob, found := dl.cache.HasGet(nil, hash[:]); found {
+		snapshotCleanAccountHitMeter.Mark(1)
+		snapshotCleanAccountReadMeter.Mark(int64(len(blob)))
+		return blob, nil
+	}
+	// Cache doesn't contain account, pull from disk and cache for later
+	blob := rawdb.ReadAccountSnapshot(dl.diskdb, hash)
+	dl.cache.Set(hash[:], blob)
+
+	snapshotCleanAccountMissMeter.Mark(1)
+	if n := len(blob); n > 0 {
+		snapshotCleanAccountWriteMeter.Mark(int64(n))
+	} else {
+		snapshotCleanAccountInexMeter.Mark(1)
+	}
+	return blob, nil
+}
+
+// Storage directly retrieves the storage data associated with a particular hash,
+// within a particular account.
+func (dl *diskLayer) Storage(accountHash, storageHash common.Hash) ([]byte, error) {
+	dl.lock.RLock()
+	defer dl.lock.RUnlock()
+
+	// If the layer was flattened into, consider it invalid (any live reference to
+	// the original should be marked as unusable).
+	if dl.stale {
+		return nil, ErrSnapshotStale
+	}
+	key := append(accountHash[:], storageHash[:]...)
+
+	// If the layer is being generated, ensure the requested hash has already been
+	// covered by the generator.
+	if dl.genMarker != nil && bytes.Compare(key, dl.genMarker) > 0 {
+		return nil, ErrNotCoveredYet
+	}
+	// If we're in the disk layer, all diff layers missed
+	snapshotDirtyStorageMissMeter.Mark(1)
+
+	// Try to retrieve the storage slot from the memory cache
+	if blob, found := dl.cache.HasGet(nil, key); found {
+		snapshotCleanStorageHitMeter.Mark(1)
+		snapshotCleanStorageReadMeter.Mark(int64(len(blob)))
+		return blob, nil
+	}
+	// Cache doesn't contain storage slot, pull from disk and cache for later
+	blob := rawdb.ReadStorageSnapshot(dl.diskdb, accountHash, storageHash)
+	dl.cache.Set(key, blob)
+
+	snapshotCleanStorageMissMeter.Mark(1)
+	if n := len(blob); n > 0 {
+		snapshotCleanStorageWriteMeter.Mark(int64(n))
+	} else {
+		snapshotCleanStorageInexMeter.Mark(1)
+	}
+	return blob, nil
+}
+
+// Update creates a new layer on top of the existing snapshot diff tree with
+// the specified data items. Note, the maps are retained by the method to avoid
+// copying everything.
+func (dl *diskLayer) Update(blockHash common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer {
+	return newDiffLayer(dl, blockHash, destructs, accounts, storage)
+}
diff --git a/core/state/snapshot/disklayer_test.go b/core/state/snapshot/disklayer_test.go
new file mode 100644
index 0000000..5df5efc
--- /dev/null
+++ b/core/state/snapshot/disklayer_test.go
@@ -0,0 +1,511 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snapshot
+
+import (
+	"bytes"
+	"io/ioutil"
+	"os"
+	"testing"
+
+	"github.com/VictoriaMetrics/fastcache"
+	"github.com/ava-labs/coreth/core/rawdb"
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/ethdb"
+	"github.com/ethereum/go-ethereum/ethdb/leveldb"
+	"github.com/ethereum/go-ethereum/ethdb/memorydb"
+)
+
+// reverse reverses the contents of a byte slice. It's used to update random accs
+// with deterministic changes.
+func reverse(blob []byte) []byte {
+	res := make([]byte, len(blob))
+	for i, b := range blob {
+		res[len(blob)-1-i] = b
+	}
+	return res
+}
+
+// Tests that merging something into a disk layer persists it into the database
+// and invalidates any previously written and cached values.
+func TestDiskMerge(t *testing.T) {
+	// Create some accounts in the disk layer
+	db := memorydb.New()
+
+	var (
+		accNoModNoCache     = common.Hash{0x1}
+		accNoModCache       = common.Hash{0x2}
+		accModNoCache       = common.Hash{0x3}
+		accModCache         = common.Hash{0x4}
+		accDelNoCache       = common.Hash{0x5}
+		accDelCache         = common.Hash{0x6}
+		conNoModNoCache     = common.Hash{0x7}
+		conNoModNoCacheSlot = common.Hash{0x70}
+		conNoModCache       = common.Hash{0x8}
+		conNoModCacheSlot   = common.Hash{0x80}
+		conModNoCache       = common.Hash{0x9}
+		conModNoCacheSlot   = common.Hash{0x90}
+		conModCache         = common.Hash{0xa}
+		conModCacheSlot     = common.Hash{0xa0}
+		conDelNoCache       = common.Hash{0xb}
+		conDelNoCacheSlot   = common.Hash{0xb0}
+		conDelCache         = common.Hash{0xc}
+		conDelCacheSlot     = common.Hash{0xc0}
+		conNukeNoCache      = common.Hash{0xd}
+		conNukeNoCacheSlot  = common.Hash{0xd0}
+		conNukeCache        = common.Hash{0xe}
+		conNukeCacheSlot    = common.Hash{0xe0}
+		baseRoot            = randomHash()
+		diffRoot            = randomHash()
+	)
+
+	rawdb.WriteAccountSnapshot(db, accNoModNoCache, accNoModNoCache[:])
+	rawdb.WriteAccountSnapshot(db, accNoModCache, accNoModCache[:])
+	rawdb.WriteAccountSnapshot(db, accModNoCache, accModNoCache[:])
+	rawdb.WriteAccountSnapshot(db, accModCache, accModCache[:])
+	rawdb.WriteAccountSnapshot(db, accDelNoCache, accDelNoCache[:])
+	rawdb.WriteAccountSnapshot(db, accDelCache, accDelCache[:])
+
+	rawdb.WriteAccountSnapshot(db, conNoModNoCache, conNoModNoCache[:])
+	rawdb.WriteStorageSnapshot(db, conNoModNoCache, conNoModNoCacheSlot, conNoModNoCacheSlot[:])
+	rawdb.WriteAccountSnapshot(db, conNoModCache, conNoModCache[:])
+	rawdb.WriteStorageSnapshot(db, conNoModCache, conNoModCacheSlot, conNoModCacheSlot[:])
+	rawdb.WriteAccountSnapshot(db, conModNoCache, conModNoCache[:])
+	rawdb.WriteStorageSnapshot(db, conModNoCache, conModNoCacheSlot, conModNoCacheSlot[:])
+	rawdb.WriteAccountSnapshot(db, conModCache, conModCache[:])
+	rawdb.WriteStorageSnapshot(db, conModCache, conModCacheSlot, conModCacheSlot[:])
+	rawdb.WriteAccountSnapshot(db, conDelNoCache, conDelNoCache[:])
+	rawdb.WriteStorageSnapshot(db, conDelNoCache, conDelNoCacheSlot, conDelNoCacheSlot[:])
+	rawdb.WriteAccountSnapshot(db, conDelCache, conDelCache[:])
+	rawdb.WriteStorageSnapshot(db, conDelCache, conDelCacheSlot, conDelCacheSlot[:])
+
+	rawdb.WriteAccountSnapshot(db, conNukeNoCache, conNukeNoCache[:])
+	rawdb.WriteStorageSnapshot(db, conNukeNoCache, conNukeNoCacheSlot, conNukeNoCacheSlot[:])
+	rawdb.WriteAccountSnapshot(db, conNukeCache, conNukeCache[:])
+	rawdb.WriteStorageSnapshot(db, conNukeCache, conNukeCacheSlot, conNukeCacheSlot[:])
+
+	rawdb.WriteSnapshotRoot(db, baseRoot)
+
+	// Create a disk layer based on the above and cache in some data
+	snaps := &Tree{
+		layers: map[common.Hash]snapshot{
+			baseRoot: &diskLayer{
+				diskdb: db,
+				cache:  fastcache.New(500 * 1024),
+				root:   baseRoot,
+			},
+		},
+	}
+	base := snaps.Snapshot(baseRoot)
+	base.AccountRLP(accNoModCache)
+	base.AccountRLP(accModCache)
+	base.AccountRLP(accDelCache)
+	base.Storage(conNoModCache, conNoModCacheSlot)
+	base.Storage(conModCache, conModCacheSlot)
+	base.Storage(conDelCache, conDelCacheSlot)
+	base.Storage(conNukeCache, conNukeCacheSlot)
+
+	// Modify or delete some accounts, flatten everything onto disk
+	if err := snaps.Update(diffRoot, baseRoot, map[common.Hash]struct{}{
+		accDelNoCache:  {},
+		accDelCache:    {},
+		conNukeNoCache: {},
+		conNukeCache:   {},
+	}, map[common.Hash][]byte{
+		accModNoCache: reverse(accModNoCache[:]),
+		accModCache:   reverse(accModCache[:]),
+	}, map[common.Hash]map[common.Hash][]byte{
+		conModNoCache: {conModNoCacheSlot: reverse(conModNoCacheSlot[:])},
+		conModCache:   {conModCacheSlot: reverse(conModCacheSlot[:])},
+		conDelNoCache: {conDelNoCacheSlot: nil},
+		conDelCache:   {conDelCacheSlot: nil},
+	}); err != nil {
+		t.Fatalf("failed to update snapshot tree: %v", err)
+	}
+	if err := snaps.Cap(diffRoot, 0); err != nil {
+		t.Fatalf("failed to flatten snapshot tree: %v", err)
+	}
+	// Retrieve all the data through the disk layer and validate it
+	base = snaps.Snapshot(diffRoot)
+	if _, ok := base.(*diskLayer); !ok {
+		t.Fatalf("update not flattend into the disk layer")
+	}
+
+	// assertAccount ensures that an account matches the given blob.
+	assertAccount := func(account common.Hash, data []byte) {
+		t.Helper()
+		blob, err := base.AccountRLP(account)
+		if err != nil {
+			t.Errorf("account access (%x) failed: %v", account, err)
+		} else if !bytes.Equal(blob, data) {
+			t.Errorf("account access (%x) mismatch: have %x, want %x", account, blob, data)
+		}
+	}
+	assertAccount(accNoModNoCache, accNoModNoCache[:])
+	assertAccount(accNoModCache, accNoModCache[:])
+	assertAccount(accModNoCache, reverse(accModNoCache[:]))
+	assertAccount(accModCache, reverse(accModCache[:]))
+	assertAccount(accDelNoCache, nil)
+	assertAccount(accDelCache, nil)
+
+	// assertStorage ensures that a storage slot matches the given blob.
+	assertStorage := func(account common.Hash, slot common.Hash, data []byte) {
+		t.Helper()
+		blob, err := base.Storage(account, slot)
+		if err != nil {
+			t.Errorf("storage access (%x:%x) failed: %v", account, slot, err)
+		} else if !bytes.Equal(blob, data) {
+			t.Errorf("storage access (%x:%x) mismatch: have %x, want %x", account, slot, blob, data)
+		}
+	}
+	assertStorage(conNoModNoCache, conNoModNoCacheSlot, conNoModNoCacheSlot[:])
+	assertStorage(conNoModCache, conNoModCacheSlot, conNoModCacheSlot[:])
+	assertStorage(conModNoCache, conModNoCacheSlot, reverse(conModNoCacheSlot[:]))
+	assertStorage(conModCache, conModCacheSlot, reverse(conModCacheSlot[:]))
+	assertStorage(conDelNoCache, conDelNoCacheSlot, nil)
+	assertStorage(conDelCache, conDelCacheSlot, nil)
+	assertStorage(conNukeNoCache, conNukeNoCacheSlot, nil)
+	assertStorage(conNukeCache, conNukeCacheSlot, nil)
+
+	// Retrieve all the data directly from the database and validate it
+
+	// assertDatabaseAccount ensures that an account from the database matches the given blob.
+	assertDatabaseAccount := func(account common.Hash, data []byte) {
+		t.Helper()
+		if blob := rawdb.ReadAccountSnapshot(db, account); !bytes.Equal(blob, data) {
+			t.Errorf("account database access (%x) mismatch: have %x, want %x", account, blob, data)
+		}
+	}
+	assertDatabaseAccount(accNoModNoCache, accNoModNoCache[:])
+	assertDatabaseAccount(accNoModCache, accNoModCache[:])
+	assertDatabaseAccount(accModNoCache, reverse(accModNoCache[:]))
+	assertDatabaseAccount(accModCache, reverse(accModCache[:]))
+	assertDatabaseAccount(accDelNoCache, nil)
+	assertDatabaseAccount(accDelCache, nil)
+
+	// assertDatabaseStorage ensures that a storage slot from the database matches the given blob.
+	assertDatabaseStorage := func(account common.Hash, slot common.Hash, data []byte) {
+		t.Helper()
+		if blob := rawdb.ReadStorageSnapshot(db, account, slot); !bytes.Equal(blob, data) {
+			t.Errorf("storage database access (%x:%x) mismatch: have %x, want %x", account, slot, blob, data)
+		}
+	}
+	assertDatabaseStorage(conNoModNoCache, conNoModNoCacheSlot, conNoModNoCacheSlot[:])
+	assertDatabaseStorage(conNoModCache, conNoModCacheSlot, conNoModCacheSlot[:])
+	assertDatabaseStorage(conModNoCache, conModNoCacheSlot, reverse(conModNoCacheSlot[:]))
+	assertDatabaseStorage(conModCache, conModCacheSlot, reverse(conModCacheSlot[:]))
+	assertDatabaseStorage(conDelNoCache, conDelNoCacheSlot, nil)
+	assertDatabaseStorage(conDelCache, conDelCacheSlot, nil)
+	assertDatabaseStorage(conNukeNoCache, conNukeNoCacheSlot, nil)
+	assertDatabaseStorage(conNukeCache, conNukeCacheSlot, nil)
+}
+
+// Tests that merging something into a disk layer persists it into the database
+// and invalidates any previously written and cached values, discarding anything
+// after the in-progress generation marker.
+func TestDiskPartialMerge(t *testing.T) {
+	// Iterate the test a few times to ensure we pick various internal orderings
+	// for the data slots as well as the progress marker.
+	for i := 0; i < 1024; i++ {
+		// Create some accounts in the disk layer
+		db := memorydb.New()
+
+		var (
+			accNoModNoCache     = randomHash()
+			accNoModCache       = randomHash()
+			accModNoCache       = randomHash()
+			accModCache         = randomHash()
+			accDelNoCache       = randomHash()
+			accDelCache         = randomHash()
+			conNoModNoCache     = randomHash()
+			conNoModNoCacheSlot = randomHash()
+			conNoModCache       = randomHash()
+			conNoModCacheSlot   = randomHash()
+			conModNoCache       = randomHash()
+			conModNoCacheSlot   = randomHash()
+			conModCache         = randomHash()
+			conModCacheSlot     = randomHash()
+			conDelNoCache       = randomHash()
+			conDelNoCacheSlot   = randomHash()
+			conDelCache         = randomHash()
+			conDelCacheSlot     = randomHash()
+			conNukeNoCache      = randomHash()
+			conNukeNoCacheSlot  = randomHash()
+			conNukeCache        = randomHash()
+			conNukeCacheSlot    = randomHash()
+			baseRoot            = randomHash()
+			diffRoot            = randomHash()
+			genMarker           = append(randomHash().Bytes(), randomHash().Bytes()...)
+		)
+
+		// insertAccount injects an account into the database if it's after the
+		// generator marker, drops the op otherwise. This is needed to seed the
+		// database with a valid starting snapshot.
+		insertAccount := func(account common.Hash, data []byte) {
+			if bytes.Compare(account[:], genMarker) <= 0 {
+				rawdb.WriteAccountSnapshot(db, account, data[:])
+			}
+		}
+		insertAccount(accNoModNoCache, accNoModNoCache[:])
+		insertAccount(accNoModCache, accNoModCache[:])
+		insertAccount(accModNoCache, accModNoCache[:])
+		insertAccount(accModCache, accModCache[:])
+		insertAccount(accDelNoCache, accDelNoCache[:])
+		insertAccount(accDelCache, accDelCache[:])
+
+		// insertStorage injects a storage slot into the database if it's after
+		// the  generator marker, drops the op otherwise. This is needed to seed
+		// the  database with a valid starting snapshot.
+		insertStorage := func(account common.Hash, slot common.Hash, data []byte) {
+			if bytes.Compare(append(account[:], slot[:]...), genMarker) <= 0 {
+				rawdb.WriteStorageSnapshot(db, account, slot, data[:])
+			}
+		}
+		insertAccount(conNoModNoCache, conNoModNoCache[:])
+		insertStorage(conNoModNoCache, conNoModNoCacheSlot, conNoModNoCacheSlot[:])
+		insertAccount(conNoModCache, conNoModCache[:])
+		insertStorage(conNoModCache, conNoModCacheSlot, conNoModCacheSlot[:])
+		insertAccount(conModNoCache, conModNoCache[:])
+		insertStorage(conModNoCache, conModNoCacheSlot, conModNoCacheSlot[:])
+		insertAccount(conModCache, conModCache[:])
+		insertStorage(conModCache, conModCacheSlot, conModCacheSlot[:])
+		insertAccount(conDelNoCache, conDelNoCache[:])
+		insertStorage(conDelNoCache, conDelNoCacheSlot, conDelNoCacheSlot[:])
+		insertAccount(conDelCache, conDelCache[:])
+		insertStorage(conDelCache, conDelCacheSlot, conDelCacheSlot[:])
+
+		insertAccount(conNukeNoCache, conNukeNoCache[:])
+		insertStorage(conNukeNoCache, conNukeNoCacheSlot, conNukeNoCacheSlot[:])
+		insertAccount(conNukeCache, conNukeCache[:])
+		insertStorage(conNukeCache, conNukeCacheSlot, conNukeCacheSlot[:])
+
+		rawdb.WriteSnapshotRoot(db, baseRoot)
+
+		// Create a disk layer based on the above using a random progress marker
+		// and cache in some data.
+		snaps := &Tree{
+			layers: map[common.Hash]snapshot{
+				baseRoot: &diskLayer{
+					diskdb: db,
+					cache:  fastcache.New(500 * 1024),
+					root:   baseRoot,
+				},
+			},
+		}
+		snaps.layers[baseRoot].(*diskLayer).genMarker = genMarker
+		base := snaps.Snapshot(baseRoot)
+
+		// assertAccount ensures that an account matches the given blob if it's
+		// already covered by the disk snapshot, and errors out otherwise.
+		assertAccount := func(account common.Hash, data []byte) {
+			t.Helper()
+			blob, err := base.AccountRLP(account)
+			if bytes.Compare(account[:], genMarker) > 0 && err != ErrNotCoveredYet {
+				t.Fatalf("test %d: post-marker (%x) account access (%x) succeeded: %x", i, genMarker, account, blob)
+			}
+			if bytes.Compare(account[:], genMarker) <= 0 && !bytes.Equal(blob, data) {
+				t.Fatalf("test %d: pre-marker (%x) account access (%x) mismatch: have %x, want %x", i, genMarker, account, blob, data)
+			}
+		}
+		assertAccount(accNoModCache, accNoModCache[:])
+		assertAccount(accModCache, accModCache[:])
+		assertAccount(accDelCache, accDelCache[:])
+
+		// assertStorage ensures that a storage slot matches the given blob if
+		// it's already covered by the disk snapshot, and errors out otherwise.
+		assertStorage := func(account common.Hash, slot common.Hash, data []byte) {
+			t.Helper()
+			blob, err := base.Storage(account, slot)
+			if bytes.Compare(append(account[:], slot[:]...), genMarker) > 0 && err != ErrNotCoveredYet {
+				t.Fatalf("test %d: post-marker (%x) storage access (%x:%x) succeeded: %x", i, genMarker, account, slot, blob)
+			}
+			if bytes.Compare(append(account[:], slot[:]...), genMarker) <= 0 && !bytes.Equal(blob, data) {
+				t.Fatalf("test %d: pre-marker (%x) storage access (%x:%x) mismatch: have %x, want %x", i, genMarker, account, slot, blob, data)
+			}
+		}
+		assertStorage(conNoModCache, conNoModCacheSlot, conNoModCacheSlot[:])
+		assertStorage(conModCache, conModCacheSlot, conModCacheSlot[:])
+		assertStorage(conDelCache, conDelCacheSlot, conDelCacheSlot[:])
+		assertStorage(conNukeCache, conNukeCacheSlot, conNukeCacheSlot[:])
+
+		// Modify or delete some accounts, flatten everything onto disk
+		if err := snaps.Update(diffRoot, baseRoot, map[common.Hash]struct{}{
+			accDelNoCache:  {},
+			accDelCache:    {},
+			conNukeNoCache: {},
+			conNukeCache:   {},
+		}, map[common.Hash][]byte{
+			accModNoCache: reverse(accModNoCache[:]),
+			accModCache:   reverse(accModCache[:]),
+		}, map[common.Hash]map[common.Hash][]byte{
+			conModNoCache: {conModNoCacheSlot: reverse(conModNoCacheSlot[:])},
+			conModCache:   {conModCacheSlot: reverse(conModCacheSlot[:])},
+			conDelNoCache: {conDelNoCacheSlot: nil},
+			conDelCache:   {conDelCacheSlot: nil},
+		}); err != nil {
+			t.Fatalf("test %d: failed to update snapshot tree: %v", i, err)
+		}
+		if err := snaps.Cap(diffRoot, 0); err != nil {
+			t.Fatalf("test %d: failed to flatten snapshot tree: %v", i, err)
+		}
+		// Retrieve all the data through the disk layer and validate it
+		base = snaps.Snapshot(diffRoot)
+		if _, ok := base.(*diskLayer); !ok {
+			t.Fatalf("test %d: update not flattend into the disk layer", i)
+		}
+		assertAccount(accNoModNoCache, accNoModNoCache[:])
+		assertAccount(accNoModCache, accNoModCache[:])
+		assertAccount(accModNoCache, reverse(accModNoCache[:]))
+		assertAccount(accModCache, reverse(accModCache[:]))
+		assertAccount(accDelNoCache, nil)
+		assertAccount(accDelCache, nil)
+
+		assertStorage(conNoModNoCache, conNoModNoCacheSlot, conNoModNoCacheSlot[:])
+		assertStorage(conNoModCache, conNoModCacheSlot, conNoModCacheSlot[:])
+		assertStorage(conModNoCache, conModNoCacheSlot, reverse(conModNoCacheSlot[:]))
+		assertStorage(conModCache, conModCacheSlot, reverse(conModCacheSlot[:]))
+		assertStorage(conDelNoCache, conDelNoCacheSlot, nil)
+		assertStorage(conDelCache, conDelCacheSlot, nil)
+		assertStorage(conNukeNoCache, conNukeNoCacheSlot, nil)
+		assertStorage(conNukeCache, conNukeCacheSlot, nil)
+
+		// Retrieve all the data directly from the database and validate it
+
+		// assertDatabaseAccount ensures that an account inside the database matches
+		// the given blob if it's already covered by the disk snapshot, and does not
+		// exist otherwise.
+		assertDatabaseAccount := func(account common.Hash, data []byte) {
+			t.Helper()
+			blob := rawdb.ReadAccountSnapshot(db, account)
+			if bytes.Compare(account[:], genMarker) > 0 && blob != nil {
+				t.Fatalf("test %d: post-marker (%x) account database access (%x) succeeded: %x", i, genMarker, account, blob)
+			}
+			if bytes.Compare(account[:], genMarker) <= 0 && !bytes.Equal(blob, data) {
+				t.Fatalf("test %d: pre-marker (%x) account database access (%x) mismatch: have %x, want %x", i, genMarker, account, blob, data)
+			}
+		}
+		assertDatabaseAccount(accNoModNoCache, accNoModNoCache[:])
+		assertDatabaseAccount(accNoModCache, accNoModCache[:])
+		assertDatabaseAccount(accModNoCache, reverse(accModNoCache[:]))
+		assertDatabaseAccount(accModCache, reverse(accModCache[:]))
+		assertDatabaseAccount(accDelNoCache, nil)
+		assertDatabaseAccount(accDelCache, nil)
+
+		// assertDatabaseStorage ensures that a storage slot inside the database
+		// matches the given blob if it's already covered by the disk snapshot,
+		// and does not exist otherwise.
+		assertDatabaseStorage := func(account common.Hash, slot common.Hash, data []byte) {
+			t.Helper()
+			blob := rawdb.ReadStorageSnapshot(db, account, slot)
+			if bytes.Compare(append(account[:], slot[:]...), genMarker) > 0 && blob != nil {
+				t.Fatalf("test %d: post-marker (%x) storage database access (%x:%x) succeeded: %x", i, genMarker, account, slot, blob)
+			}
+			if bytes.Compare(append(account[:], slot[:]...), genMarker) <= 0 && !bytes.Equal(blob, data) {
+				t.Fatalf("test %d: pre-marker (%x) storage database access (%x:%x) mismatch: have %x, want %x", i, genMarker, account, slot, blob, data)
+			}
+		}
+		assertDatabaseStorage(conNoModNoCache, conNoModNoCacheSlot, conNoModNoCacheSlot[:])
+		assertDatabaseStorage(conNoModCache, conNoModCacheSlot, conNoModCacheSlot[:])
+		assertDatabaseStorage(conModNoCache, conModNoCacheSlot, reverse(conModNoCacheSlot[:]))
+		assertDatabaseStorage(conModCache, conModCacheSlot, reverse(conModCacheSlot[:]))
+		assertDatabaseStorage(conDelNoCache, conDelNoCacheSlot, nil)
+		assertDatabaseStorage(conDelCache, conDelCacheSlot, nil)
+		assertDatabaseStorage(conNukeNoCache, conNukeNoCacheSlot, nil)
+		assertDatabaseStorage(conNukeCache, conNukeCacheSlot, nil)
+	}
+}
+
+// Tests that merging something into a disk layer persists it into the database
+// and invalidates any previously written and cached values, discarding anything
+// after the in-progress generation marker.
+//
+// This test case is a tiny specialized case of TestDiskPartialMerge, which tests
+// some very specific cornercases that random tests won't ever trigger.
+func TestDiskMidAccountPartialMerge(t *testing.T) {
+	// TODO(@karalabe) ?
+}
+
+// TestDiskSeek tests that seek-operations work on the disk layer
+func TestDiskSeek(t *testing.T) {
+	// Create some accounts in the disk layer
+	var db ethdb.Database
+
+	if dir, err := ioutil.TempDir("", "disklayer-test"); err != nil {
+		t.Fatal(err)
+	} else {
+		defer os.RemoveAll(dir)
+		diskdb, err := leveldb.New(dir, 256, 0, "")
+		if err != nil {
+			t.Fatal(err)
+		}
+		db = rawdb.NewDatabase(diskdb)
+	}
+	// Fill even keys [0,2,4...]
+	for i := 0; i < 0xff; i += 2 {
+		acc := common.Hash{byte(i)}
+		rawdb.WriteAccountSnapshot(db, acc, acc[:])
+	}
+	// Add an 'higher' key, with incorrect (higher) prefix
+	highKey := []byte{rawdb.SnapshotAccountPrefix[0] + 1}
+	db.Put(highKey, []byte{0xff, 0xff})
+
+	baseRoot := randomHash()
+	rawdb.WriteSnapshotRoot(db, baseRoot)
+
+	snaps := &Tree{
+		layers: map[common.Hash]snapshot{
+			baseRoot: &diskLayer{
+				diskdb: db,
+				cache:  fastcache.New(500 * 1024),
+				root:   baseRoot,
+			},
+		},
+	}
+	// Test some different seek positions
+	type testcase struct {
+		pos    byte
+		expkey byte
+	}
+	var cases = []testcase{
+		{0xff, 0x55}, // this should exit immediately without checking key
+		{0x01, 0x02},
+		{0xfe, 0xfe},
+		{0xfd, 0xfe},
+		{0x00, 0x00},
+	}
+	for i, tc := range cases {
+		it, err := snaps.AccountIterator(baseRoot, common.Hash{tc.pos})
+		if err != nil {
+			t.Fatalf("case %d, error: %v", i, err)
+		}
+		count := 0
+		for it.Next() {
+			k, v, err := it.Hash()[0], it.Account()[0], it.Error()
+			if err != nil {
+				t.Fatalf("test %d, item %d, error: %v", i, count, err)
+			}
+			// First item in iterator should have the expected key
+			if count == 0 && k != tc.expkey {
+				t.Fatalf("test %d, item %d, got %v exp %v", i, count, k, tc.expkey)
+			}
+			count++
+			if v != k {
+				t.Fatalf("test %d, item %d, value wrong, got %v exp %v", i, count, v, k)
+			}
+		}
+	}
+}
diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go
new file mode 100644
index 0000000..dac782f
--- /dev/null
+++ b/core/state/snapshot/generate.go
@@ -0,0 +1,264 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snapshot
+
+import (
+	"bytes"
+	"encoding/binary"
+	"math/big"
+	"time"
+
+	"github.com/VictoriaMetrics/fastcache"
+	"github.com/ava-labs/coreth/core/rawdb"
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/common/math"
+	"github.com/ethereum/go-ethereum/crypto"
+	"github.com/ethereum/go-ethereum/ethdb"
+	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum/go-ethereum/rlp"
+	"github.com/ethereum/go-ethereum/trie"
+)
+
+var (
+	// emptyRoot is the known root hash of an empty trie.
+	emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")
+
+	// emptyCode is the known hash of the empty EVM bytecode.
+	emptyCode = crypto.Keccak256Hash(nil)
+)
+
+// generatorStats is a collection of statistics gathered by the snapshot generator
+// for logging purposes.
+type generatorStats struct {
+	wiping   chan struct{}      // Notification channel if wiping is in progress
+	origin   uint64             // Origin prefix where generation started
+	start    time.Time          // Timestamp when generation started
+	accounts uint64             // Number of accounts indexed
+	slots    uint64             // Number of storage slots indexed
+	storage  common.StorageSize // Account and storage slot size
+}
+
+// Log creates an contextual log with the given message and the context pulled
+// from the internally maintained statistics.
+func (gs *generatorStats) Log(msg string, root common.Hash, marker []byte) {
+	var ctx []interface{}
+	if root != (common.Hash{}) {
+		ctx = append(ctx, []interface{}{"root", root}...)
+	}
+	// Figure out whether we're after or within an account
+	switch len(marker) {
+	case common.HashLength:
+		ctx = append(ctx, []interface{}{"at", common.BytesToHash(marker)}...)
+	case 2 * common.HashLength:
+		ctx = append(ctx, []interface{}{
+			"in", common.BytesToHash(marker[:common.HashLength]),
+			"at", common.BytesToHash(marker[common.HashLength:]),
+		}...)
+	}
+	// Add the usual measurements
+	ctx = append(ctx, []interface{}{
+		"accounts", gs.accounts,
+		"slots", gs.slots,
+		"storage", gs.storage,
+		"elapsed", common.PrettyDuration(time.Since(gs.start)),
+	}...)
+	// Calculate the estimated indexing time based on current stats
+	if len(marker) > 0 {
+		if done := binary.BigEndian.Uint64(marker[:8]) - gs.origin; done > 0 {
+			left := math.MaxUint64 - binary.BigEndian.Uint64(marker[:8])
+
+			speed := done/uint64(time.Since(gs.start)/time.Millisecond+1) + 1 // +1s to avoid division by zero
+			ctx = append(ctx, []interface{}{
+				"eta", common.PrettyDuration(time.Duration(left/speed) * time.Millisecond),
+			}...)
+		}
+	}
+	log.Info(msg, ctx...)
+}
+
+// generateSnapshot regenerates a brand new snapshot based on an existing state
+// database and head block asynchronously. The snapshot is returned immediately
+// and generation is continued in the background until done.
+func generateSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, wiper chan struct{}) *diskLayer {
+	// Wipe any previously existing snapshot from the database if no wiper is
+	// currently in progress.
+	if wiper == nil {
+		wiper = wipeSnapshot(diskdb, true)
+	}
+	// Create a new disk layer with an initialized state marker at zero
+	rawdb.WriteSnapshotRoot(diskdb, root)
+
+	base := &diskLayer{
+		diskdb:     diskdb,
+		triedb:     triedb,
+		root:       root,
+		cache:      fastcache.New(cache * 1024 * 1024),
+		genMarker:  []byte{}, // Initialized but empty!
+		genPending: make(chan struct{}),
+		genAbort:   make(chan chan *generatorStats),
+	}
+	go base.generate(&generatorStats{wiping: wiper, start: time.Now()})
+	return base
+}
+
+// generate is a background thread that iterates over the state and storage tries,
+// constructing the state snapshot. All the arguments are purely for statistics
+// gethering and logging, since the method surfs the blocks as they arrive, often
+// being restarted.
+func (dl *diskLayer) generate(stats *generatorStats) {
+	// If a database wipe is in operation, wait until it's done
+	if stats.wiping != nil {
+		stats.Log("Wiper running, state snapshotting paused", common.Hash{}, dl.genMarker)
+		select {
+		// If wiper is done, resume normal mode of operation
+		case <-stats.wiping:
+			stats.wiping = nil
+			stats.start = time.Now()
+
+		// If generator was aboted during wipe, return
+		case abort := <-dl.genAbort:
+			abort <- stats
+			return
+		}
+	}
+	// Create an account and state iterator pointing to the current generator marker
+	accTrie, err := trie.NewSecure(dl.root, dl.triedb)
+	if err != nil {
+		// The account trie is missing (GC), surf the chain until one becomes available
+		stats.Log("Trie missing, state snapshotting paused", dl.root, dl.genMarker)
+
+		abort := <-dl.genAbort
+		abort <- stats
+		return
+	}
+	stats.Log("Resuming state snapshot generation", dl.root, dl.genMarker)
+
+	var accMarker []byte
+	if len(dl.genMarker) > 0 { // []byte{} is the start, use nil for that
+		accMarker = dl.genMarker[:common.HashLength]
+	}
+	accIt := trie.NewIterator(accTrie.NodeIterator(accMarker))
+	batch := dl.diskdb.NewBatch()
+
+	// Iterate from the previous marker and continue generating the state snapshot
+	logged := time.Now()
+	for accIt.Next() {
+		// Retrieve the current account and flatten it into the internal format
+		accountHash := common.BytesToHash(accIt.Key)
+
+		var acc struct {
+			Nonce    uint64
+			Balance  *big.Int
+			Root     common.Hash
+			CodeHash []byte
+		}
+		if err := rlp.DecodeBytes(accIt.Value, &acc); err != nil {
+			log.Crit("Invalid account encountered during snapshot creation", "err", err)
+		}
+		data := SlimAccountRLP(acc.Nonce, acc.Balance, acc.Root, acc.CodeHash)
+
+		// If the account is not yet in-progress, write it out
+		if accMarker == nil || !bytes.Equal(accountHash[:], accMarker) {
+			rawdb.WriteAccountSnapshot(batch, accountHash, data)
+			stats.storage += common.StorageSize(1 + common.HashLength + len(data))
+			stats.accounts++
+		}
+		// If we've exceeded our batch allowance or termination was requested, flush to disk
+		var abort chan *generatorStats
+		select {
+		case abort = <-dl.genAbort:
+		default:
+		}
+		if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil {
+			// Only write and set the marker if we actually did something useful
+			if batch.ValueSize() > 0 {
+				batch.Write()
+				batch.Reset()
+
+				dl.lock.Lock()
+				dl.genMarker = accountHash[:]
+				dl.lock.Unlock()
+			}
+			if abort != nil {
+				stats.Log("Aborting state snapshot generation", dl.root, accountHash[:])
+				abort <- stats
+				return
+			}
+		}
+		// If the account is in-progress, continue where we left off (otherwise iterate all)
+		if acc.Root != emptyRoot {
+			storeTrie, err := trie.NewSecure(acc.Root, dl.triedb)
+			if err != nil {
+				log.Crit("Storage trie inaccessible for snapshot generation", "err", err)
+			}
+			var storeMarker []byte
+			if accMarker != nil && bytes.Equal(accountHash[:], accMarker) && len(dl.genMarker) > common.HashLength {
+				storeMarker = dl.genMarker[common.HashLength:]
+			}
+			storeIt := trie.NewIterator(storeTrie.NodeIterator(storeMarker))
+			for storeIt.Next() {
+				rawdb.WriteStorageSnapshot(batch, accountHash, common.BytesToHash(storeIt.Key), storeIt.Value)
+				stats.storage += common.StorageSize(1 + 2*common.HashLength + len(storeIt.Value))
+				stats.slots++
+
+				// If we've exceeded our batch allowance or termination was requested, flush to disk
+				var abort chan *generatorStats
+				select {
+				case abort = <-dl.genAbort:
+				default:
+				}
+				if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil {
+					// Only write and set the marker if we actually did something useful
+					if batch.ValueSize() > 0 {
+						batch.Write()
+						batch.Reset()
+
+						dl.lock.Lock()
+						dl.genMarker = append(accountHash[:], storeIt.Key...)
+						dl.lock.Unlock()
+					}
+					if abort != nil {
+						stats.Log("Aborting state snapshot generation", dl.root, append(accountHash[:], storeIt.Key...))
+						abort <- stats
+						return
+					}
+				}
+			}
+		}
+		if time.Since(logged) > 8*time.Second {
+			stats.Log("Generating state snapshot", dl.root, accIt.Key)
+			logged = time.Now()
+		}
+		// Some account processed, unmark the marker
+		accMarker = nil
+	}
+	// Snapshot fully generated, set the marker to nil
+	if batch.ValueSize() > 0 {
+		batch.Write()
+	}
+	log.Info("Generated state snapshot", "accounts", stats.accounts, "slots", stats.slots,
+		"storage", stats.storage, "elapsed", common.PrettyDuration(time.Since(stats.start)))
+
+	dl.lock.Lock()
+	dl.genMarker = nil
+	close(dl.genPending)
+	dl.lock.Unlock()
+
+	// Someone will be looking for us, wait it out
+	abort := <-dl.genAbort
+	abort <- nil
+}
diff --git a/core/state/snapshot/iterator.go b/core/state/snapshot/iterator.go
new file mode 100644
index 0000000..ef527ff
--- /dev/null
+++ b/core/state/snapshot/iterator.go
@@ -0,0 +1,400 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snapshot
+
+import (
+	"bytes"
+	"fmt"
+	"sort"
+
+	"github.com/ava-labs/coreth/core/rawdb"
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/ethdb"
+)
+
+// Iterator is an iterator to step over all the accounts or the specific
+// storage in a snapshot which may or may not be composed of multiple layers.
+type Iterator interface {
+	// Next steps the iterator forward one element, returning false if exhausted,
+	// or an error if iteration failed for some reason (e.g. root being iterated
+	// becomes stale and garbage collected).
+	Next() bool
+
+	// Error returns any failure that occurred during iteration, which might have
+	// caused a premature iteration exit (e.g. snapshot stack becoming stale).
+	Error() error
+
+	// Hash returns the hash of the account or storage slot the iterator is
+	// currently at.
+	Hash() common.Hash
+
+	// Release releases associated resources. Release should always succeed and
+	// can be called multiple times without causing error.
+	Release()
+}
+
+// AccountIterator is an iterator to step over all the accounts in a snapshot,
+// which may or may not be composed of multiple layers.
+type AccountIterator interface {
+	Iterator
+
+	// Account returns the RLP encoded slim account the iterator is currently at.
+	// An error will be returned if the iterator becomes invalid
+	Account() []byte
+}
+
+// StorageIterator is an iterator to step over the specific storage in a snapshot,
+// which may or may not be composed of multiple layers.
+type StorageIterator interface {
+	Iterator
+
+	// Slot returns the storage slot the iterator is currently at. An error will
+	// be returned if the iterator becomes invalid
+	Slot() []byte
+}
+
+// diffAccountIterator is an account iterator that steps over the accounts (both
+// live and deleted) contained within a single diff layer. Higher order iterators
+// will use the deleted accounts to skip deeper iterators.
+type diffAccountIterator struct {
+	// curHash is the current hash the iterator is positioned on. The field is
+	// explicitly tracked since the referenced diff layer might go stale after
+	// the iterator was positioned and we don't want to fail accessing the old
+	// hash as long as the iterator is not touched any more.
+	curHash common.Hash
+
+	layer *diffLayer    // Live layer to retrieve values from
+	keys  []common.Hash // Keys left in the layer to iterate
+	fail  error         // Any failures encountered (stale)
+}
+
+// AccountIterator creates an account iterator over a single diff layer.
+func (dl *diffLayer) AccountIterator(seek common.Hash) AccountIterator {
+	// Seek out the requested starting account
+	hashes := dl.AccountList()
+	index := sort.Search(len(hashes), func(i int) bool {
+		return bytes.Compare(seek[:], hashes[i][:]) <= 0
+	})
+	// Assemble and returned the already seeked iterator
+	return &diffAccountIterator{
+		layer: dl,
+		keys:  hashes[index:],
+	}
+}
+
+// Next steps the iterator forward one element, returning false if exhausted.
+func (it *diffAccountIterator) Next() bool {
+	// If the iterator was already stale, consider it a programmer error. Although
+	// we could just return false here, triggering this path would probably mean
+	// somebody forgot to check for Error, so lets blow up instead of undefined
+	// behavior that's hard to debug.
+	if it.fail != nil {
+		panic(fmt.Sprintf("called Next of failed iterator: %v", it.fail))
+	}
+	// Stop iterating if all keys were exhausted
+	if len(it.keys) == 0 {
+		return false
+	}
+	if it.layer.Stale() {
+		it.fail, it.keys = ErrSnapshotStale, nil
+		return false
+	}
+	// Iterator seems to be still alive, retrieve and cache the live hash
+	it.curHash = it.keys[0]
+	// key cached, shift the iterator and notify the user of success
+	it.keys = it.keys[1:]
+	return true
+}
+
+// Error returns any failure that occurred during iteration, which might have
+// caused a premature iteration exit (e.g. snapshot stack becoming stale).
+func (it *diffAccountIterator) Error() error {
+	return it.fail
+}
+
+// Hash returns the hash of the account the iterator is currently at.
+func (it *diffAccountIterator) Hash() common.Hash {
+	return it.curHash
+}
+
+// Account returns the RLP encoded slim account the iterator is currently at.
+// This method may _fail_, if the underlying layer has been flattened between
+// the call to Next and Acccount. That type of error will set it.Err.
+// This method assumes that flattening does not delete elements from
+// the accountdata mapping (writing nil into it is fine though), and will panic
+// if elements have been deleted.
+//
+// Note the returned account is not a copy, please don't modify it.
+func (it *diffAccountIterator) Account() []byte {
+	it.layer.lock.RLock()
+	blob, ok := it.layer.accountData[it.curHash]
+	if !ok {
+		if _, ok := it.layer.destructSet[it.curHash]; ok {
+			it.layer.lock.RUnlock()
+			return nil
+		}
+		panic(fmt.Sprintf("iterator referenced non-existent account: %x", it.curHash))
+	}
+	it.layer.lock.RUnlock()
+	if it.layer.Stale() {
+		it.fail, it.keys = ErrSnapshotStale, nil
+	}
+	return blob
+}
+
+// Release is a noop for diff account iterators as there are no held resources.
+func (it *diffAccountIterator) Release() {}
+
+// diskAccountIterator is an account iterator that steps over the live accounts
+// contained within a disk layer.
+type diskAccountIterator struct {
+	layer *diskLayer
+	it    ethdb.Iterator
+}
+
+// AccountIterator creates an account iterator over a disk layer.
+func (dl *diskLayer) AccountIterator(seek common.Hash) AccountIterator {
+	pos := common.TrimRightZeroes(seek[:])
+	return &diskAccountIterator{
+		layer: dl,
+		it:    dl.diskdb.NewIterator(rawdb.SnapshotAccountPrefix, pos),
+	}
+}
+
+// Next steps the iterator forward one element, returning false if exhausted.
+func (it *diskAccountIterator) Next() bool {
+	// If the iterator was already exhausted, don't bother
+	if it.it == nil {
+		return false
+	}
+	// Try to advance the iterator and release it if we reached the end
+	for {
+		if !it.it.Next() {
+			it.it.Release()
+			it.it = nil
+			return false
+		}
+		if len(it.it.Key()) == len(rawdb.SnapshotAccountPrefix)+common.HashLength {
+			break
+		}
+	}
+	return true
+}
+
+// Error returns any failure that occurred during iteration, which might have
+// caused a premature iteration exit (e.g. snapshot stack becoming stale).
+//
+// A diff layer is immutable after creation content wise and can always be fully
+// iterated without error, so this method always returns nil.
+func (it *diskAccountIterator) Error() error {
+	if it.it == nil {
+		return nil // Iterator is exhausted and released
+	}
+	return it.it.Error()
+}
+
+// Hash returns the hash of the account the iterator is currently at.
+func (it *diskAccountIterator) Hash() common.Hash {
+	return common.BytesToHash(it.it.Key()) // The prefix will be truncated
+}
+
+// Account returns the RLP encoded slim account the iterator is currently at.
+func (it *diskAccountIterator) Account() []byte {
+	return it.it.Value()
+}
+
+// Release releases the database snapshot held during iteration.
+func (it *diskAccountIterator) Release() {
+	// The iterator is auto-released on exhaustion, so make sure it's still alive
+	if it.it != nil {
+		it.it.Release()
+		it.it = nil
+	}
+}
+
+// diffStorageIterator is a storage iterator that steps over the specific storage
+// (both live and deleted) contained within a single diff layer. Higher order
+// iterators will use the deleted slot to skip deeper iterators.
+type diffStorageIterator struct {
+	// curHash is the current hash the iterator is positioned on. The field is
+	// explicitly tracked since the referenced diff layer might go stale after
+	// the iterator was positioned and we don't want to fail accessing the old
+	// hash as long as the iterator is not touched any more.
+	curHash common.Hash
+	account common.Hash
+
+	layer *diffLayer    // Live layer to retrieve values from
+	keys  []common.Hash // Keys left in the layer to iterate
+	fail  error         // Any failures encountered (stale)
+}
+
+// StorageIterator creates a storage iterator over a single diff layer.
+// Execept the storage iterator is returned, there is an additional flag
+// "destructed" returned. If it's true then it means the whole storage is
+// destructed in this layer(maybe recreated too), don't bother deeper layer
+// for storage retrieval.
+func (dl *diffLayer) StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool) {
+	// Create the storage for this account even it's marked
+	// as destructed. The iterator is for the new one which
+	// just has the same address as the deleted one.
+	hashes, destructed := dl.StorageList(account)
+	index := sort.Search(len(hashes), func(i int) bool {
+		return bytes.Compare(seek[:], hashes[i][:]) <= 0
+	})
+	// Assemble and returned the already seeked iterator
+	return &diffStorageIterator{
+		layer:   dl,
+		account: account,
+		keys:    hashes[index:],
+	}, destructed
+}
+
+// Next steps the iterator forward one element, returning false if exhausted.
+func (it *diffStorageIterator) Next() bool {
+	// If the iterator was already stale, consider it a programmer error. Although
+	// we could just return false here, triggering this path would probably mean
+	// somebody forgot to check for Error, so lets blow up instead of undefined
+	// behavior that's hard to debug.
+	if it.fail != nil {
+		panic(fmt.Sprintf("called Next of failed iterator: %v", it.fail))
+	}
+	// Stop iterating if all keys were exhausted
+	if len(it.keys) == 0 {
+		return false
+	}
+	if it.layer.Stale() {
+		it.fail, it.keys = ErrSnapshotStale, nil
+		return false
+	}
+	// Iterator seems to be still alive, retrieve and cache the live hash
+	it.curHash = it.keys[0]
+	// key cached, shift the iterator and notify the user of success
+	it.keys = it.keys[1:]
+	return true
+}
+
+// Error returns any failure that occurred during iteration, which might have
+// caused a premature iteration exit (e.g. snapshot stack becoming stale).
+func (it *diffStorageIterator) Error() error {
+	return it.fail
+}
+
+// Hash returns the hash of the storage slot the iterator is currently at.
+func (it *diffStorageIterator) Hash() common.Hash {
+	return it.curHash
+}
+
+// Slot returns the raw storage slot value the iterator is currently at.
+// This method may _fail_, if the underlying layer has been flattened between
+// the call to Next and Value. That type of error will set it.Err.
+// This method assumes that flattening does not delete elements from
+// the storage mapping (writing nil into it is fine though), and will panic
+// if elements have been deleted.
+//
+// Note the returned slot is not a copy, please don't modify it.
+func (it *diffStorageIterator) Slot() []byte {
+	it.layer.lock.RLock()
+	storage, ok := it.layer.storageData[it.account]
+	if !ok {
+		panic(fmt.Sprintf("iterator referenced non-existent account storage: %x", it.account))
+	}
+	// Storage slot might be nil(deleted), but it must exist
+	blob, ok := storage[it.curHash]
+	if !ok {
+		panic(fmt.Sprintf("iterator referenced non-existent storage slot: %x", it.curHash))
+	}
+	it.layer.lock.RUnlock()
+	if it.layer.Stale() {
+		it.fail, it.keys = ErrSnapshotStale, nil
+	}
+	return blob
+}
+
+// Release is a noop for diff account iterators as there are no held resources.
+func (it *diffStorageIterator) Release() {}
+
+// diskStorageIterator is a storage iterator that steps over the live storage
+// contained within a disk layer.
+type diskStorageIterator struct {
+	layer   *diskLayer
+	account common.Hash
+	it      ethdb.Iterator
+}
+
+// StorageIterator creates a storage iterator over a disk layer.
+// If the whole storage is destructed, then all entries in the disk
+// layer are deleted already. So the "destructed" flag returned here
+// is always false.
+func (dl *diskLayer) StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool) {
+	pos := common.TrimRightZeroes(seek[:])
+	return &diskStorageIterator{
+		layer:   dl,
+		account: account,
+		it:      dl.diskdb.NewIterator(append(rawdb.SnapshotStoragePrefix, account.Bytes()...), pos),
+	}, false
+}
+
+// Next steps the iterator forward one element, returning false if exhausted.
+func (it *diskStorageIterator) Next() bool {
+	// If the iterator was already exhausted, don't bother
+	if it.it == nil {
+		return false
+	}
+	// Try to advance the iterator and release it if we reached the end
+	for {
+		if !it.it.Next() {
+			it.it.Release()
+			it.it = nil
+			return false
+		}
+		if len(it.it.Key()) == len(rawdb.SnapshotStoragePrefix)+common.HashLength+common.HashLength {
+			break
+		}
+	}
+	return true
+}
+
+// Error returns any failure that occurred during iteration, which might have
+// caused a premature iteration exit (e.g. snapshot stack becoming stale).
+//
+// A diff layer is immutable after creation content wise and can always be fully
+// iterated without error, so this method always returns nil.
+func (it *diskStorageIterator) Error() error {
+	if it.it == nil {
+		return nil // Iterator is exhausted and released
+	}
+	return it.it.Error()
+}
+
+// Hash returns the hash of the storage slot the iterator is currently at.
+func (it *diskStorageIterator) Hash() common.Hash {
+	return common.BytesToHash(it.it.Key()) // The prefix will be truncated
+}
+
+// Slot returns the raw strorage slot content the iterator is currently at.
+func (it *diskStorageIterator) Slot() []byte {
+	return it.it.Value()
+}
+
+// Release releases the database snapshot held during iteration.
+func (it *diskStorageIterator) Release() {
+	// The iterator is auto-released on exhaustion, so make sure it's still alive
+	if it.it != nil {
+		it.it.Release()
+		it.it = nil
+	}
+}
diff --git a/core/state/snapshot/iterator_binary.go b/core/state/snapshot/iterator_binary.go
new file mode 100644
index 0000000..f82f750
--- /dev/null
+++ b/core/state/snapshot/iterator_binary.go
@@ -0,0 +1,213 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snapshot
+
+import (
+	"bytes"
+
+	"github.com/ethereum/go-ethereum/common"
+)
+
+// binaryIterator is a simplistic iterator to step over the accounts or storage
+// in a snapshot, which may or may not be composed of multiple layers. Performance
+// wise this iterator is slow, it's meant for cross validating the fast one,
+type binaryIterator struct {
+	a               Iterator
+	b               Iterator
+	aDone           bool
+	bDone           bool
+	accountIterator bool
+	k               common.Hash
+	account         common.Hash
+	fail            error
+}
+
+// initBinaryAccountIterator creates a simplistic iterator to step over all the
+// accounts in a slow, but eaily verifiable way. Note this function is used for
+// initialization, use `newBinaryAccountIterator` as the API.
+func (dl *diffLayer) initBinaryAccountIterator() Iterator {
+	parent, ok := dl.parent.(*diffLayer)
+	if !ok {
+		l := &binaryIterator{
+			a:               dl.AccountIterator(common.Hash{}),
+			b:               dl.Parent().AccountIterator(common.Hash{}),
+			accountIterator: true,
+		}
+		l.aDone = !l.a.Next()
+		l.bDone = !l.b.Next()
+		return l
+	}
+	l := &binaryIterator{
+		a:               dl.AccountIterator(common.Hash{}),
+		b:               parent.initBinaryAccountIterator(),
+		accountIterator: true,
+	}
+	l.aDone = !l.a.Next()
+	l.bDone = !l.b.Next()
+	return l
+}
+
+// initBinaryStorageIterator creates a simplistic iterator to step over all the
+// storage slots in a slow, but eaily verifiable way. Note this function is used
+// for initialization, use `newBinaryStorageIterator` as the API.
+func (dl *diffLayer) initBinaryStorageIterator(account common.Hash) Iterator {
+	parent, ok := dl.parent.(*diffLayer)
+	if !ok {
+		// If the storage in this layer is already destructed, discard all
+		// deeper layers but still return an valid single-branch iterator.
+		a, destructed := dl.StorageIterator(account, common.Hash{})
+		if destructed {
+			l := &binaryIterator{
+				a:       a,
+				account: account,
+			}
+			l.aDone = !l.a.Next()
+			l.bDone = true
+			return l
+		}
+		// The parent is disk layer, don't need to take care "destructed"
+		// anymore.
+		b, _ := dl.Parent().StorageIterator(account, common.Hash{})
+		l := &binaryIterator{
+			a:       a,
+			b:       b,
+			account: account,
+		}
+		l.aDone = !l.a.Next()
+		l.bDone = !l.b.Next()
+		return l
+	}
+	// If the storage in this layer is already destructed, discard all
+	// deeper layers but still return an valid single-branch iterator.
+	a, destructed := dl.StorageIterator(account, common.Hash{})
+	if destructed {
+		l := &binaryIterator{
+			a:       a,
+			account: account,
+		}
+		l.aDone = !l.a.Next()
+		l.bDone = true
+		return l
+	}
+	l := &binaryIterator{
+		a:       a,
+		b:       parent.initBinaryStorageIterator(account),
+		account: account,
+	}
+	l.aDone = !l.a.Next()
+	l.bDone = !l.b.Next()
+	return l
+}
+
+// Next steps the iterator forward one element, returning false if exhausted,
+// or an error if iteration failed for some reason (e.g. root being iterated
+// becomes stale and garbage collected).
+func (it *binaryIterator) Next() bool {
+	if it.aDone && it.bDone {
+		return false
+	}
+first:
+	if it.aDone {
+		it.k = it.b.Hash()
+		it.bDone = !it.b.Next()
+		return true
+	}
+	if it.bDone {
+		it.k = it.a.Hash()
+		it.aDone = !it.a.Next()
+		return true
+	}
+	nextA, nextB := it.a.Hash(), it.b.Hash()
+	if diff := bytes.Compare(nextA[:], nextB[:]); diff < 0 {
+		it.aDone = !it.a.Next()
+		it.k = nextA
+		return true
+	} else if diff == 0 {
+		// Now we need to advance one of them
+		it.aDone = !it.a.Next()
+		goto first
+	}
+	it.bDone = !it.b.Next()
+	it.k = nextB
+	return true
+}
+
+// Error returns any failure that occurred during iteration, which might have
+// caused a premature iteration exit (e.g. snapshot stack becoming stale).
+func (it *binaryIterator) Error() error {
+	return it.fail
+}
+
+// Hash returns the hash of the account the iterator is currently at.
+func (it *binaryIterator) Hash() common.Hash {
+	return it.k
+}
+
+// Account returns the RLP encoded slim account the iterator is currently at, or
+// nil if the iterated snapshot stack became stale (you can check Error after
+// to see if it failed or not).
+//
+// Note the returned account is not a copy, please don't modify it.
+func (it *binaryIterator) Account() []byte {
+	if !it.accountIterator {
+		return nil
+	}
+	// The topmost iterator must be `diffAccountIterator`
+	blob, err := it.a.(*diffAccountIterator).layer.AccountRLP(it.k)
+	if err != nil {
+		it.fail = err
+		return nil
+	}
+	return blob
+}
+
+// Slot returns the raw storage slot data the iterator is currently at, or
+// nil if the iterated snapshot stack became stale (you can check Error after
+// to see if it failed or not).
+//
+// Note the returned slot is not a copy, please don't modify it.
+func (it *binaryIterator) Slot() []byte {
+	if it.accountIterator {
+		return nil
+	}
+	blob, err := it.a.(*diffStorageIterator).layer.Storage(it.account, it.k)
+	if err != nil {
+		it.fail = err
+		return nil
+	}
+	return blob
+}
+
+// Release recursively releases all the iterators in the stack.
+func (it *binaryIterator) Release() {
+	it.a.Release()
+	it.b.Release()
+}
+
+// newBinaryAccountIterator creates a simplistic account iterator to step over
+// all the accounts in a slow, but eaily verifiable way.
+func (dl *diffLayer) newBinaryAccountIterator() AccountIterator {
+	iter := dl.initBinaryAccountIterator()
+	return iter.(AccountIterator)
+}
+
+// newBinaryStorageIterator creates a simplistic account iterator to step over
+// all the storage slots in a slow, but eaily verifiable way.
+func (dl *diffLayer) newBinaryStorageIterator(account common.Hash) StorageIterator {
+	iter := dl.initBinaryStorageIterator(account)
+	return iter.(StorageIterator)
+}
diff --git a/core/state/snapshot/iterator_fast.go b/core/state/snapshot/iterator_fast.go
new file mode 100644
index 0000000..291d529
--- /dev/null
+++ b/core/state/snapshot/iterator_fast.go
@@ -0,0 +1,350 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snapshot
+
+import (
+	"bytes"
+	"fmt"
+	"sort"
+
+	"github.com/ethereum/go-ethereum/common"
+)
+
+// weightedIterator is a iterator with an assigned weight. It is used to prioritise
+// which account or storage slot is the correct one if multiple iterators find the
+// same one (modified in multiple consecutive blocks).
+type weightedIterator struct {
+	it       Iterator
+	priority int
+}
+
+// weightedIterators is a set of iterators implementing the sort.Interface.
+type weightedIterators []*weightedIterator
+
+// Len implements sort.Interface, returning the number of active iterators.
+func (its weightedIterators) Len() int { return len(its) }
+
+// Less implements sort.Interface, returning which of two iterators in the stack
+// is before the other.
+func (its weightedIterators) Less(i, j int) bool {
+	// Order the iterators primarily by the account hashes
+	hashI := its[i].it.Hash()
+	hashJ := its[j].it.Hash()
+
+	switch bytes.Compare(hashI[:], hashJ[:]) {
+	case -1:
+		return true
+	case 1:
+		return false
+	}
+	// Same account/storage-slot in multiple layers, split by priority
+	return its[i].priority < its[j].priority
+}
+
+// Swap implements sort.Interface, swapping two entries in the iterator stack.
+func (its weightedIterators) Swap(i, j int) {
+	its[i], its[j] = its[j], its[i]
+}
+
+// fastIterator is a more optimized multi-layer iterator which maintains a
+// direct mapping of all iterators leading down to the bottom layer.
+type fastIterator struct {
+	tree *Tree       // Snapshot tree to reinitialize stale sub-iterators with
+	root common.Hash // Root hash to reinitialize stale sub-iterators through
+
+	curAccount []byte
+	curSlot    []byte
+
+	iterators weightedIterators
+	initiated bool
+	account   bool
+	fail      error
+}
+
+// newFastIterator creates a new hierarhical account or storage iterator with one
+// element per diff layer. The returned combo iterator can be used to walk over
+// the entire snapshot diff stack simultaneously.
+func newFastIterator(tree *Tree, root common.Hash, account common.Hash, seek common.Hash, accountIterator bool) (*fastIterator, error) {
+	snap := tree.Snapshot(root)
+	if snap == nil {
+		return nil, fmt.Errorf("unknown snapshot: %x", root)
+	}
+	fi := &fastIterator{
+		tree:    tree,
+		root:    root,
+		account: accountIterator,
+	}
+	current := snap.(snapshot)
+	for depth := 0; current != nil; depth++ {
+		if accountIterator {
+			fi.iterators = append(fi.iterators, &weightedIterator{
+				it:       current.AccountIterator(seek),
+				priority: depth,
+			})
+		} else {
+			// If the whole storage is destructed in this layer, don't
+			// bother deeper layer anymore. But we should still keep
+			// the iterator for this layer, since the iterator can contain
+			// some valid slots which belongs to the re-created account.
+			it, destructed := current.StorageIterator(account, seek)
+			fi.iterators = append(fi.iterators, &weightedIterator{
+				it:       it,
+				priority: depth,
+			})
+			if destructed {
+				break
+			}
+		}
+		current = current.Parent()
+	}
+	fi.init()
+	return fi, nil
+}
+
+// init walks over all the iterators and resolves any clashes between them, after
+// which it prepares the stack for step-by-step iteration.
+func (fi *fastIterator) init() {
+	// Track which account hashes are iterators positioned on
+	var positioned = make(map[common.Hash]int)
+
+	// Position all iterators and track how many remain live
+	for i := 0; i < len(fi.iterators); i++ {
+		// Retrieve the first element and if it clashes with a previous iterator,
+		// advance either the current one or the old one. Repeat until nothing is
+		// clashing any more.
+		it := fi.iterators[i]
+		for {
+			// If the iterator is exhausted, drop it off the end
+			if !it.it.Next() {
+				it.it.Release()
+				last := len(fi.iterators) - 1
+
+				fi.iterators[i] = fi.iterators[last]
+				fi.iterators[last] = nil
+				fi.iterators = fi.iterators[:last]
+
+				i--
+				break
+			}
+			// The iterator is still alive, check for collisions with previous ones
+			hash := it.it.Hash()
+			if other, exist := positioned[hash]; !exist {
+				positioned[hash] = i
+				break
+			} else {
+				// Iterators collide, one needs to be progressed, use priority to
+				// determine which.
+				//
+				// This whole else-block can be avoided, if we instead
+				// do an initial priority-sort of the iterators. If we do that,
+				// then we'll only wind up here if a lower-priority (preferred) iterator
+				// has the same value, and then we will always just continue.
+				// However, it costs an extra sort, so it's probably not better
+				if fi.iterators[other].priority < it.priority {
+					// The 'it' should be progressed
+					continue
+				} else {
+					// The 'other' should be progressed, swap them
+					it = fi.iterators[other]
+					fi.iterators[other], fi.iterators[i] = fi.iterators[i], fi.iterators[other]
+					continue
+				}
+			}
+		}
+	}
+	// Re-sort the entire list
+	sort.Sort(fi.iterators)
+	fi.initiated = false
+}
+
+// Next steps the iterator forward one element, returning false if exhausted.
+func (fi *fastIterator) Next() bool {
+	if len(fi.iterators) == 0 {
+		return false
+	}
+	if !fi.initiated {
+		// Don't forward first time -- we had to 'Next' once in order to
+		// do the sorting already
+		fi.initiated = true
+		if fi.account {
+			fi.curAccount = fi.iterators[0].it.(AccountIterator).Account()
+		} else {
+			fi.curSlot = fi.iterators[0].it.(StorageIterator).Slot()
+		}
+		if innerErr := fi.iterators[0].it.Error(); innerErr != nil {
+			fi.fail = innerErr
+			return false
+		}
+		if fi.curAccount != nil || fi.curSlot != nil {
+			return true
+		}
+		// Implicit else: we've hit a nil-account or nil-slot, and need to
+		// fall through to the loop below to land on something non-nil
+	}
+	// If an account or a slot is deleted in one of the layers, the key will
+	// still be there, but the actual value will be nil. However, the iterator
+	// should not export nil-values (but instead simply omit the key), so we
+	// need to loop here until we either
+	//  - get a non-nil value,
+	//  - hit an error,
+	//  - or exhaust the iterator
+	for {
+		if !fi.next(0) {
+			return false // exhausted
+		}
+		if fi.account {
+			fi.curAccount = fi.iterators[0].it.(AccountIterator).Account()
+		} else {
+			fi.curSlot = fi.iterators[0].it.(StorageIterator).Slot()
+		}
+		if innerErr := fi.iterators[0].it.Error(); innerErr != nil {
+			fi.fail = innerErr
+			return false // error
+		}
+		if fi.curAccount != nil || fi.curSlot != nil {
+			break // non-nil value found
+		}
+	}
+	return true
+}
+
+// next handles the next operation internally and should be invoked when we know
+// that two elements in the list may have the same value.
+//
+// For example, if the iterated hashes become [2,3,5,5,8,9,10], then we should
+// invoke next(3), which will call Next on elem 3 (the second '5') and will
+// cascade along the list, applying the same operation if needed.
+func (fi *fastIterator) next(idx int) bool {
+	// If this particular iterator got exhausted, remove it and return true (the
+	// next one is surely not exhausted yet, otherwise it would have been removed
+	// already).
+	if it := fi.iterators[idx].it; !it.Next() {
+		it.Release()
+
+		fi.iterators = append(fi.iterators[:idx], fi.iterators[idx+1:]...)
+		return len(fi.iterators) > 0
+	}
+	// If there's no one left to cascade into, return
+	if idx == len(fi.iterators)-1 {
+		return true
+	}
+	// We next-ed the iterator at 'idx', now we may have to re-sort that element
+	var (
+		cur, next         = fi.iterators[idx], fi.iterators[idx+1]
+		curHash, nextHash = cur.it.Hash(), next.it.Hash()
+	)
+	if diff := bytes.Compare(curHash[:], nextHash[:]); diff < 0 {
+		// It is still in correct place
+		return true
+	} else if diff == 0 && cur.priority < next.priority {
+		// So still in correct place, but we need to iterate on the next
+		fi.next(idx + 1)
+		return true
+	}
+	// At this point, the iterator is in the wrong location, but the remaining
+	// list is sorted. Find out where to move the item.
+	clash := -1
+	index := sort.Search(len(fi.iterators), func(n int) bool {
+		// The iterator always advances forward, so anything before the old slot
+		// is known to be behind us, so just skip them altogether. This actually
+		// is an important clause since the sort order got invalidated.
+		if n < idx {
+			return false
+		}
+		if n == len(fi.iterators)-1 {
+			// Can always place an elem last
+			return true
+		}
+		nextHash := fi.iterators[n+1].it.Hash()
+		if diff := bytes.Compare(curHash[:], nextHash[:]); diff < 0 {
+			return true
+		} else if diff > 0 {
+			return false
+		}
+		// The elem we're placing it next to has the same value,
+		// so whichever winds up on n+1 will need further iteraton
+		clash = n + 1
+
+		return cur.priority < fi.iterators[n+1].priority
+	})
+	fi.move(idx, index)
+	if clash != -1 {
+		fi.next(clash)
+	}
+	return true
+}
+
+// move advances an iterator to another position in the list.
+func (fi *fastIterator) move(index, newpos int) {
+	elem := fi.iterators[index]
+	copy(fi.iterators[index:], fi.iterators[index+1:newpos+1])
+	fi.iterators[newpos] = elem
+}
+
+// Error returns any failure that occurred during iteration, which might have
+// caused a premature iteration exit (e.g. snapshot stack becoming stale).
+func (fi *fastIterator) Error() error {
+	return fi.fail
+}
+
+// Hash returns the current key
+func (fi *fastIterator) Hash() common.Hash {
+	return fi.iterators[0].it.Hash()
+}
+
+// Account returns the current account blob.
+// Note the returned account is not a copy, please don't modify it.
+func (fi *fastIterator) Account() []byte {
+	return fi.curAccount
+}
+
+// Slot returns the current storage slot.
+// Note the returned slot is not a copy, please don't modify it.
+func (fi *fastIterator) Slot() []byte {
+	return fi.curSlot
+}
+
+// Release iterates over all the remaining live layer iterators and releases each
+// of thme individually.
+func (fi *fastIterator) Release() {
+	for _, it := range fi.iterators {
+		it.it.Release()
+	}
+	fi.iterators = nil
+}
+
+// Debug is a convencience helper during testing
+func (fi *fastIterator) Debug() {
+	for _, it := range fi.iterators {
+		fmt.Printf("[p=%v v=%v] ", it.priority, it.it.Hash()[0])
+	}
+	fmt.Println()
+}
+
+// newFastAccountIterator creates a new hierarhical account iterator with one
+// element per diff layer. The returned combo iterator can be used to walk over
+// the entire snapshot diff stack simultaneously.
+func newFastAccountIterator(tree *Tree, root common.Hash, seek common.Hash) (AccountIterator, error) {
+	return newFastIterator(tree, root, common.Hash{}, seek, true)
+}
+
+// newFastStorageIterator creates a new hierarhical storage iterator with one
+// element per diff layer. The returned combo iterator can be used to walk over
+// the entire snapshot diff stack simultaneously.
+func newFastStorageIterator(tree *Tree, root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) {
+	return newFastIterator(tree, root, account, seek, false)
+}
diff --git a/core/state/snapshot/iterator_test.go b/core/state/snapshot/iterator_test.go
new file mode 100644
index 0000000..ef4859c
--- /dev/null
+++ b/core/state/snapshot/iterator_test.go
@@ -0,0 +1,1046 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snapshot
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"math/rand"
+	"testing"
+
+	"github.com/VictoriaMetrics/fastcache"
+	"github.com/ava-labs/coreth/core/rawdb"
+	"github.com/ethereum/go-ethereum/common"
+)
+
+// TestAccountIteratorBasics tests some simple single-layer(diff and disk) iteration
+func TestAccountIteratorBasics(t *testing.T) {
+	var (
+		destructs = make(map[common.Hash]struct{})
+		accounts  = make(map[common.Hash][]byte)
+		storage   = make(map[common.Hash]map[common.Hash][]byte)
+	)
+	// Fill up a parent
+	for i := 0; i < 100; i++ {
+		h := randomHash()
+		data := randomAccount()
+
+		accounts[h] = data
+		if rand.Intn(4) == 0 {
+			destructs[h] = struct{}{}
+		}
+		if rand.Intn(2) == 0 {
+			accStorage := make(map[common.Hash][]byte)
+			value := make([]byte, 32)
+			rand.Read(value)
+			accStorage[randomHash()] = value
+			storage[h] = accStorage
+		}
+	}
+	// Add some (identical) layers on top
+	diffLayer := newDiffLayer(emptyLayer(), common.Hash{}, copyDestructs(destructs), copyAccounts(accounts), copyStorage(storage))
+	it := diffLayer.AccountIterator(common.Hash{})
+	verifyIterator(t, 100, it, verifyNothing) // Nil is allowed for single layer iterator
+
+	diskLayer := diffToDisk(diffLayer)
+	it = diskLayer.AccountIterator(common.Hash{})
+	verifyIterator(t, 100, it, verifyNothing) // Nil is allowed for single layer iterator
+}
+
+// TestStorageIteratorBasics tests some simple single-layer(diff and disk) iteration for storage
+func TestStorageIteratorBasics(t *testing.T) {
+	var (
+		nilStorage = make(map[common.Hash]int)
+		accounts   = make(map[common.Hash][]byte)
+		storage    = make(map[common.Hash]map[common.Hash][]byte)
+	)
+	// Fill some random data
+	for i := 0; i < 10; i++ {
+		h := randomHash()
+		accounts[h] = randomAccount()
+
+		accStorage := make(map[common.Hash][]byte)
+		value := make([]byte, 32)
+
+		var nilstorage int
+		for i := 0; i < 100; i++ {
+			rand.Read(value)
+			if rand.Intn(2) == 0 {
+				accStorage[randomHash()] = common.CopyBytes(value)
+			} else {
+				accStorage[randomHash()] = nil // delete slot
+				nilstorage += 1
+			}
+		}
+		storage[h] = accStorage
+		nilStorage[h] = nilstorage
+	}
+	// Add some (identical) layers on top
+	diffLayer := newDiffLayer(emptyLayer(), common.Hash{}, nil, copyAccounts(accounts), copyStorage(storage))
+	for account := range accounts {
+		it, _ := diffLayer.StorageIterator(account, common.Hash{})
+		verifyIterator(t, 100, it, verifyNothing) // Nil is allowed for single layer iterator
+	}
+
+	diskLayer := diffToDisk(diffLayer)
+	for account := range accounts {
+		it, _ := diskLayer.StorageIterator(account, common.Hash{})
+		verifyIterator(t, 100-nilStorage[account], it, verifyNothing) // Nil is allowed for single layer iterator
+	}
+}
+
+type testIterator struct {
+	values []byte
+}
+
+func newTestIterator(values ...byte) *testIterator {
+	return &testIterator{values}
+}
+
+func (ti *testIterator) Seek(common.Hash) {
+	panic("implement me")
+}
+
+func (ti *testIterator) Next() bool {
+	ti.values = ti.values[1:]
+	return len(ti.values) > 0
+}
+
+func (ti *testIterator) Error() error {
+	return nil
+}
+
+func (ti *testIterator) Hash() common.Hash {
+	return common.BytesToHash([]byte{ti.values[0]})
+}
+
+func (ti *testIterator) Account() []byte {
+	return nil
+}
+
+func (ti *testIterator) Slot() []byte {
+	return nil
+}
+
+func (ti *testIterator) Release() {}
+
+func TestFastIteratorBasics(t *testing.T) {
+	type testCase struct {
+		lists   [][]byte
+		expKeys []byte
+	}
+	for i, tc := range []testCase{
+		{lists: [][]byte{{0, 1, 8}, {1, 2, 8}, {2, 9}, {4},
+			{7, 14, 15}, {9, 13, 15, 16}},
+			expKeys: []byte{0, 1, 2, 4, 7, 8, 9, 13, 14, 15, 16}},
+		{lists: [][]byte{{0, 8}, {1, 2, 8}, {7, 14, 15}, {8, 9},
+			{9, 10}, {10, 13, 15, 16}},
+			expKeys: []byte{0, 1, 2, 7, 8, 9, 10, 13, 14, 15, 16}},
+	} {
+		var iterators []*weightedIterator
+		for i, data := range tc.lists {
+			it := newTestIterator(data...)
+			iterators = append(iterators, &weightedIterator{it, i})
+		}
+		fi := &fastIterator{
+			iterators: iterators,
+			initiated: false,
+		}
+		count := 0
+		for fi.Next() {
+			if got, exp := fi.Hash()[31], tc.expKeys[count]; exp != got {
+				t.Errorf("tc %d, [%d]: got %d exp %d", i, count, got, exp)
+			}
+			count++
+		}
+	}
+}
+
+type verifyContent int
+
+const (
+	verifyNothing verifyContent = iota
+	verifyAccount
+	verifyStorage
+)
+
+func verifyIterator(t *testing.T, expCount int, it Iterator, verify verifyContent) {
+	t.Helper()
+
+	var (
+		count = 0
+		last  = common.Hash{}
+	)
+	for it.Next() {
+		hash := it.Hash()
+		if bytes.Compare(last[:], hash[:]) >= 0 {
+			t.Errorf("wrong order: %x >= %x", last, hash)
+		}
+		count++
+		if verify == verifyAccount && len(it.(AccountIterator).Account()) == 0 {
+			t.Errorf("iterator returned nil-value for hash %x", hash)
+		} else if verify == verifyStorage && len(it.(StorageIterator).Slot()) == 0 {
+			t.Errorf("iterator returned nil-value for hash %x", hash)
+		}
+		last = hash
+	}
+	if count != expCount {
+		t.Errorf("iterator count mismatch: have %d, want %d", count, expCount)
+	}
+	if err := it.Error(); err != nil {
+		t.Errorf("iterator failed: %v", err)
+	}
+}
+
+// TestAccountIteratorTraversal tests some simple multi-layer iteration.
+func TestAccountIteratorTraversal(t *testing.T) {
+	// Create an empty base layer and a snapshot tree out of it
+	base := &diskLayer{
+		diskdb: rawdb.NewMemoryDatabase(),
+		root:   common.HexToHash("0x01"),
+		cache:  fastcache.New(1024 * 500),
+	}
+	snaps := &Tree{
+		layers: map[common.Hash]snapshot{
+			base.root: base,
+		},
+	}
+	// Stack three diff layers on top with various overlaps
+	snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil,
+		randomAccountSet("0xaa", "0xee", "0xff", "0xf0"), nil)
+
+	snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), nil,
+		randomAccountSet("0xbb", "0xdd", "0xf0"), nil)
+
+	snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), nil,
+		randomAccountSet("0xcc", "0xf0", "0xff"), nil)
+
+	// Verify the single and multi-layer iterators
+	head := snaps.Snapshot(common.HexToHash("0x04"))
+
+	verifyIterator(t, 3, head.(snapshot).AccountIterator(common.Hash{}), verifyNothing)
+	verifyIterator(t, 7, head.(*diffLayer).newBinaryAccountIterator(), verifyAccount)
+
+	it, _ := snaps.AccountIterator(common.HexToHash("0x04"), common.Hash{})
+	verifyIterator(t, 7, it, verifyAccount)
+	it.Release()
+
+	// Test after persist some bottom-most layers into the disk,
+	// the functionalities still work.
+	limit := aggregatorMemoryLimit
+	defer func() {
+		aggregatorMemoryLimit = limit
+	}()
+	aggregatorMemoryLimit = 0 // Force pushing the bottom-most layer into disk
+	snaps.Cap(common.HexToHash("0x04"), 2)
+	verifyIterator(t, 7, head.(*diffLayer).newBinaryAccountIterator(), verifyAccount)
+
+	it, _ = snaps.AccountIterator(common.HexToHash("0x04"), common.Hash{})
+	verifyIterator(t, 7, it, verifyAccount)
+	it.Release()
+}
+
+func TestStorageIteratorTraversal(t *testing.T) {
+	// Create an empty base layer and a snapshot tree out of it
+	base := &diskLayer{
+		diskdb: rawdb.NewMemoryDatabase(),
+		root:   common.HexToHash("0x01"),
+		cache:  fastcache.New(1024 * 500),
+	}
+	snaps := &Tree{
+		layers: map[common.Hash]snapshot{
+			base.root: base,
+		},
+	}
+	// Stack three diff layers on top with various overlaps
+	snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil,
+		randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x01", "0x02", "0x03"}}, nil))
+
+	snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), nil,
+		randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x04", "0x05", "0x06"}}, nil))
+
+	snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), nil,
+		randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x01", "0x02", "0x03"}}, nil))
+
+	// Verify the single and multi-layer iterators
+	head := snaps.Snapshot(common.HexToHash("0x04"))
+
+	diffIter, _ := head.(snapshot).StorageIterator(common.HexToHash("0xaa"), common.Hash{})
+	verifyIterator(t, 3, diffIter, verifyNothing)
+	verifyIterator(t, 6, head.(*diffLayer).newBinaryStorageIterator(common.HexToHash("0xaa")), verifyStorage)
+
+	it, _ := snaps.StorageIterator(common.HexToHash("0x04"), common.HexToHash("0xaa"), common.Hash{})
+	verifyIterator(t, 6, it, verifyStorage)
+	it.Release()
+
+	// Test after persist some bottom-most layers into the disk,
+	// the functionalities still work.
+	limit := aggregatorMemoryLimit
+	defer func() {
+		aggregatorMemoryLimit = limit
+	}()
+	aggregatorMemoryLimit = 0 // Force pushing the bottom-most layer into disk
+	snaps.Cap(common.HexToHash("0x04"), 2)
+	verifyIterator(t, 6, head.(*diffLayer).newBinaryStorageIterator(common.HexToHash("0xaa")), verifyStorage)
+
+	it, _ = snaps.StorageIterator(common.HexToHash("0x04"), common.HexToHash("0xaa"), common.Hash{})
+	verifyIterator(t, 6, it, verifyStorage)
+	it.Release()
+}
+
+// TestAccountIteratorTraversalValues tests some multi-layer iteration, where we
+// also expect the correct values to show up.
+func TestAccountIteratorTraversalValues(t *testing.T) {
+	// Create an empty base layer and a snapshot tree out of it
+	base := &diskLayer{
+		diskdb: rawdb.NewMemoryDatabase(),
+		root:   common.HexToHash("0x01"),
+		cache:  fastcache.New(1024 * 500),
+	}
+	snaps := &Tree{
+		layers: map[common.Hash]snapshot{
+			base.root: base,
+		},
+	}
+	// Create a batch of account sets to seed subsequent layers with
+	var (
+		a = make(map[common.Hash][]byte)
+		b = make(map[common.Hash][]byte)
+		c = make(map[common.Hash][]byte)
+		d = make(map[common.Hash][]byte)
+		e = make(map[common.Hash][]byte)
+		f = make(map[common.Hash][]byte)
+		g = make(map[common.Hash][]byte)
+		h = make(map[common.Hash][]byte)
+	)
+	for i := byte(2); i < 0xff; i++ {
+		a[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 0, i))
+		if i > 20 && i%2 == 0 {
+			b[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 1, i))
+		}
+		if i%4 == 0 {
+			c[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 2, i))
+		}
+		if i%7 == 0 {
+			d[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 3, i))
+		}
+		if i%8 == 0 {
+			e[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 4, i))
+		}
+		if i > 50 || i < 85 {
+			f[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 5, i))
+		}
+		if i%64 == 0 {
+			g[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 6, i))
+		}
+		if i%128 == 0 {
+			h[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 7, i))
+		}
+	}
+	// Assemble a stack of snapshots from the account layers
+	snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, a, nil)
+	snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), nil, b, nil)
+	snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), nil, c, nil)
+	snaps.Update(common.HexToHash("0x05"), common.HexToHash("0x04"), nil, d, nil)
+	snaps.Update(common.HexToHash("0x06"), common.HexToHash("0x05"), nil, e, nil)
+	snaps.Update(common.HexToHash("0x07"), common.HexToHash("0x06"), nil, f, nil)
+	snaps.Update(common.HexToHash("0x08"), common.HexToHash("0x07"), nil, g, nil)
+	snaps.Update(common.HexToHash("0x09"), common.HexToHash("0x08"), nil, h, nil)
+
+	it, _ := snaps.AccountIterator(common.HexToHash("0x09"), common.Hash{})
+	head := snaps.Snapshot(common.HexToHash("0x09"))
+	for it.Next() {
+		hash := it.Hash()
+		want, err := head.AccountRLP(hash)
+		if err != nil {
+			t.Fatalf("failed to retrieve expected account: %v", err)
+		}
+		if have := it.Account(); !bytes.Equal(want, have) {
+			t.Fatalf("hash %x: account mismatch: have %x, want %x", hash, have, want)
+		}
+	}
+	it.Release()
+
+	// Test after persist some bottom-most layers into the disk,
+	// the functionalities still work.
+	limit := aggregatorMemoryLimit
+	defer func() {
+		aggregatorMemoryLimit = limit
+	}()
+	aggregatorMemoryLimit = 0 // Force pushing the bottom-most layer into disk
+	snaps.Cap(common.HexToHash("0x09"), 2)
+
+	it, _ = snaps.AccountIterator(common.HexToHash("0x09"), common.Hash{})
+	for it.Next() {
+		hash := it.Hash()
+		want, err := head.AccountRLP(hash)
+		if err != nil {
+			t.Fatalf("failed to retrieve expected account: %v", err)
+		}
+		if have := it.Account(); !bytes.Equal(want, have) {
+			t.Fatalf("hash %x: account mismatch: have %x, want %x", hash, have, want)
+		}
+	}
+	it.Release()
+}
+
+func TestStorageIteratorTraversalValues(t *testing.T) {
+	// Create an empty base layer and a snapshot tree out of it
+	base := &diskLayer{
+		diskdb: rawdb.NewMemoryDatabase(),
+		root:   common.HexToHash("0x01"),
+		cache:  fastcache.New(1024 * 500),
+	}
+	snaps := &Tree{
+		layers: map[common.Hash]snapshot{
+			base.root: base,
+		},
+	}
+	wrapStorage := func(storage map[common.Hash][]byte) map[common.Hash]map[common.Hash][]byte {
+		return map[common.Hash]map[common.Hash][]byte{
+			common.HexToHash("0xaa"): storage,
+		}
+	}
+	// Create a batch of storage sets to seed subsequent layers with
+	var (
+		a = make(map[common.Hash][]byte)
+		b = make(map[common.Hash][]byte)
+		c = make(map[common.Hash][]byte)
+		d = make(map[common.Hash][]byte)
+		e = make(map[common.Hash][]byte)
+		f = make(map[common.Hash][]byte)
+		g = make(map[common.Hash][]byte)
+		h = make(map[common.Hash][]byte)
+	)
+	for i := byte(2); i < 0xff; i++ {
+		a[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 0, i))
+		if i > 20 && i%2 == 0 {
+			b[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 1, i))
+		}
+		if i%4 == 0 {
+			c[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 2, i))
+		}
+		if i%7 == 0 {
+			d[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 3, i))
+		}
+		if i%8 == 0 {
+			e[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 4, i))
+		}
+		if i > 50 || i < 85 {
+			f[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 5, i))
+		}
+		if i%64 == 0 {
+			g[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 6, i))
+		}
+		if i%128 == 0 {
+			h[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 7, i))
+		}
+	}
+	// Assemble a stack of snapshots from the account layers
+	snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, randomAccountSet("0xaa"), wrapStorage(a))
+	snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), nil, randomAccountSet("0xaa"), wrapStorage(b))
+	snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), nil, randomAccountSet("0xaa"), wrapStorage(c))
+	snaps.Update(common.HexToHash("0x05"), common.HexToHash("0x04"), nil, randomAccountSet("0xaa"), wrapStorage(d))
+	snaps.Update(common.HexToHash("0x06"), common.HexToHash("0x05"), nil, randomAccountSet("0xaa"), wrapStorage(e))
+	snaps.Update(common.HexToHash("0x07"), common.HexToHash("0x06"), nil, randomAccountSet("0xaa"), wrapStorage(e))
+	snaps.Update(common.HexToHash("0x08"), common.HexToHash("0x07"), nil, randomAccountSet("0xaa"), wrapStorage(g))
+	snaps.Update(common.HexToHash("0x09"), common.HexToHash("0x08"), nil, randomAccountSet("0xaa"), wrapStorage(h))
+
+	it, _ := snaps.StorageIterator(common.HexToHash("0x09"), common.HexToHash("0xaa"), common.Hash{})
+	head := snaps.Snapshot(common.HexToHash("0x09"))
+	for it.Next() {
+		hash := it.Hash()
+		want, err := head.Storage(common.HexToHash("0xaa"), hash)
+		if err != nil {
+			t.Fatalf("failed to retrieve expected storage slot: %v", err)
+		}
+		if have := it.Slot(); !bytes.Equal(want, have) {
+			t.Fatalf("hash %x: slot mismatch: have %x, want %x", hash, have, want)
+		}
+	}
+	it.Release()
+
+	// Test after persist some bottom-most layers into the disk,
+	// the functionalities still work.
+	limit := aggregatorMemoryLimit
+	defer func() {
+		aggregatorMemoryLimit = limit
+	}()
+	aggregatorMemoryLimit = 0 // Force pushing the bottom-most layer into disk
+	snaps.Cap(common.HexToHash("0x09"), 2)
+
+	it, _ = snaps.StorageIterator(common.HexToHash("0x09"), common.HexToHash("0xaa"), common.Hash{})
+	for it.Next() {
+		hash := it.Hash()
+		want, err := head.Storage(common.HexToHash("0xaa"), hash)
+		if err != nil {
+			t.Fatalf("failed to retrieve expected slot: %v", err)
+		}
+		if have := it.Slot(); !bytes.Equal(want, have) {
+			t.Fatalf("hash %x: slot mismatch: have %x, want %x", hash, have, want)
+		}
+	}
+	it.Release()
+}
+
+// This testcase is notorious, all layers contain the exact same 200 accounts.
+func TestAccountIteratorLargeTraversal(t *testing.T) {
+	// Create a custom account factory to recreate the same addresses
+	makeAccounts := func(num int) map[common.Hash][]byte {
+		accounts := make(map[common.Hash][]byte)
+		for i := 0; i < num; i++ {
+			h := common.Hash{}
+			binary.BigEndian.PutUint64(h[:], uint64(i+1))
+			accounts[h] = randomAccount()
+		}
+		return accounts
+	}
+	// Build up a large stack of snapshots
+	base := &diskLayer{
+		diskdb: rawdb.NewMemoryDatabase(),
+		root:   common.HexToHash("0x01"),
+		cache:  fastcache.New(1024 * 500),
+	}
+	snaps := &Tree{
+		layers: map[common.Hash]snapshot{
+			base.root: base,
+		},
+	}
+	for i := 1; i < 128; i++ {
+		snaps.Update(common.HexToHash(fmt.Sprintf("0x%02x", i+1)), common.HexToHash(fmt.Sprintf("0x%02x", i)), nil, makeAccounts(200), nil)
+	}
+	// Iterate the entire stack and ensure everything is hit only once
+	head := snaps.Snapshot(common.HexToHash("0x80"))
+	verifyIterator(t, 200, head.(snapshot).AccountIterator(common.Hash{}), verifyNothing)
+	verifyIterator(t, 200, head.(*diffLayer).newBinaryAccountIterator(), verifyAccount)
+
+	it, _ := snaps.AccountIterator(common.HexToHash("0x80"), common.Hash{})
+	verifyIterator(t, 200, it, verifyAccount)
+	it.Release()
+
+	// Test after persist some bottom-most layers into the disk,
+	// the functionalities still work.
+	limit := aggregatorMemoryLimit
+	defer func() {
+		aggregatorMemoryLimit = limit
+	}()
+	aggregatorMemoryLimit = 0 // Force pushing the bottom-most layer into disk
+	snaps.Cap(common.HexToHash("0x80"), 2)
+
+	verifyIterator(t, 200, head.(*diffLayer).newBinaryAccountIterator(), verifyAccount)
+
+	it, _ = snaps.AccountIterator(common.HexToHash("0x80"), common.Hash{})
+	verifyIterator(t, 200, it, verifyAccount)
+	it.Release()
+}
+
+// TestAccountIteratorFlattening tests what happens when we
+// - have a live iterator on child C (parent C1 -> C2 .. CN)
+// - flattens C2 all the way into CN
+// - continues iterating
+func TestAccountIteratorFlattening(t *testing.T) {
+	// Create an empty base layer and a snapshot tree out of it
+	base := &diskLayer{
+		diskdb: rawdb.NewMemoryDatabase(),
+		root:   common.HexToHash("0x01"),
+		cache:  fastcache.New(1024 * 500),
+	}
+	snaps := &Tree{
+		layers: map[common.Hash]snapshot{
+			base.root: base,
+		},
+	}
+	// Create a stack of diffs on top
+	snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil,
+		randomAccountSet("0xaa", "0xee", "0xff", "0xf0"), nil)
+
+	snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), nil,
+		randomAccountSet("0xbb", "0xdd", "0xf0"), nil)
+
+	snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), nil,
+		randomAccountSet("0xcc", "0xf0", "0xff"), nil)
+
+	// Create an iterator and flatten the data from underneath it
+	it, _ := snaps.AccountIterator(common.HexToHash("0x04"), common.Hash{})
+	defer it.Release()
+
+	if err := snaps.Cap(common.HexToHash("0x04"), 1); err != nil {
+		t.Fatalf("failed to flatten snapshot stack: %v", err)
+	}
+	//verifyIterator(t, 7, it)
+}
+
+func TestAccountIteratorSeek(t *testing.T) {
+	// Create a snapshot stack with some initial data
+	base := &diskLayer{
+		diskdb: rawdb.NewMemoryDatabase(),
+		root:   common.HexToHash("0x01"),
+		cache:  fastcache.New(1024 * 500),
+	}
+	snaps := &Tree{
+		layers: map[common.Hash]snapshot{
+			base.root: base,
+		},
+	}
+	snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil,
+		randomAccountSet("0xaa", "0xee", "0xff", "0xf0"), nil)
+
+	snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), nil,
+		randomAccountSet("0xbb", "0xdd", "0xf0"), nil)
+
+	snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), nil,
+		randomAccountSet("0xcc", "0xf0", "0xff"), nil)
+
+	// Account set is now
+	// 02: aa, ee, f0, ff
+	// 03: aa, bb, dd, ee, f0 (, f0), ff
+	// 04: aa, bb, cc, dd, ee, f0 (, f0), ff (, ff)
+	// Construct various iterators and ensure their traversal is correct
+	it, _ := snaps.AccountIterator(common.HexToHash("0x02"), common.HexToHash("0xdd"))
+	defer it.Release()
+	verifyIterator(t, 3, it, verifyAccount) // expected: ee, f0, ff
+
+	it, _ = snaps.AccountIterator(common.HexToHash("0x02"), common.HexToHash("0xaa"))
+	defer it.Release()
+	verifyIterator(t, 4, it, verifyAccount) // expected: aa, ee, f0, ff
+
+	it, _ = snaps.AccountIterator(common.HexToHash("0x02"), common.HexToHash("0xff"))
+	defer it.Release()
+	verifyIterator(t, 1, it, verifyAccount) // expected: ff
+
+	it, _ = snaps.AccountIterator(common.HexToHash("0x02"), common.HexToHash("0xff1"))
+	defer it.Release()
+	verifyIterator(t, 0, it, verifyAccount) // expected: nothing
+
+	it, _ = snaps.AccountIterator(common.HexToHash("0x04"), common.HexToHash("0xbb"))
+	defer it.Release()
+	verifyIterator(t, 6, it, verifyAccount) // expected: bb, cc, dd, ee, f0, ff
+
+	it, _ = snaps.AccountIterator(common.HexToHash("0x04"), common.HexToHash("0xef"))
+	defer it.Release()
+	verifyIterator(t, 2, it, verifyAccount) // expected: f0, ff
+
+	it, _ = snaps.AccountIterator(common.HexToHash("0x04"), common.HexToHash("0xf0"))
+	defer it.Release()
+	verifyIterator(t, 2, it, verifyAccount) // expected: f0, ff
+
+	it, _ = snaps.AccountIterator(common.HexToHash("0x04"), common.HexToHash("0xff"))
+	defer it.Release()
+	verifyIterator(t, 1, it, verifyAccount) // expected: ff
+
+	it, _ = snaps.AccountIterator(common.HexToHash("0x04"), common.HexToHash("0xff1"))
+	defer it.Release()
+	verifyIterator(t, 0, it, verifyAccount) // expected: nothing
+}
+
+func TestStorageIteratorSeek(t *testing.T) {
+	// Create a snapshot stack with some initial data
+	base := &diskLayer{
+		diskdb: rawdb.NewMemoryDatabase(),
+		root:   common.HexToHash("0x01"),
+		cache:  fastcache.New(1024 * 500),
+	}
+	snaps := &Tree{
+		layers: map[common.Hash]snapshot{
+			base.root: base,
+		},
+	}
+	// Stack three diff layers on top with various overlaps
+	snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil,
+		randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x01", "0x03", "0x05"}}, nil))
+
+	snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), nil,
+		randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x02", "0x05", "0x06"}}, nil))
+
+	snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), nil,
+		randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x01", "0x05", "0x08"}}, nil))
+
+	// Account set is now
+	// 02: 01, 03, 05
+	// 03: 01, 02, 03, 05 (, 05), 06
+	// 04: 01(, 01), 02, 03, 05(, 05, 05), 06, 08
+	// Construct various iterators and ensure their traversal is correct
+	it, _ := snaps.StorageIterator(common.HexToHash("0x02"), common.HexToHash("0xaa"), common.HexToHash("0x01"))
+	defer it.Release()
+	verifyIterator(t, 3, it, verifyStorage) // expected: 01, 03, 05
+
+	it, _ = snaps.StorageIterator(common.HexToHash("0x02"), common.HexToHash("0xaa"), common.HexToHash("0x02"))
+	defer it.Release()
+	verifyIterator(t, 2, it, verifyStorage) // expected: 03, 05
+
+	it, _ = snaps.StorageIterator(common.HexToHash("0x02"), common.HexToHash("0xaa"), common.HexToHash("0x5"))
+	defer it.Release()
+	verifyIterator(t, 1, it, verifyStorage) // expected: 05
+
+	it, _ = snaps.StorageIterator(common.HexToHash("0x02"), common.HexToHash("0xaa"), common.HexToHash("0x6"))
+	defer it.Release()
+	verifyIterator(t, 0, it, verifyStorage) // expected: nothing
+
+	it, _ = snaps.StorageIterator(common.HexToHash("0x04"), common.HexToHash("0xaa"), common.HexToHash("0x01"))
+	defer it.Release()
+	verifyIterator(t, 6, it, verifyStorage) // expected: 01, 02, 03, 05, 06, 08
+
+	it, _ = snaps.StorageIterator(common.HexToHash("0x04"), common.HexToHash("0xaa"), common.HexToHash("0x05"))
+	defer it.Release()
+	verifyIterator(t, 3, it, verifyStorage) // expected: 05, 06, 08
+
+	it, _ = snaps.StorageIterator(common.HexToHash("0x04"), common.HexToHash("0xaa"), common.HexToHash("0x08"))
+	defer it.Release()
+	verifyIterator(t, 1, it, verifyStorage) // expected: 08
+
+	it, _ = snaps.StorageIterator(common.HexToHash("0x04"), common.HexToHash("0xaa"), common.HexToHash("0x09"))
+	defer it.Release()
+	verifyIterator(t, 0, it, verifyStorage) // expected: nothing
+}
+
+// TestAccountIteratorDeletions tests that the iterator behaves correct when there are
+// deleted accounts (where the Account() value is nil). The iterator
+// should not output any accounts or nil-values for those cases.
+func TestAccountIteratorDeletions(t *testing.T) {
+	// Create an empty base layer and a snapshot tree out of it
+	base := &diskLayer{
+		diskdb: rawdb.NewMemoryDatabase(),
+		root:   common.HexToHash("0x01"),
+		cache:  fastcache.New(1024 * 500),
+	}
+	snaps := &Tree{
+		layers: map[common.Hash]snapshot{
+			base.root: base,
+		},
+	}
+	// Stack three diff layers on top with various overlaps
+	snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"),
+		nil, randomAccountSet("0x11", "0x22", "0x33"), nil)
+
+	deleted := common.HexToHash("0x22")
+	destructed := map[common.Hash]struct{}{
+		deleted: {},
+	}
+	snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"),
+		destructed, randomAccountSet("0x11", "0x33"), nil)
+
+	snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"),
+		nil, randomAccountSet("0x33", "0x44", "0x55"), nil)
+
+	// The output should be 11,33,44,55
+	it, _ := snaps.AccountIterator(common.HexToHash("0x04"), common.Hash{})
+	// Do a quick check
+	verifyIterator(t, 4, it, verifyAccount)
+	it.Release()
+
+	// And a more detailed verification that we indeed do not see '0x22'
+	it, _ = snaps.AccountIterator(common.HexToHash("0x04"), common.Hash{})
+	defer it.Release()
+	for it.Next() {
+		hash := it.Hash()
+		if it.Account() == nil {
+			t.Errorf("iterator returned nil-value for hash %x", hash)
+		}
+		if hash == deleted {
+			t.Errorf("expected deleted elem %x to not be returned by iterator", deleted)
+		}
+	}
+}
+
+func TestStorageIteratorDeletions(t *testing.T) {
+	// Create an empty base layer and a snapshot tree out of it
+	base := &diskLayer{
+		diskdb: rawdb.NewMemoryDatabase(),
+		root:   common.HexToHash("0x01"),
+		cache:  fastcache.New(1024 * 500),
+	}
+	snaps := &Tree{
+		layers: map[common.Hash]snapshot{
+			base.root: base,
+		},
+	}
+	// Stack three diff layers on top with various overlaps
+	snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil,
+		randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x01", "0x03", "0x05"}}, nil))
+
+	snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), nil,
+		randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x02", "0x04", "0x06"}}, [][]string{{"0x01", "0x03"}}))
+
+	// The output should be 02,04,05,06
+	it, _ := snaps.StorageIterator(common.HexToHash("0x03"), common.HexToHash("0xaa"), common.Hash{})
+	verifyIterator(t, 4, it, verifyStorage)
+	it.Release()
+
+	// The output should be 04,05,06
+	it, _ = snaps.StorageIterator(common.HexToHash("0x03"), common.HexToHash("0xaa"), common.HexToHash("0x03"))
+	verifyIterator(t, 3, it, verifyStorage)
+	it.Release()
+
+	// Destruct the whole storage
+	destructed := map[common.Hash]struct{}{
+		common.HexToHash("0xaa"): {},
+	}
+	snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), destructed, nil, nil)
+
+	it, _ = snaps.StorageIterator(common.HexToHash("0x04"), common.HexToHash("0xaa"), common.Hash{})
+	verifyIterator(t, 0, it, verifyStorage)
+	it.Release()
+
+	// Re-insert the slots of the same account
+	snaps.Update(common.HexToHash("0x05"), common.HexToHash("0x04"), nil,
+		randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x07", "0x08", "0x09"}}, nil))
+
+	// The output should be 07,08,09
+	it, _ = snaps.StorageIterator(common.HexToHash("0x05"), common.HexToHash("0xaa"), common.Hash{})
+	verifyIterator(t, 3, it, verifyStorage)
+	it.Release()
+
+	// Destruct the whole storage but re-create the account in the same layer
+	snaps.Update(common.HexToHash("0x06"), common.HexToHash("0x05"), destructed, randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x11", "0x12"}}, nil))
+	it, _ = snaps.StorageIterator(common.HexToHash("0x06"), common.HexToHash("0xaa"), common.Hash{})
+	verifyIterator(t, 2, it, verifyStorage) // The output should be 11,12
+	it.Release()
+
+	verifyIterator(t, 2, snaps.Snapshot(common.HexToHash("0x06")).(*diffLayer).newBinaryStorageIterator(common.HexToHash("0xaa")), verifyStorage)
+}
+
+// BenchmarkAccountIteratorTraversal is a bit a bit notorious -- all layers contain the
+// exact same 200 accounts. That means that we need to process 2000 items, but
+// only spit out 200 values eventually.
+//
+// The value-fetching benchmark is easy on the binary iterator, since it never has to reach
+// down at any depth for retrieving the values -- all are on the toppmost layer
+//
+// BenchmarkAccountIteratorTraversal/binary_iterator_keys-6         	    2239	    483674 ns/op
+// BenchmarkAccountIteratorTraversal/binary_iterator_values-6       	    2403	    501810 ns/op
+// BenchmarkAccountIteratorTraversal/fast_iterator_keys-6           	    1923	    677966 ns/op
+// BenchmarkAccountIteratorTraversal/fast_iterator_values-6         	    1741	    649967 ns/op
+func BenchmarkAccountIteratorTraversal(b *testing.B) {
+	// Create a custom account factory to recreate the same addresses
+	makeAccounts := func(num int) map[common.Hash][]byte {
+		accounts := make(map[common.Hash][]byte)
+		for i := 0; i < num; i++ {
+			h := common.Hash{}
+			binary.BigEndian.PutUint64(h[:], uint64(i+1))
+			accounts[h] = randomAccount()
+		}
+		return accounts
+	}
+	// Build up a large stack of snapshots
+	base := &diskLayer{
+		diskdb: rawdb.NewMemoryDatabase(),
+		root:   common.HexToHash("0x01"),
+		cache:  fastcache.New(1024 * 500),
+	}
+	snaps := &Tree{
+		layers: map[common.Hash]snapshot{
+			base.root: base,
+		},
+	}
+	for i := 1; i <= 100; i++ {
+		snaps.Update(common.HexToHash(fmt.Sprintf("0x%02x", i+1)), common.HexToHash(fmt.Sprintf("0x%02x", i)), nil, makeAccounts(200), nil)
+	}
+	// We call this once before the benchmark, so the creation of
+	// sorted accountlists are not included in the results.
+	head := snaps.Snapshot(common.HexToHash("0x65"))
+	head.(*diffLayer).newBinaryAccountIterator()
+
+	b.Run("binary iterator keys", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			got := 0
+			it := head.(*diffLayer).newBinaryAccountIterator()
+			for it.Next() {
+				got++
+			}
+			if exp := 200; got != exp {
+				b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
+			}
+		}
+	})
+	b.Run("binary iterator values", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			got := 0
+			it := head.(*diffLayer).newBinaryAccountIterator()
+			for it.Next() {
+				got++
+				head.(*diffLayer).accountRLP(it.Hash(), 0)
+			}
+			if exp := 200; got != exp {
+				b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
+			}
+		}
+	})
+	b.Run("fast iterator keys", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			it, _ := snaps.AccountIterator(common.HexToHash("0x65"), common.Hash{})
+			defer it.Release()
+
+			got := 0
+			for it.Next() {
+				got++
+			}
+			if exp := 200; got != exp {
+				b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
+			}
+		}
+	})
+	b.Run("fast iterator values", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			it, _ := snaps.AccountIterator(common.HexToHash("0x65"), common.Hash{})
+			defer it.Release()
+
+			got := 0
+			for it.Next() {
+				got++
+				it.Account()
+			}
+			if exp := 200; got != exp {
+				b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
+			}
+		}
+	})
+}
+
+// BenchmarkAccountIteratorLargeBaselayer is a pretty realistic benchmark, where
+// the baselayer is a lot larger than the upper layer.
+//
+// This is heavy on the binary iterator, which in most cases will have to
+// call recursively 100 times for the majority of the values
+//
+// BenchmarkAccountIteratorLargeBaselayer/binary_iterator_(keys)-6         	     514	   1971999 ns/op
+// BenchmarkAccountIteratorLargeBaselayer/binary_iterator_(values)-6       	      61	  18997492 ns/op
+// BenchmarkAccountIteratorLargeBaselayer/fast_iterator_(keys)-6           	   10000	    114385 ns/op
+// BenchmarkAccountIteratorLargeBaselayer/fast_iterator_(values)-6         	    4047	    296823 ns/op
+func BenchmarkAccountIteratorLargeBaselayer(b *testing.B) {
+	// Create a custom account factory to recreate the same addresses
+	makeAccounts := func(num int) map[common.Hash][]byte {
+		accounts := make(map[common.Hash][]byte)
+		for i := 0; i < num; i++ {
+			h := common.Hash{}
+			binary.BigEndian.PutUint64(h[:], uint64(i+1))
+			accounts[h] = randomAccount()
+		}
+		return accounts
+	}
+	// Build up a large stack of snapshots
+	base := &diskLayer{
+		diskdb: rawdb.NewMemoryDatabase(),
+		root:   common.HexToHash("0x01"),
+		cache:  fastcache.New(1024 * 500),
+	}
+	snaps := &Tree{
+		layers: map[common.Hash]snapshot{
+			base.root: base,
+		},
+	}
+	snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, makeAccounts(2000), nil)
+	for i := 2; i <= 100; i++ {
+		snaps.Update(common.HexToHash(fmt.Sprintf("0x%02x", i+1)), common.HexToHash(fmt.Sprintf("0x%02x", i)), nil, makeAccounts(20), nil)
+	}
+	// We call this once before the benchmark, so the creation of
+	// sorted accountlists are not included in the results.
+	head := snaps.Snapshot(common.HexToHash("0x65"))
+	head.(*diffLayer).newBinaryAccountIterator()
+
+	b.Run("binary iterator (keys)", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			got := 0
+			it := head.(*diffLayer).newBinaryAccountIterator()
+			for it.Next() {
+				got++
+			}
+			if exp := 2000; got != exp {
+				b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
+			}
+		}
+	})
+	b.Run("binary iterator (values)", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			got := 0
+			it := head.(*diffLayer).newBinaryAccountIterator()
+			for it.Next() {
+				got++
+				v := it.Hash()
+				head.(*diffLayer).accountRLP(v, 0)
+			}
+			if exp := 2000; got != exp {
+				b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
+			}
+		}
+	})
+	b.Run("fast iterator (keys)", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			it, _ := snaps.AccountIterator(common.HexToHash("0x65"), common.Hash{})
+			defer it.Release()
+
+			got := 0
+			for it.Next() {
+				got++
+			}
+			if exp := 2000; got != exp {
+				b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
+			}
+		}
+	})
+	b.Run("fast iterator (values)", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			it, _ := snaps.AccountIterator(common.HexToHash("0x65"), common.Hash{})
+			defer it.Release()
+
+			got := 0
+			for it.Next() {
+				it.Account()
+				got++
+			}
+			if exp := 2000; got != exp {
+				b.Errorf("iterator len wrong, expected %d, got %d", exp, got)
+			}
+		}
+	})
+}
+
+/*
+func BenchmarkBinaryAccountIteration(b *testing.B) {
+	benchmarkAccountIteration(b, func(snap snapshot) AccountIterator {
+		return snap.(*diffLayer).newBinaryAccountIterator()
+	})
+}
+
+func BenchmarkFastAccountIteration(b *testing.B) {
+	benchmarkAccountIteration(b, newFastAccountIterator)
+}
+
+func benchmarkAccountIteration(b *testing.B, iterator func(snap snapshot) AccountIterator) {
+	// Create a diff stack and randomize the accounts across them
+	layers := make([]map[common.Hash][]byte, 128)
+	for i := 0; i < len(layers); i++ {
+		layers[i] = make(map[common.Hash][]byte)
+	}
+	for i := 0; i < b.N; i++ {
+		depth := rand.Intn(len(layers))
+		layers[depth][randomHash()] = randomAccount()
+	}
+	stack := snapshot(emptyLayer())
+	for _, layer := range layers {
+		stack = stack.Update(common.Hash{}, layer, nil, nil)
+	}
+	// Reset the timers and report all the stats
+	it := iterator(stack)
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for it.Next() {
+	}
+}
+*/
diff --git a/core/state/snapshot/journal.go b/core/state/snapshot/journal.go
new file mode 100644
index 0000000..3d4c9de
--- /dev/null
+++ b/core/state/snapshot/journal.go
@@ -0,0 +1,270 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snapshot
+
+import (
+	"bytes"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"io"
+	"time"
+
+	"github.com/VictoriaMetrics/fastcache"
+	"github.com/ava-labs/coreth/core/rawdb"
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/ethdb"
+	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum/go-ethereum/rlp"
+	"github.com/ethereum/go-ethereum/trie"
+)
+
+// journalGenerator is a disk layer entry containing the generator progress marker.
+type journalGenerator struct {
+	Wiping   bool // Whether the database was in progress of being wiped
+	Done     bool // Whether the generator finished creating the snapshot
+	Marker   []byte
+	Accounts uint64
+	Slots    uint64
+	Storage  uint64
+}
+
+// journalDestruct is an account deletion entry in a diffLayer's disk journal.
+type journalDestruct struct {
+	Hash common.Hash
+}
+
+// journalAccount is an account entry in a diffLayer's disk journal.
+type journalAccount struct {
+	Hash common.Hash
+	Blob []byte
+}
+
+// journalStorage is an account's storage map in a diffLayer's disk journal.
+type journalStorage struct {
+	Hash common.Hash
+	Keys []common.Hash
+	Vals [][]byte
+}
+
+// loadSnapshot loads a pre-existing state snapshot backed by a key-value store.
+func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash) (snapshot, error) {
+	// Retrieve the block number and hash of the snapshot, failing if no snapshot
+	// is present in the database (or crashed mid-update).
+	baseRoot := rawdb.ReadSnapshotRoot(diskdb)
+	if baseRoot == (common.Hash{}) {
+		return nil, errors.New("missing or corrupted snapshot")
+	}
+	base := &diskLayer{
+		diskdb: diskdb,
+		triedb: triedb,
+		cache:  fastcache.New(cache * 1024 * 1024),
+		root:   baseRoot,
+	}
+	// Retrieve the journal, it must exist since even for 0 layer it stores whether
+	// we've already generated the snapshot or are in progress only
+	journal := rawdb.ReadSnapshotJournal(diskdb)
+	if len(journal) == 0 {
+		return nil, errors.New("missing or corrupted snapshot journal")
+	}
+	r := rlp.NewStream(bytes.NewReader(journal), 0)
+
+	// Read the snapshot generation progress for the disk layer
+	var generator journalGenerator
+	if err := r.Decode(&generator); err != nil {
+		return nil, fmt.Errorf("failed to load snapshot progress marker: %v", err)
+	}
+	// Load all the snapshot diffs from the journal
+	snapshot, err := loadDiffLayer(base, r)
+	if err != nil {
+		return nil, err
+	}
+	// Entire snapshot journal loaded, sanity check the head and return
+	// Journal doesn't exist, don't worry if it's not supposed to
+	if head := snapshot.Root(); head != root {
+		return nil, fmt.Errorf("head doesn't match snapshot: have %#x, want %#x", head, root)
+	}
+	// Everything loaded correctly, resume any suspended operations
+	if !generator.Done {
+		// If the generator was still wiping, restart one from scratch (fine for
+		// now as it's rare and the wiper deletes the stuff it touches anyway, so
+		// restarting won't incur a lot of extra database hops.
+		var wiper chan struct{}
+		if generator.Wiping {
+			log.Info("Resuming previous snapshot wipe")
+			wiper = wipeSnapshot(diskdb, false)
+		}
+		// Whether or not wiping was in progress, load any generator progress too
+		base.genMarker = generator.Marker
+		if base.genMarker == nil {
+			base.genMarker = []byte{}
+		}
+		base.genPending = make(chan struct{})
+		base.genAbort = make(chan chan *generatorStats)
+
+		var origin uint64
+		if len(generator.Marker) >= 8 {
+			origin = binary.BigEndian.Uint64(generator.Marker)
+		}
+		go base.generate(&generatorStats{
+			wiping:   wiper,
+			origin:   origin,
+			start:    time.Now(),
+			accounts: generator.Accounts,
+			slots:    generator.Slots,
+			storage:  common.StorageSize(generator.Storage),
+		})
+	}
+	return snapshot, nil
+}
+
+// loadDiffLayer reads the next sections of a snapshot journal, reconstructing a new
+// diff and verifying that it can be linked to the requested parent.
+func loadDiffLayer(parent snapshot, r *rlp.Stream) (snapshot, error) {
+	// Read the next diff journal entry
+	var root common.Hash
+	if err := r.Decode(&root); err != nil {
+		// The first read may fail with EOF, marking the end of the journal
+		if err == io.EOF {
+			return parent, nil
+		}
+		return nil, fmt.Errorf("load diff root: %v", err)
+	}
+	var destructs []journalDestruct
+	if err := r.Decode(&destructs); err != nil {
+		return nil, fmt.Errorf("load diff destructs: %v", err)
+	}
+	destructSet := make(map[common.Hash]struct{})
+	for _, entry := range destructs {
+		destructSet[entry.Hash] = struct{}{}
+	}
+	var accounts []journalAccount
+	if err := r.Decode(&accounts); err != nil {
+		return nil, fmt.Errorf("load diff accounts: %v", err)
+	}
+	accountData := make(map[common.Hash][]byte)
+	for _, entry := range accounts {
+		if len(entry.Blob) > 0 { // RLP loses nil-ness, but `[]byte{}` is not a valid item, so reinterpret that
+			accountData[entry.Hash] = entry.Blob
+		} else {
+			accountData[entry.Hash] = nil
+		}
+	}
+	var storage []journalStorage
+	if err := r.Decode(&storage); err != nil {
+		return nil, fmt.Errorf("load diff storage: %v", err)
+	}
+	storageData := make(map[common.Hash]map[common.Hash][]byte)
+	for _, entry := range storage {
+		slots := make(map[common.Hash][]byte)
+		for i, key := range entry.Keys {
+			if len(entry.Vals[i]) > 0 { // RLP loses nil-ness, but `[]byte{}` is not a valid item, so reinterpret that
+				slots[key] = entry.Vals[i]
+			} else {
+				slots[key] = nil
+			}
+		}
+		storageData[entry.Hash] = slots
+	}
+	return loadDiffLayer(newDiffLayer(parent, root, destructSet, accountData, storageData), r)
+}
+
+// Journal writes the persistent layer generator stats into a buffer to be stored
+// in the database as the snapshot journal.
+func (dl *diskLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) {
+	// If the snapshot is currently being generated, abort it
+	var stats *generatorStats
+	if dl.genAbort != nil {
+		abort := make(chan *generatorStats)
+		dl.genAbort <- abort
+
+		if stats = <-abort; stats != nil {
+			stats.Log("Journalling in-progress snapshot", dl.root, dl.genMarker)
+		}
+	}
+	// Ensure the layer didn't get stale
+	dl.lock.RLock()
+	defer dl.lock.RUnlock()
+
+	if dl.stale {
+		return common.Hash{}, ErrSnapshotStale
+	}
+	// Write out the generator marker
+	entry := journalGenerator{
+		Done:   dl.genMarker == nil,
+		Marker: dl.genMarker,
+	}
+	if stats != nil {
+		entry.Wiping = (stats.wiping != nil)
+		entry.Accounts = stats.accounts
+		entry.Slots = stats.slots
+		entry.Storage = uint64(stats.storage)
+	}
+	if err := rlp.Encode(buffer, entry); err != nil {
+		return common.Hash{}, err
+	}
+	return dl.root, nil
+}
+
+// Journal writes the memory layer contents into a buffer to be stored in the
+// database as the snapshot journal.
+func (dl *diffLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) {
+	// Journal the parent first
+	base, err := dl.parent.Journal(buffer)
+	if err != nil {
+		return common.Hash{}, err
+	}
+	// Ensure the layer didn't get stale
+	dl.lock.RLock()
+	defer dl.lock.RUnlock()
+
+	if dl.Stale() {
+		return common.Hash{}, ErrSnapshotStale
+	}
+	// Everything below was journalled, persist this layer too
+	if err := rlp.Encode(buffer, dl.root); err != nil {
+		return common.Hash{}, err
+	}
+	destructs := make([]journalDestruct, 0, len(dl.destructSet))
+	for hash := range dl.destructSet {
+		destructs = append(destructs, journalDestruct{Hash: hash})
+	}
+	if err := rlp.Encode(buffer, destructs); err != nil {
+		return common.Hash{}, err
+	}
+	accounts := make([]journalAccount, 0, len(dl.accountData))
+	for hash, blob := range dl.accountData {
+		accounts = append(accounts, journalAccount{Hash: hash, Blob: blob})
+	}
+	if err := rlp.Encode(buffer, accounts); err != nil {
+		return common.Hash{}, err
+	}
+	storage := make([]journalStorage, 0, len(dl.storageData))
+	for hash, slots := range dl.storageData {
+		keys := make([]common.Hash, 0, len(slots))
+		vals := make([][]byte, 0, len(slots))
+		for key, val := range slots {
+			keys = append(keys, key)
+			vals = append(vals, val)
+		}
+		storage = append(storage, journalStorage{Hash: hash, Keys: keys, Vals: vals})
+	}
+	if err := rlp.Encode(buffer, storage); err != nil {
+		return common.Hash{}, err
+	}
+	return base, nil
+}
diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go
new file mode 100644
index 0000000..3348685
--- /dev/null
+++ b/core/state/snapshot/snapshot.go
@@ -0,0 +1,619 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+// Package snapshot implements a journalled, dynamic state dump.
+package snapshot
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"sync"
+	"sync/atomic"
+
+	"github.com/ava-labs/coreth/core/rawdb"
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/ethdb"
+	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum/go-ethereum/metrics"
+	"github.com/ethereum/go-ethereum/trie"
+)
+
+var (
+	snapshotCleanAccountHitMeter   = metrics.NewRegisteredMeter("state/snapshot/clean/account/hit", nil)
+	snapshotCleanAccountMissMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/miss", nil)
+	snapshotCleanAccountInexMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/inex", nil)
+	snapshotCleanAccountReadMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/read", nil)
+	snapshotCleanAccountWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/write", nil)
+
+	snapshotCleanStorageHitMeter   = metrics.NewRegisteredMeter("state/snapshot/clean/storage/hit", nil)
+	snapshotCleanStorageMissMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/miss", nil)
+	snapshotCleanStorageInexMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/inex", nil)
+	snapshotCleanStorageReadMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/read", nil)
+	snapshotCleanStorageWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/write", nil)
+
+	snapshotDirtyAccountHitMeter   = metrics.NewRegisteredMeter("state/snapshot/dirty/account/hit", nil)
+	snapshotDirtyAccountMissMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/miss", nil)
+	snapshotDirtyAccountInexMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/inex", nil)
+	snapshotDirtyAccountReadMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/read", nil)
+	snapshotDirtyAccountWriteMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/account/write", nil)
+
+	snapshotDirtyStorageHitMeter   = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/hit", nil)
+	snapshotDirtyStorageMissMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/miss", nil)
+	snapshotDirtyStorageInexMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/inex", nil)
+	snapshotDirtyStorageReadMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/read", nil)
+	snapshotDirtyStorageWriteMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/write", nil)
+
+	snapshotDirtyAccountHitDepthHist = metrics.NewRegisteredHistogram("state/snapshot/dirty/account/hit/depth", nil, metrics.NewExpDecaySample(1028, 0.015))
+	snapshotDirtyStorageHitDepthHist = metrics.NewRegisteredHistogram("state/snapshot/dirty/storage/hit/depth", nil, metrics.NewExpDecaySample(1028, 0.015))
+
+	snapshotFlushAccountItemMeter = metrics.NewRegisteredMeter("state/snapshot/flush/account/item", nil)
+	snapshotFlushAccountSizeMeter = metrics.NewRegisteredMeter("state/snapshot/flush/account/size", nil)
+	snapshotFlushStorageItemMeter = metrics.NewRegisteredMeter("state/snapshot/flush/storage/item", nil)
+	snapshotFlushStorageSizeMeter = metrics.NewRegisteredMeter("state/snapshot/flush/storage/size", nil)
+
+	snapshotBloomIndexTimer = metrics.NewRegisteredResettingTimer("state/snapshot/bloom/index", nil)
+	snapshotBloomErrorGauge = metrics.NewRegisteredGaugeFloat64("state/snapshot/bloom/error", nil)
+
+	snapshotBloomAccountTrueHitMeter  = metrics.NewRegisteredMeter("state/snapshot/bloom/account/truehit", nil)
+	snapshotBloomAccountFalseHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/account/falsehit", nil)
+	snapshotBloomAccountMissMeter     = metrics.NewRegisteredMeter("state/snapshot/bloom/account/miss", nil)
+
+	snapshotBloomStorageTrueHitMeter  = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/truehit", nil)
+	snapshotBloomStorageFalseHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/falsehit", nil)
+	snapshotBloomStorageMissMeter     = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/miss", nil)
+
+	// ErrSnapshotStale is returned from data accessors if the underlying snapshot
+	// layer had been invalidated due to the chain progressing forward far enough
+	// to not maintain the layer's original state.
+	ErrSnapshotStale = errors.New("snapshot stale")
+
+	// ErrNotCoveredYet is returned from data accessors if the underlying snapshot
+	// is being generated currently and the requested data item is not yet in the
+	// range of accounts covered.
+	ErrNotCoveredYet = errors.New("not covered yet")
+
+	// errSnapshotCycle is returned if a snapshot is attempted to be inserted
+	// that forms a cycle in the snapshot tree.
+	errSnapshotCycle = errors.New("snapshot cycle")
+)
+
+// Snapshot represents the functionality supported by a snapshot storage layer.
+type Snapshot interface {
+	// Root returns the root hash for which this snapshot was made.
+	Root() common.Hash
+
+	// Account directly retrieves the account associated with a particular hash in
+	// the snapshot slim data format.
+	Account(hash common.Hash) (*Account, error)
+
+	// AccountRLP directly retrieves the account RLP associated with a particular
+	// hash in the snapshot slim data format.
+	AccountRLP(hash common.Hash) ([]byte, error)
+
+	// Storage directly retrieves the storage data associated with a particular hash,
+	// within a particular account.
+	Storage(accountHash, storageHash common.Hash) ([]byte, error)
+}
+
+// snapshot is the internal version of the snapshot data layer that supports some
+// additional methods compared to the public API.
+type snapshot interface {
+	Snapshot
+
+	// Parent returns the subsequent layer of a snapshot, or nil if the base was
+	// reached.
+	//
+	// Note, the method is an internal helper to avoid type switching between the
+	// disk and diff layers. There is no locking involved.
+	Parent() snapshot
+
+	// Update creates a new layer on top of the existing snapshot diff tree with
+	// the specified data items.
+	//
+	// Note, the maps are retained by the method to avoid copying everything.
+	Update(blockRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer
+
+	// Journal commits an entire diff hierarchy to disk into a single journal entry.
+	// This is meant to be used during shutdown to persist the snapshot without
+	// flattening everything down (bad for reorgs).
+	Journal(buffer *bytes.Buffer) (common.Hash, error)
+
+	// Stale return whether this layer has become stale (was flattened across) or
+	// if it's still live.
+	Stale() bool
+
+	// AccountIterator creates an account iterator over an arbitrary layer.
+	AccountIterator(seek common.Hash) AccountIterator
+
+	// StorageIterator creates a storage iterator over an arbitrary layer.
+	StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool)
+}
+
+// SnapshotTree is an Ethereum state snapshot tree. It consists of one persistent
+// base layer backed by a key-value store, on top of which arbitrarily many in-
+// memory diff layers are topped. The memory diffs can form a tree with branching,
+// but the disk layer is singleton and common to all. If a reorg goes deeper than
+// the disk layer, everything needs to be deleted.
+//
+// The goal of a state snapshot is twofold: to allow direct access to account and
+// storage data to avoid expensive multi-level trie lookups; and to allow sorted,
+// cheap iteration of the account/storage tries for sync aid.
+type Tree struct {
+	diskdb ethdb.KeyValueStore      // Persistent database to store the snapshot
+	triedb *trie.Database           // In-memory cache to access the trie through
+	cache  int                      // Megabytes permitted to use for read caches
+	layers map[common.Hash]snapshot // Collection of all known layers
+	lock   sync.RWMutex
+}
+
+// New attempts to load an already existing snapshot from a persistent key-value
+// store (with a number of memory layers from a journal), ensuring that the head
+// of the snapshot matches the expected one.
+//
+// If the snapshot is missing or inconsistent, the entirety is deleted and will
+// be reconstructed from scratch based on the tries in the key-value store, on a
+// background thread.
+func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, async bool) *Tree {
+	// Create a new, empty snapshot tree
+	snap := &Tree{
+		diskdb: diskdb,
+		triedb: triedb,
+		cache:  cache,
+		layers: make(map[common.Hash]snapshot),
+	}
+	if !async {
+		defer snap.waitBuild()
+	}
+	// Attempt to load a previously persisted snapshot and rebuild one if failed
+	head, err := loadSnapshot(diskdb, triedb, cache, root)
+	if err != nil {
+		log.Warn("Failed to load snapshot, regenerating", "err", err)
+		snap.Rebuild(root)
+		return snap
+	}
+	// Existing snapshot loaded, seed all the layers
+	for head != nil {
+		snap.layers[head.Root()] = head
+		head = head.Parent()
+	}
+	return snap
+}
+
+// waitBuild blocks until the snapshot finishes rebuilding. This method is meant
+// to  be used by tests to ensure we're testing what we believe we are.
+func (t *Tree) waitBuild() {
+	// Find the rebuild termination channel
+	var done chan struct{}
+
+	t.lock.RLock()
+	for _, layer := range t.layers {
+		if layer, ok := layer.(*diskLayer); ok {
+			done = layer.genPending
+			break
+		}
+	}
+	t.lock.RUnlock()
+
+	// Wait until the snapshot is generated
+	if done != nil {
+		<-done
+	}
+}
+
+// Snapshot retrieves a snapshot belonging to the given block root, or nil if no
+// snapshot is maintained for that block.
+func (t *Tree) Snapshot(blockRoot common.Hash) Snapshot {
+	t.lock.RLock()
+	defer t.lock.RUnlock()
+
+	return t.layers[blockRoot]
+}
+
+// Update adds a new snapshot into the tree, if that can be linked to an existing
+// old parent. It is disallowed to insert a disk layer (the origin of all).
+func (t *Tree) Update(blockRoot common.Hash, parentRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error {
+	// Reject noop updates to avoid self-loops in the snapshot tree. This is a
+	// special case that can only happen for Clique networks where empty blocks
+	// don't modify the state (0 block subsidy).
+	//
+	// Although we could silently ignore this internally, it should be the caller's
+	// responsibility to avoid even attempting to insert such a snapshot.
+	if blockRoot == parentRoot {
+		return errSnapshotCycle
+	}
+	// Generate a new snapshot on top of the parent
+	parent := t.Snapshot(parentRoot).(snapshot)
+	if parent == nil {
+		return fmt.Errorf("parent [%#x] snapshot missing", parentRoot)
+	}
+	snap := parent.Update(blockRoot, destructs, accounts, storage)
+
+	// Save the new snapshot for later
+	t.lock.Lock()
+	defer t.lock.Unlock()
+
+	t.layers[snap.root] = snap
+	return nil
+}
+
+// Cap traverses downwards the snapshot tree from a head block hash until the
+// number of allowed layers are crossed. All layers beyond the permitted number
+// are flattened downwards.
+func (t *Tree) Cap(root common.Hash, layers int) error {
+	// Retrieve the head snapshot to cap from
+	snap := t.Snapshot(root)
+	if snap == nil {
+		return fmt.Errorf("snapshot [%#x] missing", root)
+	}
+	diff, ok := snap.(*diffLayer)
+	if !ok {
+		return fmt.Errorf("snapshot [%#x] is disk layer", root)
+	}
+	// If the generator is still running, use a more aggressive cap
+	diff.origin.lock.RLock()
+	if diff.origin.genMarker != nil && layers > 8 {
+		layers = 8
+	}
+	diff.origin.lock.RUnlock()
+
+	// Run the internal capping and discard all stale layers
+	t.lock.Lock()
+	defer t.lock.Unlock()
+
+	// Flattening the bottom-most diff layer requires special casing since there's
+	// no child to rewire to the grandparent. In that case we can fake a temporary
+	// child for the capping and then remove it.
+	var persisted *diskLayer
+
+	switch layers {
+	case 0:
+		// If full commit was requested, flatten the diffs and merge onto disk
+		diff.lock.RLock()
+		base := diffToDisk(diff.flatten().(*diffLayer))
+		diff.lock.RUnlock()
+
+		// Replace the entire snapshot tree with the flat base
+		t.layers = map[common.Hash]snapshot{base.root: base}
+		return nil
+
+	case 1:
+		// If full flattening was requested, flatten the diffs but only merge if the
+		// memory limit was reached
+		var (
+			bottom *diffLayer
+			base   *diskLayer
+		)
+		diff.lock.RLock()
+		bottom = diff.flatten().(*diffLayer)
+		if bottom.memory >= aggregatorMemoryLimit {
+			base = diffToDisk(bottom)
+		}
+		diff.lock.RUnlock()
+
+		// If all diff layers were removed, replace the entire snapshot tree
+		if base != nil {
+			t.layers = map[common.Hash]snapshot{base.root: base}
+			return nil
+		}
+		// Merge the new aggregated layer into the snapshot tree, clean stales below
+		t.layers[bottom.root] = bottom
+
+	default:
+		// Many layers requested to be retained, cap normally
+		persisted = t.cap(diff, layers)
+	}
+	// Remove any layer that is stale or links into a stale layer
+	children := make(map[common.Hash][]common.Hash)
+	for root, snap := range t.layers {
+		if diff, ok := snap.(*diffLayer); ok {
+			parent := diff.parent.Root()
+			children[parent] = append(children[parent], root)
+		}
+	}
+	var remove func(root common.Hash)
+	remove = func(root common.Hash) {
+		delete(t.layers, root)
+		for _, child := range children[root] {
+			remove(child)
+		}
+		delete(children, root)
+	}
+	for root, snap := range t.layers {
+		if snap.Stale() {
+			remove(root)
+		}
+	}
+	// If the disk layer was modified, regenerate all the cumulative blooms
+	if persisted != nil {
+		var rebloom func(root common.Hash)
+		rebloom = func(root common.Hash) {
+			if diff, ok := t.layers[root].(*diffLayer); ok {
+				diff.rebloom(persisted)
+			}
+			for _, child := range children[root] {
+				rebloom(child)
+			}
+		}
+		rebloom(persisted.root)
+	}
+	return nil
+}
+
+// cap traverses downwards the diff tree until the number of allowed layers are
+// crossed. All diffs beyond the permitted number are flattened downwards. If the
+// layer limit is reached, memory cap is also enforced (but not before).
+//
+// The method returns the new disk layer if diffs were persistend into it.
+func (t *Tree) cap(diff *diffLayer, layers int) *diskLayer {
+	// Dive until we run out of layers or reach the persistent database
+	for ; layers > 2; layers-- {
+		// If we still have diff layers below, continue down
+		if parent, ok := diff.parent.(*diffLayer); ok {
+			diff = parent
+		} else {
+			// Diff stack too shallow, return without modifications
+			return nil
+		}
+	}
+	// We're out of layers, flatten anything below, stopping if it's the disk or if
+	// the memory limit is not yet exceeded.
+	switch parent := diff.parent.(type) {
+	case *diskLayer:
+		return nil
+
+	case *diffLayer:
+		// Flatten the parent into the grandparent. The flattening internally obtains a
+		// write lock on grandparent.
+		flattened := parent.flatten().(*diffLayer)
+		t.layers[flattened.root] = flattened
+
+		diff.lock.Lock()
+		defer diff.lock.Unlock()
+
+		diff.parent = flattened
+		if flattened.memory < aggregatorMemoryLimit {
+			// Accumulator layer is smaller than the limit, so we can abort, unless
+			// there's a snapshot being generated currently. In that case, the trie
+			// will move fron underneath the generator so we **must** merge all the
+			// partial data down into the snapshot and restart the generation.
+			if flattened.parent.(*diskLayer).genAbort == nil {
+				return nil
+			}
+		}
+	default:
+		panic(fmt.Sprintf("unknown data layer: %T", parent))
+	}
+	// If the bottom-most layer is larger than our memory cap, persist to disk
+	bottom := diff.parent.(*diffLayer)
+
+	bottom.lock.RLock()
+	base := diffToDisk(bottom)
+	bottom.lock.RUnlock()
+
+	t.layers[base.root] = base
+	diff.parent = base
+	return base
+}
+
+// diffToDisk merges a bottom-most diff into the persistent disk layer underneath
+// it. The method will panic if called onto a non-bottom-most diff layer.
+func diffToDisk(bottom *diffLayer) *diskLayer {
+	var (
+		base  = bottom.parent.(*diskLayer)
+		batch = base.diskdb.NewBatch()
+		stats *generatorStats
+	)
+	// If the disk layer is running a snapshot generator, abort it
+	if base.genAbort != nil {
+		abort := make(chan *generatorStats)
+		base.genAbort <- abort
+		stats = <-abort
+	}
+	// Start by temporarily deleting the current snapshot block marker. This
+	// ensures that in the case of a crash, the entire snapshot is invalidated.
+	rawdb.DeleteSnapshotRoot(batch)
+
+	// Mark the original base as stale as we're going to create a new wrapper
+	base.lock.Lock()
+	if base.stale {
+		panic("parent disk layer is stale") // we've committed into the same base from two children, boo
+	}
+	base.stale = true
+	base.lock.Unlock()
+
+	// Destroy all the destructed accounts from the database
+	for hash := range bottom.destructSet {
+		// Skip any account not covered yet by the snapshot
+		if base.genMarker != nil && bytes.Compare(hash[:], base.genMarker) > 0 {
+			continue
+		}
+		// Remove all storage slots
+		rawdb.DeleteAccountSnapshot(batch, hash)
+		base.cache.Set(hash[:], nil)
+
+		it := rawdb.IterateStorageSnapshots(base.diskdb, hash)
+		for it.Next() {
+			if key := it.Key(); len(key) == 65 { // TODO(karalabe): Yuck, we should move this into the iterator
+				batch.Delete(key)
+				base.cache.Del(key[1:])
+
+				snapshotFlushStorageItemMeter.Mark(1)
+			}
+		}
+		it.Release()
+	}
+	// Push all updated accounts into the database
+	for hash, data := range bottom.accountData {
+		// Skip any account not covered yet by the snapshot
+		if base.genMarker != nil && bytes.Compare(hash[:], base.genMarker) > 0 {
+			continue
+		}
+		// Push the account to disk
+		rawdb.WriteAccountSnapshot(batch, hash, data)
+		base.cache.Set(hash[:], data)
+		snapshotCleanAccountWriteMeter.Mark(int64(len(data)))
+
+		if batch.ValueSize() > ethdb.IdealBatchSize {
+			if err := batch.Write(); err != nil {
+				log.Crit("Failed to write account snapshot", "err", err)
+			}
+			batch.Reset()
+		}
+		snapshotFlushAccountItemMeter.Mark(1)
+		snapshotFlushAccountSizeMeter.Mark(int64(len(data)))
+	}
+	// Push all the storage slots into the database
+	for accountHash, storage := range bottom.storageData {
+		// Skip any account not covered yet by the snapshot
+		if base.genMarker != nil && bytes.Compare(accountHash[:], base.genMarker) > 0 {
+			continue
+		}
+		// Generation might be mid-account, track that case too
+		midAccount := base.genMarker != nil && bytes.Equal(accountHash[:], base.genMarker[:common.HashLength])
+
+		for storageHash, data := range storage {
+			// Skip any slot not covered yet by the snapshot
+			if midAccount && bytes.Compare(storageHash[:], base.genMarker[common.HashLength:]) > 0 {
+				continue
+			}
+			if len(data) > 0 {
+				rawdb.WriteStorageSnapshot(batch, accountHash, storageHash, data)
+				base.cache.Set(append(accountHash[:], storageHash[:]...), data)
+				snapshotCleanStorageWriteMeter.Mark(int64(len(data)))
+			} else {
+				rawdb.DeleteStorageSnapshot(batch, accountHash, storageHash)
+				base.cache.Set(append(accountHash[:], storageHash[:]...), nil)
+			}
+			snapshotFlushStorageItemMeter.Mark(1)
+			snapshotFlushStorageSizeMeter.Mark(int64(len(data)))
+		}
+		if batch.ValueSize() > ethdb.IdealBatchSize {
+			if err := batch.Write(); err != nil {
+				log.Crit("Failed to write storage snapshot", "err", err)
+			}
+			batch.Reset()
+		}
+	}
+	// Update the snapshot block marker and write any remainder data
+	rawdb.WriteSnapshotRoot(batch, bottom.root)
+	if err := batch.Write(); err != nil {
+		log.Crit("Failed to write leftover snapshot", "err", err)
+	}
+	res := &diskLayer{
+		root:       bottom.root,
+		cache:      base.cache,
+		diskdb:     base.diskdb,
+		triedb:     base.triedb,
+		genMarker:  base.genMarker,
+		genPending: base.genPending,
+	}
+	// If snapshot generation hasn't finished yet, port over all the starts and
+	// continue where the previous round left off.
+	//
+	// Note, the `base.genAbort` comparison is not used normally, it's checked
+	// to allow the tests to play with the marker without triggering this path.
+	if base.genMarker != nil && base.genAbort != nil {
+		res.genMarker = base.genMarker
+		res.genAbort = make(chan chan *generatorStats)
+		go res.generate(stats)
+	}
+	return res
+}
+
+// Journal commits an entire diff hierarchy to disk into a single journal entry.
+// This is meant to be used during shutdown to persist the snapshot without
+// flattening everything down (bad for reorgs).
+//
+// The method returns the root hash of the base layer that needs to be persisted
+// to disk as a trie too to allow continuing any pending generation op.
+func (t *Tree) Journal(root common.Hash) (common.Hash, error) {
+	// Retrieve the head snapshot to journal from var snap snapshot
+	snap := t.Snapshot(root)
+	if snap == nil {
+		return common.Hash{}, fmt.Errorf("snapshot [%#x] missing", root)
+	}
+	// Run the journaling
+	t.lock.Lock()
+	defer t.lock.Unlock()
+
+	journal := new(bytes.Buffer)
+	base, err := snap.(snapshot).Journal(journal)
+	if err != nil {
+		return common.Hash{}, err
+	}
+	// Store the journal into the database and return
+	rawdb.WriteSnapshotJournal(t.diskdb, journal.Bytes())
+	return base, nil
+}
+
+// Rebuild wipes all available snapshot data from the persistent database and
+// discard all caches and diff layers. Afterwards, it starts a new snapshot
+// generator with the given root hash.
+func (t *Tree) Rebuild(root common.Hash) {
+	t.lock.Lock()
+	defer t.lock.Unlock()
+
+	// Track whether there's a wipe currently running and keep it alive if so
+	var wiper chan struct{}
+
+	// Iterate over and mark all layers stale
+	for _, layer := range t.layers {
+		switch layer := layer.(type) {
+		case *diskLayer:
+			// If the base layer is generating, abort it and save
+			if layer.genAbort != nil {
+				abort := make(chan *generatorStats)
+				layer.genAbort <- abort
+
+				if stats := <-abort; stats != nil {
+					wiper = stats.wiping
+				}
+			}
+			// Layer should be inactive now, mark it as stale
+			layer.lock.Lock()
+			layer.stale = true
+			layer.lock.Unlock()
+
+		case *diffLayer:
+			// If the layer is a simple diff, simply mark as stale
+			layer.lock.Lock()
+			atomic.StoreUint32(&layer.stale, 1)
+			layer.lock.Unlock()
+
+		default:
+			panic(fmt.Sprintf("unknown layer type: %T", layer))
+		}
+	}
+	// Start generating a new snapshot from scratch on a backgroung thread. The
+	// generator will run a wiper first if there's not one running right now.
+	log.Info("Rebuilding state snapshot")
+	t.layers = map[common.Hash]snapshot{
+		root: generateSnapshot(t.diskdb, t.triedb, t.cache, root, wiper),
+	}
+}
+
+// AccountIterator creates a new account iterator for the specified root hash and
+// seeks to a starting account hash.
+func (t *Tree) AccountIterator(root common.Hash, seek common.Hash) (AccountIterator, error) {
+	return newFastAccountIterator(t, root, seek)
+}
+
+// StorageIterator creates a new storage iterator for the specified root hash and
+// account. The iterator will be move to the specific start position.
+func (t *Tree) StorageIterator(root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) {
+	return newFastStorageIterator(t, root, account, seek)
+}
diff --git a/core/state/snapshot/snapshot_test.go b/core/state/snapshot/snapshot_test.go
new file mode 100644
index 0000000..94e3610
--- /dev/null
+++ b/core/state/snapshot/snapshot_test.go
@@ -0,0 +1,371 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snapshot
+
+import (
+	"fmt"
+	"math/big"
+	"math/rand"
+	"testing"
+
+	"github.com/VictoriaMetrics/fastcache"
+	"github.com/ava-labs/coreth/core/rawdb"
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/rlp"
+)
+
+// randomHash generates a random blob of data and returns it as a hash.
+func randomHash() common.Hash {
+	var hash common.Hash
+	if n, err := rand.Read(hash[:]); n != common.HashLength || err != nil {
+		panic(err)
+	}
+	return hash
+}
+
+// randomAccount generates a random account and returns it RLP encoded.
+func randomAccount() []byte {
+	root := randomHash()
+	a := Account{
+		Balance:  big.NewInt(rand.Int63()),
+		Nonce:    rand.Uint64(),
+		Root:     root[:],
+		CodeHash: emptyCode[:],
+	}
+	data, _ := rlp.EncodeToBytes(a)
+	return data
+}
+
+// randomAccountSet generates a set of random accounts with the given strings as
+// the account address hashes.
+func randomAccountSet(hashes ...string) map[common.Hash][]byte {
+	accounts := make(map[common.Hash][]byte)
+	for _, hash := range hashes {
+		accounts[common.HexToHash(hash)] = randomAccount()
+	}
+	return accounts
+}
+
+// randomStorageSet generates a set of random slots with the given strings as
+// the slot addresses.
+func randomStorageSet(accounts []string, hashes [][]string, nilStorage [][]string) map[common.Hash]map[common.Hash][]byte {
+	storages := make(map[common.Hash]map[common.Hash][]byte)
+	for index, account := range accounts {
+		storages[common.HexToHash(account)] = make(map[common.Hash][]byte)
+
+		if index < len(hashes) {
+			hashes := hashes[index]
+			for _, hash := range hashes {
+				storages[common.HexToHash(account)][common.HexToHash(hash)] = randomHash().Bytes()
+			}
+		}
+		if index < len(nilStorage) {
+			nils := nilStorage[index]
+			for _, hash := range nils {
+				storages[common.HexToHash(account)][common.HexToHash(hash)] = nil
+			}
+		}
+	}
+	return storages
+}
+
+// Tests that if a disk layer becomes stale, no active external references will
+// be returned with junk data. This version of the test flattens every diff layer
+// to check internal corner case around the bottom-most memory accumulator.
+func TestDiskLayerExternalInvalidationFullFlatten(t *testing.T) {
+	// Create an empty base layer and a snapshot tree out of it
+	base := &diskLayer{
+		diskdb: rawdb.NewMemoryDatabase(),
+		root:   common.HexToHash("0x01"),
+		cache:  fastcache.New(1024 * 500),
+	}
+	snaps := &Tree{
+		layers: map[common.Hash]snapshot{
+			base.root: base,
+		},
+	}
+	// Retrieve a reference to the base and commit a diff on top
+	ref := snaps.Snapshot(base.root)
+
+	accounts := map[common.Hash][]byte{
+		common.HexToHash("0xa1"): randomAccount(),
+	}
+	if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, accounts, nil); err != nil {
+		t.Fatalf("failed to create a diff layer: %v", err)
+	}
+	if n := len(snaps.layers); n != 2 {
+		t.Errorf("pre-cap layer count mismatch: have %d, want %d", n, 2)
+	}
+	// Commit the diff layer onto the disk and ensure it's persisted
+	if err := snaps.Cap(common.HexToHash("0x02"), 0); err != nil {
+		t.Fatalf("failed to merge diff layer onto disk: %v", err)
+	}
+	// Since the base layer was modified, ensure that data retrieval on the external reference fail
+	if acc, err := ref.Account(common.HexToHash("0x01")); err != ErrSnapshotStale {
+		t.Errorf("stale reference returned account: %#x (err: %v)", acc, err)
+	}
+	if slot, err := ref.Storage(common.HexToHash("0xa1"), common.HexToHash("0xb1")); err != ErrSnapshotStale {
+		t.Errorf("stale reference returned storage slot: %#x (err: %v)", slot, err)
+	}
+	if n := len(snaps.layers); n != 1 {
+		t.Errorf("post-cap layer count mismatch: have %d, want %d", n, 1)
+		fmt.Println(snaps.layers)
+	}
+}
+
+// Tests that if a disk layer becomes stale, no active external references will
+// be returned with junk data. This version of the test retains the bottom diff
+// layer to check the usual mode of operation where the accumulator is retained.
+func TestDiskLayerExternalInvalidationPartialFlatten(t *testing.T) {
+	// Create an empty base layer and a snapshot tree out of it
+	base := &diskLayer{
+		diskdb: rawdb.NewMemoryDatabase(),
+		root:   common.HexToHash("0x01"),
+		cache:  fastcache.New(1024 * 500),
+	}
+	snaps := &Tree{
+		layers: map[common.Hash]snapshot{
+			base.root: base,
+		},
+	}
+	// Retrieve a reference to the base and commit two diffs on top
+	ref := snaps.Snapshot(base.root)
+
+	accounts := map[common.Hash][]byte{
+		common.HexToHash("0xa1"): randomAccount(),
+	}
+	if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, accounts, nil); err != nil {
+		t.Fatalf("failed to create a diff layer: %v", err)
+	}
+	if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), nil, accounts, nil); err != nil {
+		t.Fatalf("failed to create a diff layer: %v", err)
+	}
+	if n := len(snaps.layers); n != 3 {
+		t.Errorf("pre-cap layer count mismatch: have %d, want %d", n, 3)
+	}
+	// Commit the diff layer onto the disk and ensure it's persisted
+	defer func(memcap uint64) { aggregatorMemoryLimit = memcap }(aggregatorMemoryLimit)
+	aggregatorMemoryLimit = 0
+
+	if err := snaps.Cap(common.HexToHash("0x03"), 2); err != nil {
+		t.Fatalf("failed to merge diff layer onto disk: %v", err)
+	}
+	// Since the base layer was modified, ensure that data retrievald on the external reference fail
+	if acc, err := ref.Account(common.HexToHash("0x01")); err != ErrSnapshotStale {
+		t.Errorf("stale reference returned account: %#x (err: %v)", acc, err)
+	}
+	if slot, err := ref.Storage(common.HexToHash("0xa1"), common.HexToHash("0xb1")); err != ErrSnapshotStale {
+		t.Errorf("stale reference returned storage slot: %#x (err: %v)", slot, err)
+	}
+	if n := len(snaps.layers); n != 2 {
+		t.Errorf("post-cap layer count mismatch: have %d, want %d", n, 2)
+		fmt.Println(snaps.layers)
+	}
+}
+
+// Tests that if a diff layer becomes stale, no active external references will
+// be returned with junk data. This version of the test flattens every diff layer
+// to check internal corner case around the bottom-most memory accumulator.
+func TestDiffLayerExternalInvalidationFullFlatten(t *testing.T) {
+	// Create an empty base layer and a snapshot tree out of it
+	base := &diskLayer{
+		diskdb: rawdb.NewMemoryDatabase(),
+		root:   common.HexToHash("0x01"),
+		cache:  fastcache.New(1024 * 500),
+	}
+	snaps := &Tree{
+		layers: map[common.Hash]snapshot{
+			base.root: base,
+		},
+	}
+	// Commit two diffs on top and retrieve a reference to the bottommost
+	accounts := map[common.Hash][]byte{
+		common.HexToHash("0xa1"): randomAccount(),
+	}
+	if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, accounts, nil); err != nil {
+		t.Fatalf("failed to create a diff layer: %v", err)
+	}
+	if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), nil, accounts, nil); err != nil {
+		t.Fatalf("failed to create a diff layer: %v", err)
+	}
+	if n := len(snaps.layers); n != 3 {
+		t.Errorf("pre-cap layer count mismatch: have %d, want %d", n, 3)
+	}
+	ref := snaps.Snapshot(common.HexToHash("0x02"))
+
+	// Flatten the diff layer into the bottom accumulator
+	if err := snaps.Cap(common.HexToHash("0x03"), 1); err != nil {
+		t.Fatalf("failed to flatten diff layer into accumulator: %v", err)
+	}
+	// Since the accumulator diff layer was modified, ensure that data retrievald on the external reference fail
+	if acc, err := ref.Account(common.HexToHash("0x01")); err != ErrSnapshotStale {
+		t.Errorf("stale reference returned account: %#x (err: %v)", acc, err)
+	}
+	if slot, err := ref.Storage(common.HexToHash("0xa1"), common.HexToHash("0xb1")); err != ErrSnapshotStale {
+		t.Errorf("stale reference returned storage slot: %#x (err: %v)", slot, err)
+	}
+	if n := len(snaps.layers); n != 2 {
+		t.Errorf("post-cap layer count mismatch: have %d, want %d", n, 2)
+		fmt.Println(snaps.layers)
+	}
+}
+
+// Tests that if a diff layer becomes stale, no active external references will
+// be returned with junk data. This version of the test retains the bottom diff
+// layer to check the usual mode of operation where the accumulator is retained.
+func TestDiffLayerExternalInvalidationPartialFlatten(t *testing.T) {
+	// Create an empty base layer and a snapshot tree out of it
+	base := &diskLayer{
+		diskdb: rawdb.NewMemoryDatabase(),
+		root:   common.HexToHash("0x01"),
+		cache:  fastcache.New(1024 * 500),
+	}
+	snaps := &Tree{
+		layers: map[common.Hash]snapshot{
+			base.root: base,
+		},
+	}
+	// Commit three diffs on top and retrieve a reference to the bottommost
+	accounts := map[common.Hash][]byte{
+		common.HexToHash("0xa1"): randomAccount(),
+	}
+	if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, accounts, nil); err != nil {
+		t.Fatalf("failed to create a diff layer: %v", err)
+	}
+	if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), nil, accounts, nil); err != nil {
+		t.Fatalf("failed to create a diff layer: %v", err)
+	}
+	if err := snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), nil, accounts, nil); err != nil {
+		t.Fatalf("failed to create a diff layer: %v", err)
+	}
+	if n := len(snaps.layers); n != 4 {
+		t.Errorf("pre-cap layer count mismatch: have %d, want %d", n, 4)
+	}
+	ref := snaps.Snapshot(common.HexToHash("0x02"))
+
+	// Doing a Cap operation with many allowed layers should be a no-op
+	exp := len(snaps.layers)
+	if err := snaps.Cap(common.HexToHash("0x04"), 2000); err != nil {
+		t.Fatalf("failed to flatten diff layer into accumulator: %v", err)
+	}
+	if got := len(snaps.layers); got != exp {
+		t.Errorf("layers modified, got %d exp %d", got, exp)
+	}
+	// Flatten the diff layer into the bottom accumulator
+	if err := snaps.Cap(common.HexToHash("0x04"), 2); err != nil {
+		t.Fatalf("failed to flatten diff layer into accumulator: %v", err)
+	}
+	// Since the accumulator diff layer was modified, ensure that data retrievald on the external reference fail
+	if acc, err := ref.Account(common.HexToHash("0x01")); err != ErrSnapshotStale {
+		t.Errorf("stale reference returned account: %#x (err: %v)", acc, err)
+	}
+	if slot, err := ref.Storage(common.HexToHash("0xa1"), common.HexToHash("0xb1")); err != ErrSnapshotStale {
+		t.Errorf("stale reference returned storage slot: %#x (err: %v)", slot, err)
+	}
+	if n := len(snaps.layers); n != 3 {
+		t.Errorf("post-cap layer count mismatch: have %d, want %d", n, 3)
+		fmt.Println(snaps.layers)
+	}
+}
+
+// TestPostCapBasicDataAccess tests some functionality regarding capping/flattening.
+func TestPostCapBasicDataAccess(t *testing.T) {
+	// setAccount is a helper to construct a random account entry and assign it to
+	// an account slot in a snapshot
+	setAccount := func(accKey string) map[common.Hash][]byte {
+		return map[common.Hash][]byte{
+			common.HexToHash(accKey): randomAccount(),
+		}
+	}
+	// Create a starting base layer and a snapshot tree out of it
+	base := &diskLayer{
+		diskdb: rawdb.NewMemoryDatabase(),
+		root:   common.HexToHash("0x01"),
+		cache:  fastcache.New(1024 * 500),
+	}
+	snaps := &Tree{
+		layers: map[common.Hash]snapshot{
+			base.root: base,
+		},
+	}
+	// The lowest difflayer
+	snaps.Update(common.HexToHash("0xa1"), common.HexToHash("0x01"), nil, setAccount("0xa1"), nil)
+	snaps.Update(common.HexToHash("0xa2"), common.HexToHash("0xa1"), nil, setAccount("0xa2"), nil)
+	snaps.Update(common.HexToHash("0xb2"), common.HexToHash("0xa1"), nil, setAccount("0xb2"), nil)
+
+	snaps.Update(common.HexToHash("0xa3"), common.HexToHash("0xa2"), nil, setAccount("0xa3"), nil)
+	snaps.Update(common.HexToHash("0xb3"), common.HexToHash("0xb2"), nil, setAccount("0xb3"), nil)
+
+	// checkExist verifies if an account exiss in a snapshot
+	checkExist := func(layer *diffLayer, key string) error {
+		if data, _ := layer.Account(common.HexToHash(key)); data == nil {
+			return fmt.Errorf("expected %x to exist, got nil", common.HexToHash(key))
+		}
+		return nil
+	}
+	// shouldErr checks that an account access errors as expected
+	shouldErr := func(layer *diffLayer, key string) error {
+		if data, err := layer.Account(common.HexToHash(key)); err == nil {
+			return fmt.Errorf("expected error, got data %x", data)
+		}
+		return nil
+	}
+	// check basics
+	snap := snaps.Snapshot(common.HexToHash("0xb3")).(*diffLayer)
+
+	if err := checkExist(snap, "0xa1"); err != nil {
+		t.Error(err)
+	}
+	if err := checkExist(snap, "0xb2"); err != nil {
+		t.Error(err)
+	}
+	if err := checkExist(snap, "0xb3"); err != nil {
+		t.Error(err)
+	}
+	// Cap to a bad root should fail
+	if err := snaps.Cap(common.HexToHash("0x1337"), 0); err == nil {
+		t.Errorf("expected error, got none")
+	}
+	// Now, merge the a-chain
+	snaps.Cap(common.HexToHash("0xa3"), 0)
+
+	// At this point, a2 got merged into a1. Thus, a1 is now modified, and as a1 is
+	// the parent of b2, b2 should no longer be able to iterate into parent.
+
+	// These should still be accessible
+	if err := checkExist(snap, "0xb2"); err != nil {
+		t.Error(err)
+	}
+	if err := checkExist(snap, "0xb3"); err != nil {
+		t.Error(err)
+	}
+	// But these would need iteration into the modified parent
+	if err := shouldErr(snap, "0xa1"); err != nil {
+		t.Error(err)
+	}
+	if err := shouldErr(snap, "0xa2"); err != nil {
+		t.Error(err)
+	}
+	if err := shouldErr(snap, "0xa3"); err != nil {
+		t.Error(err)
+	}
+	// Now, merge it again, just for fun. It should now error, since a3
+	// is a disk layer
+	if err := snaps.Cap(common.HexToHash("0xa3"), 0); err == nil {
+		t.Error("expected error capping the disk layer, got none")
+	}
+}
diff --git a/core/state/snapshot/sort.go b/core/state/snapshot/sort.go
new file mode 100644
index 0000000..8884123
--- /dev/null
+++ b/core/state/snapshot/sort.go
@@ -0,0 +1,36 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snapshot
+
+import (
+	"bytes"
+
+	"github.com/ethereum/go-ethereum/common"
+)
+
+// hashes is a helper to implement sort.Interface.
+type hashes []common.Hash
+
+// Len is the number of elements in the collection.
+func (hs hashes) Len() int { return len(hs) }
+
+// Less reports whether the element with index i should sort before the element
+// with index j.
+func (hs hashes) Less(i, j int) bool { return bytes.Compare(hs[i][:], hs[j][:]) < 0 }
+
+// Swap swaps the elements with indexes i and j.
+func (hs hashes) Swap(i, j int) { hs[i], hs[j] = hs[j], hs[i] }
diff --git a/core/state/snapshot/wipe.go b/core/state/snapshot/wipe.go
new file mode 100644
index 0000000..853a1a7
--- /dev/null
+++ b/core/state/snapshot/wipe.go
@@ -0,0 +1,131 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snapshot
+
+import (
+	"bytes"
+	"time"
+
+	"github.com/ava-labs/coreth/core/rawdb"
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/ethdb"
+	"github.com/ethereum/go-ethereum/log"
+)
+
+// wipeSnapshot starts a goroutine to iterate over the entire key-value database
+// and delete all the  data associated with the snapshot (accounts, storage,
+// metadata). After all is done, the snapshot range of the database is compacted
+// to free up unused data blocks.
+func wipeSnapshot(db ethdb.KeyValueStore, full bool) chan struct{} {
+	// Wipe the snapshot root marker synchronously
+	if full {
+		rawdb.DeleteSnapshotRoot(db)
+	}
+	// Wipe everything else asynchronously
+	wiper := make(chan struct{}, 1)
+	go func() {
+		if err := wipeContent(db); err != nil {
+			log.Error("Failed to wipe state snapshot", "err", err) // Database close will trigger this
+			return
+		}
+		close(wiper)
+	}()
+	return wiper
+}
+
+// wipeContent iterates over the entire key-value database and deletes all the
+// data associated with the snapshot (accounts, storage), but not the root hash
+// as the wiper is meant to run on a background thread but the root needs to be
+// removed in sync to avoid data races. After all is done, the snapshot range of
+// the database is compacted to free up unused data blocks.
+func wipeContent(db ethdb.KeyValueStore) error {
+	if err := wipeKeyRange(db, "accounts", rawdb.SnapshotAccountPrefix, len(rawdb.SnapshotAccountPrefix)+common.HashLength); err != nil {
+		return err
+	}
+	if err := wipeKeyRange(db, "storage", rawdb.SnapshotStoragePrefix, len(rawdb.SnapshotStoragePrefix)+2*common.HashLength); err != nil {
+		return err
+	}
+	// Compact the snapshot section of the database to get rid of unused space
+	start := time.Now()
+
+	log.Info("Compacting snapshot account area ")
+	end := common.CopyBytes(rawdb.SnapshotAccountPrefix)
+	end[len(end)-1]++
+
+	if err := db.Compact(rawdb.SnapshotAccountPrefix, end); err != nil {
+		return err
+	}
+	log.Info("Compacting snapshot storage area ")
+	end = common.CopyBytes(rawdb.SnapshotStoragePrefix)
+	end[len(end)-1]++
+
+	if err := db.Compact(rawdb.SnapshotStoragePrefix, end); err != nil {
+		return err
+	}
+	log.Info("Compacted snapshot area in database", "elapsed", common.PrettyDuration(time.Since(start)))
+
+	return nil
+}
+
+// wipeKeyRange deletes a range of keys from the database starting with prefix
+// and having a specific total key length.
+func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, keylen int) error {
+	// Batch deletions together to avoid holding an iterator for too long
+	var (
+		batch = db.NewBatch()
+		items int
+	)
+	// Iterate over the key-range and delete all of them
+	start, logged := time.Now(), time.Now()
+
+	it := db.NewIterator(prefix, nil)
+	for it.Next() {
+		// Skip any keys with the correct prefix but wrong length (trie nodes)
+		key := it.Key()
+		if !bytes.HasPrefix(key, prefix) {
+			break
+		}
+		if len(key) != keylen {
+			continue
+		}
+		// Delete the key and periodically recreate the batch and iterator
+		batch.Delete(key)
+		items++
+
+		if items%10000 == 0 {
+			// Batch too large (or iterator too long lived, flush and recreate)
+			it.Release()
+			if err := batch.Write(); err != nil {
+				return err
+			}
+			batch.Reset()
+			seekPos := key[len(prefix):]
+			it = db.NewIterator(prefix, seekPos)
+
+			if time.Since(logged) > 8*time.Second {
+				log.Info("Deleting state snapshot leftovers", "kind", kind, "wiped", items, "elapsed", common.PrettyDuration(time.Since(start)))
+				logged = time.Now()
+			}
+		}
+	}
+	it.Release()
+	if err := batch.Write(); err != nil {
+		return err
+	}
+	log.Info("Deleted state snapshot leftovers", "kind", kind, "wiped", items, "elapsed", common.PrettyDuration(time.Since(start)))
+	return nil
+}
diff --git a/core/state/snapshot/wipe_test.go b/core/state/snapshot/wipe_test.go
new file mode 100644
index 0000000..a656982
--- /dev/null
+++ b/core/state/snapshot/wipe_test.go
@@ -0,0 +1,124 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snapshot
+
+import (
+	"math/rand"
+	"testing"
+
+	"github.com/ava-labs/coreth/core/rawdb"
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/ethdb/memorydb"
+)
+
+// Tests that given a database with random data content, all parts of a snapshot
+// can be crrectly wiped without touching anything else.
+func TestWipe(t *testing.T) {
+	// Create a database with some random snapshot data
+	db := memorydb.New()
+
+	for i := 0; i < 128; i++ {
+		account := randomHash()
+		rawdb.WriteAccountSnapshot(db, account, randomHash().Bytes())
+		for j := 0; j < 1024; j++ {
+			rawdb.WriteStorageSnapshot(db, account, randomHash(), randomHash().Bytes())
+		}
+	}
+	rawdb.WriteSnapshotRoot(db, randomHash())
+
+	// Add some random non-snapshot data too to make wiping harder
+	for i := 0; i < 65536; i++ {
+		// Generate a key that's the wrong length for a state snapshot item
+		var keysize int
+		for keysize == 0 || keysize == 32 || keysize == 64 {
+			keysize = 8 + rand.Intn(64) // +8 to ensure we will "never" randomize duplicates
+		}
+		// Randomize the suffix, dedup and inject it under the snapshot namespace
+		keysuffix := make([]byte, keysize)
+		rand.Read(keysuffix)
+
+		if rand.Int31n(2) == 0 {
+			db.Put(append(rawdb.SnapshotAccountPrefix, keysuffix...), randomHash().Bytes())
+		} else {
+			db.Put(append(rawdb.SnapshotStoragePrefix, keysuffix...), randomHash().Bytes())
+		}
+	}
+	// Sanity check that all the keys are present
+	var items int
+
+	it := db.NewIterator(rawdb.SnapshotAccountPrefix, nil)
+	defer it.Release()
+
+	for it.Next() {
+		key := it.Key()
+		if len(key) == len(rawdb.SnapshotAccountPrefix)+common.HashLength {
+			items++
+		}
+	}
+	it = db.NewIterator(rawdb.SnapshotStoragePrefix, nil)
+	defer it.Release()
+
+	for it.Next() {
+		key := it.Key()
+		if len(key) == len(rawdb.SnapshotStoragePrefix)+2*common.HashLength {
+			items++
+		}
+	}
+	if items != 128+128*1024 {
+		t.Fatalf("snapshot size mismatch: have %d, want %d", items, 128+128*1024)
+	}
+	if hash := rawdb.ReadSnapshotRoot(db); hash == (common.Hash{}) {
+		t.Errorf("snapshot block marker mismatch: have %#x, want <not-nil>", hash)
+	}
+	// Wipe all snapshot entries from the database
+	<-wipeSnapshot(db, true)
+
+	// Iterate over the database end ensure no snapshot information remains
+	it = db.NewIterator(rawdb.SnapshotAccountPrefix, nil)
+	defer it.Release()
+
+	for it.Next() {
+		key := it.Key()
+		if len(key) == len(rawdb.SnapshotAccountPrefix)+common.HashLength {
+			t.Errorf("snapshot entry remained after wipe: %x", key)
+		}
+	}
+	it = db.NewIterator(rawdb.SnapshotStoragePrefix, nil)
+	defer it.Release()
+
+	for it.Next() {
+		key := it.Key()
+		if len(key) == len(rawdb.SnapshotStoragePrefix)+2*common.HashLength {
+			t.Errorf("snapshot entry remained after wipe: %x", key)
+		}
+	}
+	if hash := rawdb.ReadSnapshotRoot(db); hash != (common.Hash{}) {
+		t.Errorf("snapshot block marker remained after wipe: %#x", hash)
+	}
+	// Iterate over the database and ensure miscellaneous items are present
+	items = 0
+
+	it = db.NewIterator(nil, nil)
+	defer it.Release()
+
+	for it.Next() {
+		items++
+	}
+	if items != 65536 {
+		t.Fatalf("misc item count mismatch: have %d, want %d", items, 65536)
+	}
+}