From 78745551c077bf54151202138c2629f288769561 Mon Sep 17 00:00:00 2001 From: Determinant Date: Tue, 15 Sep 2020 23:55:34 -0400 Subject: WIP: geth-tavum --- core/state/snapshot/account.go | 86 +++ core/state/snapshot/conversion.go | 275 +++++++++ core/state/snapshot/difflayer.go | 553 +++++++++++++++++ core/state/snapshot/difflayer_test.go | 400 ++++++++++++ core/state/snapshot/disklayer.go | 166 +++++ core/state/snapshot/disklayer_test.go | 511 ++++++++++++++++ core/state/snapshot/generate.go | 264 ++++++++ core/state/snapshot/iterator.go | 400 ++++++++++++ core/state/snapshot/iterator_binary.go | 213 +++++++ core/state/snapshot/iterator_fast.go | 350 +++++++++++ core/state/snapshot/iterator_test.go | 1046 ++++++++++++++++++++++++++++++++ core/state/snapshot/journal.go | 270 +++++++++ core/state/snapshot/snapshot.go | 619 +++++++++++++++++++ core/state/snapshot/snapshot_test.go | 371 +++++++++++ core/state/snapshot/sort.go | 36 ++ core/state/snapshot/wipe.go | 131 ++++ core/state/snapshot/wipe_test.go | 124 ++++ 17 files changed, 5815 insertions(+) create mode 100644 core/state/snapshot/account.go create mode 100644 core/state/snapshot/conversion.go create mode 100644 core/state/snapshot/difflayer.go create mode 100644 core/state/snapshot/difflayer_test.go create mode 100644 core/state/snapshot/disklayer.go create mode 100644 core/state/snapshot/disklayer_test.go create mode 100644 core/state/snapshot/generate.go create mode 100644 core/state/snapshot/iterator.go create mode 100644 core/state/snapshot/iterator_binary.go create mode 100644 core/state/snapshot/iterator_fast.go create mode 100644 core/state/snapshot/iterator_test.go create mode 100644 core/state/snapshot/journal.go create mode 100644 core/state/snapshot/snapshot.go create mode 100644 core/state/snapshot/snapshot_test.go create mode 100644 core/state/snapshot/sort.go create mode 100644 core/state/snapshot/wipe.go create mode 100644 core/state/snapshot/wipe_test.go (limited to 'core/state/snapshot') diff --git a/core/state/snapshot/account.go b/core/state/snapshot/account.go new file mode 100644 index 0000000..b92e942 --- /dev/null +++ b/core/state/snapshot/account.go @@ -0,0 +1,86 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot + +import ( + "bytes" + "math/big" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/rlp" +) + +// Account is a modified version of a state.Account, where the root is replaced +// with a byte slice. This format can be used to represent full-consensus format +// or slim-snapshot format which replaces the empty root and code hash as nil +// byte slice. +type Account struct { + Nonce uint64 + Balance *big.Int + Root []byte + CodeHash []byte +} + +// SlimAccount converts a state.Account content into a slim snapshot account +func SlimAccount(nonce uint64, balance *big.Int, root common.Hash, codehash []byte) Account { + slim := Account{ + Nonce: nonce, + Balance: balance, + } + if root != emptyRoot { + slim.Root = root[:] + } + if !bytes.Equal(codehash, emptyCode[:]) { + slim.CodeHash = codehash + } + return slim +} + +// SlimAccountRLP converts a state.Account content into a slim snapshot +// version RLP encoded. +func SlimAccountRLP(nonce uint64, balance *big.Int, root common.Hash, codehash []byte) []byte { + data, err := rlp.EncodeToBytes(SlimAccount(nonce, balance, root, codehash)) + if err != nil { + panic(err) + } + return data +} + +// FullAccount decodes the data on the 'slim RLP' format and return +// the consensus format account. +func FullAccount(data []byte) (Account, error) { + var account Account + if err := rlp.DecodeBytes(data, &account); err != nil { + return Account{}, err + } + if len(account.Root) == 0 { + account.Root = emptyRoot[:] + } + if len(account.CodeHash) == 0 { + account.CodeHash = emptyCode[:] + } + return account, nil +} + +// FullAccountRLP converts data on the 'slim RLP' format into the full RLP-format. +func FullAccountRLP(data []byte) ([]byte, error) { + account, err := FullAccount(data) + if err != nil { + return nil, err + } + return rlp.EncodeToBytes(account) +} diff --git a/core/state/snapshot/conversion.go b/core/state/snapshot/conversion.go new file mode 100644 index 0000000..dee9ff0 --- /dev/null +++ b/core/state/snapshot/conversion.go @@ -0,0 +1,275 @@ +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot + +import ( + "bytes" + "fmt" + "sync" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb/memorydb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" +) + +// trieKV represents a trie key-value pair +type trieKV struct { + key common.Hash + value []byte +} + +type ( + // trieGeneratorFn is the interface of trie generation which can + // be implemented by different trie algorithm. + trieGeneratorFn func(in chan (trieKV), out chan (common.Hash)) + + // leafCallbackFn is the callback invoked at the leaves of the trie, + // returns the subtrie root with the specified subtrie identifier. + leafCallbackFn func(hash common.Hash, stat *generateStats) common.Hash +) + +// GenerateAccountTrieRoot takes an account iterator and reproduces the root hash. +func GenerateAccountTrieRoot(it AccountIterator) (common.Hash, error) { + return generateTrieRoot(it, common.Hash{}, stdGenerate, nil, &generateStats{start: time.Now()}, true) +} + +// GenerateStorageTrieRoot takes a storage iterator and reproduces the root hash. +func GenerateStorageTrieRoot(account common.Hash, it StorageIterator) (common.Hash, error) { + return generateTrieRoot(it, account, stdGenerate, nil, &generateStats{start: time.Now()}, true) +} + +// VerifyState takes the whole snapshot tree as the input, traverses all the accounts +// as well as the corresponding storages and compares the re-computed hash with the +// original one(state root and the storage root). +func VerifyState(snaptree *Tree, root common.Hash) error { + acctIt, err := snaptree.AccountIterator(root, common.Hash{}) + if err != nil { + return err + } + defer acctIt.Release() + + got, err := generateTrieRoot(acctIt, common.Hash{}, stdGenerate, func(account common.Hash, stat *generateStats) common.Hash { + storageIt, err := snaptree.StorageIterator(root, account, common.Hash{}) + if err != nil { + return common.Hash{} + } + defer storageIt.Release() + + hash, err := generateTrieRoot(storageIt, account, stdGenerate, nil, stat, false) + if err != nil { + return common.Hash{} + } + return hash + }, &generateStats{start: time.Now()}, true) + + if err != nil { + return err + } + if got != root { + return fmt.Errorf("state root hash mismatch: got %x, want %x", got, root) + } + return nil +} + +// generateStats is a collection of statistics gathered by the trie generator +// for logging purposes. +type generateStats struct { + accounts uint64 + slots uint64 + curAccount common.Hash + curSlot common.Hash + start time.Time + lock sync.RWMutex +} + +// progress records the progress trie generator made recently. +func (stat *generateStats) progress(accounts, slots uint64, curAccount common.Hash, curSlot common.Hash) { + stat.lock.Lock() + defer stat.lock.Unlock() + + stat.accounts += accounts + stat.slots += slots + stat.curAccount = curAccount + stat.curSlot = curSlot +} + +// report prints the cumulative progress statistic smartly. +func (stat *generateStats) report() { + stat.lock.RLock() + defer stat.lock.RUnlock() + + var ctx []interface{} + if stat.curSlot != (common.Hash{}) { + ctx = append(ctx, []interface{}{ + "in", stat.curAccount, + "at", stat.curSlot, + }...) + } else { + ctx = append(ctx, []interface{}{"at", stat.curAccount}...) + } + // Add the usual measurements + ctx = append(ctx, []interface{}{"accounts", stat.accounts}...) + if stat.slots != 0 { + ctx = append(ctx, []interface{}{"slots", stat.slots}...) + } + ctx = append(ctx, []interface{}{"elapsed", common.PrettyDuration(time.Since(stat.start))}...) + log.Info("Generating trie hash from snapshot", ctx...) +} + +// reportDone prints the last log when the whole generation is finished. +func (stat *generateStats) reportDone() { + stat.lock.RLock() + defer stat.lock.RUnlock() + + var ctx []interface{} + ctx = append(ctx, []interface{}{"accounts", stat.accounts}...) + if stat.slots != 0 { + ctx = append(ctx, []interface{}{"slots", stat.slots}...) + } + ctx = append(ctx, []interface{}{"elapsed", common.PrettyDuration(time.Since(stat.start))}...) + log.Info("Generated trie hash from snapshot", ctx...) +} + +// generateTrieRoot generates the trie hash based on the snapshot iterator. +// It can be used for generating account trie, storage trie or even the +// whole state which connects the accounts and the corresponding storages. +func generateTrieRoot(it Iterator, account common.Hash, generatorFn trieGeneratorFn, leafCallback leafCallbackFn, stats *generateStats, report bool) (common.Hash, error) { + var ( + in = make(chan trieKV) // chan to pass leaves + out = make(chan common.Hash, 1) // chan to collect result + stoplog = make(chan bool, 1) // 1-size buffer, works when logging is not enabled + wg sync.WaitGroup + ) + // Spin up a go-routine for trie hash re-generation + wg.Add(1) + go func() { + defer wg.Done() + generatorFn(in, out) + }() + + // Spin up a go-routine for progress logging + if report && stats != nil { + wg.Add(1) + go func() { + defer wg.Done() + + timer := time.NewTimer(0) + defer timer.Stop() + + for { + select { + case <-timer.C: + stats.report() + timer.Reset(time.Second * 8) + case success := <-stoplog: + if success { + stats.reportDone() + } + return + } + } + }() + } + // stop is a helper function to shutdown the background threads + // and return the re-generated trie hash. + stop := func(success bool) common.Hash { + close(in) + result := <-out + stoplog <- success + wg.Wait() + return result + } + var ( + logged = time.Now() + processed = uint64(0) + leaf trieKV + last common.Hash + ) + // Start to feed leaves + for it.Next() { + if account == (common.Hash{}) { + var ( + err error + fullData []byte + ) + if leafCallback == nil { + fullData, err = FullAccountRLP(it.(AccountIterator).Account()) + if err != nil { + stop(false) + return common.Hash{}, err + } + } else { + account, err := FullAccount(it.(AccountIterator).Account()) + if err != nil { + stop(false) + return common.Hash{}, err + } + // Apply the leaf callback. Normally the callback is used to traverse + // the storage trie and re-generate the subtrie root. + subroot := leafCallback(it.Hash(), stats) + if !bytes.Equal(account.Root, subroot.Bytes()) { + stop(false) + return common.Hash{}, fmt.Errorf("invalid subroot(%x), want %x, got %x", it.Hash(), account.Root, subroot) + } + fullData, err = rlp.EncodeToBytes(account) + if err != nil { + stop(false) + return common.Hash{}, err + } + } + leaf = trieKV{it.Hash(), fullData} + } else { + leaf = trieKV{it.Hash(), common.CopyBytes(it.(StorageIterator).Slot())} + } + in <- leaf + + // Accumulate the generaation statistic if it's required. + processed++ + if time.Since(logged) > 3*time.Second && stats != nil { + if account == (common.Hash{}) { + stats.progress(processed, 0, it.Hash(), common.Hash{}) + } else { + stats.progress(0, processed, account, it.Hash()) + } + logged, processed = time.Now(), 0 + } + last = it.Hash() + } + // Commit the last part statistic. + if processed > 0 && stats != nil { + if account == (common.Hash{}) { + stats.progress(processed, 0, last, common.Hash{}) + } else { + stats.progress(0, processed, account, last) + } + } + result := stop(true) + return result, nil +} + +// stdGenerate is a very basic hexary trie builder which uses the same Trie +// as the rest of geth, with no enhancements or optimizations +func stdGenerate(in chan (trieKV), out chan (common.Hash)) { + t, _ := trie.New(common.Hash{}, trie.NewDatabase(memorydb.New())) + for leaf := range in { + t.TryUpdate(leaf.key[:], leaf.value) + } + out <- t.Hash() +} diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go new file mode 100644 index 0000000..0aef6cf --- /dev/null +++ b/core/state/snapshot/difflayer.go @@ -0,0 +1,553 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot + +import ( + "encoding/binary" + "fmt" + "math" + "math/rand" + "sort" + "sync" + "sync/atomic" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/rlp" + "github.com/steakknife/bloomfilter" +) + +var ( + // aggregatorMemoryLimit is the maximum size of the bottom-most diff layer + // that aggregates the writes from above until it's flushed into the disk + // layer. + // + // Note, bumping this up might drastically increase the size of the bloom + // filters that's stored in every diff layer. Don't do that without fully + // understanding all the implications. + aggregatorMemoryLimit = uint64(4 * 1024 * 1024) + + // aggregatorItemLimit is an approximate number of items that will end up + // in the agregator layer before it's flushed out to disk. A plain account + // weighs around 14B (+hash), a storage slot 32B (+hash), a deleted slot + // 0B (+hash). Slots are mostly set/unset in lockstep, so thet average at + // 16B (+hash). All in all, the average entry seems to be 15+32=47B. Use a + // smaller number to be on the safe side. + aggregatorItemLimit = aggregatorMemoryLimit / 42 + + // bloomTargetError is the target false positive rate when the aggregator + // layer is at its fullest. The actual value will probably move around up + // and down from this number, it's mostly a ballpark figure. + // + // Note, dropping this down might drastically increase the size of the bloom + // filters that's stored in every diff layer. Don't do that without fully + // understanding all the implications. + bloomTargetError = 0.02 + + // bloomSize is the ideal bloom filter size given the maximum number of items + // it's expected to hold and the target false positive error rate. + bloomSize = math.Ceil(float64(aggregatorItemLimit) * math.Log(bloomTargetError) / math.Log(1/math.Pow(2, math.Log(2)))) + + // bloomFuncs is the ideal number of bits a single entry should set in the + // bloom filter to keep its size to a minimum (given it's size and maximum + // entry count). + bloomFuncs = math.Round((bloomSize / float64(aggregatorItemLimit)) * math.Log(2)) + + // the bloom offsets are runtime constants which determines which part of the + // the account/storage hash the hasher functions looks at, to determine the + // bloom key for an account/slot. This is randomized at init(), so that the + // global population of nodes do not all display the exact same behaviour with + // regards to bloom content + bloomDestructHasherOffset = 0 + bloomAccountHasherOffset = 0 + bloomStorageHasherOffset = 0 +) + +func init() { + // Init the bloom offsets in the range [0:24] (requires 8 bytes) + bloomDestructHasherOffset = rand.Intn(25) + bloomAccountHasherOffset = rand.Intn(25) + bloomStorageHasherOffset = rand.Intn(25) + + // The destruct and account blooms must be different, as the storage slots + // will check for destruction too for every bloom miss. It should not collide + // with modified accounts. + for bloomAccountHasherOffset == bloomDestructHasherOffset { + bloomAccountHasherOffset = rand.Intn(25) + } +} + +// diffLayer represents a collection of modifications made to a state snapshot +// after running a block on top. It contains one sorted list for the account trie +// and one-one list for each storage tries. +// +// The goal of a diff layer is to act as a journal, tracking recent modifications +// made to the state, that have not yet graduated into a semi-immutable state. +type diffLayer struct { + origin *diskLayer // Base disk layer to directly use on bloom misses + parent snapshot // Parent snapshot modified by this one, never nil + memory uint64 // Approximate guess as to how much memory we use + + root common.Hash // Root hash to which this snapshot diff belongs to + stale uint32 // Signals that the layer became stale (state progressed) + + // destructSet is a very special helper marker. If an account is marked as + // deleted, then it's recorded in this set. However it's allowed that an account + // is included here but still available in other sets(e.g. storageData). The + // reason is the diff layer includes all the changes in a *block*. It can + // happen that in the tx_1, account A is self-destructed while in the tx_2 + // it's recreated. But we still need this marker to indicate the "old" A is + // deleted, all data in other set belongs to the "new" A. + destructSet map[common.Hash]struct{} // Keyed markers for deleted (and potentially) recreated accounts + accountList []common.Hash // List of account for iteration. If it exists, it's sorted, otherwise it's nil + accountData map[common.Hash][]byte // Keyed accounts for direct retrival (nil means deleted) + storageList map[common.Hash][]common.Hash // List of storage slots for iterated retrievals, one per account. Any existing lists are sorted if non-nil + storageData map[common.Hash]map[common.Hash][]byte // Keyed storage slots for direct retrival. one per account (nil means deleted) + + diffed *bloomfilter.Filter // Bloom filter tracking all the diffed items up to the disk layer + + lock sync.RWMutex +} + +// destructBloomHasher is a wrapper around a common.Hash to satisfy the interface +// API requirements of the bloom library used. It's used to convert a destruct +// event into a 64 bit mini hash. +type destructBloomHasher common.Hash + +func (h destructBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") } +func (h destructBloomHasher) Sum(b []byte) []byte { panic("not implemented") } +func (h destructBloomHasher) Reset() { panic("not implemented") } +func (h destructBloomHasher) BlockSize() int { panic("not implemented") } +func (h destructBloomHasher) Size() int { return 8 } +func (h destructBloomHasher) Sum64() uint64 { + return binary.BigEndian.Uint64(h[bloomDestructHasherOffset : bloomDestructHasherOffset+8]) +} + +// accountBloomHasher is a wrapper around a common.Hash to satisfy the interface +// API requirements of the bloom library used. It's used to convert an account +// hash into a 64 bit mini hash. +type accountBloomHasher common.Hash + +func (h accountBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") } +func (h accountBloomHasher) Sum(b []byte) []byte { panic("not implemented") } +func (h accountBloomHasher) Reset() { panic("not implemented") } +func (h accountBloomHasher) BlockSize() int { panic("not implemented") } +func (h accountBloomHasher) Size() int { return 8 } +func (h accountBloomHasher) Sum64() uint64 { + return binary.BigEndian.Uint64(h[bloomAccountHasherOffset : bloomAccountHasherOffset+8]) +} + +// storageBloomHasher is a wrapper around a [2]common.Hash to satisfy the interface +// API requirements of the bloom library used. It's used to convert an account +// hash into a 64 bit mini hash. +type storageBloomHasher [2]common.Hash + +func (h storageBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") } +func (h storageBloomHasher) Sum(b []byte) []byte { panic("not implemented") } +func (h storageBloomHasher) Reset() { panic("not implemented") } +func (h storageBloomHasher) BlockSize() int { panic("not implemented") } +func (h storageBloomHasher) Size() int { return 8 } +func (h storageBloomHasher) Sum64() uint64 { + return binary.BigEndian.Uint64(h[0][bloomStorageHasherOffset:bloomStorageHasherOffset+8]) ^ + binary.BigEndian.Uint64(h[1][bloomStorageHasherOffset:bloomStorageHasherOffset+8]) +} + +// newDiffLayer creates a new diff on top of an existing snapshot, whether that's a low +// level persistent database or a hierarchical diff already. +func newDiffLayer(parent snapshot, root common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { + // Create the new layer with some pre-allocated data segments + dl := &diffLayer{ + parent: parent, + root: root, + destructSet: destructs, + accountData: accounts, + storageData: storage, + storageList: make(map[common.Hash][]common.Hash), + } + switch parent := parent.(type) { + case *diskLayer: + dl.rebloom(parent) + case *diffLayer: + dl.rebloom(parent.origin) + default: + panic("unknown parent type") + } + // Sanity check that accounts or storage slots are never nil + for accountHash, blob := range accounts { + if blob == nil { + panic(fmt.Sprintf("account %#x nil", accountHash)) + } + } + for accountHash, slots := range storage { + if slots == nil { + panic(fmt.Sprintf("storage %#x nil", accountHash)) + } + } + // Determine memory size and track the dirty writes + for _, data := range accounts { + dl.memory += uint64(common.HashLength + len(data)) + snapshotDirtyAccountWriteMeter.Mark(int64(len(data))) + } + // Determine memory size and track the dirty writes + for _, slots := range storage { + for _, data := range slots { + dl.memory += uint64(common.HashLength + len(data)) + snapshotDirtyStorageWriteMeter.Mark(int64(len(data))) + } + } + dl.memory += uint64(len(destructs) * common.HashLength) + return dl +} + +// rebloom discards the layer's current bloom and rebuilds it from scratch based +// on the parent's and the local diffs. +func (dl *diffLayer) rebloom(origin *diskLayer) { + dl.lock.Lock() + defer dl.lock.Unlock() + + defer func(start time.Time) { + snapshotBloomIndexTimer.Update(time.Since(start)) + }(time.Now()) + + // Inject the new origin that triggered the rebloom + dl.origin = origin + + // Retrieve the parent bloom or create a fresh empty one + if parent, ok := dl.parent.(*diffLayer); ok { + parent.lock.RLock() + dl.diffed, _ = parent.diffed.Copy() + parent.lock.RUnlock() + } else { + dl.diffed, _ = bloomfilter.New(uint64(bloomSize), uint64(bloomFuncs)) + } + // Iterate over all the accounts and storage slots and index them + for hash := range dl.destructSet { + dl.diffed.Add(destructBloomHasher(hash)) + } + for hash := range dl.accountData { + dl.diffed.Add(accountBloomHasher(hash)) + } + for accountHash, slots := range dl.storageData { + for storageHash := range slots { + dl.diffed.Add(storageBloomHasher{accountHash, storageHash}) + } + } + // Calculate the current false positive rate and update the error rate meter. + // This is a bit cheating because subsequent layers will overwrite it, but it + // should be fine, we're only interested in ballpark figures. + k := float64(dl.diffed.K()) + n := float64(dl.diffed.N()) + m := float64(dl.diffed.M()) + snapshotBloomErrorGauge.Update(math.Pow(1.0-math.Exp((-k)*(n+0.5)/(m-1)), k)) +} + +// Root returns the root hash for which this snapshot was made. +func (dl *diffLayer) Root() common.Hash { + return dl.root +} + +// Parent returns the subsequent layer of a diff layer. +func (dl *diffLayer) Parent() snapshot { + return dl.parent +} + +// Stale return whether this layer has become stale (was flattened across) or if +// it's still live. +func (dl *diffLayer) Stale() bool { + return atomic.LoadUint32(&dl.stale) != 0 +} + +// Account directly retrieves the account associated with a particular hash in +// the snapshot slim data format. +func (dl *diffLayer) Account(hash common.Hash) (*Account, error) { + data, err := dl.AccountRLP(hash) + if err != nil { + return nil, err + } + if len(data) == 0 { // can be both nil and []byte{} + return nil, nil + } + account := new(Account) + if err := rlp.DecodeBytes(data, account); err != nil { + panic(err) + } + return account, nil +} + +// AccountRLP directly retrieves the account RLP associated with a particular +// hash in the snapshot slim data format. +// +// Note the returned account is not a copy, please don't modify it. +func (dl *diffLayer) AccountRLP(hash common.Hash) ([]byte, error) { + // Check the bloom filter first whether there's even a point in reaching into + // all the maps in all the layers below + dl.lock.RLock() + hit := dl.diffed.Contains(accountBloomHasher(hash)) + if !hit { + hit = dl.diffed.Contains(destructBloomHasher(hash)) + } + dl.lock.RUnlock() + + // If the bloom filter misses, don't even bother with traversing the memory + // diff layers, reach straight into the bottom persistent disk layer + if !hit { + snapshotBloomAccountMissMeter.Mark(1) + return dl.origin.AccountRLP(hash) + } + // The bloom filter hit, start poking in the internal maps + return dl.accountRLP(hash, 0) +} + +// accountRLP is an internal version of AccountRLP that skips the bloom filter +// checks and uses the internal maps to try and retrieve the data. It's meant +// to be used if a higher layer's bloom filter hit already. +func (dl *diffLayer) accountRLP(hash common.Hash, depth int) ([]byte, error) { + dl.lock.RLock() + defer dl.lock.RUnlock() + + // If the layer was flattened into, consider it invalid (any live reference to + // the original should be marked as unusable). + if dl.Stale() { + return nil, ErrSnapshotStale + } + // If the account is known locally, return it + if data, ok := dl.accountData[hash]; ok { + snapshotDirtyAccountHitMeter.Mark(1) + snapshotDirtyAccountHitDepthHist.Update(int64(depth)) + snapshotDirtyAccountReadMeter.Mark(int64(len(data))) + snapshotBloomAccountTrueHitMeter.Mark(1) + return data, nil + } + // If the account is known locally, but deleted, return it + if _, ok := dl.destructSet[hash]; ok { + snapshotDirtyAccountHitMeter.Mark(1) + snapshotDirtyAccountHitDepthHist.Update(int64(depth)) + snapshotDirtyAccountInexMeter.Mark(1) + snapshotBloomAccountTrueHitMeter.Mark(1) + return nil, nil + } + // Account unknown to this diff, resolve from parent + if diff, ok := dl.parent.(*diffLayer); ok { + return diff.accountRLP(hash, depth+1) + } + // Failed to resolve through diff layers, mark a bloom error and use the disk + snapshotBloomAccountFalseHitMeter.Mark(1) + return dl.parent.AccountRLP(hash) +} + +// Storage directly retrieves the storage data associated with a particular hash, +// within a particular account. If the slot is unknown to this diff, it's parent +// is consulted. +// +// Note the returned slot is not a copy, please don't modify it. +func (dl *diffLayer) Storage(accountHash, storageHash common.Hash) ([]byte, error) { + // Check the bloom filter first whether there's even a point in reaching into + // all the maps in all the layers below + dl.lock.RLock() + hit := dl.diffed.Contains(storageBloomHasher{accountHash, storageHash}) + if !hit { + hit = dl.diffed.Contains(destructBloomHasher(accountHash)) + } + dl.lock.RUnlock() + + // If the bloom filter misses, don't even bother with traversing the memory + // diff layers, reach straight into the bottom persistent disk layer + if !hit { + snapshotBloomStorageMissMeter.Mark(1) + return dl.origin.Storage(accountHash, storageHash) + } + // The bloom filter hit, start poking in the internal maps + return dl.storage(accountHash, storageHash, 0) +} + +// storage is an internal version of Storage that skips the bloom filter checks +// and uses the internal maps to try and retrieve the data. It's meant to be +// used if a higher layer's bloom filter hit already. +func (dl *diffLayer) storage(accountHash, storageHash common.Hash, depth int) ([]byte, error) { + dl.lock.RLock() + defer dl.lock.RUnlock() + + // If the layer was flattened into, consider it invalid (any live reference to + // the original should be marked as unusable). + if dl.Stale() { + return nil, ErrSnapshotStale + } + // If the account is known locally, try to resolve the slot locally + if storage, ok := dl.storageData[accountHash]; ok { + if data, ok := storage[storageHash]; ok { + snapshotDirtyStorageHitMeter.Mark(1) + snapshotDirtyStorageHitDepthHist.Update(int64(depth)) + if n := len(data); n > 0 { + snapshotDirtyStorageReadMeter.Mark(int64(n)) + } else { + snapshotDirtyStorageInexMeter.Mark(1) + } + snapshotBloomStorageTrueHitMeter.Mark(1) + return data, nil + } + } + // If the account is known locally, but deleted, return an empty slot + if _, ok := dl.destructSet[accountHash]; ok { + snapshotDirtyStorageHitMeter.Mark(1) + snapshotDirtyStorageHitDepthHist.Update(int64(depth)) + snapshotDirtyStorageInexMeter.Mark(1) + snapshotBloomStorageTrueHitMeter.Mark(1) + return nil, nil + } + // Storage slot unknown to this diff, resolve from parent + if diff, ok := dl.parent.(*diffLayer); ok { + return diff.storage(accountHash, storageHash, depth+1) + } + // Failed to resolve through diff layers, mark a bloom error and use the disk + snapshotBloomStorageFalseHitMeter.Mark(1) + return dl.parent.Storage(accountHash, storageHash) +} + +// Update creates a new layer on top of the existing snapshot diff tree with +// the specified data items. +func (dl *diffLayer) Update(blockRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { + return newDiffLayer(dl, blockRoot, destructs, accounts, storage) +} + +// flatten pushes all data from this point downwards, flattening everything into +// a single diff at the bottom. Since usually the lowermost diff is the largest, +// the flattening builds up from there in reverse. +func (dl *diffLayer) flatten() snapshot { + // If the parent is not diff, we're the first in line, return unmodified + parent, ok := dl.parent.(*diffLayer) + if !ok { + return dl + } + // Parent is a diff, flatten it first (note, apart from weird corned cases, + // flatten will realistically only ever merge 1 layer, so there's no need to + // be smarter about grouping flattens together). + parent = parent.flatten().(*diffLayer) + + parent.lock.Lock() + defer parent.lock.Unlock() + + // Before actually writing all our data to the parent, first ensure that the + // parent hasn't been 'corrupted' by someone else already flattening into it + if atomic.SwapUint32(&parent.stale, 1) != 0 { + panic("parent diff layer is stale") // we've flattened into the same parent from two children, boo + } + // Overwrite all the updated accounts blindly, merge the sorted list + for hash := range dl.destructSet { + parent.destructSet[hash] = struct{}{} + delete(parent.accountData, hash) + delete(parent.storageData, hash) + } + for hash, data := range dl.accountData { + parent.accountData[hash] = data + } + // Overwrite all the updated storage slots (individually) + for accountHash, storage := range dl.storageData { + // If storage didn't exist (or was deleted) in the parent, overwrite blindly + if _, ok := parent.storageData[accountHash]; !ok { + parent.storageData[accountHash] = storage + continue + } + // Storage exists in both parent and child, merge the slots + comboData := parent.storageData[accountHash] + for storageHash, data := range storage { + comboData[storageHash] = data + } + parent.storageData[accountHash] = comboData + } + // Return the combo parent + return &diffLayer{ + parent: parent.parent, + origin: parent.origin, + root: dl.root, + destructSet: parent.destructSet, + accountData: parent.accountData, + storageData: parent.storageData, + storageList: make(map[common.Hash][]common.Hash), + diffed: dl.diffed, + memory: parent.memory + dl.memory, + } +} + +// AccountList returns a sorted list of all accounts in this difflayer, including +// the deleted ones. +// +// Note, the returned slice is not a copy, so do not modify it. +func (dl *diffLayer) AccountList() []common.Hash { + // If an old list already exists, return it + dl.lock.RLock() + list := dl.accountList + dl.lock.RUnlock() + + if list != nil { + return list + } + // No old sorted account list exists, generate a new one + dl.lock.Lock() + defer dl.lock.Unlock() + + dl.accountList = make([]common.Hash, 0, len(dl.destructSet)+len(dl.accountData)) + for hash := range dl.accountData { + dl.accountList = append(dl.accountList, hash) + } + for hash := range dl.destructSet { + if _, ok := dl.accountData[hash]; !ok { + dl.accountList = append(dl.accountList, hash) + } + } + sort.Sort(hashes(dl.accountList)) + dl.memory += uint64(len(dl.accountList) * common.HashLength) + return dl.accountList +} + +// StorageList returns a sorted list of all storage slot hashes in this difflayer +// for the given account. If the whole storage is destructed in this layer, then +// an additional flag *destructed = true* will be returned, otherwise the flag is +// false. Besides, the returned list will include the hash of deleted storage slot. +// Note a special case is an account is deleted in a prior tx but is recreated in +// the following tx with some storage slots set. In this case the returned list is +// not empty but the flag is true. +// +// Note, the returned slice is not a copy, so do not modify it. +func (dl *diffLayer) StorageList(accountHash common.Hash) ([]common.Hash, bool) { + dl.lock.RLock() + _, destructed := dl.destructSet[accountHash] + if _, ok := dl.storageData[accountHash]; !ok { + // Account not tracked by this layer + dl.lock.RUnlock() + return nil, destructed + } + // If an old list already exists, return it + if list, exist := dl.storageList[accountHash]; exist { + dl.lock.RUnlock() + return list, destructed // the cached list can't be nil + } + dl.lock.RUnlock() + + // No old sorted account list exists, generate a new one + dl.lock.Lock() + defer dl.lock.Unlock() + + storageMap := dl.storageData[accountHash] + storageList := make([]common.Hash, 0, len(storageMap)) + for k := range storageMap { + storageList = append(storageList, k) + } + sort.Sort(hashes(storageList)) + dl.storageList[accountHash] = storageList + dl.memory += uint64(len(dl.storageList)*common.HashLength + common.HashLength) + return storageList, destructed +} diff --git a/core/state/snapshot/difflayer_test.go b/core/state/snapshot/difflayer_test.go new file mode 100644 index 0000000..31636ee --- /dev/null +++ b/core/state/snapshot/difflayer_test.go @@ -0,0 +1,400 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot + +import ( + "bytes" + "math/rand" + "testing" + + "github.com/VictoriaMetrics/fastcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb/memorydb" +) + +func copyDestructs(destructs map[common.Hash]struct{}) map[common.Hash]struct{} { + copy := make(map[common.Hash]struct{}) + for hash := range destructs { + copy[hash] = struct{}{} + } + return copy +} + +func copyAccounts(accounts map[common.Hash][]byte) map[common.Hash][]byte { + copy := make(map[common.Hash][]byte) + for hash, blob := range accounts { + copy[hash] = blob + } + return copy +} + +func copyStorage(storage map[common.Hash]map[common.Hash][]byte) map[common.Hash]map[common.Hash][]byte { + copy := make(map[common.Hash]map[common.Hash][]byte) + for accHash, slots := range storage { + copy[accHash] = make(map[common.Hash][]byte) + for slotHash, blob := range slots { + copy[accHash][slotHash] = blob + } + } + return copy +} + +// TestMergeBasics tests some simple merges +func TestMergeBasics(t *testing.T) { + var ( + destructs = make(map[common.Hash]struct{}) + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + // Fill up a parent + for i := 0; i < 100; i++ { + h := randomHash() + data := randomAccount() + + accounts[h] = data + if rand.Intn(4) == 0 { + destructs[h] = struct{}{} + } + if rand.Intn(2) == 0 { + accStorage := make(map[common.Hash][]byte) + value := make([]byte, 32) + rand.Read(value) + accStorage[randomHash()] = value + storage[h] = accStorage + } + } + // Add some (identical) layers on top + parent := newDiffLayer(emptyLayer(), common.Hash{}, copyDestructs(destructs), copyAccounts(accounts), copyStorage(storage)) + child := newDiffLayer(parent, common.Hash{}, copyDestructs(destructs), copyAccounts(accounts), copyStorage(storage)) + child = newDiffLayer(child, common.Hash{}, copyDestructs(destructs), copyAccounts(accounts), copyStorage(storage)) + child = newDiffLayer(child, common.Hash{}, copyDestructs(destructs), copyAccounts(accounts), copyStorage(storage)) + child = newDiffLayer(child, common.Hash{}, copyDestructs(destructs), copyAccounts(accounts), copyStorage(storage)) + // And flatten + merged := (child.flatten()).(*diffLayer) + + { // Check account lists + if have, want := len(merged.accountList), 0; have != want { + t.Errorf("accountList wrong: have %v, want %v", have, want) + } + if have, want := len(merged.AccountList()), len(accounts); have != want { + t.Errorf("AccountList() wrong: have %v, want %v", have, want) + } + if have, want := len(merged.accountList), len(accounts); have != want { + t.Errorf("accountList [2] wrong: have %v, want %v", have, want) + } + } + { // Check account drops + if have, want := len(merged.destructSet), len(destructs); have != want { + t.Errorf("accountDrop wrong: have %v, want %v", have, want) + } + } + { // Check storage lists + i := 0 + for aHash, sMap := range storage { + if have, want := len(merged.storageList), i; have != want { + t.Errorf("[1] storageList wrong: have %v, want %v", have, want) + } + list, _ := merged.StorageList(aHash) + if have, want := len(list), len(sMap); have != want { + t.Errorf("[2] StorageList() wrong: have %v, want %v", have, want) + } + if have, want := len(merged.storageList[aHash]), len(sMap); have != want { + t.Errorf("storageList wrong: have %v, want %v", have, want) + } + i++ + } + } +} + +// TestMergeDelete tests some deletion +func TestMergeDelete(t *testing.T) { + var ( + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + // Fill up a parent + h1 := common.HexToHash("0x01") + h2 := common.HexToHash("0x02") + + flipDrops := func() map[common.Hash]struct{} { + return map[common.Hash]struct{}{ + h2: {}, + } + } + flipAccs := func() map[common.Hash][]byte { + return map[common.Hash][]byte{ + h1: randomAccount(), + } + } + flopDrops := func() map[common.Hash]struct{} { + return map[common.Hash]struct{}{ + h1: {}, + } + } + flopAccs := func() map[common.Hash][]byte { + return map[common.Hash][]byte{ + h2: randomAccount(), + } + } + // Add some flipAccs-flopping layers on top + parent := newDiffLayer(emptyLayer(), common.Hash{}, flipDrops(), flipAccs(), storage) + child := parent.Update(common.Hash{}, flopDrops(), flopAccs(), storage) + child = child.Update(common.Hash{}, flipDrops(), flipAccs(), storage) + child = child.Update(common.Hash{}, flopDrops(), flopAccs(), storage) + child = child.Update(common.Hash{}, flipDrops(), flipAccs(), storage) + child = child.Update(common.Hash{}, flopDrops(), flopAccs(), storage) + child = child.Update(common.Hash{}, flipDrops(), flipAccs(), storage) + + if data, _ := child.Account(h1); data == nil { + t.Errorf("last diff layer: expected %x account to be non-nil", h1) + } + if data, _ := child.Account(h2); data != nil { + t.Errorf("last diff layer: expected %x account to be nil", h2) + } + if _, ok := child.destructSet[h1]; ok { + t.Errorf("last diff layer: expected %x drop to be missing", h1) + } + if _, ok := child.destructSet[h2]; !ok { + t.Errorf("last diff layer: expected %x drop to be present", h1) + } + // And flatten + merged := (child.flatten()).(*diffLayer) + + if data, _ := merged.Account(h1); data == nil { + t.Errorf("merged layer: expected %x account to be non-nil", h1) + } + if data, _ := merged.Account(h2); data != nil { + t.Errorf("merged layer: expected %x account to be nil", h2) + } + if _, ok := merged.destructSet[h1]; !ok { // Note, drops stay alive until persisted to disk! + t.Errorf("merged diff layer: expected %x drop to be present", h1) + } + if _, ok := merged.destructSet[h2]; !ok { // Note, drops stay alive until persisted to disk! + t.Errorf("merged diff layer: expected %x drop to be present", h1) + } + // If we add more granular metering of memory, we can enable this again, + // but it's not implemented for now + //if have, want := merged.memory, child.memory; have != want { + // t.Errorf("mem wrong: have %d, want %d", have, want) + //} +} + +// This tests that if we create a new account, and set a slot, and then merge +// it, the lists will be correct. +func TestInsertAndMerge(t *testing.T) { + // Fill up a parent + var ( + acc = common.HexToHash("0x01") + slot = common.HexToHash("0x02") + parent *diffLayer + child *diffLayer + ) + { + var ( + destructs = make(map[common.Hash]struct{}) + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + parent = newDiffLayer(emptyLayer(), common.Hash{}, destructs, accounts, storage) + } + { + var ( + destructs = make(map[common.Hash]struct{}) + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + accounts[acc] = randomAccount() + storage[acc] = make(map[common.Hash][]byte) + storage[acc][slot] = []byte{0x01} + child = newDiffLayer(parent, common.Hash{}, destructs, accounts, storage) + } + // And flatten + merged := (child.flatten()).(*diffLayer) + { // Check that slot value is present + have, _ := merged.Storage(acc, slot) + if want := []byte{0x01}; !bytes.Equal(have, want) { + t.Errorf("merged slot value wrong: have %x, want %x", have, want) + } + } +} + +func emptyLayer() *diskLayer { + return &diskLayer{ + diskdb: memorydb.New(), + cache: fastcache.New(500 * 1024), + } +} + +// BenchmarkSearch checks how long it takes to find a non-existing key +// BenchmarkSearch-6 200000 10481 ns/op (1K per layer) +// BenchmarkSearch-6 200000 10760 ns/op (10K per layer) +// BenchmarkSearch-6 100000 17866 ns/op +// +// BenchmarkSearch-6 500000 3723 ns/op (10k per layer, only top-level RLock() +func BenchmarkSearch(b *testing.B) { + // First, we set up 128 diff layers, with 1K items each + fill := func(parent snapshot) *diffLayer { + var ( + destructs = make(map[common.Hash]struct{}) + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + for i := 0; i < 10000; i++ { + accounts[randomHash()] = randomAccount() + } + return newDiffLayer(parent, common.Hash{}, destructs, accounts, storage) + } + var layer snapshot + layer = emptyLayer() + for i := 0; i < 128; i++ { + layer = fill(layer) + } + key := crypto.Keccak256Hash([]byte{0x13, 0x38}) + b.ResetTimer() + for i := 0; i < b.N; i++ { + layer.AccountRLP(key) + } +} + +// BenchmarkSearchSlot checks how long it takes to find a non-existing key +// - Number of layers: 128 +// - Each layers contains the account, with a couple of storage slots +// BenchmarkSearchSlot-6 100000 14554 ns/op +// BenchmarkSearchSlot-6 100000 22254 ns/op (when checking parent root using mutex) +// BenchmarkSearchSlot-6 100000 14551 ns/op (when checking parent number using atomic) +// With bloom filter: +// BenchmarkSearchSlot-6 3467835 351 ns/op +func BenchmarkSearchSlot(b *testing.B) { + // First, we set up 128 diff layers, with 1K items each + accountKey := crypto.Keccak256Hash([]byte{0x13, 0x37}) + storageKey := crypto.Keccak256Hash([]byte{0x13, 0x37}) + accountRLP := randomAccount() + fill := func(parent snapshot) *diffLayer { + var ( + destructs = make(map[common.Hash]struct{}) + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + accounts[accountKey] = accountRLP + + accStorage := make(map[common.Hash][]byte) + for i := 0; i < 5; i++ { + value := make([]byte, 32) + rand.Read(value) + accStorage[randomHash()] = value + storage[accountKey] = accStorage + } + return newDiffLayer(parent, common.Hash{}, destructs, accounts, storage) + } + var layer snapshot + layer = emptyLayer() + for i := 0; i < 128; i++ { + layer = fill(layer) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + layer.Storage(accountKey, storageKey) + } +} + +// With accountList and sorting +// BenchmarkFlatten-6 50 29890856 ns/op +// +// Without sorting and tracking accountlist +// BenchmarkFlatten-6 300 5511511 ns/op +func BenchmarkFlatten(b *testing.B) { + fill := func(parent snapshot) *diffLayer { + var ( + destructs = make(map[common.Hash]struct{}) + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + for i := 0; i < 100; i++ { + accountKey := randomHash() + accounts[accountKey] = randomAccount() + + accStorage := make(map[common.Hash][]byte) + for i := 0; i < 20; i++ { + value := make([]byte, 32) + rand.Read(value) + accStorage[randomHash()] = value + + } + storage[accountKey] = accStorage + } + return newDiffLayer(parent, common.Hash{}, destructs, accounts, storage) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StopTimer() + var layer snapshot + layer = emptyLayer() + for i := 1; i < 128; i++ { + layer = fill(layer) + } + b.StartTimer() + + for i := 1; i < 128; i++ { + dl, ok := layer.(*diffLayer) + if !ok { + break + } + layer = dl.flatten() + } + b.StopTimer() + } +} + +// This test writes ~324M of diff layers to disk, spread over +// - 128 individual layers, +// - each with 200 accounts +// - containing 200 slots +// +// BenchmarkJournal-6 1 1471373923 ns/ops +// BenchmarkJournal-6 1 1208083335 ns/op // bufio writer +func BenchmarkJournal(b *testing.B) { + fill := func(parent snapshot) *diffLayer { + var ( + destructs = make(map[common.Hash]struct{}) + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + for i := 0; i < 200; i++ { + accountKey := randomHash() + accounts[accountKey] = randomAccount() + + accStorage := make(map[common.Hash][]byte) + for i := 0; i < 200; i++ { + value := make([]byte, 32) + rand.Read(value) + accStorage[randomHash()] = value + + } + storage[accountKey] = accStorage + } + return newDiffLayer(parent, common.Hash{}, destructs, accounts, storage) + } + layer := snapshot(new(diskLayer)) + for i := 1; i < 128; i++ { + layer = fill(layer) + } + b.ResetTimer() + + for i := 0; i < b.N; i++ { + layer.Journal(new(bytes.Buffer)) + } +} diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go new file mode 100644 index 0000000..496edaa --- /dev/null +++ b/core/state/snapshot/disklayer.go @@ -0,0 +1,166 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot + +import ( + "bytes" + "sync" + + "github.com/VictoriaMetrics/fastcache" + "github.com/ava-labs/coreth/core/rawdb" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" +) + +// diskLayer is a low level persistent snapshot built on top of a key-value store. +type diskLayer struct { + diskdb ethdb.KeyValueStore // Key-value store containing the base snapshot + triedb *trie.Database // Trie node cache for reconstuction purposes + cache *fastcache.Cache // Cache to avoid hitting the disk for direct access + + root common.Hash // Root hash of the base snapshot + stale bool // Signals that the layer became stale (state progressed) + + genMarker []byte // Marker for the state that's indexed during initial layer generation + genPending chan struct{} // Notification channel when generation is done (test synchronicity) + genAbort chan chan *generatorStats // Notification channel to abort generating the snapshot in this layer + + lock sync.RWMutex +} + +// Root returns root hash for which this snapshot was made. +func (dl *diskLayer) Root() common.Hash { + return dl.root +} + +// Parent always returns nil as there's no layer below the disk. +func (dl *diskLayer) Parent() snapshot { + return nil +} + +// Stale return whether this layer has become stale (was flattened across) or if +// it's still live. +func (dl *diskLayer) Stale() bool { + dl.lock.RLock() + defer dl.lock.RUnlock() + + return dl.stale +} + +// Account directly retrieves the account associated with a particular hash in +// the snapshot slim data format. +func (dl *diskLayer) Account(hash common.Hash) (*Account, error) { + data, err := dl.AccountRLP(hash) + if err != nil { + return nil, err + } + if len(data) == 0 { // can be both nil and []byte{} + return nil, nil + } + account := new(Account) + if err := rlp.DecodeBytes(data, account); err != nil { + panic(err) + } + return account, nil +} + +// AccountRLP directly retrieves the account RLP associated with a particular +// hash in the snapshot slim data format. +func (dl *diskLayer) AccountRLP(hash common.Hash) ([]byte, error) { + dl.lock.RLock() + defer dl.lock.RUnlock() + + // If the layer was flattened into, consider it invalid (any live reference to + // the original should be marked as unusable). + if dl.stale { + return nil, ErrSnapshotStale + } + // If the layer is being generated, ensure the requested hash has already been + // covered by the generator. + if dl.genMarker != nil && bytes.Compare(hash[:], dl.genMarker) > 0 { + return nil, ErrNotCoveredYet + } + // If we're in the disk layer, all diff layers missed + snapshotDirtyAccountMissMeter.Mark(1) + + // Try to retrieve the account from the memory cache + if blob, found := dl.cache.HasGet(nil, hash[:]); found { + snapshotCleanAccountHitMeter.Mark(1) + snapshotCleanAccountReadMeter.Mark(int64(len(blob))) + return blob, nil + } + // Cache doesn't contain account, pull from disk and cache for later + blob := rawdb.ReadAccountSnapshot(dl.diskdb, hash) + dl.cache.Set(hash[:], blob) + + snapshotCleanAccountMissMeter.Mark(1) + if n := len(blob); n > 0 { + snapshotCleanAccountWriteMeter.Mark(int64(n)) + } else { + snapshotCleanAccountInexMeter.Mark(1) + } + return blob, nil +} + +// Storage directly retrieves the storage data associated with a particular hash, +// within a particular account. +func (dl *diskLayer) Storage(accountHash, storageHash common.Hash) ([]byte, error) { + dl.lock.RLock() + defer dl.lock.RUnlock() + + // If the layer was flattened into, consider it invalid (any live reference to + // the original should be marked as unusable). + if dl.stale { + return nil, ErrSnapshotStale + } + key := append(accountHash[:], storageHash[:]...) + + // If the layer is being generated, ensure the requested hash has already been + // covered by the generator. + if dl.genMarker != nil && bytes.Compare(key, dl.genMarker) > 0 { + return nil, ErrNotCoveredYet + } + // If we're in the disk layer, all diff layers missed + snapshotDirtyStorageMissMeter.Mark(1) + + // Try to retrieve the storage slot from the memory cache + if blob, found := dl.cache.HasGet(nil, key); found { + snapshotCleanStorageHitMeter.Mark(1) + snapshotCleanStorageReadMeter.Mark(int64(len(blob))) + return blob, nil + } + // Cache doesn't contain storage slot, pull from disk and cache for later + blob := rawdb.ReadStorageSnapshot(dl.diskdb, accountHash, storageHash) + dl.cache.Set(key, blob) + + snapshotCleanStorageMissMeter.Mark(1) + if n := len(blob); n > 0 { + snapshotCleanStorageWriteMeter.Mark(int64(n)) + } else { + snapshotCleanStorageInexMeter.Mark(1) + } + return blob, nil +} + +// Update creates a new layer on top of the existing snapshot diff tree with +// the specified data items. Note, the maps are retained by the method to avoid +// copying everything. +func (dl *diskLayer) Update(blockHash common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { + return newDiffLayer(dl, blockHash, destructs, accounts, storage) +} diff --git a/core/state/snapshot/disklayer_test.go b/core/state/snapshot/disklayer_test.go new file mode 100644 index 0000000..5df5efc --- /dev/null +++ b/core/state/snapshot/disklayer_test.go @@ -0,0 +1,511 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot + +import ( + "bytes" + "io/ioutil" + "os" + "testing" + + "github.com/VictoriaMetrics/fastcache" + "github.com/ava-labs/coreth/core/rawdb" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/ethdb/leveldb" + "github.com/ethereum/go-ethereum/ethdb/memorydb" +) + +// reverse reverses the contents of a byte slice. It's used to update random accs +// with deterministic changes. +func reverse(blob []byte) []byte { + res := make([]byte, len(blob)) + for i, b := range blob { + res[len(blob)-1-i] = b + } + return res +} + +// Tests that merging something into a disk layer persists it into the database +// and invalidates any previously written and cached values. +func TestDiskMerge(t *testing.T) { + // Create some accounts in the disk layer + db := memorydb.New() + + var ( + accNoModNoCache = common.Hash{0x1} + accNoModCache = common.Hash{0x2} + accModNoCache = common.Hash{0x3} + accModCache = common.Hash{0x4} + accDelNoCache = common.Hash{0x5} + accDelCache = common.Hash{0x6} + conNoModNoCache = common.Hash{0x7} + conNoModNoCacheSlot = common.Hash{0x70} + conNoModCache = common.Hash{0x8} + conNoModCacheSlot = common.Hash{0x80} + conModNoCache = common.Hash{0x9} + conModNoCacheSlot = common.Hash{0x90} + conModCache = common.Hash{0xa} + conModCacheSlot = common.Hash{0xa0} + conDelNoCache = common.Hash{0xb} + conDelNoCacheSlot = common.Hash{0xb0} + conDelCache = common.Hash{0xc} + conDelCacheSlot = common.Hash{0xc0} + conNukeNoCache = common.Hash{0xd} + conNukeNoCacheSlot = common.Hash{0xd0} + conNukeCache = common.Hash{0xe} + conNukeCacheSlot = common.Hash{0xe0} + baseRoot = randomHash() + diffRoot = randomHash() + ) + + rawdb.WriteAccountSnapshot(db, accNoModNoCache, accNoModNoCache[:]) + rawdb.WriteAccountSnapshot(db, accNoModCache, accNoModCache[:]) + rawdb.WriteAccountSnapshot(db, accModNoCache, accModNoCache[:]) + rawdb.WriteAccountSnapshot(db, accModCache, accModCache[:]) + rawdb.WriteAccountSnapshot(db, accDelNoCache, accDelNoCache[:]) + rawdb.WriteAccountSnapshot(db, accDelCache, accDelCache[:]) + + rawdb.WriteAccountSnapshot(db, conNoModNoCache, conNoModNoCache[:]) + rawdb.WriteStorageSnapshot(db, conNoModNoCache, conNoModNoCacheSlot, conNoModNoCacheSlot[:]) + rawdb.WriteAccountSnapshot(db, conNoModCache, conNoModCache[:]) + rawdb.WriteStorageSnapshot(db, conNoModCache, conNoModCacheSlot, conNoModCacheSlot[:]) + rawdb.WriteAccountSnapshot(db, conModNoCache, conModNoCache[:]) + rawdb.WriteStorageSnapshot(db, conModNoCache, conModNoCacheSlot, conModNoCacheSlot[:]) + rawdb.WriteAccountSnapshot(db, conModCache, conModCache[:]) + rawdb.WriteStorageSnapshot(db, conModCache, conModCacheSlot, conModCacheSlot[:]) + rawdb.WriteAccountSnapshot(db, conDelNoCache, conDelNoCache[:]) + rawdb.WriteStorageSnapshot(db, conDelNoCache, conDelNoCacheSlot, conDelNoCacheSlot[:]) + rawdb.WriteAccountSnapshot(db, conDelCache, conDelCache[:]) + rawdb.WriteStorageSnapshot(db, conDelCache, conDelCacheSlot, conDelCacheSlot[:]) + + rawdb.WriteAccountSnapshot(db, conNukeNoCache, conNukeNoCache[:]) + rawdb.WriteStorageSnapshot(db, conNukeNoCache, conNukeNoCacheSlot, conNukeNoCacheSlot[:]) + rawdb.WriteAccountSnapshot(db, conNukeCache, conNukeCache[:]) + rawdb.WriteStorageSnapshot(db, conNukeCache, conNukeCacheSlot, conNukeCacheSlot[:]) + + rawdb.WriteSnapshotRoot(db, baseRoot) + + // Create a disk layer based on the above and cache in some data + snaps := &Tree{ + layers: map[common.Hash]snapshot{ + baseRoot: &diskLayer{ + diskdb: db, + cache: fastcache.New(500 * 1024), + root: baseRoot, + }, + }, + } + base := snaps.Snapshot(baseRoot) + base.AccountRLP(accNoModCache) + base.AccountRLP(accModCache) + base.AccountRLP(accDelCache) + base.Storage(conNoModCache, conNoModCacheSlot) + base.Storage(conModCache, conModCacheSlot) + base.Storage(conDelCache, conDelCacheSlot) + base.Storage(conNukeCache, conNukeCacheSlot) + + // Modify or delete some accounts, flatten everything onto disk + if err := snaps.Update(diffRoot, baseRoot, map[common.Hash]struct{}{ + accDelNoCache: {}, + accDelCache: {}, + conNukeNoCache: {}, + conNukeCache: {}, + }, map[common.Hash][]byte{ + accModNoCache: reverse(accModNoCache[:]), + accModCache: reverse(accModCache[:]), + }, map[common.Hash]map[common.Hash][]byte{ + conModNoCache: {conModNoCacheSlot: reverse(conModNoCacheSlot[:])}, + conModCache: {conModCacheSlot: reverse(conModCacheSlot[:])}, + conDelNoCache: {conDelNoCacheSlot: nil}, + conDelCache: {conDelCacheSlot: nil}, + }); err != nil { + t.Fatalf("failed to update snapshot tree: %v", err) + } + if err := snaps.Cap(diffRoot, 0); err != nil { + t.Fatalf("failed to flatten snapshot tree: %v", err) + } + // Retrieve all the data through the disk layer and validate it + base = snaps.Snapshot(diffRoot) + if _, ok := base.(*diskLayer); !ok { + t.Fatalf("update not flattend into the disk layer") + } + + // assertAccount ensures that an account matches the given blob. + assertAccount := func(account common.Hash, data []byte) { + t.Helper() + blob, err := base.AccountRLP(account) + if err != nil { + t.Errorf("account access (%x) failed: %v", account, err) + } else if !bytes.Equal(blob, data) { + t.Errorf("account access (%x) mismatch: have %x, want %x", account, blob, data) + } + } + assertAccount(accNoModNoCache, accNoModNoCache[:]) + assertAccount(accNoModCache, accNoModCache[:]) + assertAccount(accModNoCache, reverse(accModNoCache[:])) + assertAccount(accModCache, reverse(accModCache[:])) + assertAccount(accDelNoCache, nil) + assertAccount(accDelCache, nil) + + // assertStorage ensures that a storage slot matches the given blob. + assertStorage := func(account common.Hash, slot common.Hash, data []byte) { + t.Helper() + blob, err := base.Storage(account, slot) + if err != nil { + t.Errorf("storage access (%x:%x) failed: %v", account, slot, err) + } else if !bytes.Equal(blob, data) { + t.Errorf("storage access (%x:%x) mismatch: have %x, want %x", account, slot, blob, data) + } + } + assertStorage(conNoModNoCache, conNoModNoCacheSlot, conNoModNoCacheSlot[:]) + assertStorage(conNoModCache, conNoModCacheSlot, conNoModCacheSlot[:]) + assertStorage(conModNoCache, conModNoCacheSlot, reverse(conModNoCacheSlot[:])) + assertStorage(conModCache, conModCacheSlot, reverse(conModCacheSlot[:])) + assertStorage(conDelNoCache, conDelNoCacheSlot, nil) + assertStorage(conDelCache, conDelCacheSlot, nil) + assertStorage(conNukeNoCache, conNukeNoCacheSlot, nil) + assertStorage(conNukeCache, conNukeCacheSlot, nil) + + // Retrieve all the data directly from the database and validate it + + // assertDatabaseAccount ensures that an account from the database matches the given blob. + assertDatabaseAccount := func(account common.Hash, data []byte) { + t.Helper() + if blob := rawdb.ReadAccountSnapshot(db, account); !bytes.Equal(blob, data) { + t.Errorf("account database access (%x) mismatch: have %x, want %x", account, blob, data) + } + } + assertDatabaseAccount(accNoModNoCache, accNoModNoCache[:]) + assertDatabaseAccount(accNoModCache, accNoModCache[:]) + assertDatabaseAccount(accModNoCache, reverse(accModNoCache[:])) + assertDatabaseAccount(accModCache, reverse(accModCache[:])) + assertDatabaseAccount(accDelNoCache, nil) + assertDatabaseAccount(accDelCache, nil) + + // assertDatabaseStorage ensures that a storage slot from the database matches the given blob. + assertDatabaseStorage := func(account common.Hash, slot common.Hash, data []byte) { + t.Helper() + if blob := rawdb