diff options
Diffstat (limited to 'core/state')
-rw-r--r-- | core/state/database.go | 47 | ||||
-rw-r--r-- | core/state/dump.go | 124 | ||||
-rw-r--r-- | core/state/iterator.go | 6 | ||||
-rw-r--r-- | core/state/snapshot/account.go | 86 | ||||
-rw-r--r-- | core/state/snapshot/conversion.go | 275 | ||||
-rw-r--r-- | core/state/snapshot/difflayer.go | 553 | ||||
-rw-r--r-- | core/state/snapshot/difflayer_test.go | 400 | ||||
-rw-r--r-- | core/state/snapshot/disklayer.go | 166 | ||||
-rw-r--r-- | core/state/snapshot/disklayer_test.go | 511 | ||||
-rw-r--r-- | core/state/snapshot/generate.go | 264 | ||||
-rw-r--r-- | core/state/snapshot/iterator.go | 400 | ||||
-rw-r--r-- | core/state/snapshot/iterator_binary.go | 213 | ||||
-rw-r--r-- | core/state/snapshot/iterator_fast.go | 350 | ||||
-rw-r--r-- | core/state/snapshot/iterator_test.go | 1046 | ||||
-rw-r--r-- | core/state/snapshot/journal.go | 270 | ||||
-rw-r--r-- | core/state/snapshot/snapshot.go | 619 | ||||
-rw-r--r-- | core/state/snapshot/snapshot_test.go | 371 | ||||
-rw-r--r-- | core/state/snapshot/sort.go | 36 | ||||
-rw-r--r-- | core/state/snapshot/wipe.go | 131 | ||||
-rw-r--r-- | core/state/snapshot/wipe_test.go | 124 | ||||
-rw-r--r-- | core/state/sync.go | 14 |
21 files changed, 5948 insertions, 58 deletions
diff --git a/core/state/database.go b/core/state/database.go index 8c641c3..a9342f5 100644 --- a/core/state/database.go +++ b/core/state/database.go @@ -17,17 +17,23 @@ package state import ( + "errors" "fmt" - "github.com/ava-labs/go-ethereum/common" - "github.com/ava-labs/go-ethereum/ethdb" - "github.com/ava-labs/go-ethereum/trie" + "github.com/VictoriaMetrics/fastcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/trie" lru "github.com/hashicorp/golang-lru" ) const ( // Number of codehash->size associations to keep. codeSizeCacheSize = 100000 + + // Cache size granted for caching clean code. + codeCacheSize = 64 * 1024 * 1024 ) // Database wraps access to tries and contract code. @@ -100,23 +106,25 @@ type Trie interface { // concurrent use, but does not retain any recent trie nodes in memory. To keep some // historical state in memory, use the NewDatabaseWithCache constructor. func NewDatabase(db ethdb.Database) Database { - return NewDatabaseWithCache(db, 0) + return NewDatabaseWithCache(db, 0, "") } // NewDatabaseWithCache creates a backing store for state. The returned database // is safe for concurrent use and retains a lot of collapsed RLP trie nodes in a // large memory cache. -func NewDatabaseWithCache(db ethdb.Database, cache int) Database { +func NewDatabaseWithCache(db ethdb.Database, cache int, journal string) Database { csc, _ := lru.New(codeSizeCacheSize) return &cachingDB{ - db: trie.NewDatabaseWithCache(db, cache), + db: trie.NewDatabaseWithCache(db, cache, journal), codeSizeCache: csc, + codeCache: fastcache.New(codeCacheSize), } } type cachingDB struct { db *trie.Database codeSizeCache *lru.Cache + codeCache *fastcache.Cache } // OpenTrie opens the main account trie at a specific root hash. @@ -141,11 +149,32 @@ func (db *cachingDB) CopyTrie(t Trie) Trie { // ContractCode retrieves a particular contract's code. func (db *cachingDB) ContractCode(addrHash, codeHash common.Hash) ([]byte, error) { - code, err := db.db.Node(codeHash) - if err == nil { + if code := db.codeCache.Get(nil, codeHash.Bytes()); len(code) > 0 { + return code, nil + } + code := rawdb.ReadCode(db.db.DiskDB(), codeHash) + if len(code) > 0 { + db.codeCache.Set(codeHash.Bytes(), code) + db.codeSizeCache.Add(codeHash, len(code)) + return code, nil + } + return nil, errors.New("not found") +} + +// ContractCodeWithPrefix retrieves a particular contract's code. If the +// code can't be found in the cache, then check the existence with **new** +// db scheme. +func (db *cachingDB) ContractCodeWithPrefix(addrHash, codeHash common.Hash) ([]byte, error) { + if code := db.codeCache.Get(nil, codeHash.Bytes()); len(code) > 0 { + return code, nil + } + code := rawdb.ReadCodeWithPrefix(db.db.DiskDB(), codeHash) + if len(code) > 0 { + db.codeCache.Set(codeHash.Bytes(), code) db.codeSizeCache.Add(codeHash, len(code)) + return code, nil } - return code, err + return nil, errors.New("not found") } // ContractCodeSize retrieves a particular contracts code's size. diff --git a/core/state/dump.go b/core/state/dump.go index 91c7d08..9bb946d 100644 --- a/core/state/dump.go +++ b/core/state/dump.go @@ -20,14 +20,22 @@ import ( "encoding/json" "fmt" - "github.com/ava-labs/go-ethereum/common" - "github.com/ava-labs/go-ethereum/common/hexutil" - "github.com/ava-labs/go-ethereum/log" - "github.com/ava-labs/go-ethereum/rlp" - "github.com/ava-labs/go-ethereum/trie" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/common/hexutil" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" ) -// DumpAccount represents an account in the state +// DumpCollector interface which the state trie calls during iteration +type DumpCollector interface { + // OnRoot is called with the state root + OnRoot(common.Hash) + // OnAccount is called once for each account in the trie + OnAccount(common.Address, DumpAccount) +} + +// DumpAccount represents an account in the state. type DumpAccount struct { Balance string `json:"balance"` Nonce uint64 `json:"nonce"` @@ -40,30 +48,46 @@ type DumpAccount struct { } -// Dump represents the full dump in a collected format, as one large map +// Dump represents the full dump in a collected format, as one large map. type Dump struct { Root string `json:"root"` Accounts map[common.Address]DumpAccount `json:"accounts"` } -// iterativeDump is a 'collector'-implementation which dump output line-by-line iteratively -type iterativeDump json.Encoder +// OnRoot implements DumpCollector interface +func (d *Dump) OnRoot(root common.Hash) { + d.Root = fmt.Sprintf("%x", root) +} + +// OnAccount implements DumpCollector interface +func (d *Dump) OnAccount(addr common.Address, account DumpAccount) { + d.Accounts[addr] = account +} + +// IteratorDump is an implementation for iterating over data. +type IteratorDump struct { + Root string `json:"root"` + Accounts map[common.Address]DumpAccount `json:"accounts"` + Next []byte `json:"next,omitempty"` // nil if no more accounts +} -// Collector interface which the state trie calls during iteration -type collector interface { - onRoot(common.Hash) - onAccount(common.Address, DumpAccount) +// OnRoot implements DumpCollector interface +func (d *IteratorDump) OnRoot(root common.Hash) { + d.Root = fmt.Sprintf("%x", root) } -func (self *Dump) onRoot(root common.Hash) { - self.Root = fmt.Sprintf("%x", root) +// OnAccount implements DumpCollector interface +func (d *IteratorDump) OnAccount(addr common.Address, account DumpAccount) { + d.Accounts[addr] = account } -func (self *Dump) onAccount(addr common.Address, account DumpAccount) { - self.Accounts[addr] = account +// iterativeDump is a DumpCollector-implementation which dumps output line-by-line iteratively. +type iterativeDump struct { + *json.Encoder } -func (self iterativeDump) onAccount(addr common.Address, account DumpAccount) { +// OnAccount implements DumpCollector interface +func (d iterativeDump) OnAccount(addr common.Address, account DumpAccount) { dumpAccount := &DumpAccount{ Balance: account.Balance, Nonce: account.Nonce, @@ -77,33 +101,35 @@ func (self iterativeDump) onAccount(addr common.Address, account DumpAccount) { if addr != (common.Address{}) { dumpAccount.Address = &addr } - (*json.Encoder)(&self).Encode(dumpAccount) + d.Encode(dumpAccount) } -func (self iterativeDump) onRoot(root common.Hash) { - (*json.Encoder)(&self).Encode(struct { + +// OnRoot implements DumpCollector interface +func (d iterativeDump) OnRoot(root common.Hash) { + d.Encode(struct { Root common.Hash `json:"root"` }{root}) } -func (self *StateDB) dump(c collector, excludeCode, excludeStorage, excludeMissingPreimages bool) { - emptyAddress := (common.Address{}) +func (s *StateDB) DumpToCollector(c DumpCollector, excludeCode, excludeStorage, excludeMissingPreimages bool, start []byte, maxResults int) (nextKey []byte) { missingPreimages := 0 - c.onRoot(self.trie.Hash()) - it := trie.NewIterator(self.trie.NodeIterator(nil)) + c.OnRoot(s.trie.Hash()) + + var count int + it := trie.NewIterator(s.trie.NodeIterator(start)) for it.Next() { var data Account if err := rlp.DecodeBytes(it.Value, &data); err != nil { panic(err) } - addr := common.BytesToAddress(self.trie.GetKey(it.Key)) - obj := newObject(nil, addr, data) account := DumpAccount{ Balance: data.Balance.String(), Nonce: data.Nonce, Root: common.Bytes2Hex(data.Root[:]), CodeHash: common.Bytes2Hex(data.CodeHash), } - if emptyAddress == addr { + addrBytes := s.trie.GetKey(it.Key) + if addrBytes == nil { // Preimage missing missingPreimages++ if excludeMissingPreimages { @@ -111,48 +137,68 @@ func (self *StateDB) dump(c collector, excludeCode, excludeStorage, excludeMissi } account.SecureKey = it.Key } + addr := common.BytesToAddress(addrBytes) + obj := newObject(nil, addr, data) if !excludeCode { - account.Code = common.Bytes2Hex(obj.Code(self.db)) + account.Code = common.Bytes2Hex(obj.Code(s.db)) } if !excludeStorage { account.Storage = make(map[common.Hash]string) - storageIt := trie.NewIterator(obj.getTrie(self.db).NodeIterator(nil)) + storageIt := trie.NewIterator(obj.getTrie(s.db).NodeIterator(nil)) for storageIt.Next() { _, content, _, err := rlp.Split(storageIt.Value) if err != nil { log.Error("Failed to decode the value returned by iterator", "error", err) continue } - account.Storage[common.BytesToHash(self.trie.GetKey(storageIt.Key))] = common.Bytes2Hex(content) + account.Storage[common.BytesToHash(s.trie.GetKey(storageIt.Key))] = common.Bytes2Hex(content) + } + } + c.OnAccount(addr, account) + count++ + if maxResults > 0 && count >= maxResults { + if it.Next() { + nextKey = it.Key } + break } - c.onAccount(addr, account) } if missingPreimages > 0 { log.Warn("Dump incomplete due to missing preimages", "missing", missingPreimages) } + + return nextKey } // RawDump returns the entire state an a single large object -func (self *StateDB) RawDump(excludeCode, excludeStorage, excludeMissingPreimages bool) Dump { +func (s *StateDB) RawDump(excludeCode, excludeStorage, excludeMissingPreimages bool) Dump { dump := &Dump{ Accounts: make(map[common.Address]DumpAccount), } - self.dump(dump, excludeCode, excludeStorage, excludeMissingPreimages) + s.DumpToCollector(dump, excludeCode, excludeStorage, excludeMissingPreimages, nil, 0) return *dump } // Dump returns a JSON string representing the entire state as a single json-object -func (self *StateDB) Dump(excludeCode, excludeStorage, excludeMissingPreimages bool) []byte { - dump := self.RawDump(excludeCode, excludeStorage, excludeMissingPreimages) +func (s *StateDB) Dump(excludeCode, excludeStorage, excludeMissingPreimages bool) []byte { + dump := s.RawDump(excludeCode, excludeStorage, excludeMissingPreimages) json, err := json.MarshalIndent(dump, "", " ") if err != nil { - fmt.Println("dump err", err) + fmt.Println("Dump err", err) } return json } // IterativeDump dumps out accounts as json-objects, delimited by linebreaks on stdout -func (self *StateDB) IterativeDump(excludeCode, excludeStorage, excludeMissingPreimages bool, output *json.Encoder) { - self.dump(iterativeDump(*output), excludeCode, excludeStorage, excludeMissingPreimages) +func (s *StateDB) IterativeDump(excludeCode, excludeStorage, excludeMissingPreimages bool, output *json.Encoder) { + s.DumpToCollector(iterativeDump{output}, excludeCode, excludeStorage, excludeMissingPreimages, nil, 0) +} + +// IteratorDump dumps out a batch of accounts starts with the given start key +func (s *StateDB) IteratorDump(excludeCode, excludeStorage, excludeMissingPreimages bool, start []byte, maxResults int) IteratorDump { + iterator := &IteratorDump{ + Accounts: make(map[common.Address]DumpAccount), + } + iterator.Next = s.DumpToCollector(iterator, excludeCode, excludeStorage, excludeMissingPreimages, start, maxResults) + return *iterator } diff --git a/core/state/iterator.go b/core/state/iterator.go index c6d2a48..6a5c73d 100644 --- a/core/state/iterator.go +++ b/core/state/iterator.go @@ -20,9 +20,9 @@ import ( "bytes" "fmt" - "github.com/ava-labs/go-ethereum/common" - "github.com/ava-labs/go-ethereum/rlp" - "github.com/ava-labs/go-ethereum/trie" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" ) // NodeIterator is an iterator to traverse the entire state trie post-order, diff --git a/core/state/snapshot/account.go b/core/state/snapshot/account.go new file mode 100644 index 0000000..b92e942 --- /dev/null +++ b/core/state/snapshot/account.go @@ -0,0 +1,86 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package snapshot + +import ( + "bytes" + "math/big" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/rlp" +) + +// Account is a modified version of a state.Account, where the root is replaced +// with a byte slice. This format can be used to represent full-consensus format +// or slim-snapshot format which replaces the empty root and code hash as nil +// byte slice. +type Account struct { + Nonce uint64 + Balance *big.Int + Root []byte + CodeHash []byte +} + +// SlimAccount converts a state.Account content into a slim snapshot account +func SlimAccount(nonce uint64, balance *big.Int, root common.Hash, codehash []byte) Account { + slim := Account{ + Nonce: nonce, + Balance: balance, + } + if root != emptyRoot { + slim.Root = root[:] + } + if !bytes.Equal(codehash, emptyCode[:]) { + slim.CodeHash = codehash + } + return slim +} + +// SlimAccountRLP converts a state.Account content into a slim snapshot +// version RLP encoded. +func SlimAccountRLP(nonce uint64, balance *big.Int, root common.Hash, codehash []byte) []byte { + data, err := rlp.EncodeToBytes(SlimAccount(nonce, balance, root, codehash)) + if err != nil { + panic(err) + } + return data +} + +// FullAccount decodes the data on the 'slim RLP' format and return +// the consensus format account. +func FullAccount(data []byte) (Account, error) { + var account Account + if err := rlp.DecodeBytes(data, &account); err != nil { + return Account{}, err + } + if len(account.Root) == 0 { + account.Root = emptyRoot[:] + } + if len(account.CodeHash) == 0 { + account.CodeHash = emptyCode[:] + } + return account, nil +} + +// FullAccountRLP converts data on the 'slim RLP' format into the full RLP-format. +func FullAccountRLP(data []byte) ([]byte, error) { + account, err := FullAccount(data) + if err != nil { + return nil, err + } + return rlp.EncodeToBytes(account) +} diff --git a/core/state/snapshot/conversion.go b/core/state/snapshot/conversion.go new file mode 100644 index 0000000..dee9ff0 --- /dev/null +++ b/core/state/snapshot/conversion.go @@ -0,0 +1,275 @@ +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package snapshot + +import ( + "bytes" + "fmt" + "sync" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb/memorydb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" +) + +// trieKV represents a trie key-value pair +type trieKV struct { + key common.Hash + value []byte +} + +type ( + // trieGeneratorFn is the interface of trie generation which can + // be implemented by different trie algorithm. + trieGeneratorFn func(in chan (trieKV), out chan (common.Hash)) + + // leafCallbackFn is the callback invoked at the leaves of the trie, + // returns the subtrie root with the specified subtrie identifier. + leafCallbackFn func(hash common.Hash, stat *generateStats) common.Hash +) + +// GenerateAccountTrieRoot takes an account iterator and reproduces the root hash. +func GenerateAccountTrieRoot(it AccountIterator) (common.Hash, error) { + return generateTrieRoot(it, common.Hash{}, stdGenerate, nil, &generateStats{start: time.Now()}, true) +} + +// GenerateStorageTrieRoot takes a storage iterator and reproduces the root hash. +func GenerateStorageTrieRoot(account common.Hash, it StorageIterator) (common.Hash, error) { + return generateTrieRoot(it, account, stdGenerate, nil, &generateStats{start: time.Now()}, true) +} + +// VerifyState takes the whole snapshot tree as the input, traverses all the accounts +// as well as the corresponding storages and compares the re-computed hash with the +// original one(state root and the storage root). +func VerifyState(snaptree *Tree, root common.Hash) error { + acctIt, err := snaptree.AccountIterator(root, common.Hash{}) + if err != nil { + return err + } + defer acctIt.Release() + + got, err := generateTrieRoot(acctIt, common.Hash{}, stdGenerate, func(account common.Hash, stat *generateStats) common.Hash { + storageIt, err := snaptree.StorageIterator(root, account, common.Hash{}) + if err != nil { + return common.Hash{} + } + defer storageIt.Release() + + hash, err := generateTrieRoot(storageIt, account, stdGenerate, nil, stat, false) + if err != nil { + return common.Hash{} + } + return hash + }, &generateStats{start: time.Now()}, true) + + if err != nil { + return err + } + if got != root { + return fmt.Errorf("state root hash mismatch: got %x, want %x", got, root) + } + return nil +} + +// generateStats is a collection of statistics gathered by the trie generator +// for logging purposes. +type generateStats struct { + accounts uint64 + slots uint64 + curAccount common.Hash + curSlot common.Hash + start time.Time + lock sync.RWMutex +} + +// progress records the progress trie generator made recently. +func (stat *generateStats) progress(accounts, slots uint64, curAccount common.Hash, curSlot common.Hash) { + stat.lock.Lock() + defer stat.lock.Unlock() + + stat.accounts += accounts + stat.slots += slots + stat.curAccount = curAccount + stat.curSlot = curSlot +} + +// report prints the cumulative progress statistic smartly. +func (stat *generateStats) report() { + stat.lock.RLock() + defer stat.lock.RUnlock() + + var ctx []interface{} + if stat.curSlot != (common.Hash{}) { + ctx = append(ctx, []interface{}{ + "in", stat.curAccount, + "at", stat.curSlot, + }...) + } else { + ctx = append(ctx, []interface{}{"at", stat.curAccount}...) + } + // Add the usual measurements + ctx = append(ctx, []interface{}{"accounts", stat.accounts}...) + if stat.slots != 0 { + ctx = append(ctx, []interface{}{"slots", stat.slots}...) + } + ctx = append(ctx, []interface{}{"elapsed", common.PrettyDuration(time.Since(stat.start))}...) + log.Info("Generating trie hash from snapshot", ctx...) +} + +// reportDone prints the last log when the whole generation is finished. +func (stat *generateStats) reportDone() { + stat.lock.RLock() + defer stat.lock.RUnlock() + + var ctx []interface{} + ctx = append(ctx, []interface{}{"accounts", stat.accounts}...) + if stat.slots != 0 { + ctx = append(ctx, []interface{}{"slots", stat.slots}...) + } + ctx = append(ctx, []interface{}{"elapsed", common.PrettyDuration(time.Since(stat.start))}...) + log.Info("Generated trie hash from snapshot", ctx...) +} + +// generateTrieRoot generates the trie hash based on the snapshot iterator. +// It can be used for generating account trie, storage trie or even the +// whole state which connects the accounts and the corresponding storages. +func generateTrieRoot(it Iterator, account common.Hash, generatorFn trieGeneratorFn, leafCallback leafCallbackFn, stats *generateStats, report bool) (common.Hash, error) { + var ( + in = make(chan trieKV) // chan to pass leaves + out = make(chan common.Hash, 1) // chan to collect result + stoplog = make(chan bool, 1) // 1-size buffer, works when logging is not enabled + wg sync.WaitGroup + ) + // Spin up a go-routine for trie hash re-generation + wg.Add(1) + go func() { + defer wg.Done() + generatorFn(in, out) + }() + + // Spin up a go-routine for progress logging + if report && stats != nil { + wg.Add(1) + go func() { + defer wg.Done() + + timer := time.NewTimer(0) + defer timer.Stop() + + for { + select { + case <-timer.C: + stats.report() + timer.Reset(time.Second * 8) + case success := <-stoplog: + if success { + stats.reportDone() + } + return + } + } + }() + } + // stop is a helper function to shutdown the background threads + // and return the re-generated trie hash. + stop := func(success bool) common.Hash { + close(in) + result := <-out + stoplog <- success + wg.Wait() + return result + } + var ( + logged = time.Now() + processed = uint64(0) + leaf trieKV + last common.Hash + ) + // Start to feed leaves + for it.Next() { + if account == (common.Hash{}) { + var ( + err error + fullData []byte + ) + if leafCallback == nil { + fullData, err = FullAccountRLP(it.(AccountIterator).Account()) + if err != nil { + stop(false) + return common.Hash{}, err + } + } else { + account, err := FullAccount(it.(AccountIterator).Account()) + if err != nil { + stop(false) + return common.Hash{}, err + } + // Apply the leaf callback. Normally the callback is used to traverse + // the storage trie and re-generate the subtrie root. + subroot := leafCallback(it.Hash(), stats) + if !bytes.Equal(account.Root, subroot.Bytes()) { + stop(false) + return common.Hash{}, fmt.Errorf("invalid subroot(%x), want %x, got %x", it.Hash(), account.Root, subroot) + } + fullData, err = rlp.EncodeToBytes(account) + if err != nil { + stop(false) + return common.Hash{}, err + } + } + leaf = trieKV{it.Hash(), fullData} + } else { + leaf = trieKV{it.Hash(), common.CopyBytes(it.(StorageIterator).Slot())} + } + in <- leaf + + // Accumulate the generaation statistic if it's required. + processed++ + if time.Since(logged) > 3*time.Second && stats != nil { + if account == (common.Hash{}) { + stats.progress(processed, 0, it.Hash(), common.Hash{}) + } else { + stats.progress(0, processed, account, it.Hash()) + } + logged, processed = time.Now(), 0 + } + last = it.Hash() + } + // Commit the last part statistic. + if processed > 0 && stats != nil { + if account == (common.Hash{}) { + stats.progress(processed, 0, last, common.Hash{}) + } else { + stats.progress(0, processed, account, last) + } + } + result := stop(true) + return result, nil +} + +// stdGenerate is a very basic hexary trie builder which uses the same Trie +// as the rest of geth, with no enhancements or optimizations +func stdGenerate(in chan (trieKV), out chan (common.Hash)) { + t, _ := trie.New(common.Hash{}, trie.NewDatabase(memorydb.New())) + for leaf := range in { + t.TryUpdate(leaf.key[:], leaf.value) + } + out <- t.Hash() +} diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go new file mode 100644 index 0000000..0aef6cf --- /dev/null +++ b/core/state/snapshot/difflayer.go @@ -0,0 +1,553 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package snapshot + +import ( + "encoding/binary" + "fmt" + "math" + "math/rand" + "sort" + "sync" + "sync/atomic" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/rlp" + "github.com/steakknife/bloomfilter" +) + +var ( + // aggregatorMemoryLimit is the maximum size of the bottom-most diff layer + // that aggregates the writes from above until it's flushed into the disk + // layer. + // + // Note, bumping this up might drastically increase the size of the bloom + // filters that's stored in every diff layer. Don't do that without fully + // understanding all the implications. + aggregatorMemoryLimit = uint64(4 * 1024 * 1024) + + // aggregatorItemLimit is an approximate number of items that will end up + // in the agregator layer before it's flushed out to disk. A plain account + // weighs around 14B (+hash), a storage slot 32B (+hash), a deleted slot + // 0B (+hash). Slots are mostly set/unset in lockstep, so thet average at + // 16B (+hash). All in all, the average entry seems to be 15+32=47B. Use a + // smaller number to be on the safe side. + aggregatorItemLimit = aggregatorMemoryLimit / 42 + + // bloomTargetError is the target false positive rate when the aggregator + // layer is at its fullest. The actual value will probably move around up + // and down from this number, it's mostly a ballpark figure. + // + // Note, dropping this down might drastically increase the size of the bloom + // filters that's stored in every diff layer. Don't do that without fully + // understanding all the implications. + bloomTargetError = 0.02 + + // bloomSize is the ideal bloom filter size given the maximum number of items + // it's expected to hold and the target false positive error rate. + bloomSize = math.Ceil(float64(aggregatorItemLimit) * math.Log(bloomTargetError) / math.Log(1/math.Pow(2, math.Log(2)))) + + // bloomFuncs is the ideal number of bits a single entry should set in the + // bloom filter to keep its size to a minimum (given it's size and maximum + // entry count). + bloomFuncs = math.Round((bloomSize / float64(aggregatorItemLimit)) * math.Log(2)) + + // the bloom offsets are runtime constants which determines which part of the + // the account/storage hash the hasher functions looks at, to determine the + // bloom key for an account/slot. This is randomized at init(), so that the + // global population of nodes do not all display the exact same behaviour with + // regards to bloom content + bloomDestructHasherOffset = 0 + bloomAccountHasherOffset = 0 + bloomStorageHasherOffset = 0 +) + +func init() { + // Init the bloom offsets in the range [0:24] (requires 8 bytes) + bloomDestructHasherOffset = rand.Intn(25) + bloomAccountHasherOffset = rand.Intn(25) + bloomStorageHasherOffset = rand.Intn(25) + + // The destruct and account blooms must be different, as the storage slots + // will check for destruction too for every bloom miss. It should not collide + // with modified accounts. + for bloomAccountHasherOffset == bloomDestructHasherOffset { + bloomAccountHasherOffset = rand.Intn(25) + } +} + +// diffLayer represents a collection of modifications made to a state snapshot +// after running a block on top. It contains one sorted list for the account trie +// and one-one list for each storage tries. +// +// The goal of a diff layer is to act as a journal, tracking recent modifications +// made to the state, that have not yet graduated into a semi-immutable state. +type diffLayer struct { + origin *diskLayer // Base disk layer to directly use on bloom misses + parent snapshot // Parent snapshot modified by this one, never nil + memory uint64 // Approximate guess as to how much memory we use + + root common.Hash // Root hash to which this snapshot diff belongs to + stale uint32 // Signals that the layer became stale (state progressed) + + // destructSet is a very special helper marker. If an account is marked as + // deleted, then it's recorded in this set. However it's allowed that an account + // is included here but still available in other sets(e.g. storageData). The + // reason is the diff layer includes all the changes in a *block*. It can + // happen that in the tx_1, account A is self-destructed while in the tx_2 + // it's recreated. But we still need this marker to indicate the "old" A is + // deleted, all data in other set belongs to the "new" A. + destructSet map[common.Hash]struct{} // Keyed markers for deleted (and potentially) recreated accounts + accountList []common.Hash // List of account for iteration. If it exists, it's sorted, otherwise it's nil + accountData map[common.Hash][]byte // Keyed accounts for direct retrival (nil means deleted) + storageList map[common.Hash][]common.Hash // List of storage slots for iterated retrievals, one per account. Any existing lists are sorted if non-nil + storageData map[common.Hash]map[common.Hash][]byte // Keyed storage slots for direct retrival. one per account (nil means deleted) + + diffed *bloomfilter.Filter // Bloom filter tracking all the diffed items up to the disk layer + + lock sync.RWMutex +} + +// destructBloomHasher is a wrapper around a common.Hash to satisfy the interface +// API requirements of the bloom library used. It's used to convert a destruct +// event into a 64 bit mini hash. +type destructBloomHasher common.Hash + +func (h destructBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") } +func (h destructBloomHasher) Sum(b []byte) []byte { panic("not implemented") } +func (h destructBloomHasher) Reset() { panic("not implemented") } +func (h destructBloomHasher) BlockSize() int { panic("not implemented") } +func (h destructBloomHasher) Size() int { return 8 } +func (h destructBloomHasher) Sum64() uint64 { + return binary.BigEndian.Uint64(h[bloomDestructHasherOffset : bloomDestructHasherOffset+8]) |