Diffstat (limited to 'core/state')
-rw-r--r-- | core/state/database.go                | 47
-rw-r--r-- | core/state/dump.go                    | 167
-rw-r--r-- | core/state/iterator.go                | 6
-rw-r--r-- | core/state/journal.go                 | 12
-rw-r--r-- | core/state/snapshot/account.go        | 88
-rw-r--r-- | core/state/snapshot/conversion.go     | 275
-rw-r--r-- | core/state/snapshot/difflayer.go      | 553
-rw-r--r-- | core/state/snapshot/disklayer.go      | 166
-rw-r--r-- | core/state/snapshot/generate.go       | 265
-rw-r--r-- | core/state/snapshot/iterator.go       | 400
-rw-r--r-- | core/state/snapshot/iterator_binary.go | 213
-rw-r--r-- | core/state/snapshot/iterator_fast.go  | 350
-rw-r--r-- | core/state/snapshot/journal.go        | 270
-rw-r--r-- | core/state/snapshot/snapshot.go       | 619
-rw-r--r-- | core/state/snapshot/sort.go           | 36
-rw-r--r-- | core/state/snapshot/wipe.go           | 131
-rw-r--r-- | core/state/state_object.go            | 161
-rw-r--r-- | core/state/statedb.go                 | 686
-rw-r--r-- | core/state/sync.go                    | 14
19 files changed, 4066 insertions, 393 deletions
diff --git a/core/state/database.go b/core/state/database.go index 8c641c3..385c25d 100644 --- a/core/state/database.go +++ b/core/state/database.go @@ -17,17 +17,23 @@ package state import ( + "errors" "fmt" - "github.com/ava-labs/go-ethereum/common" - "github.com/ava-labs/go-ethereum/ethdb" - "github.com/ava-labs/go-ethereum/trie" + "github.com/VictoriaMetrics/fastcache" + "github.com/ava-labs/coreth/core/rawdb" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/trie" lru "github.com/hashicorp/golang-lru" ) const ( // Number of codehash->size associations to keep. codeSizeCacheSize = 100000 + + // Cache size granted for caching clean code. + codeCacheSize = 64 * 1024 * 1024 ) // Database wraps access to tries and contract code. @@ -100,23 +106,25 @@ type Trie interface { // concurrent use, but does not retain any recent trie nodes in memory. To keep some // historical state in memory, use the NewDatabaseWithCache constructor. func NewDatabase(db ethdb.Database) Database { - return NewDatabaseWithCache(db, 0) + return NewDatabaseWithCache(db, 0, "") } // NewDatabaseWithCache creates a backing store for state. The returned database // is safe for concurrent use and retains a lot of collapsed RLP trie nodes in a // large memory cache. -func NewDatabaseWithCache(db ethdb.Database, cache int) Database { +func NewDatabaseWithCache(db ethdb.Database, cache int, journal string) Database { csc, _ := lru.New(codeSizeCacheSize) return &cachingDB{ - db: trie.NewDatabaseWithCache(db, cache), + db: trie.NewDatabaseWithCache(db, cache, journal), codeSizeCache: csc, + codeCache: fastcache.New(codeCacheSize), } } type cachingDB struct { db *trie.Database codeSizeCache *lru.Cache + codeCache *fastcache.Cache } // OpenTrie opens the main account trie at a specific root hash. @@ -141,11 +149,32 @@ func (db *cachingDB) CopyTrie(t Trie) Trie { // ContractCode retrieves a particular contract's code. func (db *cachingDB) ContractCode(addrHash, codeHash common.Hash) ([]byte, error) { - code, err := db.db.Node(codeHash) - if err == nil { + if code := db.codeCache.Get(nil, codeHash.Bytes()); len(code) > 0 { + return code, nil + } + code := rawdb.ReadCode(db.db.DiskDB(), codeHash) + if len(code) > 0 { + db.codeCache.Set(codeHash.Bytes(), code) + db.codeSizeCache.Add(codeHash, len(code)) + return code, nil + } + return nil, errors.New("not found") +} + +// ContractCodeWithPrefix retrieves a particular contract's code. If the +// code can't be found in the cache, then check its existence under the **new** +// db scheme. +func (db *cachingDB) ContractCodeWithPrefix(addrHash, codeHash common.Hash) ([]byte, error) { + if code := db.codeCache.Get(nil, codeHash.Bytes()); len(code) > 0 { + return code, nil + } + code := rawdb.ReadCodeWithPrefix(db.db.DiskDB(), codeHash) + if len(code) > 0 { + db.codeCache.Set(codeHash.Bytes(), code) db.codeSizeCache.Add(codeHash, len(code)) + return code, nil } - return code, err + return nil, errors.New("not found") } // ContractCodeSize retrieves a particular contract's code size. 
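The database.go change above gives contract-code reads a two-tier path: a fastcache byte cache in front of the raw key-value store, with a small LRU tracking code sizes. A minimal sketch of how a caller would exercise it (illustrative only, not part of the diff; it assumes rawdb.WriteCode, the write-side counterpart of the ReadCode helper used above, and a memory-backed ethdb):

package main

import (
	"fmt"

	"github.com/ava-labs/coreth/core/rawdb"
	"github.com/ava-labs/coreth/core/state"
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/crypto"
)

func main() {
	diskdb := rawdb.NewMemoryDatabase()
	code := []byte{0x60, 0x00} // PUSH1 0x00
	codeHash := crypto.Keccak256Hash(code)
	rawdb.WriteCode(diskdb, codeHash, code) // assumed helper, mirroring ReadCode

	// 16 MB of clean trie cache, no trie cache journal.
	sdb := state.NewDatabaseWithCache(diskdb, 16, "")

	// The first read misses fastcache, falls back to disk and warms both
	// caches; repeat reads are served from memory.
	blob, err := sdb.ContractCode(common.Hash{}, codeHash)
	fmt.Println(len(blob), err)

	size, _ := sdb.ContractCodeSize(common.Hash{}, codeHash)
	fmt.Println(size) // 2, served from the codeSizeCache LRU
}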
diff --git a/core/state/dump.go b/core/state/dump.go index 91c7d08..6f09398 100644 --- a/core/state/dump.go +++ b/core/state/dump.go @@ -20,90 +20,119 @@ import ( "encoding/json" "fmt" - "github.com/ava-labs/go-ethereum/common" - "github.com/ava-labs/go-ethereum/common/hexutil" - "github.com/ava-labs/go-ethereum/log" - "github.com/ava-labs/go-ethereum/rlp" - "github.com/ava-labs/go-ethereum/trie" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/common/hexutil" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" ) -// DumpAccount represents an account in the state +// DumpCollector interface which the state trie calls during iteration +type DumpCollector interface { + // OnRoot is called with the state root + OnRoot(common.Hash) + // OnAccount is called once for each account in the trie + OnAccount(common.Address, DumpAccount) +} + +// DumpAccount represents an account in the state. type DumpAccount struct { - Balance string `json:"balance"` - Nonce uint64 `json:"nonce"` - Root string `json:"root"` - CodeHash string `json:"codeHash"` - Code string `json:"code,omitempty"` - Storage map[common.Hash]string `json:"storage,omitempty"` - Address *common.Address `json:"address,omitempty"` // Address only present in iterative (line-by-line) mode - SecureKey hexutil.Bytes `json:"key,omitempty"` // If we don't have address, we can output the key + Balance string `json:"balance"` + Nonce uint64 `json:"nonce"` + Root string `json:"root"` + CodeHash string `json:"codeHash"` + IsMultiCoin bool `json:"isMultiCoin"` + Code string `json:"code,omitempty"` + Storage map[common.Hash]string `json:"storage,omitempty"` + Address *common.Address `json:"address,omitempty"` // Address only present in iterative (line-by-line) mode + SecureKey hexutil.Bytes `json:"key,omitempty"` // If we don't have address, we can output the key } -// Dump represents the full dump in a collected format, as one large map +// Dump represents the full dump in a collected format, as one large map. type Dump struct { Root string `json:"root"` Accounts map[common.Address]DumpAccount `json:"accounts"` } -// iterativeDump is a 'collector'-implementation which dump output line-by-line iteratively -type iterativeDump json.Encoder +// OnRoot implements DumpCollector interface +func (d *Dump) OnRoot(root common.Hash) { + d.Root = fmt.Sprintf("%x", root) +} + +// OnAccount implements DumpCollector interface +func (d *Dump) OnAccount(addr common.Address, account DumpAccount) { + d.Accounts[addr] = account +} + +// IteratorDump is an implementation for iterating over data. 
+type IteratorDump struct { + Root string `json:"root"` + Accounts map[common.Address]DumpAccount `json:"accounts"` + Next []byte `json:"next,omitempty"` // nil if no more accounts +} -// Collector interface which the state trie calls during iteration -type collector interface { - onRoot(common.Hash) - onAccount(common.Address, DumpAccount) +// OnRoot implements DumpCollector interface +func (d *IteratorDump) OnRoot(root common.Hash) { + d.Root = fmt.Sprintf("%x", root) } -func (self *Dump) onRoot(root common.Hash) { - self.Root = fmt.Sprintf("%x", root) +// OnAccount implements DumpCollector interface +func (d *IteratorDump) OnAccount(addr common.Address, account DumpAccount) { + d.Accounts[addr] = account } -func (self *Dump) onAccount(addr common.Address, account DumpAccount) { - self.Accounts[addr] = account +// iterativeDump is a DumpCollector-implementation which dumps output line-by-line iteratively. +type iterativeDump struct { + *json.Encoder } -func (self iterativeDump) onAccount(addr common.Address, account DumpAccount) { +// OnAccount implements DumpCollector interface +func (d iterativeDump) OnAccount(addr common.Address, account DumpAccount) { dumpAccount := &DumpAccount{ - Balance: account.Balance, - Nonce: account.Nonce, - Root: account.Root, - CodeHash: account.CodeHash, - Code: account.Code, - Storage: account.Storage, - SecureKey: account.SecureKey, - Address: nil, + Balance: account.Balance, + Nonce: account.Nonce, + Root: account.Root, + CodeHash: account.CodeHash, + IsMultiCoin: account.IsMultiCoin, + Code: account.Code, + Storage: account.Storage, + SecureKey: account.SecureKey, + Address: nil, } if addr != (common.Address{}) { dumpAccount.Address = &addr } - (*json.Encoder)(&self).Encode(dumpAccount) + d.Encode(dumpAccount) } -func (self iterativeDump) onRoot(root common.Hash) { - (*json.Encoder)(&self).Encode(struct { + +// OnRoot implements DumpCollector interface +func (d iterativeDump) OnRoot(root common.Hash) { + d.Encode(struct { Root common.Hash `json:"root"` }{root}) } -func (self *StateDB) dump(c collector, excludeCode, excludeStorage, excludeMissingPreimages bool) { - emptyAddress := (common.Address{}) +func (s *StateDB) DumpToCollector(c DumpCollector, excludeCode, excludeStorage, excludeMissingPreimages bool, start []byte, maxResults int) (nextKey []byte) { missingPreimages := 0 - c.onRoot(self.trie.Hash()) - it := trie.NewIterator(self.trie.NodeIterator(nil)) + c.OnRoot(s.trie.Hash()) + + var count int + it := trie.NewIterator(s.trie.NodeIterator(start)) for it.Next() { var data Account if err := rlp.DecodeBytes(it.Value, &data); err != nil { panic(err) } - addr := common.BytesToAddress(self.trie.GetKey(it.Key)) - obj := newObject(nil, addr, data) account := DumpAccount{ - Balance: data.Balance.String(), - Nonce: data.Nonce, - Root: common.Bytes2Hex(data.Root[:]), - CodeHash: common.Bytes2Hex(data.CodeHash), + Balance: data.Balance.String(), + Nonce: data.Nonce, + Root: common.Bytes2Hex(data.Root[:]), + CodeHash: common.Bytes2Hex(data.CodeHash), + IsMultiCoin: data.IsMultiCoin, } - if emptyAddress == addr { + addrBytes := s.trie.GetKey(it.Key) + if addrBytes == nil { // Preimage missing missingPreimages++ if excludeMissingPreimages { @@ -111,48 +140,68 @@ func (self *StateDB) dump(c collector, excludeCode, excludeStorage, excludeMissi } account.SecureKey = it.Key } + addr := common.BytesToAddress(addrBytes) + obj := newObject(nil, addr, data) if !excludeCode { - account.Code = common.Bytes2Hex(obj.Code(self.db)) + account.Code = 
common.Bytes2Hex(obj.Code(s.db)) } if !excludeStorage { account.Storage = make(map[common.Hash]string) - storageIt := trie.NewIterator(obj.getTrie(self.db).NodeIterator(nil)) + storageIt := trie.NewIterator(obj.getTrie(s.db).NodeIterator(nil)) for storageIt.Next() { _, content, _, err := rlp.Split(storageIt.Value) if err != nil { log.Error("Failed to decode the value returned by iterator", "error", err) continue } - account.Storage[common.BytesToHash(self.trie.GetKey(storageIt.Key))] = common.Bytes2Hex(content) + account.Storage[common.BytesToHash(s.trie.GetKey(storageIt.Key))] = common.Bytes2Hex(content) + } + } + c.OnAccount(addr, account) + count++ + if maxResults > 0 && count >= maxResults { + if it.Next() { + nextKey = it.Key } + break } - c.onAccount(addr, account) } if missingPreimages > 0 { log.Warn("Dump incomplete due to missing preimages", "missing", missingPreimages) } + + return nextKey } // RawDump returns the entire state as a single large object -func (self *StateDB) RawDump(excludeCode, excludeStorage, excludeMissingPreimages bool) Dump { +func (s *StateDB) RawDump(excludeCode, excludeStorage, excludeMissingPreimages bool) Dump { dump := &Dump{ Accounts: make(map[common.Address]DumpAccount), } - self.dump(dump, excludeCode, excludeStorage, excludeMissingPreimages) + s.DumpToCollector(dump, excludeCode, excludeStorage, excludeMissingPreimages, nil, 0) return *dump } // Dump returns a JSON string representing the entire state as a single json-object -func (self *StateDB) Dump(excludeCode, excludeStorage, excludeMissingPreimages bool) []byte { - dump := self.RawDump(excludeCode, excludeStorage, excludeMissingPreimages) +func (s *StateDB) Dump(excludeCode, excludeStorage, excludeMissingPreimages bool) []byte { + dump := s.RawDump(excludeCode, excludeStorage, excludeMissingPreimages) json, err := json.MarshalIndent(dump, "", " ") if err != nil { - fmt.Println("dump err", err) + fmt.Println("Dump err", err) } return json } // IterativeDump dumps out accounts as json-objects, delimited by linebreaks on stdout -func (self *StateDB) IterativeDump(excludeCode, excludeStorage, excludeMissingPreimages bool, output *json.Encoder) { - self.dump(iterativeDump(*output), excludeCode, excludeStorage, excludeMissingPreimages) +func (s *StateDB) IterativeDump(excludeCode, excludeStorage, excludeMissingPreimages bool, output *json.Encoder) { + s.DumpToCollector(iterativeDump{output}, excludeCode, excludeStorage, excludeMissingPreimages, nil, 0) +} + +// IteratorDump dumps out a batch of accounts starting with the given start key +func (s *StateDB) IteratorDump(excludeCode, excludeStorage, excludeMissingPreimages bool, start []byte, maxResults int) IteratorDump { + iterator := &IteratorDump{ + Accounts: make(map[common.Address]DumpAccount), + } + iterator.Next = s.DumpToCollector(iterator, excludeCode, excludeStorage, excludeMissingPreimages, start, maxResults) + return *iterator } diff --git a/core/state/iterator.go b/core/state/iterator.go index c6d2a48..6a5c73d 100644 --- a/core/state/iterator.go +++ b/core/state/iterator.go @@ -20,9 +20,9 @@ import ( "bytes" "fmt" - "github.com/ava-labs/go-ethereum/common" - "github.com/ava-labs/go-ethereum/rlp" - "github.com/ava-labs/go-ethereum/trie" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" ) // NodeIterator is an iterator to traverse the entire state trie post-order, diff --git a/core/state/journal.go b/core/state/journal.go index 6e85173..0cc556b 100644 --- 
a/core/state/journal.go +++ b/core/state/journal.go @@ -19,7 +19,7 @@ package state import ( "math/big" - "github.com/ava-labs/go-ethereum/common" + "github.com/ethereum/go-ethereum/common" ) // journalEntry is a modification entry in the state change journal that can be @@ -90,7 +90,8 @@ type ( account *common.Address } resetObjectChange struct { - prev *stateObject + prev *stateObject + prevdestruct bool } suicideChange struct { account *common.Address @@ -130,9 +131,7 @@ type ( hash common.Hash } touchChange struct { - account *common.Address - prev bool - prevDirty bool + account *common.Address } ) @@ -147,6 +146,9 @@ func (ch createObjectChange) dirtied() *common.Address { func (ch resetObjectChange) revert(s *StateDB) { s.setStateObject(ch.prev) + if !ch.prevdestruct && s.snap != nil { + delete(s.snapDestructs, ch.prev.addrHash) + } } func (ch resetObjectChange) dirtied() *common.Address { diff --git a/core/state/snapshot/account.go b/core/state/snapshot/account.go new file mode 100644 index 0000000..303c2fc --- /dev/null +++ b/core/state/snapshot/account.go @@ -0,0 +1,88 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package snapshot + +import ( + "bytes" + "math/big" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/rlp" +) + +// Account is a modified version of a state.Account, where the root is replaced +// with a byte slice. This format can be used to represent full-consensus format +// or slim-snapshot format which replaces the empty root and code hash as nil +// byte slice. +type Account struct { + Nonce uint64 + Balance *big.Int + Root []byte + CodeHash []byte + IsMultiCoin bool +} + +// SlimAccount converts a state.Account content into a slim snapshot account +func SlimAccount(nonce uint64, balance *big.Int, root common.Hash, codehash []byte, isMultiCoin bool) Account { + slim := Account{ + Nonce: nonce, + Balance: balance, + IsMultiCoin: isMultiCoin, + } + if root != emptyRoot { + slim.Root = root[:] + } + if !bytes.Equal(codehash, emptyCode[:]) { + slim.CodeHash = codehash + } + return slim +} + +// SlimAccountRLP converts a state.Account content into a slim snapshot +// version RLP encoded. +func SlimAccountRLP(nonce uint64, balance *big.Int, root common.Hash, codehash []byte, isMultiCoin bool) []byte { + data, err := rlp.EncodeToBytes(SlimAccount(nonce, balance, root, codehash, isMultiCoin)) + if err != nil { + panic(err) + } + return data +} + +// FullAccount decodes the data on the 'slim RLP' format and return +// the consensus format account. 
+func FullAccount(data []byte) (Account, error) { + var account Account + if err := rlp.DecodeBytes(data, &account); err != nil { + return Account{}, err + } + if len(account.Root) == 0 { + account.Root = emptyRoot[:] + } + if len(account.CodeHash) == 0 { + account.CodeHash = emptyCode[:] + } + return account, nil +} + +// FullAccountRLP converts data on the 'slim RLP' format into the full RLP-format. +func FullAccountRLP(data []byte) ([]byte, error) { + account, err := FullAccount(data) + if err != nil { + return nil, err + } + return rlp.EncodeToBytes(account) +} diff --git a/core/state/snapshot/conversion.go b/core/state/snapshot/conversion.go new file mode 100644 index 0000000..dee9ff0 --- /dev/null +++ b/core/state/snapshot/conversion.go @@ -0,0 +1,275 @@ +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package snapshot + +import ( + "bytes" + "fmt" + "sync" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb/memorydb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" +) + +// trieKV represents a trie key-value pair +type trieKV struct { + key common.Hash + value []byte +} + +type ( + // trieGeneratorFn is the interface of trie generation which can + // be implemented by different trie algorithm. + trieGeneratorFn func(in chan (trieKV), out chan (common.Hash)) + + // leafCallbackFn is the callback invoked at the leaves of the trie, + // returns the subtrie root with the specified subtrie identifier. + leafCallbackFn func(hash common.Hash, stat *generateStats) common.Hash +) + +// GenerateAccountTrieRoot takes an account iterator and reproduces the root hash. +func GenerateAccountTrieRoot(it AccountIterator) (common.Hash, error) { + return generateTrieRoot(it, common.Hash{}, stdGenerate, nil, &generateStats{start: time.Now()}, true) +} + +// GenerateStorageTrieRoot takes a storage iterator and reproduces the root hash. +func GenerateStorageTrieRoot(account common.Hash, it StorageIterator) (common.Hash, error) { + return generateTrieRoot(it, account, stdGenerate, nil, &generateStats{start: time.Now()}, true) +} + +// VerifyState takes the whole snapshot tree as the input, traverses all the accounts +// as well as the corresponding storages and compares the re-computed hash with the +// original one(state root and the storage root). 
+func VerifyState(snaptree *Tree, root common.Hash) error { + acctIt, err := snaptree.AccountIterator(root, common.Hash{}) + if err != nil { + return err + } + defer acctIt.Release() + + got, err := generateTrieRoot(acctIt, common.Hash{}, stdGenerate, func(account common.Hash, stat *generateStats) common.Hash { + storageIt, err := snaptree.StorageIterator(root, account, common.Hash{}) + if err != nil { + return common.Hash{} + } + defer storageIt.Release() + + hash, err := generateTrieRoot(storageIt, account, stdGenerate, nil, stat, false) + if err != nil { + return common.Hash{} + } + return hash + }, &generateStats{start: time.Now()}, true) + + if err != nil { + return err + } + if got != root { + return fmt.Errorf("state root hash mismatch: got %x, want %x", got, root) + } + return nil +} + +// generateStats is a collection of statistics gathered by the trie generator +// for logging purposes. +type generateStats struct { + accounts uint64 + slots uint64 + curAccount common.Hash + curSlot common.Hash + start time.Time + lock sync.RWMutex +} + +// progress records the progress trie generator made recently. +func (stat *generateStats) progress(accounts, slots uint64, curAccount common.Hash, curSlot common.Hash) { + stat.lock.Lock() + defer stat.lock.Unlock() + + stat.accounts += accounts + stat.slots += slots + stat.curAccount = curAccount + stat.curSlot = curSlot +} + +// report prints the cumulative progress statistic smartly. +func (stat *generateStats) report() { + stat.lock.RLock() + defer stat.lock.RUnlock() + + var ctx []interface{} + if stat.curSlot != (common.Hash{}) { + ctx = append(ctx, []interface{}{ + "in", stat.curAccount, + "at", stat.curSlot, + }...) + } else { + ctx = append(ctx, []interface{}{"at", stat.curAccount}...) + } + // Add the usual measurements + ctx = append(ctx, []interface{}{"accounts", stat.accounts}...) + if stat.slots != 0 { + ctx = append(ctx, []interface{}{"slots", stat.slots}...) + } + ctx = append(ctx, []interface{}{"elapsed", common.PrettyDuration(time.Since(stat.start))}...) + log.Info("Generating trie hash from snapshot", ctx...) +} + +// reportDone prints the last log when the whole generation is finished. +func (stat *generateStats) reportDone() { + stat.lock.RLock() + defer stat.lock.RUnlock() + + var ctx []interface{} + ctx = append(ctx, []interface{}{"accounts", stat.accounts}...) + if stat.slots != 0 { + ctx = append(ctx, []interface{}{"slots", stat.slots}...) + } + ctx = append(ctx, []interface{}{"elapsed", common.PrettyDuration(time.Since(stat.start))}...) + log.Info("Generated trie hash from snapshot", ctx...) +} + +// generateTrieRoot generates the trie hash based on the snapshot iterator. +// It can be used for generating account trie, storage trie or even the +// whole state which connects the accounts and the corresponding storages. 
+func generateTrieRoot(it Iterator, account common.Hash, generatorFn trieGeneratorFn, leafCallback leafCallbackFn, stats *generateStats, report bool) (common.Hash, error) { + var ( + in = make(chan trieKV) // chan to pass leaves + out = make(chan common.Hash, 1) // chan to collect result + stoplog = make(chan bool, 1) // 1-size buffer, works when logging is not enabled + wg sync.WaitGroup + ) + // Spin up a go-routine for trie hash re-generation + wg.Add(1) + go func() { + defer wg.Done() + generatorFn(in, out) + }() + + // Spin up a go-routine for progress logging + if report && stats != nil { + wg.Add(1) + go func() { + defer wg.Done() + + timer := time.NewTimer(0) + defer timer.Stop() + + for { + select { + case <-timer.C: + stats.report() + timer.Reset(time.Second * 8) + case success := <-stoplog: + if success { + stats.reportDone() + } + return + } + } + }() + } + // stop is a helper function to shutdown the background threads + // and return the re-generated trie hash. + stop := func(success bool) common.Hash { + close(in) + result := <-out + stoplog <- success + wg.Wait() + return result + } + var ( + logged = time.Now() + processed = uint64(0) + leaf trieKV + last common.Hash + ) + // Start to feed leaves + for it.Next() { + if account == (common.Hash{}) { + var ( + err error + fullData []byte + ) + if leafCallback == nil { + fullData, err = FullAccountRLP(it.(AccountIterator).Account()) + if err != nil { + stop(false) + return common.Hash{}, err + } + } else { + account, err := FullAccount(it.(AccountIterator).Account()) + if err != nil { + stop(false) + return common.Hash{}, err + } + // Apply the leaf callback. Normally the callback is used to traverse + // the storage trie and re-generate the subtrie root. + subroot := leafCallback(it.Hash(), stats) + if !bytes.Equal(account.Root, subroot.Bytes()) { + stop(false) + return common.Hash{}, fmt.Errorf("invalid subroot(%x), want %x, got %x", it.Hash(), account.Root, subroot) + } + fullData, err = rlp.EncodeToBytes(account) + if err != nil { + stop(false) + return common.Hash{}, err + } + } + leaf = trieKV{it.Hash(), fullData} + } else { + leaf = trieKV{it.Hash(), common.CopyBytes(it.(StorageIterator).Slot())} + } + in <- leaf + + // Accumulate the generation statistics if required. + processed++ + if time.Since(logged) > 3*time.Second && stats != nil { + if account == (common.Hash{}) { + stats.progress(processed, 0, it.Hash(), common.Hash{}) + } else { + stats.progress(0, processed, account, it.Hash()) + } + logged, processed = time.Now(), 0 + } + last = it.Hash() + } + // Commit the last part of the statistics. + if processed > 0 && stats != nil { + if account == (common.Hash{}) { + stats.progress(processed, 0, last, common.Hash{}) + } else { + stats.progress(0, processed, account, last) + } + } + result := stop(true) + return result, nil +} + +// stdGenerate is a very basic hexary trie builder which uses the same Trie +// as the rest of geth, with no enhancements or optimizations +func stdGenerate(in chan (trieKV), out chan (common.Hash)) { + t, _ := trie.New(common.Hash{}, trie.NewDatabase(memorydb.New())) + for leaf := range in { + t.TryUpdate(leaf.key[:], leaf.value) + } + out <- t.Hash() +} diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go new file mode 100644 index 0000000..0aef6cf --- /dev/null +++ b/core/state/snapshot/difflayer.go @@ -0,0 +1,553 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. 
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package snapshot + +import ( + "encoding/binary" + "fmt" + "math" + "math/rand" + "sort" + "sync" + "sync/atomic" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/rlp" + "github.com/steakknife/bloomfilter" +) + +var ( + // aggregatorMemoryLimit is the maximum size of the bottom-most diff layer + // that aggregates the writes from above until it's flushed into the disk + // layer. + // + // Note, bumping this up might drastically increase the size of the bloom + // filters that's stored in every diff layer. Don't do that without fully + // understanding all the implications. + aggregatorMemoryLimit = uint64(4 * 1024 * 1024) + + // aggregatorItemLimit is an approximate number of items that will end up + // in the aggregator layer before it's flushed out to disk. A plain account + // weighs around 14B (+hash), a storage slot 32B (+hash), a deleted slot + // 0B (+hash). Slots are mostly set/unset in lockstep, so they average at + // 16B (+hash). All in all, the average entry seems to be 15+32=47B. Use a + // smaller number to be on the safe side. + aggregatorItemLimit = aggregatorMemoryLimit / 42 + + // bloomTargetError is the target false positive rate when the aggregator + // layer is at its fullest. The actual value will probably move around up + // and down from this number, it's mostly a ballpark figure. + // + // Note, dropping this down might drastically increase the size of the bloom + // filters that's stored in every diff layer. Don't do that without fully + // understanding all the implications. + bloomTargetError = 0.02 + + // bloomSize is the ideal bloom filter size given the maximum number of items + // it's expected to hold and the target false positive error rate. + bloomSize = math.Ceil(float64(aggregatorItemLimit) * math.Log(bloomTargetError) / math.Log(1/math.Pow(2, math.Log(2)))) + + // bloomFuncs is the ideal number of bits a single entry should set in the + // bloom filter to keep its size to a minimum (given its size and maximum + // entry count). + bloomFuncs = math.Round((bloomSize / float64(aggregatorItemLimit)) * math.Log(2)) + + // the bloom offsets are runtime constants which determine which part of the + // account/storage hash the hasher function looks at, to determine the + // bloom key for an account/slot. 
This is randomized at init(), so that the + // global population of nodes do not all display the exact same behaviour with + // regards to bloom content + bloomDestructHasherOffset = 0 + bloomAccountHasherOffset = 0 + bloomStorageHasherOffset = 0 +) + +func init() { + // Init the bloom offsets in the range [0:24] (requires 8 bytes) + bloomDestructHasherOffset = rand.Intn(25) + bloomAccountHasherOffset = rand.Intn(25) + bloomStorageHasherOffset = rand.Intn(25) + + // The destruct and account blooms must be different, as the storage slots + // will check for destruction too for every bloom miss. It should not collide + // with modified accounts. + for bloomAccountHasherOffset == bloomDestructHasherOffset { + bloomAccountHasherOffset = rand.Intn(25) + } +} + +// diffLayer represents a collection of modifications made to a state snapshot +// after running a block on top. It contains one sorted list for the account trie +// and one sorted list for each storage trie. +// +// The goal of a diff layer is to act as a journal, tracking recent modifications +// made to the state, that have not yet graduated into a semi-immutable state. +type diffLayer struct { + origin *diskLayer // Base disk layer to directly use on bloom misses + parent snapshot // Parent snapshot modified by this one, never nil + memory uint64 // Approximate guess as to how much memory we use + + root common.Hash // Root hash to which this snapshot diff belongs to + stale uint32 // Signals that the layer became stale (state progressed) + + // destructSet is a very special helper marker. If an account is marked as + // deleted, then it's recorded in this set. However it's allowed that an account + // is included here but still available in other sets (e.g. storageData). The + // reason is the diff layer includes all the changes in a *block*. It can + // happen that in tx_1 account A is self-destructed, while in tx_2 it's + // recreated. But we still need this marker to indicate the "old" A is + // deleted; all data in the other sets belongs to the "new" A. + destructSet map[common.Hash]struct{} // Keyed markers for deleted (and potentially recreated) accounts + accountList []common.Hash // List of accounts for iteration. If it exists, it's sorted, otherwise it's nil + accountData map[common.Hash][]byte // Keyed accounts for direct retrieval (nil means deleted) + storageList map[common.Hash][]common.Hash // List of storage slots for iterated retrievals, one per account. Any existing lists are sorted if non-nil + storageData map[common.Hash]map[common.Hash][]byte // Keyed storage slots for direct retrieval, one per account (nil means deleted) + + diffed *bloomfilter.Filter // Bloom filter tracking all the diffed items up to the disk layer + + lock sync.RWMutex +} + +// destructBloomHasher is a wrapper around a common.Hash to satisfy the interface +// API requirements of the bloom library used. It's used to convert a destruct +// event into a 64 bit mini hash. 
+type destructBloomHasher common.Hash + +func (h destructBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") } +func (h destructBloomHasher) Sum(b []byte) []byte { panic("not implemented") } +func (h destructBloomHasher) Reset() { panic("not implemented") } +func (h destructBloomHasher) BlockSize() int { panic("not implemented") } +func (h destructBloomHasher) Size() int { return 8 } +func (h destructBloomHasher) Sum64() uint64 { + return binary.BigEndian.Uint64(h[bloomDestructHasherOffset : bloomDestructHasherOffset+8]) +} + +// accountBloomHasher is a wrapper around a common.Hash to satisfy the interface +// API requirements of the bloom library used. It's used to convert an account +// hash into a 64 bit mini hash. +type accountBloomHasher common.Hash + +func (h accountBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") } +func (h accountBloomHasher) Sum(b []byte) []byte { panic("not implemented") } +func (h accountBloomHasher) Reset() { panic("not implemented") } +func (h accountBloomHasher) BlockSize() int { panic("not implemented") } +func (h accountBloomHasher) Size() int { return 8 } +func (h accountBloomHasher) Sum64() uint64 { + return binary.BigEndian.Uint64(h[bloomAccountHasherOffset : bloomAccountHasherOffset+8]) +} + +// storageBloomHasher is a wrapper around a [2]common.Hash to satisfy the interface +// API requirements of the bloom library used. It's used to convert an account +// hash into a 64 bit mini hash. +type storageBloomHasher [2]common.Hash + +func (h storageBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") } +func (h storageBloomHasher) Sum(b []byte) []byte { panic("not implemented") } +func (h storageBloomHasher) Reset() { panic("not implemented") } +func (h storageBloomHasher) BlockSize() int { panic("not implemented") } +func (h storageBloomHasher) Size() int { return 8 } +func (h storageBloomHasher) Sum64() uint64 { + return binary.BigEndian.Uint64(h[0][bloomStorageHasherOffset:bloomStorageHasherOffset+8]) ^ + binary.BigEndian.Uint64(h[1][bloomStorageHasherOffset:bloomStorageHasherOffset+8]) +} + +// newDiffLayer creates a new diff on top of an existing snapshot, whether that's a low +// level persistent database or a hierarchical diff already. 
+func newDiffLayer(parent snapshot, root common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { + // Create the new layer with some pre-allocated data segments + dl := &diffLayer{ + parent: parent, + root: root, + destructSet: destructs, + accountData: accounts, + storageData: storage, + storageList: make(map[common.Hash][]common.Hash), + } + switch parent := parent.(type) { + case *diskLayer: + dl.rebloom(parent) + case *diffLayer: + dl.rebloom(parent.origin) + default: + panic("unknown parent type") + } + // Sanity check that accounts or storage slots are never nil + for accountHash, blob := range accounts { + if blob == nil { + panic(fmt.Sprintf("account %#x nil", accountHash)) + } + } + for accountHash, slots := range storage { + if slots == nil { + panic(fmt.Sprintf("storage %#x nil", accountHash)) + } + } + // Determine memory size and track the dirty writes + for _, data := range accounts { + dl.memory += uint64(common.HashLength + len(data)) + snapshotDirtyAccountWriteMeter.Mark(int64(len(data))) + } + // Determine memory size and track the dirty writes + for _, slots := range storage { + for _, data := range slots { + dl.memory += uint64(common.HashLength + len(data)) + snapshotDirtyStorageWriteMeter.Mark(int64(len(data))) + } + } + dl.memory += uint64(len(destructs) * common.HashLength) + return dl +} + +// rebloom discards the layer's current bloom and rebuilds it from scratch based +// on the parent's and the local diffs. +func (dl *diffLayer) rebloom(origin *diskLayer) { + dl.lock.Lock() + defer dl.lock.Unlock() + + defer func(start time.Time) { + snapshotBloomIndexTimer.Update(time.Since(start)) + }(time.Now()) + + // Inject the new origin that triggered the rebloom + dl.origin = origin + + // Retrieve the parent bloom or create a fresh empty one + if parent, ok := dl.parent.(*diffLayer); ok { + parent.lock.RLock() + dl.diffed, _ = parent.diffed.Copy() + parent.lock.RUnlock() + } else { + dl.diffed, _ = bloomfilter.New(uint64(bloomSize), uint64(bloomFuncs)) + } + // Iterate over all the accounts and storage slots and index them + for hash := range dl.destructSet { + dl.diffed.Add(destructBloomHasher(hash)) + } + for hash := range dl.accountData { + dl.diffed.Add(accountBloomHasher(hash)) + } + for accountHash, slots := range dl.storageData { + for storageHash := range slots { + dl.diffed.Add(storageBloomHasher{accountHash, storageHash}) + } + } + // Calculate the current false positive rate and update the error rate meter. + // This is a bit cheating because subsequent layers will overwrite it, but it + // should be fine, we're only interested in ballpark figures. + k := float64(dl.diffed.K()) + n := float64(dl.diffed.N()) + m := float64(dl.diffed.M()) + snapshotBloomErrorGauge.Update(math.Pow(1.0-math.Exp((-k)*(n+0.5)/(m-1)), k)) +} + +// Root returns the root hash for which this snapshot was made. +func (dl *diffLayer) Root() common.Hash { + return dl.root +} + +// Parent returns the subsequent layer of a diff layer. +func (dl *diffLayer) Parent() snapshot { + return dl.parent +} + +// Stale return whether this layer has become stale (was flattened across) or if +// it's still live. +func (dl *diffLayer) Stale() bool { + return atomic.LoadUint32(&dl.stale) != 0 +} + +// Account directly retrieves the account associated with a particular hash in +// the snapshot slim data format. 
+func (dl *diffLayer) Account(hash common.Hash) (*Account, error) { + data, err := dl.AccountRLP(hash) + if err != nil { + return nil, err + } + if len(data) == 0 { // can be both nil and []byte{} + return nil, nil + } + account := new(Account) + if err := rlp.DecodeBytes(data, account); err != nil { + panic(err) + } + return account, nil +} + +// AccountRLP directly retrieves the account RLP associated with a particular +// hash in the snapshot slim data format. +// +// Note the returned account is not a copy, please don't modify it. +func (dl *diffLayer) AccountRLP(hash common.Hash) ([]byte, error) { + // Check the bloom filter first whether there's even a point in reaching into + // all the maps in all the layers below + dl.lock.RLock() + hit := dl.diffed.Contains(accountBloomHasher(hash)) + if !hit { + hit = dl.diffed.Contains(destructBloomHasher(hash)) + } + dl.lock.RUnlock() + + // If the bloom filter misses, don't even bother with traversing the memory + // diff layers, reach straight into the bottom persistent disk layer + if !hit { + snapshotBloomAccountMissMeter.Mark(1) + return dl.origin.AccountRLP(hash) + } + // The bloom filter hit, start poking in the internal maps + return dl.accountRLP(hash, 0) +} + +// accountRLP is an internal version of AccountRLP that skips the bloom filter +// checks and uses the internal maps to try and retrieve the data. It's meant +// to be used if a higher layer's bloom filter hit already. +func (dl *diffLayer) accountRLP(hash common.Hash, depth int) ([]byte, error) { + dl.lock.RLock() + defer dl.lock.RUnlock() + + // If the layer was flattened into, consider it invalid (any live reference to + // the original should be marked as unusable). + if dl.Stale() { + return nil, ErrSnapshotStale + } + // If the account is known locally, return it + if data, ok := dl.accountData[hash]; ok { + snapshotDirtyAccountHitMeter.Mark(1) + snapshotDirtyAccountHitDepthHist.Update(int64(depth)) + snapshotDirtyAccountReadMeter.Mark(int64(len(data))) + snapshotBloomAccountTrueHitMeter.Mark(1) + return data, nil + } + // If the account is known locally, but deleted, return it + if _, ok := dl.destructSet[hash]; ok { + snapshotDirtyAccountHitMeter.Mark(1) + snapshotDirtyAccountHitDepthHist.Update(int64(depth)) + snapshotDirtyAccountInexMeter.Mark(1) + snapshotBloomAccountTrueHitMeter.Mark(1) + return nil, nil + } + // Account unknown to this diff, resolve from parent + if diff, ok := dl.parent.(*diffLayer); ok { + return diff.accountRLP(hash, depth+1) + } + // Failed to resolve through diff layers, mark a bloom error and use the disk + snapshotBloomAccountFalseHitMeter.Mark(1) + return dl.parent.AccountRLP(hash) +} + +// Storage directly retrieves the storage data associated with a particular hash, +// within a particular account. If the slot is unknown to this diff, it's parent +// is consulted. +// +// Note the returned slot is not a copy, please don't modify it. 
+func (dl *diffLayer) Storage(accountHash, storageHash common.Hash) ([]byte, error) { + // Check the bloom filter first whether there's even a point in reaching into + // all the maps in all the layers below + dl.lock.RLock() + hit := dl.diffed.Contains(storageBloomHasher{accountHash, storageHash}) + if !hit { + hit = dl.diffed.Contains(destructBloomHasher(accountHash)) + } + dl.lock.RUnlock() + + // If the bloom filter misses, don't even bother with traversing the memory + // diff layers, reach straight into the bottom persistent disk layer + if !hit { + snapshotBloomStorageMissMeter.Mark(1) + return dl.origin.Storage(accountHash, storageHash) + } + // The bloom filter hit, start poking in the internal maps + return dl.storage(accountHash, storageHash, 0) +} + +// storage is an internal version of Storage that skips the bloom filter checks +// and uses the internal maps to try and retrieve the data. It's meant to be +// used if a higher layer's bloom filter hit already. +func (dl *diffLayer) storage(accountHash, storageHash common.Hash, depth int) ([]byte, error) { + dl.lock.RLock() + defer dl.lock.RUnlock() + + // If the layer was flattened into, consider it invalid (any live reference to + // the original should be marked as unusable). + if dl.Stale() { + return nil, ErrSnapshotStale + } + // If the account is known locally, try to resolve the slot locally + if storage, ok := dl.storageData[accountHash]; ok { + if data, ok := storage[storageHash]; ok { + snapshotDirtyStorageHitMeter.Mark(1) + snapshotDirtyStorageHitDepthHist.Update(int64(depth)) + if n := len(data); n > 0 { + snapshotDirtyStorageReadMeter.Mark(int64(n)) + } else { + snapshotDirtyStorageInexMeter.Mark(1) + } + snapshotBloomStorageTrueHitMeter.Mark(1) + return data, nil + } + } + // If the account is known locally, but deleted, return an empty slot + if _, ok := dl.destructSet[accountHash]; ok { + snapshotDirtyStorageHitMeter.Mark(1) + snapshotDirtyStorageHitDepthHist.Update(int64(depth)) + snapshotDirtyStorageInexMeter.Mark(1) + snapshotBloomStorageTrueHitMeter.Mark(1) + return nil, nil + } + // Storage slot unknown to this diff, resolve from parent + if diff, ok := dl.parent.(*diffLayer); ok { + return diff.storage(accountHash, storageHash, depth+1) + } + // Failed to resolve through diff layers, mark a bloom error and use the disk + snapshotBloomStorageFalseHitMeter.Mark(1) + return dl.parent.Storage(accountHash, storageHash) +} + +// Update creates a new layer on top of the existing snapshot diff tree with +// the specified data items. +func (dl *diffLayer) Update(blockRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { + return newDiffLayer(dl, blockRoot, destructs, accounts, storage) +} + +// flatten pushes all data from this point downwards, flattening everything into +// a single diff at the bottom. Since usually the lowermost diff is the largest, +// the flattening builds up from there in reverse. +func (dl *diffLayer) flatten() snapshot { + // If the parent is not diff, we're the first in line, return unmodified + parent, ok := dl.parent.(*diffLayer) + if !ok { + return dl + } + // Parent is a diff, flatten it first (note, apart from weird corner cases, + // flatten will realistically only ever merge 1 layer, so there's no need to + // be smarter about grouping flattens together). 
+ parent = parent.flatten().(*diffLayer) + + parent.lock.Lock() + defer parent.lock.Unlock() + + // Before actually writing all our data to the parent, first ensure that the + // parent hasn't been 'corrupted' by someone else already flattening into it + if atomic.SwapUint32(&parent.stale, 1) != 0 { + panic("parent diff layer is stale") // we've flattened into the same parent from two children, boo + } + // Overwrite all the updated accounts blindly, merge the sorted list + for hash := range dl.destructSet { + parent.destructSet[hash] = struct{}{} + delete(parent.accountData, hash) + delete(parent.storageData, hash) + } + for hash, data := range dl.accountData { + parent.accountData[hash] = data + } + // Overwrite all the updated storage slots (individually) + for accountHash, storage := range dl.storageData { + // If storage didn't exist (or was deleted) in the parent, overwrite blindly + if _, ok := parent.storageData[accountHash]; !ok { + parent.storageData[accountHash] = storage + continue + } + // Storage exists in both parent and child, merge the slots + comboData := parent.storageData[accountHash] + for storageHash, data := range storage { + comboData[storageHash] = data + } + parent.storageData[accountHash] = comboData + } + // Return the combo parent + return &diffLayer{ + parent: parent.parent, + origin: parent.origin, + root: dl.root, + destructSet: parent.destructSet, + accountData: parent.accountData, + storageData: parent.storageData, + storageList: make(map[common.Hash][]common.Hash), + diffed: dl.diffed, + memory: parent.memory + dl.memory, + } +} + +// AccountList returns a sorted list of all accounts in this difflayer, including +// the deleted ones. +// +// Note, the returned slice is not a copy, so do not modify it. +func (dl *diffLayer) AccountList() []common.Hash { + // If an old list already exists, return it + dl.lock.RLock() + list := dl.accountList + dl.lock.RUnlock() + + if list != nil { + return list + } + // No old sorted account list exists, generate a new one + dl.lock.Lock() + defer dl.lock.Unlock() + + dl.accountList = make([]common.Hash, 0, len(dl.destructSet)+len(dl.accountData)) + for hash := range dl.accountData { + dl.accountList = append(dl.accountList, hash) + } + for hash := range dl.destructSet { + if _, ok := dl.accountData[hash]; !ok { + dl.accountList = append(dl.accountList, hash) + } + } + sort.Sort(hashes(dl.accountList)) + dl.memory += uint64(len(dl.accountList) * common.HashLength) + return dl.accountList +} + +// StorageList returns a sorted list of all storage slot hashes in this difflayer +// for the given account. If the whole storage is destructed in this layer, then +// an additional flag *destructed = true* will be returned, otherwise the flag is +// false. Besides, the returned list will include the hash of deleted storage slot. +// Note a special case is an account is deleted in a prior tx but is recreated in +// the following tx with some storage slots set. In this case the returned list is +// not empty but the flag is true. +// +// Note, the returned slice is not a copy, so do not modify it. 
+func (dl *diffLayer) StorageList(accountHash common.Hash) ([]common.Hash, bool) { + dl.lock.RLock() + _, destructed := dl.destructSet[accountHash] + if _, ok := dl.storageData[accountHash]; !ok { + // Account not tracked by this layer + dl.lock.RUnlock() + return nil, destructed + } + // If an old list already exists, return it + if list, exist := dl.storageList[accountHash]; exist { + dl.lock.RUnlock() + return list, destructed // the cached list can't be nil + } + dl.lock.RUnlock() + + // No old sorted account list exists, generate a new one + dl.lock.Lock() + defer dl.lock.Unlock() + + storageMap := dl.storageData[accountHash] + storageList := make([]common.Hash, 0, len(storageMap)) + for k := range storageMap { + storageList = append(storageList, k) + } + sort.Sort(hashes(storageList)) + dl.storageList[accountHash] = storageList + dl.memory += uint64(len(dl.storageList)*common.HashLength + common.HashLength) + return storageList, destructed +} diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go new file mode 100644 index 0000000..496edaa --- /dev/null +++ b/core/state/snapshot/disklayer.go @@ -0,0 +1,166 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package snapshot + +import ( + "bytes" + "sync" + + "github.com/VictoriaMetrics/fastcache" + "github.com/ava-labs/coreth/core/rawdb" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" +) + +// diskLayer is a low level persistent snapshot built on top of a key-value store. +type diskLayer struct { + diskdb ethdb.KeyValueStore // Key-value store containing the base snapshot + triedb *trie.Database // Trie node cache for reconstuction purposes + cache *fastcache.Cache // Cache to avoid hitting the disk for direct access + + root common.Hash // Root hash of the base snapshot + stale bool // Signals that the layer became stale (state progressed) + + genMarker []byte // Marker for the state that's indexed during initial layer generation + genPending chan struct{} // Notification channel when generation is done (test synchronicity) + genAbort chan chan *generatorStats // Notification channel to abort generating the snapshot in this layer + + lock sync.RWMutex +} + +// Root returns root hash for which this snapshot was made. +func (dl *diskLayer) Root() common.Hash { + return dl.root +} + +// Parent always returns nil as there's no layer below the disk. +func (dl *diskLayer) Parent() snapshot { + return nil +} + +// Stale return whether this layer has become stale (was flattened across) or if +// it's still live. 
+func (dl *diskLayer) Stale() bool { + dl.lock.RLock() + defer dl.lock.RUnlock() + + return dl.stale +} + +// Account directly retrieves the account associated with a particular hash in +// the snapshot slim data format. +func (dl *diskLayer) Account(hash common.Hash) (*Account, error) { + data, err := dl.AccountRLP(hash) + if err != nil { + return nil, err + } + if len(data) == 0 { // can be both nil and []byte{} + return nil, nil + } + account := new(Account) + if err := rlp.DecodeBytes(data, account); err != nil { + panic(err) + } + return account, nil +} + +// AccountRLP directly retrieves the account RLP associated with a particular +// hash in the snapshot slim data format. +func (dl *diskLayer) AccountRLP(hash common.Hash) ([]byte, error) { + dl.lock.RLock() + defer dl.lock.RUnlock() + + // If the layer was flattened into, consider it invalid (any live reference to + // the original should be marked as unusable). + if dl.stale { + return nil, ErrSnapshotStale + } + // If the layer is being generated, ensure the requested hash has already been + // covered by the generator. + if dl.genMarker != nil && bytes.Compare(hash[:], dl.genMarker) > 0 { + return nil, ErrNotCoveredYet + } + // If we're in the disk layer, all diff layers missed + snapshotDirtyAccountMissMeter.Mark(1) + + // Try to retrieve the account from the memory cache + if blob, found := dl.cache.HasGet(nil, hash[:]); found { + snapshotCleanAccountHitMeter.Mark(1) + snapshotCleanAccountReadMeter.Mark(int64(len(blob))) + return blob, nil + } + // Cache doesn't contain account, pull from disk and cache for later + blob := rawdb.ReadAccountSnapshot(dl.diskdb, hash) + dl.cache.Set(hash[:], blob) + + snapshotCleanAccountMissMeter.Mark(1) + if n := len(blob); n > 0 { + snapshotCleanAccountWriteMeter.Mark(int64(n)) + } else { + snapshotCleanAccountInexMeter.Mark(1) + } + return blob, nil +} + +// Storage directly retrieves the storage data associated with a particular hash, +// within a particular account. +func (dl *diskLayer) Storage(accountHash, storageHash common.Hash) ([]byte, error) { + dl.lock.RLock() + defer dl.lock.RUnlock() + + // If the layer was flattened into, consider it invalid (any live reference to + // the original should be marked as unusable). + if dl.stale { + return nil, ErrSnapshotStale + } + key := append(accountHash[:], storageHash[:]...) + + // If the layer is being generated, ensure the requested hash has already been + // covered by the generator. + if dl.genMarker != nil && bytes.Compare(key, dl.genMarker) > 0 { + return nil, ErrNotCoveredYet + } + // If we're in the disk layer, all diff layers missed + snapshotDirtyStorageMissMeter.Mark(1) + + // Try to retrieve the storage slot from the memory cache + if blob, found := dl.cache.HasGet(nil, key); found { + snapshotCleanStorageHitMeter.Mark(1) + snapshotCleanStorageReadMeter.Mark(int64(len(blob))) + return blob, nil + } + // Cache doesn't contain storage slot, pull from disk and cache for later + blob := rawdb.ReadStorageSnapshot(dl.diskdb, accountHash, storageHash) + dl.cache.Set(key, blob) + + snapshotCleanStorageMissMeter.Mark(1) + if n := len(blob); n > 0 { + snapshotCleanStorageWriteMeter.Mark(int64(n)) + } else { + snapshotCleanStorageInexMeter.Mark(1) + } + return blob, nil +} + +// Update creates a new layer on top of the existing snapshot diff tree with +// the specified data items. Note, the maps are retained by the method to avoid +// copying everything. 
+func (dl *diskLayer) Update(blockHash common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { + return newDiffLayer(dl, blockHash, destructs, accounts, storage) +} diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go new file mode 100644 index 0000000..27f8dfc --- /dev/null +++ b/core/state/snapshot/generate.go @@ -0,0 +1,265 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package snapshot + +import ( + "bytes" + "encoding/binary" + "math/big" + "time" + + "github.com/VictoriaMetrics/fastcache" + "github.com/ava-labs/coreth/core/rawdb" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/common/math" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" +) + +var ( + // emptyRoot is the known root hash of an empty trie. + emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421") + + // emptyCode is the known hash of the empty EVM bytecode. + emptyCode = crypto.Keccak256Hash(nil) +) + +// generatorStats is a collection of statistics gathered by the snapshot generator +// for logging purposes. +type generatorStats struct { + wiping chan struct{} // Notification channel if wiping is in progress + origin uint64 // Origin prefix where generation started + start time.Time // Timestamp when generation started + accounts uint64 // Number of accounts indexed + slots uint64 // Number of storage slots indexed + storage common.StorageSize // Account and storage slot size +} + +// Log creates an contextual log with the given message and the context pulled +// from the internally maintained statistics. +func (gs *generatorStats) Log(msg string, root common.Hash, marker []byte) { + var ctx []interface{} + if root != (common.Hash{}) { + ctx = append(ctx, []interface{}{"root", root}...) + } + // Figure out whether we're after or within an account + switch len(marker) { + case common.HashLength: + ctx = append(ctx, []interface{}{"at", common.BytesToHash(marker)}...) + case 2 * common.HashLength: + ctx = append(ctx, []interface{}{ + "in", common.BytesToHash(marker[:common.HashLength]), + "at", common.BytesToHash(marker[common.HashLength:]), + }...) + } + // Add the usual measurements + ctx = append(ctx, []interface{}{ + "accounts", gs.accounts, + "slots", gs.slots, + "storage", gs.storage, + "elapsed", common.PrettyDuration(time.Since(gs.start)), + }...) 
+	// Calculate the estimated indexing time based on current stats
+	if len(marker) > 0 {
+		if done := binary.BigEndian.Uint64(marker[:8]) - gs.origin; done > 0 {
+			left := math.MaxUint64 - binary.BigEndian.Uint64(marker[:8])
+
+			speed := done/uint64(time.Since(gs.start)/time.Millisecond+1) + 1 // +1s to avoid division by zero
+			ctx = append(ctx, []interface{}{
+				"eta", common.PrettyDuration(time.Duration(left/speed) * time.Millisecond),
+			}...)
+		}
+	}
+	log.Info(msg, ctx...)
+}
+
+// generateSnapshot regenerates a brand new snapshot based on an existing state
+// database and head block asynchronously. The snapshot is returned immediately
+// and generation is continued in the background until done.
+func generateSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, wiper chan struct{}) *diskLayer {
+	// Wipe any previously existing snapshot from the database if no wiper is
+	// currently in progress.
+	if wiper == nil {
+		wiper = wipeSnapshot(diskdb, true)
+	}
+	// Create a new disk layer with an initialized state marker at zero
+	rawdb.WriteSnapshotRoot(diskdb, root)
+
+	base := &diskLayer{
+		diskdb:     diskdb,
+		triedb:     triedb,
+		root:       root,
+		cache:      fastcache.New(cache * 1024 * 1024),
+		genMarker:  []byte{}, // Initialized but empty!
+		genPending: make(chan struct{}),
+		genAbort:   make(chan chan *generatorStats),
+	}
+	go base.generate(&generatorStats{wiping: wiper, start: time.Now()})
+	return base
+}
+
+// generate is a background thread that iterates over the state and storage tries,
+// constructing the state snapshot. All the arguments are purely for statistics
+// gathering and logging, since the method surfs the blocks as they arrive, often
+// being restarted.
+func (dl *diskLayer) generate(stats *generatorStats) {
+	// If a database wipe is in operation, wait until it's done
+	if stats.wiping != nil {
+		stats.Log("Wiper running, state snapshotting paused", common.Hash{}, dl.genMarker)
+		select {
+		// If wiper is done, resume normal mode of operation
+		case <-stats.wiping:
+			stats.wiping = nil
+			stats.start = time.Now()
+
+		// If generator was aborted during wipe, return
+		case abort := <-dl.genAbort:
+			abort <- stats
+			return
+		}
+	}
+	// Create an account and state iterator pointing to the current generator marker
+	accTrie, err := trie.NewSecure(dl.root, dl.triedb)
+	if err != nil {
+		// The account trie is missing (GC), surf the chain until one becomes available
+		stats.Log("Trie missing, state snapshotting paused", dl.root, dl.genMarker)
+
+		abort := <-dl.genAbort
+		abort <- stats
+		return
+	}
+	stats.Log("Resuming state snapshot generation", dl.root, dl.genMarker)
+
+	var accMarker []byte
+	if len(dl.genMarker) > 0 { // []byte{} is the start, use nil for that
+		accMarker = dl.genMarker[:common.HashLength]
+	}
+	accIt := trie.NewIterator(accTrie.NodeIterator(accMarker))
+	batch := dl.diskdb.NewBatch()
+
+	// Iterate from the previous marker and continue generating the state snapshot
+	logged := time.Now()
+	for accIt.Next() {
+		// Retrieve the current account and flatten it into the internal format
+		accountHash := common.BytesToHash(accIt.Key)
+
+		var acc struct {
+			Nonce       uint64
+			Balance     *big.Int
+			Root        common.Hash
+			CodeHash    []byte
+			IsMultiCoin bool
+		}
+		if err := rlp.DecodeBytes(accIt.Value, &acc); err != nil {
+			log.Crit("Invalid account encountered during snapshot creation", "err", err)
+		}
+		data := SlimAccountRLP(acc.Nonce, acc.Balance, acc.Root, acc.CodeHash, acc.IsMultiCoin)
+
+		// If the account is not yet in-progress, write it
out + if accMarker == nil || !bytes.Equal(accountHash[:], accMarker) { + rawdb.WriteAccountSnapshot(batch, accountHash, data) + stats.storage += common.StorageSize(1 + common.HashLength + len(data)) + stats.accounts++ + } + // If we've exceeded our batch allowance or termination was requested, flush to disk + var abort chan *generatorStats + select { + case abort = <-dl.genAbort: + default: + } + if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil { + // Only write and set the marker if we actually did something useful + if batch.ValueSize() > 0 { + batch.Write() + batch.Reset() + + dl.lock.Lock() + dl.genMarker = accountHash[:] + dl.lock.Unlock() + } + if abort != nil { + stats.Log("Aborting state snapshot generation", dl.root, accountHash[:]) + abort <- stats + return + } + } + // If the account is in-progress, continue where we left off (otherwise iterate all) + if acc.Root != emptyRoot { + storeTrie, err := trie.NewSecure(acc.Root, dl.triedb) + if err != nil { + log.Crit("Storage trie inaccessible for snapshot generation", "err", err) + } + var storeMarker []byte + if accMarker != nil && bytes.Equal(accountHash[:], accMarker) && len(dl.genMarker) > common.HashLength { + storeMarker = dl.genMarker[common.HashLength:] + } + storeIt := trie.NewIterator(storeTrie.NodeIterator(storeMarker)) + for storeIt.Next() { + rawdb.WriteStorageSnapshot(batch, accountHash, common.BytesToHash(storeIt.Key), storeIt.Value) + stats.storage += common.StorageSize(1 + 2*common.HashLength + len(storeIt.Value)) + stats.slots++ + + // If we've exceeded our batch allowance or termination was requested, flush to disk + var abort chan *generatorStats + select { + case abort = <-dl.genAbort: + default: + } + if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil { + // Only write and set the marker if we actually did something useful + if batch.ValueSize() > 0 { + batch.Write() + batch.Reset() + + dl.lock.Lock() + dl.genMarker = append(accountHash[:], storeIt.Key...) + dl.lock.Unlock() + } + if abort != nil { + stats.Log("Aborting state snapshot generation", dl.root, append(accountHash[:], storeIt.Key...)) + abort <- stats + return + } + } + } + } + if time.Since(logged) > 8*time.Second { + stats.Log("Generating state snapshot", dl.root, accIt.Key) + logged = time.Now() + } + // Some account processed, unmark the marker + accMarker = nil + } + // Snapshot fully generated, set the marker to nil + if batch.ValueSize() > 0 { + batch.Write() + } + log.Info("Generated state snapshot", "accounts", stats.accounts, "slots", stats.slots, + "storage", stats.storage, "elapsed", common.PrettyDuration(time.Since(stats.start))) + + dl.lock.Lock() + dl.genMarker = nil + close(dl.genPending) + dl.lock.Unlock() + + // Someone will be looking for us, wait it out + abort := <-dl.genAbort + abort <- nil +} diff --git a/core/state/snapshot/iterator.go b/core/state/snapshot/iterator.go new file mode 100644 index 0000000..ef527ff --- /dev/null +++ b/core/state/snapshot/iterator.go @@ -0,0 +1,400 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package snapshot + +import ( + "bytes" + "fmt" + "sort" + + "github.com/ava-labs/coreth/core/rawdb" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" +) + +// Iterator is an iterator to step over all the accounts or the specific +// storage in a snapshot which may or may not be composed of multiple layers. +type Iterator interface { + // Next steps the iterator forward one element, returning false if exhausted, + // or an error if iteration failed for some reason (e.g. root being iterated + // becomes stale and garbage collected). + Next() bool + + // Error returns any failure that occurred during iteration, which might have + // caused a premature iteration exit (e.g. snapshot stack becoming stale). + Error() error + + // Hash returns the hash of the account or storage slot the iterator is + // currently at. + Hash() common.Hash + + // Release releases associated resources. Release should always succeed and + // can be called multiple times without causing error. + Release() +} + +// AccountIterator is an iterator to step over all the accounts in a snapshot, +// which may or may not be composed of multiple layers. +type AccountIterator interface { + Iterator + + // Account returns the RLP encoded slim account the iterator is currently at. + // An error will be returned if the iterator becomes invalid + Account() []byte +} + +// StorageIterator is an iterator to step over the specific storage in a snapshot, +// which may or may not be composed of multiple layers. +type StorageIterator interface { + Iterator + + // Slot returns the storage slot the iterator is currently at. An error will + // be returned if the iterator becomes invalid + Slot() []byte +} + +// diffAccountIterator is an account iterator that steps over the accounts (both +// live and deleted) contained within a single diff layer. Higher order iterators +// will use the deleted accounts to skip deeper iterators. +type diffAccountIterator struct { + // curHash is the current hash the iterator is positioned on. The field is + // explicitly tracked since the referenced diff layer might go stale after + // the iterator was positioned and we don't want to fail accessing the old + // hash as long as the iterator is not touched any more. + curHash common.Hash + + layer *diffLayer // Live layer to retrieve values from + keys []common.Hash // Keys left in the layer to iterate + fail error // Any failures encountered (stale) +} + +// AccountIterator creates an account iterator over a single diff layer. +func (dl *diffLayer) AccountIterator(seek common.Hash) AccountIterator { + // Seek out the requested starting account + hashes := dl.AccountList() + index := sort.Search(len(hashes), func(i int) bool { + return bytes.Compare(seek[:], hashes[i][:]) <= 0 + }) + // Assemble and returned the already seeked iterator + return &diffAccountIterator{ + layer: dl, + keys: hashes[index:], + } +} + +// Next steps the iterator forward one element, returning false if exhausted. 
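+//
+// The canonical consumption loop (a sketch only; `process` is a stand-in for
+// whatever the caller does with each entry) pairs Next with a final Error
+// check and a deferred Release:
+//
+//	it := dl.AccountIterator(common.Hash{})
+//	defer it.Release()
+//	for it.Next() {
+//		process(it.Hash(), it.Account())
+//	}
+//	if err := it.Error(); err != nil {
+//		// e.g. ErrSnapshotStale if the layer was flattened mid-iteration
+//	}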
+func (it *diffAccountIterator) Next() bool {
+	// If the iterator was already stale, consider it a programmer error. Although
+	// we could just return false here, triggering this path would probably mean
+	// somebody forgot to check for Error, so let's blow up instead of undefined
+	// behavior that's hard to debug.
+	if it.fail != nil {
+		panic(fmt.Sprintf("called Next of failed iterator: %v", it.fail))
+	}
+	// Stop iterating if all keys were exhausted
+	if len(it.keys) == 0 {
+		return false
+	}
+	if it.layer.Stale() {
+		it.fail, it.keys = ErrSnapshotStale, nil
+		return false
+	}
+	// Iterator seems to be still alive, retrieve and cache the live hash
+	it.curHash = it.keys[0]
+	// Key cached, shift the iterator and notify the user of success
+	it.keys = it.keys[1:]
+	return true
+}
+
+// Error returns any failure that occurred during iteration, which might have
+// caused a premature iteration exit (e.g. snapshot stack becoming stale).
+func (it *diffAccountIterator) Error() error {
+	return it.fail
+}
+
+// Hash returns the hash of the account the iterator is currently at.
+func (it *diffAccountIterator) Hash() common.Hash {
+	return it.curHash
+}
+
+// Account returns the RLP encoded slim account the iterator is currently at.
+// This method may _fail_, if the underlying layer has been flattened between
+// the call to Next and Account. That type of error will set it.fail.
+// This method assumes that flattening does not delete elements from
+// the accountData mapping (writing nil into it is fine though), and will panic
+// if elements have been deleted.
+//
+// Note the returned account is not a copy, please don't modify it.
+func (it *diffAccountIterator) Account() []byte {
+	it.layer.lock.RLock()
+	blob, ok := it.layer.accountData[it.curHash]
+	if !ok {
+		if _, ok := it.layer.destructSet[it.curHash]; ok {
+			it.layer.lock.RUnlock()
+			return nil
+		}
+		panic(fmt.Sprintf("iterator referenced non-existent account: %x", it.curHash))
+	}
+	it.layer.lock.RUnlock()
+	if it.layer.Stale() {
+		it.fail, it.keys = ErrSnapshotStale, nil
+	}
+	return blob
+}
+
+// Release is a noop for diff account iterators as there are no held resources.
+func (it *diffAccountIterator) Release() {}
+
+// diskAccountIterator is an account iterator that steps over the live accounts
+// contained within a disk layer.
+type diskAccountIterator struct {
+	layer *diskLayer
+	it    ethdb.Iterator
+}
+
+// AccountIterator creates an account iterator over a disk layer.
+func (dl *diskLayer) AccountIterator(seek common.Hash) AccountIterator {
+	pos := common.TrimRightZeroes(seek[:])
+	return &diskAccountIterator{
+		layer: dl,
+		it:    dl.diskdb.NewIterator(rawdb.SnapshotAccountPrefix, pos),
+	}
+}
+
+// Next steps the iterator forward one element, returning false if exhausted.
+func (it *diskAccountIterator) Next() bool {
+	// If the iterator was already exhausted, don't bother
+	if it.it == nil {
+		return false
+	}
+	// Try to advance the iterator and release it if we reached the end
+	for {
+		if !it.it.Next() {
+			it.it.Release()
+			it.it = nil
+			return false
+		}
+		if len(it.it.Key()) == len(rawdb.SnapshotAccountPrefix)+common.HashLength {
+			break
+		}
+	}
+	return true
+}
+
+// Error returns any failure that occurred during iteration, which might have
+// caused a premature iteration exit (e.g. snapshot stack becoming stale).
+//
+// The disk iterator surfaces any error reported by the underlying database
+// iterator, or nil once the iterator has been exhausted and released.
+func (it *diskAccountIterator) Error() error {
+	if it.it == nil {
+		return nil // Iterator is exhausted and released
+	}
+	return it.it.Error()
+}
+
+// Hash returns the hash of the account the iterator is currently at.
+func (it *diskAccountIterator) Hash() common.Hash {
+	return common.BytesToHash(it.it.Key()) // The prefix will be truncated
+}
+
+// Account returns the RLP encoded slim account the iterator is currently at.
+func (it *diskAccountIterator) Account() []byte {
+	return it.it.Value()
+}
+
+// Release releases the database snapshot held during iteration.
+func (it *diskAccountIterator) Release() {
+	// The iterator is auto-released on exhaustion, so make sure it's still alive
+	if it.it != nil {
+		it.it.Release()
+		it.it = nil
+	}
+}
+
+// diffStorageIterator is a storage iterator that steps over the specific storage
+// (both live and deleted) contained within a single diff layer. Higher order
+// iterators will use the deleted slot to skip deeper iterators.
+type diffStorageIterator struct {
+	// curHash is the current hash the iterator is positioned on. The field is
+	// explicitly tracked since the referenced diff layer might go stale after
+	// the iterator was positioned and we don't want to fail accessing the old
+	// hash as long as the iterator is not touched any more.
+	curHash common.Hash
+	account common.Hash
+
+	layer *diffLayer    // Live layer to retrieve values from
+	keys  []common.Hash // Keys left in the layer to iterate
+	fail  error         // Any failures encountered (stale)
+}
+
+// StorageIterator creates a storage iterator over a single diff layer.
+// Besides the storage iterator, an additional "destructed" flag is also
+// returned. If it's true, the whole storage is destructed in this layer
+// (and maybe recreated too), so deeper layers need not be bothered for
+// storage retrieval.
+func (dl *diffLayer) StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool) {
+	// Create the storage for this account even if it's marked
+	// as destructed. The iterator is for the new one which
+	// just has the same address as the deleted one.
+	hashes, destructed := dl.StorageList(account)
+	index := sort.Search(len(hashes), func(i int) bool {
+		return bytes.Compare(seek[:], hashes[i][:]) <= 0
+	})
+	// Assemble and return the already-seeked iterator
+	return &diffStorageIterator{
+		layer:   dl,
+		account: account,
+		keys:    hashes[index:],
+	}, destructed
+}
+
+// Next steps the iterator forward one element, returning false if exhausted.
+func (it *diffStorageIterator) Next() bool {
+	// If the iterator was already stale, consider it a programmer error. Although
+	// we could just return false here, triggering this path would probably mean
+	// somebody forgot to check for Error, so let's blow up instead of undefined
+	// behavior that's hard to debug.
+	if it.fail != nil {
+		panic(fmt.Sprintf("called Next of failed iterator: %v", it.fail))
+	}
+	// Stop iterating if all keys were exhausted
+	if len(it.keys) == 0 {
+		return false
+	}
+	if it.layer.Stale() {
+		it.fail, it.keys = ErrSnapshotStale, nil
+		return false
+	}
+	// Iterator seems to be still alive, retrieve and cache the live hash
+	it.curHash = it.keys[0]
+	// Key cached, shift the iterator and notify the user of success
+	it.keys = it.keys[1:]
+	return true
+}
+
+// Error returns any failure that occurred during iteration, which might have
+// caused a premature iteration exit (e.g. snapshot stack becoming stale).
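+//
+// A defensive consumption sketch (illustrative only; the `slots` accumulator
+// is a hypothetical caller-side variable, not part of this change):
+//
+//	var slots [][]byte
+//	for it.Next() {
+//		slots = append(slots, it.Slot())
+//	}
+//	if it.Error() != nil {
+//		slots = nil // the layer went stale mid-iteration, discard partials
+//	}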
+func (it *diffStorageIterator) Error() error {
+	return it.fail
+}
+
+// Hash returns the hash of the storage slot the iterator is currently at.
+func (it *diffStorageIterator) Hash() common.Hash {
+	return it.curHash
+}
+
+// Slot returns the raw storage slot value the iterator is currently at.
+// This method may _fail_, if the underlying layer has been flattened between
+// the call to Next and Slot. That type of error will set it.fail.
+// This method assumes that flattening does not delete elements from
+// the storage mapping (writing nil into it is fine though), and will panic
+// if elements have been deleted.
+//
+// Note the returned slot is not a copy, please don't modify it.
+func (it *diffStorageIterator) Slot() []byte {
+	it.layer.lock.RLock()
+	storage, ok := it.layer.storageData[it.account]
+	if !ok {
+		panic(fmt.Sprintf("iterator referenced non-existent account storage: %x", it.account))
+	}
+	// Storage slot might be nil (deleted), but it must exist
+	blob, ok := storage[it.curHash]
+	if !ok {
+		panic(fmt.Sprintf("iterator referenced non-existent storage slot: %x", it.curHash))
+	}
+	it.layer.lock.RUnlock()
+	if it.layer.Stale() {
+		it.fail, it.keys = ErrSnapshotStale, nil
+	}
+	return blob
+}
+
+// Release is a noop for diff storage iterators as there are no held resources.
+func (it *diffStorageIterator) Release() {}
+
+// diskStorageIterator is a storage iterator that steps over the live storage
+// contained within a disk layer.
+type diskStorageIterator struct {
+	layer   *diskLayer
+	account common.Hash
+	it      ethdb.Iterator
+}
+
+// StorageIterator creates a storage iterator over a disk layer.
+// If the whole storage is destructed, then all entries in the disk
+// layer are deleted already. So the "destructed" flag returned here
+// is always false.
+func (dl *diskLayer) StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool) {
+	pos := common.TrimRightZeroes(seek[:])
+	return &diskStorageIterator{
+		layer:   dl,
+		account: account,
+		it:      dl.diskdb.NewIterator(append(rawdb.SnapshotStoragePrefix, account.Bytes()...), pos),
+	}, false
+}
+
+// Next steps the iterator forward one element, returning false if exhausted.
+func (it *diskStorageIterator) Next() bool {
+	// If the iterator was already exhausted, don't bother
+	if it.it == nil {
+		return false
+	}
+	// Try to advance the iterator and release it if we reached the end
+	for {
+		if !it.it.Next() {
+			it.it.Release()
+			it.it = nil
+			return false
+		}
+		if len(it.it.Key()) == len(rawdb.SnapshotStoragePrefix)+common.HashLength+common.HashLength {
+			break
+		}
+	}
+	return true
+}
+
+// Error returns any failure that occurred during iteration, which might have
+// caused a premature iteration exit (e.g. snapshot stack becoming stale).
+//
+// The disk iterator surfaces any error reported by the underlying database
+// iterator, or nil once the iterator has been exhausted and released.
+func (it *diskStorageIterator) Error() error {
+	if it.it == nil {
+		return nil // Iterator is exhausted and released
+	}
+	return it.it.Error()
+}
+
+// Hash returns the hash of the storage slot the iterator is currently at.
+func (it *diskStorageIterator) Hash() common.Hash {
+	return common.BytesToHash(it.it.Key()) // The prefix will be truncated
+}
+
+// Slot returns the raw storage slot content the iterator is currently at.
+func (it *diskStorageIterator) Slot() []byte {
+	return it.it.Value()
+}
+
+// Release releases the database snapshot held during iteration.
+func (it *diskStorageIterator) Release() {
+	// The iterator is auto-released on exhaustion, so make sure it's still alive
+	if it.it != nil {
+		it.it.Release()
+		it.it = nil
+	}
+}
diff --git a/core/state/snapshot/iterator_binary.go b/core/state/snapshot/iterator_binary.go
new file mode 100644
index 0000000..f82f750
--- /dev/null
+++ b/core/state/snapshot/iterator_binary.go
@@ -0,0 +1,213 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snapshot
+
+import (
+	"bytes"
+
+	"github.com/ethereum/go-ethereum/common"
+)
+
+// binaryIterator is a simplistic iterator to step over the accounts or storage
+// in a snapshot, which may or may not be composed of multiple layers. Performance
+// wise this iterator is slow, it's meant for cross-validating the fast one.
+type binaryIterator struct {
+	a               Iterator
+	b               Iterator
+	aDone           bool
+	bDone           bool
+	accountIterator bool
+	k               common.Hash
+	account         common.Hash
+	fail            error
+}
+
+// initBinaryAccountIterator creates a simplistic iterator to step over all the
+// accounts in a slow, but easily verifiable way. Note this function is used for
+// initialization, use `newBinaryAccountIterator` as the API.
+func (dl *diffLayer) initBinaryAccountIterator() Iterator {
+	parent, ok := dl.parent.(*diffLayer)
+	if !ok {
+		l := &binaryIterator{
+			a:               dl.AccountIterator(common.Hash{}),
+			b:               dl.Parent().AccountIterator(common.Hash{}),
+			accountIterator: true,
+		}
+		l.aDone = !l.a.Next()
+		l.bDone = !l.b.Next()
+		return l
+	}
+	l := &binaryIterator{
+		a:               dl.AccountIterator(common.Hash{}),
+		b:               parent.initBinaryAccountIterator(),
+		accountIterator: true,
+	}
+	l.aDone = !l.a.Next()
+	l.bDone = !l.b.Next()
+	return l
+}
+
+// initBinaryStorageIterator creates a simplistic iterator to step over all the
+// storage slots in a slow, but easily verifiable way. Note this function is used
+// for initialization, use `newBinaryStorageIterator` as the API.
+func (dl *diffLayer) initBinaryStorageIterator(account common.Hash) Iterator {
+	parent, ok := dl.parent.(*diffLayer)
+	if !ok {
+		// If the storage in this layer is already destructed, discard all
+		// deeper layers but still return a valid single-branch iterator.
+		a, destructed := dl.StorageIterator(account, common.Hash{})
+		if destructed {
+			l := &binaryIterator{
+				a:       a,
+				account: account,
+			}
+			l.aDone = !l.a.Next()
+			l.bDone = true
+			return l
+		}
+		// The parent is a disk layer, so there is no need to care about
+		// "destructed" anymore.
+		b, _ := dl.Parent().StorageIterator(account, common.Hash{})
+		l := &binaryIterator{
+			a:       a,
+			b:       b,
+			account: account,
+		}
+		l.aDone = !l.a.Next()
+		l.bDone = !l.b.Next()
+		return l
+	}
+	// If the storage in this layer is already destructed, discard all
+	// deeper layers but still return a valid single-branch iterator.
+	a, destructed := dl.StorageIterator(account, common.Hash{})
+	if destructed {
+		l := &binaryIterator{
+			a:       a,
+			account: account,
+		}
+		l.aDone = !l.a.Next()
+		l.bDone = true
+		return l
+	}
+	l := &binaryIterator{
+		a:       a,
+		b:       parent.initBinaryStorageIterator(account),
+		account: account,
+	}
+	l.aDone = !l.a.Next()
+	l.bDone = !l.b.Next()
+	return l
+}
+
+// Next steps the iterator forward one element, returning false if exhausted,
+// or an error if iteration failed for some reason (e.g. root being iterated
+// becomes stale and garbage collected).
+func (it *binaryIterator) Next() bool {
+	if it.aDone && it.bDone {
+		return false
+	}
first:
+	if it.aDone {
+		it.k = it.b.Hash()
+		it.bDone = !it.b.Next()
+		return true
+	}
+	if it.bDone {
+		it.k = it.a.Hash()
+		it.aDone = !it.a.Next()
+		return true
+	}
+	nextA, nextB := it.a.Hash(), it.b.Hash()
+	if diff := bytes.Compare(nextA[:], nextB[:]); diff < 0 {
+		it.aDone = !it.a.Next()
+		it.k = nextA
+		return true
+	} else if diff == 0 {
+		// Now we need to advance one of them
+		it.aDone = !it.a.Next()
+		goto first
+	}
+	it.bDone = !it.b.Next()
+	it.k = nextB
+	return true
+}
+
+// Error returns any failure that occurred during iteration, which might have
+// caused a premature iteration exit (e.g. snapshot stack becoming stale).
+func (it *binaryIterator) Error() error {
+	return it.fail
+}
+
+// Hash returns the hash of the account the iterator is currently at.
+func (it *binaryIterator) Hash() common.Hash {
+	return it.k
+}
+
+// Account returns the RLP encoded slim account the iterator is currently at, or
+// nil if the iterated snapshot stack became stale (you can check Error after
+// to see if it failed or not).
+//
+// Note the returned account is not a copy, please don't modify it.
+func (it *binaryIterator) Account() []byte {
+	if !it.accountIterator {
+		return nil
+	}
+	// The topmost iterator must be `diffAccountIterator`
+	blob, err := it.a.(*diffAccountIterator).layer.AccountRLP(it.k)
+	if err != nil {
+		it.fail = err
+		return nil
+	}
+	return blob
+}
+
+// Slot returns the raw storage slot data the iterator is currently at, or
+// nil if the iterated snapshot stack became stale (you can check Error after
+// to see if it failed or not).
+//
+// Note the returned slot is not a copy, please don't modify it.
+func (it *binaryIterator) Slot() []byte {
+	if it.accountIterator {
+		return nil
+	}
+	blob, err := it.a.(*diffStorageIterator).layer.Storage(it.account, it.k)
+	if err != nil {
+		it.fail = err
+		return nil
+	}
+	return blob
+}
+
+// Release recursively releases all the iterators in the stack.
+func (it *binaryIterator) Release() {
+	it.a.Release()
+	if it.b != nil { // b is nil if the storage was destructed in the top layer
+		it.b.Release()
+	}
+}
+
+// newBinaryAccountIterator creates a simplistic account iterator to step over
+// all the accounts in a slow, but easily verifiable way.
+func (dl *diffLayer) newBinaryAccountIterator() AccountIterator {
+	iter := dl.initBinaryAccountIterator()
+	return iter.(AccountIterator)
+}
+
+// newBinaryStorageIterator creates a simplistic storage iterator to step over
+// all the storage slots in a slow, but easily verifiable way.
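+//
+// Since the binary iterator exists to cross-validate the fast one, a test
+// sketch (assuming a `tree` with this layer registered; the names are
+// illustrative, not part of this change) might walk both in lockstep and
+// compare the visited hashes:
+//
+//	slow := dl.newBinaryAccountIterator()
+//	fast, _ := newFastAccountIterator(tree, dl.Root(), common.Hash{})
+//	for slow.Next() {
+//		if !fast.Next() || fast.Hash() != slow.Hash() {
+//			panic("fast/binary iterator mismatch")
+//		}
+//	}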
+func (dl *diffLayer) newBinaryStorageIterator(account common.Hash) StorageIterator {
+	iter := dl.initBinaryStorageIterator(account)
+	return iter.(StorageIterator)
+}
diff --git a/core/state/snapshot/iterator_fast.go b/core/state/snapshot/iterator_fast.go
new file mode 100644
index 0000000..291d529
--- /dev/null
+++ b/core/state/snapshot/iterator_fast.go
@@ -0,0 +1,350 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snapshot
+
+import (
+	"bytes"
+	"fmt"
+	"sort"
+
+	"github.com/ethereum/go-ethereum/common"
+)
+
+// weightedIterator is an iterator with an assigned weight. It is used to prioritise
+// which account or storage slot is the correct one if multiple iterators find the
+// same one (modified in multiple consecutive blocks).
+type weightedIterator struct {
+	it       Iterator
+	priority int
+}
+
+// weightedIterators is a set of iterators implementing the sort.Interface.
+type weightedIterators []*weightedIterator
+
+// Len implements sort.Interface, returning the number of active iterators.
+func (its weightedIterators) Len() int { return len(its) }
+
+// Less implements sort.Interface, returning which of two iterators in the stack
+// is before the other.
+func (its weightedIterators) Less(i, j int) bool {
+	// Order the iterators primarily by the account hashes
+	hashI := its[i].it.Hash()
+	hashJ := its[j].it.Hash()
+
+	switch bytes.Compare(hashI[:], hashJ[:]) {
+	case -1:
+		return true
+	case 1:
+		return false
+	}
+	// Same account/storage-slot in multiple layers, split by priority
+	return its[i].priority < its[j].priority
+}
+
+// Swap implements sort.Interface, swapping two entries in the iterator stack.
+func (its weightedIterators) Swap(i, j int) {
+	its[i], its[j] = its[j], its[i]
+}
+
+// fastIterator is a more optimized multi-layer iterator which maintains a
+// direct mapping of all iterators leading down to the bottom layer.
+type fastIterator struct {
+	tree *Tree       // Snapshot tree to reinitialize stale sub-iterators with
+	root common.Hash // Root hash to reinitialize stale sub-iterators through
+
+	curAccount []byte
+	curSlot    []byte
+
+	iterators weightedIterators
+	initiated bool
+	account   bool
+	fail      error
+}
+
+// newFastIterator creates a new hierarchical account or storage iterator with one
+// element per diff layer. The returned combo iterator can be used to walk over
+// the entire snapshot diff stack simultaneously.
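+//
+// To make the weighting concrete (illustrative values only): if the layers at
+// depth 0 and depth 2 both contain account hash H, both sub-iterators surface
+// H, and the weighted sort breaks the tie by priority:
+//
+//	[{hash: H, priority: 0}, {hash: H, priority: 2}] // depth 0 (newest) wins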
+func newFastIterator(tree *Tree, root common.Hash, account common.Hash, seek common.Hash, accountIterator bool) (*fastIterator, error) { + snap := tree.Snapshot(root) + if snap == nil { + return nil, fmt.Errorf("unknown snapshot: %x", root) + } + fi := &fastIterator{ + tree: tree, + root: root, + account: accountIterator, + } + current := snap.(snapshot) + for depth := 0; current != nil; depth++ { + if accountIterator { + fi.iterators = append(fi.iterators, &weightedIterator{ + it: current.AccountIterator(seek), + priority: depth, + }) + } else { + // If the whole storage is destructed in this layer, don't + // bother deeper layer anymore. But we should still keep + // the iterator for this layer, since the iterator can contain + // some valid slots which belongs to the re-created account. + it, destructed := current.StorageIterator(account, seek) + fi.iterators = append(fi.iterators, &weightedIterator{ + it: it, + priority: depth, + }) + if destructed { + break + } + } + current = current.Parent() + } + fi.init() + return fi, nil +} + +// init walks over all the iterators and resolves any clashes between them, after +// which it prepares the stack for step-by-step iteration. +func (fi *fastIterator) init() { + // Track which account hashes are iterators positioned on + var positioned = make(map[common.Hash]int) + + // Position all iterators and track how many remain live + for i := 0; i < len(fi.iterators); i++ { + // Retrieve the first element and if it clashes with a previous iterator, + // advance either the current one or the old one. Repeat until nothing is + // clashing any more. + it := fi.iterators[i] + for { + // If the iterator is exhausted, drop it off the end + if !it.it.Next() { + it.it.Release() + last := len(fi.iterators) - 1 + + fi.iterators[i] = fi.iterators[last] + fi.iterators[last] = nil + fi.iterators = fi.iterators[:last] + + i-- + break + } + // The iterator is still alive, check for collisions with previous ones + hash := it.it.Hash() + if other, exist := positioned[hash]; !exist { + positioned[hash] = i + break + } else { + // Iterators collide, one needs to be progressed, use priority to + // determine which. + // + // This whole else-block can be avoided, if we instead + // do an initial priority-sort of the iterators. If we do that, + // then we'll only wind up here if a lower-priority (preferred) iterator + // has the same value, and then we will always just continue. + // However, it costs an extra sort, so it's probably not better + if fi.iterators[other].priority < it.priority { + // The 'it' should be progressed + continue + } else { + // The 'other' should be progressed, swap them + it = fi.iterators[other] + fi.iterators[other], fi.iterators[i] = fi.iterators[i], fi.iterators[other] + continue + } + } + } + } + // Re-sort the entire list + sort.Sort(fi.iterators) + fi.initiated = false +} + +// Next steps the iterator forward one element, returning false if exhausted. 
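+//
+// Note that deletions are represented as nil values inside the layers, so a
+// consumer sketch never needs to special-case them; Next transparently skips
+// deleted entries and only stops on live data:
+//
+//	for fi.Next() {
+//		blob := fi.Account() // non-nil after every successful Next on an account iterator
+//		_ = blob
+//	}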
+func (fi *fastIterator) Next() bool { + if len(fi.iterators) == 0 { + return false + } + if !fi.initiated { + // Don't forward first time -- we had to 'Next' once in order to + // do the sorting already + fi.initiated = true + if fi.account { + fi.curAccount = fi.iterators[0].it.(AccountIterator).Account() + } else { + fi.curSlot = fi.iterators[0].it.(StorageIterator).Slot() + } + if innerErr := fi.iterators[0].it.Error(); innerErr != nil { + fi.fail = innerErr + return false + } + if fi.curAccount != nil || fi.curSlot != nil { + return true + } + // Implicit else: we've hit a nil-account or nil-slot, and need to + // fall through to the loop below to land on something non-nil + } + // If an account or a slot is deleted in one of the layers, the key will + // still be there, but the actual value will be nil. However, the iterator + // should not export nil-values (but instead simply omit the key), so we + // need to loop here until we either + // - get a non-nil value, + // - hit an error, + // - or exhaust the iterator + for { + if !fi.next(0) { + return false // exhausted + } + if fi.account { + fi.curAccount = fi.iterators[0].it.(AccountIterator).Account() + } else { + fi.curSlot = fi.iterators[0].it.(StorageIterator).Slot() + } + if innerErr := fi.iterators[0].it.Error(); innerErr != nil { + fi.fail = innerErr + return false // error + } + if fi.curAccount != nil || fi.curSlot != nil { + break // non-nil value found + } + } + return true +} + +// next handles the next operation internally and should be invoked when we know +// that two elements in the list may have the same value. +// +// For example, if the iterated hashes become [2,3,5,5,8,9,10], then we should +// invoke next(3), which will call Next on elem 3 (the second '5') and will +// cascade along the list, applying the same operation if needed. +func (fi *fastIterator) next(idx int) bool { + // If this particular iterator got exhausted, remove it and return true (the + // next one is surely not exhausted yet, otherwise it would have been removed + // already). + if it := fi.iterators[idx].it; !it.Next() { + it.Release() + + fi.iterators = append(fi.iterators[:idx], fi.iterators[idx+1:]...) + return len(fi.iterators) > 0 + } + // If there's no one left to cascade into, return + if idx == len(fi.iterators)-1 { + return true + } + // We next-ed the iterator at 'idx', now we may have to re-sort that element + var ( + cur, next = fi.iterators[idx], fi.iterators[idx+1] + curHash, nextHash = cur.it.Hash(), next.it.Hash() + ) + if diff := bytes.Compare(curHash[:], nextHash[:]); diff < 0 { + // It is still in correct place + return true + } else if diff == 0 && cur.priority < next.priority { + // So still in correct place, but we need to iterate on the next + fi.next(idx + 1) + return true + } + // At this point, the iterator is in the wrong location, but the remaining + // list is sorted. Find out where to move the item. + clash := -1 + index := sort.Search(len(fi.iterators), func(n int) bool { + // The iterator always advances forward, so anything before the old slot + // is known to be behind us, so just skip them altogether. This actually + // is an important clause since the sort order got invalidated. 
+		if n < idx {
+			return false
+		}
+		if n == len(fi.iterators)-1 {
+			// Can always place an elem last
+			return true
+		}
+		nextHash := fi.iterators[n+1].it.Hash()
+		if diff := bytes.Compare(curHash[:], nextHash[:]); diff < 0 {
+			return true
+		} else if diff > 0 {
+			return false
+		}
+		// The elem we're placing it next to has the same value,
+		// so whichever winds up on n+1 will need further iteration
+		clash = n + 1
+
+		return cur.priority < fi.iterators[n+1].priority
+	})
+	fi.move(idx, index)
+	if clash != -1 {
+		fi.next(clash)
+	}
+	return true
+}
+
+// move advances an iterator to another position in the list.
+func (fi *fastIterator) move(index, newpos int) {
+	elem := fi.iterators[index]
+	copy(fi.iterators[index:], fi.iterators[index+1:newpos+1])
+	fi.iterators[newpos] = elem
+}
+
+// Error returns any failure that occurred during iteration, which might have
+// caused a premature iteration exit (e.g. snapshot stack becoming stale).
+func (fi *fastIterator) Error() error {
+	return fi.fail
+}
+
+// Hash returns the current key.
+func (fi *fastIterator) Hash() common.Hash {
+	return fi.iterators[0].it.Hash()
+}
+
+// Account returns the current account blob.
+// Note the returned account is not a copy, please don't modify it.
+func (fi *fastIterator) Account() []byte {
+	return fi.curAccount
+}
+
+// Slot returns the current storage slot.
+// Note the returned slot is not a copy, please don't modify it.
+func (fi *fastIterator) Slot() []byte {
+	return fi.curSlot
+}
+
+// Release iterates over all the remaining live layer iterators and releases each
+// of them individually.
+func (fi *fastIterator) Release() {
+	for _, it := range fi.iterators {
+		it.it.Release()
+	}
+	fi.iterators = nil
+}
+
+// Debug is a convenience helper during testing
+func (fi *fastIterator) Debug() {
+	for _, it := range fi.iterators {
+		fmt.Printf("[p=%v v=%v] ", it.priority, it.it.Hash()[0])
+	}
+	fmt.Println()
+}
+
+// newFastAccountIterator creates a new hierarchical account iterator with one
+// element per diff layer. The returned combo iterator can be used to walk over
+// the entire snapshot diff stack simultaneously.
+func newFastAccountIterator(tree *Tree, root common.Hash, seek common.Hash) (AccountIterator, error) {
+	return newFastIterator(tree, root, common.Hash{}, seek, true)
+}
+
+// newFastStorageIterator creates a new hierarchical storage iterator with one
+// element per diff layer. The returned combo iterator can be used to walk over
+// the entire snapshot diff stack simultaneously.
+func newFastStorageIterator(tree *Tree, root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) {
+	return newFastIterator(tree, root, account, seek, false)
+}
diff --git a/core/state/snapshot/journal.go b/core/state/snapshot/journal.go
new file mode 100644
index 0000000..3d4c9de
--- /dev/null
+++ b/core/state/snapshot/journal.go
@@ -0,0 +1,270 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package snapshot + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "io" + "time" + + "github.com/VictoriaMetrics/fastcache" + "github.com/ava-labs/coreth/core/rawdb" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" +) + +// journalGenerator is a disk layer entry containing the generator progress marker. +type journalGenerator struct { + Wiping bool // Whether the database was in progress of being wiped + Done bool // Whether the generator finished creating the snapshot + Marker []byte + Accounts uint64 + Slots uint64 + Storage uint64 +} + +// journalDestruct is an account deletion entry in a diffLayer's disk journal. +type journalDestruct struct { + Hash common.Hash +} + +// journalAccount is an account entry in a diffLayer's disk journal. +type journalAccount struct { + Hash common.Hash + Blob []byte +} + +// journalStorage is an account's storage map in a diffLayer's disk journal. +type journalStorage struct { + Hash common.Hash + Keys []common.Hash + Vals [][]byte +} + +// loadSnapshot loads a pre-existing state snapshot backed by a key-value store. +func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash) (snapshot, error) { + // Retrieve the block number and hash of the snapshot, failing if no snapshot + // is present in the database (or crashed mid-update). + baseRoot := rawdb.ReadSnapshotRoot(diskdb) + if baseRoot == (common.Hash{}) { + return nil, errors.New("missing or corrupted snapshot") + } + base := &diskLayer{ + diskdb: diskdb, + triedb: triedb, + cache: fastcache.New(cache * 1024 * 1024), + root: baseRoot, + } + // Retrieve the journal, it must exist since even for 0 layer it stores whether + // we've already generated the snapshot or are in progress only + journal := rawdb.ReadSnapshotJournal(diskdb) + if len(journal) == 0 { + return nil, errors.New("missing or corrupted snapshot journal") + } + r := rlp.NewStream(bytes.NewReader(journal), 0) + + // Read the snapshot generation progress for the disk layer + var generator journalGenerator + if err := r.Decode(&generator); err != nil { + return nil, fmt.Errorf("failed to load snapshot progress marker: %v", err) + } + // Load all the snapshot diffs from the journal + snapshot, err := loadDiffLayer(base, r) + if err != nil { + return nil, err + } + // Entire snapshot journal loaded, sanity check the head and return + // Journal doesn't exist, don't worry if it's not supposed to + if head := snapshot.Root(); head != root { + return nil, fmt.Errorf("head doesn't match snapshot: have %#x, want %#x", head, root) + } + // Everything loaded correctly, resume any suspended operations + if !generator.Done { + // If the generator was still wiping, restart one from scratch (fine for + // now as it's rare and the wiper deletes the stuff it touches anyway, so + // restarting won't incur a lot of extra database hops. 
+ var wiper chan struct{} + if generator.Wiping { + log.Info("Resuming previous snapshot wipe") + wiper = wipeSnapshot(diskdb, false) + } + // Whether or not wiping was in progress, load any generator progress too + base.genMarker = generator.Marker + if base.genMarker == nil { + base.genMarker = []byte{} + } + base.genPending = make(chan struct{}) + base.genAbort = make(chan chan *generatorStats) + + var origin uint64 + if len(generator.Marker) >= 8 { + origin = binary.BigEndian.Uint64(generator.Marker) + } + go base.generate(&generatorStats{ + wiping: wiper, + origin: origin, + start: time.Now(), + accounts: generator.Accounts, + slots: generator.Slots, + storage: common.StorageSize(generator.Storage), + }) + } + return snapshot, nil +} + +// loadDiffLayer reads the next sections of a snapshot journal, reconstructing a new +// diff and verifying that it can be linked to the requested parent. +func loadDiffLayer(parent snapshot, r *rlp.Stream) (snapshot, error) { + // Read the next diff journal entry + var root common.Hash + if err := r.Decode(&root); err != nil { + // The first read may fail with EOF, marking the end of the journal + if err == io.EOF { + return parent, nil + } + return nil, fmt.Errorf("load diff root: %v", err) + } + var destructs []journalDestruct + if err := r.Decode(&destructs); err != nil { + return nil, fmt.Errorf("load diff destructs: %v", err) + } + destructSet := make(map[common.Hash]struct{}) + for _, entry := range destructs { + destructSet[entry.Hash] = struct{}{} + } + var accounts []journalAccount + if err := r.Decode(&accounts); err != nil { + return nil, fmt.Errorf("load diff accounts: %v", err) + } + accountData := make(map[common.Hash][]byte) + for _, entry := range accounts { + if len(entry.Blob) > 0 { // RLP loses nil-ness, but `[]byte{}` is not a valid item, so reinterpret that + accountData[entry.Hash] = entry.Blob + } else { + accountData[entry.Hash] = nil + } + } + var storage []journalStorage + if err := r.Decode(&storage); err != nil { + return nil, fmt.Errorf("load diff storage: %v", err) + } + storageData := make(map[common.Hash]map[common.Hash][]byte) + for _, entry := range storage { + slots := make(map[common.Hash][]byte) + for i, key := range entry.Keys { + if len(entry.Vals[i]) > 0 { // RLP loses nil-ness, but `[]byte{}` is not a valid item, so reinterpret that + slots[key] = entry.Vals[i] + } else { + slots[key] = nil + } + } + storageData[entry.Hash] = slots + } + return loadDiffLayer(newDiffLayer(parent, root, destructSet, accountData, storageData), r) +} + +// Journal writes the persistent layer generator stats into a buffer to be stored +// in the database as the snapshot journal. 
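+//
+// A shutdown-time sketch (assuming `headRoot` identifies the current head
+// layer and `diskdb` is the backing store; both names are hypothetical here)
+// would journal the whole hierarchy into one buffer and persist it:
+//
+//	journal := new(bytes.Buffer)
+//	if _, err := t.layers[headRoot].Journal(journal); err == nil {
+//		rawdb.WriteSnapshotJournal(diskdb, journal.Bytes())
+//	}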
+func (dl *diskLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { + // If the snapshot is currently being generated, abort it + var stats *generatorStats + if dl.genAbort != nil { + abort := make(chan *generatorStats) + dl.genAbort <- abort + + if stats = <-abort; stats != nil { + stats.Log("Journalling in-progress snapshot", dl.root, dl.genMarker) + } + } + // Ensure the layer didn't get stale + dl.lock.RLock() + defer dl.lock.RUnlock() + + if dl.stale { + return common.Hash{}, ErrSnapshotStale + } + // Write out the generator marker + entry := journalGenerator{ + Done: dl.genMarker == nil, + Marker: dl.genMarker, + } + if stats != nil { + entry.Wiping = (stats.wiping != nil) + entry.Accounts = stats.accounts + entry.Slots = stats.slots + entry.Storage = uint64(stats.storage) + } + if err := rlp.Encode(buffer, entry); err != nil { + return common.Hash{}, err + } + return dl.root, nil +} + +// Journal writes the memory layer contents into a buffer to be stored in the +// database as the snapshot journal. +func (dl *diffLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { + // Journal the parent first + base, err := dl.parent.Journal(buffer) + if err != nil { + return common.Hash{}, err + } + // Ensure the layer didn't get stale + dl.lock.RLock() + defer dl.lock.RUnlock() + + if dl.Stale() { + return common.Hash{}, ErrSnapshotStale + } + // Everything below was journalled, persist this layer too + if err := rlp.Encode(buffer, dl.root); err != nil { + return common.Hash{}, err + } + destructs := make([]journalDestruct, 0, len(dl.destructSet)) + for hash := range dl.destructSet { + destructs = append(destructs, journalDestruct{Hash: hash}) + } + if err := rlp.Encode(buffer, destructs); err != nil { + return common.Hash{}, err + } + accounts := make([]journalAccount, 0, len(dl.accountData)) + for hash, blob := range dl.accountData { + accounts = append(accounts, journalAccount{Hash: hash, Blob: blob}) + } + if err := rlp.Encode(buffer, accounts); err != nil { + return common.Hash{}, err + } + storage := make([]journalStorage, 0, len(dl.storageData)) + for hash, slots := range dl.storageData { + keys := make([]common.Hash, 0, len(slots)) + vals := make([][]byte, 0, len(slots)) + for key, val := range slots { + keys = append(keys, key) + vals = append(vals, val) + } + storage = append(storage, journalStorage{Hash: hash, Keys: keys, Vals: vals}) + } + if err := rlp.Encode(buffer, storage); err != nil { + return common.Hash{}, err + } + return base, nil +} diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go new file mode 100644 index 0000000..3348685 --- /dev/null +++ b/core/state/snapshot/snapshot.go @@ -0,0 +1,619 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 
+ +// Package snapshot implements a journalled, dynamic state dump. +package snapshot + +import ( + "bytes" + "errors" + "fmt" + "sync" + "sync/atomic" + + "github.com/ava-labs/coreth/core/rawdb" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/metrics" + "github.com/ethereum/go-ethereum/trie" +) + +var ( + snapshotCleanAccountHitMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/hit", nil) + snapshotCleanAccountMissMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/miss", nil) + snapshotCleanAccountInexMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/inex", nil) + snapshotCleanAccountReadMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/read", nil) + snapshotCleanAccountWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/write", nil) + + snapshotCleanStorageHitMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/hit", nil) + snapshotCleanStorageMissMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/miss", nil) + snapshotCleanStorageInexMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/inex", nil) + snapshotCleanStorageReadMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/read", nil) + snapshotCleanStorageWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/write", nil) + + snapshotDirtyAccountHitMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/account/hit", nil) + snapshotDirtyAccountMissMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/account/miss", nil) + snapshotDirtyAccountInexMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/account/inex", nil) + snapshotDirtyAccountReadMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/account/read", nil) + snapshotDirtyAccountWriteMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/account/write", nil) + + snapshotDirtyStorageHitMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/hit", nil) + snapshotDirtyStorageMissMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/miss", nil) + snapshotDirtyStorageInexMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/inex", nil) + snapshotDirtyStorageReadMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/read", nil) + snapshotDirtyStorageWriteMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/write", nil) + + snapshotDirtyAccountHitDepthHist = metrics.NewRegisteredHistogram("state/snapshot/dirty/account/hit/depth", nil, metrics.NewExpDecaySample(1028, 0.015)) + snapshotDirtyStorageHitDepthHist = metrics.NewRegisteredHistogram("state/snapshot/dirty/storage/hit/depth", nil, metrics.NewExpDecaySample(1028, 0.015)) + + snapshotFlushAccountItemMeter = metrics.NewRegisteredMeter("state/snapshot/flush/account/item", nil) + snapshotFlushAccountSizeMeter = metrics.NewRegisteredMeter("state/snapshot/flush/account/size", nil) + snapshotFlushStorageItemMeter = metrics.NewRegisteredMeter("state/snapshot/flush/storage/item", nil) + snapshotFlushStorageSizeMeter = metrics.NewRegisteredMeter("state/snapshot/flush/storage/size", nil) + + snapshotBloomIndexTimer = metrics.NewRegisteredResettingTimer("state/snapshot/bloom/index", nil) + snapshotBloomErrorGauge = metrics.NewRegisteredGaugeFloat64("state/snapshot/bloom/error", nil) + + snapshotBloomAccountTrueHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/account/truehit", nil) + snapshotBloomAccountFalseHitMeter 
= metrics.NewRegisteredMeter("state/snapshot/bloom/account/falsehit", nil) + snapshotBloomAccountMissMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/account/miss", nil) + + snapshotBloomStorageTrueHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/truehit", nil) + snapshotBloomStorageFalseHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/falsehit", nil) + snapshotBloomStorageMissMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/miss", nil) + + // ErrSnapshotStale is returned from data accessors if the underlying snapshot + // layer had been invalidated due to the chain progressing forward far enough + // to not maintain the layer's original state. + ErrSnapshotStale = errors.New("snapshot stale") + + // ErrNotCoveredYet is returned from data accessors if the underlying snapshot + // is being generated currently and the requested data item is not yet in the + // range of accounts covered. + ErrNotCoveredYet = errors.New("not covered yet") + + // errSnapshotCycle is returned if a snapshot is attempted to be inserted + // that forms a cycle in the snapshot tree. + errSnapshotCycle = errors.New("snapshot cycle") +) + +// Snapshot represents the functionality supported by a snapshot storage layer. +type Snapshot interface { + // Root returns the root hash for which this snapshot was made. + Root() common.Hash + + // Account directly retrieves the account associated with a particular hash in + // the snapshot slim data format. + Account(hash common.Hash) (*Account, error) + + // AccountRLP directly retrieves the account RLP associated with a particular + // hash in the snapshot slim data format. + AccountRLP(hash common.Hash) ([]byte, error) + + // Storage directly retrieves the storage data associated with a particular hash, + // within a particular account. + Storage(accountHash, storageHash common.Hash) ([]byte, error) +} + +// snapshot is the internal version of the snapshot data layer that supports some +// additional methods compared to the public API. +type snapshot interface { + Snapshot + + // Parent returns the subsequent layer of a snapshot, or nil if the base was + // reached. + // + // Note, the method is an internal helper to avoid type switching between the + // disk and diff layers. There is no locking involved. + Parent() snapshot + + // Update creates a new layer on top of the existing snapshot diff tree with + // the specified data items. + // + // Note, the maps are retained by the method to avoid copying everything. + Update(blockRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer + + // Journal commits an entire diff hierarchy to disk into a single journal entry. + // This is meant to be used during shutdown to persist the snapshot without + // flattening everything down (bad for reorgs). + Journal(buffer *bytes.Buffer) (common.Hash, error) + + // Stale return whether this layer has become stale (was flattened across) or + // if it's still live. + Stale() bool + + // AccountIterator creates an account iterator over an arbitrary layer. + AccountIterator(seek common.Hash) AccountIterator + + // StorageIterator creates a storage iterator over an arbitrary layer. + StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool) +} + +// SnapshotTree is an Ethereum state snapshot tree. 
It consists of one persistent +// base layer backed by a key-value store, on top of which arbitrarily many in- +// memory diff layers are topped. The memory diffs can form a tree with branching, +// but the disk layer is singleton and common to all. If a reorg goes deeper than +// the disk layer, everything needs to be deleted. +// +// The goal of a state snapshot is twofold: to allow direct access to account and +// storage data to avoid expensive multi-level trie lookups; and to allow sorted, +// cheap iteration of the account/storage tries for sync aid. +type Tree struct { + diskdb ethdb.KeyValueStore // Persistent database to store the snapshot + triedb *trie.Database // In-memory cache to access the trie through + cache int // Megabytes permitted to use for read caches + layers map[common.Hash]snapshot // Collection of all known layers + lock sync.RWMutex +} + +// New attempts to load an already existing snapshot from a persistent key-value +// store (with a number of memory layers from a journal), ensuring that the head +// of the snapshot matches the expected one. +// +// If the snapshot is missing or inconsistent, the entirety is deleted and will +// be reconstructed from scratch based on the tries in the key-value store, on a +// background thread. +func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, async bool) *Tree { + // Create a new, empty snapshot tree + snap := &Tree{ + diskdb: diskdb, + triedb: triedb, + cache: cache, + layers: make(map[common.Hash]snapshot), + } + if !async { + defer snap.waitBuild() + } + // Attempt to load a previously persisted snapshot and rebuild one if failed + head, err := loadSnapshot(diskdb, triedb, cache, root) + if err != nil { + log.Warn("Failed to load snapshot, regenerating", "err", err) + snap.Rebuild(root) + return snap + } + // Existing snapshot loaded, seed all the layers + for head != nil { + snap.layers[head.Root()] = head + head = head.Parent() + } + return snap +} + +// waitBuild blocks until the snapshot finishes rebuilding. This method is meant +// to be used by tests to ensure we're testing what we believe we are. +func (t *Tree) waitBuild() { + // Find the rebuild termination channel + var done chan struct{} + + t.lock.RLock() + for _, layer := range t.layers { + if layer, ok := layer.(*diskLayer); ok { + done = layer.genPending + break + } + } + t.lock.RUnlock() + + // Wait until the snapshot is generated + if done != nil { + <-done + } +} + +// Snapshot retrieves a snapshot belonging to the given block root, or nil if no +// snapshot is maintained for that block. +func (t *Tree) Snapshot(blockRoot common.Hash) Snapshot { + t.lock.RLock() + defer t.lock.RUnlock() + + return t.layers[blockRoot] +} + +// Update adds a new snapshot into the tree, if that can be linked to an existing +// old parent. It is disallowed to insert a disk layer (the origin of all). +func (t *Tree) Update(blockRoot common.Hash, parentRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error { + // Reject noop updates to avoid self-loops in the snapshot tree. This is a + // special case that can only happen for Clique networks where empty blocks + // don't modify the state (0 block subsidy). + // + // Although we could silently ignore this internally, it should be the caller's + // responsibility to avoid even attempting to insert such a snapshot. 
+
+// Update adds a new snapshot into the tree, if that can be linked to an existing
+// old parent. It is disallowed to insert a disk layer (the origin of all).
+func (t *Tree) Update(blockRoot common.Hash, parentRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error {
+	// Reject noop updates to avoid self-loops in the snapshot tree. This is a
+	// special case that can only happen for Clique networks where empty blocks
+	// don't modify the state (0 block subsidy).
+	//
+	// Although we could silently ignore this internally, it should be the caller's
+	// responsibility to avoid even attempting to insert such a snapshot.
+	if blockRoot == parentRoot {
+		return errSnapshotCycle
+	}
+	// Generate a new snapshot on top of the parent
+	parent := t.Snapshot(parentRoot)
+	if parent == nil {
+		return fmt.Errorf("parent [%#x] snapshot missing", parentRoot)
+	}
+	snap := parent.(snapshot).Update(blockRoot, destructs, accounts, storage)
+
+	// Save the new snapshot for later
+	t.lock.Lock()
+	defer t.lock.Unlock()
+
+	t.layers[snap.root] = snap
+	return nil
+}
+
+// Cap traverses downwards the snapshot tree from a head block hash until the
+// number of allowed layers is crossed. All layers beyond the permitted number
+// are flattened downwards.
+func (t *Tree) Cap(root common.Hash, layers int) error {
+	// Retrieve the head snapshot to cap from
+	snap := t.Snapshot(root)
+	if snap == nil {
+		return fmt.Errorf("snapshot [%#x] missing", root)
+	}
+	diff, ok := snap.(*diffLayer)
+	if !ok {
+		return fmt.Errorf("snapshot [%#x] is disk layer", root)
+	}
+	// If the generator is still running, use a more aggressive cap
+	diff.origin.lock.RLock()
+	if diff.origin.genMarker != nil && layers > 8 {
+		layers = 8
+	}
+	diff.origin.lock.RUnlock()
+
+	// Run the internal capping and discard all stale layers
+	t.lock.Lock()
+	defer t.lock.Unlock()
+
+	// Flattening the bottom-most diff layer requires special casing since there's
+	// no child to rewire to the grandparent. In that case we can fake a temporary
+	// child for the capping and then remove it.
+	var persisted *diskLayer
+
+	switch layers {
+	case 0:
+		// If full commit was requested, flatten the diffs and merge onto disk
+		diff.lock.RLock()
+		base := diffToDisk(diff.flatten().(*diffLayer))
+		diff.lock.RUnlock()
+
+		// Replace the entire snapshot tree with the flat base
+		t.layers = map[common.Hash]snapshot{base.root: base}
+		return nil
+
+	case 1:
+		// If full flattening was requested, flatten the diffs but only merge if the
+		// memory limit was reached
+		var (
+			bottom *diffLayer
+			base   *diskLayer
+		)
+		diff.lock.RLock()
+		bottom = diff.flatten().(*diffLayer)
+		if bottom.memory >= aggregatorMemoryLimit {
+			base = diffToDisk(bottom)
+		}
+		diff.lock.RUnlock()
+
+		// If all diff layers were removed, replace the entire snapshot tree
+		if base != nil {
+			t.layers = map[common.Hash]snapshot{base.root: base}
+			return nil
+		}
+		// Merge the new aggregated layer into the snapshot tree, clean stales below
+		t.layers[bottom.root] = bottom
+
+	default:
+		// Many layers requested to be retained, cap normally
+		persisted = t.cap(diff, layers)
+	}
+	// Remove any layer that is stale or links into a stale layer
+	children := make(map[common.Hash][]common.Hash)
+	for root, snap := range t.layers {
+		if diff, ok := snap.(*diffLayer); ok {
+			parent := diff.parent.Root()
+			children[parent] = append(children[parent], root)
+		}
+	}
+	var remove func(root common.Hash)
+	remove = func(root common.Hash) {
+		delete(t.layers, root)
+		for _, child := range children[root] {
+			remove(child)
+		}
+		delete(children, root)
+	}
+	for root, snap := range t.layers {
+		if snap.Stale() {
+			remove(root)
+		}
+	}
+	// If the disk layer was modified, regenerate all the cumulative blooms
+	if persisted != nil {
+		var rebloom func(root common.Hash)
+		rebloom = func(root common.Hash) {
+			if diff, ok := t.layers[root].(*diffLayer); ok {
+				diff.rebloom(persisted)
+			}
+			for _, child := range children[root] {
+				rebloom(child)
+			}
+		}
+		rebloom(persisted.root)
+	}
+	return nil
+}
+
+// cap traverses downwards the diff tree until the number of allowed layers is
+// crossed. All diffs beyond the permitted number are flattened downwards. If the
+// layer limit is reached, memory cap is also enforced (but not before).
+//
+// The method returns the new disk layer if diffs were persisted into it.
+func (t *Tree) cap(diff *diffLayer, layers int) *diskLayer {
+	// Dive until we run out of layers or reach the persistent database
+	for ; layers > 2; layers-- {
+		// If we still have diff layers below, continue down
+		if parent, ok := diff.parent.(*diffLayer); ok {
+			diff = parent
+		} else {
+			// Diff stack too shallow, return without modifications
+			return nil
+		}
+	}
+	// We're out of layers, flatten anything below, stopping if it's the disk or if
+	// the memory limit is not yet exceeded.
+	switch parent := diff.parent.(type) {
+	case *diskLayer:
+		return nil
+
+	case *diffLayer:
+		// Flatten the parent into the grandparent. The flattening internally obtains a
+		// write lock on grandparent.
+		flattened := parent.flatten().(*diffLayer)
+		t.layers[flattened.root] = flattened
+
+		diff.lock.Lock()
+		defer diff.lock.Unlock()
+
+		diff.parent = flattened
+		if flattened.memory < aggregatorMemoryLimit {
+			// Accumulator layer is smaller than the limit, so we can abort, unless
+			// there's a snapshot being generated currently. In that case, the trie
+			// will move from underneath the generator so we **must** merge all the
+			// partial data down into the snapshot and restart the generation.
+			if flattened.parent.(*diskLayer).genAbort == nil {
+				return nil
+			}
+		}
+	default:
+		panic(fmt.Sprintf("unknown data layer: %T", parent))
+	}
+	// If the bottom-most layer is larger than our memory cap, persist to disk
+	bottom := diff.parent.(*diffLayer)
+
+	bottom.lock.RLock()
+	base := diffToDisk(bottom)
+	bottom.lock.RUnlock()
+
+	t.layers[base.root] = base
+	diff.parent = base
+	return base
+}
+
+// diffToDisk merges a bottom-most diff into the persistent disk layer underneath
+// it. The method will panic if called on a non-bottom-most diff layer.
+func diffToDisk(bottom *diffLayer) *diskLayer {
+	var (
+		base  = bottom.parent.(*diskLayer)
+		batch = base.diskdb.NewBatch()
+		stats *generatorStats
+	)
+	// If the disk layer is running a snapshot generator, abort it
+	if base.genAbort != nil {
+		abort := make(chan *generatorStats)
+		base.genAbort <- abort
+		stats = <-abort
+	}
+	// Start by temporarily deleting the current snapshot block marker. This
+	// ensures that in the case of a crash, the entire snapshot is invalidated.
+	rawdb.DeleteSnapshotRoot(batch)
+
+	// Mark the original base as stale as we're going to create a new wrapper
+	base.lock.Lock()
+	if base.stale {
+		panic("parent disk layer is stale") // we've committed into the same base from two children, boo
+	}
+	base.stale = true
+	base.lock.Unlock()
+
+	// Destroy all the destructed accounts from the database
+	for hash := range bottom.destructSet {
+		// Skip any account not covered yet by the snapshot
+		if base.genMarker != nil && bytes.Compare(hash[:], base.genMarker) > 0 {
+			continue
+		}
+		// Remove all storage slots
+		rawdb.DeleteAccountSnapshot(batch, hash)
+		base.cache.Set(hash[:], nil)
+
+		it := rawdb.IterateStorageSnapshots(base.diskdb, hash)
+		for it.Next() {
+			if key := it.Key(); len(key) == 65 { // TODO(karalabe): Yuck, we should move this into the iterator
+				batch.Delete(key)
+				base.cache.Del(key[1:])
+
+				snapshotFlushStorageItemMeter.Mark(1)
+			}
+		}
+		it.Release()
+	}
+	// Push all updated accounts into the database
+	for hash, data := range bottom.accountData {
+		// Skip any account not covered yet by the snapshot
+		if base.genMarker != nil && bytes.Compare(hash[:], base.genMarker) > 0 {
+			continue
+		}
+		// Push the account to disk
+		rawdb.WriteAccountSnapshot(batch, hash, data)
+		base.cache.Set(hash[:], data)
+		snapshotCleanAccountWriteMeter.Mark(int64(len(data)))
+
+		if batch.ValueSize() > ethdb.IdealBatchSize {
+			if err := batch.Write(); err != nil {
+				log.Crit("Failed to write account snapshot", "err", err)
+			}
+			batch.Reset()
+		}
+		snapshotFlushAccountItemMeter.Mark(1)
+		snapshotFlushAccountSizeMeter.Mark(int64(len(data)))
+	}
+	// Push all the storage slots into the database
+	for accountHash, storage := range bottom.storageData {
+		// Skip any account not covered yet by the snapshot
+		if base.genMarker != nil && bytes.Compare(accountHash[:], base.genMarker) > 0 {
+			continue
+		}
+		// Generation might be mid-account, track that case too
+		midAccount := base.genMarker != nil && bytes.Equal(accountHash[:], base.genMarker[:common.HashLength])
+
+		for storageHash, data := range storage {
+			// Skip any slot not covered yet by the snapshot
+			if midAccount && bytes.Compare(storageHash[:], base.genMarker[common.HashLength:]) > 0 {
+				continue
+			}
+			if len(data) > 0 {
+				rawdb.WriteStorageSnapshot(batch, accountHash, storageHash, data)
+				base.cache.Set(append(accountHash[:], storageHash[:]...), data)
+				snapshotCleanStorageWriteMeter.Mark(int64(len(data)))
+			} else {
+				rawdb.DeleteStorageSnapshot(batch, accountHash, storageHash)
+				base.cache.Set(append(accountHash[:], storageHash[:]...), nil)
+			}
+			snapshotFlushStorageItemMeter.Mark(1)
+			snapshotFlushStorageSizeMeter.Mark(int64(len(data)))
+		}
+		if batch.ValueSize() > ethdb.IdealBatchSize {
+			if err := batch.Write(); err != nil {
+				log.Crit("Failed to write storage snapshot", "err", err)
+			}
+			batch.Reset()
+		}
+	}
+	// Update the snapshot block marker and write any remainder data
+	rawdb.WriteSnapshotRoot(batch, bottom.root)
+	if err := batch.Write(); err != nil {
+		log.Crit("Failed to write leftover snapshot", "err", err)
+	}
+	res := &diskLayer{
+		root:       bottom.root,
+		cache:      base.cache,
+		diskdb:     base.diskdb,
+		triedb:     base.triedb,
+		genMarker:  base.genMarker,
+		genPending: base.genPending,
+	}
+	// If snapshot generation hasn't finished yet, port over all the stats and
+	// continue where the previous round left off.
+	//
+	// Note, the `base.genAbort` comparison is not used normally, it's checked
+	// to allow the tests to play with the marker without triggering this path.
+	if base.genMarker != nil && base.genAbort != nil {
+		res.genMarker = base.genMarker
+		res.genAbort = make(chan chan *generatorStats)
+		go res.generate(stats)
+	}
+	return res
+}
+
+// Journal commits an entire diff hierarchy to disk into a single journal entry.
+// This is meant to be used during shutdown to persist the snapshot without
+// flattening everything down (bad for reorgs).
+//
+// The method returns the root hash of the base layer that needs to be persisted
+// to disk as a trie too to allow continuing any pending generation op.
+func (t *Tree) Journal(root common.Hash) (common.Hash, error) {
+	// Retrieve the head snapshot to journal from
+	snap := t.Snapshot(root)
+	if snap == nil {
+		return common.Hash{}, fmt.Errorf("snapshot [%#x] missing", root)
+	}
+	// Run the journaling
+	t.lock.Lock()
+	defer t.lock.Unlock()
+
+	journal := new(bytes.Buffer)
+	base, err := snap.(snapshot).Journal(journal)
+	if err != nil {
+		return common.Hash{}, err
+	}
+	// Store the journal into the database and return
+	rawdb.WriteSnapshotJournal(t.diskdb, journal.Bytes())
+	return base, nil
+}
+
+// Rebuild wipes all available snapshot data from the persistent database and
+// discards all caches and diff layers. Afterwards, it starts a new snapshot
+// generator with the given root hash.
+func (t *Tree) Rebuild(root common.Hash) {
+	t.lock.Lock()
+	defer t.lock.Unlock()
+
+	// Track whether there's a wipe currently running and keep it alive if so
+	var wiper chan struct{}
+
+	// Iterate over and mark all layers stale
+	for _, layer := range t.layers {
+		switch layer := layer.(type) {
+		case *diskLayer:
+			// If the base layer is generating, abort it and save
+			if layer.genAbort != nil {
+				abort := make(chan *generatorStats)
+				layer.genAbort <- abort
+
+				if stats := <-abort; stats != nil {
+					wiper = stats.wiping
+				}
+			}
+			// Layer should be inactive now, mark it as stale
+			layer.lock.Lock()
+			layer.stale = true
+			layer.lock.Unlock()
+
+		case *diffLayer:
+			// If the layer is a simple diff, simply mark as stale
+			layer.lock.Lock()
+			atomic.StoreUint32(&layer.stale, 1)
+			layer.lock.Unlock()
+
+		default:
+			panic(fmt.Sprintf("unknown layer type: %T", layer))
+		}
+	}
+	// Start generating a new snapshot from scratch on a background thread. The
+	// generator will run a wiper first if there's not one running right now.
+	log.Info("Rebuilding state snapshot")
+	t.layers = map[common.Hash]snapshot{
+		root: generateSnapshot(t.diskdb, t.triedb, t.cache, root, wiper),
+	}
+}
+
+// AccountIterator creates a new account iterator for the specified root hash and
+// seeks to a starting account hash.
+func (t *Tree) AccountIterator(root common.Hash, seek common.Hash) (AccountIterator, error) {
+	return newFastAccountIterator(t, root, seek)
+}
+
+// StorageIterator creates a new storage iterator for the specified root hash and
+// account. The iterator will be moved to the specific start position.
+func (t *Tree) StorageIterator(root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) {
+	return newFastStorageIterator(t, root, account, seek)
+}
diff --git a/core/state/snapshot/sort.go b/core/state/snapshot/sort.go
new file mode 100644
index 0000000..8884123
--- /dev/null
+++ b/core/state/snapshot/sort.go
@@ -0,0 +1,36 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package snapshot + +import ( + "bytes" + + "github.com/ethereum/go-ethereum/common" +) + +// hashes is a helper to implement sort.Interface. +type hashes []common.Hash + +// Len is the number of elements in the collection. +func (hs hashes) Len() int { return len(hs) } + +// Less reports whether the element with index i should sort before the element +// with index j. +func (hs hashes) Less(i, j int) bool { return bytes.Compare(hs[i][:], hs[j][:]) < 0 } + +// Swap swaps the elements with indexes i and j. +func (hs hashes) Swap(i, j int) { hs[i], hs[j] = hs[j], hs[i] } diff --git a/core/state/snapshot/wipe.go b/core/state/snapshot/wipe.go new file mode 100644 index 0000000..853a1a7 --- /dev/null +++ b/core/state/snapshot/wipe.go @@ -0,0 +1,131 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package snapshot + +import ( + "bytes" + "time" + + "github.com/ava-labs/coreth/core/rawdb" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" +) + +// wipeSnapshot starts a goroutine to iterate over the entire key-value database +// and delete all the data associated with the snapshot (accounts, storage, +// metadata). After all is done, the snapshot range of the database is compacted +// to free up unused data blocks. +func wipeSnapshot(db ethdb.KeyValueStore, full bool) chan struct{} { + // Wipe the snapshot root marker synchronously + if full { + rawdb.DeleteSnapshotRoot(db) + } + // Wipe everything else asynchronously + wiper := make(chan struct{}, 1) + go func() { + if err := wipeContent(db); err != nil { + log.Error("Failed to wipe state snapshot", "err", err) // Database close will trigger this + return + } + close(wiper) + }() + return wiper +} + +// wipeContent iterates over the entire key-value database and deletes all the +// data associated with the snapshot (accounts, storage), but not the root hash +// as the wiper is meant to run on a background thread but the root needs to be +// removed in sync to avoid data races. 
After all is done, the snapshot range of
+// the database is compacted to free up unused data blocks.
+func wipeContent(db ethdb.KeyValueStore) error {
+	if err := wipeKeyRange(db, "accounts", rawdb.SnapshotAccountPrefix, len(rawdb.SnapshotAccountPrefix)+common.HashLength); err != nil {
+		return err
+	}
+	if err := wipeKeyRange(db, "storage", rawdb.SnapshotStoragePrefix, len(rawdb.SnapshotStoragePrefix)+2*common.HashLength); err != nil {
+		return err
+	}
+	// Compact the snapshot section of the database to get rid of unused space
+	start := time.Now()
+
+	log.Info("Compacting snapshot account area ")
+	end := common.CopyBytes(rawdb.SnapshotAccountPrefix)
+	end[len(end)-1]++
+
+	if err := db.Compact(rawdb.SnapshotAccountPrefix, end); err != nil {
+		return err
+	}
+	log.Info("Compacting snapshot storage area ")
+	end = common.CopyBytes(rawdb.SnapshotStoragePrefix)
+	end[len(end)-1]++
+
+	if err := db.Compact(rawdb.SnapshotStoragePrefix, end); err != nil {
+		return err
+	}
+	log.Info("Compacted snapshot area in database", "elapsed", common.PrettyDuration(time.Since(start)))
+
+	return nil
+}
+
+// wipeKeyRange deletes a range of keys from the database starting with prefix
+// and having a specific total key length.
+func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, keylen int) error {
+	// Batch deletions together to avoid holding an iterator for too long
+	var (
+		batch = db.NewBatch()
+		items int
+	)
+	// Iterate over the key-range and delete all of them
+	start, logged := time.Now(), time.Now()
+
+	it := db.NewIterator(prefix, nil)
+	for it.Next() {
+		// Skip any keys with the correct prefix but wrong length (trie nodes)
+		key := it.Key()
+		if !bytes.HasPrefix(key, prefix) {
+			break
+		}
+		if len(key) != keylen {
+			continue
+		}
+		// Delete the key and periodically recreate the batch and iterator
+		batch.Delete(key)
+		items++
+
+		if items%10000 == 0 {
+			// Batch too large (or iterator too long lived), flush and recreate
+			it.Release()
+			if err := batch.Write(); err != nil {
+				return err
+			}
+			batch.Reset()
+			seekPos := key[len(prefix):]
+			it = db.NewIterator(prefix, seekPos)
+
+			if time.Since(logged) > 8*time.Second {
+				log.Info("Deleting state snapshot leftovers", "kind", kind, "wiped", items, "elapsed", common.PrettyDuration(time.Since(start)))
+				logged = time.Now()
+			}
+		}
+	}
+	it.Release()
+	if err := batch.Write(); err != nil {
+		return err
+	}
+	log.Info("Deleted state snapshot leftovers", "kind", kind, "wiped", items, "elapsed", common.PrettyDuration(time.Since(start)))
+	return nil
+}
diff --git a/core/state/state_object.go b/core/state/state_object.go
index 9c47dc4..2893f80 100644
--- a/core/state/state_object.go
+++ b/core/state/state_object.go
@@ -23,10 +23,10 @@ import (
 	"math/big"
 	"time"
 
-	"github.com/ava-labs/go-ethereum/common"
-	"github.com/ava-labs/go-ethereum/crypto"
-	"github.com/ava-labs/go-ethereum/metrics"
-	"github.com/ava-labs/go-ethereum/rlp"
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/crypto"
+	"github.com/ethereum/go-ethereum/metrics"
+	"github.com/ethereum/go-ethereum/rlp"
 )
 
 var emptyCodeHash = crypto.Keccak256(nil)
@@ -79,9 +79,10 @@ type stateObject struct {
 	trie Trie // storage trie, which becomes non-nil on first access
 	code Code // contract bytecode, which gets set when code is loaded
 
-	originStorage Storage // Storage cache of original entries to dedup rewrites
-	dirtyStorage  Storage // Storage entries that need to be flushed to disk
-	fakeStorage   Storage // Fake storage which constructed by caller for debugging purpose.
+	originStorage  Storage // Storage cache of original entries to dedup rewrites, reset for every transaction
+	pendingStorage Storage // Storage entries that need to be flushed to disk, at the end of an entire block
+	dirtyStorage   Storage // Storage entries that have been modified in the current transaction execution
+	fakeStorage    Storage // Fake storage constructed by the caller for debugging purposes.
 
 	// Cache flags.
 	// When an object is marked suicided it will be deleted from the trie
@@ -114,13 +115,17 @@ func newObject(db *StateDB, address common.Address, data Account) *stateObject {
 	if data.CodeHash == nil {
 		data.CodeHash = emptyCodeHash
 	}
+	if data.Root == (common.Hash{}) {
+		data.Root = emptyRoot
+	}
 	return &stateObject{
-		db:            db,
-		address:       address,
-		addrHash:      crypto.Keccak256Hash(address[:]),
-		data:          data,
-		originStorage: make(Storage),
-		dirtyStorage:  make(Storage),
+		db:             db,
+		address:        address,
+		addrHash:       crypto.Keccak256Hash(address[:]),
+		data:           data,
+		originStorage:  make(Storage),
+		pendingStorage: make(Storage),
+		dirtyStorage:   make(Storage),
 	}
 }
 
@@ -184,21 +189,44 @@ func (s *stateObject) GetCommittedState(db Database, key common.Hash) common.Has
 	if s.fakeStorage != nil {
 		return s.fakeStorage[key]
 	}
-	// If we have the original value cached, return that
-	value, cached := s.originStorage[key]
-	if cached {
+	// If we have a pending write or clean cached, return that
+	if value, pending := s.pendingStorage[key]; pending {
 		return value
 	}
-	// Track the amount of time wasted on reading the storage trie
-	if metrics.EnabledExpensive {
-		defer func(start time.Time) { s.db.StorageReads += time.Since(start) }(time.Now())
+	if value, cached := s.originStorage[key]; cached {
+		return value
 	}
-	// Otherwise load the value from the database
-	enc, err := s.getTrie(db).TryGet(key[:])
-	if err != nil {
-		s.setError(err)
-		return common.Hash{}
+	// If no live objects are available, attempt to use snapshots
+	var (
+		enc []byte
+		err error
+	)
+	if s.db.snap != nil {
+		if metrics.EnabledExpensive {
+			defer func(start time.Time) { s.db.SnapshotStorageReads += time.Since(start) }(time.Now())
+		}
+		// If the object was destructed in *this* block (and potentially resurrected),
+		// the storage has been cleared out, and we should *not* consult the previous
+		// snapshot about any storage values. The only possible alternatives are:
+		//   1) resurrect happened, and new slot values were set -- those should
+		//      have been handled via pendingStorage above.
+		//   2) we don't have new values, and can deliver empty response back
+		if _, destructed := s.db.snapDestructs[s.addrHash]; destructed {
+			return common.Hash{}
+		}
+		enc, err = s.db.snap.Storage(s.addrHash, crypto.Keccak256Hash(key.Bytes()))
+	}
+	// If snapshot unavailable or reading from it failed, load from the database
+	if s.db.snap == nil || err != nil {
+		if metrics.EnabledExpensive {
+			defer func(start time.Time) { s.db.StorageReads += time.Since(start) }(time.Now())
+		}
+		if enc, err = s.getTrie(db).TryGet(key.Bytes()); err != nil {
+			s.setError(err)
+			return common.Hash{}
+		}
 	}
+	var value common.Hash
 	if len(enc) > 0 {
 		_, content, _, err := rlp.Split(enc)
 		if err != nil {
@@ -253,38 +281,73 @@ func (s *stateObject) setState(key, value common.Hash) {
 	s.dirtyStorage[key] = value
 }
 
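The dirty/pending split introduced in this hunk is easiest to see in isolation. The toy model below is a hypothetical stand-in (slots and object are not types from this package); it only mirrors the promotion that finalise performs:

package main

import "fmt"

// slots is a toy stand-in for the Storage map type.
type slots map[string]string

// object mimics the three storage tiers of stateObject: committed values,
// block-level pending writes, and transaction-level dirty writes.
type object struct {
	origin  slots // committed values, used to skip noop writes
	pending slots // promoted at transaction end, flushed at block end
	dirty   slots // written during the current transaction
}

// finalise promotes the current transaction's writes into the pending set.
func (o *object) finalise() {
	for k, v := range o.dirty {
		o.pending[k] = v
	}
	if len(o.dirty) > 0 {
		o.dirty = make(slots)
	}
}

func main() {
	o := &object{origin: slots{}, pending: slots{}, dirty: slots{"k": "v1"}}
	o.finalise() // end of tx 1
	o.dirty = slots{"k": "v2"}
	o.finalise()                // end of tx 2
	fmt.Println(o.pending["k"]) // v2: pending holds the latest promoted value
}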
+// finalise moves all dirty storage slots into the pending area to be hashed or
+// committed later. It is invoked at the end of every transaction.
+func (s *stateObject) finalise() {
+	for key, value := range s.dirtyStorage {
+		s.pendingStorage[key] = value
+	}
+	if len(s.dirtyStorage) > 0 {
+		s.dirtyStorage = make(Storage)
+	}
+}
+
 // updateTrie writes cached storage modifications into the object's storage trie.
+// It will return nil if the trie has not been loaded and no changes have been made
 func (s *stateObject) updateTrie(db Database) Trie {
+	// Make sure all dirty slots are finalized into the pending storage area
+	s.finalise()
+	if len(s.pendingStorage) == 0 {
+		return s.trie
+	}
 	// Track the amount of time wasted on updating the storage trie
 	if metrics.EnabledExpensive {
 		defer func(start time.Time) { s.db.StorageUpdates += time.Since(start) }(time.Now())
 	}
-	// Update all the dirty slots in the trie
+	// Retrieve the snapshot storage map for the object
+	var storage map[common.Hash][]byte
+	if s.db.snap != nil {
+		// Retrieve the old storage map, if available, create a new one otherwise
+		storage = s.db.snapStorage[s.addrHash]
+		if storage == nil {
+			storage = make(map[common.Hash][]byte)
+			s.db.snapStorage[s.addrHash] = storage
+		}
+	}
+	// Insert all the pending updates into the trie
 	tr := s.getTrie(db)
-	for key, value := range s.dirtyStorage {
-		delete(s.dirtyStorage, key)
-
+	for key, value := range s.pendingStorage {
 		// Skip noop changes, persist actual changes
 		if value == s.originStorage[key] {
 			continue
 		}
 		s.originStorage[key] = value
+		var v []byte
 		if (value == common.Hash{}) {
 			s.setError(tr.TryDelete(key[:]))
-			continue
+		} else {
+			// Encoding []byte cannot fail, ok to ignore the error.
+			v, _ = rlp.EncodeToBytes(common.TrimLeftZeroes(value[:]))
+			s.setError(tr.TryUpdate(key[:], v))
+		}
+		// If state snapshotting is active, cache the data til commit
+		if storage != nil {
+			storage[crypto.Keccak256Hash(key[:])] = v // v will be nil if value is 0x00
 		}
-		// Encoding []byte cannot fail, ok to ignore the error.
-		v, _ := rlp.EncodeToBytes(bytes.TrimLeft(value[:], "\x00"))
-		s.setError(tr.TryUpdate(key[:], v))
+	}
+	if len(s.pendingStorage) > 0 {
+		s.pendingStorage = make(Storage)
 	}
 	return tr
 }
 
 // updateRoot sets the trie root to the current root hash of the storage trie.
 func (s *stateObject) updateRoot(db Database) {
-	s.updateTrie(db)
-
+	// If nothing changed, don't bother with hashing anything
+	if s.updateTrie(db) == nil {
+		return
+	}
 	// Track the amount of time wasted on hashing the storage trie
 	if metrics.EnabledExpensive {
 		defer func(start time.Time) { s.db.StorageHashes += time.Since(start) }(time.Now())
@@ -295,7 +358,10 @@ func (s *stateObject) updateRoot(db Database) {
 // CommitTrie the storage trie of the object to db.
 // This updates the trie root.
 func (s *stateObject) CommitTrie(db Database) error {
-	s.updateTrie(db)
+	// If nothing changed, don't bother with hashing anything
+	if s.updateTrie(db) == nil {
+		return nil
+	}
 	if s.dbErr != nil {
 		return s.dbErr
 	}
@@ -310,22 +376,21 @@ func (s *stateObject) CommitTrie(db Database) error {
 	return err
 }
 
-// AddBalance removes amount from c's balance.
+// AddBalance adds amount to s's balance.
 // It is used to add funds to the destination account of a transfer.
 func (s *stateObject) AddBalance(amount *big.Int) {
-	// EIP158: We must check emptiness for the objects such that the account
+	// EIP161: We must check emptiness for the objects such that the account
 	// clearing (0,0,0 objects) can take effect.
 	if amount.Sign() == 0 {
 		if s.empty() {
 			s.touch()
 		}
-		return
 	}
 	s.SetBalance(new(big.Int).Add(s.Balance(), amount))
 }
 
-// SubBalance removes amount from c's balance.
+// SubBalance removes amount from s's balance. // It is used to remove funds from the origin account of a transfer. func (s *stateObject) SubBalance(amount *big.Int) { if amount.Sign() == 0 { @@ -388,6 +453,7 @@ func (s *stateObject) deepCopy(db *StateDB) *stateObject { stateObject.code = s.code stateObject.dirtyStorage = s.dirtyStorage.Copy() stateObject.originStorage = s.originStorage.Copy() + stateObject.pendingStorage = s.pendingStorage.Copy() stateObject.suicided = s.suicided stateObject.dirtyCode = s.dirtyCode stateObject.deleted = s.deleted @@ -419,6 +485,23 @@ func (s *stateObject) Code(db Database) []byte { return code } +// CodeSize returns the size of the contract code associated with this object, +// or zero if none. This method is an almost mirror of Code, but uses a cache +// inside the database to avoid loading codes seen recently. +func (s *stateObject) CodeSize(db Database) int { + if s.code != nil { + return len(s.code) + } + if bytes.Equal(s.CodeHash(), emptyCodeHash) { + return 0 + } + size, err := db.ContractCodeSize(s.addrHash, common.BytesToHash(s.CodeHash())) + if err != nil { + s.setError(fmt.Errorf("can't load code size %x: %v", s.CodeHash(), err)) + } + return size +} + func (s *stateObject) SetCode(codeHash common.Hash, code []byte) { prevcode := s.Code(s.db.db) s.db.journal.append(codeChange{ diff --git a/core/state/statedb.go b/core/state/statedb.go index 9c7535b..81be542 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -24,13 +24,15 @@ import ( "sort" "time" + "github.com/ava-labs/coreth/core/rawdb" + "github.com/ava-labs/coreth/core/state/snapshot" "github.com/ava-labs/coreth/core/types" - "github.com/ava-labs/go-ethereum/common" - "github.com/ava-labs/go-ethereum/crypto" - "github.com/ava-labs/go-ethereum/log" - "github.com/ava-labs/go-ethereum/metrics" - "github.com/ava-labs/go-ethereum/rlp" - "github.com/ava-labs/go-ethereum/trie" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/metrics" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" ) type revision struct { @@ -42,9 +44,6 @@ var ( // emptyRoot is the known root hash of an empty trie. emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421") zeroRoot = common.HexToHash("0000000000000000000000000000000000000000000000000000000000000000") - - // emptyCode is the known hash of the empty EVM bytecode. - emptyCode = crypto.Keccak256Hash(nil) ) type proofList [][]byte @@ -58,7 +57,7 @@ func (n *proofList) Delete(key []byte) error { panic("not supported") } -// StateDBs within the ethereum protocol are used to store anything +// StateDB structs within the ethereum protocol are used to store anything // within the merkle trie. StateDBs take care of caching and storing // nested states. It's the general query interface to retrieve: // * Contracts @@ -67,9 +66,16 @@ type StateDB struct { db Database trie Trie + snaps *snapshot.Tree + snap snapshot.Snapshot + snapDestructs map[common.Hash]struct{} + snapAccounts map[common.Hash][]byte + snapStorage map[common.Hash]map[common.Hash][]byte + // This map holds 'live' objects, which will get modified while processing a state transition. 
- stateObjects map[common.Address]*stateObject - stateObjectsDirty map[common.Address]struct{} + stateObjects map[common.Address]*stateObject + stateObjectsPending map[common.Address]struct{} // State objects finalized but not yet written to the trie + stateObjectsDirty map[common.Address]struct{} // State objects modified in the current execution // DB error. // State objects are used by the consensus core and VM which are @@ -95,134 +101,157 @@ type StateDB struct { nextRevisionId int // Measurements gathered during execution for debugging purposes - AccountReads time.Duration - AccountHashes time.Duration - AccountUpdates time.Duration - AccountCommits time.Duration - StorageReads time.Duration - StorageHashes time.Duration - StorageUpdates time.Duration - StorageCommits time.Duration -} - -// Create a new state from a given trie. -func New(root common.Hash, db Database) (*StateDB, error) { + AccountReads time.Duration + AccountHashes time.Duration + AccountUpdates time.Duration + AccountCommits time.Duration + StorageReads time.Duration + StorageHashes time.Duration + StorageUpdates time.Duration + StorageCommits time.Duration + SnapshotAccountReads time.Duration + SnapshotStorageReads time.Duration + SnapshotCommits time.Duration +} + +// New creates a new state from a given trie. +func New(root common.Hash, db Database, snaps *snapshot.Tree) (*StateDB, error) { tr, err := db.OpenTrie(root) if err != nil { return nil, err } - return &StateDB{ - db: db, - trie: tr, - stateObjects: make(map[common.Address]*stateObject), - stateObjectsDirty: make(map[common.Address]struct{}), - logs: make(map[common.Hash][]*types.Log), - preimages: make(map[common.Hash][]byte), - journal: newJournal(), - }, nil + sdb := &StateDB{ + db: db, + trie: tr, + snaps: snaps, + stateObjects: make(map[common.Address]*stateObject), + stateObjectsPending: make(map[common.Address]struct{}), + stateObjectsDirty: make(map[common.Address]struct{}), + logs: make(map[common.Hash][]*types.Log), + preimages: make(map[common.Hash][]byte), + journal: newJournal(), + } + if sdb.snaps != nil { + if sdb.snap = sdb.snaps.Snapshot(root); sdb.snap != nil { + sdb.snapDestructs = make(map[common.Hash]struct{}) + sdb.snapAccounts = make(map[common.Hash][]byte) + sdb.snapStorage = make(map[common.Hash]map[common.Hash][]byte) + } + } + return sdb, nil } // setError remembers the first non-nil error it is called with. -func (self *StateDB) setError(err error) { - if self.dbErr == nil { - self.dbErr = err +func (s *StateDB) setError(err error) { + if s.dbErr == nil { + s.dbErr = err } } -func (self *StateDB) Error() error { - return self.dbErr +func (s *StateDB) Error() error { + return s.dbErr } // Reset clears out all ephemeral state objects from the state db, but keeps // the underlying state trie to avoid reloading data for the next operations. 
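For context on how these pieces compose, here is one hypothetical way to wire the constructor above to a snapshot tree. openState and the 256 MB cache sizes are assumptions for the sketch, as is the TrieDB accessor on the state Database interface (present in upstream go-ethereum):

package example

import (
	"github.com/ava-labs/coreth/core/state"
	"github.com/ava-labs/coreth/core/state/snapshot"
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/ethdb"
)

// openState opens a StateDB that reads through a snapshot tree whenever the
// tree holds a layer for the requested root, falling back to the trie otherwise.
func openState(diskdb ethdb.Database, headRoot common.Hash) (*state.StateDB, error) {
	sdb := state.NewDatabaseWithCache(diskdb, 256, "") // 256 MB trie cache, no journal file
	snaps := snapshot.New(diskdb, sdb.TrieDB(), 256, headRoot, true)
	return state.New(headRoot, sdb, snaps)
}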
-func (self *StateDB) Reset(root common.Hash) error { - tr, err := self.db.OpenTrie(root) +func (s *StateDB) Reset(root common.Hash) error { + tr, err := s.db.OpenTrie(root) if err != nil { return err } - self.trie = tr - self.stateObjects = make(map[common.Address]*stateObject) - self.stateObjectsDirty = make(map[common.Address]struct{}) - self.thash = common.Hash{} - self.bhash = common.Hash{} - self.txIndex = 0 - self.logs = make(map[common.Hash][]*types.Log) - self.logSize = 0 - self.preimages = make(map[common.Hash][]byte) - self.clearJournalAndRefund() + s.trie = tr + s.stateObjects = make(map[common.Address]*stateObject) + s.stateObjectsPending = make(map[common.Address]struct{}) + s.stateObjectsDirty = make(map[common.Address]struct{}) + s.thash = common.Hash{} + s.bhash = common.Hash{} + s.txIndex = 0 + s.logs = make(map[common.Hash][]*types.Log) + s.logSize = 0 + s.preimages = make(map[common.Hash][]byte) + s.clearJournalAndRefund() + + if s.snaps != nil { + s.snapAccounts, s.snapDestructs, s.snapStorage = nil, nil, nil + if s.snap = s.snaps.Snapshot(root); s.snap != nil { + s.snapDestructs = make(map[common.Hash]struct{}) + s.snapAccounts = make(map[common.Hash][]byte) + s.snapStorage = make(map[common.Hash]map[common.Hash][]byte) + } + } return nil } -func (self *StateDB) AddLog(log *types.Log) { - self.journal.append(addLogChange{txhash: self.thash}) +func (s *StateDB) AddLog(log *types.Log) { + s.journal.append(addLogChange{txhash: s.thash}) - log.TxHash = self.thash - log.BlockHash = self.bhash - log.TxIndex = uint(self.txIndex) - log.Index = self.logSize - self.logs[self.thash] = append(self.logs[self.thash], log) - self.logSize++ + log.TxHash = s.thash + log.BlockHash = s.bhash + log.TxIndex = uint(s.txIndex) + log.Index = s.logSize + s.logs[s.thash] = append(s.logs[s.thash], log) + s.logSize++ } -func (self *StateDB) GetLogs(hash common.Hash) []*types.Log { - return self.logs[hash] +func (s *StateDB) GetLogs(hash common.Hash) []*types.Log { + return s.logs[hash] } -func (self *StateDB) Logs() []*types.Log { +func (s *StateDB) Logs() []*types.Log { var logs []*types.Log - for _, lgs := range self.logs { + for _, lgs := range s.logs { logs = append(logs, lgs...) } return logs } // AddPreimage records a SHA3 preimage seen by the VM. -func (self *StateDB) AddPreimage(hash common.Hash, preimage []byte) { - if _, ok := self.preimages[hash]; !ok { - self.journal.append(addPreimageChange{hash: hash}) +func (s *StateDB) AddPreimage(hash common.Hash, preimage []byte) { + if _, ok := s.preimages[hash]; !ok { + s.journal.append(addPreimageChange{hash: hash}) pi := make([]byte, len(preimage)) copy(pi, preimage) - self.preimages[hash] = pi + s.preimages[hash] = pi } } // Preimages returns a list of SHA3 preimages that have been submitted. -func (self *StateDB) Preimages() map[common.Hash][]byte { - return self.preimages +func (s *StateDB) Preimages() map[common.Hash][]byte { + return s.preimages } // AddRefund adds gas to the refund counter -func (self *StateDB) AddRefund(gas uint64) { - self.journal.append(refundChange{prev: self.refund}) - self.refund += gas +func (s *StateDB) AddRefund(gas uint64) { + s.journal.append(refundChange{prev: s.refund}) + s.refund += gas } // SubRefund removes gas from the refund counter. 
// This method will panic if the refund counter goes below zero
-func (self *StateDB) SubRefund(gas uint64) {
-	self.journal.append(refundChange{prev: self.refund})
-	if gas > self.refund {
-		panic("Refund counter below zero")
+func (s *StateDB) SubRefund(gas uint64) {
+	s.journal.append(refundChange{prev: s.refund})
+	if gas > s.refund {
+		panic(fmt.Sprintf("Refund counter below zero (gas: %d > refund: %d)", gas, s.refund))
 	}
-	self.refund -= gas
+	s.refund -= gas
 }
 
 // Exist reports whether the given account address exists in the state.
 // Notably this also returns true for suicided accounts.
-func (self *StateDB) Exist(addr common.Address) bool {
-	return self.getStateObject(addr) != nil
+func (s *StateDB) Exist(addr common.Address) bool {
+	return s.getStateObject(addr) != nil
 }
 
 // Empty returns whether the state object is either non-existent
 // or empty according to the EIP161 specification (balance = nonce = code = 0)
-func (self *StateDB) Empty(addr common.Address) bool {
-	so := self.getStateObject(addr)
+func (s *StateDB) Empty(addr common.Address) bool {
+	so := s.getStateObject(addr)
 	return so == nil || so.empty()
 }
 
-// Retrieve the balance from the given address or 0 if object not found
-func (self *StateDB) GetBalance(addr common.Address) *big.Int {
-	stateObject := self.getStateObject(addr)
+// GetBalance retrieves the balance from the given address or 0 if object not found
+func (s *StateDB) GetBalance(addr common.Address) *big.Int {
+	stateObject := s.getStateObject(addr)
 	if stateObject != nil {
 		return stateObject.Balance()
 	}
@@ -241,11 +270,12 @@ func (self *StateDB) GetBalanceMultiCoin(addr common.Address, coinID common.Hash
 func (self *StateDB) EnableMultiCoin(addr common.Address) error {
 	stateObject := self.GetOrNewStateObject(addr)
 	if stateObject.data.Root != emptyRoot && stateObject.data.Root != zeroRoot {
-		return errors.New("not a fresh account")
+		return fmt.Errorf("not a fresh account: %s", stateObject.data.Root.Hex())
 	}
 	if !stateObject.EnableMultiCoin() {
 		return errors.New("multi-coin mode already enabled")
 	}
+	log.Debug(fmt.Sprintf("enabled MC for %s", addr.Hex()))
 	return nil
 }
@@ -262,8 +292,8 @@ func (self *StateDB) IsMultiCoin(addr common.Address) bool {
 	return false
 }
 
-func (self *StateDB) GetNonce(addr common.Address) uint64 {
-	stateObject := self.getStateObject(addr)
+func (s *StateDB) GetNonce(addr common.Address) uint64 {
+	stateObject := s.getStateObject(addr)
 	if stateObject != nil {
 		return stateObject.Nonce()
 	}
@@ -272,40 +302,33 @@ func (self *StateDB) GetNonce(addr common.Address) uint64 {
 }
 
 // TxIndex returns the current transaction index set by Prepare.
-func (self *StateDB) TxIndex() int {
-	return self.txIndex
+func (s *StateDB) TxIndex() int {
+	return s.txIndex
 }
 
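The EIP-161 emptiness rule that Empty relies on above fits in a few lines. The following is a self-contained toy check; account is a stand-in struct, not this package's type:

package main

import (
	"fmt"
	"math/big"
)

// account is a toy model holding just the three fields EIP-161 cares about.
type account struct {
	nonce   uint64
	balance *big.Int
	code    []byte
}

// empty reports whether balance, nonce and code are all zero, mirroring the
// (balance = nonce = code = 0) rule cited in the comment above.
func empty(a *account) bool {
	return a.nonce == 0 && a.balance.Sign() == 0 && len(a.code) == 0
}

func main() {
	fmt.Println(empty(&account{balance: big.NewInt(0)}))           // true
	fmt.Println(empty(&account{nonce: 1, balance: big.NewInt(0)})) // false
}

// BlockHash returns the current block hash set by Prepare.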
-func (self *StateDB) BlockHash() common.Hash { - return self.bhash +func (s *StateDB) BlockHash() common.Hash { + return s.bhash } -func (self *StateDB) GetCode(addr common.Address) []byte { - stateObject := self.getStateObject(addr) +func (s *StateDB) GetCode(addr common.Address) []byte { + stateObject := s.getStateObject(addr) if stateObject != nil { - return stateObject.Code(self.db) + return stateObject.Code(s.db) } return nil } -func (self *StateDB) GetCodeSize(addr common.Address) int { - stateObject := self.getStateObject(addr) - if stateObject == nil { - return 0 - } - if stateObject.code != nil { - return len(stateObject.code) - } - size, err := self.db.ContractCodeSize(stateObject.addrHash, common.BytesToHash(stateObject.CodeHash())) - if err != nil { - self.setError(err) +func (s *StateDB) GetCodeSize(addr common.Address) int { + stateObject := s.getStateObject(addr) + if stateObject != nil { + return stateObject.CodeSize(s.db) } - return size + return 0 } -func (self *StateDB) GetCodeHash(addr common.Address) common.Hash { - stateObject := self.getStateObject(addr) +func (s *StateDB) GetCodeHash(addr common.Address) common.Hash { + stateObject := s.getStateObject(addr) if stateObject == nil { return common.Hash{} } @@ -313,25 +336,25 @@ func (self *StateDB) GetCodeHash(addr common.Address) common.Hash { } // GetState retrieves a value from the given account's storage trie. -func (self *StateDB) GetState(addr common.Address, hash common.Hash) common.Hash { - stateObject := self.getStateObject(addr) +func (s *StateDB) GetState(addr common.Address, hash common.Hash) common.Hash { + stateObject := s.getStateObject(addr) if stateObject != nil { - return stateObject.GetState(self.db, hash) + return stateObject.GetState(s.db, hash) } return common.Hash{} } // GetProof returns the MerkleProof for a given Account -func (self *StateDB) GetProof(a common.Address) ([][]byte, error) { +func (s *StateDB) GetProof(a common.Address) ([][]byte, error) { var proof proofList - err := self.trie.Prove(crypto.Keccak256(a.Bytes()), 0, &proof) + err := s.trie.Prove(crypto.Keccak256(a.Bytes()), 0, &proof) return [][]byte(proof), err } -// GetProof returns the StorageProof for given key -func (self *StateDB) GetStorageProof(a common.Address, key common.Hash) ([][]byte, error) { +// GetStorageProof returns the StorageProof for given key +func (s *StateDB) GetStorageProof(a common.Address, key common.Hash) ([][]byte, error) { var proof proofList - trie := self.StorageTrie(a) + trie := s.StorageTrie(a) if trie == nil { return proof, errors.New("storage trie for requested address does not exist") } @@ -340,32 +363,33 @@ func (self *StateDB) GetStorageProof(a common.Address, key common.Hash) ([][]byt } // GetCommittedState retrieves a value from the given account's committed storage trie. -func (self *StateDB) GetCommittedState(addr common.Address, hash common.Hash) common.Hash { - stateObject := self.getStateObject(addr) +func (s *StateDB) GetCommittedState(addr common.Address, hash common.Hash) common.Hash { + stateObject := s.getStateObject(addr) if stateObject != nil { - return stateObject.GetCommittedState(self.db, hash) + return stateObject.GetCommittedState(s.db, hash) } return common.Hash{} } // Database retrieves the low level database supporting the lower level trie ops. -func (self *StateDB) Database() Database { - return self.db +func (s *StateDB) Database() Database { + return s.db } // StorageTrie returns the storage trie of an account. 
// The return value is a copy and is nil for non-existent accounts.
-func (self *StateDB) StorageTrie(addr common.Address) Trie {
-	stateObject := self.getStateObject(addr)
+func (s *StateDB) StorageTrie(addr common.Address) Trie {
+	stateObject := s.getStateObject(addr)
 	if stateObject == nil {
 		return nil
 	}
-	cpy := stateObject.deepCopy(self)
-	return cpy.updateTrie(self.db)
+	cpy := stateObject.deepCopy(s)
+	cpy.updateTrie(s.db)
+	return cpy.getTrie(s.db)
 }
 
-func (self *StateDB) HasSuicided(addr common.Address) bool {
-	stateObject := self.getStateObject(addr)
+func (s *StateDB) HasSuicided(addr common.Address) bool {
+	stateObject := s.getStateObject(addr)
 	if stateObject != nil {
 		return stateObject.suicided
 	}
@@ -377,81 +401,81 @@ func (self *StateDB) HasSuicided(addr common.Address) bool {
 */
 
 // AddBalance adds amount to the account associated with addr.
-func (self *StateDB) AddBalance(addr common.Address, amount *big.Int) {
-	stateObject := self.GetOrNewStateObject(addr)
+func (s *StateDB) AddBalance(addr common.Address, amount *big.Int) {
+	stateObject := s.GetOrNewStateObject(addr)
 	if stateObject != nil {
 		stateObject.AddBalance(amount)
 	}
 }
 
 // SubBalance subtracts amount from the account associated with addr.
-func (self *StateDB) SubBalance(addr common.Address, amount *big.Int) {
-	stateObject := self.GetOrNewStateObject(addr)
+func (s *StateDB) SubBalance(addr common.Address, amount *big.Int) {
+	stateObject := s.GetOrNewStateObject(addr)
 	if stateObject != nil {
 		stateObject.SubBalance(amount)
 	}
 }
 
-func (self *StateDB) SetBalance(addr common.Address, amount *big.Int) {
-	stateObject := self.GetOrNewStateObject(addr)
+func (s *StateDB) SetBalance(addr common.Address, amount *big.Int) {
+	stateObject := s.GetOrNewStateObject(addr)
 	if stateObject != nil {
 		stateObject.SetBalance(amount)
 	}
 }
 
 // AddBalanceMultiCoin adds amount of coinID to the account associated with addr.
-func (self *StateDB) AddBalanceMultiCoin(addr common.Address, coinID common.Hash, amount *big.Int) {
-	stateObject := self.GetOrNewStateObject(addr)
+func (s *StateDB) AddBalanceMultiCoin(addr common.Address, coinID common.Hash, amount *big.Int) {
+	stateObject := s.GetOrNewStateObject(addr)
 	if stateObject != nil {
-		stateObject.AddBalanceMultiCoin(coinID, amount, self.db)
+		stateObject.AddBalanceMultiCoin(coinID, amount, s.db)
 	}
 }
 
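A hypothetical read-only caller of the proof accessors shown earlier in this file (GetProof and GetStorageProof) could look as follows; addr and slotKey are assumed inputs, and verifying the proofs (for example with trie.VerifyProof) is out of scope for the sketch:

package example

import (
	"github.com/ava-labs/coreth/core/state"
	"github.com/ethereum/go-ethereum/common"
)

// accountAndSlotProofs fetches Merkle proofs for an account and one of its
// storage slots from an open StateDB. Both methods exist in this diff; what a
// caller does with the proofs afterwards is not shown here.
func accountAndSlotProofs(statedb *state.StateDB, addr common.Address, slotKey common.Hash) ([][]byte, [][]byte, error) {
	accProof, err := statedb.GetProof(addr)
	if err != nil {
		return nil, nil, err
	}
	slotProof, err := statedb.GetStorageProof(addr, slotKey)
	if err != nil {
		return nil, nil, err
	}
	return accProof, slotProof, nil
}

// SubBalanceMultiCoin subtracts amount of coinID from the account associated with addr.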
-func (self *StateDB) SubBalanceMultiCoin(addr common.Address, coinID common.Hash, amount *big.Int) { - stateObject := self.GetOrNewStateObject(addr) +func (s *StateDB) SubBalanceMultiCoin(addr common.Address, coinID common.Hash, amount *big.Int) { + stateObject := s.GetOrNewStateObject(addr) if stateObject != nil { - stateObject.SubBalanceMultiCoin(coinID, amount, self.db) + stateObject.SubBalanceMultiCoin(coinID, amount, s.db) } } -func (self *StateDB) SetBalanceMultiCoin(addr common.Address, coinID common.Hash, amount *big.Int) { - stateObject := self.GetOrNewStateObject(addr) +func (s *StateDB) SetBalanceMultiCoin(addr common.Address, coinID common.Hash, amount *big.Int) { + stateObject := s.GetOrNewStateObject(addr) if stateObject != nil { - stateObject.SetBalanceMultiCoin(coinID, amount, self.db) + stateObject.SetBalanceMultiCoin(coinID, amount, s.db) } } -func (self *StateDB) SetNonce(addr common.Address, nonce uint64) { - stateObject := self.GetOrNewStateObject(addr) +func (s *StateDB) SetNonce(addr common.Address, nonce uint64) { + stateObject := s.GetOrNewStateObject(addr) if stateObject != nil { stateObject.SetNonce(nonce) } } -func (self *StateDB) SetCode(addr common.Address, code []byte) { - stateObject := self.GetOrNewStateObject(addr) +func (s *StateDB) SetCode(addr common.Address, code []byte) { + stateObject := s.GetOrNewStateObject(addr) if stateObject != nil { stateObject.SetCode(crypto.Keccak256Hash(code), code) } } -func (self *StateDB) SetState(addr common.Address, key, value common.Hash) (res error) { +func (s *StateDB) SetState(addr common.Address, key, value common.Hash) (res error) { res = nil - stateObject := self.GetOrNewStateObject(addr) + stateObject := s.GetOrNewStateObject(addr) if stateObject != nil { if stateObject.data.IsMultiCoin { NormalizeStateKey(&key) } - stateObject.SetState(self.db, key, value) + stateObject.SetState(s.db, key, value) } return } // SetStorage replaces the entire storage for the specified account with given // storage. This function should only be used for debugging. -func (self *StateDB) SetStorage(addr common.Address, storage map[common.Hash]common.Hash) { - stateObject := self.GetOrNewStateObject(addr) +func (s *StateDB) SetStorage(addr common.Address, storage map[common.Hash]common.Hash) { + stateObject := s.GetOrNewStateObject(addr) if stateObject != nil { stateObject.SetStorage(storage) } @@ -462,12 +486,12 @@ func (self *StateDB) SetStorage(addr common.Address, storage map[common.Hash]com // // The account's state object is still available until the state is committed, // getStateObject will return a non-nil account after Suicide. -func (self *StateDB) Suicide(addr common.Address) bool { - stateObject := self.getStateObject(addr) +func (s *StateDB) Suicide(addr common.Address) bool { + stateObject := s.getStateObject(addr) if stateObject == nil { return false } - self.journal.append(suicideChange{ + s.journal.append(suicideChange{ account: &addr, prev: stateObject.suicided, prevbalance: new(big.Int).Set(stateObject.Balance()), @@ -483,90 +507,154 @@ func (self *StateDB) Suicide(addr common.Address) bool { // // updateStateObject writes the given object to the trie. 
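Before the account-trie writers below, a quick hypothetical illustration of the Suicide semantics documented above: the object stays queryable until finalisation, and the whole operation can be reverted through the journal. statedb and addr are assumed to exist:

package example

import (
	"github.com/ava-labs/coreth/core/state"
	"github.com/ethereum/go-ethereum/common"
)

// suicideLifecycle sketches the suicide flow: the balance is zeroed and the
// suicided flag set, but the object is retained until Finalise marks it deleted.
func suicideLifecycle(statedb *state.StateDB, addr common.Address) {
	rev := statedb.Snapshot() // journal checkpoint
	if statedb.Suicide(addr) {
		_ = statedb.Exist(addr) // still true: the object survives until finalisation
	}
	statedb.RevertToSnapshot(rev) // undo via the journal; Finalise(true) would instead make it stick
}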
-func (s *StateDB) updateStateObject(stateObject *stateObject) {
+func (s *StateDB) updateStateObject(obj *stateObject) {
 	// Track the amount of time wasted on updating the account from the trie
 	if metrics.EnabledExpensive {
 		defer func(start time.Time) { s.AccountUpdates += time.Since(start) }(time.Now())
 	}
 	// Encode the account and update the account trie
-	addr := stateObject.Address()
+	addr := obj.Address()
 
-	data, err := rlp.EncodeToBytes(stateObject)
+	data, err := rlp.EncodeToBytes(obj)
 	if err != nil {
 		panic(fmt.Errorf("can't encode object at %x: %v", addr[:], err))
 	}
-	s.setError(s.trie.TryUpdate(addr[:], data))
+	if err = s.trie.TryUpdate(addr[:], data); err != nil {
+		s.setError(fmt.Errorf("updateStateObject (%x) error: %v", addr[:], err))
+	}
+
+	// If state snapshotting is active, cache the data til commit. Note, this
+	// update mechanism is not symmetric to the deletion, because whereas it is
+	// enough to track account updates at commit time, deletions need tracking
+	// at transaction boundary level to ensure we capture state clearing.
+	if s.snap != nil {
+		s.snapAccounts[obj.addrHash] = snapshot.SlimAccountRLP(obj.data.Nonce, obj.data.Balance, obj.data.Root, obj.data.CodeHash, obj.data.IsMultiCoin)
+	}
 }
 
 // deleteStateObject removes the given object from the state trie.
-func (s *StateDB) deleteStateObject(stateObject *stateObject) {
+func (s *StateDB) deleteStateObject(obj *stateObject) {
 	// Track the amount of time wasted on deleting the account from the trie
 	if metrics.EnabledExpensive {
 		defer func(start time.Time) { s.AccountUpdates += time.Since(start) }(time.Now())
 	}
 	// Delete the account from the trie
-	stateObject.deleted = true
+	addr := obj.Address()
+	if err := s.trie.TryDelete(addr[:]); err != nil {
+		s.setError(fmt.Errorf("deleteStateObject (%x) error: %v", addr[:], err))
+	}
+}
 
-	addr := stateObject.Address()
-	s.setError(s.trie.TryDelete(addr[:]))
+// getStateObject retrieves a state object given by the address, returning nil if
+// the object is not found or was deleted in this execution context. If you need
+// to differentiate between non-existent/just-deleted, use getDeletedStateObject.
+func (s *StateDB) getStateObject(addr common.Address) *stateObject {
+	if obj := s.getDeletedStateObject(addr); obj != nil && !obj.deleted {
+		return obj
+	}
+	return nil
 }
 
-// Retrieve a state object given by the address. Returns nil if not found.
-func (s *StateDB) getStateObject(addr common.Address) (stateObject *stateObject) {
-	// Prefer live objects
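The snapshot path below reconstructs a full account from the slim format, defaulting the empty code hash and storage root. This standalone toy mirrors just that defaulting rule (fromSlim is invented for the example; the hash constants match the ones used by this package):

package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/crypto"
)

var (
	emptyCodeHash = crypto.Keccak256(nil)
	emptyRoot     = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")
)

// fromSlim restores the canonical empty values that slim snapshot accounts
// omit: a nil code hash and a zero storage root.
func fromSlim(codeHash []byte, root common.Hash) ([]byte, common.Hash) {
	if len(codeHash) == 0 {
		codeHash = emptyCodeHash
	}
	if root == (common.Hash{}) {
		root = emptyRoot
	}
	return codeHash, root
}

func main() {
	ch, rt := fromSlim(nil, common.Hash{})
	fmt.Printf("%x %x\n", ch, rt)
}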
+// getDeletedStateObject is similar to getStateObject, but instead of returning
+// nil for a deleted state object, it returns the actual object with the deleted
+// flag set. This is needed by the state journal to revert to the correct self-
+// destructed object instead of wiping all knowledge about the state object.
+func (s *StateDB) getDeletedStateObject(addr common.Address) *stateObject {
+	// Prefer live objects if any are available
 	if obj := s.stateObjects[addr]; obj != nil {
-		if obj.deleted {
-			return nil
-		}
 		return obj
 	}
-	// Track the amount of time wasted on loading the object from the database
-	if metrics.EnabledExpensive {
-		defer func(start time.Time) { s.AccountReads += time.Since(start) }(time.Now())
-	}
-	// Load the object from the database
-	enc, err := s.trie.TryGet(addr[:])
-	if len(enc) == 0 {
-		s.setError(err)
-		return nil
+	// If no live objects are available, attempt to use snapshots
+	var (
+		data *Account
+		err  error
+	)
+	if s.snap != nil {
+		if metrics.EnabledExpensive {
+			defer func(start time.Time) { s.SnapshotAccountReads += time.Since(start) }(time.Now())
+		}
+		var acc *snapshot.Account
+		if acc, err = s.snap.Account(crypto.Keccak256Hash(addr.Bytes())); err == nil {
+			if acc == nil {
+				return nil
+			}
+			data = &Account{
+				Nonce:       acc.Nonce,
+				Balance:     acc.Balance,
+				CodeHash:    acc.CodeHash,
+				IsMultiCoin: acc.IsMultiCoin,
+				Root:        common.BytesToHash(acc.Root),
+			}
+			if len(data.CodeHash) == 0 {
+				data.CodeHash = emptyCodeHash
+			}
+			if data.Root == (common.Hash{}) {
+				data.Root = emptyRoot
+			}
+		}
 	}
-	var data Account
-	if err := rlp.DecodeBytes(enc, &data); err != nil {
-		log.Error("Failed to decode state object", "addr", addr, "err", err)
-		return nil
+	// If snapshot unavailable or reading from it failed, load from the database
+	if s.snap == nil || err != nil {
+		if metrics.EnabledExpensive {
+			defer func(start time.Time) { s.AccountReads += time.Since(start) }(time.Now())
+		}
+		enc, err := s.trie.TryGet(addr.Bytes())
+		if err != nil {
+			s.setError(fmt.Errorf("getDeletedStateObject (%x) error: %v", addr.Bytes(), err))
+			return nil
+		}
+		if len(enc) == 0 {
+			return nil
+		}
+		data = new(Account)
+		if err := rlp.DecodeBytes(enc, data); err != nil {
+			log.Error("Failed to decode state object", "addr", addr, "err", err)
+			return nil
+		}
 	}
 	// Insert into the live set
-	obj := newObject(s, addr, data)
+	obj := newObject(s, addr, *data)
 	s.setStateObject(obj)
 	return obj
 }
 
-func (self *StateDB) setStateObject(object *stateObject) {
-	self.stateObjects[object.Address()] = object
+func (s *StateDB) setStateObject(object *stateObject) {
+	s.stateObjects[object.Address()] = object
 }
 
-// Retrieve a state object or create a new state object if nil.
-func (self *StateDB) GetOrNewStateObject(addr common.Address) *stateObject {
-	stateObject := self.getStateObject(addr)
-	if stateObject == nil || stateObject.deleted {
-		stateObject, _ = self.createObject(addr)
+// GetOrNewStateObject retrieves a state object or creates a new state object if nil.
+func (s *StateDB) GetOrNewStateObject(addr common.Address) *stateObject {
+	stateObject := s.getStateObject(addr)
+	if stateObject == nil {
+		stateObject, _ = s.createObject(addr)
 	}
 	return stateObject
 }
 
 // createObject creates a new state object. If there is an existing account with
 // the given address, it is overwritten and returned as the second return value.
-func (self *StateDB) createObject(addr common.Address) (newobj, prev *stateObject) {
-	prev = self.getStateObject(addr)
-	newobj = newObject(self, addr, Account{})
+func (s *StateDB) createObject(addr common.Address) (newobj, prev *stateObject) {
+	prev = s.getDeletedStateObject(addr) // Note, prev might have been deleted, we need that!
+
+	var prevdestruct bool
+	if s.snap != nil && prev != nil {
+		_, prevdestruct = s.snapDestructs[prev.addrHash]
+		if !prevdestruct {
+			s.snapDestructs[prev.addrHash] = struct{}{}
+		}
+	}
+	newobj = newObject(s, addr, Account{})
 	newobj.setNonce(0) // sets the object to dirty
 	if prev == nil {
-		self.journal.append(createObjectChange{account: &addr})
+		s.journal.append(createObjectChange{account: &addr})
 	} else {
-		self.journal.append(resetObjectChange{prev: prev})
+		s.journal.append(resetObjectChange{prev: prev, prevdestruct: prevdestruct})
+	}
+	s.setStateObject(newobj)
+	if prev != nil && !prev.deleted {
+		return newobj, prev
 	}
-	self.setStateObject(newobj)
-	return newobj, prev
+	return newobj, nil
 }
 
 // CreateAccount explicitly creates a state object. If a state object with the address
@@ -579,8 +667,8 @@ func (self *StateDB) createObject(addr common.Address) (newobj, prev *stateObjec
 // 2. tx_create(sha(account ++ nonce)) (note that this gets the address of 1)
 //
 // Carrying over the balance ensures that Ether doesn't disappear.
-func (self *StateDB) CreateAccount(addr common.Address) {
-	newObj, prev := self.createObject(addr)
+func (s *StateDB) CreateAccount(addr common.Address) {
+	newObj, prev := s.createObject(addr)
 	if prev != nil {
 		newObj.setBalance(prev.data.Balance)
 	}
@@ -617,40 +705,52 @@ func (db *StateDB) ForEachStorage(addr common.Address, cb func(key, value common
 
 // Copy creates a deep, independent copy of the state.
 // Snapshots of the copied state cannot be applied to the copy.
-func (self *StateDB) Copy() *StateDB {
+func (s *StateDB) Copy() *StateDB {
 	// Copy all the basic fields, initialize the memory ones
 	state := &StateDB{
-		db:                self.db,
-		trie:              self.db.CopyTrie(self.trie),
-		stateObjects:      make(map[common.Address]*stateObject, len(self.journal.dirties)),
-		stateObjectsDirty: make(map[common.Address]struct{}, len(self.journal.dirties)),
-		refund:            self.refund,
-		logs:              make(map[common.Hash][]*types.Log, len(self.logs)),
-		logSize:           self.logSize,
-		preimages:         make(map[common.Hash][]byte, len(self.preimages)),
-		journal:           newJournal(),
+		db:                  s.db,
+		trie:                s.db.CopyTrie(s.trie),
+		stateObjects:        make(map[common.Address]*stateObject, len(s.journal.dirties)),
+		stateObjectsPending: make(map[common.Address]struct{}, len(s.stateObjectsPending)),
+		stateObjectsDirty:   make(map[common.Address]struct{}, len(s.journal.dirties)),
+		refund:              s.refund,
+		logs:                make(map[common.Hash][]*types.Log, len(s.logs)),
+		logSize:             s.logSize,
+		preimages:           make(map[common.Hash][]byte, len(s.preimages)),
+		journal:             newJournal(),
 	}
 	// Copy the dirty states, logs, and preimages
-	for addr := range self.journal.dirties {
+	for addr := range s.journal.dirties {
 		// As documented [here](https://github.com/ethereum/go-ethereum/pull/16485#issuecomment-380438527),
 		// and in the Finalise-method, there is a case where an object is in the journal but not
 		// in the stateObjects: OOG after touch on ripeMD prior to Byzantium. Thus, we need to check for
 		// nil
-		if object, exist := self.stateObjects[addr]; exist {
+		if object, exist := s.stateObjects[addr]; exist {
+			// Even though the original object is dirty, we are not copying the journal,
+			// so we need to make sure that any side effect the journal would have caused
+			// during a commit (or similar op) is already applied to the copy.
 			state.stateObjects[addr] = object.deepCopy(state)
-			state.stateObjectsDirty[addr] = struct{}{}
+
+			state.stateObjectsDirty[addr] = struct{}{}   // Mark the copy dirty to force internal (code/state) commits
+			state.stateObjectsPending[addr] = struct{}{} // Mark the copy pending to force external (account) commits
 		}
 	}
 	// Above, we don't copy the actual journal. This means that if the copy is copied, the
 	// loop above will be a no-op, since the copy's journal is empty.
 	// Thus, here we iterate over stateObjects, to enable copies of copies
-	for addr := range self.stateObjectsDirty {
+	for addr := range s.stateObjectsPending {
+		if _, exist := state.stateObjects[addr]; !exist {
+			state.stateObjects[addr] = s.stateObjects[addr].deepCopy(state)
+		}
+		state.stateObjectsPending[addr] = struct{}{}
+	}
+	for addr := range s.stateObjectsDirty {
 		if _, exist := state.stateObjects[addr]; !exist {
-			state.stateObjects[addr] = self.stateObjects[addr].deepCopy(state)
-			state.stateObjectsDirty[addr] = struct{}{}
+			state.stateObjects[addr] = s.stateObjects[addr].deepCopy(state)
 		}
+		state.stateObjectsDirty[addr] = struct{}{}
 	}
-	for hash, logs := range self.logs {
+	for hash, logs := range s.logs {
 		cpy := make([]*types.Log, len(logs))
 		for i, l := range logs {
 			cpy[i] = new(types.Log)
@@ -658,46 +758,47 @@ func (self *StateDB) Copy() *StateDB {
 		}
 		state.logs[hash] = cpy
 	}
-	for hash, preimage := range self.preimages {
+	for hash, preimage := range s.preimages {
 		state.preimages[hash] = preimage
 	}
 	return state
 }
 
 // Snapshot returns an identifier for the current revision of the state.
-func (self *StateDB) Snapshot() int {
-	id := self.nextRevisionId
-	self.nextRevisionId++
-	self.validRevisions = append(self.validRevisions, revision{id, self.journal.length()})
+func (s *StateDB) Snapshot() int {
+	id := s.nextRevisionId
+	s.nextRevisionId++
+	s.validRevisions = append(s.validRevisions, revision{id, s.journal.length()})
 	return id
 }
 
 // RevertToSnapshot reverts all state changes made since the given revision.
-func (self *StateDB) RevertToSnapshot(revid int) {
+func (s *StateDB) RevertToSnapshot(revid int) {
 	// Find the snapshot in the stack of valid snapshots.
-	idx := sort.Search(len(self.validRevisions), func(i int) bool {
-		return self.validRevisions[i].id >= revid
+	idx := sort.Search(len(s.validRevisions), func(i int) bool {
+		return s.validRevisions[i].id >= revid
 	})
-	if idx == len(self.validRevisions) || self.validRevisions[idx].id != revid {
+	if idx == len(s.validRevisions) || s.validRevisions[idx].id != revid {
 		panic(fmt.Errorf("revision id %v cannot be reverted", revid))
 	}
-	snapshot := self.validRevisions[idx].journalIndex
+	snapshot := s.validRevisions[idx].journalIndex
 
 	// Replay the journal to undo changes and remove invalidated snapshots
-	self.journal.revert(self, snapshot)
-	self.validRevisions = self.validRevisions[:idx]
+	s.journal.revert(s, snapshot)
+	s.validRevisions = s.validRevisions[:idx]
 }
 
 // GetRefund returns the current value of the refund counter.
-func (self *StateDB) GetRefund() uint64 {
-	return self.refund
+func (s *StateDB) GetRefund() uint64 {
+	return s.refund
 }
 
-// Finalise finalises the state by removing the self destructed objects
-// and clears the journal as well as the refunds.
+// Finalise finalises the state by removing the self destructed objects and clears
+// the journal as well as the refunds. Finalise, however, will not push any updates
+// into the tries just yet. Only IntermediateRoot or Commit will do that.
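+//
+// When snapshotting is active, Finalise also records self-destructs in
+// snapDestructs and drops any pending snapAccounts/snapStorage entries for the
+// account, so the snapshot layer observes both the deletion and any later
+// resurrection within the same block.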
 func (s *StateDB) Finalise(deleteEmptyObjects bool) {
 	for addr := range s.journal.dirties {
-		stateObject, exist := s.stateObjects[addr]
+		obj, exist := s.stateObjects[addr]
 		if !exist {
 			// ripeMD is 'touched' at block 1714175, in tx 0x1237f737031e40bcde4a8b7e717b2d15e3ecadfe49bb1bbc71ee9deb09c6fcf2
 			// That tx goes out of gas, and although the notion of 'touched' does not exist there, the
@@ -707,13 +808,22 @@ func (s *StateDB) Finalise(deleteEmptyObjects bool) {
 			// Thus, we can safely ignore it here
 			continue
 		}
-
-		if stateObject.suicided || (deleteEmptyObjects && stateObject.empty()) {
-			s.deleteStateObject(stateObject)
+		if obj.suicided || (deleteEmptyObjects && obj.empty()) {
+			obj.deleted = true
+
+			// If state snapshotting is active, also mark the destruction there.
+			// Note, we can't do this only at the end of a block because multiple
+			// transactions within the same block might self destruct and then
+			// resurrect an account; but the snapshotter needs both events.
+			if s.snap != nil {
+				s.snapDestructs[obj.addrHash] = struct{}{} // We need to maintain account deletions explicitly (will remain set indefinitely)
+				delete(s.snapAccounts, obj.addrHash)       // Clear out any previously updated account data (may be recreated via a resurrect)
+				delete(s.snapStorage, obj.addrHash)        // Clear out any previously updated storage data (may be recreated via a resurrect)
+			}
 		} else {
-			stateObject.updateRoot(s.db)
-			s.updateStateObject(stateObject)
+			obj.finalise()
 		}
+		s.stateObjectsPending[addr] = struct{}{}
 		s.stateObjectsDirty[addr] = struct{}{}
 	}
 	// Invalidate journal because reverting across transactions is not allowed.
@@ -724,8 +834,21 @@ func (s *StateDB) Finalise(deleteEmptyObjects bool) {
 // It is called in between transactions to get the root hash that
 // goes into transaction receipts.
 func (s *StateDB) IntermediateRoot(deleteEmptyObjects bool) common.Hash {
+	// Finalise all the dirty storage states and write them into the tries
 	s.Finalise(deleteEmptyObjects)
 
+	for addr := range s.stateObjectsPending {
+		obj := s.stateObjects[addr]
+		if obj.deleted {
+			s.deleteStateObject(obj)
+		} else {
+			obj.updateRoot(s.db)
+			s.updateStateObject(obj)
+		}
+	}
+	if len(s.stateObjectsPending) > 0 {
+		s.stateObjectsPending = make(map[common.Address]struct{})
+	}
 	// Track the amount of time wasted on hashing the account trie
 	if metrics.EnabledExpensive {
 		defer func(start time.Time) { s.AccountHashes += time.Since(start) }(time.Now())
@@ -735,65 +858,86 @@ func (s *StateDB) IntermediateRoot(deleteEmptyObjects bool) common.Hash {
 
 // Prepare sets the current transaction hash and index and block hash which is
 // used when the EVM emits new state logs.
-func (self *StateDB) Prepare(thash, bhash common.Hash, ti int) {
-	self.thash = thash
-	self.bhash = bhash
-	self.txIndex = ti
+func (s *StateDB) Prepare(thash, bhash common.Hash, ti int) {
+	s.thash = thash
+	s.bhash = bhash
+	s.txIndex = ti
 }
 
 func (s *StateDB) clearJournalAndRefund() {
-	s.journal = newJournal()
-	s.validRevisions = s.validRevisions[:0]
-	s.refund = 0
+	if len(s.journal.entries) > 0 {
+		s.journal = newJournal()
+		s.refund = 0
+	}
+	s.validRevisions = s.validRevisions[:0] // Snapshots can be created without journal entries
 }
 
 // Commit writes the state to the underlying in-memory trie database.
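+//
+// Commit implicitly runs IntermediateRoot first, so all pending objects are
+// flushed into the tries. Dirty contract code is written through a disk batch
+// and each object's storage trie is committed before the account trie itself.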
-func (s *StateDB) Commit(deleteEmptyObjects bool) (root common.Hash, err error) {
-	defer s.clearJournalAndRefund()
-
-	for addr := range s.journal.dirties {
-		s.stateObjectsDirty[addr] = struct{}{}
+func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) {
+	if s.dbErr != nil {
+		return common.Hash{}, fmt.Errorf("commit aborted due to earlier error: %v", s.dbErr)
 	}
+	// Finalize any pending changes and merge everything into the tries
+	s.IntermediateRoot(deleteEmptyObjects)
+
 	// Commit objects to the trie, measuring the elapsed time
-	for addr, stateObject := range s.stateObjects {
-		_, isDirty := s.stateObjectsDirty[addr]
-		switch {
-		case stateObject.suicided || (isDirty && deleteEmptyObjects && stateObject.empty()):
-			// If the object has been removed, don't bother syncing it
-			// and just mark it for deletion in the trie.
-			s.deleteStateObject(stateObject)
-		case isDirty:
+	codeWriter := s.db.TrieDB().DiskDB().NewBatch()
+	for addr := range s.stateObjectsDirty {
+		if obj := s.stateObjects[addr]; !obj.deleted {
 			// Write any contract code associated with the state object
-			if stateObject.code != nil && stateObject.dirtyCode {
-				s.db.TrieDB().InsertBlob(common.BytesToHash(stateObject.CodeHash()), stateObject.code)
-				stateObject.dirtyCode = false
+			if obj.code != nil && obj.dirtyCode {
+				rawdb.WriteCode(codeWriter, common.BytesToHash(obj.CodeHash()), obj.code)
+				obj.dirtyCode = false
 			}
-			// Write any storage changes in the state object to its storage trie.
-			if err := stateObject.CommitTrie(s.db); err != nil {
+			// Write any storage changes in the state object to its storage trie
+			if err := obj.CommitTrie(s.db); err != nil {
 				return common.Hash{}, err
 			}
-			// Update the object in the main account trie.
-			s.updateStateObject(stateObject)
 		}
-		delete(s.stateObjectsDirty, addr)
+	}
+	if len(s.stateObjectsDirty) > 0 {
+		s.stateObjectsDirty = make(map[common.Address]struct{})
+	}
+	if codeWriter.ValueSize() > 0 {
+		if err := codeWriter.Write(); err != nil {
+			log.Crit("Failed to commit dirty codes", "error", err)
+		}
 	}
 	// Write the account trie changes, measuring the amount of wasted time
+	var start time.Time
 	if metrics.EnabledExpensive {
-		defer func(start time.Time) { s.AccountCommits += time.Since(start) }(time.Now())
+		start = time.Now()
 	}
-	root, err = s.trie.Commit(func(leaf []byte, parent common.Hash) error {
-		var account Account
+	// The onleaf func is called _serially_, so we can reuse the same account
+	// for unmarshalling every time.
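+	// (Reusing one Account value across invocations is only safe because of
+	// that serial guarantee; a concurrent committer would need one per call.)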
+	var account Account
+	root, err := s.trie.Commit(func(path []byte, leaf []byte, parent common.Hash) error {
 		if err := rlp.DecodeBytes(leaf, &account); err != nil {
 			return nil
 		}
 		if account.Root != emptyRoot {
 			s.db.TrieDB().Reference(account.Root, parent)
 		}
-		code := common.BytesToHash(account.CodeHash)
-		if code != emptyCode {
-			s.db.TrieDB().Reference(code, parent)
-		}
 		return nil
 	})
+	if metrics.EnabledExpensive {
+		s.AccountCommits += time.Since(start)
+	}
+	// If snapshotting is enabled, update the snapshot tree with this new version
+	if s.snap != nil {
+		if metrics.EnabledExpensive {
+			defer func(start time.Time) { s.SnapshotCommits += time.Since(start) }(time.Now())
+		}
+		// Only update if there's a state transition (skip empty Clique blocks)
+		if parent := s.snap.Root(); parent != root {
+			if err := s.snaps.Update(root, parent, s.snapDestructs, s.snapAccounts, s.snapStorage); err != nil {
+				log.Warn("Failed to update snapshot tree", "from", parent, "to", root, "err", err)
+			}
+			if err := s.snaps.Cap(root, 127); err != nil { // Persistent layer is 128th, the last available trie
				log.Warn("Failed to cap snapshot tree", "root", root, "layers", 127, "err", err)
+			}
+		}
+		s.snap, s.snapDestructs, s.snapAccounts, s.snapStorage = nil, nil, nil, nil
+	}
 	return root, err
 }
diff --git a/core/state/sync.go b/core/state/sync.go
index 873395c..1018b78 100644
--- a/core/state/sync.go
+++ b/core/state/sync.go
@@ -19,22 +19,22 @@ package state
 import (
 	"bytes"
 
-	"github.com/ava-labs/go-ethereum/common"
-	"github.com/ava-labs/go-ethereum/ethdb"
-	"github.com/ava-labs/go-ethereum/rlp"
-	"github.com/ava-labs/go-ethereum/trie"
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/ethdb"
+	"github.com/ethereum/go-ethereum/rlp"
+	"github.com/ethereum/go-ethereum/trie"
 )
 
 // NewStateSync creates a new state trie download scheduler.
 func NewStateSync(root common.Hash, database ethdb.KeyValueReader, bloom *trie.SyncBloom) *trie.Sync {
 	var syncer *trie.Sync
-	callback := func(leaf []byte, parent common.Hash) error {
+	callback := func(path []byte, leaf []byte, parent common.Hash) error {
 		var obj Account
 		if err := rlp.Decode(bytes.NewReader(leaf), &obj); err != nil {
 			return err
 		}
-		syncer.AddSubTrie(obj.Root, 64, parent, nil)
-		syncer.AddRawEntry(common.BytesToHash(obj.CodeHash), 64, parent)
+		syncer.AddSubTrie(obj.Root, path, parent, nil)
+		syncer.AddCodeEntry(common.BytesToHash(obj.CodeHash), path, parent)
 		return nil
 	}
 	syncer = trie.NewSync(root, database, callback, bloom)
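
The snapshot-first read path added in getDeletedStateObject above — consult the flat snapshot, fall back to the trie only when the snapshot is missing or returns an error — is the central access pattern of this diff. Below is a minimal, self-contained Go sketch of that control flow; every type and name in it (flatReader, trieReader, account) is an illustrative stand-in, not a coreth API:

package main

import (
	"errors"
	"fmt"
)

// account is a hypothetical stand-in for the real Account struct.
type account struct {
	Nonce   uint64
	Balance uint64
}

// flatReader models the snapshot layer: fast, but possibly unavailable.
type flatReader struct {
	accounts map[string]*account
	healthy  bool
}

// Account returns nil with no error when the account is known to be absent,
// mirroring how a snapshot miss is authoritative.
func (f *flatReader) Account(addr string) (*account, error) {
	if !f.healthy {
		return nil, errors.New("snapshot unavailable")
	}
	return f.accounts[addr], nil
}

// trieReader models the authoritative (slower) trie lookup.
type trieReader struct {
	accounts map[string]*account
}

func (t *trieReader) Account(addr string) (*account, error) {
	return t.accounts[addr], nil
}

// readAccount mirrors getDeletedStateObject's control flow: prefer the
// snapshot; fall back to the trie only when the snapshot is absent or errors.
func readAccount(snap *flatReader, trie *trieReader, addr string) (*account, error) {
	if snap != nil {
		if acc, err := snap.Account(addr); err == nil {
			return acc, nil
		}
		// Only an error falls through to the trie.
	}
	return trie.Account(addr)
}

func main() {
	trie := &trieReader{accounts: map[string]*account{"alice": {Nonce: 1, Balance: 100}}}
	snap := &flatReader{accounts: map[string]*account{}, healthy: false}

	acc, err := readAccount(snap, trie, "alice")
	fmt.Println(acc.Balance, err) // 100 <nil> — snapshot errored, trie served the read
}

Note the asymmetry the sketch preserves: a nil account from a healthy snapshot is trusted as "does not exist" and short-circuits, while only a read error falls through to the trie — the same distinction getDeletedStateObject draws between acc == nil and err != nil.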