4 files changed, 942 insertions, 0 deletions
diff --git a/core/bloombits/doc.go b/core/bloombits/doc.go
new file mode 100644
index 0000000..3d159e7
--- /dev/null
+++ b/core/bloombits/doc.go
@@ -0,0 +1,18 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+// Package bloombits implements bloom filtering on batches of data.
+package bloombits
diff --git a/core/bloombits/generator.go b/core/bloombits/generator.go
new file mode 100644
index 0000000..3dd54b6
--- /dev/null
+++ b/core/bloombits/generator.go
@@ -0,0 +1,93 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package bloombits
+
+import (
+	"errors"
+
+	"github.com/ava-labs/coreth/core/types"
+)
+
+var (
+	// errSectionOutOfBounds is returned if the user tried to add more bloom filters
+	// to the batch than available space, or if tries to retrieve above the capacity.
+	errSectionOutOfBounds = errors.New("section out of bounds")
+
+	// errBloomBitOutOfBounds is returned if the user tried to retrieve specified
+	// bit bloom above the capacity.
+	errBloomBitOutOfBounds = errors.New("bloom bit out of bounds")
+)
+
+// Generator takes a number of bloom filters and generates the rotated bloom bits
+// to be used for batched filtering.
+type Generator struct {
+	blooms   [types.BloomBitLength][]byte // Rotated blooms for per-bit matching
+	sections uint                         // Number of sections to batch together
+	nextSec  uint                         // Next section to set when adding a bloom
+}
+
+// NewGenerator creates a rotated bloom generator that can iteratively fill a
+// batched bloom filter's bits.
+func NewGenerator(sections uint) (*Generator, error) {
+	if sections%8 != 0 {
+		return nil, errors.New("section count not multiple of 8")
+	}
+	b := &Generator{sections: sections}
+	for i := 0; i < types.BloomBitLength; i++ {
+		b.blooms[i] = make([]byte, sections/8)
+	}
+	return b, nil
+}
+
+// AddBloom takes a single bloom filter and sets the corresponding bit column
+// in memory accordingly.
+func (b *Generator) AddBloom(index uint, bloom types.Bloom) error {
+	// Make sure we're not adding more bloom filters than our capacity
+	if b.nextSec >= b.sections {
+		return errSectionOutOfBounds
+	}
+	if b.nextSec != index {
+		return errors.New("bloom filter with unexpected index")
+	}
+	// Rotate the bloom and insert into our collection
+	byteIndex := b.nextSec / 8
+	bitMask := byte(1) << byte(7-b.nextSec%8)
+
+	for i := 0; i < types.BloomBitLength; i++ {
+		bloomByteIndex := types.BloomByteLength - 1 - i/8
+		bloomBitMask := byte(1) << byte(i%8)
+
+		if (bloom[bloomByteIndex] & bloomBitMask) != 0 {
+			b.blooms[i][byteIndex] |= bitMask
+		}
+	}
+	b.nextSec++
+
+	return nil
+}
+
+// Bitset returns the bit vector belonging to the given bit index after all
+// blooms have been added.
+func (b *Generator) Bitset(idx uint) ([]byte, error) {
+	if b.nextSec != b.sections {
+		return nil, errors.New("bloom not fully generated yet")
+	}
+	if idx >= types.BloomBitLength {
+		return nil, errBloomBitOutOfBounds
+	}
+	return b.blooms[idx], nil
+}
diff --git a/core/bloombits/matcher.go b/core/bloombits/matcher.go
new file mode 100644
index 0000000..fdf296a
--- /dev/null
+++ b/core/bloombits/matcher.go
@@ -0,0 +1,650 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package bloombits
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"math"
+	"sort"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/ava-labs/go-ethereum/common/bitutil"
+	"github.com/ava-labs/go-ethereum/crypto"
+)
+
+// bloomIndexes represents the bit indexes inside the bloom filter that belong
+// to some key.
+type bloomIndexes [3]uint
+
+// calcBloomIndexes returns the bloom filter bit indexes belonging to the given key.
+func calcBloomIndexes(b []byte) bloomIndexes {
+	b = crypto.Keccak256(b)
+
+	var idxs bloomIndexes
+	for i := 0; i < len(idxs); i++ {
+		idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1])
+	}
+	return idxs
+}
+
+// partialMatches with a non-nil vector represents a section in which some sub-
+// matchers have already found potential matches. Subsequent sub-matchers will
+// binary AND their matches with this vector. If vector is nil, it represents a
+// section to be processed by the first sub-matcher.
+type partialMatches struct {
+	section uint64
+	bitset  []byte
+}
+
+// Retrieval represents a request for retrieval task assignments for a given
+// bit with the given number of fetch elements, or a response for such a request.
+// It can also have the actual results set to be used as a delivery data struct.
+//
+// The contest and error fields are used by the light client to terminate matching
+// early if an error is encountered on some path of the pipeline.
+type Retrieval struct {
+	Bit      uint
+	Sections []uint64
+	Bitsets  [][]byte
+
+	Context context.Context
+	Error   error
+}
+
+// Matcher is a pipelined system of schedulers and logic matchers which perform
+// binary AND/OR operations on the bit-streams, creating a stream of potential
+// blocks to inspect for data content.
+type Matcher struct {
+	sectionSize uint64 // Size of the data batches to filter on
+
+	filters    [][]bloomIndexes    // Filter the system is matching for
+	schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits
+
+	retrievers chan chan uint       // Retriever processes waiting for bit allocations
+	counters   chan chan uint       // Retriever processes waiting for task count reports
+	retrievals chan chan *Retrieval // Retriever processes waiting for task allocations
+	deliveries chan *Retrieval      // Retriever processes waiting for task response deliveries
+
+	running uint32 // Atomic flag whether a session is live or not
+}
+
+// NewMatcher creates a new pipeline for retrieving bloom bit streams and doing
+// address and topic filtering on them. Setting a filter component to `nil` is
+// allowed and will result in that filter rule being skipped (OR 0x11...1).
+func NewMatcher(sectionSize uint64, filters [][][]byte) *Matcher {
+	// Create the matcher instance
+	m := &Matcher{
+		sectionSize: sectionSize,
+		schedulers:  make(map[uint]*scheduler),
+		retrievers:  make(chan chan uint),
+		counters:    make(chan chan uint),
+		retrievals:  make(chan chan *Retrieval),
+		deliveries:  make(chan *Retrieval),
+	}
+	// Calculate the bloom bit indexes for the groups we're interested in
+	m.filters = nil
+
+	for _, filter := range filters {
+		// Gather the bit indexes of the filter rule, special casing the nil filter
+		if len(filter) == 0 {
+			continue
+		}
+		bloomBits := make([]bloomIndexes, len(filter))
+		for i, clause := range filter {
+			if clause == nil {
+				bloomBits = nil
+				break
+			}
+			bloomBits[i] = calcBloomIndexes(clause)
+		}
+		// Accumulate the filter rules if no nil rule was within
+		if bloomBits != nil {
+			m.filters = append(m.filters, bloomBits)
+		}
+	}
+	// For every bit, create a scheduler to load/download the bit vectors
+	for _, bloomIndexLists := range m.filters {
+		for _, bloomIndexList := range bloomIndexLists {
+			for _, bloomIndex := range bloomIndexList {
+				m.addScheduler(bloomIndex)
+			}
+		}
+	}
+	return m
+}
+
+// addScheduler adds a bit stream retrieval scheduler for the given bit index if
+// it has not existed before. If the bit is already selected for filtering, the
+// existing scheduler can be used.
+func (m *Matcher) addScheduler(idx uint) {
+	if _, ok := m.schedulers[idx]; ok {
+		return
+	}
+	m.schedulers[idx] = newScheduler(idx)
+}
+
+// Start starts the matching process and returns a stream of bloom matches in
+// a given range of blocks. If there are no more matches in the range, the result
+// channel is closed.
+func (m *Matcher) Start(ctx context.Context, begin, end uint64, results chan uint64) (*MatcherSession, error) {
+	// Make sure we're not creating concurrent sessions
+	if atomic.SwapUint32(&m.running, 1) == 1 {
+		return nil, errors.New("matcher already running")
+	}
+	defer atomic.StoreUint32(&m.running, 0)
+
+	// Initiate a new matching round
+	session := &MatcherSession{
+		matcher: m,
+		quit:    make(chan struct{}),
+		kill:    make(chan struct{}),
+		ctx:     ctx,
+	}
+	for _, scheduler := range m.schedulers {
+		scheduler.reset()
+	}
+	sink := m.run(begin, end, cap(results), session)
+
+	// Read the output from the result sink and deliver to the user
+	session.pend.Add(1)
+	go func() {
+		defer session.pend.Done()
+		defer close(results)
+
+		for {
+			select {
+			case <-session.quit:
+				return
+
+			case res, ok := <-sink:
+				// New match result found
+				if !ok {
+					return
+				}
+				// Calculate the first and last blocks of the section
+				sectionStart := res.section * m.sectionSize
+
+				first := sectionStart
+				if begin > first {
+					first = begin
+				}
+				last := sectionStart + m.sectionSize - 1
+				if end < last {
+					last = end
+				}
+				// Iterate over all the blocks in the section and return the matching ones
+				for i := first; i <= last; i++ {
+					// Skip the entire byte if no matches are found inside (and we're processing an entire byte!)
+					next := res.bitset[(i-sectionStart)/8]
+					if next == 0 {
+						if i%8 == 0 {
+							i += 7
+						}
+						continue
+					}
+					// Some bit it set, do the actual submatching
+					if bit := 7 - i%8; next&(1<<bit) != 0 {
+						select {
+						case <-session.quit:
+							return
+						case results <- i:
+						}
+					}
+				}
+			}
+		}
+	}()
+	return session, nil
+}
+
+// run creates a daisy-chain of sub-matchers, one for the address set and one
+// for each topic set, each sub-matcher receiving a section only if the previous
+// ones have all found a potential match in one of the blocks of the section,
+// then binary AND-ing its own matches and forwarding the result to the next one.
+//
+// The method starts feeding the section indexes into the first sub-matcher on a
+// new goroutine and returns a sink channel receiving the results.
+func (m *Matcher) run(begin, end uint64, buffer int, session *MatcherSession) chan *partialMatches {
+	// Create the source channel and feed section indexes into
+	source := make(chan *partialMatches, buffer)
+
+	session.pend.Add(1)
+	go func() {
+		defer session.pend.Done()
+		defer close(source)
+
+		for i := begin / m.sectionSize; i <= end/m.sectionSize; i++ {
+			select {
+			case <-session.quit:
+				return
+			case source <- &partialMatches{i, bytes.Repeat([]byte{0xff}, int(m.sectionSize/8))}:
+			}
+		}
+	}()
+	// Assemble the daisy-chained filtering pipeline
+	next := source
+	dist := make(chan *request, buffer)
+
+	for _, bloom := range m.filters {
+		next = m.subMatch(next, dist, bloom, session)
+	}
+	// Start the request distribution
+	session.pend.Add(1)
+	go m.distributor(dist, session)
+
+	return next
+}
+
+// subMatch creates a sub-matcher that filters for a set of addresses or topics, binary OR-s those matches, then
+// binary AND-s the result to the daisy-chain input (source) and forwards it to the daisy-chain output.
+// The matches of each address/topic are calculated by fetching the given sections of the three bloom bit indexes belonging to
+// that address/topic, and binary AND-ing those vectors together.
+func (m *Matcher) subMatch(source chan *partialMatches, dist chan *request, bloom []bloomIndexes, session *MatcherSession) chan *partialMatches {
+	// Start the concurrent schedulers for each bit required by the bloom filter
+	sectionSources := make([][3]chan uint64, len(bloom))
+	sectionSinks := make([][3]chan []byte, len(bloom))
+	for i, bits := range bloom {
+		for j, bit := range bits {
+			sectionSources[i][j] = make(chan uint64, cap(source))
+			sectionSinks[i][j] = make(chan []byte, cap(source))
+
+			m.schedulers[bit].run(sectionSources[i][j], dist, sectionSinks[i][j], session.quit, &session.pend)
+		}
+	}
+
+	process := make(chan *partialMatches, cap(source)) // entries from source are forwarded here after fetches have been initiated
+	results := make(chan *partialMatches, cap(source))
+
+	session.pend.Add(2)
+	go func() {
+		// Tear down the goroutine and terminate all source channels
+		defer session.pend.Done()
+		defer close(process)
+
+		defer func() {
+			for _, bloomSources := range sectionSources {
+				for _, bitSource := range bloomSources {
+					close(bitSource)
+				}
+			}
+		}()
+		// Read sections from the source channel and multiplex into all bit-schedulers
+		for {
+			select {
+			case <-session.quit:
+				return
+
+			case subres, ok := <-source:
+				// New subresult from previous link
+				if !ok {
+					return
+				}
+				// Multiplex the section index to all bit-schedulers
+				for _, bloomSources := range sectionSources {
+					for _, bitSource := range bloomSources {
+						select {
+						case <-session.quit:
+							return
+						case bitSource <- subres.section:
+						}
+					}
+				}
+				// Notify the processor that this section will become available
+				select {
+				case <-session.quit:
+					return
+				case process <- subres:
+				}
+			}
+		}
+	}()
+
+	go func() {
+		// Tear down the goroutine and terminate the final sink channel
+		defer session.pend.Done()
+		defer close(results)
+
+		// Read the source notifications and collect the delivered results
+		for {
+			select {
+			case <-session.quit:
+				return
+
+			case subres, ok := <-process:
+				// Notified of a section being retrieved
+				if !ok {
+					return
+				}
+				// Gather all the sub-results and merge them together
+				var orVector []byte
+				for _, bloomSinks := range sectionSinks {
+					var andVector []byte
+					for _, bitSink := range bloomSinks {
+						var data []byte
+						select {
+						case <-session.quit:
+							return
+						case data = <-bitSink:
+						}
+						if andVector == nil {
+							andVector = make([]byte, int(m.sectionSize/8))
+							copy(andVector, data)
+						} else {
+							bitutil.ANDBytes(andVector, andVector, data)
+						}
+					}
+					if orVector == nil {
+						orVector = andVector
+					} else {
+						bitutil.ORBytes(orVector, orVector, andVector)
+					}
+				}
+
+				if orVector == nil {
+					orVector = make([]byte, int(m.sectionSize/8))
+				}
+				if subres.bitset != nil {
+					bitutil.ANDBytes(orVector, orVector, subres.bitset)
+				}
+				if bitutil.TestBytes(orVector) {
+					select {
+					case <-session.quit:
+						return
+					case results <- &partialMatches{subres.section, orVector}:
+					}
+				}
+			}
+		}
+	}()
+	return results
+}
+
+// distributor receives requests from the schedulers and queues them into a set
+// of pending requests, which are assigned to retrievers wanting to fulfil them.
+func (m *Matcher) distributor(dist chan *request, session *MatcherSession) {
+	defer session.pend.Done()
+
+	var (
+		requests   = make(map[uint][]uint64) // Per-bit list of section requests, ordered by section number
+		unallocs   = make(map[uint]struct{}) // Bits with pending requests but not allocated to any retriever
+		retrievers chan chan uint            // Waiting retrievers (toggled to nil if unallocs is empty)
+	)
+	var (
+		allocs   int            // Number of active allocations to handle graceful shutdown requests
+		shutdown = session.quit // Shutdown request channel, will gracefully wait for pending requests
+	)
+
+	// assign is a helper method fo try to assign a pending bit an actively
+	// listening servicer, or schedule it up for later when one arrives.
+	assign := func(bit uint) {
+		select {
+		case fetcher := <-m.retrievers:
+			allocs++
+			fetcher <- bit
+		default:
+			// No retrievers active, start listening for new ones
+			retrievers = m.retrievers
+			unallocs[bit] = struct{}{}
+		}
+	}
+
+	for {
+		select {
+		case <-shutdown:
+			// Graceful shutdown requested, wait until all pending requests are honoured
+			if allocs == 0 {
+				return
+			}
+			shutdown = nil
+
+		case <-session.kill:
+			// Pending requests not honoured in time, hard terminate
+			return
+
+		case req := <-dist:
+			// New retrieval request arrived to be distributed to some fetcher process
+			queue := requests[req.bit]
+			index := sort.Search(len(queue), func(i int) bool { return queue[i] >= req.section })
+			requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...)
+
+			// If it's a new bit and we have waiting fetchers, allocate to them
+			if len(queue) == 0 {
+				assign(req.bit)
+			}
+
+		case fetcher := <-retrievers:
+			// New retriever arrived, find the lowest section-ed bit to assign
+			bit, best := uint(0), uint64(math.MaxUint64)
+			for idx := range unallocs {
+				if requests[idx][0] < best {
+					bit, best = idx, requests[idx][0]
+				}
+			}
+			// Stop tracking this bit (and alloc notifications if no more work is available)
+			delete(unallocs, bit)
+			if len(unallocs) == 0 {
+				retrievers = nil
+			}
+			allocs++
+			fetcher <- bit
+
+		case fetcher := <-m.counters:
+			// New task count request arrives, return number of items
+			fetcher <- uint(len(requests[<-fetcher]))
+
+		case fetcher := <-m.retrievals:
+			// New fetcher waiting for tasks to retrieve, assign
+			task := <-fetcher
+			if want := len(task.Sections); want >= len(requests[task.Bit]) {
+				task.Sections = requests[task.Bit]
+				delete(requests, task.Bit)
+			} else {
+				task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...)
+				requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...)
+			}
+			fetcher <- task
+
+			// If anything was left unallocated, try to assign to someone else
+			if len(requests[task.Bit]) > 0 {
+				assign(task.Bit)
+			}
+
+		case result := <-m.deliveries:
+			// New retrieval task response from fetcher, split out missing sections and
+			// deliver complete ones
+			var (
+				sections = make([]uint64, 0, len(result.Sections))
+				bitsets  = make([][]byte, 0, len(result.Bitsets))
+				missing  = make([]uint64, 0, len(result.Sections))
+			)
+			for i, bitset := range result.Bitsets {
+				if len(bitset) == 0 {
+					missing = append(missing, result.Sections[i])
+					continue
+				}
+				sections = append(sections, result.Sections[i])
+				bitsets = append(bitsets, bitset)
+			}
+			m.schedulers[result.Bit].deliver(sections, bitsets)
+			allocs--
+
+			// Reschedule missing sections and allocate bit if newly available
+			if len(missing) > 0 {
+				queue := requests[result.Bit]
+				for _, section := range missing {
+					index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section })
+					queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...)
+				}
+				requests[result.Bit] = queue
+
+				if len(queue) == len(missing) {
+					assign(result.Bit)
+				}
+			}
+			// If we're in the process of shutting down, terminate
+			if allocs == 0 && shutdown == nil {
+				return
+			}
+		}
+	}
+}
+
+// MatcherSession is returned by a started matcher to be used as a terminator
+// for the actively running matching operation.
+type MatcherSession struct {
+	matcher *Matcher
+
+	closer sync.Once     // Sync object to ensure we only ever close once
+	quit   chan struct{} // Quit channel to request pipeline termination
+	kill   chan struct{} // Term channel to signal non-graceful forced shutdown
+
+	ctx context.Context // Context used by the light client to abort filtering
+	err atomic.Value    // Global error to track retrieval failures deep in the chain
+
+	pend sync.WaitGroup
+}
+
+// Close stops the matching process and waits for all subprocesses to terminate
+// before returning. The timeout may be used for graceful shutdown, allowing the
+// currently running retrievals to complete before this time.
+func (s *MatcherSession) Close() {
+	s.closer.Do(func() {
+		// Signal termination and wait for all goroutines to tear down
+		close(s.quit)
+		time.AfterFunc(time.Second, func() { close(s.kill) })
+		s.pend.Wait()
+	})
+}
+
+// Error returns any failure encountered during the matching session.
+func (s *MatcherSession) Error() error {
+	if err := s.err.Load(); err != nil {
+		return err.(error)
+	}
+	return nil
+}
+
+// AllocateRetrieval assigns a bloom bit index to a client process that can either
+// immediately request and fetch the section contents assigned to this bit or wait
+// a little while for more sections to be requested.
+func (s *MatcherSession) AllocateRetrieval() (uint, bool) {
+	fetcher := make(chan uint)
+
+	select {
+	case <-s.quit:
+		return 0, false
+	case s.matcher.retrievers <- fetcher:
+		bit, ok := <-fetcher
+		return bit, ok
+	}
+}
+
+// PendingSections returns the number of pending section retrievals belonging to
+// the given bloom bit index.
+func (s *MatcherSession) PendingSections(bit uint) int {
+	fetcher := make(chan uint)
+
+	select {
+	case <-s.quit:
+		return 0
+	case s.matcher.counters <- fetcher:
+		fetcher <- bit
+		return int(<-fetcher)
+	}
+}
+
+// AllocateSections assigns all or part of an already allocated bit-task queue
+// to the requesting process.
+func (s *MatcherSession) AllocateSections(bit uint, count int) []uint64 {
+	fetcher := make(chan *Retrieval)
+
+	select {
+	case <-s.quit:
+		return nil
+	case s.matcher.retrievals <- fetcher:
+		task := &Retrieval{
+			Bit:      bit,
+			Sections: make([]uint64, count),
+		}
+		fetcher <- task
+		return (<-fetcher).Sections
+	}
+}
+
+// DeliverSections delivers a batch of section bit-vectors for a specific bloom
+// bit index to be injected into the processing pipeline.
+func (s *MatcherSession) DeliverSections(bit uint, sections []uint64, bitsets [][]byte) {
+	select {
+	case <-s.kill:
+		return
+	case s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets}:
+	}
+}
+
+// Multiplex polls the matcher session for retrieval tasks and multiplexes it into
+// the requested retrieval queue to be serviced together with other sessions.
+//
+// This method will block for the lifetime of the session. Even after termination
+// of the session, any request in-flight need to be responded to! Empty responses
+// are fine though in that case.
+func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) {
+	for {
+		// Allocate a new bloom bit index to retrieve data for, stopping when done
+		bit, ok := s.AllocateRetrieval()
+		if !ok {
+			return
+		}
+		// Bit allocated, throttle a bit if we're below our batch limit
+		if s.PendingSections(bit) < batch {
+			select {
+			case <-s.quit:
+				// Session terminating, we can't meaningfully service, abort
+				s.AllocateSections(bit, 0)
+				s.DeliverSections(bit, []uint64{}, [][]byte{})
+				return
+
+			case <-time.After(wait):
+				// Throttling up, fetch whatever's available
+			}
+		}
+		// Allocate as much as we can handle and request servicing
+		sections := s.AllocateSections(bit, batch)
+		request := make(chan *Retrieval)
+
+		select {
+		case <-s.quit:
+			// Session terminating, we can't meaningfully service, abort
+			s.DeliverSections(bit, sections, make([][]byte, len(sections)))
+			return
+
+		case mux <- request:
+			// Retrieval accepted, something must arrive before we're aborting
+			request <- &Retrieval{Bit: bit, Sections: sections, Context: s.ctx}
+
+			result := <-request
+			if result.Error != nil {
+				s.err.Store(result.Error)
+				s.Close()
+			}
+			s.DeliverSections(result.Bit, result.Sections, result.Bitsets)
+		}
+	}
+}
diff --git a/core/bloombits/scheduler.go b/core/bloombits/scheduler.go
new file mode 100644
index 0000000..6449c74
--- /dev/null
+++ b/core/bloombits/scheduler.go
@@ -0,0 +1,181 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package bloombits
+
+import (
+	"sync"
+)
+
+// request represents a bloom retrieval task to prioritize and pull from the local
+// database or remotely from the network.
+type request struct {
+	section uint64 // Section index to retrieve the a bit-vector from
+	bit     uint   // Bit index within the section to retrieve the vector of
+}
+
+// response represents the state of a requested bit-vector through a scheduler.
+type response struct {
+	cached []byte        // Cached bits to dedup multiple requests
+	done   chan struct{} // Channel to allow waiting for completion
+}
+
+// scheduler handles the scheduling of bloom-filter retrieval operations for
+// entire section-batches belonging to a single bloom bit. Beside scheduling the
+// retrieval operations, this struct also deduplicates the requests and caches
+// the results to minimize network/database overhead even in complex filtering
+// scenarios.
+type scheduler struct {
+	bit       uint                 // Index of the bit in the bloom filter this scheduler is responsible for
+	responses map[uint64]*response // Currently pending retrieval requests or already cached responses
+	lock      sync.Mutex           // Lock protecting the responses from concurrent access
+}
+
+// newScheduler creates a new bloom-filter retrieval scheduler for a specific
+// bit index.
+func newScheduler(idx uint) *scheduler {
+	return &scheduler{
+		bit:       idx,
+		responses: make(map[uint64]*response),
+	}
+}
+
+// run creates a retrieval pipeline, receiving section indexes from sections and
+// returning the results in the same order through the done channel. Concurrent
+// runs of the same scheduler are allowed, leading to retrieval task deduplication.
+func (s *scheduler) run(sections chan uint64, dist chan *request, done chan []byte, quit chan struct{}, wg *sync.WaitGroup) {
+	// Create a forwarder channel between requests and responses of the same size as
+	// the distribution channel (since that will block the pipeline anyway).
+	pend := make(chan uint64, cap(dist))
+
+	// Start the pipeline schedulers to forward between user -> distributor -> user
+	wg.Add(2)
+	go s.scheduleRequests(sections, dist, pend, quit, wg)
+	go s.scheduleDeliveries(pend, done, quit, wg)
+}
+
+// reset cleans up any leftovers from previous runs. This is required before a
+// restart to ensure the no previously requested but never delivered state will
+// cause a lockup.
+func (s *scheduler) reset() {
+	s.lock.Lock()
+	defer s.lock.Unlock()
+
+	for section, res := range s.responses {
+		if res.cached == nil {
+			delete(s.responses, section)
+		}
+	}
+}
+
+// scheduleRequests reads section retrieval requests from the input channel,
+// deduplicates the stream and pushes unique retrieval tasks into the distribution
+// channel for a database or network layer to honour.
+func (s *scheduler) scheduleRequests(reqs chan uint64, dist chan *request, pend chan uint64, quit chan struct{}, wg *sync.WaitGroup) {
+	// Clean up the goroutine and pipeline when done
+	defer wg.Done()
+	defer close(pend)
+
+	// Keep reading and scheduling section requests
+	for {
+		select {
+		case <-quit:
+			return
+
+		case section, ok := <-reqs:
+			// New section retrieval requested
+			if !ok {
+				return
+			}
+			// Deduplicate retrieval requests
+			unique := false
+
+			s.lock.Lock()
+			if s.responses[section] == nil {
+				s.responses[section] = &response{
+					done: make(chan struct{}),
+				}
+				unique = true
+			}
+			s.lock.Unlock()
+
+			// Schedule the section for retrieval and notify the deliverer to expect this section
+			if unique {
+				select {
+				case <-quit:
+					return
+				case dist <- &request{bit: s.bit, section: section}:
+				}
+			}
+			select {
+			case <-quit:
+				return
+			case pend <- section:
+			}
+		}
+	}
+}
+
+// scheduleDeliveries reads section acceptance notifications and waits for them
+// to be delivered, pushing them into the output data buffer.
+func (s *scheduler) scheduleDeliveries(pend chan uint64, done chan []byte, quit chan struct{}, wg *sync.WaitGroup) {
+	// Clean up the goroutine and pipeline when done
+	defer wg.Done()
+	defer close(done)
+
+	// Keep reading notifications and scheduling deliveries
+	for {
+		select {
+		case <-quit:
+			return
+
+		case idx, ok := <-pend:
+			// New section retrieval pending
+			if !ok {
+				return
+			}
+			// Wait until the request is honoured
+			s.lock.Lock()
+			res := s.responses[idx]
+			s.lock.Unlock()
+
+			select {
+			case <-quit:
+				return
+			case <-res.done:
+			}
+			// Deliver the result
+			select {
+			case <-quit:
+				return
+			case done <- res.cached:
+			}
+		}
+	}
+}
+
+// deliver is called by the request distributor when a reply to a request arrives.
+func (s *scheduler) deliver(sections []uint64, data [][]byte) {
+	s.lock.Lock()
+	defer s.lock.Unlock()
+
+	for i, section := range sections {
+		if res := s.responses[section]; res != nil && res.cached == nil { // Avoid non-requests and double deliveries
+			res.cached = data[i]
+			close(res.done)
+		}
+	}
+}