author    Determinant <ted.sybil@gmail.com>  2018-09-03 00:13:58 -0400
committer Determinant <ted.sybil@gmail.com>  2018-09-03 00:13:58 -0400
commit    c3368b286fbb1d6b8c22af8ce21e57b5a5720445 (patch)
tree      061de96c58873f642a24a151af1bf7ed937fb1c3
parent    2535cd89c13485cc4a8e68145c7cb5e8e9398e5c (diff)
parent    17f7fd821cf71717a158e2c38699baa6ab2f2af8 (diff)
Merge branch 'master' of github.com:Determinant/hot-stuff
-rw-r--r--  README.rst                                                   |   8
-rw-r--r--  include/hotstuff/client.h                                    |  16
-rw-r--r--  include/hotstuff/hotstuff.h                                  |   8
-rw-r--r--  include/hotstuff/liveness.h                                  | 345
-rw-r--r--  scripts/gen_conf.py                                          |  20
-rwxr-xr-x  scripts/run.sh                                               | 457
-rwxr-xr-x  scripts/run_client.sh                                        | 354
-rwxr-xr-x  scripts/run_demo.sh (renamed from scripts/run_replicas.sh)   |   0
-rwxr-xr-x  scripts/run_demo_client.sh                                   |   2
-rw-r--r--  scripts/thr_hist.py                                          |   5
-rw-r--r--  src/client.cpp                                               |   2
-rw-r--r--  src/hotstuff.cpp                                             |   2
-rw-r--r--  src/hotstuff_app.cpp                                         |  68
-rw-r--r--  src/hotstuff_client.cpp                                      |  39
14 files changed, 1220 insertions, 106 deletions
diff --git a/README.rst b/README.rst
index 7c526d5..ad93287 100644
--- a/README.rst
+++ b/README.rst
@@ -38,14 +38,14 @@ section may be incomplete and subject to changes.
::
- # clone from the repo
+ # install from the repo
git clone https://github.com/Determinant/hot-stuff.git
cd hot-stuff/
- git submodule update --recursive
+ git submodule update --init --recursive
# ensure openssl and libevent are installed on your machine
cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED=ON -DHOTSTUFF_PROTO_LOG=ON
make
- # start 4 demo replicas with scripts/run_replicas.sh
- # start the demo client with scripts/run_client.sh
+ # start 4 demo replicas with scripts/run_demo.sh
+ # start the demo client with scripts/run_demo_client.sh
diff --git a/include/hotstuff/client.h b/include/hotstuff/client.h
index 92b4eec..447a9db 100644
--- a/include/hotstuff/client.h
+++ b/include/hotstuff/client.h
@@ -26,8 +26,8 @@ struct MsgRespCmd {
};
class CommandDummy: public Command {
- static uint64_t cnt;
- uint64_t n;
+ uint32_t cid;
+ uint32_t n;
uint256_t hash;
public:
@@ -36,19 +36,15 @@ class CommandDummy: public Command {
~CommandDummy() override {}
- CommandDummy(uint64_t n):
- n(n), hash(salticidae::get_hash(*this)) {}
-
- static command_t make_cmd() {
- return new CommandDummy(cnt++);
- }
+ CommandDummy(uint32_t cid, uint32_t n):
+ cid(cid), n(n), hash(salticidae::get_hash(*this)) {}
void serialize(DataStream &s) const override {
- s << n;
+ s << cid << n;
}
void unserialize(DataStream &s) override {
- s >> n;
+ s >> cid >> n;
hash = salticidae::get_hash(*this);
}
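
The switch from a global counter to an explicit (cid, n) pair makes each client number its own commands, and the hash now covers both fields. A minimal round-trip sketch of the new layout (hedged: it assumes CommandDummy lives in the hotstuff namespace and that the Command base exposes get_hash(), consistent with the header above):

    #include "hotstuff/client.h"

    using hotstuff::CommandDummy;
    using salticidae::DataStream;

    int main() {
        CommandDummy cmd(/*cid=*/1, /*n=*/42);  // 42nd command of client 1
        DataStream s;
        cmd.serialize(s);                       // writes cid, then n
        CommandDummy copy(0, 0);
        copy.unserialize(s);                    // reads cid and n, recomputes the hash
        // copy.get_hash() == cmd.get_hash() should hold: the hash covers both fields
        return 0;
    }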
diff --git a/include/hotstuff/hotstuff.h b/include/hotstuff/hotstuff.h
index b0a6827..f9aad3d 100644
--- a/include/hotstuff/hotstuff.h
+++ b/include/hotstuff/hotstuff.h
@@ -23,13 +23,6 @@ using salticidae::_2;
const double ent_waiting_timeout = 10;
const double double_inf = 1e10;
-enum {
- PROPOSE = 0x0,
- VOTE = 0x1,
- QUERY_FETCH_BLK = 0x2,
- RESP_FETCH_BLK = 0x3,
-};
-
/** Network message format for HotStuff. */
struct MsgPropose {
static const opcode_t opcode = 0x0;
@@ -202,6 +195,7 @@ class HotStuffBase: public HotStuffCore {
void start(bool eb_loop = false);
size_t size() const { return pn.all_peers().size(); }
+ PaceMaker &get_pace_maker() { return *pmaker; }
void print_stat() const;
/* Helper functions */
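
The removed global opcode enum now lives inside each Msg* struct (e.g. MsgPropose::opcode), and the new get_pace_maker() accessor lets the application reach the liveness gadget directly, e.g. to trigger the impeach() hook added to PaceMaker in liveness.h below. A hedged sketch of that wiring (the watchdog function is illustrative, not part of this patch):

    // Application-level watchdog: demote a proposer that stopped making progress.
    // Assumes `hs` is a running hotstuff::HotStuffBase instance.
    void on_progress_stalled(hotstuff::HotStuffBase &hs) {
        // impeach() is a no-op for pace makers that do not override it
        hs.get_pace_maker().impeach();
    }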
diff --git a/include/hotstuff/liveness.h b/include/hotstuff/liveness.h
index 59306ab..8c9c9ab 100644
--- a/include/hotstuff/liveness.h
+++ b/include/hotstuff/liveness.h
@@ -32,6 +32,8 @@ class PaceMaker {
* to vote for a block. The promise is resolved with the next proposer's ID
* */
virtual promise_t beat_resp(ReplicaID last_proposer) = 0;
+ /** Impeach the current proposer. */
+ virtual void impeach() {}
};
using pacemaker_bt = BoxObj<PaceMaker>;
@@ -117,6 +119,8 @@ class PMWaitQC: public virtual PaceMaker {
std::queue<promise_t> pending_beats;
block_t last_proposed;
bool locked;
+ promise_t pm_qc_finish;
+ promise_t pm_wait_propose;
protected:
void schedule_next() {
@@ -124,19 +128,23 @@ class PMWaitQC: public virtual PaceMaker {
{
auto pm = pending_beats.front();
pending_beats.pop();
- hsc->async_qc_finish(last_proposed).then([this, pm]() {
- pm.resolve(get_proposer());
- });
+ pm_qc_finish.reject();
+ (pm_qc_finish = hsc->async_qc_finish(last_proposed))
+ .then([this, pm]() {
+ pm.resolve(get_proposer());
+ });
locked = true;
}
}
void update_last_proposed() {
- hsc->async_wait_proposal().then([this](const Proposal &prop) {
- update_last_proposed();
+ pm_wait_propose.reject();
+ (pm_wait_propose = hsc->async_wait_proposal()).then(
+ [this](const Proposal &prop) {
last_proposed = prop.blk;
locked = false;
schedule_next();
+ update_last_proposed();
});
}
@@ -155,7 +163,7 @@ class PMWaitQC: public virtual PaceMaker {
promise_t pm;
pending_beats.push(pm);
schedule_next();
- return pm;
+ return std::move(pm);
}
promise_t beat_resp(ReplicaID last_proposer) override {
@@ -198,23 +206,29 @@ class PaceMakerDummyFixed: public PaceMakerDummy {
};
/**
- * Simple long-standing proposer liveness gadget.
+ * Simple long-standing proposer liveness gadget (with randomization).
* There are three roles for each replica: proposer, candidate and follower.
*
* For a proposer, it proposes a new block and refrains from proposing the
* next block unless it receives the QC for the previous block. It will
- * give up the leadership and turn into a candidate when it hasn't seen such QC
- * for a while.
+ * give up the leadership and turn into a candidate when it sees a QC for a
+ * higher block or when it is impeached.
*
* For a follower, it never proposes any block, but keeps a timer for the QC
* for the block last proposed by the proposer (the proposer it believes to
- * be). When it times out without seeing such QC, the follower turns into a
- * candidate.
+ * be). When it times out without seeing such a QC, or when the proposer is
+ * impeached, the follower turns into a candidate.
*
* For a candidate, it periodically proposes empty blocks to synchronize the
* preferred branch, with a randomized timeout, and checks for any new QC. Once
* it sees such a new QC, if the QC was produced by itself, it becomes the
* proposer; otherwise it yields to the creator of the QC as a follower.
+ *
+ * CAUTION: this pace maker does not guarantee liveness when a Byzantine node
+ * contends with correct nodes and keeps proposing higher blocks to grab the
+ * leadership. If you want to use it in your system, make sure you introduce a
+ * mechanism to detect and ban such behavior, or use the round-robin style
+ * pace maker instead.
*/
class PMStickyProposer: virtual public PaceMaker {
enum {
@@ -227,6 +241,7 @@ class PMStickyProposer: virtual public PaceMaker {
EventContext ec;
/** QC timer or randomized timeout */
Event timer;
+ Event ev_imp;
block_t last_proposed;
/** the proposer it believes */
ReplicaID proposer;
@@ -242,12 +257,6 @@ class PMStickyProposer: virtual public PaceMaker {
promise_t pm_wait_propose;
promise_t pm_qc_finish;
- void reset_qc_timer() {
- timer.del();
- timer.add_with_timeout(qc_timeout);
- HOTSTUFF_LOG_PROTO("QC timer reset");
- }
-
void clear_promises() {
pm_wait_receive_proposal.reject();
pm_wait_propose.reject();
@@ -273,13 +282,12 @@ class PMStickyProposer: virtual public PaceMaker {
auto &qc_ref = prop.blk->get_qc_ref();
if (last_proposed && qc_ref != last_proposed)
{
- HOTSTUFF_LOG_PROTO("proposer misbehave");
+ HOTSTUFF_LOG_INFO("proposer misbehave");
to_candidate(); /* proposer misbehave */
return;
}
HOTSTUFF_LOG_PROTO("proposer emits new QC");
last_proposed = prop.blk;
- reset_qc_timer();
}
reg_follower_receive_proposal();
}
@@ -294,8 +302,10 @@ class PMStickyProposer: virtual public PaceMaker {
pm_qc_finish.reject();
(pm_qc_finish = hsc->async_qc_finish(last_proposed))
.then([this, pm]() {
- reset_qc_timer();
+ timer.del();
pm.resolve(proposer);
+ timer.add_with_timeout(qc_timeout);
+ HOTSTUFF_LOG_PROTO("QC timer reset");
});
locked = true;
}
@@ -332,7 +342,7 @@ class PMStickyProposer: virtual public PaceMaker {
(pm_wait_propose = hsc->async_wait_proposal()).then([this](const Proposal &prop) {
const auto &blk = prop.blk;
(pm_qc_finish = hsc->async_qc_finish(blk)).then([this, blk]() {
- HOTSTUFF_LOG_PROTO("collected QC for %s", std::string(*blk).c_str());
+ HOTSTUFF_LOG_INFO("collected QC for %s", std::string(*blk).c_str());
/* managed to collect a QC */
to_proposer();
propose_elect_block();
@@ -341,42 +351,40 @@ class PMStickyProposer: virtual public PaceMaker {
double t = salticidae::gen_rand_timeout(candidate_timeout);
timer.del();
timer.add_with_timeout(t);
- HOTSTUFF_LOG_PROTO("candidate next try in %.2fs", t);
+ HOTSTUFF_LOG_INFO("candidate next try in %.2fs", t);
propose_elect_block();
}
- void reg_candidate_receive_proposal() {
+ void reg_cp_receive_proposal() {
pm_wait_receive_proposal.reject();
(pm_wait_receive_proposal = hsc->async_wait_receive_proposal())
.then(
salticidae::generic_bind(
- &PMStickyProposer::candidate_receive_proposal, this, _1));
+ &PMStickyProposer::cp_receive_proposal, this, _1));
}
- void candidate_receive_proposal(const Proposal &prop) {
+ void cp_receive_proposal(const Proposal &prop) {
auto _proposer = prop.proposer;
auto &p = last_proposed_by[_proposer];
HOTSTUFF_LOG_PROTO("got block %s from %d", std::string(*prop.blk).c_str(), _proposer);
p.reject();
- (p = hsc->async_qc_finish(prop.blk)).then([this, _proposer]() {
- to_follower(_proposer);
+ (p = hsc->async_qc_finish(prop.blk)).then([this, blk=prop.blk, _proposer]() {
+ if (hsc->get_bqc()->get_qc_ref() == blk)
+ to_follower(_proposer);
});
- reg_candidate_receive_proposal();
+ reg_cp_receive_proposal();
}
/* role transitions */
void to_follower(ReplicaID new_proposer) {
- HOTSTUFF_LOG_PROTO("new role: follower");
+ HOTSTUFF_LOG_INFO("new role: follower");
clear_promises();
role = FOLLOWER;
proposer = new_proposer;
last_proposed = nullptr;
hsc->set_neg_vote(false);
- timer = Event(ec, -1, 0, [this](int, short) {
- /* unable to get a QC in time */
- to_candidate();
- });
+ timer.clear();
/* redirect all pending cmds to the new proposer */
while (!pending_beats.empty())
{
@@ -387,7 +395,7 @@ class PMStickyProposer: virtual public PaceMaker {
}
void to_proposer() {
- HOTSTUFF_LOG_PROTO("new role: proposer");
+ HOTSTUFF_LOG_INFO("new role: proposer");
clear_promises();
role = PROPOSER;
proposer = hsc->get_id();
@@ -397,12 +405,12 @@ class PMStickyProposer: virtual public PaceMaker {
/* proposer unable to get a QC in time */
to_candidate();
});
+ reg_cp_receive_proposal();
proposer_propose(Proposal(-1, uint256_t(), hsc->get_genesis(), nullptr));
- reset_qc_timer();
}
void to_candidate() {
- HOTSTUFF_LOG_PROTO("new role: candidate");
+ HOTSTUFF_LOG_INFO("new role: candidate");
clear_promises();
role = CANDIDATE;
proposer = hsc->get_id();
@@ -412,10 +420,20 @@ class PMStickyProposer: virtual public PaceMaker {
candidate_qc_timeout();
});
candidate_timeout = qc_timeout;
- reg_candidate_receive_proposal();
+ reg_cp_receive_proposal();
candidate_qc_timeout();
}
+ protected:
+ void impeach() override {
+ if (role == CANDIDATE) return;
+ ev_imp = Event(ec, -1, 0, [this](int, short) {
+ to_candidate();
+ });
+ ev_imp.add_with_timeout(0);
+ HOTSTUFF_LOG_INFO("schedule to impeach the proposer");
+ }
+
public:
PMStickyProposer(double qc_timeout, const EventContext &ec):
qc_timeout(qc_timeout), ec(ec) {}
@@ -459,6 +477,259 @@ struct PaceMakerSticky: public PMAllParents, public PMStickyProposer {
}
};
+/**
+ * Simple long-standing round-robin style proposer liveness gadget.
+ */
+class PMRoundRobinProposer: virtual public PaceMaker {
+ enum {
+ PROPOSER,
+ FOLLOWER,
+ CANDIDATE /* rotating */
+ } role;
+ double qc_timeout;
+ double candidate_timeout;
+ EventContext ec;
+ /** QC timer or randomized timeout */
+ Event timer;
+ Event ev_imp;
+ block_t last_proposed;
+ /** the proposer it believes */
+ ReplicaID proposer;
+
+ /* extra state needed for a proposer */
+ std::queue<promise_t> pending_beats;
+ bool locked;
+
+ /* extra state needed for a candidate */
+ std::unordered_map<ReplicaID, promise_t> last_proposed_by;
+
+ promise_t pm_wait_receive_proposal;
+ promise_t pm_wait_propose;
+ promise_t pm_qc_finish;
+
+ void clear_promises() {
+ pm_wait_receive_proposal.reject();
+ pm_wait_propose.reject();
+ pm_qc_finish.reject();
+ for (auto &p: last_proposed_by)
+ p.second.reject();
+ last_proposed_by.clear();
+ }
+
+ /* helper functions for a follower */
+
+ void reg_follower_receive_proposal() {
+ pm_wait_receive_proposal.reject();
+ (pm_wait_receive_proposal = hsc->async_wait_receive_proposal())
+ .then(
+ salticidae::generic_bind(
+ &PMRoundRobinProposer::follower_receive_proposal, this, _1));
+ }
+
+ void follower_receive_proposal(const Proposal &prop) {
+ if (prop.proposer == proposer)
+ {
+ auto &qc_ref = prop.blk->get_qc_ref();
+ if (last_proposed && qc_ref != last_proposed)
+ {
+ HOTSTUFF_LOG_INFO("proposer misbehave");
+ to_candidate(); /* proposer misbehave */
+ return;
+ }
+ HOTSTUFF_LOG_PROTO("proposer emits new QC");
+ last_proposed = prop.blk;
+ }
+ reg_follower_receive_proposal();
+ }
+
+ /* helper functions for a proposer */
+
+ void proposer_schedule_next() {
+ if (!pending_beats.empty() && !locked)
+ {
+ auto pm = pending_beats.front();
+ pending_beats.pop();
+ pm_qc_finish.reject();
+ (pm_qc_finish = hsc->async_qc_finish(last_proposed))
+ .then([this, pm]() {
+ timer.del();
+ pm.resolve(proposer);
+ timer.add_with_timeout(qc_timeout);
+ HOTSTUFF_LOG_PROTO("QC timer reset");
+ });
+ locked = true;
+ }
+ }
+
+ void reg_proposer_propose() {
+ pm_wait_propose.reject();
+ (pm_wait_propose = hsc->async_wait_proposal()).then(
+ salticidae::generic_bind(
+ &PMRoundRobinProposer::proposer_propose, this, _1));
+ }
+
+ void proposer_propose(const Proposal &prop) {
+ last_proposed = prop.blk;
+ locked = false;
+ proposer_schedule_next();
+ reg_proposer_propose();
+ }
+
+ void propose_elect_block() {
+ DataStream s;
+ /* FIXME: should extra data be the voter's id? */
+ s << hsc->get_id();
+ /* propose a block for leader election */
+ hsc->on_propose(std::vector<command_t>{},
+ get_parents(), std::move(s));
+ }
+
+ /* helper functions for a candidate */
+
+ void reg_cp_receive_proposal() {
+ pm_wait_receive_proposal.reject();
+ (pm_wait_receive_proposal = hsc->async_wait_receive_proposal())
+ .then(
+ salticidae::generic_bind(
+ &PMRoundRobinProposer::cp_receive_proposal, this, _1));
+ }
+
+ void cp_receive_proposal(const Proposal &prop) {
+ auto _proposer = prop.proposer;
+ auto &p = last_proposed_by[_proposer];
+ HOTSTUFF_LOG_PROTO("got block %s from %d", std::string(*prop.blk).c_str(), _proposer);
+ p.reject();
+ (p = hsc->async_qc_finish(prop.blk)).then([this, blk=prop.blk, _proposer]() {
+ if (_proposer == proposer)
+ to_follower();
+ });
+ reg_cp_receive_proposal();
+ }
+
+ void candidate_qc_timeout() {
+ timer.del();
+ timer.add_with_timeout(candidate_timeout);
+ candidate_timeout *= 1.01;
+ proposer = (proposer + 1) % hsc->get_config().nreplicas;
+ if (proposer == hsc->get_id())
+ {
+ pm_qc_finish.reject();
+ pm_wait_propose.reject();
+ (pm_wait_propose = hsc->async_wait_proposal()).then([this](const Proposal &prop) {
+ const auto &blk = prop.blk;
+ (pm_qc_finish = hsc->async_qc_finish(blk)).then([this, blk]() {
+ HOTSTUFF_LOG_INFO("collected QC for %s", std::string(*blk).c_str());
+ /* managed to collect a QC */
+ to_proposer();
+ propose_elect_block();
+ });
+ });
+ propose_elect_block();
+ }
+ HOTSTUFF_LOG_INFO("candidate rotates to %d, next try in %.2fs",
+ proposer, candidate_timeout);
+ }
+
+ /* role transitions */
+
+ void to_follower() {
+ HOTSTUFF_LOG_INFO("new role: follower");
+ clear_promises();
+ role = FOLLOWER;
+ last_proposed = nullptr;
+ hsc->set_neg_vote(false);
+ timer.clear();
+ /* redirect all pending cmds to the new proposer */
+ while (!pending_beats.empty())
+ {
+ pending_beats.front().resolve(proposer);
+ pending_beats.pop();
+ }
+ reg_follower_receive_proposal();
+ }
+
+ void to_proposer() {
+ HOTSTUFF_LOG_INFO("new role: proposer");
+ clear_promises();
+ role = PROPOSER;
+ last_proposed = nullptr;
+ hsc->set_neg_vote(true);
+ timer = Event(ec, -1, 0, [this](int, short) {
+ /* proposer unable to get a QC in time */
+ to_candidate();
+ });
+ proposer_propose(Proposal(-1, uint256_t(), hsc->get_genesis(), nullptr));
+ }
+
+ void to_candidate() {
+ HOTSTUFF_LOG_INFO("new role: candidate");
+ clear_promises();
+ role = CANDIDATE;
+ last_proposed = nullptr;
+ hsc->set_neg_vote(false);
+ timer = Event(ec, -1, 0, [this](int, short) {
+ candidate_qc_timeout();
+ });
+ candidate_timeout = qc_timeout * 0.1;
+ reg_cp_receive_proposal();
+ candidate_qc_timeout();
+ }
+
+ protected:
+ void impeach() override {
+ if (role == CANDIDATE) return;
+ ev_imp = Event(ec, -1, 0, [this](int, short) {
+ to_candidate();
+ });
+ ev_imp.add_with_timeout(0);
+ HOTSTUFF_LOG_INFO("schedule to impeach the proposer");
+ }
+
+ public:
+ PMRoundRobinProposer(double qc_timeout, const EventContext &ec):
+ qc_timeout(qc_timeout), ec(ec), proposer(0) {}
+
+ void init() {
+ to_candidate();
+ }
+
+ ReplicaID get_proposer() override {
+ return proposer;
+ }
+
+ promise_t beat() override {
+ if (role != FOLLOWER)
+ {
+ promise_t pm;
+ pending_beats.push(pm);
+ if (role == PROPOSER)
+ proposer_schedule_next();
+ return std::move(pm);
+ }
+ else
+ return promise_t([proposer=proposer](promise_t &pm) {
+ pm.resolve(proposer);
+ });
+ }
+
+ promise_t beat_resp(ReplicaID last_proposer) override {
+ return promise_t([this, last_proposer](promise_t &pm) {
+ pm.resolve(last_proposer);
+ });
+ }
+};
+
+struct PaceMakerRR: public PMAllParents, public PMRoundRobinProposer {
+ PaceMakerRR(int32_t parent_limit, double qc_timeout, EventContext eb):
+ PMAllParents(parent_limit), PMRoundRobinProposer(qc_timeout, eb) {}
+
+ void init(HotStuffCore *hsc) override {
+ PaceMaker::init(hsc);
+ PMAllParents::init();
+ PMRoundRobinProposer::init();
+ }
+};
+
}
#endif
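
Both the sticky and the round-robin gadgets keep the promise-based contract of PaceMaker: beat() resolves when it is a good time to issue a command, carrying the ID of the proposer to send it to. A hedged usage sketch (the function name is illustrative; the .then() pattern mirrors the resolve calls above):

    // Ask the pace maker when to issue the next command, and to whom.
    void issue_next_command(hotstuff::PaceMaker &pmaker) {
        pmaker.beat().then([](hotstuff::ReplicaID proposer) {
            // resolved at a "good" time to propose; route new commands to `proposer`
        });
    }

Constructing the round-robin variant follows the PaceMakerRR constructor above (the parameter values here are illustrative, not codebase defaults):

    // Given an EventContext ec: parent_limit = -1 (uncapped), qc_timeout = 5 s.
    hotstuff::pacemaker_bt pmaker = new hotstuff::PaceMakerRR(-1, 5, ec);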
diff --git a/scripts/gen_conf.py b/scripts/gen_conf.py
index bc45540..391e0d6 100644
--- a/scripts/gen_conf.py
+++ b/scripts/gen_conf.py
@@ -6,33 +6,43 @@ import argparse
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Generate configuration file for a batch of replicas')
parser.add_argument('--prefix', type=str, default='hotstuff.gen')
- parser.add_argument('--iplist', type=str, default=None)
+ parser.add_argument('--ips', type=str, default=None)
parser.add_argument('--iter', type=int, default=10)
parser.add_argument('--pport', type=int, default=10000)
parser.add_argument('--cport', type=int, default=20000)
parser.add_argument('--keygen', type=str, default='./hotstuff-keygen')
+ parser.add_argument('--nodes', type=str, default='nodes.txt')
+ parser.add_argument('--block-size', type=int, default=1)
+ parser.add_argument('--pace-maker', type=str, default='dummy')
args = parser.parse_args()
- if args.iplist is None:
+ if args.ips is None:
ips = ['127.0.0.1']
else:
- ips = [l.strip() for l in open(args.iplist, 'r').readlines()]
+ ips = [l.strip() for l in open(args.ips, 'r').readlines()]
prefix = args.prefix
iter = args.iter
base_pport = args.pport
base_cport = args.cport
- keygen_bin= args.keygen
+ keygen_bin = args.keygen
main_conf = open("{}.conf".format(prefix), 'w')
+ nodes = open(args.nodes, 'w')
replicas = ["{}:{};{}".format(ip, base_pport + i, base_cport + i)
for ip in ips
for i in range(iter)]
p = subprocess.Popen([keygen_bin, '--num', str(len(replicas))],
stdout=subprocess.PIPE, stderr=open(os.devnull, 'w'))
keys = [[t[4:] for t in l.decode('ascii').split()] for l in p.stdout]
+ if not (args.block_size is None):
+ main_conf.write("block-size = {}\n".format(args.block_size))
+ if not (args.pace_maker is None):
+ main_conf.write("pace-maker = {}\n".format(args.pace_maker))
for r in zip(replicas, keys, itertools.count(0)):
main_conf.write("replica = {}, {}\n".format(r[0], r[1][0]))
- r_conf = open("{}-sec{}.conf".format(prefix, r[2]), 'w')
+ r_conf_name = "{}-sec{}.conf".format(prefix, r[2])
+ nodes.write("{}:{}\t{}\n".format(r[2], r[0], r_conf_name))
+ r_conf = open(r_conf_name, 'w')
r_conf.write("privkey = {}\n".format(r[1][1]))
r_conf.write("idx = {}\n".format(r[2]))
diff --git a/scripts/run.sh b/scripts/run.sh
new file mode 100755
index 0000000..53d9923
--- /dev/null
+++ b/scripts/run.sh
@@ -0,0 +1,457 @@
+#!/bin/bash
+
+proj_server_bin="hotstuff-app"
+proj_server_path="/home/ted/hot-stuff/$proj_server_bin"
+proj_conf_name="hotstuff.conf"
+
+peer_list="./nodes.txt" # the list of nodes
+conf_src="./hotstuff.gen.conf"
+server_map="./server_map.txt" # optional mapping from node ip to server ip
+template_dir="template" # the dir that keeps the content shared among all nodes
+remote_base="/home/ted/testbed" # remote dir used to keep files for the experiment
+#remote_base="/tmp/" # remote dir used to keep files for the experiment
+remote_log="log" # log filename
+remote_user="ted"
+copy_to_remote_pat="rsync -avz <local_path> <remote_user>@<remote_ip>:<remote_path>"
+copy_from_remote_pat="rsync -avz <remote_user>@<remote_ip>:<remote_path> <local_path>"
+exe_remote_pat="ssh <remote_user>@<remote_ip> bash"
+run_remote_pat="cd \"<rworkdir>\"; gdb -ex r -ex bt -ex generate-core-file -ex q --args '$proj_server_path' --conf \"hotstuff.gen-sec<node_id>.conf\""
+reset_remote_pat="pgrep -f '$proj_server_bin' | xargs kill -9"
+
+fin_keyword="error:" # the keyword indicating completion of execution
+fin_chk_period=1
+fin_chk_skip_pat='^([A-O][0-9]*)|(_ctl)$'
+force_peer_list=0
+
+function join { local IFS="$1"; shift; echo "$*"; }
+function split {
+ local IFS="$1"
+ local arr=($2)
+ echo "${arr[@]}"
+}
+
+function die { echo "$1"; exit 1; }
+
+declare -A nodes
+declare -A node_confs
+nodes_cnt=0
+function get_node_info {
+ pl="$1"
+ if [[ "$force_peer_list" == 1 ]]; then
+ pl="$peer_list"
+ fi
+ OIFS="$IFS"
+ IFS=$'\n'
+ node_list=($(cat "$pl"))
+ IFS="$OIFS"
+ for tuple in "${node_list[@]}"; do
+ tup0=($(split $'\t' "$tuple"))
+ tup=($(split : "${tup0[0]}"))
+ nodes[${tup[0]}]="${tup[1]}:${tup[2]}"
+ node_confs[${tup[0]}]="${tup0[@]:1}"
+ echo "${tup[0]} => ${nodes[${tup[0]}]} & ${node_confs[${tup[0]}]}"
+ let nodes_cnt++
+ done
+}
+
+declare -A server_map
+function get_server_map {
+ {
+ IFS=$'\n'
+ map_list=($(cat "$1"))
+ }
+ IFS=$'\n \t'
+ for pair in "${map_list[@]}"; do
+ p=($pair)
+ server_map[${p[0]}]="${p[1]}"
+ echo "mapping ${p[0]} => ${p[1]}"
+ done
+}
+
+
+function get_addr {
+ tup=($(split ';' $1))
+ echo "${tup[0]}"
+}
+
+function get_ip {
+ tup=($(split : $1))
+ echo "${tup[0]}"
+}
+
+function get_peer_port {
+ tup=($(split : $1))
+ tup2=($(split ';' ${tup[1]}))
+ echo "${tup2[0]}"
+}
+
+
+function get_client_port {
+ tup=($(split : $1))
+ tup2=($(split ';' ${tup[1]}))
+ echo "${tup2[1]}"
+}
+
+
+function get_ip_by_id {
+ get_ip "${nodes[$1]}"
+}
+
+function get_peer_port_by_id {
+ get_peer_port "${nodes[$1]}"
+}
+
+
+function get_client_port_by_id {
+ get_client_port "${nodes[$1]}"
+}
+
+function copy_file {
+ local pat="$1"
+ local cmd="${pat//<local_path>/$2}"
+ cmd="${cmd//<remote_ip>/$3}"
+ cmd="${cmd//<remote_user>/$remote_user}"
+ cmd="${cmd//<remote_path>/$4}"
+ echo $cmd
+ eval "$cmd"
+} >> log 2>&1
+
+function execute_remote_cmd_pid {
+ local node_ip="$1"
+ local c="$2"
+ local l="$3"
+ local cmd="${exe_remote_pat//<remote_ip>/$node_ip}"
+ cmd="${cmd//<remote_user>/$remote_user}"
+ eval $cmd << EOF
+$c > $l 2>&1 & echo \$!
+EOF
+}
+
+
+
+function execute_remote_cmd_stat {
+ local node_ip="$1"
+ local c="$2"
+ local l="$3"
+ local cmd="${exe_remote_pat//<remote_ip>/$node_ip}"
+ cmd="${cmd//<remote_user>/$remote_user}"
+ eval $cmd << EOF
+$c > $l 2>&1 ; echo \$?
+EOF
+}
+
+
+function _remote_load {
+ local workdir="$1"
+ local rworkdir="$2"
+ local node_ip="$3"
+ local rid="$4"
+ local extra_conf=($5)
+ local tmpldir="$workdir/$template_dir/"
+ local node_tmpldir="$workdir/$rid"
+ [[ $(execute_remote_cmd_stat "$node_ip" \
+ "mkdir -p \"$rworkdir\"" \
+ /dev/null) == 0 ]] || die "failed to create directory $rworkdir"
+ copy_file "$copy_to_remote_pat" "$tmpldir" "$node_ip" "$rworkdir"
+ for conf in "${extra_conf[@]}"; do
+ copy_file "$copy_to_remote_pat" "$node_tmpldir/$conf" "$node_ip" "$rworkdir"
+ done
+}
+
+function _remote_start {
+ local workdir="$1"
+ local rworkdir="$2"
+ local node_id="$3"
+ local node_ip="$4"
+ local client_port="$5"
+ local cmd="${run_remote_pat//<rworkdir>/$rworkdir}"
+ cmd="${cmd//<node_id>/$node_id}"
+ cmd="${cmd//<cport>/$client_port}"
+ execute_remote_cmd_pid "$node_ip" "$cmd" \
+ "\"$rworkdir/$remote_log\"" > "$workdir/${node_id}.pid"
+}
+
+function _remote_exec {
+ local workdir="$1"
+ local rworkdir="$2"
+ local node_ip="$3"
+ local cmd="$4"
+ [[ $(execute_remote_cmd_stat "$node_ip" "$cmd" /dev/null) == 0 ]]
+}
+
+function _remote_stop {
+ local node_pid="$4"
+ _remote_exec "$1" "$2" "$3" "kill $node_pid"
+}
+
+function _remote_status {
+ local node_pid="$4"
+ _remote_exec "$1" "$2" "$3" "kill -0 $node_pid"
+}
+
+function _remote_finished {
+ _remote_exec "$1" "$2" "$3" "grep \"$fin_keyword\" \"$rworkdir/$remote_log\""
+}
+
+function _remote_fetch {
+ local workdir="$1"
+ local rworkdir="$2"
+ local node_id="$3"
+ local node_ip="$4"
+ copy_file "$copy_from_remote_pat" "$workdir/${node_id}.log" "$node_ip" "$rworkdir/$remote_log"
+}
+
+function start_all {
+ local workdir="$1"
+ local tmpldir="$workdir/$template_dir/"
+ mkdir "$workdir" > /dev/null 2>&1 || die "workdir already exists"
+ rm -rf "$tmpldir"
+ mkdir "$tmpldir"
+ cp "$peer_list" "$workdir/peer_list.txt"
+ cp "$server_map" "$workdir/server_map.txt"
+ get_node_info "$workdir/peer_list.txt"
+ get_server_map "$workdir/server_map.txt"
+ echo "copying configuration file"
+ cp "$conf_src" "$tmpldir/$proj_conf_name"
+ for rid in "${!nodes[@]}"; do
+ local node_tmpldir="$workdir/$rid"
+ local ip="$(get_ip_by_id $rid)"
+ ip="${server_map[$ip]:-$ip}"
+ local pport="$(get_peer_port_by_id $rid)"
+ local cport="$(get_client_port_by_id $rid)"
+ local rworkdir="$remote_base/$workdir/${rid}"
+ local extra_conf_=(${node_confs[$rid]})
+ rm -rf "$node_tmpldir"
+ mkdir "$node_tmpldir"
+ (
+ local extra_conf=()
+ for conf in "${extra_conf_[@]}"; do
+ cp "$conf" "$node_tmpldir/"
+ extra_conf+=($(basename "$conf"))
+ copy_file "$copy_to_remote_pat" "$tmpldir/$conf" "$node_ip" "$rworkdir"
+ done
+ echo "Starting $rid @ $ip, $pport and $cport"
+ _remote_load "$workdir" "$rworkdir" "$ip" "$rid" "${extra_conf[@]}"
+ echo "$rid loaded"
+ ) &
+ done
+ wait
+ for rid in "${!nodes[@]}"; do
+ local ip="$(get_ip_by_id $rid)"
+ ip="${server_map[$ip]:-$ip}"
+ local pport="$(get_peer_port_by_id $rid)"
+ local cport="$(get_client_port_by_id $rid)"
+ local rworkdir="$remote_base/$workdir/${rid}"
+ (
+ echo "Starting $rid @ $ip, $pport and $cport"
+ _remote_start "$workdir" "$rworkdir" "$rid" "$ip" "$cport"
+ echo "$rid started"
+ ) &
+ done
+ wait
+}
+
+function fetch_all {
+ local workdir="$1"
+ get_node_info "$workdir/peer_list.txt"
+ get_server_map "$workdir/server_map.txt"
+ for rid in "${!nodes[@]}"; do
+ loca