dispute cases rewritten

This commit is contained in:
Dmitry Sergeev 2018-06-22 00:06:59 +05:00
parent da007fef44
commit c6401002d7
7 changed files with 20 additions and 348 deletions

View File

@ -1,122 +0,0 @@
import sys, urllib, json, datetime, time
import matplotlib.pyplot as plt
from common_parse_utils import uvarint, parseutc, formatbytes, readjson, getmaxheight
def get_num_txs(json):
    """Return the ``num_txs`` field from the header of a ``/block`` RPC response.

    ``json`` is the already-parsed response dictionary (the parameter name
    shadows the ``json`` module inside this function only).
    """
    header = json["result"]["block"]["header"]
    return header["num_txs"]
# Command-line handling: the node address (argv[1]) is mandatory; the report
# name (argv[2]) and the height range (argv[3], argv[4]) are optional.
if len(sys.argv) < 2:
print "usage: python parse_block.py host:port [report_name [minheight [maxheight]]]"
sys.exit()
tmaddress = sys.argv[1]
report_name = sys.argv[2] if len(sys.argv) > 2 else ""
# Upper bound of the scanned range: argv[4] when given, otherwise the node's
# latest height.
if len(sys.argv) > 4:
maxheight = int(sys.argv[4])
else:
maxheight = getmaxheight(tmaddress)
# Step maxheight down while the block at that height carries no transactions
# (the loop stops once maxheight drops below 3).
while maxheight >= 3 and get_num_txs(readjson(tmaddress + "/block?height=%d" % maxheight)) == 0:
maxheight -= 1
# Lower bound of the scanned range: argv[3] when given, otherwise start from
# maxheight.
if len(sys.argv) > 3:
minheight = int(sys.argv[3])
else:
minheight = maxheight
# Step minheight down while the preceding block still contains transactions,
# so the range covers one contiguous run of non-empty blocks.
while minheight >= 3 and get_num_txs(readjson(tmaddress + "/block?height=%d" % (minheight - 1))) > 0:
minheight -= 1
# Accumulators filled by the per-block loop below.
# accsize: sum of len(txbytes) over all transactions.
accsize = 0
# acclatency / minlatency / maxlatency: sum and extremes of (block time - tx time).
acclatency = 0
minlatency = 1e20
maxlatency = 0
txcount = 0
blockcount = 0
# firsttx / lasttx: earliest and latest transaction timestamps seen
# (1e20 serves as the "nothing seen yet" start value for the minimum).
firsttx = 1e20
lasttx = 0
# firstblock / lastblock: earliest and latest times of non-empty blocks.
firstblock = 1e20
lastblock = 0
# maxblocksize: largest num_txs among non-empty blocks.
maxblocksize = 0
# txstat: (timestamp, +1) for each tx creation and (timestamp, -1) for its
# inclusion in a block; used later to reconstruct the backlog curve.
txstat = []
# Walk every block in [minheight, maxheight], decode its transactions and
# update the accumulators.
for height in range(minheight, maxheight + 1):
data = readjson(tmaddress + "/block?height=%d" % height)
numtxs = get_num_txs(data)
blocktimetxt = data["result"]["block"]["header"]["time"]
blocktime = parseutc(blocktimetxt)
# Only non-empty blocks contribute to the block time span, the block count
# and the maximum block size.
if numtxs > 0:
firstblock = min(firstblock, blocktime)
lastblock = max(lastblock, blocktime)
blockcount += 1
maxblocksize = max(maxblocksize, numtxs)
print height, numtxs, blocktimetxt
txs = data["result"]["block"]["data"]["txs"]
if txs:
for index, txhex in enumerate(txs):
# Each tx is expected to be a hex string; fromhex raises ValueError otherwise.
txbytes = bytearray.fromhex(txhex)# if re.fullmatch(r"^[0-9a-fA-F]$", txhex) is not None
# Byte 0 is taken as the key character when byte 1 is '='; otherwise "*".
key = chr(txbytes[0]) if chr(txbytes[1]) == '=' else "*"
# Bytes 2..7 and 8..15 each hold a uvarint (connection index, tx number).
connindex = uvarint(txbytes[2:8])
txnumber = uvarint(txbytes[8:16])
# Hex characters 32..63 of the tx, i.e. bytes 16..31.
hostnamehash = txhex[32:64]
# Bytes 32..39 hold the tx timestamp as a uvarint, scaled down by 1e6
# (presumably microseconds -> seconds; confirm against the tx producer).
txtime = uvarint(txbytes[32:40]) / 1e6
if txtime < 1e9:
txtime *= 1e6 # legacy support
# Latency = time of the containing block minus the tx's own timestamp.
latency = blocktime - txtime
accsize += len(txbytes)
acclatency += latency
minlatency = min(minlatency, latency)
maxlatency = max(maxlatency, latency)
txcount += 1
firsttx = min(firsttx, txtime)
lasttx = max(lasttx, txtime)
txtimetxt = datetime.datetime.fromtimestamp(txtime)
# +1 when the tx was created, -1 when it landed in a block.
txstat.append((txtime, 1))
txstat.append((blocktime, -1))
# Print details for the first five transactions of each block only.
if index < 5:
print txtimetxt, latency
#print key, connindex, txnumber, hostnamehash, txtimetxt, latency
# Textual summary of the collected statistics.
# NOTE(review): the divisions below raise ZeroDivisionError when no transaction
# was seen (txcount == 0) or when the time spans are zero (e.g. a single tx).
print "Transactions: ", txcount, "=", formatbytes(accsize)
print " ", "%.3f s" % (lasttx - firsttx), "from", datetime.datetime.fromtimestamp(firsttx), "to", datetime.datetime.fromtimestamp(lasttx)
print "Blocks: ", "%d: from %d to %d" % (blockcount, minheight, maxheight)
print " ", "%.3f s" % (lastblock - firstblock), "from", datetime.datetime.fromtimestamp(firstblock), "to", datetime.datetime.fromtimestamp(lastblock)
# Send rate is measured over the tx creation span; throughput is measured from
# the first tx to the last non-empty block.
print "Tx send rate: ", "%.3f tx/s" % (txcount / (lasttx - firsttx)), "=", formatbytes(accsize / (lasttx - firsttx)) + "/s"
print "Tx throughput: ", "%.3f tx/s" % (txcount / (lastblock - firsttx)), "=", formatbytes(accsize / (lastblock - firsttx)) + "/s"
print "Block throughput:", "%.3f block/s" % (blockcount / (lastblock - firsttx))
print "Avg tx latency: ", "%.3f s" % (acclatency / txcount)
print "Min tx latency: ", "%.3f s" % minlatency
print "Max tx latency: ", "%.3f s" % maxlatency
# Backlog curve: sort the +1/-1 events by time and sample the running sum at
# steps + 1 evenly spaced instants between firsttx and lastblock.
txstat = sorted(txstat)
cursum = 0
curindex = 0
steps = 1000
stepstat = []
for i in range(steps + 1):
t = firsttx + (lastblock - firsttx) / steps * i
# Consume every event that happened up to the sample time t.
while curindex < len(txstat) and txstat[curindex][0] <= t:
cursum += txstat[curindex][1]
curindex += 1
stepstat.append(cursum)
# Plot the backlog against seconds elapsed since the first transaction.
f = plt.figure(figsize=(15, 5))
plt.plot([i * (lastblock - firsttx) / steps for i in range(steps + 1)], stepstat)
# long_title is built but only used by the commented-out plt.title call below.
long_title = "Duration: %.1f s, Tx size: %s, Tx send rate: %.3f tx/s = %s/s, Tx throughput: %.3f tx/s = %s/s" % \
(lasttx - firsttx, formatbytes(accsize / txcount), \
txcount / (lasttx - firsttx), formatbytes(accsize / (lasttx - firsttx)), \
txcount / (lastblock - firsttx), formatbytes(accsize / (lastblock - firsttx)))
#plt.title(long_title)
plt.title(report_name)
plt.xlabel("seconds from first tx")
plt.ylabel("txs in backlog")
# The figure is saved as "<report_name>.png" when a report name was given.
if report_name != "":
# long_filename is built but only used by the commented-out savefig call below.
long_filename = "tdmnt-stat-%d-%d-%d-%.1f-%.0f-%.0f.png" % \
(minheight, maxheight, maxblocksize, lasttx - firsttx, accsize / txcount, txcount / (lasttx - firsttx))
#f.savefig(long_filename, bbox_inches='tight')
f.savefig(report_name + ".png", bbox_inches='tight')
plt.show(block=True)

View File

@ -1,46 +0,0 @@
import sys, urllib, json, datetime, time
def uvarint(buf):
    """Decode an unsigned little-endian base-128 varint from the start of *buf*.

    Each byte contributes its low 7 bits; a byte below 0x80 terminates the
    number. *buf* must yield integers when iterated (e.g. a ``bytearray``).

    Returns the decoded integer, or 0 when *buf* is empty or ends before a
    terminating byte is found.
    """
    # Plain ints are used instead of ``long``: Python 2 promotes them to long
    # automatically on overflow, and ``long`` does not exist on Python 3.
    value = 0
    shift = 0
    for byte in buf:
        if byte < 0x80:
            # Final byte: no continuation bit, so it completes the value.
            return value | (byte << shift)
        value |= (byte & 0x7F) << shift
        shift += 7
    # Ran out of bytes without seeing a terminator.
    return 0
def parseutc(utctxt):
    """Convert a UTC timestamp string to seconds since the Unix epoch.

    Accepts ``YYYY-MM-DDTHH:MM:SS`` optionally followed by ``.fraction`` and a
    trailing ``Z``. Returns a float; the fractional part is read with
    nanosecond precision (digits beyond the ninth are ignored).

    Raises ValueError when the date/time part does not match the expected
    format or the fraction is not numeric.
    """
    # Function-scope import keeps this block self-contained.
    import calendar

    dt, _, tail = utctxt.partition(".")
    if tail == "":
        # No fractional part: strip the trailing "Z" from the date/time itself.
        dt, _, _ = utctxt.partition("Z")
        tail = "0Z"
    parsed = datetime.datetime.strptime(dt, '%Y-%m-%dT%H:%M:%S')
    # timegm interprets the struct_time as UTC, so the result does not depend
    # on the local timezone or DST rules, and it avoids the platform-specific
    # strftime("%s") extension.
    pure = calendar.timegm(parsed.timetuple())
    # Right-pad the fraction to exactly nine digits -> nanoseconds.
    ns = int(tail.rstrip("Z").ljust(9, "0")[:9], 10)
    return pure + ns / 1e9
def formatbytes(value):
if value < 1024:
return "%.0f B" % value
elif value < 1024 * 1024:
return "%.3f KiB" % (value / 1024.0)
else:
return "%.3f MiB" % (value / 1024.0 / 1024.0)
def readjson(url):
    """Fetch ``http://<url>`` and return the response body parsed as JSON.

    *url* is given without a scheme (e.g. ``host:port/path``). Errors raised
    by the HTTP request or by JSON parsing propagate to the caller.
    """
    response = urllib.urlopen("http://" + url)
    try:
        return json.loads(response.read())
    finally:
        # Release the connection even when reading or parsing fails.
        response.close()
def getsyncinfo(tmaddress):
    """Return the sync information from the node's ``/status`` RPC result.

    When the result has a ``sync_info`` entry, that entry is returned;
    otherwise the result itself is returned (compatibility with responses
    that lack the nested entry).
    """
    status = readjson(tmaddress + "/status")["result"]
    return status["sync_info"] if "sync_info" in status else status
def getmaxheight(tmaddress):
    """Return the ``latest_block_height`` reported by the node's sync info."""
    sync_info = getsyncinfo(tmaddress)
    return sync_info["latest_block_height"]

View File

@ -1,10 +0,0 @@
# Sample session against a node listening on localhost:46257, using the
# `put` and `get` commands of query.py.
# NOTE(review): the meaning of the expressions (copy, increment, sum,
# factorial, hiersum, the "###again" suffix and the "0-200:" prefix) is defined
# by the application, not by query.py - confirm against the app.
python query.py localhost:46257 put a/b=10
python query.py localhost:46257 put "a/c=copy(a/b)"
python query.py localhost:46257 put "a/d=increment(a/c)"
python query.py localhost:46257 put "a/d=increment(a/c)###again"
python query.py localhost:46257 put "a/e=sum(a/c,a/d)"
python query.py localhost:46257 put "a/f=factorial(a/b)"
python query.py localhost:46257 put "c/asum=hiersum(a)"
# Read back one of the values written above.
python query.py localhost:46257 get a/e
python query.py localhost:46257 put "0-200:b/@1/@0=1"
python query.py localhost:46257 put "c/bsum=hiersum(b)"

View File

@ -1,48 +0,0 @@
import sys, urllib, json, datetime, time
from common_parse_utils import parseutc, readjson, getmaxheight
# Prints one summary line per interesting block of the chain: height, block
# time, tx counts, a shortened app hash and up to five decoded transactions.
if len(sys.argv) < 2:
print "usage: python parse_chain.py host:port [minheight]"
sys.exit()
blocks_fetch = 20 # tendermint can't return more blocks
tmaddress = sys.argv[1]
maxheight = getmaxheight(tmaddress)
# Default range: the last 50 heights (never starting below height 1).
minheight = int(sys.argv[2]) if len(sys.argv) > 2 else max(1, maxheight - 49)
# Height of the most recent block that contained transactions (-1: none yet).
lastnonempty = -1
# Highest height covered by the block-meta batch currently held in bulk_data.
last_fetched_height = minheight - 1
print "%6s %26s %7s %7s %8s %30s %30s %30s %30s %30s" % ("height", "block time", "txs", "acc.txs", "app_hash", "tx1", "tx2", "tx3", "tx4", "tx5")
for height in range(minheight, maxheight + 1):
# Fetch the next batch of block metas (at most blocks_fetch heights) once
# the current batch is exhausted.
if height > last_fetched_height:
last_fetched_height = min(height + blocks_fetch - 1, maxheight)
bulk_data = (readjson(tmaddress + "/blockchain?minHeight=%d&maxHeight=%d" % (height, last_fetched_height)))["result"]["block_metas"]
# NOTE(review): this indexing assumes block_metas is ordered from the highest
# height down to the lowest - confirm against the RPC.
data = bulk_data[last_fetched_height - height]["header"]
numtxs = data["num_txs"]
totaltxs = data["total_txs"]
app_hash = data["app_hash"]
blocktimetxt = data["time"]
blocktime = parseutc(blocktimetxt)
# Show non-empty blocks, the last block, and the block right after the last
# non-empty one; other empty blocks are collapsed.
if numtxs > 0 or height == maxheight or height == lastnonempty + 1:
blockdata = readjson(tmaddress + "/block?height=%d" % height)
txs = blockdata["result"]["block"]["data"]["txs"]
txsummary = ""
if txs:
lastnonempty = height
# Summarise at most the first five transactions, each cut to 30 columns.
for tx in txs[0:5]:
txstr = tx.decode('base64')
if len(txstr) > 30:
txsummary += "%27s... " % txstr[0:27]
else:
txsummary += "%30s " % txstr
if len(txs) > 5:
txsummary += "..."
# Only the first six characters of the app hash are shown.
app_hash_to_show = "0x" + app_hash[0:6] if app_hash != "" else "--------"
print "%5s: %s %7d %7d" % (height, datetime.datetime.fromtimestamp(blocktime), numtxs, totaltxs), app_hash_to_show, txsummary
else:
# Print a single "..." marker for a run of skipped empty blocks.
if height == lastnonempty + 2:
print "..."

View File

@ -1,96 +0,0 @@
import sys, urllib, json, datetime, time, hashlib, sha3
from common_parse_utils import readjson, getsyncinfo, getmaxheight
# Command names accepted as the second command-line argument.
# Transaction commands (sent through broadcast_tx_commit):
# unverified put - the tx info is printed without a follow-up query.
CMD_PUT = "fastput"
# put / run - followed by a verified "get" of the affected key.
CMD_CHECKED_PUT = "put"
CMD_RUN = "run"
# Query commands (sent through abci_query and verified against the app hash).
CMD_GET_QUERY = "get"
CMD_LS_QUERY = "ls"
def verify_merkle_proof(result, proof, app_hash):
    """Check that *result* is linked to *app_hash* by the textual *proof*.

    *proof* is a ``", "``-separated list of levels, each level being a
    ``" "``-separated list of hex digests. Verification walks from the last
    level to the first: the SHA3-256 hex digest of *result* must occur in the
    last level, the digest of each level's full text must occur in the level
    before it, and the digest of the first level must occur in *app_hash*
    (compared case-insensitively on the *app_hash* side).

    Returns True when every link holds, False otherwise.
    """
    def sha3_hex(text):
        # hashlib only accepts bytes on Python 3; on Python 2 ``str`` already
        # is ``bytes``, so only unicode text gets encoded there.
        data = text if isinstance(text, bytes) else text.encode("utf-8")
        return hashlib.sha3_256(data).hexdigest()

    child = result
    for level in reversed(proof.split(", ")):
        child_hash = sha3_hex(child)
        # Containment (not equality) test, as in the original implementation.
        if not any(child_hash in digest for digest in level.split(" ")):
            return False
        child = level
    # The first proof level must hash to the application hash.
    return sha3_hex(child) in app_hash.lower()
def checked_abci_query(tmaddress, height, command, query, tentative_info):
    """Query the application state at *height* and verify the answer.

    Parameters:
        tmaddress: ``host:port`` of the node's RPC endpoint.
        height: height at which the state is queried.
        command, query: sent as ``"<command>:<query>"`` in the ``abci_query`` data.
        tentative_info: value the verified result is expected to equal, or
            None when there is no expectation.

    Returns a tuple ``(height, result, proof, apphash, success, message)``.
    ``success`` is True only when a result and a proof were returned, the
    result matches *tentative_info* (if given) and the proof verifies against
    the app hash; otherwise ``message`` explains the failure.
    """
    # The app hash for `height` is read from the header of block `height + 1`,
    # so that block has to exist before anything can be verified.
    if getmaxheight(tmaddress) < height + 1:
        return (height, None, None, None, False, "Cannot verify tentative '%s'! Height is not verifiable" % (tentative_info or ""))
    apphash = readjson('%s/block?height=%d' % (tmaddress, height + 1))["result"]["block"]["header"]["app_hash"]
    response = readjson('%s/abci_query?height=%d&data="%s:%s"' % (tmaddress, height, command, query))["result"]["response"]
    # Both fields are base64-encoded and may be absent from the response.
    result = response["value"].decode('base64') if "value" in response else None
    proof = response["proof"].decode('base64') if "proof" in response else None
    if result is None:
        return (height, result, proof, apphash, False, "Result is empty")
    elif tentative_info is not None and result != tentative_info:
        # Report the expected value passed in by the caller (previously this
        # referenced a module-level name that is not always defined).
        return (height, result, proof, apphash, False, "Verified result '%s' doesn't match tentative '%s'!" % (result, tentative_info))
    elif proof is None:
        return (height, result, proof, apphash, False, "No proof")
    elif not verify_merkle_proof(result, proof, apphash):
        return (height, result, proof, apphash, False, "Proof is invalid")
    else:
        return (height, result, proof, apphash, True, "")
def print_checked_abci_query(tmaddress, height, command, query, tentative_info):
(height, result, proof, apphash, success, message) = checked_abci_query(tmaddress, height, command, query, tentative_info)
print "HEIGHT:", height
print "HASH :", apphash or "NOT_READY"
print "PROOF :", (proof or "NO_PROOF").upper()
print "RESULT:", result or "EMPTY"
if success:
print "OK"
else:
print "BAD :", message
def latest_provable_height(tmaddress):
    """Return the height just below the node's latest block height.

    ``checked_abci_query`` reads the app hash for a height from the following
    block, so the latest block itself cannot be verified yet.
    """
    latest = getsyncinfo(tmaddress)["latest_block_height"]
    return latest - 1
def wait_for_height(tmaddress, height):
    """Poll the node until its latest height reaches *height*.

    Checks at most five times, sleeping one second after every unsuccessful
    check, then returns regardless of whether the height was reached.
    """
    attempts_left = 5
    while attempts_left > 0 and not getmaxheight(tmaddress) >= height:
        time.sleep(1)
        attempts_left -= 1
if len(sys.argv) < 3:
print "usage: python query.py host:port command arg"
sys.exit()
tmaddress = sys.argv[1]
command = sys.argv[2]
arg = sys.argv[3]
if command in {CMD_PUT, CMD_CHECKED_PUT, CMD_RUN}:
if command == CMD_RUN:
query_key = "optarg"
tx = query_key + "=" + arg
else:
tx = arg
query_key = tx.split("=")[0]
response = readjson(tmaddress + '/broadcast_tx_commit?tx="' + tx + '"')
if "error" in response:
print "ERROR :", response["error"]["data"]
else:
height = response["result"]["height"]
if response["result"].get("deliver_tx", {}).get("code", "0") != "0":
print "HEIGHT:", height
print "BAD :", log or "NO_LOG"
else:
info = response["result"].get("deliver_tx", {}).get("info")
if command in {CMD_CHECKED_PUT, CMD_RUN} and info is not None:
wait_for_height(tmaddress, height + 1)
print_checked_abci_query(tmaddress, height, "get", query_key, info)
else:
print "HEIGHT:", height
print "INFO: ", info or "EMPTY"
print "OK"
elif command in {CMD_GET_QUERY, CMD_LS_QUERY}:
height = latest_provable_height(tmaddress)
print_checked_abci_query(tmaddress, height, command, arg, None)

View File

@ -1,2 +0,0 @@
#!/bin/bash
# Runs block_report.py with up to four positional arguments and mirrors its
# output to "<second argument>.txt" via tee.
# NOTE(review): $1, $3 and $4 are unquoted, so arguments that are not supplied
# vanish from the command line instead of being passed as empty strings.
python block_report.py $1 "$2" $3 $4 | tee "$2.txt"

View File

@ -33,9 +33,9 @@ Because every computation is verified by the cluster nodes and computation outco
* [A. Cases which the client can detect and handle](#a-cases-which-the-client-can-detect-and-handle)
* [B. Cases which the client can detect, but cannot handle](#b-cases-which-the-client-can-detect-but-cannot-handle)
* [C. Dispute cases](#c-dispute-cases)
* [C1: some nodes honest, some not, no quorum](#dispute-case-c1-some-nodes-honest-some-not-no-quorum)
* [C2: dishonest quorum, minority of honest nodes](#dispute-case-c2-dishonest-quorum-minority-of-honest-nodes)
* [C3: honest quorum, some nodes dishonest or not available](#dispute-case-c3-honest-quorum-some-nodes-dishonest-or-not-available)
* [C1. Some nodes honest, some not, no quorum](#dispute-case-c1-some-nodes-honest-some-not-no-quorum)
* [C2. Dishonest quorum, minority of honest nodes](#dispute-case-c2-dishonest-quorum-minority-of-honest-nodes)
* [C3. Honest quorum, some nodes dishonest or not available](#dispute-case-c3-honest-quorum-some-nodes-dishonest-or-not-available)
## Motivation
The application is a proof-of-concept of a decentralized system with the following properties:
@ -79,7 +79,7 @@ To execute domain-specific logic the application uses its own **State machine**
### State machine and computations correctness
Each node carries a state which is updated using transactions furnished through the consensus engine. Assuming that more than 2/3 of the cluster nodes are honest, the BFT consensus engine guarantees _correctness_ of state transitions. In other words, unless 1/3 or more of the cluster nodes are Byzantine there is no way the cluster will allow an incorrect transition.
Each node carries a state which is updated using transactions furnished through the consensus engine. Assuming that more than 2/3 of the cluster nodes are honest, the BFT consensus engine guarantees *correctness* of state transitions. In other words, unless 1/3 or more of the cluster nodes are Byzantine there is no way the cluster will allow an incorrect transition.
If every transition made since the genesis was correct, we can expect that the state itself is correct too. Results obtained by querying such a state should be correct as well (assuming a state is a verifiable data structure).
@ -87,7 +87,7 @@ However, it's not possible to expect that a cluster can't be taken over by Byzan
This is a pretty high probability, and if we want to keep the cluster size reasonably low to achieve the desired cost efficiency, another trick is needed. We can allow any node in the cluster to escalate to the external trusted **Judge** if it disagrees with state transitions made by the rest of the nodes. In this case, all nodes in the cluster need to be Byzantine to keep the **Judge** uninformed. For the considered case the probability of such an event is `~1E-7`.
This way it's possible to improve the probability of noticing an incorrect behavior almost by six orders of magnitude. However, there is a significant difference between the two approaches. Once the cluster has reached consensus, the state transition is made and potentially incorrect results can be immediately used by the client. An escalation mechanism allows to notice an incorrect cluster behavior only _post factum_.
This way it's possible to improve the probability of noticing incorrect behavior by almost six orders of magnitude. However, there is a significant difference between the two approaches. Once the cluster has reached consensus, the state transition is made and potentially incorrect results can be immediately used by the client. An escalation mechanism makes it possible to notice incorrect cluster behavior only *post factum*.
To compensate, a **Judge** can penalize malicious nodes by forfeiting their security deposits for the benefit of the client. However, even in this case a client can't be a mission critical application where no amount of compensation would offset the damage made.
@ -372,35 +372,31 @@ This message produced by Monitor thread of the App that checks the following con
#### Dispute case C2: dishonest quorum, minority of honest nodes
This case can also be illustrated using `wrong` key:
```bash
> python query.py localhost:46157 put -v wrong=234
> python query.py localhost:46157 put -v wrong=123
HEIGHT: 3
HASH : NOT_READY
PROOF : NO_PROOF
RESULT: EMPTY
BAD : Cannot verify tentative '234'! Height is not verifiable
HASH : 3E5B81D6C436A5319577637A005FDA99EAA632C360ACA23AE9BB3BD3766CFE02
PROOF : A7FFC6F8BF1ED76651C14756A061D662F580FF4DE43B49FA82D80A4B80F8434A 1AACEE49E178FF7836873CB0D520C5C7D82B772D28997A0EE51A837A5AA5683C 672896E0A9F15E323B6D2166A520701F8423198E0AB3D33415F7E2A844D18454, 10A1E4BF410C6BFD3455EF467700B24842ABE6F9ED6D24C816741F43A8FA8D58
RESULT: wrong123
OK
```
This message is the same as before but the situation is different actually. All nodes except Node 1 obtain wrong app hash, but now those 'wrong' nodes have a quorum! Therefore the result is not confirmed only from the point of view of Node 1. By checking it's log (`screen -x app1`) another Monitor warning can be observed:
The 'wrong' nodes (1st, 2nd, and 3rd) have a quorum (despite the 4th disagreeing with them) and provide their version of the state and the corresponding `app_hash`. The Client validates the blockchain information and the provided response and treats it as correct. From the Client's point of view, it is impossible in the general case to distinguish a correct response from a falsified one in the presence of a Byzantine quorum.
This example is pretty artificial because a trivial comparison of the target value `123` with the result `wrong123` could be done. However, in the case of a non-trivial operation the client is unable to reproduce the computation and cannot detect the incorrect response.
By checking the only correct 4th Node log (`screen -x app4`) another Monitor warning can be observed:
```
DISAGREEMENT WITH CLUSTER QUORUM!
```
To achieve this detection the App's Monitor periodically requests its peer's TM Core RPC's for the next block and compares its own `app_hash` with their `app_hash`-es.
To achieve this detection the App's Monitor periodically requests its peers' TM Core RPCs for the next block and compares its own `app_hash` with their `app_hash`-es. In case of disagreement the Monitor immediately raises the dispute to the Judge.
Let's reset the cluster and try again, submit the same transaction, but connect to another node:
```bash
> python query.py localhost:46257 put -v wrong=234
HEIGHT: 3
HASH : CE773BBAD425FE7C7CA54E890B4E02759564F8E9AB1B82ADCF42437122EDC7CD
PROOF : A7FFC6F8BF1ED76651C14756A061D662F580FF4DE43B49FA82D80A4B80F8434A 1AACEE49E178FF7836873CB0D520C5C7D82B772D28997A0EE51A837A5AA5683C B8366C6DD77D18CCC60B6824C17C9C0B95063213E32AC14D82D2BEE6981EDB87, 3B52405688C482C40A924C5CFC3357BEA000FA2FC149BFCC8020A18EA02BDD92
RESULT: wrong234
OK
```
#### Dispute case C3: honest quorum, some nodes dishonest or not available
When quorum (2/3+ nodes of the cluster) exists availability of other nodes does not influence cluster's safety or liveness. This demo app does not implement any special checks for the existence of nodes absent or Byzantine during operation processing. Let's illustrate this using `wrong` key:
When a quorum (2/3+ nodes of the cluster) exists, the availability of other nodes does not influence cluster's safety or liveness. This demo app does not implement any special checks for the existence of nodes absent or Byzantine during operation processing. Let's illustrate this using `wrong` key:
```bash
> python query.py localhost:46157 put -v wrong=4
HEIGHT: 3
HASH : 7B84...
PROOF : A7FF...
HASH : 7B840A448231110FC3746EE06C0053E6EADE213189BDFDB902E7FBA6A486643B
PROOF : A7FFC6F8BF1ED76651C14756A061D662F580FF4DE43B49FA82D80A4B80F8434A 1AACEE49E178FF7836873CB0D520C5C7D82B772D28997A0EE51A837A5AA5683C B103DC8A5244FD6548F7C0DE617EE66D25F79007A993BC15C6EA11D8390E6279, B410677B84ED73FAC43FCF1ABD933151DD417D932A0EF9B0260ECF8B7B72ECB9
RESULT: 4
OK
```