From d469ab27fc58bd86a567e953ec2b2c2346a411fe Mon Sep 17 00:00:00 2001
From: Farhad Shahabi
Date: Fri, 28 Jan 2022 11:48:37 -0500
Subject: [PATCH 1/6] Update net_plugin.cpp

---
 plugins/net_plugin/net_plugin.cpp | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp
index 4adb1df9527..a107098dd3a 100644
--- a/plugins/net_plugin/net_plugin.cpp
+++ b/plugins/net_plugin/net_plugin.cpp
@@ -1853,14 +1853,22 @@ namespace eosio {
       //
       //-----------------------------
-      if (head_id == msg.head_id) {
+      uint32_t network_latency = (std::chrono::system_clock::now().time_since_epoch().count() - msg.time); //calculating network latency ==> current_time - handshake message timestamp
+      uint32_t num_block_behind_by_latency = 1 + (std::chrono::milliseconds(network_latency) / std::chrono::milliseconds(config::block_interval_ms)); // Number of blocks produced during network latency period
+      if (head_id == msg.head_id || ( lib_num < msg.last_irreversible_block_num && msg.last_irreversible_block_num - lib_num <= num_block_behind_by_latency)) {
          peer_ilog( c, "handshake lib ${lib}, head ${head}, head id ${id}.. sync 0", ("lib", msg.last_irreversible_block_num)("head", msg.head_num)("id", msg.head_id.str().substr(8,16)) );
          c->syncing = false;
-         notice_message note;
-         note.known_blocks.mode = none;
-         note.known_trx.mode = catch_up;
-         note.known_trx.pending = 0;
+         notice_message note;
+         note.known_blocks.mode = none; //block mode none
+         if (head_id != msg.head_id){
+            note.known_trx.mode = none; // transaction mode none, since it is a few blocks behind
+            note.known_trx.pending = head;
+         }
+         else{
+            note.known_trx.mode = catch_up;
+            note.known_trx.pending = 0;
+         }
          c->enqueue( note );
          return;
       }

From 6d324e20093e14e204e6b1bf6cbdcd5d7d482ff9 Mon Sep 17 00:00:00 2001
From: Farhad Shahabi
Date: Thu, 10 Mar 2022 21:56:41 -0500
Subject: [PATCH 2/6] # This is a combination of 11 commits.
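Before the squashed follow-up below revises it, the allowance PATCH 1/6 introduces above can be read as plain arithmetic: forgive a lib discrepancy of up to one block plus however many blocks could have been produced while the handshake message was in flight. The Python below is only an illustrative sketch of that calculation, not code from the plugin; block_interval_ms = 500 reflects EOSIO's config::block_interval_ms, and the 1100 ms figure matches the delay injected by the test added later in this series.

# Illustrative sketch only (not plugin code): the block allowance PATCH 1/6 derives
# from network latency. Assumes EOSIO's 500 ms block interval (config::block_interval_ms).
block_interval_ms = 500

def blocks_behind_by_latency(network_latency_ms):
    # one block of slack, plus one block for every full block interval spent in transit
    return 1 + int(network_latency_ms // block_interval_ms)

for latency_ms in (50, 499, 500, 1100):
    print(latency_ms, "ms latency ->", blocks_behind_by_latency(latency_ms), "block(s) of lib discrepancy tolerated")

With the 1100 ms delay used by the test, a peer lib up to 3 blocks ahead of ours is still treated as caught up (the sync 0 branch) instead of triggering a lib catch-up notice.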
# This is the 1st commit message: Update amazon_linux-2-pinned.dockerfile # This is the commit message #2: Update centos-7.7-pinned.dockerfile # This is the commit message #3: Update ubuntu-18.04-pinned.dockerfile # This is the commit message #4: Update ubuntu-20.04-pinned.dockerfile # This is the commit message #5: Update amazon_linux-2-unpinned.dockerfile # This is the commit message #6: Update centos-7.7-unpinned.dockerfile # This is the commit message #7: Update ubuntu-18.04-unpinned.dockerfile # This is the commit message #8: Update ubuntu-20.04-unpinned.dockerfile # This is the commit message #9: Update protocol.hpp # This is the commit message #10: Update net_plugin.cpp # This is the commit message #11: Update CMakeLists.txt --- .../pinned/amazon_linux-2-pinned.dockerfile | 3 +- .../pinned/centos-7.7-pinned.dockerfile | 1 + .../pinned/ubuntu-18.04-pinned.dockerfile | 3 +- .../pinned/ubuntu-20.04-pinned.dockerfile | 3 +- .../amazon_linux-2-unpinned.dockerfile | 3 +- .../unpinned/centos-7.7-unpinned.dockerfile | 3 +- .../unpinned/ubuntu-18.04-unpinned.dockerfile | 3 +- .../unpinned/ubuntu-20.04-unpinned.dockerfile | 3 +- .../include/eosio/net_plugin/protocol.hpp | 2 +- plugins/net_plugin/net_plugin.cpp | 65 +++++++++---------- tests/CMakeLists.txt | 4 ++ 11 files changed, 52 insertions(+), 41 deletions(-) diff --git a/.cicd/platforms/pinned/amazon_linux-2-pinned.dockerfile b/.cicd/platforms/pinned/amazon_linux-2-pinned.dockerfile index 2e7182bf250..c2a0eba4287 100644 --- a/.cicd/platforms/pinned/amazon_linux-2-pinned.dockerfile +++ b/.cicd/platforms/pinned/amazon_linux-2-pinned.dockerfile @@ -1,12 +1,13 @@ FROM amazonlinux:2.0.20190508 ENV VERSION 1 # install dependencies. +# iproute-tc configures traffic control for p2p_high_latency_test.py test RUN yum update -y && \ yum install -y which git sudo procps-ng util-linux autoconf automake \ libtool make bzip2 bzip2-devel openssl-devel gmp-devel libstdc++ libcurl-devel \ libusbx-devel python3 python3-devel python-devel libedit-devel doxygen \ graphviz patch gcc gcc-c++ vim-common jq net-tools \ - libuuid-devel libtasn1-devel expect socat libseccomp-devel && \ + libuuid-devel libtasn1-devel expect socat libseccomp-devel iproute-tc&& \ yum clean all && rm -rf /var/cache/yum # install erlang and rabbitmq RUN curl -s https://packagecloud.io/install/repositories/rabbitmq/erlang/script.rpm.sh | bash && \ diff --git a/.cicd/platforms/pinned/centos-7.7-pinned.dockerfile b/.cicd/platforms/pinned/centos-7.7-pinned.dockerfile index 51b10d1276c..803365cdff5 100644 --- a/.cicd/platforms/pinned/centos-7.7-pinned.dockerfile +++ b/.cicd/platforms/pinned/centos-7.7-pinned.dockerfile @@ -1,6 +1,7 @@ FROM centos:7.7.1908 ENV VERSION 1 # install dependencies. +# iproute configures traffic control for p2p_high_latency_test.py test RUN yum update -y && \ yum install -y epel-release && \ yum --enablerepo=extras install -y centos-release-scl && \ diff --git a/.cicd/platforms/pinned/ubuntu-18.04-pinned.dockerfile b/.cicd/platforms/pinned/ubuntu-18.04-pinned.dockerfile index dc32d185916..51676ace3f8 100644 --- a/.cicd/platforms/pinned/ubuntu-18.04-pinned.dockerfile +++ b/.cicd/platforms/pinned/ubuntu-18.04-pinned.dockerfile @@ -1,6 +1,7 @@ FROM ubuntu:18.04 ENV VERSION 1 # install dependencies. 
+# iproute2 configures traffic control for p2p_high_latency_test.py test RUN apt-get update && \ apt-get upgrade -y && \ DEBIAN_FRONTEND=noninteractive apt-get install -y git make \ @@ -9,7 +10,7 @@ RUN apt-get update && \ python3-dev python-configparser python-requests python-pip \ autoconf libtool g++ gcc curl zlib1g-dev sudo ruby libusb-1.0-0-dev \ libcurl4-gnutls-dev pkg-config patch vim-common jq rabbitmq-server \ - libtasn1-dev libnss3-dev iproute2 expect gawk socat python3-pip libseccomp-dev uuid-dev && \ + libtasn1-dev libnss3-dev iproute2 expect gawk socat python3-pip libseccomp-dev uuid-dev iproute2 && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* # install request and requests_unixsocket module diff --git a/.cicd/platforms/pinned/ubuntu-20.04-pinned.dockerfile b/.cicd/platforms/pinned/ubuntu-20.04-pinned.dockerfile index bb082d17fe3..04ed81ce57a 100644 --- a/.cicd/platforms/pinned/ubuntu-20.04-pinned.dockerfile +++ b/.cicd/platforms/pinned/ubuntu-20.04-pinned.dockerfile @@ -1,6 +1,7 @@ FROM ubuntu:20.04 ENV VERSION 1 # install dependencies. +# iproute2 configures traffic control for p2p_high_latency_test.py test RUN apt-get update && \ apt-get upgrade -y && \ DEBIAN_FRONTEND=noninteractive apt-get install -y git make \ @@ -8,7 +9,7 @@ RUN apt-get update && \ autotools-dev python2.7 python2.7-dev python3 \ python3-dev python-configparser python3-pip \ autoconf libtool g++ gcc curl zlib1g-dev sudo ruby libusb-1.0-0-dev \ - libcurl4-gnutls-dev pkg-config patch vim-common jq gnupg rabbitmq-server && \ + libcurl4-gnutls-dev pkg-config patch vim-common jq gnupg rabbitmq-server iproute2 && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* # install request and requests_unixsocket module diff --git a/.cicd/platforms/unpinned/amazon_linux-2-unpinned.dockerfile b/.cicd/platforms/unpinned/amazon_linux-2-unpinned.dockerfile index 01a73c50120..6dffd41a749 100644 --- a/.cicd/platforms/unpinned/amazon_linux-2-unpinned.dockerfile +++ b/.cicd/platforms/unpinned/amazon_linux-2-unpinned.dockerfile @@ -1,11 +1,12 @@ FROM amazonlinux:2.0.20190508 ENV VERSION 1 # install dependencies. +# iproute-tc configures traffic control for p2p_high_latency_test.py test RUN yum update -y && \ yum install -y which git sudo procps-ng util-linux autoconf automake \ libtool make bzip2 bzip2-devel openssl-devel gmp-devel libstdc++ libcurl-devel \ libusbx-devel python3 python3-devel python-devel python3-pip libedit-devel doxygen \ - graphviz clang patch llvm-devel llvm-static vim-common jq && \ + graphviz clang patch llvm-devel llvm-static vim-common jq iproute-tc && \ yum clean all && rm -rf /var/cache/yum # install erlang and rabbitmq RUN curl -s https://packagecloud.io/install/repositories/rabbitmq/erlang/script.rpm.sh | bash && \ diff --git a/.cicd/platforms/unpinned/centos-7.7-unpinned.dockerfile b/.cicd/platforms/unpinned/centos-7.7-unpinned.dockerfile index dd9e33085e4..7823a8dc886 100644 --- a/.cicd/platforms/unpinned/centos-7.7-unpinned.dockerfile +++ b/.cicd/platforms/unpinned/centos-7.7-unpinned.dockerfile @@ -1,6 +1,7 @@ FROM centos:7.7.1908 ENV VERSION 1 # install dependencies. 
+# iproute configures traffic control for p2p_high_latency_test.py test RUN yum update -y && \ yum install -y epel-release && \ yum --enablerepo=extras install -y centos-release-scl && \ @@ -8,7 +9,7 @@ RUN yum update -y && \ yum --enablerepo=extras install -y which git autoconf automake libtool make bzip2 doxygen \ graphviz bzip2-devel openssl-devel gmp-devel ocaml \ python python-devel rh-python36 file libusbx-devel \ - libcurl-devel patch vim-common jq llvm-toolset-7.0-llvm-devel llvm-toolset-7.0-llvm-static && \ + libcurl-devel patch vim-common jq llvm-toolset-7.0-llvm-devel llvm-toolset-7.0-llvm-static iproute && \ yum clean all && rm -rf /var/cache/yum # install erlang and rabbitmq RUN curl -s https://packagecloud.io/install/repositories/rabbitmq/erlang/script.rpm.sh | bash && \ diff --git a/.cicd/platforms/unpinned/ubuntu-18.04-unpinned.dockerfile b/.cicd/platforms/unpinned/ubuntu-18.04-unpinned.dockerfile index 2c39a1dabe0..695b68975fc 100644 --- a/.cicd/platforms/unpinned/ubuntu-18.04-unpinned.dockerfile +++ b/.cicd/platforms/unpinned/ubuntu-18.04-unpinned.dockerfile @@ -1,13 +1,14 @@ FROM ubuntu:18.04 ENV VERSION 1 # install dependencies. +# iproute2 configures traffic control for p2p_high_latency_test.py test RUN apt-get update && \ apt-get upgrade -y && \ DEBIAN_FRONTEND=noninteractive apt-get install -y git make \ bzip2 automake libbz2-dev libssl-dev doxygen graphviz libgmp3-dev \ autotools-dev python2.7 python2.7-dev python3 python3-dev python3-pip \ autoconf libtool curl zlib1g-dev sudo ruby libusb-1.0-0-dev \ - libcurl4-gnutls-dev pkg-config patch llvm-7-dev clang-7 vim-common jq rabbitmq-server && \ + libcurl4-gnutls-dev pkg-config patch llvm-7-dev clang-7 vim-common jq rabbitmq-server iproute2 && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* # install request and requests_unixsocket module diff --git a/.cicd/platforms/unpinned/ubuntu-20.04-unpinned.dockerfile b/.cicd/platforms/unpinned/ubuntu-20.04-unpinned.dockerfile index 00685393ff5..04d6866ce7d 100644 --- a/.cicd/platforms/unpinned/ubuntu-20.04-unpinned.dockerfile +++ b/.cicd/platforms/unpinned/ubuntu-20.04-unpinned.dockerfile @@ -1,13 +1,14 @@ FROM ubuntu:20.04 ENV VERSION 1 # install dependencies. 
+# iproute2 configures traffic control for p2p_high_latency_test.py test RUN apt-get update && \ apt-get upgrade -y && \ DEBIAN_FRONTEND=noninteractive apt-get install -y git make \ bzip2 automake libbz2-dev libssl-dev doxygen graphviz libgmp3-dev \ autotools-dev python2.7 python2.7-dev python3 python3-dev python3-pip \ autoconf libtool curl zlib1g-dev sudo ruby libusb-1.0-0-dev \ - libcurl4-gnutls-dev pkg-config patch llvm-7-dev clang-7 vim-common jq g++ gnupg rabbitmq-server && \ + libcurl4-gnutls-dev pkg-config patch llvm-7-dev clang-7 vim-common jq g++ gnupg rabbitmq-server iproute2 && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* # install request and requests_unixsocket module diff --git a/plugins/net_plugin/include/eosio/net_plugin/protocol.hpp b/plugins/net_plugin/include/eosio/net_plugin/protocol.hpp index f1800df75d1..0be8801ddcd 100644 --- a/plugins/net_plugin/include/eosio/net_plugin/protocol.hpp +++ b/plugins/net_plugin/include/eosio/net_plugin/protocol.hpp @@ -29,7 +29,7 @@ namespace eosio { chain_id_type chain_id; ///< used to identify chain fc::sha256 node_id; ///< used to identify peers and prevent self-connect chain::public_key_type key; ///< authentication key; may be a producer or peer key, or empty - tstamp time{0}; + long long time{0}; // this value is nanoseconds fc::sha256 token; ///< digest of time to prove we own the private key of the key above chain::signature_type sig; ///< signature for the digest string p2p_address; diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index a107098dd3a..d86cc0420cd 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -49,6 +49,7 @@ namespace eosio { using connection_ptr = std::shared_ptr; using connection_wptr = std::weak_ptr; + namespace sc = std::chrono; template void verify_strand_in_this_thread(const Strand& strand, const char* func, int line) { @@ -138,6 +139,7 @@ namespace eosio { uint32_t sync_req_span{0}; connection_ptr sync_source; std::atomic sync_state{in_sync}; + static const uint32_t block_interval_ns = sc::duration_cast(sc::milliseconds(config::block_interval_ms)).count(); private: constexpr static auto stage_str( stages s ); @@ -242,9 +244,6 @@ namespace eosio { bool p2p_accept_transactions = true; bool p2p_reject_incomplete_blocks = true; - /// Peer clock may be no more than 1 second skewed from our clock, including network latency. - const std::chrono::system_clock::duration peer_authentication_interval{std::chrono::seconds{1}}; - chain_id_type chain_id; fc::sha256 node_id; string user_agent_name; @@ -1806,6 +1805,7 @@ namespace eosio { } sync_next_expected_num = std::max( lib_num + 1, sync_next_expected_num ); + // p2p_high_latency_test.py test depends on this exact log statement. 
peer_ilog( c, "Catching up with chain, our last req is ${cc}, theirs is ${t}", ("cc", sync_last_requested_num)("t", target) ); @@ -1839,36 +1839,37 @@ namespace eosio { sync_reset_lib_num(c, false); + long long current_time_ns = sc::duration_cast(sc::system_clock::now().time_since_epoch()).count(); + auto network_latency_ns = current_time_ns - msg.time; // net latency in nanoseconds + // number of blocks syncing node is behind from a peer node + uint32_t nblk_behind_by_net_latency = static_cast(network_latency_ns / block_interval_ns); + // Multiplied by 2 to compensate the time it takes for message to reach peer node, and plus 1 to compensate for integer division truncation + uint32_t nblk_combined_latency = 2 * nblk_behind_by_net_latency + 1; + // message in the log below is used in p2p_high_latency_test.py test + peer_dlog(c, "Network latency is ${lat}ms, ${num} blocks discrepancy by network latency, ${tot_num} blocks discrepancy expected once message received", + ("lat", network_latency_ns/1000000)("num", nblk_behind_by_net_latency)("tot_num", nblk_combined_latency)); //-------------------------------- // sync need checks; (lib == last irreversible block) // // 0. my head block id == peer head id means we are all caught up block wise // 1. my head block num < peer lib - send handshake (if not sent in handle_message) and wait for receipt of notice message to start syncing - // 2. my lib > peer head num - send an last_irr_catch_up notice if not the first generation + // 2. my lib > peer head num + nblk_combined_latency - send last_irr_catch_up notice if not the first generation // - // 3 my head block num < peer head block num - update sync state and send a catchup request - // 4 my head block num >= peer block num send a notice catchup if this is not the first generation + // 3 my head block num + nblk_combined_latency < peer head block num - update sync state and send a catchup request + // 4 my head block num >= peer block num + nblk_combined_latency send a notice catchup if this is not the first generation // 4.1 if peer appears to be on a different fork ( our_id_for( msg.head_num ) != msg.head_id ) // then request peer's blocks // //----------------------------- - uint32_t network_latency = (std::chrono::system_clock::now().time_since_epoch().count() - msg.time); //calculating network latency ==> current_time - handshake message timestamp - uint32_t num_block_behind_by_latency = 1 + (std::chrono::milliseconds(network_latency) / std::chrono::milliseconds(config::block_interval_ms)); // Number of blocks produced during networl latency period - if (head_id == msg.head_id || ( lib_num < msg.last_irreversible_block_num && msg.last_irreversible_block_num - lib_num <= num_block_behind_by_latency)) { + if (head_id == msg.head_id) { peer_ilog( c, "handshake lib ${lib}, head ${head}, head id ${id}.. 
sync 0", ("lib", msg.last_irreversible_block_num)("head", msg.head_num)("id", msg.head_id.str().substr(8,16)) ); c->syncing = false; - notice_message note; - note.known_blocks.mode = none; //block mode none - if (head_id != msg.head_id){ - note.known_trx.mode = none; // transaction mode none, since it is a few blocks behind - note.known_trx.pending = head; - } - else{ - note.known_trx.mode = catch_up; - note.known_trx.pending = 0; - } + notice_message note; + note.known_blocks.mode = none; + note.known_trx.mode = catch_up; + note.known_trx.pending = 0; c->enqueue( note ); return; } @@ -1881,7 +1882,7 @@ namespace eosio { } return; } - if (lib_num > msg.head_num ) { + if (lib_num > msg.head_num + nblk_combined_latency ) { peer_ilog( c, "handshake lib ${lib}, head ${head}, head id ${id}.. sync 2", ("lib", msg.last_irreversible_block_num)("head", msg.head_num)("id", msg.head_id.str().substr(8,16)) ); if (msg.generation > 1 || c->protocol_version > proto_base) { @@ -1896,13 +1897,13 @@ namespace eosio { return; } - if (head < msg.head_num ) { + if (head + nblk_combined_latency < msg.head_num ) { peer_ilog( c, "handshake lib ${lib}, head ${head}, head id ${id}.. sync 3", ("lib", msg.last_irreversible_block_num)("head", msg.head_num)("id", msg.head_id.str().substr(8,16)) ); c->syncing = false; verify_catchup(c, msg.head_num, msg.head_id); return; - } else { + } else if(head >= msg.head_num + nblk_combined_latency) { peer_ilog( c, "handshake lib ${lib}, head ${head}, head id ${id}.. sync 4", ("lib", msg.last_irreversible_block_num)("head", msg.head_num)("id", msg.head_id.str().substr(8,16)) ); if (msg.generation > 1 || c->protocol_version > proto_base) { @@ -1931,6 +1932,8 @@ namespace eosio { } } ); return; + } else { + peer_dlog( c, "Block discrepancy is within network latency range."); } } @@ -2724,9 +2727,11 @@ namespace eosio { pending_message_buffer.advance_read_ptr( message_length ); return true; } + peer_dlog( this, "received block ${num}, id ${id}..., latency: ${latency}", ("num", bh.block_num())("id", blk_id.str().substr(8,16)) ("latency", (fc::time_point::now() - bh.timestamp).count()/1000) ); + if( !my_impl->sync_master->syncing_with_peer() ) { // guard against peer thinking it needs to send us old blocks uint32_t lib = 0; std::tie( lib, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore ) = my_impl->get_chain_info(); @@ -2737,7 +2742,11 @@ namespace eosio { if( blk_num < last_sent_lib ) { peer_ilog( this, "received block ${n} less than sent lib ${lib}", ("n", blk_num)("lib", last_sent_lib) ); close(); - } else { + } + /*else if(blk_num + num_block_behind_by_latency >= lib){ + peer_ilog( this, "received block ${n} less than lib ${lib} which is beacuse net latency ${net}", ("n", blk_num)("lib", lib)("net", network_latency) ); + } */ + else { peer_ilog( this, "received block ${n} less than lib ${lib}", ("n", blk_num)("lib", lib) ); my_impl->sync_master->reset_last_requested_num(my_impl->sync_master->locked_sync_mutex()); enqueue( (sync_request_message) {0, 0} ); @@ -3598,15 +3607,6 @@ namespace eosio { } } - namespace sc = std::chrono; - sc::system_clock::duration msg_time(msg.time); - auto time = sc::system_clock::now().time_since_epoch(); - if(time - msg_time > peer_authentication_interval) { - fc_elog( logger, "Peer ${peer} sent a handshake with a timestamp skewed by more than ${time}.", - ("peer", msg.p2p_address)("time", "1 second")); // TODO Add to_variant for std::chrono::system_clock::duration - return false; - } - if(msg.sig != chain::signature_type() && msg.token 
!= sha256()) { sha256 hash = fc::sha256::hash(msg.time); if(hash != msg.token) { @@ -3654,7 +3654,6 @@ namespace eosio { // call from connection strand bool connection::populate_handshake( handshake_message& hello ) { - namespace sc = std::chrono; hello.network_version = net_version_base + net_version; uint32_t lib, head; std::tie( lib, std::ignore, head, diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e6365d85980..ff330d78d90 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -69,6 +69,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/rodeos_multi_ship_test.py ${CMAKE_CUR configure_file(${CMAKE_CURRENT_SOURCE_DIR}/rodeos_multi_ship_kill_restart.py ${CMAKE_CURRENT_BINARY_DIR}/rodeos_multi_ship_kill_restart.py COPYONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/rodeos_wasmQL_http_timeout.py ${CMAKE_CURRENT_BINARY_DIR}/rodeos_wasmQL_http_timeout.py COPYONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/read_only_query_tests.py ${CMAKE_CURRENT_BINARY_DIR}/read_only_query_tests.py COPYONLY) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/p2p_high_latency_test.py ${CMAKE_CURRENT_BINARY_DIR}/p2p_high_latency_test.py COPYONLY) #To run plugin_test with all log from blockchain displayed, put --verbose after --, i.e. plugin_test -- --verbose add_test(NAME plugin_test COMMAND plugin_test --report_level=detailed --color_output) @@ -221,6 +222,9 @@ set_property(TEST nodeos_run_check_lr_test PROPERTY LABELS long_running_tests) add_test(NAME nodeos_remote_lr_test COMMAND tests/nodeos_run_remote_test.py -v --clean-run --dump-error-detail WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) set_property(TEST nodeos_remote_lr_test PROPERTY LABELS long_running_tests) +add_test(NAME p2p_high_latency_test COMMAND tests/p2p_high_latency_test.py -v --clean-run --dump-error-detail WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) +set_property(TEST p2p_high_latency_test PROPERTY LABELS nonparallelizable_tests) + #add_test(NAME distributed_transactions_lr_test COMMAND tests/distributed-transactions-test.py -d 2 -p 21 -n 21 -v --clean-run --dump-error-detail WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) #set_property(TEST distributed_transactions_lr_test PROPERTY LABELS long_running_tests) From bf2e95624d858e0b73e0f6564b2423e8cabeb0ba Mon Sep 17 00:00:00 2001 From: Farhad Shahabi Date: Fri, 11 Mar 2022 10:40:24 -0500 Subject: [PATCH 3/6] Update amazon_linux-2-pinned.dockerfile Update centos-7.7-pinned.dockerfile Update ubuntu-18.04-pinned.dockerfile Update ubuntu-20.04-pinned.dockerfile Update amazon_linux-2-unpinned.dockerfile Update centos-7.7-unpinned.dockerfile Update ubuntu-18.04-unpinned.dockerfile Update ubuntu-20.04-unpinned.dockerfile Update protocol.hpp Update net_plugin.cpp Update CMakeLists.txt Update test.sh --- .cicd/test.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.cicd/test.sh b/.cicd/test.sh index e76881c20c1..4211b074e37 100755 --- a/.cicd/test.sh +++ b/.cicd/test.sh @@ -15,7 +15,8 @@ else # Linux TEST_COMMAND="'\"'$MOUNTED_DIR/$1'\"' ${@: 2}" COMMANDS="echo \"$ $TEST_COMMAND\" && eval $TEST_COMMAND" . "$HELPERS_DIR/file-hash.sh" "$CICD_DIR/platforms/$PLATFORM_TYPE/$IMAGE_TAG.dockerfile" - DOCKER_RUN_COMMAND="docker run --rm --init -v \"\$(pwd):$MOUNTED_DIR\" $(buildkite-intrinsics) -e JOBS -e BUILDKITE_API_KEY '$FULL_TAG' bash -c '$COMMANDS'" + # --cap-add=NET_ADMIN needed to run tc (traffic control in linux kernel) inside docker for p2p_high_latency_test.py test. 
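Stepping back from the CI wiring for a moment: the net effect of the PATCH 2/6 handshake changes above can be seen in one place with the model below. This Python is only a sketch of the revised decision thresholds, not the plugin's C++ (which lives in sync_manager in net_plugin.cpp); the names follow the patch, the 500 ms block interval is assumed from config::block_interval_ms, and fork handling (case 4.1) is omitted.

# Model only: the revised "sync need checks" from PATCH 2/6, using the round-trip
# allowance nblk_combined_latency = 2 * blocks_behind_by_one_way_latency + 1.
BLOCK_INTERVAL_NS = 500 * 1_000_000   # assumed EOSIO block interval

def combined_latency_allowance(network_latency_ns):
    # blocks produced while the handshake travelled to us, doubled for the reply path, plus 1 for truncation
    nblk_behind_by_net_latency = network_latency_ns // BLOCK_INTERVAL_NS
    return 2 * nblk_behind_by_net_latency + 1

def handshake_decision(head, lib_num, head_id, peer_head, peer_lib, peer_head_id, allowance):
    # Mirrors the numbered checks in the patch comments; fork handling (4.1) is omitted.
    if head_id == peer_head_id:
        return "sync 0: caught up"
    if head < peer_lib:
        return "sync 1: send handshake and wait for notice"
    if lib_num > peer_head + allowance:
        return "sync 2: send last_irr_catch_up notice"
    if head + allowance < peer_head:
        return "sync 3: request catch-up from peer"
    if head >= peer_head + allowance:
        return "sync 4: notify peer to catch up"
    return "discrepancy within network latency range: do nothing"

allowance = combined_latency_allowance(1_100 * 1_000_000)   # 1100 ms one-way latency -> allowance of 5 blocks
print(handshake_decision(head=100, lib_num=70, head_id="a", peer_head=103, peer_lib=72, peer_head_id="b", allowance=allowance))

Without the allowance, the same handshake would take the sync 3 branch and the lagging node would keep requesting catch-up it can never complete, which is the loop the new test checks for.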
+ DOCKER_RUN_COMMAND="--cap-add=NET_ADMIN --rm --init -v \"\$(pwd):$MOUNTED_DIR\" $(buildkite-intrinsics) -e JOBS -e BUILDKITE_API_KEY '$FULL_TAG' bash -c '$COMMANDS'" set +e # defer error handling to end echo "$ $DOCKER_RUN_COMMAND" eval $DOCKER_RUN_COMMAND From e7f49b5f1ae7e51ff0a13c704d9cdaa9897bb875 Mon Sep 17 00:00:00 2001 From: Farhad Shahabi Date: Fri, 11 Mar 2022 11:03:19 -0500 Subject: [PATCH 4/6] some clean up Added updated test --- .../pinned/amazon_linux-2-pinned.dockerfile | 2 +- plugins/net_plugin/net_plugin.cpp | 5 - tests/p2p_high_latency_test.py | 112 ++++++++++++++++++ 3 files changed, 113 insertions(+), 6 deletions(-) create mode 100755 tests/p2p_high_latency_test.py diff --git a/.cicd/platforms/pinned/amazon_linux-2-pinned.dockerfile b/.cicd/platforms/pinned/amazon_linux-2-pinned.dockerfile index c2a0eba4287..c4d68036fa5 100644 --- a/.cicd/platforms/pinned/amazon_linux-2-pinned.dockerfile +++ b/.cicd/platforms/pinned/amazon_linux-2-pinned.dockerfile @@ -7,7 +7,7 @@ RUN yum update -y && \ libtool make bzip2 bzip2-devel openssl-devel gmp-devel libstdc++ libcurl-devel \ libusbx-devel python3 python3-devel python-devel libedit-devel doxygen \ graphviz patch gcc gcc-c++ vim-common jq net-tools \ - libuuid-devel libtasn1-devel expect socat libseccomp-devel iproute-tc&& \ + libuuid-devel libtasn1-devel expect socat libseccomp-devel iproute-tc && \ yum clean all && rm -rf /var/cache/yum # install erlang and rabbitmq RUN curl -s https://packagecloud.io/install/repositories/rabbitmq/erlang/script.rpm.sh | bash && \ diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index d86cc0420cd..aafd8d8615c 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -2727,11 +2727,9 @@ namespace eosio { pending_message_buffer.advance_read_ptr( message_length ); return true; } - peer_dlog( this, "received block ${num}, id ${id}..., latency: ${latency}", ("num", bh.block_num())("id", blk_id.str().substr(8,16)) ("latency", (fc::time_point::now() - bh.timestamp).count()/1000) ); - if( !my_impl->sync_master->syncing_with_peer() ) { // guard against peer thinking it needs to send us old blocks uint32_t lib = 0; std::tie( lib, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore ) = my_impl->get_chain_info(); @@ -2743,9 +2741,6 @@ namespace eosio { peer_ilog( this, "received block ${n} less than sent lib ${lib}", ("n", blk_num)("lib", last_sent_lib) ); close(); } - /*else if(blk_num + num_block_behind_by_latency >= lib){ - peer_ilog( this, "received block ${n} less than lib ${lib} which is beacuse net latency ${net}", ("n", blk_num)("lib", lib)("net", network_latency) ); - } */ else { peer_ilog( this, "received block ${n} less than lib ${lib}", ("n", blk_num)("lib", lib) ); my_impl->sync_master->reset_last_requested_num(my_impl->sync_master->locked_sync_mutex()); diff --git a/tests/p2p_high_latency_test.py b/tests/p2p_high_latency_test.py new file mode 100755 index 00000000000..53b316f8e16 --- /dev/null +++ b/tests/p2p_high_latency_test.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +from testUtils import Utils, WaitSpec +from Cluster import Cluster +from WalletMgr import WalletMgr +from TestHelper import TestHelper +import signal +import platform +import subprocess +import time +import re + +############################################################### +# p2p connection in high latency network for one producer and one syning node cluster. +# +# This test simulates p2p connections in high latency network. 
+# The test case has one producer node and one syncing node, with 1100 ms of latency introduced on their p2p connection.
+# The expected behavior is that the producer recognizes the network latency and does not put the syncing node into lib catch-up mode.
+# Because the syncing node is always behind by roughly the latency, sending lib catch-up notices would be useless: the producer/peer
+# node would get caught in an endless loop of sending lib catch-ups to the syncing node.
+###############################################################
+
+def readlogs(node_num, net_latency):
+    filename = 'var/lib/node_0{}/stderr.txt'.format(node_num)
+    f = subprocess.Popen(['tail','-F',filename],\
+        stdout=subprocess.PIPE,stderr=subprocess.PIPE)
+    latRegex = re.compile(r'\d+ms')
+    t_end = time.time() + 80 # cluster runs for 80 seconds while logs are being processed
+    while time.time() <= t_end:
+        line = f.stdout.readline().decode("utf-8")
+        print(line)
+        if 'info' in line and 'Catching up with chain, our last req is ' in line:
+            Utils.Print("Syncing node is catching up with chain, but it should not due to the network latency")
+            return False
+        if 'debug' in line and 'Network latency' in line and float(latRegex.search(line).group()[:-2]) < 0.8 * net_latency:
+            Utils.Print("Network latency is lower than expected.")
+            return False
+
+    return True
+def exec(cmd):
+    process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    out, err = process.communicate()
+    process.wait()
+    process.stdout.close()
+    process.stderr.close()
+    return err, process.returncode
+
+Print=Utils.Print
+
+args = TestHelper.parse_args({"--dump-error-details","--keep-logs","-v","--leave-running","--clean-run"})
+Utils.Debug=args.v
+
+producers=1
+syncingNodes=1
+totalNodes=producers+syncingNodes
+cluster=Cluster(walletd=True)
+dumpErrorDetails=args.dump_error_details
+keepLogs=args.keep_logs
+dontKill=args.leave_running
+killAll=args.clean_run
+
+testSuccessful=False
+killEosInstances=not dontKill
+
+specificExtraNodeosArgs={}
+producerNodeId=0
+syncingNodeId=1
+
+specificExtraNodeosArgs[producerNodeId]=" --p2p-listen-endpoint 0.0.0.0:{}".format(9876+producerNodeId)
+specificExtraNodeosArgs[syncingNodeId]="--p2p-peer-address 0.0.0.0:{}".format(9876+producerNodeId)
+
+try:
+    TestHelper.printSystemInfo("BEGIN")
+    cluster.killall(allInstances=killAll)
+    cluster.cleanup()
+    traceNodeosArgs=" --plugin eosio::trace_api_plugin --trace-no-abis --plugin eosio::producer_plugin --produce-time-offset-us 0 --last-block-time-offset-us 0 --cpu-effort-percent 100 \
+                    --last-block-cpu-effort-percent 100 --producer-threads 1 --plugin eosio::net_plugin --net-threads 1"
+    if cluster.launch(pnodes=1, totalNodes=totalNodes, totalProducers=1, useBiosBootFile=False, specificExtraNodeosArgs=specificExtraNodeosArgs, extraNodeosArgs=traceNodeosArgs) is False:
+        Utils.cmdError("launcher")
+        Utils.errorExit("Failed to stand up eos cluster.")
+
+    cluster.waitOnClusterSync(blockAdvancing=5)
+    Utils.Print("Cluster in Sync")
+    cluster.biosNode.kill(signal.SIGTERM)
+    Utils.Print("Bios node killed")
+    latency = 1100 # 1100 milliseconds
+    # adding latency to all inbound and outbound traffic
+    Utils.Print( "adding {}ms latency to network.".format(latency) )
+    if platform.system() == 'Darwin':
+        cmd = 'sudo dnctl pipe 1 config delay {} && \
+               echo "dummynet out proto tcp from any to any pipe 1" | sudo pfctl -f - && \
+               sudo pfctl -e'.format(latency)
+    else:
+        cmd = 'tc qdisc add dev lo root netem delay {}ms'.format(latency)
+    err, ReturnCode = exec(cmd)
+    if ReturnCode != 0:
print(err.decode("utf-8")) # print error details of network slowdown initialization commands + Utils.errorExit("failed to initialize network latency, exited with error code {}".format(ReturnCode)) + # processing logs to make sure syncing node doesn't get into lib catch up mode. + testSuccessful=readlogs(syncingNodeId, latency) + if platform.system() == 'Darwin': + cmd = 'sudo pfctl -f /etc/pf.conf && \ + sudo dnctl -q flush && sudo pfctl -d' + else: + cmd = 'tc qdisc del dev lo root netem' + err, ReturnCode = exec(cmd) + if ReturnCode != 0: + print(err.decode("utf-8")) # print error details of network slowdown termination commands + Utils.errorExit("failed to remove network latency, exited with error code {}".format(ReturnCode)) +finally: + TestHelper.shutdown(cluster, None, testSuccessful, killEosInstances, False, keepLogs, killAll, dumpErrorDetails) + +exit(0) \ No newline at end of file From da22f9059f1720545848b1df29681edacfc28a3d Mon Sep 17 00:00:00 2001 From: Farhad Shahabi Date: Fri, 11 Mar 2022 14:33:34 -0500 Subject: [PATCH 5/6] Update centos-7.7-pinned.dockerfile --- .cicd/platforms/pinned/centos-7.7-pinned.dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cicd/platforms/pinned/centos-7.7-pinned.dockerfile b/.cicd/platforms/pinned/centos-7.7-pinned.dockerfile index 803365cdff5..9fa90c42155 100644 --- a/.cicd/platforms/pinned/centos-7.7-pinned.dockerfile +++ b/.cicd/platforms/pinned/centos-7.7-pinned.dockerfile @@ -124,5 +124,5 @@ RUN cp ~/.bashrc ~/.bashrc.bak && \ # install node 10 RUN bash -c '. ~/.bashrc; nvm install --lts=dubnium' && \ ln -s "/root/.nvm/versions/node/$(ls -p /root/.nvm/versions/node | sort -Vr | head -1)bin/node" /usr/local/bin/node -RUN yum install -y nodejs && \ +RUN yum install -y nodejs npm && \ yum clean all && rm -rf /var/cache/yum From 0c6a5c7d3549ef516321727b1cd02b8b4690a45f Mon Sep 17 00:00:00 2001 From: Farhad Shahabi Date: Fri, 11 Mar 2022 14:33:37 -0500 Subject: [PATCH 6/6] Update centos-7.7-unpinned.dockerfile --- .cicd/platforms/unpinned/centos-7.7-unpinned.dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cicd/platforms/unpinned/centos-7.7-unpinned.dockerfile b/.cicd/platforms/unpinned/centos-7.7-unpinned.dockerfile index 7823a8dc886..2955ba71168 100644 --- a/.cicd/platforms/unpinned/centos-7.7-unpinned.dockerfile +++ b/.cicd/platforms/unpinned/centos-7.7-unpinned.dockerfile @@ -50,5 +50,5 @@ RUN cp ~/.bashrc ~/.bashrc.bak && \ # install node 10 RUN bash -c '. ~/.bashrc; nvm install --lts=dubnium' && \ ln -s "/root/.nvm/versions/node/$(ls -p /root/.nvm/versions/node | sort -Vr | head -1)bin/node" /usr/local/bin/node -RUN yum install -y nodejs && \ +RUN yum install -y nodejs npm && \ yum clean all && rm -rf /var/cache/yum
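As a closing note on the test added in PATCH 4/6: the pattern it relies on is add latency on the loopback device with tc netem, watch the syncing node's log for the catch-up and latency messages, then remove the qdisc again (which is why PATCH 3/6 grants NET_ADMIN to the CI container). Below is a minimal sketch of that pattern, not part of the patch set itself, wrapped in a context manager so the qdisc is removed even if the check fails; it is Linux-only and omits the macOS dnctl/pfctl branch the test also carries.

# Minimal sketch (not part of the patch set): the add-latency / check / remove-latency
# pattern used by p2p_high_latency_test.py, expressed as a context manager.
# Linux-only; requires root or CAP_NET_ADMIN, as noted for the CI container above.
import subprocess
from contextlib import contextmanager

def run(cmd):
    completed = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    if completed.returncode != 0:
        raise RuntimeError("'{}' failed: {}".format(cmd, completed.stderr.strip()))

@contextmanager
def loopback_latency(delay_ms):
    run("tc qdisc add dev lo root netem delay {}ms".format(delay_ms))   # same command the test issues
    try:
        yield
    finally:
        run("tc qdisc del dev lo root netem")                           # cleanup runs even on failure

if __name__ == "__main__":
    with loopback_latency(1100):
        pass  # launch the cluster and scan the syncing node's stderr log here, as the test does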