From 7a4ced0eb016bbf16335e869a68bcffaf1beaa67 Mon Sep 17 00:00:00 2001 From: Runhang Li Date: Wed, 1 Feb 2017 09:21:04 -0800 Subject: [PATCH 01/12] Repeat test on MultiSpoutsMultiTasks. --- .../MultiSpoutsMultiTasks.java | 4 +-- .../resources/MultiSpoutsMultiTasks.json | 2 +- scripts/travis/test.sh | 29 ++++++++++--------- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/integration-test/src/java/com/twitter/heron/integration_test/topology/multi_spouts_multi_tasks/MultiSpoutsMultiTasks.java b/integration-test/src/java/com/twitter/heron/integration_test/topology/multi_spouts_multi_tasks/MultiSpoutsMultiTasks.java index feebc4193b2..733f6d88440 100644 --- a/integration-test/src/java/com/twitter/heron/integration_test/topology/multi_spouts_multi_tasks/MultiSpoutsMultiTasks.java +++ b/integration-test/src/java/com/twitter/heron/integration_test/topology/multi_spouts_multi_tasks/MultiSpoutsMultiTasks.java @@ -32,8 +32,8 @@ private MultiSpoutsMultiTasks(String[] args) throws MalformedURLException { @Override protected TestTopologyBuilder buildTopology(TestTopologyBuilder builder) { - builder.setSpout("ab-spout-1", new ABSpout(), 3); - builder.setSpout("ab-spout-2", new ABSpout(), 3); + builder.setSpout("ab-spout-1", new ABSpout(true), 3); + builder.setSpout("ab-spout-2", new ABSpout(true), 3); builder.setBolt("identity-bolt", new IdentityBolt(new Fields("word")), 1) .shuffleGrouping("ab-spout-1") .shuffleGrouping("ab-spout-2"); diff --git a/integration-test/src/java/com/twitter/heron/integration_test/topology/multi_spouts_multi_tasks/resources/MultiSpoutsMultiTasks.json b/integration-test/src/java/com/twitter/heron/integration_test/topology/multi_spouts_multi_tasks/resources/MultiSpoutsMultiTasks.json index 5b9de292633..0ce24647b12 100644 --- a/integration-test/src/java/com/twitter/heron/integration_test/topology/multi_spouts_multi_tasks/resources/MultiSpoutsMultiTasks.json +++ b/integration-test/src/java/com/twitter/heron/integration_test/topology/multi_spouts_multi_tasks/resources/MultiSpoutsMultiTasks.json @@ -1 +1 @@ -["A", "B", "A", "B", "A", "B", "A", "B", "A", "B","A", "B", "A", "B", "A", "B", "A", "B", "A", "B","A", "B", "A", "B", "B", "A", "A", "A", "B", "B","A", "B", "A", "B", "A", "B", "A", "B", "A", "B","A", "B", "A", "B", "A", "B", "A", "B", "A", "B","A", "B", "A", "B", "B", "A", "A", "A", "B", "B"] \ No newline at end of file +["A_0", "A_0", "A_0", "A_0", "A_0", "A_0", "A_2", "A_2", "A_2", "A_2", "A_2", "A_2", "A_4", "A_4", "A_4", "A_4", "A_4", "A_4", "A_6", "A_6", "A_6", "A_6", "A_6", "A_6", "A_8", "A_8", "A_8", "A_8", "A_8", "A_8", "B_1", "B_1", "B_1", "B_1", "B_1", "B_1", "B_3", "B_3", "B_3", "B_3", "B_3", "B_3", "B_5", "B_5", "B_5", "B_5", "B_5", "B_5", "B_7", "B_7", "B_7", "B_7", "B_7", "B_7", "B_9", "B_9", "B_9", "B_9", "B_9", "B_9"] \ No newline at end of file diff --git a/scripts/travis/test.sh b/scripts/travis/test.sh index 1fccb073ae1..aeaa54a877f 100755 --- a/scripts/travis/test.sh +++ b/scripts/travis/test.sh @@ -42,21 +42,24 @@ start_timer "$T" http_server_id=$! trap "kill -9 $http_server_id" SIGINT SIGTERM EXIT -./bazel-bin/integration-test/src/python/test_runner/test-runner.pex \ - -hc heron -tb ${JAVA_INTEGRATION_TESTS_BIN} \ - -rh localhost -rp 8080\ - -tp integration-test/src/java/com/twitter/heron/integration_test/topology/ \ - -cl local -rl heron-staging -ev devel +for i in `seq 1 50`; do + ./bazel-bin/integration-test/src/python/test_runner/test-runner.pex \ + -hc heron -tb ${JAVA_INTEGRATION_TESTS_BIN} \ + -rh localhost -rp 8080\ + -tp integration-test/src/java/com/twitter/heron/integration_test/topology/ \ + -cl local -rl heron-staging -ev devel \ + -ts 'IntegrationTest_MultiSpoutsMultiTasks' +done end_timer "$T" # run the python integration test -T="heron integration-test python" -start_timer "$T" -./bazel-bin/integration-test/src/python/test_runner/test-runner.pex \ - -hc heron -tb ${PYTHON_INTEGRATION_TESTS_BIN} \ - -rh localhost -rp 8080\ - -tp integration-test/src/python/integration_test/topology/ \ - -cl local -rl heron-staging -ev devel -end_timer "$T" +# T="heron integration-test python" +# start_timer "$T" +# ./bazel-bin/integration-test/src/python/test_runner/test-runner.pex \ +# -hc heron -tb ${PYTHON_INTEGRATION_TESTS_BIN} \ +# -rh localhost -rp 8080\ +# -tp integration-test/src/python/integration_test/topology/ \ +# -cl local -rl heron-staging -ev devel +# end_timer "$T" print_timer_summary From 6fd436de7fa6ba09f4203b6a72b88857f36dcd40 Mon Sep 17 00:00:00 2001 From: Runhang Li Date: Wed, 1 Feb 2017 09:25:26 -0800 Subject: [PATCH 02/12] Remove unit tests. --- scripts/travis/build.sh | 19 ------------------- scripts/travis/test.sh | 18 ++++-------------- 2 files changed, 4 insertions(+), 33 deletions(-) diff --git a/scripts/travis/build.sh b/scripts/travis/build.sh index db89a4c63b7..1b368fc0125 100755 --- a/scripts/travis/build.sh +++ b/scripts/travis/build.sh @@ -62,25 +62,6 @@ python ${DIR}/save-logs.py "heron_build.txt" bazel\ --bazelrc=tools/travis-ci/bazel.rc build --config=ubuntu heron/... end_timer "$T" -# run heron unit tests -T="heron test non-flaky" -start_timer "$T" -python ${DIR}/save-logs.py "heron_test_non_flaky.txt" bazel\ - --bazelrc=tools/travis-ci/bazel.rc test\ - --test_summary=detailed --test_output=errors\ - --config=ubuntu --test_tag_filters=-flaky heron/... -end_timer "$T" - -# flaky tests are often due to test port race conditions, -# which should be fixed. For now, run them serially -T="heron test flaky" -start_timer "$T" -python ${DIR}/save-logs.py "heron_test_flaky.txt" bazel\ - --bazelrc=tools/travis-ci/bazel.rc test\ - --test_summary=detailed --test_output=errors\ - --config=ubuntu --test_tag_filters=flaky --jobs=0 heron/... -end_timer "$T" - # build packages T="heron build tarpkgs" start_timer "$T" diff --git a/scripts/travis/test.sh b/scripts/travis/test.sh index aeaa54a877f..173ddcb6258 100755 --- a/scripts/travis/test.sh +++ b/scripts/travis/test.sh @@ -30,10 +30,10 @@ python ${DIR}/save-logs.py "heron_tools_install.txt" bazel --bazelrc=tools/travi end_timer "$T" # run local integration test -T="heron integration-test local" -start_timer "$T" -python ./bazel-bin/integration-test/src/python/local_test_runner/local-test-runner -end_timer "$T" +# T="heron integration-test local" +# start_timer "$T" +# python ./bazel-bin/integration-test/src/python/local_test_runner/local-test-runner +# end_timer "$T" # run the java integration test T="heron integration-test java" @@ -52,14 +52,4 @@ for i in `seq 1 50`; do done end_timer "$T" -# run the python integration test -# T="heron integration-test python" -# start_timer "$T" -# ./bazel-bin/integration-test/src/python/test_runner/test-runner.pex \ -# -hc heron -tb ${PYTHON_INTEGRATION_TESTS_BIN} \ -# -rh localhost -rp 8080\ -# -tp integration-test/src/python/integration_test/topology/ \ -# -cl local -rl heron-staging -ev devel -# end_timer "$T" - print_timer_summary From b7402bb47912d52e2d9f68b048dc2074a12935a4 Mon Sep 17 00:00:00 2001 From: Runhang Li Date: Wed, 1 Feb 2017 11:50:00 -0800 Subject: [PATCH 03/12] Remove -e --- scripts/travis/build.sh | 2 -- scripts/travis/ci.sh | 2 -- scripts/travis/common.sh | 2 -- scripts/travis/test.sh | 1 - 4 files changed, 7 deletions(-) diff --git a/scripts/travis/build.sh b/scripts/travis/build.sh index 1b368fc0125..d1261d24f16 100755 --- a/scripts/travis/build.sh +++ b/scripts/travis/build.sh @@ -4,8 +4,6 @@ # of the below commands fail so we need to chain them in this script. # -set -e - DIR=`dirname $0` source ${DIR}/common.sh diff --git a/scripts/travis/ci.sh b/scripts/travis/ci.sh index 7ee1d177660..9160f09b604 100755 --- a/scripts/travis/ci.sh +++ b/scripts/travis/ci.sh @@ -1,7 +1,5 @@ #!/bin/bash -set -e - DIR=`dirname $0` source ${DIR}/common.sh diff --git a/scripts/travis/common.sh b/scripts/travis/common.sh index 6daa3f13c01..a156748ded3 100755 --- a/scripts/travis/common.sh +++ b/scripts/travis/common.sh @@ -1,7 +1,5 @@ #!/bin/bash -set -e - function die { echo "ERROR: $1" && exit 1; } diff --git a/scripts/travis/test.sh b/scripts/travis/test.sh index 173ddcb6258..a7c9b6ef05a 100755 --- a/scripts/travis/test.sh +++ b/scripts/travis/test.sh @@ -2,7 +2,6 @@ # # Script to kick off the travis CI integration test. Fail-fast if any of tthe below commands fail. # -set -e DIR=`dirname $0` source ${DIR}/common.sh From abd4227f2a76b2f028ceee423dd33c247dff11c2 Mon Sep 17 00:00:00 2001 From: Runhang Li Date: Wed, 1 Feb 2017 13:19:36 -0800 Subject: [PATCH 04/12] Continue loop on failure. --- scripts/travis/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/travis/test.sh b/scripts/travis/test.sh index a7c9b6ef05a..b7f07dbff21 100755 --- a/scripts/travis/test.sh +++ b/scripts/travis/test.sh @@ -47,7 +47,7 @@ for i in `seq 1 50`; do -rh localhost -rp 8080\ -tp integration-test/src/java/com/twitter/heron/integration_test/topology/ \ -cl local -rl heron-staging -ev devel \ - -ts 'IntegrationTest_MultiSpoutsMultiTasks' + -ts 'IntegrationTest_MultiSpoutsMultiTasks' || true done end_timer "$T" From 16f8279847a2291018a866bb1b6dc426a7d484c4 Mon Sep 17 00:00:00 2001 From: Runhang Li Date: Wed, 1 Feb 2017 16:41:26 -0800 Subject: [PATCH 05/12] Debug tuples in stmgr-server. --- heron/stmgr/src/cpp/manager/stmgr-server.cpp | 2 ++ scripts/travis/test.sh | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/heron/stmgr/src/cpp/manager/stmgr-server.cpp b/heron/stmgr/src/cpp/manager/stmgr-server.cpp index fced037edd0..447a0d8eb11 100644 --- a/heron/stmgr/src/cpp/manager/stmgr-server.cpp +++ b/heron/stmgr/src/cpp/manager/stmgr-server.cpp @@ -358,6 +358,8 @@ void StMgrServer::HandleTupleSetMessage(Connection* _conn, ->incr_by(_message->control().fails_size()); } stmgr_->HandleInstanceData(iter->second, instance_info_[iter->second]->local_spout_, _message); + LOG(INFO) << "Dumping tuple!" << std::endl; + LOG(INFO) << _message->DebugString() << std::endl; release(_message); } diff --git a/scripts/travis/test.sh b/scripts/travis/test.sh index b7f07dbff21..4f2ac195064 100755 --- a/scripts/travis/test.sh +++ b/scripts/travis/test.sh @@ -41,7 +41,7 @@ start_timer "$T" http_server_id=$! trap "kill -9 $http_server_id" SIGINT SIGTERM EXIT -for i in `seq 1 50`; do +for i in `seq 1 5`; do ./bazel-bin/integration-test/src/python/test_runner/test-runner.pex \ -hc heron -tb ${JAVA_INTEGRATION_TESTS_BIN} \ -rh localhost -rp 8080\ @@ -51,4 +51,6 @@ for i in `seq 1 50`; do done end_timer "$T" +tail -n +1 ~/.herondata/topologies/local/heron-staging/*MultiSpoutsMultiTasks*/log-files/*stmgr*.INFO + print_timer_summary From 2ddc2eedf62f063d0e2e4d4187b54151ecaab17d Mon Sep 17 00:00:00 2001 From: Runhang Li Date: Wed, 1 Feb 2017 16:53:47 -0800 Subject: [PATCH 06/12] Wildcard role. --- scripts/travis/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/travis/test.sh b/scripts/travis/test.sh index 4f2ac195064..51c7966cc4f 100755 --- a/scripts/travis/test.sh +++ b/scripts/travis/test.sh @@ -51,6 +51,6 @@ for i in `seq 1 5`; do done end_timer "$T" -tail -n +1 ~/.herondata/topologies/local/heron-staging/*MultiSpoutsMultiTasks*/log-files/*stmgr*.INFO +tail -n +1 ~/.herondata/topologies/local/*/*MultiSpoutsMultiTasks*/log-files/*stmgr*.INFO print_timer_summary From b3d6d749f2be886a6d0247e740a6b992754862b1 Mon Sep 17 00:00:00 2001 From: Runhang Li Date: Wed, 1 Feb 2017 17:11:15 -0800 Subject: [PATCH 07/12] MORE LOGS!! --- .../heron/network/StreamManagerClient.java | 1 + scripts/travis/test.sh | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/heron/instance/src/java/com/twitter/heron/network/StreamManagerClient.java b/heron/instance/src/java/com/twitter/heron/network/StreamManagerClient.java index 3072be1ba85..478792d44ad 100644 --- a/heron/instance/src/java/com/twitter/heron/network/StreamManagerClient.java +++ b/heron/instance/src/java/com/twitter/heron/network/StreamManagerClient.java @@ -288,6 +288,7 @@ private void handleNewTuples2(HeronTuples.HeronTupleSet2 set) { } HeronTuples.HeronTupleSet s = toFeed.build(); + LOG.info(s.toString()); inStreamQueue.offer(s); } diff --git a/scripts/travis/test.sh b/scripts/travis/test.sh index 51c7966cc4f..25fc5b6115e 100755 --- a/scripts/travis/test.sh +++ b/scripts/travis/test.sh @@ -41,16 +41,19 @@ start_timer "$T" http_server_id=$! trap "kill -9 $http_server_id" SIGINT SIGTERM EXIT -for i in `seq 1 5`; do - ./bazel-bin/integration-test/src/python/test_runner/test-runner.pex \ - -hc heron -tb ${JAVA_INTEGRATION_TESTS_BIN} \ - -rh localhost -rp 8080\ - -tp integration-test/src/java/com/twitter/heron/integration_test/topology/ \ - -cl local -rl heron-staging -ev devel \ - -ts 'IntegrationTest_MultiSpoutsMultiTasks' || true -done +# Run MultiSpoutsMultiTasks +./bazel-bin/integration-test/src/python/test_runner/test-runner.pex \ + -hc heron -tb ${JAVA_INTEGRATION_TESTS_BIN} \ + -rh localhost -rp 8080\ + -tp integration-test/src/java/com/twitter/heron/integration_test/topology/ \ + -cl local -rl heron-staging -ev devel \ + -ts 'IntegrationTest_MultiSpoutsMultiTasks' || true end_timer "$T" +# Dump out stream manager log tail -n +1 ~/.herondata/topologies/local/*/*MultiSpoutsMultiTasks*/log-files/*stmgr*.INFO +# Dump out Java program's logs +tail -n +1 ~/.herondata/topologies/local/rli/201702011700*/log-files/container_1*.log.0 + print_timer_summary From cd390965456b943b46d002bf03f87610443608db Mon Sep 17 00:00:00 2001 From: Runhang Li Date: Wed, 1 Feb 2017 17:13:33 -0800 Subject: [PATCH 08/12] Wildcard role, again. --- scripts/travis/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/travis/test.sh b/scripts/travis/test.sh index 25fc5b6115e..b4350713e13 100755 --- a/scripts/travis/test.sh +++ b/scripts/travis/test.sh @@ -54,6 +54,6 @@ end_timer "$T" tail -n +1 ~/.herondata/topologies/local/*/*MultiSpoutsMultiTasks*/log-files/*stmgr*.INFO # Dump out Java program's logs -tail -n +1 ~/.herondata/topologies/local/rli/201702011700*/log-files/container_1*.log.0 +tail -n +1 ~/.herondata/topologies/local/*/201702011700*/log-files/container_1*.log.0 print_timer_summary From 992f63e31f51d060bbe6c3cbc9fca2991d6316f6 Mon Sep 17 00:00:00 2001 From: Runhang Li Date: Wed, 1 Feb 2017 18:23:55 -0800 Subject: [PATCH 09/12] Correct error. --- scripts/travis/test.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/travis/test.sh b/scripts/travis/test.sh index b4350713e13..70fa30fbda1 100755 --- a/scripts/travis/test.sh +++ b/scripts/travis/test.sh @@ -51,9 +51,11 @@ trap "kill -9 $http_server_id" SIGINT SIGTERM EXIT end_timer "$T" # Dump out stream manager log +echo "DUMPING STMGR LOG" tail -n +1 ~/.herondata/topologies/local/*/*MultiSpoutsMultiTasks*/log-files/*stmgr*.INFO # Dump out Java program's logs -tail -n +1 ~/.herondata/topologies/local/*/201702011700*/log-files/container_1*.log.0 +echo "DUMPING JAVA PROGRAM LOG" +tail -n +1 ~/.herondata/topologies/local/*/*MultiSpoutsMultiTasks*/log-files/container*.log.0 print_timer_summary From a179f60b6b86a904b2c2ccbd75724c5c169ce910 Mon Sep 17 00:00:00 2001 From: Runhang Li Date: Thu, 2 Feb 2017 09:50:36 -0800 Subject: [PATCH 10/12] Improve script. --- scripts/travis/test.sh | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/scripts/travis/test.sh b/scripts/travis/test.sh index 70fa30fbda1..1e9577ffc42 100755 --- a/scripts/travis/test.sh +++ b/scripts/travis/test.sh @@ -42,20 +42,24 @@ http_server_id=$! trap "kill -9 $http_server_id" SIGINT SIGTERM EXIT # Run MultiSpoutsMultiTasks -./bazel-bin/integration-test/src/python/test_runner/test-runner.pex \ - -hc heron -tb ${JAVA_INTEGRATION_TESTS_BIN} \ - -rh localhost -rp 8080\ - -tp integration-test/src/java/com/twitter/heron/integration_test/topology/ \ - -cl local -rl heron-staging -ev devel \ - -ts 'IntegrationTest_MultiSpoutsMultiTasks' || true +for i in `seq 1 100`; do + rm -rf ~/.herondata + ./bazel-bin/integration-test/src/python/test_runner/test-runner.pex \ + -hc heron -tb ${JAVA_INTEGRATION_TESTS_BIN} \ + -rh localhost -rp 8080\ + -tp integration-test/src/java/com/twitter/heron/integration_test/topology/ \ + -cl local -rl heron-staging -ev devel \ + -ts 'IntegrationTest_MultiSpoutsMultiTasks' + RESULT=$? + if [ $RESULT -ne 0 ]; then + # Dump out stream manager log + echo "DUMPING STMGR LOG" + tail -n +1 ~/.herondata/topologies/local/*/*MultiSpoutsMultiTasks*/log-files/*stmgr*.INFO + # Dump out Java program's logs + echo "DUMPING JAVA PROGRAM LOG" + tail -n +1 ~/.herondata/topologies/local/*/*MultiSpoutsMultiTasks*/log-files/container*.log.0 + exit 1 + fi end_timer "$T" -# Dump out stream manager log -echo "DUMPING STMGR LOG" -tail -n +1 ~/.herondata/topologies/local/*/*MultiSpoutsMultiTasks*/log-files/*stmgr*.INFO - -# Dump out Java program's logs -echo "DUMPING JAVA PROGRAM LOG" -tail -n +1 ~/.herondata/topologies/local/*/*MultiSpoutsMultiTasks*/log-files/container*.log.0 - print_timer_summary From 26d0001bbd377131eb61a93ad339e4c7934fc8da Mon Sep 17 00:00:00 2001 From: Runhang Li Date: Thu, 2 Feb 2017 11:20:47 -0800 Subject: [PATCH 11/12] Missing ``done''. --- scripts/travis/test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/travis/test.sh b/scripts/travis/test.sh index 1e9577ffc42..976697087c0 100755 --- a/scripts/travis/test.sh +++ b/scripts/travis/test.sh @@ -60,6 +60,7 @@ for i in `seq 1 100`; do tail -n +1 ~/.herondata/topologies/local/*/*MultiSpoutsMultiTasks*/log-files/container*.log.0 exit 1 fi +done end_timer "$T" print_timer_summary From cb2567b637a26d71e3ccb2e8b9ba780ba4f39d7d Mon Sep 17 00:00:00 2001 From: Runhang Li Date: Thu, 2 Feb 2017 16:30:10 -0800 Subject: [PATCH 12/12] Make loop proceed --- scripts/travis/test.sh | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/scripts/travis/test.sh b/scripts/travis/test.sh index 976697087c0..1bf5292bc08 100755 --- a/scripts/travis/test.sh +++ b/scripts/travis/test.sh @@ -11,22 +11,13 @@ JAVA_INTEGRATION_TESTS_BIN="${PWD}/bazel-genfiles/integration-test/src/java/inte PYTHON_INTEGRATION_TESTS_BIN="${PWD}/bazel-bin/integration-test/src/python/integration_test/topology/pyheron_integ_topology.pex" # build test related jar -T="heron build integration-test" -start_timer "$T" python ${DIR}/save-logs.py "heron_build_integration_test.txt" bazel --bazelrc=tools/travis-ci/bazel.rc build --config=ubuntu integration-test/src/... -end_timer "$T" # install client -T="heron client install" -start_timer "$T" python ${DIR}/save-logs.py "heron_client_install.txt" bazel --bazelrc=tools/travis-ci/bazel.rc run --config=ubuntu -- scripts/packages:heron-client-install.sh --user -end_timer "$T" # install tools -T="heron tools install" -start_timer "$T" python ${DIR}/save-logs.py "heron_tools_install.txt" bazel --bazelrc=tools/travis-ci/bazel.rc run --config=ubuntu -- scripts/packages:heron-tools-install.sh --user -end_timer "$T" # run local integration test # T="heron integration-test local" @@ -35,8 +26,6 @@ end_timer "$T" # end_timer "$T" # run the java integration test -T="heron integration-test java" -start_timer "$T" ./bazel-bin/integration-test/src/python/http_server/http-server 8080 & http_server_id=$! trap "kill -9 $http_server_id" SIGINT SIGTERM EXIT @@ -61,6 +50,3 @@ for i in `seq 1 100`; do exit 1 fi done -end_timer "$T" - -print_timer_summary