From 370873ed4730ef69b4a76e14923bd5aef1a12685 Mon Sep 17 00:00:00 2001 From: ananth Date: Wed, 3 May 2017 15:13:50 -0700 Subject: [PATCH 01/61] Added Ecs Docker Host Support --- heron/executor/src/python/heron_executor.py | 24 ++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/heron/executor/src/python/heron_executor.py b/heron/executor/src/python/heron_executor.py index f14852d3810..f7f240033cb 100755 --- a/heron/executor/src/python/heron_executor.py +++ b/heron/executor/src/python/heron_executor.py @@ -108,6 +108,21 @@ def log_pid_for_process(process_name, pid): def is_docker_environment(): return os.path.isfile('/.dockerenv') +def isEcsAmiInstance(): + meta = 'http://169.254.169.254/latest/meta-data/ami-id' + req = urllib2.Request(meta) + try: + response = urllib2.urlopen(req).read() + if 'ami' in response: + #_msg = 'I am in AWS running on {}'.format(response) + return 1 + else: + #_msg = 'I am in dev - no AWS AMI' + return 0 + except Exception: + #_msg = 'no metadata, not in AWS' + return 0 + class ProcessInfo(object): def __init__(self, process, name, command, attempts=1): """ @@ -155,7 +170,14 @@ def init_parsed_args(self, args): # id within docker, rather than the host's hostname. 
NOTE: this 'HOST' env variable is not # guaranteed to be set in all Docker executor environments (outside of Marathon) if is_docker_environment(): - self.master_host = os.environ.get('HOST') if 'HOST' in os.environ else socket.gethostname() + # Need to set the HOST environment vairable if docker is for AWS ECS tasks + if isEcsAmiInstance(): + self.master_host = subprocess.Popen(["curl", + "http://169.254.169.254/latest/meta-data/local-ipv4"] + , stdout=subprocess.PIPE).communicate()[0] + os.environ['HOST'] = self.master_host + else: + self.master_host = os.environ.get('HOST') if 'HOST' in os.environ else socket.gethostname() else: self.master_host = socket.gethostname() self.master_port = parsed_args.master_port From 8a10b031c0cdb514060bde2d91d0113dbb2fa74c Mon Sep 17 00:00:00 2001 From: ananth Date: Wed, 3 May 2017 15:16:59 -0700 Subject: [PATCH 02/61] Adding the ecs scheduler --- .../src/yaml/conf/ecs/heron_internals.yaml | 272 ++++++++++++++++++ .../src/yaml/conf/ecs/metrics_sinks.yaml | 81 ++++++ heron/config/src/yaml/conf/ecs/packing.yaml | 2 + heron/config/src/yaml/conf/ecs/scheduler.yaml | 8 + heron/config/src/yaml/conf/ecs/statemgr.yaml | 41 +++ heron/config/src/yaml/conf/ecs/uploader.yaml | 32 +++ 6 files changed, 436 insertions(+) create mode 100644 heron/config/src/yaml/conf/ecs/heron_internals.yaml create mode 100644 heron/config/src/yaml/conf/ecs/metrics_sinks.yaml create mode 100644 heron/config/src/yaml/conf/ecs/packing.yaml create mode 100644 heron/config/src/yaml/conf/ecs/scheduler.yaml create mode 100644 heron/config/src/yaml/conf/ecs/statemgr.yaml create mode 100644 heron/config/src/yaml/conf/ecs/uploader.yaml diff --git a/heron/config/src/yaml/conf/ecs/heron_internals.yaml b/heron/config/src/yaml/conf/ecs/heron_internals.yaml new file mode 100644 index 00000000000..caf8a21ad69 --- /dev/null +++ b/heron/config/src/yaml/conf/ecs/heron_internals.yaml @@ -0,0 +1,272 @@ +################################################################################ 
+# Default values for various configs used inside Heron. +################################################################################ +# All the config associated with time is in the unit of milli-seconds, +# unless otherwise specified. +################################################################################ +# All the config associated with data size is in the unit of bytes, unless +# otherwise specified. +################################################################################ + +################################################################################ +# System level configs +################################################################################ + +### heron.* configs are general configurations over all components + +# The relative path to the logging directory +heron.logging.directory: "log-files" + +# The maximum log file size in MB +heron.logging.maximum.size.mb: 100 + +# The maximum number of log files +heron.logging.maximum.files: 5 + +# The interval in seconds after which to check if the tmaster location has been fetched or not +heron.check.tmaster.location.interval.sec: 120 + +# The interval in seconds to prune logging files in C++ +heron.logging.prune.interval.sec: 300 + +# The interval in seconds to flush log files in C++ +heron.logging.flush.interval.sec: 10 + +# The threshold level to log error +heron.logging.err.threshold: 3 + +# The interval in seconds for different components to export metrics to metrics manager +heron.metrics.export.interval.sec: 60 + +# The maximum count of exceptions in one MetricPublisherPublishMessage protobuf +heron.metrics.max.exceptions.per.message.count: 1024 + +################################################################################ +# Configs related to Stream Manager, starts with heron.streammgr.* +################################################################################ + +# Maximum size in bytes of a packet to be sent out from stream manager 
+heron.streammgr.packet.maximum.size.bytes: 102400 + +# The tuple cache (used for batching) can be drained in two ways: +# (a) Time based +# (b) size based + +# The frequency in ms to drain the tuple cache in stream manager +heron.streammgr.cache.drain.frequency.ms: 10 + +# The size based threshold in MB for draining the tuple cache +heron.streammgr.cache.drain.size.mb: 100 + +# For efficient acknowledgements +heron.streammgr.xormgr.rotatingmap.nbuckets: 3 + +# The reconnect interval to other stream managers in secs for stream manager client +heron.streammgr.client.reconnect.interval.sec: 1 + +# The reconnect interval to tmaster in seconds for stream manager client +heron.streammgr.client.reconnect.tmaster.interval.sec: 10 + +# The maximum packet size in MB of stream manager's network options +heron.streammgr.network.options.maximum.packet.mb: 100 + +# The interval in seconds to send heartbeat +heron.streammgr.tmaster.heartbeat.interval.sec: 10 + +# Maximum batch size in MB to read by stream manager from socket +heron.streammgr.connection.read.batch.size.mb: 1 + +# Maximum batch size in MB to write by stream manager to socket +heron.streammgr.connection.write.batch.size.mb: 1 + +# Number of times we should wait to see a buffer full while enqueueing data +# before declaring start of back pressure +heron.streammgr.network.backpressure.threshold: 3 + +# High water mark on the num in MB that can be left outstanding on a connection +heron.streammgr.network.backpressure.highwatermark.mb: 100 + +# Low water mark on the num in MB that can be left outstanding on a connection +heron.streammgr.network.backpressure.lowwatermark.mb: 50 + +################################################################################ +# Configs related to Topology Master, starts with heron.tmaster.* +################################################################################ + +# The maximum interval in minutes of metrics to be kept in tmaster 
+heron.tmaster.metrics.collector.maximum.interval.min: 180 + +# The maximum time to retry to establish the tmaster +heron.tmaster.establish.retry.times: 30 + +# The interval to retry to establish the tmaster +heron.tmaster.establish.retry.interval.sec: 1 + +# Maximum packet size in MB of tmaster's network options to connect to stream managers +heron.tmaster.network.master.options.maximum.packet.mb: 16 + +# Maximum packet size in MB of tmaster's network options to connect to scheduler +heron.tmaster.network.controller.options.maximum.packet.mb: 1 + +# Maximum packet size in MB of tmaster's network options for stat queries +heron.tmaster.network.stats.options.maximum.packet.mb: 1 + +# The interval for tmaster to purge metrics from socket +heron.tmaster.metrics.collector.purge.interval.sec: 60 + +# The maximum # of exceptions to be stored in tmetrics collector, to prevent potential OOM +heron.tmaster.metrics.collector.maximum.exception: 256 + +# Should the metrics reporter bind on all interfaces +heron.tmaster.metrics.network.bindallinterfaces: False + +# The timeout in seconds for stream mgr, compared with (current time - last heartbeat time) +heron.tmaster.stmgr.state.timeout.sec: 60 + +################################################################################ +# Configs related to Metrics Manager, starts with heron.metricsmgr.* +################################################################################ + +# The size of packets to read from socket will be determined by the minimal of: +# (a) time based +# (b) size based + +# Time based, the maximum batch time in ms for metricsmgr to read from socket +heron.metricsmgr.network.read.batch.time.ms: 16 + +# Size based, the maximum batch size in bytes to read from socket +heron.metricsmgr.network.read.batch.size.bytes: 32768 + +# The size of packets to write to socket will be determined by the minimum of +# (a) time based +# (b) size based + +# Time based, the maximum batch time in ms for metricsmgr to write 
to socket +heron.metricsmgr.network.write.batch.time.ms: 16 + +# Size based, the maximum batch size in bytes to write to socket +heron.metricsmgr.network.write.batch.size.bytes: 32768 + +# The maximum socket's send buffer size in bytes +heron.metricsmgr.network.options.socket.send.buffer.size.bytes: 6553600 + +# The maximum socket's received buffer size in bytes of metricsmgr's network options +heron.metricsmgr.network.options.socket.received.buffer.size.bytes: 8738000 + +################################################################################ +# Configs related to Heron Instance, starts with heron.instance.* +################################################################################ + +# The queue capacity (num of items) in bolt for buffer packets to read from stream manager +heron.instance.internal.bolt.read.queue.capacity: 128 + +# The queue capacity (num of items) in bolt for buffer packets to write to stream manager +heron.instance.internal.bolt.write.queue.capacity: 128 + +# The queue capacity (num of items) in spout for buffer packets to read from stream manager +heron.instance.internal.spout.read.queue.capacity: 1024 + +# The queue capacity (num of items) in spout for buffer packets to write to stream manager +heron.instance.internal.spout.write.queue.capacity: 128 + +# The queue capacity (num of items) for metrics packets to write to metrics manager +heron.instance.internal.metrics.write.queue.capacity: 128 + +# The size of packets read from stream manager will be determined by the minimal of +# (a) time based +# (b) size based + +# Time based, the maximum batch time in ms for instance to read from stream manager per attempt +heron.instance.network.read.batch.time.ms: 16 + +# Size based, the maximum batch size in bytes to read from stream manager +heron.instance.network.read.batch.size.bytes: 32768 + +# The size of packets written to stream manager will be determined by the minimum of +# (a) time based +# (b) size based + +# Time based, the 
maximum batch time in ms for instance to write to stream manager per attempt +heron.instance.network.write.batch.time.ms: 16 + +# Size based, the maximum batch size in bytes to write to stream manager +heron.instance.network.write.batch.size.bytes: 32768 + +# The maximum socket's send buffer size in bytes +heron.instance.network.options.socket.send.buffer.size.bytes: 6553600 + +# The maximum socket's received buffer size in bytes of instance's network options +heron.instance.network.options.socket.received.buffer.size.bytes: 8738000 + +# The maximum # of data tuple to batch in a HeronDataTupleSet protobuf +heron.instance.set.data.tuple.capacity: 1024 + +# The maximum size in bytes of data tuple to batch in a HeronDataTupleSet protobuf +heron.instance.set.data.tuple.size.bytes: 8388608 + +# The maximum # of control tuple to batch in a HeronControlTupleSet protobuf +heron.instance.set.control.tuple.capacity: 1024 + +# The maximum time in ms for a spout to do acknowledgement per attempt, the ack batch could +# also break if there are no more ack tuples to process +heron.instance.ack.batch.time.ms: 128 + +# The maximum time in ms for an spout instance to emit tuples per attempt +heron.instance.emit.batch.time.ms: 16 + +# The maximum batch size in bytes for an spout to emit tuples per attempt +heron.instance.emit.batch.size.bytes: 32768 + +# The maximum time in ms for an bolt instance to execute tuples per attempt +heron.instance.execute.batch.time.ms: 16 + +# The maximum batch size in bytes for an bolt instance to execute tuples per attempt +heron.instance.execute.batch.size.bytes: 32768 + +# The time interval for an instance to check the state change, +# for example, the interval a spout uses to check whether activate/deactivate is invoked +heron.instance.state.check.interval.sec: 5 + +# The time to wait before the instance exits forcibly when uncaught exception happens +heron.instance.force.exit.timeout.ms: 2000 + +# Interval in seconds to reconnect to the stream 
manager, including the request timeout in connecting +heron.instance.reconnect.streammgr.interval.sec: 5 +heron.instance.reconnect.streammgr.times: 60 + +# Interval in seconds to reconnect to the metrics manager, including the request timeout in connecting +heron.instance.reconnect.metricsmgr.interval.sec: 5 +heron.instance.reconnect.metricsmgr.times: 60 + +# The interval in seconds for an instance to sample its system metrics, for instance, cpu load. +heron.instance.metrics.system.sample.interval.sec: 10 + +heron.instance.slave.fetch.pplan.interval.sec: 1 + +# For efficient acknowledgement +heron.instance.acknowledgement.nbuckets: 10 + +################################################################################ +# For dynamically tuning the available sizes in the interval read & write queues +# to provide high performance while avoiding GC issues +################################################################################ + +# The expected size on read queue in bolt +heron.instance.tuning.expected.bolt.read.queue.size: 8 + +# The expected size on write queue in bolt +heron.instance.tuning.expected.bolt.write.queue.size: 8 + +# The expected size on read queue in spout +heron.instance.tuning.expected.spout.read.queue.size: 512 + +# The expected size on write queue in spout +heron.instance.tuning.expected.spout.write.queue.size: 8 + +# The expected size on metrics write queue +heron.instance.tuning.expected.metrics.write.queue.size: 8 + +heron.instance.tuning.current.sample.weight: 0.8 + +# Interval in ms to tune the size of in & out data queue in instance +heron.instance.tuning.interval.ms: 100 diff --git a/heron/config/src/yaml/conf/ecs/metrics_sinks.yaml b/heron/config/src/yaml/conf/ecs/metrics_sinks.yaml new file mode 100644 index 00000000000..e58150482ea --- /dev/null +++ b/heron/config/src/yaml/conf/ecs/metrics_sinks.yaml @@ -0,0 +1,81 @@ +########### These all have default values as shown + +# We would specify the unique sink-id first +sinks: + - 
file-sink + - tmaster-sink + +########### Now we would specify the detailed configuration for every unique sink +########### Syntax: sink-id: - option(s) + +########### option class is required as we need to instantiate a new instance by reflection +########### option flush-frequency-ms is required to invoke flush() at interval +########### option sink-restart-attempts, representing # of times to restart a sink when it throws exceptions and dies. +########### If this option is missed, default value 0 would be supplied; negative value represents to restart it forever. + +########### Other options would be constructed as an immutable map passed to IMetricsSink's init(Map conf) as argument, +########### We would be able to fetch value by conf.get(options), for instance: +########### We could get "com.twitter.heron.metricsmgr.sink.FileSink" if conf.get("class") is called inside file-sink's instance + +### Config for file-sink +file-sink: + class: "com.twitter.heron.metricsmgr.sink.FileSink" + flush-frequency-ms: 60000 # 1 min + sink-restart-attempts: -1 # Forever + filename-output: "metrics.json" # File for metrics to write to + file-maximum: 5 # maximum number of file saved in disk + +### Config for tmaster-sink +tmaster-sink: + class: "com.twitter.heron.metricsmgr.sink.tmaster.TMasterSink" + flush-frequency-ms: 60000 + sink-restart-attempts: -1 # Forever + tmaster-location-check-interval-sec: 5 + tmaster-client: + reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient + # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to read from TMaster + 
network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to read from TMaster per attempt + socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes + socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes + tmaster-metrics-type: + "__emit-count": SUM + "__execute-count": SUM + "__fail-count": SUM + "__ack-count": SUM + "__complete-latency": AVG + "__execute-latency": AVG + "__process-latency": AVG + "__jvm-uptime-secs": LAST + "__jvm-process-cpu-load": LAST + "__jvm-memory-used-mb": LAST + "__jvm-memory-mb-total": LAST + "__jvm-gc-collection-time-ms": LAST + "__server/__time_spent_back_pressure_initiated": SUM + "__time_spent_back_pressure_by_compid": SUM + +### Config for scribe-sink +# scribe-sink: +# class: "com.twitter.heron.metricsmgr.sink.ScribeSink" +# flush-frequency-ms: 60000 +# sink-restart-attempts: -1 # Forever +# scribe-host: "127.0.0.1" # The host of scribe to be exported metrics to +# scribe-port: 1463 # The port of scribe to be exported metrics to +# scribe-category: "scribe-category" # The category of the scribe to be exported metrics to +# service-namespace: "heron" # The service name of the metrics in scribe-category +# scribe-timeout-ms: 200 # The timeout in ms for metrics manager to write metrics to scribe +# scribe-connect-server-attempts: 2 # The maximum retry attempts to connect to scribe server +# scribe-retry-attempts: 5 # The maximum retry attempts to write metrics to scribe +# scribe-retry-interval-ms: 100 # The interval to retry to write metrics to scribe + +### Config for graphite-sink +### Currently the graphite-sink is disabled +# graphite-sink: +# class: "com.twitter.heron.metricsmgr.sink.GraphiteSink" +# flush-frequency-ms: 60000 +# graphite_host: "127.0.0.1" # The host of graphite to be exported metrics to +# graphite_port: 2004 # The port of graphite to be exported metrics to +# metrics_prefix: "heron" # The prefix of 
every metrics +# server_max_reconnect-attempts: 20 # The max reconnect attempts when failing to connect to graphite server diff --git a/heron/config/src/yaml/conf/ecs/packing.yaml b/heron/config/src/yaml/conf/ecs/packing.yaml new file mode 100644 index 00000000000..f3021ca03ef --- /dev/null +++ b/heron/config/src/yaml/conf/ecs/packing.yaml @@ -0,0 +1,2 @@ +# packing algorithm for packing instances into containers +heron.class.packing.algorithm: com.twitter.heron.packing.roundrobin.RoundRobinPacking diff --git a/heron/config/src/yaml/conf/ecs/scheduler.yaml b/heron/config/src/yaml/conf/ecs/scheduler.yaml new file mode 100644 index 00000000000..2b0b254f73f --- /dev/null +++ b/heron/config/src/yaml/conf/ecs/scheduler.yaml @@ -0,0 +1,8 @@ +# scheduler class for distributing the topology for execution +heron.class.scheduler: com.twitter.heron.scheduler.ecs.EcsScheduler + +# launcher class for submitting and launching the topology +heron.class.launcher: com.twitter.heron.scheduler.ecs.EcsLauncher + +# location of java - pick it up from shell environment +heron.directory.sandbox.java.home: ${JAVA_HOME} diff --git a/heron/config/src/yaml/conf/ecs/statemgr.yaml b/heron/config/src/yaml/conf/ecs/statemgr.yaml new file mode 100644 index 00000000000..facd0e2fff4 --- /dev/null +++ b/heron/config/src/yaml/conf/ecs/statemgr.yaml @@ -0,0 +1,41 @@ +# local state manager class for managing state in a persistent fashion +heron.class.state.manager: com.twitter.heron.statemgr.zookeeper.curator.CuratorStateManager + +#Un-comment the line below and add ZK IP address +#heron.statemgr.connection.string: "xx.xx.xx.xx:2181" + + +# path of the root address to store the state in a local file system +heron.statemgr.root.path: "/heron" + +#################################################################### +# Following are config for Zk State Manager +#################################################################### +heron.statemgr.zookeeper.is.initialize.tree: True + 
+heron.statemgr.zookeeper.session.timeout.ms: 30000 + +heron.statemgr.zookeeper.connection.timeout.ms: 30000 + +heron.statemgr.zookeeper.retry.count: 10 + +heron.statemgr.zookeeper.retry.interval.ms: 10000 + +#################################################################### +# Following are config for tunneling +#################################################################### +# Whether we need tunnel if no direct access on zk server +heron.statemgr.is.tunnel.needed: True + +# The connection timeout in ms when trying to connect to zk server +heron.statemgr.tunnel.connection.timeout.ms: 1000 + +# The count of retry to check whether has direct access on zk server +heron.statemgr.tunnel.connection.retry.count: 2 + +# The interval in ms between two retry checking whether has direct access on zk server +heron.statemgr.tunnel.retry.interval.ms: 1000 + +# The count of retry to verify whether setting up a tunnel process +heron.statemgr.tunnel.verify.count: 10 + diff --git a/heron/config/src/yaml/conf/ecs/uploader.yaml b/heron/config/src/yaml/conf/ecs/uploader.yaml new file mode 100644 index 00000000000..a38def7ed77 --- /dev/null +++ b/heron/config/src/yaml/conf/ecs/uploader.yaml @@ -0,0 +1,32 @@ +# uploader class for transferring the topology jar/tar files to storage +heron.class.uploader: com.twitter.heron.uploader.s3.S3Uploader + +# S3 bucket to put the jar file into +#heron.uploader.s3.bucket: bucketname + + +# By default the path prefix will be empty and the full path would be s3://#{bucket}/#{topology_name}/topology.tar.gz +# This allows you to prepend a prefix to the path to specify a sub-folder in which case the full path would be: +# s3://#{bucket}/#{prefix}/#{topology_name}/topology.tar.gz +# heron.uploader.s3.path_prefix: path/prefix + +# By default, assume AWS S3. However, you can specify a custom url if you are using a +# S3 compatible storage layer (or using a reverse proxy for accessing S3). 
+# heron.uploader.s3.uri: hostname:port +# If you want to access S3 through a http proxy, use +# heron.upload.s3.proxy_uri: http://username:password@hostname:port +# username and password are optional + +# Specifies a custom region - see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/regions/Regions.html#US_EAST_1 +#heron.uploader.s3.region : meh + + +# AWS Credentials +# By default the S3 Uploader will use the Default Credential Provider Chain for accessing the S3 bucket - +# see http://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html#using-the-default-credential-provider-chain +# However you have the option to directly specify aws credentials: +#heron.uploader.s3.access_key: xxxx +#heron.uploader.s3.secret_key: xxxx +# Alternatively to directly specifying aws credentials, you can specify the aws profile +# in case you have multiple AWS profiles in your credentials file (~/.aws/credentials): +# heron.uploader.s3.aws_profile: profile_name From cee04fe8412e387e9103046ddca540149551103e Mon Sep 17 00:00:00 2001 From: ananth Date: Wed, 3 May 2017 15:21:12 -0700 Subject: [PATCH 03/61] AWS ECS schedulers added --- .../heron/scheduler/ecs/EcsContext.java | 65 ++++ .../twitter/heron/scheduler/ecs/EcsKey.java | 53 ++++ .../heron/scheduler/ecs/EcsLauncher.java | 130 ++++++++ .../heron/scheduler/ecs/EcsScheduler.java | 297 ++++++++++++++++++ 4 files changed, 545 insertions(+) create mode 100644 heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java create mode 100644 heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java create mode 100644 heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java create mode 100644 heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java new file mode 100644 index 
00000000000..818ca9b8f09 --- /dev/null +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java @@ -0,0 +1,65 @@ +// Copyright 2016 Twitter. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.twitter.heron.scheduler.ecs; + +import com.twitter.heron.spi.common.Config; +import com.twitter.heron.spi.common.Context; +import com.twitter.heron.spi.common.TokenSub; + + +/** + * Created by ananth on 4/29/17. + */ +public class EcsContext extends Context { + public static final String PART1 = "version: '2'\n" + + "services:\n" + + " container_number:\n" + + " image: ananthgs/onlyheronandubuntu\n"; + public static final String CMD = " command: [\"sh\", \"-c\", \"mkdir /s3; cd /s3 ;" + + "aws s3 cp s3://herondockercal/TOPOLOGY_NAME/topology.tar.gz /s3 ;" + + "aws s3 cp s3://herondockercal/heron-core-testbuild-ubuntu14.04.tar.gz /s3 ;cd /s3;" + + " tar -zxvf topology.tar.gz; tar -zxvf heron-core-testbuild-ubuntu14.04.tar.gz;" + + "heron_executor ;\"] \n"; + public static final String ECSNETWORK = " networks:\n" + + " - heron\n" + + " ports:\n" + + " - \"5000:5000\"\n" + + " - \"5001:5001\"\n" + + " - \"5002:5002\"\n" + + " - \"5003:5003\"\n" + + " - \"5004:5004\"\n" + + " - \"5005:5005\"\n" + + " - \"5006:5006\"\n" + + " - \"5007:5007\"\n" + + " - \"5008:5008\"\n" + + " - \"5009:5009\"\n" + + " volumes:\n" + + " - \"herondata:/root/.herondata\"\n" + + "networks:\n" + + " heron:\n" + + " driver: bridge\n" + + 
"volumes:\n" + + " herondata:\n" + + " driver: local"; + public static final String DESTINATION_JVM = "/usr/lib/jvm/java-8-oracle"; + public static final String COMPOSE_WORKING_DIR = "/tmp/"; + + public static String workingDirectory(Config config) { + String workingDirectory = config.getStringValue( + EcsKey.WORKING_DIRECTORY.value(), EcsKey.WORKING_DIRECTORY.getDefaultString()); + return TokenSub.substitute(config, workingDirectory); + } +} + diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java new file mode 100644 index 00000000000..8393003917d --- /dev/null +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java @@ -0,0 +1,53 @@ +// Copyright 2016 Twitter. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.twitter.heron.scheduler.ecs; + +import com.twitter.heron.spi.common.Key; + +/** + * Created by ananth on 4/29/17. 
+ */ +public enum EcsKey { + // config key for specifying the working directory of a topology + WORKING_DIRECTORY("heron.scheduler.local.working.directory", + "${HOME}/.herondata/topologies/${CLUSTER}/${ROLE}/${TOPOLOGY}"); + + private final String value; + private final Key.Type type; + private final Object defaultValue; + + EcsKey(String value, String defaultValue) { + this.value = value; + this.type = Key.Type.STRING; + this.defaultValue = defaultValue; + } + + public String value() { + return value; + } + + public Object getDefault() { + return defaultValue; + } + + public String getDefaultString() { + if (type != Key.Type.STRING) { + throw new IllegalAccessError(String.format( + "Config Key %s is type %s, getDefaultString() not supported", this.name(), this.type)); + } + return (String) this.defaultValue; + } + +} diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java new file mode 100644 index 00000000000..fb536ccf988 --- /dev/null +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java @@ -0,0 +1,130 @@ +// Copyright 2016 Twitter. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package com.twitter.heron.scheduler.ecs; + +import java.io.File; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +//import com.twitter.heron.common.basics.SysUtils; +import com.twitter.heron.scheduler.utils.Runtime; +import com.twitter.heron.scheduler.utils.SchedulerUtils; +import com.twitter.heron.spi.common.Config; +import com.twitter.heron.spi.common.Context; +import com.twitter.heron.spi.packing.PackingPlan; +import com.twitter.heron.spi.scheduler.ILauncher; +import com.twitter.heron.spi.utils.ShellUtils; + +/** + * Created by ananth on 4/18/17. + */ +public class EcsLauncher implements ILauncher { + protected static final Logger LOG = Logger.getLogger(EcsLauncher.class.getName()); + + private Config config; + private Config runtime; + + private String topologyWorkingDirectory; + + + @Override + public void initialize(Config mConfig, Config mRuntime) { + this.config = mConfig; + this.runtime = mRuntime; + // get the topology working directory + this.topologyWorkingDirectory = EcsContext.workingDirectory(config); + } + + @Override + public void close() { + + } + + public boolean launch(PackingPlan packing) { + LOG.log(Level.FINE, "Launching topology for local cluster {0}", + EcsContext.cluster(config)); + /** commenting these line as we dont need working directory we use /tmp" + * + */ + if (!setupWorkingDirectory()) { + LOG.severe("Failed to setup working directory"); + return false; + } + + String[] schedulerCmd = getSchedulerCommand(); + + + Process p = startScheduler(schedulerCmd); + + if (p == null) { + LOG.severe("Failed to start SchedulerMain using: " + Arrays.toString(schedulerCmd)); + return false; + } + + LOG.log(Level.FINE, String.format( + "To check the status and logs of the topology, use the working directory %s", + EcsContext.COMPOSE_WORKING_DIR)); + return true; + } + + protected String[] getSchedulerCommand() { + 
List freePorts = new ArrayList<>(SchedulerUtils.PORTS_REQUIRED_FOR_SCHEDULER); + for (int i = 0; i < SchedulerUtils.PORTS_REQUIRED_FOR_SCHEDULER; i++) { + //freePorts.add(SysUtils.getFreePort()); + freePorts.add(4000 + i); + } + + return SchedulerUtils.schedulerCommand(config, runtime, freePorts); + } + + protected boolean setupWorkingDirectory() { + // get the path of core release URI + String coreReleasePackageURI = EcsContext.corePackageUri(config); + + // form the target dest core release file name + String coreReleaseFileDestination = Paths.get( + topologyWorkingDirectory, "heron-core.tar.gz").toString(); + + // Form the topology package's URI + String topologyPackageURI = Runtime.topologyPackageUri(runtime).toString(); + // form the target topology package file name + String topologyPackageDestination = Paths.get( + topologyWorkingDirectory, "topology.tar.gz").toString(); + + System.out.println("topologyPackageURI :" + topologyPackageURI); + + System.out.println("topologyPackageDestination :" + topologyPackageDestination); + + boolean fileSetup = SchedulerUtils.setupWorkingDirectory( + topologyWorkingDirectory, + coreReleasePackageURI, + coreReleaseFileDestination, + topologyPackageURI, + topologyPackageDestination, + Context.verbose(config)); + + return fileSetup; + } + + protected Process startScheduler(String[] schedulerCmd) { + + return ShellUtils.runASyncProcess(EcsContext.verbose(config), schedulerCmd, + new File(topologyWorkingDirectory)); + } +} diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java new file mode 100644 index 00000000000..5faa9293f79 --- /dev/null +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java @@ -0,0 +1,297 @@ +// Copyright 2016 Twitter. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.twitter.heron.scheduler.ecs; + +//import java.io.FileNotFoundException; +//import java.io.FileWriter; +import java.io.File; +//import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +//import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.nio.file.attribute.PosixFilePermission; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.logging.Level; +import java.util.logging.Logger; + + +//import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Optional; + +import org.apache.commons.io.IOUtils; + +import com.twitter.heron.proto.scheduler.Scheduler; +import com.twitter.heron.scheduler.UpdateTopologyManager; +import com.twitter.heron.scheduler.utils.SchedulerUtils; +import com.twitter.heron.spi.common.Config; +import com.twitter.heron.spi.common.Context; +import com.twitter.heron.spi.packing.PackingPlan; +import com.twitter.heron.spi.scheduler.IScalable; +import com.twitter.heron.spi.scheduler.IScheduler; +import com.twitter.heron.spi.utils.ShellUtils; + + +/** + * Created by ananth on 4/19/17. 
+ */ +public class EcsScheduler implements IScheduler, IScalable { + private static final Logger LOG = Logger.getLogger(EcsScheduler.class.getName()); + // executor service for monitoring all the containers + private final ExecutorService monitorService = Executors.newCachedThreadPool(); + // map to keep track of the process and the shard it is running + private final Map processToContainer = new ConcurrentHashMap<>(); + private Config config; + private Config runtime; + private UpdateTopologyManager updateTopologyManager; + // has the topology been killed? + private volatile boolean isTopologyKilled = false; + + @Override + public void initialize(Config mConfig, Config mRuntime) { + this.config = mConfig; + this.runtime = mRuntime; + this.updateTopologyManager = + new UpdateTopologyManager(config, runtime, Optional.of(this)); + } + + public void close() { + // Shut down the ExecutorService for monitoring + monitorService.shutdownNow(); + + // Clear the map + processToContainer.clear(); + + if (updateTopologyManager != null) { + updateTopologyManager.close(); + } + } + + + protected int startExecutorSyncProcess(int container) { + String executingInShell = new String(); + executingInShell = getExecutorCommand(container)[0]; + System.out.println("executing in Shell: " + executingInShell); + return ShellUtils.runProcess(executingInShell, null); + } + + protected void startExecutor(final int container) { + LOG.info("Starting a new executor for container: " + container); + int shellOutput = startExecutorSyncProcess(container); + LOG.info("output value for the executor container: " + + container + String.valueOf(shellOutput)); + + } + + private String[] getExecutorCommand(int container) { + List freePorts = new ArrayList<>(SchedulerUtils.PORTS_REQUIRED_FOR_EXECUTOR); + String ecsComposeCommand = "/usr/local/bin/ecs-cli compose --project-name heron_ex"; + ecsComposeCommand = ecsComposeCommand + String.valueOf(container) + " --file "; + String ecsEnableUpdaload = " up"; + 
+ for (int i = 0; i < SchedulerUtils.PORTS_REQUIRED_FOR_EXECUTOR; i++) { + //freePorts.add(SysUtils.getFreePort()); + freePorts.add(5000 + (i + (container * 10))); + } + String replaceTopologyBinFile = Context.topologyBinaryFile(config); + String replaeRole = Context.role(config); + String replaceJavaHome = Context.clusterJavaHome(config); + System.out.println("Topology Bin file :" + Context.topologyBinaryFile(config)); + System.out.println("Role :" + Context.role(config)); + System.out.println("JAVA HOME :" + Context.clusterJavaHome(config)); + String[] executorCmd = SchedulerUtils.executorCommand(config, runtime, container, freePorts); + System.out.println("Executor command line: " + Arrays.toString(executorCmd)); + + String finalExecCommand = formHeronExecCommand(executorCmd); + finalExecCommand = finalExecCommand.replace(replaceTopologyBinFile, "heron-examples.jar"); + //finalExecCommand = finalExecCommand.replace(replaeRole, "root"); + finalExecCommand = finalExecCommand.replace(replaceJavaHome, EcsContext.DESTINATION_JVM); + finalExecCommand = finalExecCommand.replaceAll("\"", "'"); + System.out.println("heron exec command: " + finalExecCommand); + String content = null; + String finalCommand = ecsComposeCommand; + FileOutputStream dockerFilestream = null; + String dockerComposeFileName = EcsContext.COMPOSE_WORKING_DIR; + try { + System.out.println("reading file: docker_compose_template.yml "); + + content = EcsContext.PART1 + EcsContext.CMD; + content = content + EcsContext.ECSNETWORK; + content = replacePortNumbers(container, content); + content = content.replaceAll("TOPOLOGY_NAME", EcsContext.topologyName(config)); + content = content.replaceAll("container_number", + "executor" + String.valueOf(container)); + content = content.replace("heron_executor", finalExecCommand); + System.out.println("content to build .yml file : " + content); + dockerComposeFileName = dockerComposeFileName + "/docker_compose"; + dockerComposeFileName = dockerComposeFileName + 
String.valueOf(container) + ".yml"; + if (Files.exists(Paths.get(dockerComposeFileName))) { + //its from an old submit so delete it + Files.delete(Paths.get(dockerComposeFileName)); + } + final File file = new File(dockerComposeFileName); + file.setWritable(true); + System.out.println("docker compose file " + dockerComposeFileName); + dockerFilestream = new FileOutputStream(dockerComposeFileName); + //IOUtils.write(content, new FileOutputStream(dockerComposeFileName)); + IOUtils.write(content, dockerFilestream); + IOUtils.closeQuietly(dockerFilestream); + setPermissionsOnDockerfile(dockerComposeFileName); + System.out.println("docker compose file permission granted " + dockerComposeFileName); + } catch (IOException e) { + e.printStackTrace(); + } finally { + IOUtils.closeQuietly(dockerFilestream); + } + + finalCommand = finalCommand + dockerComposeFileName + ecsEnableUpdaload; + //return finalCommand.toArray(new String[finalCommand.size()]); + System.out.println("final Ecs Task command " + finalCommand); + return new String[] {finalCommand}; + + } + + public String replacePortNumbers(int container, String content) { + int basePortnumber = 5000; + String localContent = new String(content); + for (int i = 0; i < SchedulerUtils.PORTS_REQUIRED_FOR_EXECUTOR; i++) { + localContent = localContent.replace(String.valueOf(basePortnumber + i), + String.valueOf(basePortnumber + (i + (container * 10)))); + } + return localContent; + } + + public void setPermissionsOnDockerfile(String fileName) throws IOException { + Set perms = new HashSet(); + //add owners permission + perms.add(PosixFilePermission.OWNER_READ); + perms.add(PosixFilePermission.OWNER_WRITE); + perms.add(PosixFilePermission.OWNER_EXECUTE); + //add group permissions + perms.add(PosixFilePermission.GROUP_READ); + perms.add(PosixFilePermission.GROUP_WRITE); + perms.add(PosixFilePermission.GROUP_EXECUTE); + //add others permissions + perms.add(PosixFilePermission.OTHERS_READ); + 
perms.add(PosixFilePermission.OTHERS_WRITE); + perms.add(PosixFilePermission.OTHERS_EXECUTE); + + Files.setPosixFilePermissions(Paths.get(fileName), perms); + } + public String formHeronExecCommand(String[] inStringArray) { + StringBuilder builder = new StringBuilder(); + + for (String string : inStringArray) { + if (builder.length() > 0) { + builder.append(" "); + } + builder.append(string); + } + + String stringToReturn = builder.toString(); + return stringToReturn; + } + + /** + * Schedule the provided packed plan + */ + @Override + public boolean onSchedule(PackingPlan packing) { + LOG.info("Starting to deploy topology: " + EcsContext.topologyName(config)); + + synchronized (processToContainer) { + LOG.info("Starting executor for TMaster"); + startExecutor(0); + + // for each container, run its own executor + for (PackingPlan.ContainerPlan container : packing.getContainers()) { + startExecutor(container.getId()); + } + } + + LOG.info("Executor for each container have been started."); + + return true; + } + + @Override + public void addContainers(Set containers) { + synchronized (processToContainer) { + for (PackingPlan.ContainerPlan container : containers) { + if (processToContainer.values().contains(container.getId())) { + throw new RuntimeException(String.format("Found active container for %s, " + + "cannot launch a duplicate container.", container.getId())); + } + startExecutor(container.getId()); + } + } + } + public void removeContainers(Set containersToRemove) { + LOG.log(Level.INFO, + "Kill {0} of {1} containers", + new Object[]{containersToRemove.size(), processToContainer.size()}); + + synchronized (processToContainer) { + // Create a inverse map to be able to get process instance from container id + Map containerToProcessMap = new HashMap<>(); + for (Map.Entry entry : processToContainer.entrySet()) { + containerToProcessMap.put(entry.getValue(), entry.getKey()); + } + + for (PackingPlan.ContainerPlan containerToRemove : containersToRemove) { + int 
containerId = containerToRemove.getId(); + Process process = containerToProcessMap.get(containerId); + if (process == null) { + LOG.log(Level.WARNING, "Container for id:{0} not found.", containerId); + continue; + } + + // remove the process so that it is not monitored and relaunched + LOG.info("Killing executor for container: " + containerId); + processToContainer.remove(process); + process.destroy(); + LOG.info("Killed executor for container: " + containerId); + } + } + } + + public List getJobLinks() { + return null; + } + + public boolean onKill(Scheduler.KillTopologyRequest request) { + return false; + } + + public boolean onRestart(Scheduler.RestartTopologyRequest request) { + return false; + } + + public boolean onUpdate(Scheduler.UpdateTopologyRequest request) { + return false; + } + + +} From c5b7f3eeefe97cec0d812938a7e9e9742c62d5c1 Mon Sep 17 00:00:00 2001 From: ananth Date: Wed, 3 May 2017 15:28:12 -0700 Subject: [PATCH 04/61] Adding AWS ECS files --- heron/schedulers/src/java/BUILD | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/heron/schedulers/src/java/BUILD b/heron/schedulers/src/java/BUILD index bf8f9a7632d..29203c4afc7 100644 --- a/heron/schedulers/src/java/BUILD +++ b/heron/schedulers/src/java/BUILD @@ -65,6 +65,25 @@ genrule( cmd = "cp $< $@", ) +java_library( + name='ecs-scheduler-java', + srcs = glob(["**/ecs/*.java"]), + deps = scheduler_deps_files, +) + +java_binary( + name='ecs-scheduler-unshaded', + srcs = glob(["**/ecs/*.java"]), + deps = scheduler_deps_files, +) + +genrule( + name = "heron-ecs-scheduler", + srcs = [":ecs-scheduler-unshaded_deploy.jar"], + outs = ["heron-ecs-scheduler.jar"], + cmd = "cp $< $@", +) + java_library( name='aurora-scheduler-java', srcs = glob(["**/aurora/*.java"]), From 8c0261754f82bd8b2da3f04072dc4635c128e4e1 Mon Sep 17 00:00:00 2001 From: ananth Date: Wed, 3 May 2017 16:32:48 -0700 Subject: [PATCH 05/61] adding ecs to build scripts --- tools/rules/heron_client.bzl | 6 ++++++ 
tools/rules/heron_core.bzl | 1 + 2 files changed, 7 insertions(+) diff --git a/tools/rules/heron_client.bzl b/tools/rules/heron_client.bzl index f458a050e60..94836b0bb4a 100644 --- a/tools/rules/heron_client.bzl +++ b/tools/rules/heron_client.bzl @@ -17,6 +17,11 @@ def heron_client_local_files(): "//heron/config/src/yaml:conf-local-yaml", ] +def heron_client_ecs_files(): + return [ + "//heron/config/src/yaml:conf-ecs-yaml", + ] + def heron_client_aurora_files(): return [ "//heron/config/src/yaml:conf-aurora-yaml", @@ -36,6 +41,7 @@ def heron_client_lib_scheduler_files(): "//heron/schedulers/src/java:heron-yarn-scheduler", "//heron/schedulers/src/java:heron-mesos-scheduler", "//heron/schedulers/src/java:heron-marathon-scheduler", + "//heron/schedulers/src/java:heron-ecs-scheduler", "//heron/packing/src/java:heron-roundrobin-packing", "//heron/packing/src/java:heron-binpacking-packing", ] diff --git a/tools/rules/heron_core.bzl b/tools/rules/heron_core.bzl index 0c8d9d272b6..aeabb590527 100644 --- a/tools/rules/heron_core.bzl +++ b/tools/rules/heron_core.bzl @@ -31,6 +31,7 @@ def heron_core_lib_scheduler_files(): return [ "//heron/scheduler-core/src/java:heron-scheduler", "//heron/schedulers/src/java:heron-local-scheduler", + "//heron/schedulers/src/java:heron-ecs-scheduler", "//heron/schedulers/src/java:heron-slurm-scheduler", "//heron/schedulers/src/java:heron-mesos-scheduler", "//heron/schedulers/src/java:heron-marathon-scheduler", From 725891d23828bce3aeae4e62420d7b15665d61bf Mon Sep 17 00:00:00 2001 From: ananth Date: Wed, 3 May 2017 16:33:54 -0700 Subject: [PATCH 06/61] adding ecs to build --- scripts/packages/BUILD | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/scripts/packages/BUILD b/scripts/packages/BUILD index 5b3453de0c0..43a351e1f6e 100644 --- a/scripts/packages/BUILD +++ b/scripts/packages/BUILD @@ -234,6 +234,14 @@ pkg_tar( ] ) +pkg_tar( + name = "heron-client-conf-ecs", + package_dir = "conf/ecs", + files = [ + 
"//heron/config/src/yaml:conf-ecs-yaml", + ] +) + pkg_tar( name = "heron-client-conf", package_dir = "conf", @@ -259,6 +267,7 @@ pkg_tar( ":heron-client-conf-yarn", ":heron-client-conf-mesos", ":heron-client-conf-marathon", + ":heron-client-conf-ecs", ":heron-client-dist", ":heron-client-examples", ":heron-client-lib-third_party", From 5336c3c426206e32640c7a5d0944684a28c053ff Mon Sep 17 00:00:00 2001 From: ananth Date: Wed, 3 May 2017 16:35:54 -0700 Subject: [PATCH 07/61] Adding AMI Check dependancy --- heron/executor/src/python/heron_executor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/heron/executor/src/python/heron_executor.py b/heron/executor/src/python/heron_executor.py index f7f240033cb..2b922a3b91c 100755 --- a/heron/executor/src/python/heron_executor.py +++ b/heron/executor/src/python/heron_executor.py @@ -30,6 +30,7 @@ import time import yaml import socket +import urllib2 from functools import partial From e368e9ab7d3f27360c4dadf8150ba02a54d17127 Mon Sep 17 00:00:00 2001 From: ananth Date: Wed, 3 May 2017 17:13:55 -0700 Subject: [PATCH 08/61] Adding ecs conf --- heron/config/src/yaml/BUILD | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/heron/config/src/yaml/BUILD b/heron/config/src/yaml/BUILD index b7cf4359e48..866db128766 100644 --- a/heron/config/src/yaml/BUILD +++ b/heron/config/src/yaml/BUILD @@ -17,6 +17,11 @@ filegroup( srcs = glob(["conf/local/*.yaml"]), ) +filegroup( + name = "conf-ecs-yaml", + srcs = glob(["conf/ecs/*.yaml"]), +) + filegroup( name = "conf-aurora-yaml", srcs = glob(["conf/aurora/*"]), From 9a80784eb6499542f4738344ea4abc775eda133e Mon Sep 17 00:00:00 2001 From: ananth Date: Thu, 4 May 2017 16:35:39 -0700 Subject: [PATCH 09/61] Added Compose Commands --- .../java/com/twitter/heron/scheduler/ecs/EcsContext.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java 
index 818ca9b8f09..bc6cac4546c 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java @@ -56,10 +56,18 @@ public class EcsContext extends Context { public static final String DESTINATION_JVM = "/usr/lib/jvm/java-8-oracle"; public static final String COMPOSE_WORKING_DIR = "/tmp/"; + public static final String ECS_WORKING_DIR = "heron.scheduler.ecs.working.directory"; + public static final String ECS_CLUSTER_BINARY = "heron-examples.jar"; + public static final String COMPOSE_CMD = "ecs-cli compose --project-name "; + public static final String UP = " up"; public static String workingDirectory(Config config) { String workingDirectory = config.getStringValue( EcsKey.WORKING_DIRECTORY.value(), EcsKey.WORKING_DIRECTORY.getDefaultString()); return TokenSub.substitute(config, workingDirectory); } + + public static String getEcsWorkingDirectory(Config config) { + return config.getStringValue(ECS_WORKING_DIR); + } } From 0aae0223214301e6edbffdf2732f0a37b1841f2f Mon Sep 17 00:00:00 2001 From: ananth Date: Thu, 4 May 2017 16:36:28 -0700 Subject: [PATCH 10/61] Cleaned up non ecs functions --- .../heron/scheduler/ecs/EcsLauncher.java | 101 ++++-------------- 1 file changed, 20 insertions(+), 81 deletions(-) diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java index fb536ccf988..d3c4e3ad574 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java @@ -14,22 +14,24 @@ package com.twitter.heron.scheduler.ecs; -import java.io.File; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.logging.Level; +//import java.io.File; +//import java.nio.file.Paths; +//import java.util.ArrayList; 
+//import java.util.Arrays; +//import java.util.List; +//import java.util.logging.Level; import java.util.logging.Logger; //import com.twitter.heron.common.basics.SysUtils; -import com.twitter.heron.scheduler.utils.Runtime; -import com.twitter.heron.scheduler.utils.SchedulerUtils; +import com.twitter.heron.scheduler.utils.LauncherUtils; +//import com.twitter.heron.scheduler.utils.Runtime; +//import com.twitter.heron.scheduler.utils.SchedulerUtils; import com.twitter.heron.spi.common.Config; -import com.twitter.heron.spi.common.Context; +//import com.twitter.heron.spi.common.Context; import com.twitter.heron.spi.packing.PackingPlan; import com.twitter.heron.spi.scheduler.ILauncher; -import com.twitter.heron.spi.utils.ShellUtils; +import com.twitter.heron.spi.scheduler.IScheduler; +//import com.twitter.heron.spi.utils.ShellUtils; /** * Created by ananth on 4/18/17. @@ -40,91 +42,28 @@ public class EcsLauncher implements ILauncher { private Config config; private Config runtime; + private String ecsWorkingDirectory; private String topologyWorkingDirectory; - @Override public void initialize(Config mConfig, Config mRuntime) { this.config = mConfig; this.runtime = mRuntime; - // get the topology working directory - this.topologyWorkingDirectory = EcsContext.workingDirectory(config); } @Override public void close() { - + // Do nothing } + @Override public boolean launch(PackingPlan packing) { - LOG.log(Level.FINE, "Launching topology for local cluster {0}", - EcsContext.cluster(config)); - /** commenting these line as we dont need working directory we use /tmp" - * - */ - if (!setupWorkingDirectory()) { - LOG.severe("Failed to setup working directory"); - return false; - } - - String[] schedulerCmd = getSchedulerCommand(); - - - Process p = startScheduler(schedulerCmd); - - if (p == null) { - LOG.severe("Failed to start SchedulerMain using: " + Arrays.toString(schedulerCmd)); - return false; - } - - LOG.log(Level.FINE, String.format( - "To check the status and logs of 
the topology, use the working directory %s", - EcsContext.COMPOSE_WORKING_DIR)); - return true; - } - - protected String[] getSchedulerCommand() { - List freePorts = new ArrayList<>(SchedulerUtils.PORTS_REQUIRED_FOR_SCHEDULER); - for (int i = 0; i < SchedulerUtils.PORTS_REQUIRED_FOR_SCHEDULER; i++) { - //freePorts.add(SysUtils.getFreePort()); - freePorts.add(4000 + i); - } - - return SchedulerUtils.schedulerCommand(config, runtime, freePorts); - } - - protected boolean setupWorkingDirectory() { - // get the path of core release URI - String coreReleasePackageURI = EcsContext.corePackageUri(config); - - // form the target dest core release file name - String coreReleaseFileDestination = Paths.get( - topologyWorkingDirectory, "heron-core.tar.gz").toString(); - - // Form the topology package's URI - String topologyPackageURI = Runtime.topologyPackageUri(runtime).toString(); - // form the target topology package file name - String topologyPackageDestination = Paths.get( - topologyWorkingDirectory, "topology.tar.gz").toString(); - - System.out.println("topologyPackageURI :" + topologyPackageURI); - - System.out.println("topologyPackageDestination :" + topologyPackageDestination); - - boolean fileSetup = SchedulerUtils.setupWorkingDirectory( - topologyWorkingDirectory, - coreReleasePackageURI, - coreReleaseFileDestination, - topologyPackageURI, - topologyPackageDestination, - Context.verbose(config)); - - return fileSetup; + LauncherUtils launcherUtils = LauncherUtils.getInstance(); + Config ytruntime = launcherUtils.createConfigWithPackingDetails(runtime, packing); + return launcherUtils.onScheduleAsLibrary(config, ytruntime, getScheduler(), packing); } - protected Process startScheduler(String[] schedulerCmd) { - - return ShellUtils.runASyncProcess(EcsContext.verbose(config), schedulerCmd, - new File(topologyWorkingDirectory)); + protected IScheduler getScheduler() { + return new EcsScheduler(); } } From 83bf1a16d2cf81a5d0dc3f678f5d82e3d039652e Mon Sep 17 00:00:00 2001 
From: ananth Date: Thu, 4 May 2017 16:37:19 -0700 Subject: [PATCH 11/61] Added temp file for compose --- .../heron/scheduler/ecs/EcsScheduler.java | 195 ++++-------------- 1 file changed, 45 insertions(+), 150 deletions(-) diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java index 5faa9293f79..a5ea38a5c74 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java @@ -14,42 +14,23 @@ package com.twitter.heron.scheduler.ecs; -//import java.io.FileNotFoundException; -//import java.io.FileWriter; + import java.io.File; -//import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; -//import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.nio.file.attribute.PosixFilePermission; import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.logging.Level; import java.util.logging.Logger; - -//import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Optional; - import org.apache.commons.io.IOUtils; import com.twitter.heron.proto.scheduler.Scheduler; -import com.twitter.heron.scheduler.UpdateTopologyManager; +//import com.twitter.heron.scheduler.UpdateTopologyManager; import com.twitter.heron.scheduler.utils.SchedulerUtils; import com.twitter.heron.spi.common.Config; import com.twitter.heron.spi.common.Context; import com.twitter.heron.spi.packing.PackingPlan; -import com.twitter.heron.spi.scheduler.IScalable; +//import com.twitter.heron.spi.scheduler.IScalable; import 
com.twitter.heron.spi.scheduler.IScheduler; import com.twitter.heron.spi.utils.ShellUtils; @@ -57,36 +38,22 @@ /** * Created by ananth on 4/19/17. */ -public class EcsScheduler implements IScheduler, IScalable { +public class EcsScheduler implements IScheduler { private static final Logger LOG = Logger.getLogger(EcsScheduler.class.getName()); - // executor service for monitoring all the containers - private final ExecutorService monitorService = Executors.newCachedThreadPool(); - // map to keep track of the process and the shard it is running - private final Map processToContainer = new ConcurrentHashMap<>(); private Config config; private Config runtime; - private UpdateTopologyManager updateTopologyManager; - // has the topology been killed? - private volatile boolean isTopologyKilled = false; + private volatile boolean isTopologyKilled = false; + private File tempDockerFile = null; @Override public void initialize(Config mConfig, Config mRuntime) { - this.config = mConfig; + this.config = Config.toClusterMode(mConfig); this.runtime = mRuntime; - this.updateTopologyManager = - new UpdateTopologyManager(config, runtime, Optional.of(this)); + } public void close() { - // Shut down the ExecutorService for monitoring - monitorService.shutdownNow(); - - // Clear the map - processToContainer.clear(); - if (updateTopologyManager != null) { - updateTopologyManager.close(); - } } @@ -107,72 +74,47 @@ protected void startExecutor(final int container) { private String[] getExecutorCommand(int container) { List freePorts = new ArrayList<>(SchedulerUtils.PORTS_REQUIRED_FOR_EXECUTOR); - String ecsComposeCommand = "/usr/local/bin/ecs-cli compose --project-name heron_ex"; - ecsComposeCommand = ecsComposeCommand + String.valueOf(container) + " --file "; - String ecsEnableUpdaload = " up"; - for (int i = 0; i < SchedulerUtils.PORTS_REQUIRED_FOR_EXECUTOR; i++) { //freePorts.add(SysUtils.getFreePort()); freePorts.add(5000 + (i + (container * 10))); } - String replaceTopologyBinFile 
= Context.topologyBinaryFile(config); - String replaeRole = Context.role(config); - String replaceJavaHome = Context.clusterJavaHome(config); - System.out.println("Topology Bin file :" + Context.topologyBinaryFile(config)); - System.out.println("Role :" + Context.role(config)); - System.out.println("JAVA HOME :" + Context.clusterJavaHome(config)); String[] executorCmd = SchedulerUtils.executorCommand(config, runtime, container, freePorts); - System.out.println("Executor command line: " + Arrays.toString(executorCmd)); - - String finalExecCommand = formHeronExecCommand(executorCmd); - finalExecCommand = finalExecCommand.replace(replaceTopologyBinFile, "heron-examples.jar"); - //finalExecCommand = finalExecCommand.replace(replaeRole, "root"); - finalExecCommand = finalExecCommand.replace(replaceJavaHome, EcsContext.DESTINATION_JVM); - finalExecCommand = finalExecCommand.replaceAll("\"", "'"); - System.out.println("heron exec command: " + finalExecCommand); - String content = null; - String finalCommand = ecsComposeCommand; + String finalExecCommand = setClusterValues(formHeronExecCommand(executorCmd)); + String ecsTaskProject = EcsContext.topologyName(config) + "_" + String.valueOf(container); FileOutputStream dockerFilestream = null; - String dockerComposeFileName = EcsContext.COMPOSE_WORKING_DIR; + String content = null; try { - System.out.println("reading file: docker_compose_template.yml "); - - content = EcsContext.PART1 + EcsContext.CMD; - content = content + EcsContext.ECSNETWORK; - content = replacePortNumbers(container, content); - content = content.replaceAll("TOPOLOGY_NAME", EcsContext.topologyName(config)); - content = content.replaceAll("container_number", - "executor" + String.valueOf(container)); - content = content.replace("heron_executor", finalExecCommand); - System.out.println("content to build .yml file : " + content); - dockerComposeFileName = dockerComposeFileName + "/docker_compose"; - dockerComposeFileName = dockerComposeFileName + 
String.valueOf(container) + ".yml"; - if (Files.exists(Paths.get(dockerComposeFileName))) { - //its from an old submit so delete it - Files.delete(Paths.get(dockerComposeFileName)); - } - final File file = new File(dockerComposeFileName); - file.setWritable(true); - System.out.println("docker compose file " + dockerComposeFileName); - dockerFilestream = new FileOutputStream(dockerComposeFileName); - //IOUtils.write(content, new FileOutputStream(dockerComposeFileName)); + tempDockerFile = File.createTempFile("docker", ".yml"); + content = getDockerFileContent(finalExecCommand, container); + tempDockerFile.setWritable(true); + dockerFilestream = new FileOutputStream(tempDockerFile); IOUtils.write(content, dockerFilestream); IOUtils.closeQuietly(dockerFilestream); - setPermissionsOnDockerfile(dockerComposeFileName); - System.out.println("docker compose file permission granted " + dockerComposeFileName); } catch (IOException e) { e.printStackTrace(); } finally { IOUtils.closeQuietly(dockerFilestream); } + String dockerComposeFileName = " --file " + tempDockerFile; - finalCommand = finalCommand + dockerComposeFileName + ecsEnableUpdaload; - //return finalCommand.toArray(new String[finalCommand.size()]); + String finalCommand = EcsContext.COMPOSE_CMD + ecsTaskProject + dockerComposeFileName; + finalCommand = finalCommand + EcsContext.UP; System.out.println("final Ecs Task command " + finalCommand); + tempDockerFile.deleteOnExit(); return new String[] {finalCommand}; } + public String setClusterValues(String localExecCommand) { + String clusterExecCommand = localExecCommand.replace(Context.topologyBinaryFile(config), + EcsContext.ECS_CLUSTER_BINARY); + // line below can be removed once the Cluster JVM TODO is resolved + clusterExecCommand = clusterExecCommand.replace(Context.clusterJavaHome(config), + EcsContext.DESTINATION_JVM); + clusterExecCommand = clusterExecCommand.replaceAll("\"", "'"); + return clusterExecCommand; + } + public String replacePortNumbers(int 
container, String content) { int basePortnumber = 5000; String localContent = new String(content); @@ -183,23 +125,19 @@ public String replacePortNumbers(int container, String content) { return localContent; } - public void setPermissionsOnDockerfile(String fileName) throws IOException { - Set perms = new HashSet(); - //add owners permission - perms.add(PosixFilePermission.OWNER_READ); - perms.add(PosixFilePermission.OWNER_WRITE); - perms.add(PosixFilePermission.OWNER_EXECUTE); - //add group permissions - perms.add(PosixFilePermission.GROUP_READ); - perms.add(PosixFilePermission.GROUP_WRITE); - perms.add(PosixFilePermission.GROUP_EXECUTE); - //add others permissions - perms.add(PosixFilePermission.OTHERS_READ); - perms.add(PosixFilePermission.OTHERS_WRITE); - perms.add(PosixFilePermission.OTHERS_EXECUTE); - - Files.setPosixFilePermissions(Paths.get(fileName), perms); + public String getDockerFileContent(String execCommand, int container) { + String commandBuiler = EcsContext.PART1 + EcsContext.CMD; + commandBuiler = commandBuiler + EcsContext.ECSNETWORK; + commandBuiler = replacePortNumbers(container, commandBuiler); + commandBuiler = commandBuiler.replaceAll("TOPOLOGY_NAME", + EcsContext.topologyName(config)); + commandBuiler = commandBuiler.replaceAll("container_number", + "executor" + String.valueOf(container)); + commandBuiler = commandBuiler.replace("heron_executor", execCommand); + + return commandBuiler; } + public String formHeronExecCommand(String[] inStringArray) { StringBuilder builder = new StringBuilder(); @@ -221,14 +159,12 @@ public String formHeronExecCommand(String[] inStringArray) { public boolean onSchedule(PackingPlan packing) { LOG.info("Starting to deploy topology: " + EcsContext.topologyName(config)); - synchronized (processToContainer) { - LOG.info("Starting executor for TMaster"); - startExecutor(0); + LOG.info("Starting executor for TMaster"); + startExecutor(0); // for each container, run its own executor - for (PackingPlan.ContainerPlan 
container : packing.getContainers()) { - startExecutor(container.getId()); - } + for (PackingPlan.ContainerPlan container : packing.getContainers()) { + startExecutor(container.getId()); } LOG.info("Executor for each container have been started."); @@ -236,47 +172,6 @@ public boolean onSchedule(PackingPlan packing) { return true; } - @Override - public void addContainers(Set containers) { - synchronized (processToContainer) { - for (PackingPlan.ContainerPlan container : containers) { - if (processToContainer.values().contains(container.getId())) { - throw new RuntimeException(String.format("Found active container for %s, " - + "cannot launch a duplicate container.", container.getId())); - } - startExecutor(container.getId()); - } - } - } - public void removeContainers(Set containersToRemove) { - LOG.log(Level.INFO, - "Kill {0} of {1} containers", - new Object[]{containersToRemove.size(), processToContainer.size()}); - - synchronized (processToContainer) { - // Create a inverse map to be able to get process instance from container id - Map containerToProcessMap = new HashMap<>(); - for (Map.Entry entry : processToContainer.entrySet()) { - containerToProcessMap.put(entry.getValue(), entry.getKey()); - } - - for (PackingPlan.ContainerPlan containerToRemove : containersToRemove) { - int containerId = containerToRemove.getId(); - Process process = containerToProcessMap.get(containerId); - if (process == null) { - LOG.log(Level.WARNING, "Container for id:{0} not found.", containerId); - continue; - } - - // remove the process so that it is not monitored and relaunched - LOG.info("Killing executor for container: " + containerId); - processToContainer.remove(process); - process.destroy(); - LOG.info("Killed executor for container: " + containerId); - } - } - } - public List getJobLinks() { return null; } From f1379f6c55a461ae779852bf50c2d66b043482da Mon Sep 17 00:00:00 2001 From: ananth Date: Thu, 4 May 2017 16:52:46 -0700 Subject: [PATCH 12/61] Removed reference to temp 
dir --- .../java/com/twitter/heron/scheduler/ecs/EcsContext.java | 8 -------- 1 file changed, 8 deletions(-) diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java index bc6cac4546c..2149e94e319 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java @@ -54,9 +54,6 @@ public class EcsContext extends Context { + " herondata:\n" + " driver: local"; public static final String DESTINATION_JVM = "/usr/lib/jvm/java-8-oracle"; - public static final String COMPOSE_WORKING_DIR = "/tmp/"; - - public static final String ECS_WORKING_DIR = "heron.scheduler.ecs.working.directory"; public static final String ECS_CLUSTER_BINARY = "heron-examples.jar"; public static final String COMPOSE_CMD = "ecs-cli compose --project-name "; public static final String UP = " up"; @@ -65,9 +62,4 @@ public static String workingDirectory(Config config) { EcsKey.WORKING_DIRECTORY.value(), EcsKey.WORKING_DIRECTORY.getDefaultString()); return TokenSub.substitute(config, workingDirectory); } - - public static String getEcsWorkingDirectory(Config config) { - return config.getStringValue(ECS_WORKING_DIR); - } } - From c38322c9acd4e650e6876d79ff7ed0a21ae75712 Mon Sep 17 00:00:00 2001 From: ananth Date: Thu, 4 May 2017 16:58:42 -0700 Subject: [PATCH 13/61] Removed Working Dir reference --- .../java/com/twitter/heron/scheduler/ecs/EcsContext.java | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java index 2149e94e319..d1f460b9344 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java @@ -14,9 +14,7 @@ package 
com.twitter.heron.scheduler.ecs; -import com.twitter.heron.spi.common.Config; import com.twitter.heron.spi.common.Context; -import com.twitter.heron.spi.common.TokenSub; /** @@ -57,9 +55,6 @@ public class EcsContext extends Context { public static final String ECS_CLUSTER_BINARY = "heron-examples.jar"; public static final String COMPOSE_CMD = "ecs-cli compose --project-name "; public static final String UP = " up"; - public static String workingDirectory(Config config) { - String workingDirectory = config.getStringValue( - EcsKey.WORKING_DIRECTORY.value(), EcsKey.WORKING_DIRECTORY.getDefaultString()); - return TokenSub.substitute(config, workingDirectory); - } + } + From 7e575b9a8dcffbb4079f2d0ccb9ad0c177109287 Mon Sep 17 00:00:00 2001 From: ananth Date: Thu, 4 May 2017 17:07:47 -0700 Subject: [PATCH 14/61] Removed commented lines for unused imports --- .../twitter/heron/scheduler/ecs/EcsLauncher.java | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java index d3c4e3ad574..973e3ac14b3 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java @@ -14,24 +14,14 @@ package com.twitter.heron.scheduler.ecs; -//import java.io.File; -//import java.nio.file.Paths; -//import java.util.ArrayList; -//import java.util.Arrays; -//import java.util.List; -//import java.util.logging.Level; + import java.util.logging.Logger; -//import com.twitter.heron.common.basics.SysUtils; import com.twitter.heron.scheduler.utils.LauncherUtils; -//import com.twitter.heron.scheduler.utils.Runtime; -//import com.twitter.heron.scheduler.utils.SchedulerUtils; import com.twitter.heron.spi.common.Config; -//import com.twitter.heron.spi.common.Context; import com.twitter.heron.spi.packing.PackingPlan; import 
com.twitter.heron.spi.scheduler.ILauncher; import com.twitter.heron.spi.scheduler.IScheduler; -//import com.twitter.heron.spi.utils.ShellUtils; /** * Created by ananth on 4/18/17. @@ -42,8 +32,6 @@ public class EcsLauncher implements ILauncher { private Config config; private Config runtime; - private String ecsWorkingDirectory; - private String topologyWorkingDirectory; @Override public void initialize(Config mConfig, Config mRuntime) { From 4c6de0cdccd024ee2050cd078ae25c31ebba5156 Mon Sep 17 00:00:00 2001 From: ananth Date: Thu, 4 May 2017 17:08:32 -0700 Subject: [PATCH 15/61] emoved commented lines on unused imports --- .../com/twitter/heron/scheduler/ecs/EcsScheduler.java | 9 --------- 1 file changed, 9 deletions(-) diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java index a5ea38a5c74..7b4e21892b0 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java @@ -25,12 +25,10 @@ import org.apache.commons.io.IOUtils; import com.twitter.heron.proto.scheduler.Scheduler; -//import com.twitter.heron.scheduler.UpdateTopologyManager; import com.twitter.heron.scheduler.utils.SchedulerUtils; import com.twitter.heron.spi.common.Config; import com.twitter.heron.spi.common.Context; import com.twitter.heron.spi.packing.PackingPlan; -//import com.twitter.heron.spi.scheduler.IScalable; import com.twitter.heron.spi.scheduler.IScheduler; import com.twitter.heron.spi.utils.ShellUtils; @@ -96,7 +94,6 @@ private String[] getExecutorCommand(int container) { IOUtils.closeQuietly(dockerFilestream); } String dockerComposeFileName = " --file " + tempDockerFile; - String finalCommand = EcsContext.COMPOSE_CMD + ecsTaskProject + dockerComposeFileName; finalCommand = finalCommand + EcsContext.UP; System.out.println("final Ecs Task command " + finalCommand); @@ -147,7 
+144,6 @@ public String formHeronExecCommand(String[] inStringArray) { } builder.append(string); } - String stringToReturn = builder.toString(); return stringToReturn; } @@ -158,17 +154,13 @@ public String formHeronExecCommand(String[] inStringArray) { @Override public boolean onSchedule(PackingPlan packing) { LOG.info("Starting to deploy topology: " + EcsContext.topologyName(config)); - LOG.info("Starting executor for TMaster"); startExecutor(0); - // for each container, run its own executor for (PackingPlan.ContainerPlan container : packing.getContainers()) { startExecutor(container.getId()); } - LOG.info("Executor for each container have been started."); - return true; } @@ -188,5 +180,4 @@ public boolean onUpdate(Scheduler.UpdateTopologyRequest request) { return false; } - } From fefbbc8d38881e347cef20c4b177cd1e4ebee445 Mon Sep 17 00:00:00 2001 From: ananth Date: Thu, 4 May 2017 17:12:21 -0700 Subject: [PATCH 16/61] Removing un referenced EcsKeys class --- .../twitter/heron/scheduler/ecs/EcsKey.java | 53 ------------------- 1 file changed, 53 deletions(-) delete mode 100644 heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java deleted file mode 100644 index 8393003917d..00000000000 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2016 Twitter. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package com.twitter.heron.scheduler.ecs; - -import com.twitter.heron.spi.common.Key; - -/** - * Created by ananth on 4/29/17. - */ -public enum EcsKey { - // config key for specifying the working directory of a topology - WORKING_DIRECTORY("heron.scheduler.local.working.directory", - "${HOME}/.herondata/topologies/${CLUSTER}/${ROLE}/${TOPOLOGY}"); - - private final String value; - private final Key.Type type; - private final Object defaultValue; - - EcsKey(String value, String defaultValue) { - this.value = value; - this.type = Key.Type.STRING; - this.defaultValue = defaultValue; - } - - public String value() { - return value; - } - - public Object getDefault() { - return defaultValue; - } - - public String getDefaultString() { - if (type != Key.Type.STRING) { - throw new IllegalAccessError(String.format( - "Config Key %s is type %s, getDefaultString() not supported", this.name(), this.type)); - } - return (String) this.defaultValue; - } - -} From edaa5fa08ca022f58b93414e68dc8e8b708d5676 Mon Sep 17 00:00:00 2001 From: ananth Date: Fri, 5 May 2017 10:41:29 -0700 Subject: [PATCH 17/61] Removed the host env setting if its an Amazon ECS instance --- heron/executor/src/python/heron_executor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/heron/executor/src/python/heron_executor.py b/heron/executor/src/python/heron_executor.py index 2b922a3b91c..d773c386cb3 100755 --- a/heron/executor/src/python/heron_executor.py +++ b/heron/executor/src/python/heron_executor.py @@ -176,7 +176,6 @@ def init_parsed_args(self, args): self.master_host = subprocess.Popen(["curl", "http://169.254.169.254/latest/meta-data/local-ipv4"] , stdout=subprocess.PIPE).communicate()[0] - os.environ['HOST'] = self.master_host else: self.master_host = os.environ.get('HOST') if 'HOST' in os.environ else socket.gethostname() else: From 8efc62ef31036fdd060109634785c59804b4d051 Mon Sep 17 
00:00:00 2001 From: ananth Date: Fri, 12 May 2017 00:46:44 -0700 Subject: [PATCH 18/61] remving had coded values --- .../heron/scheduler/ecs/EcsContext.java | 49 +++++-------------- 1 file changed, 13 insertions(+), 36 deletions(-) diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java index d1f460b9344..20c0b2c0faa 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java @@ -14,47 +14,24 @@ package com.twitter.heron.scheduler.ecs; +import com.twitter.heron.spi.common.Config; import com.twitter.heron.spi.common.Context; +import com.twitter.heron.spi.common.TokenSub; - -/** - * Created by ananth on 4/29/17. - */ public class EcsContext extends Context { - public static final String PART1 = "version: '2'\n" - + "services:\n" - + " container_number:\n" - + " image: ananthgs/onlyheronandubuntu\n"; - public static final String CMD = " command: [\"sh\", \"-c\", \"mkdir /s3; cd /s3 ;" - + "aws s3 cp s3://herondockercal/TOPOLOGY_NAME/topology.tar.gz /s3 ;" - + "aws s3 cp s3://herondockercal/heron-core-testbuild-ubuntu14.04.tar.gz /s3 ;cd /s3;" - + " tar -zxvf topology.tar.gz; tar -zxvf heron-core-testbuild-ubuntu14.04.tar.gz;" - + "heron_executor ;\"] \n"; - public static final String ECSNETWORK = " networks:\n" - + " - heron\n" - + " ports:\n" - + " - \"5000:5000\"\n" - + " - \"5001:5001\"\n" - + " - \"5002:5002\"\n" - + " - \"5003:5003\"\n" - + " - \"5004:5004\"\n" - + " - \"5005:5005\"\n" - + " - \"5006:5006\"\n" - + " - \"5007:5007\"\n" - + " - \"5008:5008\"\n" - + " - \"5009:5009\"\n" - + " volumes:\n" - + " - \"herondata:/root/.herondata\"\n" - + "networks:\n" - + " heron:\n" - + " driver: bridge\n" - + "volumes:\n" - + " herondata:\n" - + " driver: local"; - public static final String DESTINATION_JVM = "/usr/lib/jvm/java-8-oracle"; - public static 
final String ECS_CLUSTER_BINARY = "heron-examples.jar"; public static final String COMPOSE_CMD = "ecs-cli compose --project-name "; public static final String UP = " up"; + public static String ecsClusterBinary(Config config) { + String workingDirectory = config.getStringValue( + EcsKey.ECS_CLUSTER_BINARY.value(), EcsKey.ECS_CLUSTER_BINARY.getDefaultString()); + return TokenSub.substitute(config, workingDirectory); + } + public static String ecsComposeTemplate(Config config) { + String workingDirectory = config.getStringValue( + EcsKey.ECS_COMPOSE_TEMPLATE.value(), EcsKey.ECS_COMPOSE_TEMPLATE.getDefaultString()); + return TokenSub.substitute(config, workingDirectory); + } + } From 0b740334cedd5ba1ef44243cbed2ce76cb6455c0 Mon Sep 17 00:00:00 2001 From: ananth Date: Fri, 12 May 2017 00:56:18 -0700 Subject: [PATCH 19/61] removing hard coded values and ports --- .../heron/scheduler/ecs/EcsScheduler.java | 64 +++++++++++-------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java index 7b4e21892b0..556e1c372d4 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java @@ -18,12 +18,16 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; + import java.util.ArrayList; import java.util.List; import java.util.logging.Logger; import org.apache.commons.io.IOUtils; +import com.twitter.heron.common.basics.SysUtils; import com.twitter.heron.proto.scheduler.Scheduler; import com.twitter.heron.scheduler.utils.SchedulerUtils; import com.twitter.heron.spi.common.Config; @@ -33,13 +37,11 @@ import com.twitter.heron.spi.utils.ShellUtils; -/** - * Created by ananth on 4/19/17. 
- */ public class EcsScheduler implements IScheduler { private static final Logger LOG = Logger.getLogger(EcsScheduler.class.getName()); private Config config; private Config runtime; + private StringBuilder nfreePorts; private volatile boolean isTopologyKilled = false; private File tempDockerFile = null; @@ -72,11 +74,21 @@ protected void startExecutor(final int container) { private String[] getExecutorCommand(int container) { List freePorts = new ArrayList<>(SchedulerUtils.PORTS_REQUIRED_FOR_EXECUTOR); + Integer localFreePort = null; + nfreePorts = new StringBuilder(); for (int i = 0; i < SchedulerUtils.PORTS_REQUIRED_FOR_EXECUTOR; i++) { - //freePorts.add(SysUtils.getFreePort()); - freePorts.add(5000 + (i + (container * 10))); + localFreePort = SysUtils.getFreePort(); + freePorts.add(localFreePort); + nfreePorts.append("\n - \""); + nfreePorts.append(localFreePort); + nfreePorts.append(":"); + nfreePorts.append(localFreePort); + nfreePorts.append("\""); } + + String[] executorCmd = SchedulerUtils.executorCommand(config, runtime, container, freePorts); + System.out.println("Executor Cmd before replacement %s" + formHeronExecCommand(executorCmd)); String finalExecCommand = setClusterValues(formHeronExecCommand(executorCmd)); String ecsTaskProject = EcsContext.topologyName(config) + "_" + String.valueOf(container); FileOutputStream dockerFilestream = null; @@ -96,42 +108,42 @@ private String[] getExecutorCommand(int container) { String dockerComposeFileName = " --file " + tempDockerFile; String finalCommand = EcsContext.COMPOSE_CMD + ecsTaskProject + dockerComposeFileName; finalCommand = finalCommand + EcsContext.UP; - System.out.println("final Ecs Task command " + finalCommand); - tempDockerFile.deleteOnExit(); + LOG.info("final Ecs Task command " + finalCommand); + //tempDockerFile.deleteOnExit(); return new String[] {finalCommand}; } public String setClusterValues(String localExecCommand) { + //LOG.info("topologyBinaryFile: " + 
Context.topologyBinaryFile(config)); + //LOG.info(" cluster topologyBinaryFile: " + // + EcsContext.ecsClusterBinary(config)); + //String clusterExecCommand = localExecCommand.replace(Context.topologyBinaryFile(config), + // EcsContext.ECS_CLUSTER_BINARY); String clusterExecCommand = localExecCommand.replace(Context.topologyBinaryFile(config), - EcsContext.ECS_CLUSTER_BINARY); + EcsContext.ecsClusterBinary(config)); + // line below can be removed once the Cluster JVM TODO is resolved - clusterExecCommand = clusterExecCommand.replace(Context.clusterJavaHome(config), - EcsContext.DESTINATION_JVM); + //LOG.info("HERON_CLUSTER_JAVA_HOME: " + // + EcsContext.clusterJavaHome(Config.toClusterMode(config))); + //clusterExecCommand = clusterExecCommand.replace(Context.clusterJavaHome(config), + // EcsContext.DESTINATION_JVM); clusterExecCommand = clusterExecCommand.replaceAll("\"", "'"); return clusterExecCommand; } - public String replacePortNumbers(int container, String content) { - int basePortnumber = 5000; - String localContent = new String(content); - for (int i = 0; i < SchedulerUtils.PORTS_REQUIRED_FOR_EXECUTOR; i++) { - localContent = localContent.replace(String.valueOf(basePortnumber + i), - String.valueOf(basePortnumber + (i + (container * 10)))); - } - return localContent; - } - public String getDockerFileContent(String execCommand, int container) { - String commandBuiler = EcsContext.PART1 + EcsContext.CMD; - commandBuiler = commandBuiler + EcsContext.ECSNETWORK; - commandBuiler = replacePortNumbers(container, commandBuiler); + public String getDockerFileContent(String execCommand, int container) throws IOException { + + String commandBuiler = new String(Files.readAllBytes( + Paths.get(EcsContext.ecsComposeTemplate(config)))); commandBuiler = commandBuiler.replaceAll("TOPOLOGY_NAME", EcsContext.topologyName(config)); - commandBuiler = commandBuiler.replaceAll("container_number", + commandBuiler = commandBuiler.replaceAll("CONTAINER_NUMBER", "executor" + 
String.valueOf(container)); - commandBuiler = commandBuiler.replace("heron_executor", execCommand); - + commandBuiler = commandBuiler.replace("HERON_EXECUTOR", execCommand); + commandBuiler = commandBuiler.replace("FREEPORTS", nfreePorts); + //System.out.println("commandBuiler :\n" + commandBuiler); return commandBuiler; } From fceacfa2fbdee5679bdd720527641aed84b2de38 Mon Sep 17 00:00:00 2001 From: ananth Date: Fri, 12 May 2017 00:57:05 -0700 Subject: [PATCH 20/61] new file: EcsKey.java --- .../twitter/heron/scheduler/ecs/EcsKey.java | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java new file mode 100644 index 00000000000..ef37a1576bc --- /dev/null +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java @@ -0,0 +1,51 @@ +// Copyright 2016 Twitter. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package com.twitter.heron.scheduler.ecs; + +import com.twitter.heron.spi.common.Key; + +public enum EcsKey { + // config key for specifying the destination topology binary file + ECS_CLUSTER_BINARY("heron.ecs.topology.binary.file", "heron-examples.jar"), + ECS_COMPOSE_TEMPLATE("heron.ecs.compose.template.file", + "${HOME}/.heron/conf/ecs/ecs_compose_template.yaml"); + + private final String value; + private final Key.Type type; + private final Object defaultValue; + + EcsKey(String value, String defaultValue) { + this.value = value; + this.type = Key.Type.STRING; + this.defaultValue = defaultValue; + } + + public String value() { + return value; + } + + public Object getDefault() { + return defaultValue; + } + + public String getDefaultString() { + if (type != Key.Type.STRING) { + throw new IllegalAccessError(String.format( + "Config Key %s is type %s, getDefaultString() not supported", this.name(), this.type)); + } + return (String) this.defaultValue; + } + +} From 532fffecc023d18fe6ada98cbace54301c313c13 Mon Sep 17 00:00:00 2001 From: ananth Date: Fri, 12 May 2017 00:57:37 -0700 Subject: [PATCH 21/61] modified: EcsLauncher.java --- .../src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java index 973e3ac14b3..4c3d4c84b8f 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java @@ -23,9 +23,6 @@ import com.twitter.heron.spi.scheduler.ILauncher; import com.twitter.heron.spi.scheduler.IScheduler; -/** - * Created by ananth on 4/18/17. 
- */ public class EcsLauncher implements ILauncher { protected static final Logger LOG = Logger.getLogger(EcsLauncher.class.getName()); From 2d08fe50e447aea8d4fd5e91b8279f16f3329cba Mon Sep 17 00:00:00 2001 From: ananth Date: Fri, 12 May 2017 00:59:09 -0700 Subject: [PATCH 22/61] fixing java and other paths --- heron/config/src/yaml/conf/ecs/scheduler.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/heron/config/src/yaml/conf/ecs/scheduler.yaml b/heron/config/src/yaml/conf/ecs/scheduler.yaml index 2b0b254f73f..ae714c7e57c 100644 --- a/heron/config/src/yaml/conf/ecs/scheduler.yaml +++ b/heron/config/src/yaml/conf/ecs/scheduler.yaml @@ -5,4 +5,9 @@ heron.class.scheduler: com.twitter.heron.scheduler.ecs.Ecs heron.class.launcher: com.twitter.heron.scheduler.ecs.EcsLauncher # location of java - pick it up from shell environment -heron.directory.sandbox.java.home: ${JAVA_HOME} +heron.directory.sandbox.java.home: /usr/lib/jvm/java-8-oracle + +heron.ecs.topology.binary.file: heron-examples.jar + +# location of java - pick it up from shell environment +heron.ecs.compose.template.file: ${HOME}/.heron/conf/ecs/ecs_compose_template.yaml \ No newline at end of file From 57d9c91538f7c0a527c1b8dd8de606627b1c7ce6 Mon Sep 17 00:00:00 2001 From: ananth Date: Fri, 12 May 2017 00:59:44 -0700 Subject: [PATCH 23/61] new file: ecs_compose_template.yaml --- .../src/yaml/conf/ecs/ecs_compose_template.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100755 heron/config/src/yaml/conf/ecs/ecs_compose_template.yaml diff --git a/heron/config/src/yaml/conf/ecs/ecs_compose_template.yaml b/heron/config/src/yaml/conf/ecs/ecs_compose_template.yaml new file mode 100755 index 00000000000..44cdc75c1bb --- /dev/null +++ b/heron/config/src/yaml/conf/ecs/ecs_compose_template.yaml @@ -0,0 +1,16 @@ +version: '2' +services: + CONTAINER_NUMBER: + image: ananthgs/onlyheronandubuntu + command: ["sh", "-c", "mkdir /s3; cd /s3 ;aws s3 cp 
s3://herondockercal/TOPOLOGY_NAME/topology.tar.gz /s3 ;aws s3 cp s3://herondockercal/heron-core-testbuild-ubuntu14.04.tar.gz /s3 ;cd /s3; tar -zxvf topology.tar.gz; tar -zxvf heron-core-testbuild-ubuntu14.04.tar.gz; HERON_EXECUTOR ;"] + networks: + - heron + ports:FREEPORTS + volumes: + - "herondata:/root/.herondata" +networks: + heron: + driver: bridge +volumes: + herondata: + driver: local From 3db478f97821023131b1fc3d383b07f97c3c1439 Mon Sep 17 00:00:00 2001 From: ananth Date: Fri, 12 May 2017 17:08:01 -0700 Subject: [PATCH 24/61] tunneling set to false --- heron/config/src/yaml/conf/ecs/statemgr.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heron/config/src/yaml/conf/ecs/statemgr.yaml b/heron/config/src/yaml/conf/ecs/statemgr.yaml index facd0e2fff4..e86e22679c4 100644 --- a/heron/config/src/yaml/conf/ecs/statemgr.yaml +++ b/heron/config/src/yaml/conf/ecs/statemgr.yaml @@ -25,7 +25,7 @@ heron.statemgr.zookeeper.retry.interval.ms: 10000 # Following are config for tunneling #################################################################### # Whether we need tunnel if no direct access on zk server -heron.statemgr.is.tunnel.needed: True +heron.statemgr.is.tunnel.needed: False # The connection timeout in ms when trying to connect to zk server heron.statemgr.tunnel.connection.timeout.ms: 1000 From 4a36f40eab03dab4fef5d069c4c904c9ea257271 Mon Sep 17 00:00:00 2001 From: ananth Date: Fri, 12 May 2017 17:49:45 -0700 Subject: [PATCH 25/61] update gethost to handle docker and ecs AMI --- heron/executor/src/python/heron_executor.py | 46 ++++++++++++--------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/heron/executor/src/python/heron_executor.py b/heron/executor/src/python/heron_executor.py index d773c386cb3..c2ec9514941 100755 --- a/heron/executor/src/python/heron_executor.py +++ b/heron/executor/src/python/heron_executor.py @@ -55,7 +55,8 @@ def print_usage(): " " " " " " - " ") + " " + " ") def id_map(prefix, container_plans, 
add_zero_id=False): ids = {} @@ -106,12 +107,10 @@ def log_pid_for_process(process_name, pid): Log.info('Logging pid %d to file %s' %(pid, filename)) atomic_write_file(filename, str(pid)) -def is_docker_environment(): - return os.path.isfile('/.dockerenv') - -def isEcsAmiInstance(): - meta = 'http://169.254.169.254/latest/meta-data/ami-id' +def isEcsAmiInstance(ecs_ami): + meta = ecs_ami + '/ami-id' req = urllib2.Request(meta) + try: response = urllib2.urlopen(req).read() if 'ami' in response: @@ -124,6 +123,26 @@ def isEcsAmiInstance(): #_msg = 'no metadata, not in AWS' return 0 +def getHost(ecs_ami): + if not ecs_ami: + return socket.gethostname() + else: + l_host = '' + # Needed for Docker environments since the hostname of a docker container is the container's + # id within docker, rather than the host's hostname. NOTE: this 'HOST' env variable is not + # guaranteed to be set in all Docker executor environments (outside of Marathon) + if os.path.isfile('/.dockerenv'): + # Need to set the HOST environment vaira ble if docker is for AWS ECS tasks + if isEcsAmiInstance(ecs_ami): + l_host = subprocess.Popen(["curl", ecs_ami + "/local-ipv4"] + , stdout=subprocess.PIPE).communicate()[0] + else: + l_host = os.environ.get('HOST') if 'HOST' in os.environ else socket.gethostname() + else: + l_host = socket.gethostname() + return l_host + + class ProcessInfo(object): def __init__(self, process, name, command, attempts=1): """ @@ -167,19 +186,7 @@ def init_parsed_args(self, args): base64.b64decode(parsed_args.instance_jvm_opts.lstrip('"'). rstrip('"').replace('=', '=')) self.classpath = parsed_args.classpath - # Needed for Docker environments since the hostname of a docker container is the container's - # id within docker, rather than the host's hostname. 
NOTE: this 'HOST' env variable is not - # guaranteed to be set in all Docker executor environments (outside of Marathon) - if is_docker_environment(): - # Need to set the HOST environment vairable if docker is for AWS ECS tasks - if isEcsAmiInstance(): - self.master_host = subprocess.Popen(["curl", - "http://169.254.169.254/latest/meta-data/local-ipv4"] - , stdout=subprocess.PIPE).communicate()[0] - else: - self.master_host = os.environ.get('HOST') if 'HOST' in os.environ else socket.gethostname() - else: - self.master_host = socket.gethostname() + self.master_host = getHost(parsed_args.ecsAmiUrl) self.master_port = parsed_args.master_port self.tmaster_controller_port = parsed_args.tmaster_controller_port self.tmaster_stats_port = parsed_args.tmaster_stats_port @@ -283,6 +290,7 @@ def parse_args(args): parser.add_argument("metricscachemgr_classpath") parser.add_argument("metricscachemgr_masterport") parser.add_argument("metricscachemgr_statsport") + parser.add_argument("ecsAmiUrl") parsed_args, unknown_args = parser.parse_known_args(args[1:]) From 36dd734193631bc25770df6dd9aa61fb8cd8f313 Mon Sep 17 00:00:00 2001 From: ananth Date: Sat, 13 May 2017 23:36:43 -0700 Subject: [PATCH 26/61] passing esc ami host param --- heron/executor/src/python/heron_executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heron/executor/src/python/heron_executor.py b/heron/executor/src/python/heron_executor.py index c2ec9514941..1b024edc6e8 100755 --- a/heron/executor/src/python/heron_executor.py +++ b/heron/executor/src/python/heron_executor.py @@ -290,7 +290,7 @@ def parse_args(args): parser.add_argument("metricscachemgr_classpath") parser.add_argument("metricscachemgr_masterport") parser.add_argument("metricscachemgr_statsport") - parser.add_argument("ecsAmiUrl") + parser.add_argument("ecsAmiUrl", nargs='?', default="") parsed_args, unknown_args = parser.parse_known_args(args[1:]) From 9582f54238981bba0d3b4ed13076db21833f0df2 Mon Sep 17 00:00:00 2001 From: 
ananth Date: Sat, 13 May 2017 23:39:09 -0700 Subject: [PATCH 27/61] adding kill to schedulers --- .../heron/scheduler/ecs/EcsScheduler.java | 57 ++++++++----------- 1 file changed, 24 insertions(+), 33 deletions(-) diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java index 556e1c372d4..c774b63e172 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java @@ -25,6 +25,8 @@ import java.util.List; import java.util.logging.Logger; +import com.google.common.annotations.VisibleForTesting; + import org.apache.commons.io.IOUtils; import com.twitter.heron.common.basics.SysUtils; @@ -42,36 +44,36 @@ public class EcsScheduler implements IScheduler { private Config config; private Config runtime; private StringBuilder nfreePorts; - private volatile boolean isTopologyKilled = false; private File tempDockerFile = null; + @Override public void initialize(Config mConfig, Config mRuntime) { this.config = Config.toClusterMode(mConfig); this.runtime = mRuntime; - } + @Override public void close() { - } + @VisibleForTesting protected int startExecutorSyncProcess(int container) { String executingInShell = new String(); executingInShell = getExecutorCommand(container)[0]; - System.out.println("executing in Shell: " + executingInShell); return ShellUtils.runProcess(executingInShell, null); } + @VisibleForTesting protected void startExecutor(final int container) { LOG.info("Starting a new executor for container: " + container); int shellOutput = startExecutorSyncProcess(container); LOG.info("output value for the executor container: " + container + String.valueOf(shellOutput)); - } + @VisibleForTesting private String[] getExecutorCommand(int container) { List freePorts = new ArrayList<>(SchedulerUtils.PORTS_REQUIRED_FOR_EXECUTOR); Integer localFreePort = null; @@ 
-85,12 +87,9 @@ private String[] getExecutorCommand(int container) { nfreePorts.append(localFreePort); nfreePorts.append("\""); } - - String[] executorCmd = SchedulerUtils.executorCommand(config, runtime, container, freePorts); - System.out.println("Executor Cmd before replacement %s" + formHeronExecCommand(executorCmd)); String finalExecCommand = setClusterValues(formHeronExecCommand(executorCmd)); - String ecsTaskProject = EcsContext.topologyName(config) + "_" + String.valueOf(container); + String ecsTaskProject = EcsContext.topologyName(config) + "_" + container; FileOutputStream dockerFilestream = null; String content = null; try { @@ -101,40 +100,27 @@ private String[] getExecutorCommand(int container) { IOUtils.write(content, dockerFilestream); IOUtils.closeQuietly(dockerFilestream); } catch (IOException e) { - e.printStackTrace(); + LOG.severe("Unable to create ecs task for container: " + container); } finally { IOUtils.closeQuietly(dockerFilestream); } - String dockerComposeFileName = " --file " + tempDockerFile; - String finalCommand = EcsContext.COMPOSE_CMD + ecsTaskProject + dockerComposeFileName; - finalCommand = finalCommand + EcsContext.UP; - LOG.info("final Ecs Task command " + finalCommand); - //tempDockerFile.deleteOnExit(); + String finalCommand = String.format("%s %s --file %s up", + EcsContext.composeupCmd(config), + ecsTaskProject, tempDockerFile); + //LOG.info("final Ecs Task command " + finalCommand); + tempDockerFile.deleteOnExit(); return new String[] {finalCommand}; - } - public String setClusterValues(String localExecCommand) { - //LOG.info("topologyBinaryFile: " + Context.topologyBinaryFile(config)); - //LOG.info(" cluster topologyBinaryFile: " - // + EcsContext.ecsClusterBinary(config)); - //String clusterExecCommand = localExecCommand.replace(Context.topologyBinaryFile(config), - // EcsContext.ECS_CLUSTER_BINARY); + private String setClusterValues(String localExecCommand) { String clusterExecCommand = 
localExecCommand.replace(Context.topologyBinaryFile(config), EcsContext.ecsClusterBinary(config)); - - // line below can be removed once the Cluster JVM TODO is resolved - //LOG.info("HERON_CLUSTER_JAVA_HOME: " - // + EcsContext.clusterJavaHome(Config.toClusterMode(config))); - //clusterExecCommand = clusterExecCommand.replace(Context.clusterJavaHome(config), - // EcsContext.DESTINATION_JVM); clusterExecCommand = clusterExecCommand.replaceAll("\"", "'"); return clusterExecCommand; } + private String getDockerFileContent(String execCommand, int container) throws IOException { - public String getDockerFileContent(String execCommand, int container) throws IOException { - String commandBuiler = new String(Files.readAllBytes( Paths.get(EcsContext.ecsComposeTemplate(config)))); commandBuiler = commandBuiler.replaceAll("TOPOLOGY_NAME", @@ -147,15 +133,16 @@ public String getDockerFileContent(String execCommand, int container) throws IOE return commandBuiler; } - public String formHeronExecCommand(String[] inStringArray) { + private String formHeronExecCommand(String[] inStringArray) { StringBuilder builder = new StringBuilder(); - for (String string : inStringArray) { if (builder.length() > 0) { builder.append(" "); } builder.append(string); } + builder.append(" "); + builder.append(EcsContext.AmiInstanceUrl(config)); String stringToReturn = builder.toString(); return stringToReturn; } @@ -180,14 +167,18 @@ public List getJobLinks() { return null; } + @Override public boolean onKill(Scheduler.KillTopologyRequest request) { - return false; + ShellUtils.runProcess(EcsContext.composeStopCmd(config), null); + return true; } + @Override public boolean onRestart(Scheduler.RestartTopologyRequest request) { return false; } + @Override public boolean onUpdate(Scheduler.UpdateTopologyRequest request) { return false; } From 3a2af2145a901d64bda30481daa791e224e6f384 Mon Sep 17 00:00:00 2001 From: ananth Date: Sat, 13 May 2017 23:39:50 -0700 Subject: [PATCH 28/61] modified: EcsKey.java 
--- .../src/java/com/twitter/heron/scheduler/ecs/EcsKey.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java index ef37a1576bc..a6fc7db1174 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java @@ -20,7 +20,11 @@ public enum EcsKey { // config key for specifying the destination topology binary file ECS_CLUSTER_BINARY("heron.ecs.topology.binary.file", "heron-examples.jar"), ECS_COMPOSE_TEMPLATE("heron.ecs.compose.template.file", - "${HOME}/.heron/conf/ecs/ecs_compose_template.yaml"); + "${HOME}/.heron/conf/ecs/ecs_compose_template.yaml"), + ECS_AMI_INSTANCE("heron.ecs.ami.instance", "http://169.254.169.254/latest/meta-data/local-ipv4"), + ECS_COMPOSE_UPCMD("heron.ecs.compose.up", "ecs-cli compose --project-name "), + ECS_COMPOSE_STOP("heron.ecs.compose.up", "ecs-cli stop"), + ECS_COMPOSE_LIST("heron.ecs.compose.up", "ecs-cli ps"); private final String value; private final Key.Type type; From 406f298181f461220dde13cb7c82f8e9c8d4c2ad Mon Sep 17 00:00:00 2001 From: ananth Date: Sat, 13 May 2017 23:40:15 -0700 Subject: [PATCH 29/61] removed hard coding values modified: EcsContext.java --- .../heron/scheduler/ecs/EcsContext.java | 27 ++++++++++++++++--- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java index 20c0b2c0faa..f51b4b65a4c 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java @@ -19,19 +19,38 @@ import com.twitter.heron.spi.common.TokenSub; public class EcsContext extends Context { - public static final String COMPOSE_CMD = 
"ecs-cli compose --project-name "; - public static final String UP = " up"; public static String ecsClusterBinary(Config config) { String workingDirectory = config.getStringValue( EcsKey.ECS_CLUSTER_BINARY.value(), EcsKey.ECS_CLUSTER_BINARY.getDefaultString()); return TokenSub.substitute(config, workingDirectory); } + public static String ecsComposeTemplate(Config config) { - String workingDirectory = config.getStringValue( + String composeTemplate = config.getStringValue( EcsKey.ECS_COMPOSE_TEMPLATE.value(), EcsKey.ECS_COMPOSE_TEMPLATE.getDefaultString()); - return TokenSub.substitute(config, workingDirectory); + return TokenSub.substitute(config, composeTemplate); } + public static String AmiInstanceUrl(Config config) { + String amiInstanceUrl = config.getStringValue( + EcsKey.ECS_AMI_INSTANCE.value(), EcsKey.ECS_AMI_INSTANCE.getDefaultString()); + return TokenSub.substitute(config, amiInstanceUrl); + } + public static String composeupCmd(Config config) { + String amiInstanceUrl = config.getStringValue( + EcsKey.ECS_COMPOSE_UPCMD.value(), EcsKey.ECS_COMPOSE_UPCMD.getDefaultString()); + return TokenSub.substitute(config, amiInstanceUrl); + } + public static String composeStopCmd(Config config) { + String amiInstanceUrl = config.getStringValue( + EcsKey.ECS_COMPOSE_STOP.value(), EcsKey.ECS_COMPOSE_STOP.getDefaultString()); + return TokenSub.substitute(config, amiInstanceUrl); + } + public static String composeListCmd(Config config) { + String amiInstanceUrl = config.getStringValue( + EcsKey.ECS_COMPOSE_LIST.value(), EcsKey.ECS_COMPOSE_LIST.getDefaultString()); + return TokenSub.substitute(config, amiInstanceUrl); + } } From 5b163696ef58372580ddd311ff3de1a9950c02bc Mon Sep 17 00:00:00 2001 From: ananth Date: Mon, 15 May 2017 23:57:36 -0700 Subject: [PATCH 30/61] added list tasks context --- .../twitter/heron/scheduler/ecs/EcsContext.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git 
a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java index f51b4b65a4c..4d0cdcc799c 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java @@ -52,5 +52,20 @@ public static String composeListCmd(Config config) { EcsKey.ECS_COMPOSE_LIST.value(), EcsKey.ECS_COMPOSE_LIST.getDefaultString()); return TokenSub.substitute(config, amiInstanceUrl); } + public static String composeListby(Config config) { + String amiInstanceUrl = config.getStringValue( + EcsKey.ECS_LIST_BY.value(), EcsKey.ECS_LIST_BY.getDefaultString()); + return TokenSub.substitute(config, amiInstanceUrl); + } + public static String composeFamilyName(Config config) { + String amiInstanceUrl = config.getStringValue( + EcsKey.ECS_GET_FAMILY.value(), EcsKey.ECS_GET_FAMILY.getDefaultString()); + return TokenSub.substitute(config, amiInstanceUrl); + } + public static String composeTaskTag(Config config) { + String amiInstanceUrl = config.getStringValue( + EcsKey.ECS_TASK_TAG.value(), EcsKey.ECS_TASK_TAG.getDefaultString()); + return TokenSub.substitute(config, amiInstanceUrl); + } } From 3f321cfa04a12d3e8d99205b861c5d72f396d10a Mon Sep 17 00:00:00 2001 From: ananth Date: Mon, 15 May 2017 23:58:12 -0700 Subject: [PATCH 31/61] added List Tasks Keys --- .../src/java/com/twitter/heron/scheduler/ecs/EcsKey.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java index a6fc7db1174..a268f67bf18 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java @@ -24,7 +24,13 @@ public enum EcsKey { ECS_AMI_INSTANCE("heron.ecs.ami.instance", 
"http://169.254.169.254/latest/meta-data/local-ipv4"), ECS_COMPOSE_UPCMD("heron.ecs.compose.up", "ecs-cli compose --project-name "), ECS_COMPOSE_STOP("heron.ecs.compose.up", "ecs-cli stop"), - ECS_COMPOSE_LIST("heron.ecs.compose.up", "ecs-cli ps"); + //ECS_COMPOSE_LIST("heron.ecs.compose.up", "ecs-cli ps") + ECS_COMPOSE_LIST("heron.ecs.compose.up", "aws ecs list-tasks --family "), + ECS_LIST_BY("heron.ecs.list.by", "families"), + ECS_TASK_TAG("heron.ecs.task.tag", "taskArns"), + ECS_GET_FAMILY("heron.ecs.family.name", + "aws ecs list-task-definition-families --family-prefix ecscompose-"); + private final String value; private final Key.Type type; From ac7ca93d7342a233fe4f0575185f5d7ff03e4a6a Mon Sep 17 00:00:00 2001 From: ananth Date: Mon, 15 May 2017 23:58:41 -0700 Subject: [PATCH 32/61] added getJobLinks with ecs Task ID's --- .../heron/scheduler/ecs/EcsScheduler.java | 84 +++++++++++++++++-- 1 file changed, 76 insertions(+), 8 deletions(-) diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java index c774b63e172..59119c248ca 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java @@ -14,18 +14,19 @@ package com.twitter.heron.scheduler.ecs; - import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Paths; - import java.util.ArrayList; +//import java.util.Arrays; import java.util.List; import java.util.logging.Logger; -import com.google.common.annotations.VisibleForTesting; +import com.fasterxml.jackson.core.JsonParseException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.commons.io.IOUtils; @@ -38,7 +39,6 @@ import com.twitter.heron.spi.scheduler.IScheduler; import 
com.twitter.heron.spi.utils.ShellUtils; - public class EcsScheduler implements IScheduler { private static final Logger LOG = Logger.getLogger(EcsScheduler.class.getName()); private Config config; @@ -58,14 +58,12 @@ public void close() { } - @VisibleForTesting protected int startExecutorSyncProcess(int container) { String executingInShell = new String(); executingInShell = getExecutorCommand(container)[0]; return ShellUtils.runProcess(executingInShell, null); } - @VisibleForTesting protected void startExecutor(final int container) { LOG.info("Starting a new executor for container: " + container); int shellOutput = startExecutorSyncProcess(container); @@ -73,7 +71,6 @@ protected void startExecutor(final int container) { + container + String.valueOf(shellOutput)); } - @VisibleForTesting private String[] getExecutorCommand(int container) { List freePorts = new ArrayList<>(SchedulerUtils.PORTS_REQUIRED_FOR_EXECUTOR); Integer localFreePort = null; @@ -160,11 +157,78 @@ public boolean onSchedule(PackingPlan packing) { startExecutor(container.getId()); } LOG.info("Executor for each container have been started."); + LOG.info("Listing each of tasks started."); + // List jobLinks = new ArrayList(); + //jobLinks = getJobLinks(); return true; } + @Override public List getJobLinks() { - return null; + List list = new ArrayList(); + StringBuilder familyListcmd = new StringBuilder(); + //String tempStr = "aws ecs list-task-definition-families --family-prefix ecscompose-"; + familyListcmd.append(EcsContext.composeFamilyName(config)); + familyListcmd.append(EcsContext.topologyName(config)); + //tempStr = tempStr + EcsContext.topologyName(config); + LOG.info("final list cmd:" + familyListcmd); + StringBuilder stdout = new StringBuilder(); + StringBuilder stderr = new StringBuilder(); + List familyString; + int status = ShellUtils.runProcess(familyListcmd.toString(), stdout); + if (status != 0) { + LOG.severe(String.format( + "Failed to run process. 
Command=%s, STDOUT=%s, STDERR=%s", + familyListcmd.toString(), stdout, stderr)); + } else { + String listjsonString = stdout.toString(); + try { + familyString = parseJsonName(listjsonString, EcsContext.composeListby(config)); + for (String familyName : familyString) { + StringBuilder listout = new StringBuilder(); + StringBuilder listerr = new StringBuilder(); + StringBuilder taskListcmd = new StringBuilder(); + taskListcmd.append(EcsContext.composeListCmd(config)); + taskListcmd.append(" "); + taskListcmd.append(familyName); + int jobListstatus = ShellUtils.runProcess(taskListcmd.toString(), listout); + if (jobListstatus != 0) { + LOG.severe(String.format( + "Failed to run process. Command=%s, STDOUT=%s, STDERR=%s", + taskListcmd.toString(), listout, listerr)); + } else { + List taskString = parseJsonName(listout.toString(), + EcsContext.composeTaskTag(config)); + for (String taskId : taskString) { + list.add(taskId); + } + } + } + } catch (JsonParseException e) { + LOG.severe("Unable to get list due to Parsing issues"); + } catch (IOException ioe) { + LOG.severe("Unable to get list due to IO issues"); + } + } + return list; + } + + private List parseJsonName(String jString, String jName) throws JsonParseException, + IOException { + ObjectMapper mapper = new ObjectMapper(); + JsonNode actualObj = mapper.readTree(jString); + //JsonNode jsonNode1 = actualObj.get("families"); + List jsonList = new ArrayList(); + final JsonNode arrNode = new ObjectMapper().readTree(jString).get(jName); + if (arrNode.isArray()) { + for (final JsonNode objNode : arrNode) { + String taskDefn = objNode.asText(); + jsonList.add(taskDefn.toString()); + } + } else { + jsonList.add(arrNode.textValue()); + } + return jsonList; } @Override @@ -175,11 +239,15 @@ public boolean onKill(Scheduler.KillTopologyRequest request) { @Override public boolean onRestart(Scheduler.RestartTopologyRequest request) { + // TODO(ananthgs): Need to see if re-starting each task is good. 
+ LOG.severe("Topology onRestart not implemented by this scheduler Please use kill & start."); return false; } @Override public boolean onUpdate(Scheduler.UpdateTopologyRequest request) { + // TODO(ananthgs): Need to decide how to get pplans and update + LOG.severe("Topology onUpdate not implemented by this scheduler."); return false; } From 5204be22ff8da23afecada1e4d7c11e44d9a481e Mon Sep 17 00:00:00 2001 From: ananth Date: Tue, 16 May 2017 00:00:15 -0700 Subject: [PATCH 33/61] adde jackson JSON jars for ecs Schedulers --- heron/schedulers/src/java/BUILD | 3 +++ 1 file changed, 3 insertions(+) diff --git a/heron/schedulers/src/java/BUILD b/heron/schedulers/src/java/BUILD index 29203c4afc7..a33e400adf4 100644 --- a/heron/schedulers/src/java/BUILD +++ b/heron/schedulers/src/java/BUILD @@ -6,6 +6,9 @@ common_deps_files = [ "//heron/common/src/java:basics-java", "@commons_io_commons_io//jar", "@com_google_guava_guava//jar", + "@com_fasterxml_jackson_core_jackson_annotations//jar", + "@com_fasterxml_jackson_core_jackson_core//jar", + "@com_fasterxml_jackson_core_jackson_databind//jar", ] spi_deps_files = [ From 52f40bbaf55c9f74425ae9f7477ea09edffc039e Mon Sep 17 00:00:00 2001 From: ananth Date: Tue, 16 May 2017 12:57:52 -0700 Subject: [PATCH 34/61] new file: heron/config/src/yaml/conf/ecs/setupEcs.sh --- heron/config/src/yaml/conf/ecs/setupEcs.sh | 91 ++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100755 heron/config/src/yaml/conf/ecs/setupEcs.sh diff --git a/heron/config/src/yaml/conf/ecs/setupEcs.sh b/heron/config/src/yaml/conf/ecs/setupEcs.sh new file mode 100755 index 00000000000..0e02a153ce3 --- /dev/null +++ b/heron/config/src/yaml/conf/ecs/setupEcs.sh @@ -0,0 +1,91 @@ +#!/bin/bash + +##### PLEASE NOTE USE THIS AS A REFETECE. 
YOU MAY NEED TO CHANGE REGION/CLUSTER/KEYPAIR VALUES + + +REGIONS="" + +if [ -z "$(which aws)" ]; then + echo "error: Cannot find AWS-CLI, please make sure it's installed" + exit 1 +fi + +if [ -z "$(which ecs-cli)" ]; then + echo "error: Cannot find AWS-CLI, please make sure it's installed" + exit 1 +fi + +REGION=$(aws configure list 2> /dev/null | grep region | awk '{ print $2 }') +if [ -z "$REGION" ]; then + echo "error: Region not set, please make sure to run 'aws configure'" + exit 1 +fi + +AMI=$(aws --region $REGION ec2 describe-images --filters Name=root-device-type,Values=ebs Name=architecture,Values=x86_64 Name=virtualization-type,Values=hvm Name=name,Values=*ubuntu-xenial-16.04-amd64-server-20161214 --query 'Images[*].{ID:ImageId}' --output text) + +# Check that setup wasn't already run +CLUSTER_STATUS=$(aws ecs describe-clusters --clusters ecs-heron-cluster --query 'clusters[0].status' --output text) +if [ "$CLUSTER_STATUS" != "None" -a "$CLUSTER_STATUS" != "INACTIVE" ]; then + echo "error: ECS cluster weave-ecs-demo-cluster is active, cleanup first" + exit 1 +fi + +set -euo pipefail + +# Cluster +echo -n "Creating ECS cluster (ecs-heron-cluster) .. " +aws ecs create-cluster --cluster-name ecs-heron-cluster > /dev/null +echo "done" + +# Security group +echo -n "Creating Security Group (weave-ecs-demo) .. " +SECURITY_GROUP_ID=$(aws ec2 create-security-group --group-name ecs-heron-group --description 'ECS Heron' --query 'GroupId' --output text) +# Wait for the group to get associated with the VPC +sleep 5 +#opening all ports as port assignment will be random +aws ec2 authorize-security-group-ingress --group-id $SECURITY_GROUP_ID --protocol tcp --port 0-65535 --cidr 0.0.0.0/0 + +# Key pair +echo -n "Creating Key Pair (ecs-heron-key, file ecs-heron-key.pem) .. 
" +aws ec2 create-key-pair --key-name ecs-heron-key --query 'KeyMaterial' --output text > ecs-heron-key.pem +chmod 600 ecs-heron-key.pem +echo "done" + +# IAM role +echo -n "Creating IAM role (ecs-heron-role) .. " +aws iam create-role --role-name ecs-heron-role --assume-role-policy-document file://data/ecs-heron-role.json > /dev/null +aws iam put-role-policy --role-name ecs-heron-role --policy-name ecs-heron-policy --policy-document file://data/ecs-heron-policy.json +aws iam create-instance-profile --instance-profile-name ecs-heron-instance-profile > /dev/null +# Wait for the instance profile to be ready, otherwise we get an error when trying to use it +while ! aws iam get-instance-profile --instance-profile-name ecs-heron-instance-profile 2>&1 > /dev/null; do + sleep 2 +done +aws iam add-role-to-instance-profile --instance-profile-name ecs-heron-instance-profile --role-name ecs-heron-role +echo "done" + + +# Launch configuration +echo -n "Creating Launch Configuration (ecs-heron-launch-configuration) .. " +# Wait for the role to be ready, otherwise we get: +# A client error (ValidationError) occurred when calling the CreateLaunchConfiguration operation: You are not authorized to perform this operation. 
+# Unfortunately even if you can list the profile, "aws autoscaling create-launch-configuration" barks about it not existing so lets sleep instead + +sleep 15 + +TMP_USER_DATA_FILE=$(mktemp /tmp/weave-ecs-demo-user-data-XXXX) +trap 'rm $TMP_USER_DATA_FILE' EXIT +cp data/set-ecs-cluster-name.sh $TMP_USER_DATA_FILE +if [ -n "$SCOPE_AAS_PROBE_TOKEN" ]; then + echo "echo SERVICE_TOKEN=$SCOPE_AAS_PROBE_TOKEN >> /etc/weave/scope.config" >> $TMP_USER_DATA_FILE +fi + +aws autoscaling create-launch-configuration --image-id $AMI --launch-configuration-name ecs-heron-launch-configuration --key-name ecs-heron-key --security-groups $SECURITY_GROUP_ID --instance-type t2.micro --user-data file://$TMP_USER_DATA_FILE --iam-instance-profile ecs-heron-instance-profile --associate-public-ip-address --instance-monitoring Enabled=false +echo "done" + +#set up the ecs cli config +ecs-cli configure --region $REGION --access-key ecs-heron-key --cluster $CLUSTER + +#create a CloudFormation template +ecs-cli up --keypair ecs --capability-iam --size 2 --instance-type t2.micro + +echo "Setup is ready! 
Please submit the ECS HERON Topology" From e74b0ef761c6fa164181c8958af3f1d9c31c977b Mon Sep 17 00:00:00 2001 From: ananth Date: Tue, 16 May 2017 12:59:27 -0700 Subject: [PATCH 35/61] new file: heron/config/src/yaml/conf/ecs/set-ecs-cluster-name.sh --- heron/config/src/yaml/conf/ecs/set-ecs-cluster-name.sh | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 heron/config/src/yaml/conf/ecs/set-ecs-cluster-name.sh diff --git a/heron/config/src/yaml/conf/ecs/set-ecs-cluster-name.sh b/heron/config/src/yaml/conf/ecs/set-ecs-cluster-name.sh new file mode 100644 index 00000000000..7bf28fb9796 --- /dev/null +++ b/heron/config/src/yaml/conf/ecs/set-ecs-cluster-name.sh @@ -0,0 +1,2 @@ +#!/bin/bash +echo ECS_CLUSTER=weave-ecs-demo-cluster >> /etc/ecs/ecs.config From 0824fa057ba88271a9b126ed077e5c06c3d9b69d Mon Sep 17 00:00:00 2001 From: ananth Date: Tue, 16 May 2017 12:59:52 -0700 Subject: [PATCH 36/61] new file: heron/config/src/yaml/conf/ecs/ecs-heron-policy.json --- .../src/yaml/conf/ecs/ecs-heron-policy.json | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 heron/config/src/yaml/conf/ecs/ecs-heron-policy.json diff --git a/heron/config/src/yaml/conf/ecs/ecs-heron-policy.json b/heron/config/src/yaml/conf/ecs/ecs-heron-policy.json new file mode 100644 index 00000000000..1d34b4b863f --- /dev/null +++ b/heron/config/src/yaml/conf/ecs/ecs-heron-policy.json @@ -0,0 +1,28 @@ +{ + "Version": "2017-04-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "ecs:CreateCluster", + "ecs:DeregisterContainerInstance", + "ecs:DiscoverPollEndpoint", + "ecs:Poll", + "ecs:RegisterContainerInstance", + "ecs:Submit*", + "ecs:ListClusters", + "ecs:ListContainerInstances", + "ecs:DescribeContainerInstances", + "ecs:ListServices", + "ecs:DescribeTasks", + "ecs:DescribeServices", + "ec2:DescribeInstances", + "ec2:DescribeTags", + "autoscaling:DescribeAutoScalingInstances" + ], + "Resource": [ + "*" + ] + } + ] +} From d3a7df5df2bd4970d19fa83c055c82dfdb246cba 
Mon Sep 17 00:00:00 2001 From: ananth Date: Tue, 16 May 2017 13:00:15 -0700 Subject: [PATCH 37/61] new file: heron/config/src/yaml/conf/ecs/ecs-heron-role.json --- heron/config/src/yaml/conf/ecs/ecs-heron-role.json | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 heron/config/src/yaml/conf/ecs/ecs-heron-role.json diff --git a/heron/config/src/yaml/conf/ecs/ecs-heron-role.json b/heron/config/src/yaml/conf/ecs/ecs-heron-role.json new file mode 100644 index 00000000000..0f0b0ed1b21 --- /dev/null +++ b/heron/config/src/yaml/conf/ecs/ecs-heron-role.json @@ -0,0 +1,8 @@ +{ + "Version": "2017-4-17", + "Statement": { + "Effect": "Allow", + "Principal": {"Service": "ec2.amazonaws.com"}, + "Action": "sts:AssumeRole" + } +} From f65f865f5b38ff15d59143b2f9934e60b9b95232 Mon Sep 17 00:00:00 2001 From: ananth Date: Tue, 16 May 2017 13:19:15 -0700 Subject: [PATCH 38/61] fixed echo messages --- heron/config/src/yaml/conf/ecs/setupEcs.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/heron/config/src/yaml/conf/ecs/setupEcs.sh b/heron/config/src/yaml/conf/ecs/setupEcs.sh index 0e02a153ce3..7329e5a5c3d 100755 --- a/heron/config/src/yaml/conf/ecs/setupEcs.sh +++ b/heron/config/src/yaml/conf/ecs/setupEcs.sh @@ -26,7 +26,7 @@ AMI=$(aws --region $REGION ec2 describe-images --filters Name=root-device-type, # Check that setup wasn't already run CLUSTER_STATUS=$(aws ecs describe-clusters --clusters ecs-heron-cluster --query 'clusters[0].status' --output text) if [ "$CLUSTER_STATUS" != "None" -a "$CLUSTER_STATUS" != "INACTIVE" ]; then - echo "error: ECS cluster weave-ecs-demo-cluster is active, cleanup first" + echo "error: ECS cluster ecs-heron-cluster is active, cleanup first" exit 1 fi @@ -38,7 +38,7 @@ aws ecs create-cluster --cluster-name ecs-heron-cluster > /dev/null echo "done" # Security group -echo -n "Creating Security Group (weave-ecs-demo) .. " +echo -n "Creating Security Group (ecs-heron-demo) .. 
" SECURITY_GROUP_ID=$(aws ec2 create-security-group --group-name ecs-heron-group --description 'ECS Heron' --query 'GroupId' --output text) # Wait for the group to get associated with the VPC sleep 5 @@ -72,11 +72,11 @@ echo -n "Creating Launch Configuration (ecs-heron-launch-configuration) .. " sleep 15 -TMP_USER_DATA_FILE=$(mktemp /tmp/weave-ecs-demo-user-data-XXXX) +TMP_USER_DATA_FILE=$(mktemp /tmp/ecs-heron-user-data-XXXX) trap 'rm $TMP_USER_DATA_FILE' EXIT cp data/set-ecs-cluster-name.sh $TMP_USER_DATA_FILE if [ -n "$SCOPE_AAS_PROBE_TOKEN" ]; then - echo "echo SERVICE_TOKEN=$SCOPE_AAS_PROBE_TOKEN >> /etc/weave/scope.config" >> $TMP_USER_DATA_FILE + echo "echo SERVICE_TOKEN=$SCOPE_AAS_PROBE_TOKEN >> /etc/ecs-heron/scope.config" >> $TMP_USER_DATA_FILE fi aws autoscaling create-launch-configuration --image-id $AMI --launch-configuration-name ecs-heron-launch-configuration --key-name ecs-heron-key --security-groups $SECURITY_GROUP_ID --instance-type t2.micro --user-data file://$TMP_USER_DATA_FILE --iam-instance-profile ecs-heron-instance-profile --associate-public-ip-address --instance-monitoring Enabled=false From 15a91f742b9c03f1d744a4b656c22eae20101353 Mon Sep 17 00:00:00 2001 From: ananth Date: Tue, 16 May 2017 13:26:05 -0700 Subject: [PATCH 39/61] Fixed Log messages --- .../heron/scheduler/ecs/EcsScheduler.java | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java index 59119c248ca..79cd9da88f6 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java @@ -167,11 +167,9 @@ public boolean onSchedule(PackingPlan packing) { public List getJobLinks() { List list = new ArrayList(); StringBuilder familyListcmd = new StringBuilder(); - //String tempStr = "aws ecs 
list-task-definition-families --family-prefix ecscompose-"; familyListcmd.append(EcsContext.composeFamilyName(config)); familyListcmd.append(EcsContext.topologyName(config)); - //tempStr = tempStr + EcsContext.topologyName(config); - LOG.info("final list cmd:" + familyListcmd); + LOG.info(String.format("final list cmd: %s", familyListcmd)); StringBuilder stdout = new StringBuilder(); StringBuilder stderr = new StringBuilder(); List familyString; @@ -233,8 +231,19 @@ private List parseJsonName(String jString, String jName) throws JsonPars @Override public boolean onKill(Scheduler.KillTopologyRequest request) { - ShellUtils.runProcess(EcsContext.composeStopCmd(config), null); - return true; + StringBuilder stdout = new StringBuilder(); + StringBuilder stderr = new StringBuilder(); + int status = ShellUtils.runProcess(EcsContext.composeStopCmd(config), null); + if (status != 0) { + LOG.severe(String.format( + "Failed to run process. Command=%s, STDOUT=%s, STDERR=%s", + EcsContext.composeStopCmd(config), stdout, stderr)); + isTopologyKilled = false; + } else { + LOG.info("Topology Taks stop Successful"); + isTopologyKilled = true; + } + return isTopologyKilled; } @Override From 96a150de0bc1a7b465f4cb32dd525af54d3353a4 Mon Sep 17 00:00:00 2001 From: ananth Date: Thu, 18 May 2017 22:37:05 -0700 Subject: [PATCH 40/61] new file: EcsSchedulerTest.java --- .../heron/scheduler/ecs/EcsSchedulerTest.java | 177 ++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 heron/schedulers/tests/java/com/twitter/heron/scheduler/ecs/EcsSchedulerTest.java diff --git a/heron/schedulers/tests/java/com/twitter/heron/scheduler/ecs/EcsSchedulerTest.java b/heron/schedulers/tests/java/com/twitter/heron/scheduler/ecs/EcsSchedulerTest.java new file mode 100644 index 00000000000..a7c4d53afcc --- /dev/null +++ b/heron/schedulers/tests/java/com/twitter/heron/scheduler/ecs/EcsSchedulerTest.java @@ -0,0 +1,177 @@ +// Copyright 2016 Twitter. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.twitter.heron.scheduler.ecs; + +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Set; +//import java.util.concurrent.ExecutorService; +//import java.util.concurrent.TimeUnit; + +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +//import org.mockito.Matchers; +import org.mockito.Mockito; +//import static org.mockito.Mockito.mock; +import org.powermock.api.mockito.PowerMockito; +import org.powermock.core.classloader.annotations.PrepareForTest; +import org.powermock.modules.junit4.PowerMockRunner; + +import com.twitter.heron.api.generated.TopologyAPI; +import com.twitter.heron.common.basics.PackageType; +import com.twitter.heron.scheduler.utils.LauncherUtils; +import com.twitter.heron.scheduler.utils.SchedulerUtils; +import com.twitter.heron.spi.common.Config; +import com.twitter.heron.spi.common.ConfigLoader; + +import com.twitter.heron.spi.common.Key; +import com.twitter.heron.spi.packing.IPacking; +import com.twitter.heron.spi.packing.PackingPlan; +import com.twitter.heron.spi.scheduler.ILauncher; +import com.twitter.heron.spi.statemgr.SchedulerStateManagerAdaptor; +import com.twitter.heron.spi.utils.PackingTestUtils; +import com.twitter.heron.spi.utils.TopologyTests; + + 
+@RunWith(PowerMockRunner.class) +@PrepareForTest({SchedulerUtils.class, ConfigLoader.class}) + +public class EcsSchedulerTest { + + private static final String TOPOLOGY_NAME = "testTopology"; + private static final String CLUSTER = "testCluster"; + private static final String ROLE = "testRole"; + private static final String ENVIRON = "testEnviron"; + private static final String BUILD_VERSION = "live"; + private static final String BUILD_USER = "user"; + private static final String TOPOLOGY_DEFINITION_FILE = "topologyDefFile"; + private static final String TOPOLOGY_BINARY_FILE = "topologyBinFile"; + private static final String TOPOLOGY_PACKAGE_TYPE = "tar"; + private static final String[] EXECUTOR_CMD = {"executor", "_", "cmd"}; + private static final String DOCKER_CONTENT = "testDockerContent"; + private static final String TEST_DATA_PATH = + "/__main__/heron/spi/tests/java/com/twitter/heron/spi/common/testdata"; + private final String heronHome = + Paths.get(System.getenv("JAVA_RUNFILES"), TEST_DATA_PATH).toString(); + private final String configPath = Paths.get(heronHome, "local").toString(); + private static final String TEST_DOCKER_PATH = + "/__main__/heron/config/src/yaml/conf/ecs/ecs_compose_template.yaml"; + private final String dockerPath = Paths.get(heronHome, "").toString(); + private static EcsScheduler scheduler; + private Config clusterConfig; + private Config runtime; + private EcsContext ecsContext; + private Set containerPlans; + + @Before + public void before() throws Exception { + scheduler = Mockito.spy(EcsScheduler.class); + + Config rawConfig = ConfigLoader.loadConfig( + heronHome, configPath, "/release/file", "/override/file"); + clusterConfig = Mockito.spy(Config.toClusterMode(rawConfig)); + + runtime = Mockito.mock(Config.class); + + scheduler.initialize(clusterConfig, runtime); + } + @After + public void after() throws Exception { + + } + @BeforeClass + public static void beforeClass() throws Exception { + + } + + @AfterClass + public 
static void afterClass() throws Exception { + scheduler.close(); + } + + private Config createRunnerRuntime( + com.twitter.heron.api.Config topologyConfig) throws Exception { + Config lRuntime = Mockito.spy(Config.newBuilder().build()); + ILauncher launcher = Mockito.mock(ILauncher.class); + IPacking packing = Mockito.mock(IPacking.class); + SchedulerStateManagerAdaptor adaptor = Mockito.mock(SchedulerStateManagerAdaptor.class); + //TopologyAPI.Topology topology = createTopology(topologyConfig); + TopologyAPI.Topology topology = TopologyTests.createTopology( + TOPOLOGY_NAME, new com.twitter.heron.api.Config(), new HashMap(), + new HashMap()); + + Mockito.doReturn(launcher).when(lRuntime).get(Key.LAUNCHER_CLASS_INSTANCE); + Mockito.doReturn(adaptor).when(lRuntime).get(Key.SCHEDULER_STATE_MANAGER_ADAPTOR); + Mockito.doReturn(topology).when(lRuntime).get(Key.TOPOLOGY_DEFINITION); + + PackingPlan packingPlan = Mockito.mock(PackingPlan.class); + Mockito.when(packingPlan.getContainers()).thenReturn( + new HashSet()); + Mockito.when(packingPlan.getComponentRamDistribution()).thenReturn("ramdist"); + Mockito.when(packingPlan.getId()).thenReturn("packing_plan_id"); + containerPlans = new HashSet<>(); + containerPlans.add(PackingTestUtils.testContainerPlan(1)); // just need it to be of size 1 + Mockito.when(packingPlan.getContainers()).thenReturn(containerPlans); + Mockito.when(packing.pack()).thenReturn(packingPlan); + + LauncherUtils mockLauncherUtils = Mockito.mock(LauncherUtils.class); + Mockito.when(mockLauncherUtils.createPackingPlan(Mockito.any(Config.class), + Mockito.any(Config.class))) + .thenReturn(packingPlan); + PowerMockito.spy(LauncherUtils.class); + PowerMockito.doReturn(mockLauncherUtils).when(LauncherUtils.class, "getInstance"); + + + return lRuntime; + } + @Test + public void testOnSchedule() throws Exception { + + + Mockito.doReturn(TOPOLOGY_DEFINITION_FILE). 
+ when(clusterConfig).get(Key.TOPOLOGY_DEFINITION_FILE); + Mockito.doReturn(TOPOLOGY_BINARY_FILE).when(clusterConfig).get(Key.TOPOLOGY_BINARY_FILE); + Mockito.doReturn(PackageType.getPackageType("test.tar")). + when(clusterConfig).get(Key.TOPOLOGY_PACKAGE_TYPE); + + TopologyAPI.Topology topology = TopologyTests.createTopology( + TOPOLOGY_NAME, new com.twitter.heron.api.Config(), new HashMap(), + new HashMap()); + Mockito.when(runtime.get(Key.TOPOLOGY_DEFINITION)).thenReturn(topology); + Mockito.when(runtime.get(Key.TOPOLOGY_DEFINITION_FILE)).thenReturn(TOPOLOGY_DEFINITION_FILE); + + ecsContext = Mockito.mock(EcsContext.class); + PowerMockito.mockStatic(EcsContext.class); + Mockito.when(EcsContext.ecsComposeTemplate(clusterConfig)).thenReturn(TEST_DOCKER_PATH); + + PackingPlan packingPlan = Mockito.mock(PackingPlan.class); + Set containers = new HashSet<>(); + containers.add(PackingTestUtils.testContainerPlan(1)); + //containers.add(PackingTestUtils.testContainerPlan(2)); + //containers.add(PackingTestUtils.testContainerPlan(3)); + Mockito.when(packingPlan.getContainers()).thenReturn(containers); + + containers.add(Mockito.mock(PackingPlan.ContainerPlan.class)); + PackingPlan validPlan = + new PackingPlan(TOPOLOGY_NAME, containers); + Assert.assertTrue(scheduler.onSchedule(validPlan)); + } +} From f2e426752bb77a6e44d1aaed0871e624bd705863 Mon Sep 17 00:00:00 2001 From: ananth Date: Fri, 19 May 2017 13:33:56 -0700 Subject: [PATCH 41/61] modified: setupEcs.sh --- heron/config/src/yaml/conf/ecs/setupEcs.sh | 25 +++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/heron/config/src/yaml/conf/ecs/setupEcs.sh b/heron/config/src/yaml/conf/ecs/setupEcs.sh index 7329e5a5c3d..933abb94149 100755 --- a/heron/config/src/yaml/conf/ecs/setupEcs.sh +++ b/heron/config/src/yaml/conf/ecs/setupEcs.sh @@ -1,10 +1,12 @@ #!/bin/bash -##### PLEASE NOTE USE THIS AS A REFETECE. 
YOU MAY NEED TO CHANGE REGION/CLUSTER/KEYPAIR VALUES +##### PLEASE NOTE USE THIS AS A REFERENCE. YOU MAY NEED TO CHANGE REGION/CLUSTER/AMI/KEYPAIR VALUES REGIONS="" +SCOPE_AAS_PROBE_TOKEN="$1" + if [ -z "$(which aws)" ]; then echo "error: Cannot find AWS-CLI, please make sure it's installed" exit 1 @@ -39,22 +41,22 @@ echo "done" # Security group echo -n "Creating Security Group (ecs-heron-demo) .. " -SECURITY_GROUP_ID=$(aws ec2 create-security-group --group-name ecs-heron-group --description 'ECS Heron' --query 'GroupId' --output text) +SECURITY_GROUP_ID=$(aws ec2 create-security-group --group-name ecs-heron-securitygroup --description 'ECS Heron' --query 'GroupId' --output text) # Wait for the group to get associated with the VPC sleep 5 #opening all ports as port assignment will be random aws ec2 authorize-security-group-ingress --group-id $SECURITY_GROUP_ID --protocol tcp --port 0-65535 --cidr 0.0.0.0/0 # Key pair -echo -n "Creating Key Pair (ecs-heron-key, file ecs-heron-key.pem) .. " -aws ec2 create-key-pair --key-name ecs-heron-key --query 'KeyMaterial' --output text > ecs-heron-key.pem -chmod 600 ecs-heron-key.pem +echo -n "Creating Key Pair (ecs-heron-keypair, file ecs-heron-keypair.pem) .. " +aws ec2 create-key-pair --key-name ecs-heron-keypair --query 'KeyMaterial' --output text > ecs-heron-keypair.pem +chmod 600 ecs-heron-keypair.pem echo "done" # IAM role echo -n "Creating IAM role (ecs-heron-role) .. 
" -aws iam create-role --role-name ecs-heron-role --assume-role-policy-document file://data/ecs-heron-role.json > /dev/null -aws iam put-role-policy --role-name ecs-heron-role --policy-name ecs-heron-policy --policy-document file://data/ecs-heron-policy.json +aws iam create-role --role-name ecs-heron-role --assume-role-policy-document file://ecs-heron-role.json > /dev/null +aws iam put-role-policy --role-name ecs-heron-role --policy-name ecs-heron-policy --policy-document file://ecs-heron-policy.json aws iam create-instance-profile --instance-profile-name ecs-heron-instance-profile > /dev/null # Wait for the instance profile to be ready, otherwise we get an error when trying to use it while ! aws iam get-instance-profile --instance-profile-name ecs-heron-instance-profile 2>&1 > /dev/null; do @@ -68,24 +70,23 @@ echo "done" echo -n "Creating Launch Configuration (ecs-heron-launch-configuration) .. " # Wait for the role to be ready, otherwise we get: # A client error (ValidationError) occurred when calling the CreateLaunchConfiguration operation: You are not authorized to perform this operation. 
-# Unfortunately even if you can list the profile, "aws autoscaling create-launch-configuration" barks about it not existing so lets sleep instead sleep 15 TMP_USER_DATA_FILE=$(mktemp /tmp/ecs-heron-user-data-XXXX) trap 'rm $TMP_USER_DATA_FILE' EXIT -cp data/set-ecs-cluster-name.sh $TMP_USER_DATA_FILE +cp set-ecs-cluster-name.sh $TMP_USER_DATA_FILE if [ -n "$SCOPE_AAS_PROBE_TOKEN" ]; then echo "echo SERVICE_TOKEN=$SCOPE_AAS_PROBE_TOKEN >> /etc/ecs-heron/scope.config" >> $TMP_USER_DATA_FILE fi -aws autoscaling create-launch-configuration --image-id $AMI --launch-configuration-name ecs-heron-launch-configuration --key-name ecs-heron-key --security-groups $SECURITY_GROUP_ID --instance-type t2.micro --user-data file://$TMP_USER_DATA_FILE --iam-instance-profile ecs-heron-instance-profile --associate-public-ip-address --instance-monitoring Enabled=false +aws autoscaling create-launch-configuration --image-id $AMI --launch-configuration-name ecs-heron-launch-configuration --key-name ecs-heron-keypair --security-groups $SECURITY_GROUP_ID --instance-type t2.micro --user-data file://$TMP_USER_DATA_FILE --iam-instance-profile ecs-heron-instance-profile --associate-public-ip-address --instance-monitoring Enabled=false echo "done" #set up the ecs cli config -ecs-cli configure --region $REGION --access-key ecs-heron-key --cluster $CLUSTER +ecs-cli configure --region $REGION --access-key ecs-heron-keypair --cluster ecs-heron-cluster #create a CloudFormation template -ecs-cli up --keypair ecs --capability-iam --size 2 --instance-type t2.micro +ecs-cli up --keypair ecs-heron-keypair --capability-iam --size 2 --instance-type t2.micro echo "Setup is ready! 
Please submit the ECS HERON Topology" From be514cbbca92df10a1874afc4a09a324e70a20df Mon Sep 17 00:00:00 2001 From: ananth Date: Fri, 19 May 2017 13:34:58 -0700 Subject: [PATCH 42/61] fixed version number modified: ecs-heron-role.json --- heron/config/src/yaml/conf/ecs/ecs-heron-role.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heron/config/src/yaml/conf/ecs/ecs-heron-role.json b/heron/config/src/yaml/conf/ecs/ecs-heron-role.json index 0f0b0ed1b21..f025fed4763 100644 --- a/heron/config/src/yaml/conf/ecs/ecs-heron-role.json +++ b/heron/config/src/yaml/conf/ecs/ecs-heron-role.json @@ -1,5 +1,5 @@ { - "Version": "2017-4-17", + "Version": "2012-10-17", "Statement": { "Effect": "Allow", "Principal": {"Service": "ec2.amazonaws.com"}, From 0ece9cbc08ebf278f1278f5b421643ae6821df9c Mon Sep 17 00:00:00 2001 From: ananth Date: Fri, 19 May 2017 13:35:55 -0700 Subject: [PATCH 43/61] fixed version number modified: ecs-heron-policy.json --- heron/config/src/yaml/conf/ecs/ecs-heron-policy.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heron/config/src/yaml/conf/ecs/ecs-heron-policy.json b/heron/config/src/yaml/conf/ecs/ecs-heron-policy.json index 1d34b4b863f..27583a9828e 100644 --- a/heron/config/src/yaml/conf/ecs/ecs-heron-policy.json +++ b/heron/config/src/yaml/conf/ecs/ecs-heron-policy.json @@ -1,5 +1,5 @@ { - "Version": "2017-04-17", + "Version": "2012-10-17", "Statement": [ { "Effect": "Allow", From 2ef6665d1ac081489bd479e9876ec59a7ad40380 Mon Sep 17 00:00:00 2001 From: ananth Date: Fri, 19 May 2017 13:37:22 -0700 Subject: [PATCH 44/61] modified: set-ecs-cluster-name.sh --- heron/config/src/yaml/conf/ecs/set-ecs-cluster-name.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heron/config/src/yaml/conf/ecs/set-ecs-cluster-name.sh b/heron/config/src/yaml/conf/ecs/set-ecs-cluster-name.sh index 7bf28fb9796..fafe4a335d3 100644 --- a/heron/config/src/yaml/conf/ecs/set-ecs-cluster-name.sh +++ 
b/heron/config/src/yaml/conf/ecs/set-ecs-cluster-name.sh @@ -1,2 +1,2 @@ #!/bin/bash -echo ECS_CLUSTER=weave-ecs-demo-cluster >> /etc/ecs/ecs.config +echo ECS_CLUSTER=ecs-heron-cluster >> /etc/ecs-heron/ecs.config From c224c820a814ad39ff5737ce43b20dfd0fcb057d Mon Sep 17 00:00:00 2001 From: ananth Date: Sun, 21 May 2017 12:39:51 -0700 Subject: [PATCH 45/61] new file: README --- heron/config/src/yaml/conf/ecs/README | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 heron/config/src/yaml/conf/ecs/README diff --git a/heron/config/src/yaml/conf/ecs/README b/heron/config/src/yaml/conf/ecs/README new file mode 100644 index 00000000000..f2d19eb6017 --- /dev/null +++ b/heron/config/src/yaml/conf/ecs/README @@ -0,0 +1,11 @@ +This folder contains sample configs needed for using zookeeper in LocalScheduler. +In order to run LocalScheduler, you need to set up a running zookeeper server basing on the config inside statemgr.yaml: +1. Set up the appropriate connection string. +2. Create following required nodes in zookeeper (one time effort): + /{heron.statemgr.root.path}/tmasters + /{heron.statemgr.root.path}/topologies + /{heron.statemgr.root.path}/pplans + /{heron.statemgr.root.path}/executionstate + /{heron.statemgr.root.path}/schedulers + +Then you can run LocalScheduler with zookeeper state manager. 
From 30f434888e539e6e886f18ca8415c4320026372e Mon Sep 17 00:00:00 2001 From: ananth Date: Sun, 21 May 2017 12:45:13 -0700 Subject: [PATCH 46/61] Clean up file for AWS --- heron/config/src/yaml/conf/ecs/cleanUp.sh | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100755 heron/config/src/yaml/conf/ecs/cleanUp.sh diff --git a/heron/config/src/yaml/conf/ecs/cleanUp.sh b/heron/config/src/yaml/conf/ecs/cleanUp.sh new file mode 100755 index 00000000000..b7190e3b299 --- /dev/null +++ b/heron/config/src/yaml/conf/ecs/cleanUp.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +aws autoscaling delete-launch-configuration --launch-configuration-name ecs-heron-launch-configuration +echo "done delete-launch-configuration" +aws iam remove-role-from-instance-profile --instance-profile-name ecs-heron-instance-profile --role-name ecs-heron-role +echo "done iam remove-role-from-instance-profile" +aws iam delete-instance-profile --instance-profile-name ecs-heron-instance-profile +echo "done iam iam delete-instance-profile" +aws iam delete-role-policy --role-name ecs-heron-role --policy-name ecs-heron-policy +echo "done iam delete-role-policy " +aws iam delete-role --role-name ecs-heron-role +echo "done iam delete-role " +aws ec2 delete-key-pair --key-name ecs-heron-keypair +rm -f ecs-heron-key*.pem + +GROUP_ID=$(aws ec2 describe-security-groups --query 'SecurityGroups[?GroupName==`ecs-heron-securitygroup`].GroupId' --output text) +aws ec2 delete-security-group --group-id "$GROUP_ID" +echo "done delete security-groups " + +aws ecs delete-cluster --cluster ecs-heron-cluster +echo "done delete cluster " From b362b4da714177ff5919a9f7a9f51c2bc75fdd14 Mon Sep 17 00:00:00 2001 From: ananth Date: Sun, 21 May 2017 12:49:52 -0700 Subject: [PATCH 47/61] modified: ecs_compose_template.yaml --- heron/config/src/yaml/conf/ecs/ecs_compose_template.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/heron/config/src/yaml/conf/ecs/ecs_compose_template.yaml 
b/heron/config/src/yaml/conf/ecs/ecs_compose_template.yaml index 44cdc75c1bb..305c9f058cb 100755 --- a/heron/config/src/yaml/conf/ecs/ecs_compose_template.yaml +++ b/heron/config/src/yaml/conf/ecs/ecs_compose_template.yaml @@ -2,7 +2,8 @@ version: '2' services: CONTAINER_NUMBER: image: ananthgs/onlyheronandubuntu - command: ["sh", "-c", "mkdir /s3; cd /s3 ;aws s3 cp s3://herondockercal/TOPOLOGY_NAME/topology.tar.gz /s3 ;aws s3 cp s3://herondockercal/heron-core-testbuild-ubuntu14.04.tar.gz /s3 ;cd /s3; tar -zxvf topology.tar.gz; tar -zxvf heron-core-testbuild-ubuntu14.04.tar.gz; HERON_EXECUTOR ;"] + #command: ["sh", "-c", "mkdir /s3; cd /s3 ;aws s3 cp s3://herondockercal/TOPOLOGY_NAME/topology.tar.gz /s3 ;aws s3 cp s3://herondockercal/heron-core-testbuild-ubuntu14.04.tar.gz /s3 ;cd /s3; tar -zxvf topology.tar.gz; tar -zxvf heron-core-testbuild-ubuntu14.04.tar.gz; HERON_EXECUTOR ;"] + command: ["sh", "-c", "mkdir /s3; cd /s3 ;aws s3 cp s3://herondockercal/TOPOLOGY_NAME/topology.tar.gz /s3 ;aws s3 cp s3://herondockercal/heron-core.tar.gz /s3 ;cd /s3; tar -zxvf topology.tar.gz; tar -zxvf heron-core.tar.gz; HERON_EXECUTOR ;"] networks: - heron ports:FREEPORTS From 4b05a87662222eabd1bcf5ced1a1748b99bb2440 Mon Sep 17 00:00:00 2001 From: ananth Date: Sun, 21 May 2017 12:50:40 -0700 Subject: [PATCH 48/61] modified: setupEcs.sh --- heron/config/src/yaml/conf/ecs/setupEcs.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/heron/config/src/yaml/conf/ecs/setupEcs.sh b/heron/config/src/yaml/conf/ecs/setupEcs.sh index 933abb94149..e35e266f9fa 100755 --- a/heron/config/src/yaml/conf/ecs/setupEcs.sh +++ b/heron/config/src/yaml/conf/ecs/setupEcs.sh @@ -84,9 +84,9 @@ aws autoscaling create-launch-configuration --image-id $AMI --launch-configurati echo "done" #set up the ecs cli config -ecs-cli configure --region $REGION --access-key ecs-heron-keypair --cluster ecs-heron-cluster +ecs-cli configure --force --region $REGION --access-key ecs-heron-keypair --cluster 
ecs-heron-cluster #create a CloudFormation template -ecs-cli up --keypair ecs-heron-keypair --capability-iam --size 2 --instance-type t2.micro +ecs-cli up --force --keypair ecs-heron-keypair --capability-iam --size 2 --instance-type m4.large echo "Setup is ready! Please submit the ECS HERON Topology" From 2d9dbec57163eedd6b77c191b7f4dd2cb8477035 Mon Sep 17 00:00:00 2001 From: ananth Date: Sun, 21 May 2017 12:51:45 -0700 Subject: [PATCH 49/61] modified: scheduler.yaml --- heron/config/src/yaml/conf/ecs/scheduler.yaml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/heron/config/src/yaml/conf/ecs/scheduler.yaml b/heron/config/src/yaml/conf/ecs/scheduler.yaml index ae714c7e57c..d3d8a73007f 100644 --- a/heron/config/src/yaml/conf/ecs/scheduler.yaml +++ b/heron/config/src/yaml/conf/ecs/scheduler.yaml @@ -5,9 +5,14 @@ heron.class.scheduler: com.twitter.heron.scheduler.ecs.Ecs heron.class.launcher: com.twitter.heron.scheduler.ecs.EcsLauncher # location of java - pick it up from shell environment -heron.directory.sandbox.java.home: /usr/lib/jvm/java-8-oracle +#heron.directory.sandbox.java.home: /usr/lib/jvm/java-8-oracle +#heron.directory.sandbox.java.home: ${JAVA_HOME} heron.ecs.topology.binary.file: heron-examples.jar +heron.scheduler.ecs.working.directory: ${HOME}/.herondata/topologies/${CLUSTER}/${ROLE}/${TOPOLOGY} + # location of java - pick it up from shell environment -heron.ecs.compose.template.file: ${HOME}/.heron/conf/ecs/ecs_compose_template.yaml \ No newline at end of file +heron.ecs.compose.template.file: ${HOME}/.heron/conf/ecs/ecs_compose_template.yaml + +heron.ecs.ami.instance: http://169.254.169.254/latest/meta-data \ No newline at end of file From 0119231d9d240b4a2c82b83ae15e707d9af87788 Mon Sep 17 00:00:00 2001 From: ananth Date: Sun, 21 May 2017 13:04:15 -0700 Subject: [PATCH 50/61] added cluster binary context --- .../java/com/twitter/heron/scheduler/ecs/EcsContext.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 
deletion(-) diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java index 4d0cdcc799c..722ced29a7c 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsContext.java @@ -21,8 +21,13 @@ public class EcsContext extends Context { public static String ecsClusterBinary(Config config) { - String workingDirectory = config.getStringValue( + String ecsClusterBinary = config.getStringValue( EcsKey.ECS_CLUSTER_BINARY.value(), EcsKey.ECS_CLUSTER_BINARY.getDefaultString()); + return TokenSub.substitute(config, ecsClusterBinary); + } + public static String workingDirectory(Config config) { + String workingDirectory = config.getStringValue( + EcsKey.WORKING_DIRECTORY.value(), EcsKey.WORKING_DIRECTORY.getDefaultString()); return TokenSub.substitute(config, workingDirectory); } From 986e7f79097550f072733a1e468e8155f01e23f5 Mon Sep 17 00:00:00 2001 From: ananth Date: Sun, 21 May 2017 13:07:00 -0700 Subject: [PATCH 51/61] Added scheduler related changes for scheduler info to be put in zookeeper. 
--- .../twitter/heron/scheduler/ecs/EcsKey.java | 6 +- .../heron/scheduler/ecs/EcsLauncher.java | 79 ++++++++++++++++++- .../heron/scheduler/ecs/EcsScheduler.java | 50 +++++++----- 3 files changed, 110 insertions(+), 25 deletions(-) diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java index a268f67bf18..7882ae35b17 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java @@ -23,13 +23,15 @@ public enum EcsKey { "${HOME}/.heron/conf/ecs/ecs_compose_template.yaml"), ECS_AMI_INSTANCE("heron.ecs.ami.instance", "http://169.254.169.254/latest/meta-data/local-ipv4"), ECS_COMPOSE_UPCMD("heron.ecs.compose.up", "ecs-cli compose --project-name "), - ECS_COMPOSE_STOP("heron.ecs.compose.up", "ecs-cli stop"), + ECS_COMPOSE_STOP("heron.ecs.compose.up", "aws ecs stop-task --cluster default --task "), //ECS_COMPOSE_LIST("heron.ecs.compose.up", "ecs-cli ps") ECS_COMPOSE_LIST("heron.ecs.compose.up", "aws ecs list-tasks --family "), ECS_LIST_BY("heron.ecs.list.by", "families"), ECS_TASK_TAG("heron.ecs.task.tag", "taskArns"), ECS_GET_FAMILY("heron.ecs.family.name", - "aws ecs list-task-definition-families --family-prefix ecscompose-"); + "aws ecs list-task-definition-families --family-prefix ecscompose-"), + WORKING_DIRECTORY("heron.scheduler.ecs.working.directory", + "${HOME}/.herondata/topologies/${CLUSTER}/${ROLE}/${TOPOLOGY}"); private final String value; diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java index 4c3d4c84b8f..daf15d8247a 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsLauncher.java @@ -15,25 +15,37 @@ package com.twitter.heron.scheduler.ecs; +import 
java.io.File; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.logging.Level; import java.util.logging.Logger; +import com.twitter.heron.common.basics.SysUtils; import com.twitter.heron.scheduler.utils.LauncherUtils; +import com.twitter.heron.scheduler.utils.Runtime; +import com.twitter.heron.scheduler.utils.SchedulerUtils; import com.twitter.heron.spi.common.Config; +import com.twitter.heron.spi.common.Context; import com.twitter.heron.spi.packing.PackingPlan; import com.twitter.heron.spi.scheduler.ILauncher; import com.twitter.heron.spi.scheduler.IScheduler; +import com.twitter.heron.spi.utils.ShellUtils; public class EcsLauncher implements ILauncher { protected static final Logger LOG = Logger.getLogger(EcsLauncher.class.getName()); private Config config; private Config runtime; - + private String topologyWorkingDirectory; @Override public void initialize(Config mConfig, Config mRuntime) { this.config = mConfig; this.runtime = mRuntime; + this.topologyWorkingDirectory = EcsContext.workingDirectory(config); } @Override @@ -41,13 +53,74 @@ public void close() { // Do nothing } - @Override - public boolean launch(PackingPlan packing) { + //@Override + public boolean launch_old(PackingPlan packing) { LauncherUtils launcherUtils = LauncherUtils.getInstance(); Config ytruntime = launcherUtils.createConfigWithPackingDetails(runtime, packing); return launcherUtils.onScheduleAsLibrary(config, ytruntime, getScheduler(), packing); } + @Override + public boolean launch(PackingPlan packing) { + LOG.log(Level.FINE, "Launching topology for local cluster {0}", + EcsContext.cluster(config)); + + if (!setupWorkingDirectory()) { + LOG.severe("Failed to setup working directory"); + return false; + } + + String[] schedulerCmd = getSchedulerCommand(); + Process p = startScheduler(schedulerCmd); + if (p == null) { + LOG.severe("Failed to start SchedulerMain using: " + Arrays.toString(schedulerCmd)); + return 
false; + } + + LOG.log(Level.FINE, String.format( + "To check the status and logs of the topology, use the working directory %s", + EcsContext.workingDirectory(config))); + + return true; + + } + + protected String[] getSchedulerCommand() { + List freePorts = new ArrayList<>(SchedulerUtils.PORTS_REQUIRED_FOR_SCHEDULER); + for (int i = 0; i < SchedulerUtils.PORTS_REQUIRED_FOR_SCHEDULER; i++) { + freePorts.add(SysUtils.getFreePort()); + } + + return SchedulerUtils.schedulerCommand(config, runtime, freePorts); + } + + protected Process startScheduler(String[] schedulerCmd) { + return ShellUtils.runASyncProcess(EcsContext.verbose(config), schedulerCmd, + new File(topologyWorkingDirectory)); + } + protected boolean setupWorkingDirectory() { + // get the path of core release URI + String coreReleasePackageURI = EcsContext.corePackageUri(config); + + // form the target dest core release file name + String coreReleaseFileDestination = Paths.get( + topologyWorkingDirectory, "heron-core.tar.gz").toString(); + + // Form the topology package's URI + String topologyPackageURI = Runtime.topologyPackageUri(runtime).toString(); + + // form the target topology package file name + String topologyPackageDestination = Paths.get( + topologyWorkingDirectory, "topology.tar.gz").toString(); + + return SchedulerUtils.setupWorkingDirectory( + topologyWorkingDirectory, + coreReleasePackageURI, + coreReleaseFileDestination, + topologyPackageURI, + topologyPackageDestination, + Context.verbose(config)); + } protected IScheduler getScheduler() { return new EcsScheduler(); } diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java index 79cd9da88f6..ac3215778c1 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsScheduler.java @@ -27,6 +27,7 @@ import 
com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.annotations.VisibleForTesting; import org.apache.commons.io.IOUtils; @@ -49,7 +50,11 @@ public class EcsScheduler implements IScheduler { @Override public void initialize(Config mConfig, Config mRuntime) { - this.config = Config.toClusterMode(mConfig); + if (Config.toClusterMode(mConfig) != null) { + this.config = Config.toClusterMode(mConfig); + } else { + this.config = mConfig; + } this.runtime = mRuntime; } @@ -63,7 +68,7 @@ protected int startExecutorSyncProcess(int container) { executingInShell = getExecutorCommand(container)[0]; return ShellUtils.runProcess(executingInShell, null); } - + @VisibleForTesting protected void startExecutor(final int container) { LOG.info("Starting a new executor for container: " + container); int shellOutput = startExecutorSyncProcess(container); @@ -71,7 +76,7 @@ protected void startExecutor(final int container) { + container + String.valueOf(shellOutput)); } - private String[] getExecutorCommand(int container) { + protected String[] getExecutorCommand(int container) { List freePorts = new ArrayList<>(SchedulerUtils.PORTS_REQUIRED_FOR_EXECUTOR); Integer localFreePort = null; nfreePorts = new StringBuilder(); @@ -104,19 +109,19 @@ private String[] getExecutorCommand(int container) { String finalCommand = String.format("%s %s --file %s up", EcsContext.composeupCmd(config), ecsTaskProject, tempDockerFile); - //LOG.info("final Ecs Task command " + finalCommand); - tempDockerFile.deleteOnExit(); + LOG.info("final Ecs Task command " + finalCommand); + //tempDockerFile.deleteOnExit(); return new String[] {finalCommand}; } - private String setClusterValues(String localExecCommand) { + protected String setClusterValues(String localExecCommand) { String clusterExecCommand = localExecCommand.replace(Context.topologyBinaryFile(config), 
EcsContext.ecsClusterBinary(config)); clusterExecCommand = clusterExecCommand.replaceAll("\"", "'"); return clusterExecCommand; } - private String getDockerFileContent(String execCommand, int container) throws IOException { + protected String getDockerFileContent(String execCommand, int container) throws IOException { String commandBuiler = new String(Files.readAllBytes( Paths.get(EcsContext.ecsComposeTemplate(config)))); @@ -126,7 +131,7 @@ private String getDockerFileContent(String execCommand, int container) throws IO "executor" + String.valueOf(container)); commandBuiler = commandBuiler.replace("HERON_EXECUTOR", execCommand); commandBuiler = commandBuiler.replace("FREEPORTS", nfreePorts); - //System.out.println("commandBuiler :\n" + commandBuiler); + System.out.println("commandBuiler :\n" + commandBuiler); return commandBuiler; } @@ -157,9 +162,6 @@ public boolean onSchedule(PackingPlan packing) { startExecutor(container.getId()); } LOG.info("Executor for each container have been started."); - LOG.info("Listing each of tasks started."); - // List jobLinks = new ArrayList(); - //jobLinks = getJobLinks(); return true; } @@ -233,15 +235,23 @@ private List parseJsonName(String jString, String jName) throws JsonPars public boolean onKill(Scheduler.KillTopologyRequest request) { StringBuilder stdout = new StringBuilder(); StringBuilder stderr = new StringBuilder(); - int status = ShellUtils.runProcess(EcsContext.composeStopCmd(config), null); - if (status != 0) { - LOG.severe(String.format( - "Failed to run process. 
Command=%s, STDOUT=%s, STDERR=%s", - EcsContext.composeStopCmd(config), stdout, stderr)); - isTopologyKilled = false; - } else { - LOG.info("Topology Taks stop Successful"); - isTopologyKilled = true; + + List taskList = getJobLinks(); + for (String taskId : taskList) { + StringBuilder killJob = new StringBuilder(); + killJob.append(EcsContext.composeStopCmd(config)); + killJob.append(" "); + killJob.append(taskId); + int status = ShellUtils.runProcess(killJob.toString(), null); + if (status != 0) { + LOG.severe(String.format( + "Failed to run process. Command=%s, STDOUT=%s, STDERR=%s", + EcsContext.composeStopCmd(config), stdout, stderr)); + isTopologyKilled = false; + } else { + LOG.info("Topology Task stop Successful"); + isTopologyKilled = true; + } } return isTopologyKilled; } From f13b137da3511f3dba6210be9cfc2f5238120f83 Mon Sep 17 00:00:00 2001 From: ananth Date: Sun, 21 May 2017 13:08:42 -0700 Subject: [PATCH 52/61] Added files for on schedule testing --- .../heron/scheduler/ecs/EcsSchedulerTest.java | 43 ++----------------- 1 file changed, 4 insertions(+), 39 deletions(-) diff --git a/heron/schedulers/tests/java/com/twitter/heron/scheduler/ecs/EcsSchedulerTest.java b/heron/schedulers/tests/java/com/twitter/heron/scheduler/ecs/EcsSchedulerTest.java index a7c4d53afcc..745db610107 100644 --- a/heron/schedulers/tests/java/com/twitter/heron/scheduler/ecs/EcsSchedulerTest.java +++ b/heron/schedulers/tests/java/com/twitter/heron/scheduler/ecs/EcsSchedulerTest.java @@ -37,16 +37,16 @@ import com.twitter.heron.api.generated.TopologyAPI; import com.twitter.heron.common.basics.PackageType; -import com.twitter.heron.scheduler.utils.LauncherUtils; +//import com.twitter.heron.scheduler.utils.LauncherUtils; import com.twitter.heron.scheduler.utils.SchedulerUtils; import com.twitter.heron.spi.common.Config; import com.twitter.heron.spi.common.ConfigLoader; import com.twitter.heron.spi.common.Key; -import com.twitter.heron.spi.packing.IPacking; +//import 
com.twitter.heron.spi.packing.IPacking; import com.twitter.heron.spi.packing.PackingPlan; -import com.twitter.heron.spi.scheduler.ILauncher; -import com.twitter.heron.spi.statemgr.SchedulerStateManagerAdaptor; +//import com.twitter.heron.spi.scheduler.ILauncher; +//import com.twitter.heron.spi.statemgr.SchedulerStateManagerAdaptor; import com.twitter.heron.spi.utils.PackingTestUtils; import com.twitter.heron.spi.utils.TopologyTests; @@ -107,41 +107,6 @@ public static void afterClass() throws Exception { scheduler.close(); } - private Config createRunnerRuntime( - com.twitter.heron.api.Config topologyConfig) throws Exception { - Config lRuntime = Mockito.spy(Config.newBuilder().build()); - ILauncher launcher = Mockito.mock(ILauncher.class); - IPacking packing = Mockito.mock(IPacking.class); - SchedulerStateManagerAdaptor adaptor = Mockito.mock(SchedulerStateManagerAdaptor.class); - //TopologyAPI.Topology topology = createTopology(topologyConfig); - TopologyAPI.Topology topology = TopologyTests.createTopology( - TOPOLOGY_NAME, new com.twitter.heron.api.Config(), new HashMap(), - new HashMap()); - - Mockito.doReturn(launcher).when(lRuntime).get(Key.LAUNCHER_CLASS_INSTANCE); - Mockito.doReturn(adaptor).when(lRuntime).get(Key.SCHEDULER_STATE_MANAGER_ADAPTOR); - Mockito.doReturn(topology).when(lRuntime).get(Key.TOPOLOGY_DEFINITION); - - PackingPlan packingPlan = Mockito.mock(PackingPlan.class); - Mockito.when(packingPlan.getContainers()).thenReturn( - new HashSet()); - Mockito.when(packingPlan.getComponentRamDistribution()).thenReturn("ramdist"); - Mockito.when(packingPlan.getId()).thenReturn("packing_plan_id"); - containerPlans = new HashSet<>(); - containerPlans.add(PackingTestUtils.testContainerPlan(1)); // just need it to be of size 1 - Mockito.when(packingPlan.getContainers()).thenReturn(containerPlans); - Mockito.when(packing.pack()).thenReturn(packingPlan); - - LauncherUtils mockLauncherUtils = Mockito.mock(LauncherUtils.class); - 
Mockito.when(mockLauncherUtils.createPackingPlan(Mockito.any(Config.class), - Mockito.any(Config.class))) - .thenReturn(packingPlan); - PowerMockito.spy(LauncherUtils.class); - PowerMockito.doReturn(mockLauncherUtils).when(LauncherUtils.class, "getInstance"); - - - return lRuntime; - } @Test public void testOnSchedule() throws Exception { From 4cc598b397aa41e36d88f7d5482f66aa5c71275a Mon Sep 17 00:00:00 2001 From: ananth Date: Sun, 21 May 2017 13:11:40 -0700 Subject: [PATCH 53/61] JAVA home for enable AWS scheduler --- .../com/twitter/heron/scheduler/utils/SchedulerUtils.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/heron/scheduler-core/src/java/com/twitter/heron/scheduler/utils/SchedulerUtils.java b/heron/scheduler-core/src/java/com/twitter/heron/scheduler/utils/SchedulerUtils.java index 3d103aedad9..61ad97f12a1 100644 --- a/heron/scheduler-core/src/java/com/twitter/heron/scheduler/utils/SchedulerUtils.java +++ b/heron/scheduler-core/src/java/com/twitter/heron/scheduler/utils/SchedulerUtils.java @@ -62,7 +62,7 @@ public static String[] schedulerCommand( List commands = new ArrayList<>(); // The java executable should be "{JAVA_HOME}/bin/java" - String javaExecutable = String.format("%s/%s", Context.clusterJavaHome(config), "bin/java"); + String javaExecutable = String.format("%s/%s", Context.javaHome(config), "bin/java"); commands.add(javaExecutable); commands.add("-cp"); @@ -88,7 +88,7 @@ public static String[] schedulerCommand( * @param freePorts list of free ports * @return String[] representing the arguments to start heron-scheduler */ - public static String[] schedulerCommandArgs( + public static String[] schedulerCommandArgs( Config config, Config runtime, List freePorts) { // First let us have some safe checks if (freePorts.size() < PORTS_REQUIRED_FOR_SCHEDULER) { From 168a47491a28de1a68f431e90571a88108fed02b Mon Sep 17 00:00:00 2001 From: ananth Date: Sun, 21 May 2017 22:36:11 -0700 Subject: [PATCH 54/61] Updating with correct 
document link --- heron/config/src/yaml/conf/ecs/README | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/heron/config/src/yaml/conf/ecs/README b/heron/config/src/yaml/conf/ecs/README index f2d19eb6017..f6308aa8159 100644 --- a/heron/config/src/yaml/conf/ecs/README +++ b/heron/config/src/yaml/conf/ecs/README @@ -1,11 +1,6 @@ -This folder contains sample configs needed for using zookeeper in LocalScheduler. -In order to run LocalScheduler, you need to set up a running zookeeper server basing on the config inside statemgr.yaml: -1. Set up the appropriate connection string. -2. Create following required nodes in zookeeper (one time effort): - /{heron.statemgr.root.path}/tmasters - /{heron.statemgr.root.path}/topologies - /{heron.statemgr.root.path}/pplans - /{heron.statemgr.root.path}/executionstate - /{heron.statemgr.root.path}/schedulers +This folder contains sample configs needed for running Heron on an AWS cluster. +Please follow the steps at this google doc for detailed set up and workflow: -Then you can run LocalScheduler with zookeeper state manager. +https://docs.google.com/document/d/1ecbCuA46cIKPfY0SP0F1dcRlei4DIPz3pZ6ZSZ5zZgc/edit + +Then you can run Heron on AWS !!! 
From f19d354e2d4bc3920a02432c3ad7e6050f63f44c Mon Sep 17 00:00:00 2001 From: ananth Date: Sun, 21 May 2017 22:36:55 -0700 Subject: [PATCH 55/61] Adding Ecs AMI check as non mandatory field --- heron/executor/src/python/heron_executor.py | 48 +++++++++++++++------ 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/heron/executor/src/python/heron_executor.py b/heron/executor/src/python/heron_executor.py index 1b024edc6e8..4558b25d8b5 100755 --- a/heron/executor/src/python/heron_executor.py +++ b/heron/executor/src/python/heron_executor.py @@ -30,6 +30,7 @@ import time import yaml import socket +import traceback import urllib2 from functools import partial @@ -143,6 +144,14 @@ def getHost(ecs_ami): return l_host +def stdout_log_fn(cmd): + """Simple function callback that is used to log the streaming output of a subprocess command + :param cmd: the name of the command which will be added to the log line + :return: None + """ + # Log the messages to stdout and strip off the newline because Log.info adds one automatically + return lambda line: Log.info("%s stdout: %s", cmd, line.rstrip('\n')) + class ProcessInfo(object): def __init__(self, process, name, command, attempts=1): """ @@ -640,24 +649,37 @@ def _untar_if_tar(self): # pylint: disable=no-self-use def _wait_process_std_out_err(self, name, process): ''' Wait for the termination of a process and log its stdout & stderr ''' - (process_stdout, process_stderr) = process.communicate() - if process_stdout: - Log.info("%s stdout: %s" %(name, process_stdout)) - if process_stderr: - Log.info("%s stderr: %s" %(name, process_stderr)) + log.stream_process_stdout(process, stdout_log_fn(name)) + process.wait() def _run_process(self, name, cmd, env_to_exec=None): Log.info("Running %s process as %s" % (name, ' '.join(cmd))) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - env=env_to_exec) + try: + # stderr is redirected to stdout so that it can more easily be logged. 
stderr has a max buffer + # size and can cause the child process to deadlock if it fills up + process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + env=env_to_exec, bufsize=1) - def _run_blocking_process(self, cmd, is_shell, env_to_exec=None): - Log.info("Running blocking process as %s" % cmd) - process = subprocess.Popen(cmd, shell=is_shell, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, env=env_to_exec) + log.async_stream_process_stdout(process, stdout_log_fn(name)) + except Exception: + Log.info("Exception running command %:", cmd) + traceback.print_exc() - # wait for termination - self._wait_process_std_out_err("", process) + return process + + def _run_blocking_process(self, cmd, is_shell=False, env_to_exec=None): + Log.info("Running blocking process as %s" % cmd) + try: + # stderr is redirected to stdout so that it can more easily be logged. stderr has a max buffer + # size and can cause the child process to deadlock if it fills up + process = subprocess.Popen(cmd, shell=is_shell, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, env=env_to_exec) + + # wait for termination + self._wait_process_std_out_err(cmd, process) + except Exception: + Log.info("Exception running command %:", cmd) + traceback.print_exc() # return the exit code return process.returncode From 034eb1a5e8daa7d6a721fb9b927740b9add4cfd4 Mon Sep 17 00:00:00 2001 From: ananth Date: Sun, 21 May 2017 22:40:42 -0700 Subject: [PATCH 56/61] modified: tools/rules/heron_core.bzl --- tools/rules/heron_core.bzl | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/rules/heron_core.bzl b/tools/rules/heron_core.bzl index aeabb590527..763b0abd7fc 100644 --- a/tools/rules/heron_core.bzl +++ b/tools/rules/heron_core.bzl @@ -25,7 +25,9 @@ def heron_core_lib_files(): heron_core_lib_packing_files() + \ heron_core_lib_metricsmgr_files() + \ heron_core_lib_statemgr_files() + \ - heron_core_lib_instance_files() + heron_core_lib_instance_files() + 
\ + heron_core_lib_ckptmgr_files() + \ + heron_core_lib_statefulstorages_files() def heron_core_lib_scheduler_files(): return [ @@ -63,3 +65,14 @@ def heron_core_lib_instance_files(): return [ "//heron/instance/src/java:heron-instance", ] + +def heron_core_lib_ckptmgr_files(): + return [ + "//heron/ckptmgr/src/java:heron-ckptmgr", + ] + +def heron_core_lib_statefulstorages_files(): + return [ + "//heron/statefulstorages/src/java:heron-localfs-statefulstorage", + "//heron/statefulstorages/src/java:heron-hdfs-statefulstorage", + ] From 246b9227045e71d41c8472f9086595627e16c279 Mon Sep 17 00:00:00 2001 From: ananth Date: Sun, 21 May 2017 22:43:31 -0700 Subject: [PATCH 57/61] modified: scripts/packages/BUILD --- scripts/packages/BUILD | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/scripts/packages/BUILD b/scripts/packages/BUILD index 43a351e1f6e..7c8d9a68a5c 100644 --- a/scripts/packages/BUILD +++ b/scripts/packages/BUILD @@ -21,6 +21,8 @@ load("/tools/rules/heron_core", "heron_core_lib_metricsmgr_files") load("/tools/rules/heron_core", "heron_core_lib_metricscachemgr_files") load("/tools/rules/heron_core", "heron_core_lib_packing_files") load("/tools/rules/heron_core", "heron_core_lib_statemgr_files") +load("/tools/rules/heron_core", "heron_core_lib_ckptmgr_files") +load("/tools/rules/heron_core", "heron_core_lib_statefulstorages_files") load("/tools/rules/heron_tools", "heron_tools_files") load("/tools/rules/heron_tools", "heron_tools_bin_files") @@ -71,6 +73,8 @@ pkg_tar( ":heron-core-lib-metricscachemgr", ":heron-core-lib-statemgr", ":heron-core-lib-instance", + ":heron-core-lib-ckptmgr", + ":heron-core-lib-statefulstorages", ], ) @@ -116,6 +120,18 @@ pkg_tar( files = heron_core_lib_instance_files(), ) +pkg_tar( + name = "heron-core-lib-ckptmgr", + package_dir = "heron-core/lib/ckptmgr", + files = heron_core_lib_ckptmgr_files(), +) + +pkg_tar( + name = "heron-core-lib-statefulstorages", + package_dir = "heron-core/lib/statefulstorages", + files 
= heron_core_lib_statefulstorages_files(), +) + ################################################################################ # Heron tools packaging ################################################################################ From f6f52d9b389357f6a8e05903e63bd82e50234945 Mon Sep 17 00:00:00 2001 From: ananth Date: Sun, 21 May 2017 22:51:07 -0700 Subject: [PATCH 58/61] merging with changes on upstream --- heron/executor/src/python/heron_executor.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/heron/executor/src/python/heron_executor.py b/heron/executor/src/python/heron_executor.py index 4558b25d8b5..4cca969f161 100755 --- a/heron/executor/src/python/heron_executor.py +++ b/heron/executor/src/python/heron_executor.py @@ -31,6 +31,7 @@ import yaml import socket import traceback +import traceback import urllib2 from functools import partial @@ -144,6 +145,14 @@ def getHost(ecs_ami): return l_host +def stdout_log_fn(cmd): + """Simple function callback that is used to log the streaming output of a subprocess command + :param cmd: the name of the command which will be added to the log line + :return: None + """ + # Log the messages to stdout and strip off the newline because Log.info adds one automatically + return lambda line: Log.info("%s stdout: %s", cmd, line.rstrip('\n')) + def stdout_log_fn(cmd): """Simple function callback that is used to log the streaming output of a subprocess command :param cmd: the name of the command which will be added to the log line From 84b669c982505e682aeecef222624a1b5c7f08b0 Mon Sep 17 00:00:00 2001 From: ananth Date: Mon, 22 May 2017 10:08:56 -0700 Subject: [PATCH 59/61] Fixed duplicate lines due to merge with head branch --- heron/executor/src/python/heron_executor.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/heron/executor/src/python/heron_executor.py b/heron/executor/src/python/heron_executor.py index f705cf5da7c..a63a9d46421 100755 --- 
a/heron/executor/src/python/heron_executor.py +++ b/heron/executor/src/python/heron_executor.py @@ -31,7 +31,7 @@ import yaml import socket import urllib2 - +import traceback from functools import partial @@ -151,21 +151,6 @@ def stdout_log_fn(cmd): # Log the messages to stdout and strip off the newline because Log.info adds one automatically return lambda line: Log.info("%s stdout: %s", cmd, line.rstrip('\n')) -def stdout_log_fn(cmd): - """Simple function callback that is used to log the streaming output of a subprocess command - :param cmd: the name of the command which will be added to the log line - :return: None - """ - # Log the messages to stdout and strip off the newline because Log.info adds one automatically - return lambda line: Log.info("%s stdout: %s", cmd, line.rstrip('\n')) - -def stdout_log_fn(cmd): - """Simple function callback that is used to log the streaming output of a subprocess command - :param cmd: the name of the command which will be added to the log line - :return: None - """ - # Log the messages to stdout and strip off the newline because Log.info adds one automatically - return lambda line: Log.info("%s stdout: %s", cmd, line.rstrip('\n')) class ProcessInfo(object): def __init__(self, process, name, command, attempts=1): From 88d5ae6ce99d4ab9fd1edc09d5d88165ec777fc8 Mon Sep 17 00:00:00 2001 From: ananth Date: Mon, 22 May 2017 10:10:21 -0700 Subject: [PATCH 60/61] Fixed Cluster name to ecs-heron-cluster --- .../src/java/com/twitter/heron/scheduler/ecs/EcsKey.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java index 7882ae35b17..490680c6c8d 100644 --- a/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java +++ b/heron/schedulers/src/java/com/twitter/heron/scheduler/ecs/EcsKey.java @@ -23,7 +23,7 @@ public enum EcsKey { 
"${HOME}/.heron/conf/ecs/ecs_compose_template.yaml"), ECS_AMI_INSTANCE("heron.ecs.ami.instance", "http://169.254.169.254/latest/meta-data/local-ipv4"), ECS_COMPOSE_UPCMD("heron.ecs.compose.up", "ecs-cli compose --project-name "), - ECS_COMPOSE_STOP("heron.ecs.compose.up", "aws ecs stop-task --cluster default --task "), + ECS_COMPOSE_STOP("heron.ecs.compose.up", "aws ecs stop-task --cluster ecs-heron-cluster --task "), //ECS_COMPOSE_LIST("heron.ecs.compose.up", "ecs-cli ps") ECS_COMPOSE_LIST("heron.ecs.compose.up", "aws ecs list-tasks --family "), ECS_LIST_BY("heron.ecs.list.by", "families"), From 2f1bde1e9988edc5dac18a6727d06dcff32425e5 Mon Sep 17 00:00:00 2001 From: ananth Date: Mon, 22 May 2017 10:53:03 -0700 Subject: [PATCH 61/61] modified: heron/config/src/yaml/conf/ecs/setupEcs.sh --- heron/config/src/yaml/conf/ecs/setupEcs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heron/config/src/yaml/conf/ecs/setupEcs.sh b/heron/config/src/yaml/conf/ecs/setupEcs.sh index e35e266f9fa..16b5f67203d 100755 --- a/heron/config/src/yaml/conf/ecs/setupEcs.sh +++ b/heron/config/src/yaml/conf/ecs/setupEcs.sh @@ -40,7 +40,7 @@ aws ecs create-cluster --cluster-name ecs-heron-cluster > /dev/null echo "done" # Security group -echo -n "Creating Security Group (ecs-heron-demo) .. " +echo -n "Creating Security Group (ecs-heron-securitygroup) .. " SECURITY_GROUP_ID=$(aws ec2 create-security-group --group-name ecs-heron-securitygroup --description 'ECS Heron' --query 'GroupId' --output text) # Wait for the group to get associated with the VPC sleep 5