@@ -63,6 +63,11 @@ defmodule CloudWatch do
6363 log_stream_name = Keyword . get ( opts , :log_stream_name )
6464 max_buffer_size = Keyword . get ( opts , :max_buffer_size , @ default_max_buffer_size )
6565 max_timeout = Keyword . get ( opts , :max_timeout , @ default_max_timeout )
66+ purge_buffer_if_throttled? = Keyword . get ( opts , :purge_buffer_if_throttled , false ) # see "ThrottlingException"
67+
68+ # Limit out-of-memory problems: a slow CloudWatch connection may cause the message queue to grow without bound
69+ max_heap_size = 4194304 # words are 8 bytes, i.e. 32 MiB
70+ :erlang . process_flag ( :max_heap_size , max_heap_size ) # see http://erlang.org/doc/man/erlang.html
6671
6772 # AWS configuration, only if needed by the AWS library
6873 region = Keyword . get ( opts , :region )
@@ -81,6 +86,7 @@ defmodule CloudWatch do
8186 log_stream_name: log_stream_name ,
8287 max_buffer_size: max_buffer_size ,
8388 max_timeout: max_timeout ,
89+ purge_buffer_if_throttled: purge_buffer_if_throttled? ,
8490 sequence_token: nil ,
8591 flushed_at: nil
8692 }
@@ -94,7 +100,6 @@ defmodule CloudWatch do
94100 message = state . format
95101 |> Logger.Formatter . format ( level , msg , ts , md )
96102 |> IO . chardata_to_string
97- #buffer = List.insert_at(buffer, -1, %InputLogEvent{message: message, timestamp: ts}) # performance impact of adding at the end?
98103 buffer = [ % InputLogEvent { message: message , timestamp: ts } | buffer ] # buffer order is not relevant, we'll reverse or sort later if needed
99104 % { state | buffer: buffer , buffer_length: buffer_length + 1 , buffer_size: buffer_size + byte_size ( message ) + 26 }
100105 end
@@ -115,12 +120,14 @@ defmodule CloudWatch do
115120 do_flush ( state , opts , log_group_name , log_stream_name )
116121 end
117122
118- defp do_flush ( % { buffer: buffer } = state , opts , log_group_name , log_stream_name ) do
123+ defp do_flush ( % { buffer: buffer , buffer_length: buffer_length } = state , opts , log_group_name , log_stream_name ) do
119124 events = % { logEvents: Enum . sort_by ( buffer , & ( & 1 . timestamp ) ) ,
120125 logGroupName: log_group_name , logStreamName: log_stream_name , sequenceToken: state . sequence_token }
121126 case AwsProxy . put_log_events ( state . client , events ) do
122127 { :ok , % { "nextSequenceToken" => next_sequence_token } , _ } ->
123- { :ok , state |> purge_buffer ( ) |> Map . put ( :sequence_token , next_sequence_token ) }
128+ { :ok , state |> purge_buffer ( ) |> Map . put ( :sequence_token , next_sequence_token )
129+ # |> add_internal_info("CloudWatch Log flushed buffer (#{inspect buffer_length} messages)")
130+ }
124131 { :error , { "DataAlreadyAcceptedException" , "The given batch of log events has already been accepted. The next batch can be sent with sequenceToken: " <> next_sequence_token } } ->
125132 state
126133 |> Map . put ( :sequence_token , next_sequence_token )
@@ -148,11 +155,21 @@ defmodule CloudWatch do
148155 |> do_flush ( opts , log_group_name , log_stream_name )
149156 { :error , { "ThrottlingException" , "Rate exceeded" } } ->
150157 # AWS limit is 5 requests per second per log stream. We are supposed to re-try after a delay
151- # Sleeping here is a quick and dirty hack with possible unwanted consequences
152- # Better approach: introduce a blackout period. Start removing old logs if buffer size exceeded 1 MB during blackout
153- state = state |> add_internal_error ( "CloudWatch Log ThrottlingException: delaying transfer" )
154- Process . sleep ( 500 )
155- flush ( state , opts )
158+ if state . purge_buffer_if_throttled do
159+ # Safe option: delay the transfer by removing all messages from the buffer (some messages will be lost!).
160+ {
161+ :ok ,
162+ state
163+ |> purge_buffer ( )
164+ |> add_internal_error ( "CloudWatch Log ThrottlingException: #{ inspect buffer_length } messages were lost!}" )
165+ }
166+ else
167+ # Sleeping here is a quick and dirty hack with possible unwanted consequences
168+ # Better approach: introduce a blackout period. Start removing old logs if the buffer size exceeds 1 MB during the blackout
169+ state = state |> add_internal_error ( "CloudWatch Log ThrottlingException: delaying transfer" )
170+ Process . sleep ( 500 )
171+ flush ( state , opts )
172+ end
156173 { :error , { "ExpiredTokenException" , _ } } ->
157174 # aws-elixir may require restarting of state.client; ex_aws handles expired tokens internally
158175 flush ( state , opts )
@@ -167,6 +184,10 @@ defmodule CloudWatch do
167184 add_internal_message ( state , :error , msg )
168185 end
169186
187+ # defp add_internal_info(state, msg) do
188+ # add_internal_message(state, :info, msg)
189+ # end
190+
170191 defp add_internal_message ( state , level , msg ) do
171192 utc_log? = Application . get_env ( :logger , :utc_log , false )
172193 state
0 commit comments