diff --git a/README.md b/README.md index 03548b4..581b72d 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,6 @@ [![Release][release-badge]][release] [![License][license-image]][license] -[![Discourse posts][discourse-image]][discourse] Examples of how to automate creating a [Snowplow Community pipeline](https://github.com/snowplow/snowplow). @@ -13,15 +12,12 @@ These examples cover deploying an Iglu Server, for hosting your schemas, and a S | Tool | Cloud | Components | Status | Deployment Summary | |------------|-------|------------------------------|---------------------------------------------|----------------------------------------------| | Terraform | AWS | Iglu Server | [Published](terraform/aws/iglu_server) | | -| Terraform | AWS | Pipeline (PostgreSQL) | [Published](terraform/aws/pipeline) | [AWS PostgreSQL Summary][deploypgsum-aws] | | Terraform | AWS | Pipeline (Snowflake) | [Published](terraform/aws/pipeline) | [AWS Snowflake Summary][deploysfsum-aws] | | Terraform | AWS | Pipeline (Redshift) | [Published](terraform/aws/pipeline) | [AWS Redshift Summary][deployrssum-aws] | | Terraform | AWS | Pipeline (Databricks) | [Published](terraform/aws/pipeline) | [AWS Databricks Summary][deploydbsum-aws] | | Terraform | GCP | Iglu Server | [Published](terraform/gcp/iglu_server) | | -| Terraform | GCP | Pipeline (PostgreSQL) | [Published](terraform/gcp/pipeline) | [GCP PostgreSQL Summary][deploypgsum-gcp] | | Terraform | GCP | Pipeline (BigQuery) | [Published](terraform/gcp/pipeline) | [GCP BigQuery Summary][deploybqsum-gcp] | | Terraform | Azure | Iglu Server | [Published](terraform/azure/iglu_server) | | -| Terraform | Azure | Pipeline (Snowflake) | [Published](terraform/azure/pipeline) | [Azure Snowflake Summary][deploysfsum-azure] | | Terraform | Azure | Pipeline (Databricks) | [Published](terraform/azure/pipeline) | [Azure Databricks Summary][deploydbsum-azure] | | Terraform | Azure | Pipeline (Synapse Analytics) | [Published](terraform/azure/pipeline) | 
[Azure Synapse Analytics Summary][deploysasum-azure] | @@ -62,13 +58,10 @@ Licensed under the [Snowplow Limited Use License Agreement][license]. _(If you a [installguide]: https://docs.snowplow.io/docs/getting-started-on-community-edition/what-is-quick-start/ [faq]: https://docs.snowplow.io/docs/getting-started-on-community-edition/faq/ -[deploypgsum-aws]: https://docs.snowplow.io/docs/getting-started-on-community-edition/what-is-deployed/?warehouse=postgres&cloud=aws [deploysfsum-aws]: https://docs.snowplow.io/docs/getting-started-on-community-edition/what-is-deployed/?warehouse=snowflake&cloud=aws [deployrssum-aws]: https://docs.snowplow.io/docs/getting-started-on-community-edition/what-is-deployed/?warehouse=redshift&cloud=aws [deploydbsum-aws]: https://docs.snowplow.io/docs/getting-started-on-community-edition/what-is-deployed/?warehouse=databricks&cloud=aws -[deploypgsum-gcp]: https://docs.snowplow.io/docs/getting-started-on-community-edition/what-is-deployed/?warehouse=postgres&cloud=gcp [deploybqsum-gcp]: https://docs.snowplow.io/docs/getting-started-on-community-edition/what-is-deployed/?warehouse=bigquery&cloud=gcp -[deploysfsum-azure]: https://docs.snowplow.io/docs/getting-started-on-community-edition/what-is-deployed/?warehouse=snowflake&cloud=azure [deploydbsum-azure]: https://docs.snowplow.io/docs/getting-started-on-community-edition/what-is-deployed/?warehouse=databricks&cloud=azure [deploysasum-azure]: https://docs.snowplow.io/docs/getting-started-on-community-edition/what-is-deployed/?warehouse=synapse&cloud=azure @@ -76,10 +69,7 @@ Licensed under the [Snowplow Limited Use License Agreement][license]. 
_(If you a [license-image]: https://img.shields.io/badge/license-Snowplow--Limited--Use-blue.svg?style=flat [license-faq]: https://docs.snowplow.io/docs/contributing/limited-use-license-faq/ -[discourse-image]: https://img.shields.io/discourse/posts?server=https%3A%2F%2Fdiscourse.snowplow.io%2F -[discourse]: http://discourse.snowplow.io/ - [release]: https://github.com/snowplow/snowplow/releases -[release-badge]: https://img.shields.io/badge/Snowplow-25.06-6638b8 +[release-badge]: https://img.shields.io/badge/Snowplow-25.10-6638b8 [tf-docs]: https://github.com/terraform-docs/terraform-docs diff --git a/terraform/aws/iglu_server/default/README.md b/terraform/aws/iglu_server/default/README.md index e87a797..4fdb2c0 100644 --- a/terraform/aws/iglu_server/default/README.md +++ b/terraform/aws/iglu_server/default/README.md @@ -17,8 +17,8 @@ | Name | Source | Version | |------|--------|---------| | [iglu\_lb](#module\_iglu\_lb) | snowplow-devops/alb/aws | 0.2.0 | -| [iglu\_rds](#module\_iglu\_rds) | snowplow-devops/rds/aws | 0.4.0 | -| [iglu\_server](#module\_iglu\_server) | snowplow-devops/iglu-server-ec2/aws | 0.5.0 | +| [iglu\_rds](#module\_iglu\_rds) | snowplow-devops/rds/aws | 0.5.0 | +| [iglu\_server](#module\_iglu\_server) | snowplow-devops/iglu-server-ec2/aws | 0.6.1 | ## Resources @@ -43,7 +43,7 @@ | [cloudwatch\_logs\_enabled](#input\_cloudwatch\_logs\_enabled) | Whether application logs should be reported to CloudWatch | `bool` | `true` | no | | [cloudwatch\_logs\_retention\_days](#input\_cloudwatch\_logs\_retention\_days) | The length of time in days to retain logs for | `number` | `7` | no | | [iam\_permissions\_boundary](#input\_iam\_permissions\_boundary) | The permissions boundary ARN to set on IAM roles created | `string` | `""` | no | -| [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer |
object({
enabled = bool
certificate_arn = string
}) | {
"certificate_arn": "",
"enabled": false
} | no |
+| [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer | object({
enabled = bool
certificate_arn = string
}) | {
"certificate_arn": "",
"enabled": false
} | no |
| [tags](#input\_tags) | The tags to append to the resources in this module | `map(string)` | `{}` | no |
| [telemetry\_enabled](#input\_telemetry\_enabled) | Whether or not to send telemetry information back to Snowplow Analytics Ltd | `bool` | `true` | no |
| [user\_provided\_id](#input\_user\_provided\_id) | An optional unique identifier to identify the telemetry events emitted by this stack | `string` | `""` | no |
diff --git a/terraform/aws/iglu_server/default/main.tf b/terraform/aws/iglu_server/default/main.tf
index 77b6a20..cae8158 100644
--- a/terraform/aws/iglu_server/default/main.tf
+++ b/terraform/aws/iglu_server/default/main.tf
@@ -38,12 +38,10 @@ module "iglu_lb" {
module "iglu_server" {
source = "snowplow-devops/iglu-server-ec2/aws"
- version = "0.5.0"
+ version = "0.6.1"
accept_limited_use_license = var.accept_limited_use_license
- app_version = "0.14.0"
-
name = "${var.prefix}-iglu-server"
vpc_id = var.vpc_id
subnet_ids = var.public_subnet_ids
diff --git a/terraform/aws/iglu_server/secure/README.md b/terraform/aws/iglu_server/secure/README.md
index c8bef9a..e6d8148 100644
--- a/terraform/aws/iglu_server/secure/README.md
+++ b/terraform/aws/iglu_server/secure/README.md
@@ -17,8 +17,8 @@
| Name | Source | Version |
|------|--------|---------|
| [iglu\_lb](#module\_iglu\_lb) | snowplow-devops/alb/aws | 0.2.0 |
-| [iglu\_rds](#module\_iglu\_rds) | snowplow-devops/rds/aws | 0.4.0 |
-| [iglu\_server](#module\_iglu\_server) | snowplow-devops/iglu-server-ec2/aws | 0.5.0 |
+| [iglu\_rds](#module\_iglu\_rds) | snowplow-devops/rds/aws | 0.5.0 |
+| [iglu\_server](#module\_iglu\_server) | snowplow-devops/iglu-server-ec2/aws | 0.6.1 |
## Resources
@@ -44,7 +44,7 @@
| [cloudwatch\_logs\_enabled](#input\_cloudwatch\_logs\_enabled) | Whether application logs should be reported to CloudWatch | `bool` | `true` | no |
| [cloudwatch\_logs\_retention\_days](#input\_cloudwatch\_logs\_retention\_days) | The length of time in days to retain logs for | `number` | `7` | no |
| [iam\_permissions\_boundary](#input\_iam\_permissions\_boundary) | The permissions boundary ARN to set on IAM roles created | `string` | `""` | no |
-| [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer | object({
enabled = bool
certificate_arn = string
}) | {
"certificate_arn": "",
"enabled": false
} | no |
+| [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer | object({
enabled = bool
certificate_arn = string
}) | {
"certificate_arn": "",
"enabled": false
} | no |
| [tags](#input\_tags) | The tags to append to the resources in this module | `map(string)` | `{}` | no |
| [telemetry\_enabled](#input\_telemetry\_enabled) | Whether or not to send telemetry information back to Snowplow Analytics Ltd | `bool` | `true` | no |
| [user\_provided\_id](#input\_user\_provided\_id) | An optional unique identifier to identify the telemetry events emitted by this stack | `string` | `""` | no |
diff --git a/terraform/aws/iglu_server/secure/main.tf b/terraform/aws/iglu_server/secure/main.tf
index b7e407e..1d2eca3 100644
--- a/terraform/aws/iglu_server/secure/main.tf
+++ b/terraform/aws/iglu_server/secure/main.tf
@@ -38,12 +38,10 @@ module "iglu_lb" {
module "iglu_server" {
source = "snowplow-devops/iglu-server-ec2/aws"
- version = "0.5.0"
+ version = "0.6.1"
accept_limited_use_license = var.accept_limited_use_license
- app_version = "0.14.0"
-
name = "${var.prefix}-iglu-server"
vpc_id = var.vpc_id
subnet_ids = var.private_subnet_ids
diff --git a/terraform/aws/pipeline/default/README.md b/terraform/aws/pipeline/default/README.md
index 95d2148..86d0524 100644
--- a/terraform/aws/pipeline/default/README.md
+++ b/terraform/aws/pipeline/default/README.md
@@ -18,25 +18,19 @@
|------|--------|---------|
| [bad\_1\_stream](#module\_bad\_1\_stream) | snowplow-devops/kinesis-stream/aws | 0.3.0 |
| [bad\_2\_stream](#module\_bad\_2\_stream) | snowplow-devops/kinesis-stream/aws | 0.3.0 |
-| [collector\_kinesis](#module\_collector\_kinesis) | snowplow-devops/collector-kinesis-ec2/aws | 0.9.0 |
+| [collector\_kinesis](#module\_collector\_kinesis) | snowplow-devops/collector-kinesis-ec2/aws | 0.10.1 |
| [collector\_lb](#module\_collector\_lb) | snowplow-devops/alb/aws | 0.2.0 |
-| [db\_loader](#module\_db\_loader) | snowplow-devops/databricks-loader-ec2/aws | 0.2.0 |
-| [db\_transformer\_wrp](#module\_db\_transformer\_wrp) | snowplow-devops/transformer-kinesis-ec2/aws | 0.4.0 |
-| [enrich\_kinesis](#module\_enrich\_kinesis) | snowplow-devops/enrich-kinesis-ec2/aws | 0.6.0 |
+| [db\_loader](#module\_db\_loader) | snowplow-devops/databricks-loader-ec2/aws | 0.3.0 |
+| [db\_transformer\_wrp](#module\_db\_transformer\_wrp) | snowplow-devops/transformer-kinesis-ec2/aws | 0.5.0 |
+| [enrich\_kinesis](#module\_enrich\_kinesis) | snowplow-devops/enrich-kinesis-ec2/aws | 0.7.1 |
| [enriched\_stream](#module\_enriched\_stream) | snowplow-devops/kinesis-stream/aws | 0.3.0 |
-| [postgres\_loader\_bad](#module\_postgres\_loader\_bad) | snowplow-devops/postgres-loader-kinesis-ec2/aws | 0.5.0 |
-| [postgres\_loader\_enriched](#module\_postgres\_loader\_enriched) | snowplow-devops/postgres-loader-kinesis-ec2/aws | 0.5.0 |
-| [postgres\_loader\_rds](#module\_postgres\_loader\_rds) | snowplow-devops/rds/aws | 0.4.0 |
| [raw\_stream](#module\_raw\_stream) | snowplow-devops/kinesis-stream/aws | 0.3.0 |
-| [rs\_loader](#module\_rs\_loader) | snowplow-devops/redshift-loader-ec2/aws | 0.2.0 |
-| [rs\_transformer\_stsv](#module\_rs\_transformer\_stsv) | snowplow-devops/transformer-kinesis-ec2/aws | 0.4.0 |
-| [s3\_loader\_bad](#module\_s3\_loader\_bad) | snowplow-devops/s3-loader-kinesis-ec2/aws | 0.5.0 |
-| [s3\_loader\_enriched](#module\_s3\_loader\_enriched) | snowplow-devops/s3-loader-kinesis-ec2/aws | 0.5.0 |
-| [s3\_loader\_raw](#module\_s3\_loader\_raw) | snowplow-devops/s3-loader-kinesis-ec2/aws | 0.5.0 |
+| [rs\_loader](#module\_rs\_loader) | snowplow-devops/redshift-loader-ec2/aws | 0.4.0 |
+| [rs\_transformer\_stsv](#module\_rs\_transformer\_stsv) | snowplow-devops/transformer-kinesis-ec2/aws | 0.5.0 |
+| [s3\_loader\_bad](#module\_s3\_loader\_bad) | snowplow-devops/s3-loader-kinesis-ec2/aws | 0.6.0 |
+| [s3\_loader\_enriched](#module\_s3\_loader\_enriched) | snowplow-devops/s3-loader-kinesis-ec2/aws | 0.6.0 |
| [s3\_pipeline\_bucket](#module\_s3\_pipeline\_bucket) | snowplow-devops/s3-bucket/aws | 0.2.0 |
-| [sf\_loader](#module\_sf\_loader) | snowplow-devops/snowflake-loader-ec2/aws | 0.3.0 |
-| [sf\_transformer\_wrj](#module\_sf\_transformer\_wrj) | snowplow-devops/transformer-kinesis-ec2/aws | 0.4.0 |
-| [snowflake\_streaming\_loader\_enriched](#module\_snowflake\_streaming\_loader\_enriched) | snowplow-devops/snowflake-streaming-loader-ec2/aws | 0.1.0 |
+| [snowflake\_streaming\_loader\_enriched](#module\_snowflake\_streaming\_loader\_enriched) | snowplow-devops/snowflake-streaming-loader-ec2/aws | 0.2.1 |
## Resources
@@ -45,7 +39,6 @@
| [aws_key_pair.pipeline](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/key_pair) | resource |
| [aws_sqs_queue.db_message_queue](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue) | resource |
| [aws_sqs_queue.rs_message_queue](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue) | resource |
-| [aws_sqs_queue.sf_message_queue](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue) | resource |
## Inputs
@@ -73,12 +66,6 @@
| [databricks\_transformer\_window\_period\_min](#input\_databricks\_transformer\_window\_period\_min) | Frequency to emit transforming finished message - 5,10,15,20,30,60 etc minutes | `number` | `5` | no |
| [iam\_permissions\_boundary](#input\_iam\_permissions\_boundary) | The permissions boundary ARN to set on IAM roles created | `string` | `""` | no |
| [pipeline\_kcl\_write\_max\_capacity](#input\_pipeline\_kcl\_write\_max\_capacity) | Increasing this is important to increase throughput at very high pipeline volumes | `number` | `50` | no |
-| [postgres\_db\_enabled](#input\_postgres\_db\_enabled) | Whether to enable loading into a Postgres Database | `bool` | `false` | no |
-| [postgres\_db\_ip\_allowlist](#input\_postgres\_db\_ip\_allowlist) | An optional list of CIDR ranges to allow traffic from | `list(any)` | `[]` | no |
-| [postgres\_db\_name](#input\_postgres\_db\_name) | The name of the database to connect to | `string` | `""` | no |
-| [postgres\_db\_password](#input\_postgres\_db\_password) | The password to use to connect to the database | `string` | `""` | no |
-| [postgres\_db\_publicly\_accessible](#input\_postgres\_db\_publicly\_accessible) | Whether to make the Postgres RDS instance accessible over the internet | `bool` | `false` | no |
-| [postgres\_db\_username](#input\_postgres\_db\_username) | The username to use to connect to the database | `string` | `""` | no |
| [redshift\_database](#input\_redshift\_database) | Redshift database name | `string` | `""` | no |
| [redshift\_enabled](#input\_redshift\_enabled) | Whether to enable loading into a Redshift Database | `bool` | `false` | no |
| [redshift\_host](#input\_redshift\_host) | Redshift cluster hostname | `string` | `""` | no |
@@ -90,23 +77,13 @@
| [s3\_bucket\_deploy](#input\_s3\_bucket\_deploy) | Whether this module should create a new bucket with the specified name - if the bucket already exists set this to false | `bool` | `true` | no |
| [s3\_bucket\_object\_prefix](#input\_s3\_bucket\_object\_prefix) | An optional prefix under which Snowplow data will be saved (Note: your prefix must end with a trailing '/') | `string` | `""` | no |
| [s3\_enriched\_enabled](#input\_s3\_enriched\_enabled) | Whether to enable loading of enriched data into S3 from Kinesis | `bool` | `true` | no |
-| [s3\_raw\_enabled](#input\_s3\_raw\_enabled) | Whether to enable loading of raw data into S3 from Kinesis | `bool` | `false` | no |
-| [snowflake\_account](#input\_snowflake\_account) | Snowflake account to use | `string` | `""` | no |
-| [snowflake\_database](#input\_snowflake\_database) | Snowflake database name | `string` | `""` | no |
-| [snowflake\_enabled](#input\_snowflake\_enabled) | Whether to enable loading into a Snowflake Database | `bool` | `false` | no |
-| [snowflake\_loader\_password](#input\_snowflake\_loader\_password) | The password to use for the loader user | `string` | `""` | no |
-| [snowflake\_loader\_user](#input\_snowflake\_loader\_user) | The Snowflake user used by Snowflake Loader | `string` | `""` | no |
-| [snowflake\_region](#input\_snowflake\_region) | Region of Snowflake account | `string` | `""` | no |
-| [snowflake\_schema](#input\_snowflake\_schema) | Snowflake schema name | `string` | `""` | no |
| [snowflake\_streaming\_account\_url](#input\_snowflake\_streaming\_account\_url) | Snowflake account URL to use | `string` | `""` | no |
| [snowflake\_streaming\_database](#input\_snowflake\_streaming\_database) | Snowflake database name | `string` | `""` | no |
| [snowflake\_streaming\_enabled](#input\_snowflake\_streaming\_enabled) | Whether to enable loading into a Snowflake Database with a Streaming Loader | `bool` | `false` | no |
| [snowflake\_streaming\_loader\_private\_key](#input\_snowflake\_streaming\_loader\_private\_key) | The private key to use for the loader user | `string` | `""` | no |
| [snowflake\_streaming\_loader\_user](#input\_snowflake\_streaming\_loader\_user) | The Snowflake user used by Snowflake Streaming Loader | `string` | `""` | no |
| [snowflake\_streaming\_schema](#input\_snowflake\_streaming\_schema) | Snowflake schema name | `string` | `""` | no |
-| [snowflake\_transformer\_window\_period\_min](#input\_snowflake\_transformer\_window\_period\_min) | Frequency to emit transforming finished message - 5,10,15,20,30,60 etc minutes | `number` | `5` | no |
-| [snowflake\_warehouse](#input\_snowflake\_warehouse) | Snowflake warehouse name | `string` | `""` | no |
-| [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer | object({
enabled = bool
certificate_arn = string
}) | {
"certificate_arn": "",
"enabled": false
} | no |
+| [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer | object({
enabled = bool
certificate_arn = string
}) | {
"certificate_arn": "",
"enabled": false
} | no |
| [tags](#input\_tags) | The tags to append to the resources in this module | `map(string)` | `{}` | no |
| [telemetry\_enabled](#input\_telemetry\_enabled) | Whether or not to send telemetry information back to Snowplow Analytics Ltd | `bool` | `true` | no |
| [user\_provided\_id](#input\_user\_provided\_id) | An optional unique identifier to identify the telemetry events emitted by this stack | `string` | `""` | no |
@@ -116,6 +93,3 @@
| Name | Description |
|------|-------------|
| [collector\_dns\_name](#output\_collector\_dns\_name) | The ALB DNS name for the Pipeline Collector |
-| [postgres\_db\_address](#output\_postgres\_db\_address) | The RDS DNS name where your data is being streamed |
-| [postgres\_db\_id](#output\_postgres\_db\_id) | The ID of the RDS instance |
-| [postgres\_db\_port](#output\_postgres\_db\_port) | The RDS port where your data is being streamed |
diff --git a/terraform/aws/pipeline/default/main.tf b/terraform/aws/pipeline/default/main.tf
index 160fedf..b5a47b0 100644
--- a/terraform/aws/pipeline/default/main.tf
+++ b/terraform/aws/pipeline/default/main.tf
@@ -86,12 +86,10 @@ module "collector_lb" {
module "collector_kinesis" {
source = "snowplow-devops/collector-kinesis-ec2/aws"
- version = "0.9.0"
+ version = "0.10.1"
accept_limited_use_license = var.accept_limited_use_license
- app_version = "3.3.0"
-
name = "${var.prefix}-collector-server"
vpc_id = var.vpc_id
subnet_ids = var.public_subnet_ids
@@ -118,12 +116,10 @@ module "collector_kinesis" {
# 3. Deploy Enrichment
module "enrich_kinesis" {
source = "snowplow-devops/enrich-kinesis-ec2/aws"
- version = "0.6.0"
+ version = "0.7.1"
accept_limited_use_license = var.accept_limited_use_license
- app_version = "5.2.0"
-
name = "${var.prefix}-enrich-server"
vpc_id = var.vpc_id
subnet_ids = var.public_subnet_ids
diff --git a/terraform/aws/pipeline/default/outputs.tf b/terraform/aws/pipeline/default/outputs.tf
index e15163a..1d1e81a 100644
--- a/terraform/aws/pipeline/default/outputs.tf
+++ b/terraform/aws/pipeline/default/outputs.tf
@@ -2,20 +2,3 @@ output "collector_dns_name" {
description = "The ALB DNS name for the Pipeline Collector"
value = module.collector_lb.dns_name
}
-
-# --- Target: PostgreSQL
-
-output "postgres_db_address" {
- description = "The RDS DNS name where your data is being streamed"
- value = var.postgres_db_enabled ? module.postgres_loader_rds[0].address : null
-}
-
-output "postgres_db_port" {
- description = "The RDS port where your data is being streamed"
- value = var.postgres_db_enabled ? module.postgres_loader_rds[0].port : null
-}
-
-output "postgres_db_id" {
- description = "The ID of the RDS instance"
- value = var.postgres_db_enabled ? module.postgres_loader_rds[0].id : null
-}
diff --git a/terraform/aws/pipeline/default/target_amazon_s3.tf b/terraform/aws/pipeline/default/target_amazon_s3.tf
index e6dae32..923efc4 100644
--- a/terraform/aws/pipeline/default/target_amazon_s3.tf
+++ b/terraform/aws/pipeline/default/target_amazon_s3.tf
@@ -1,38 +1,6 @@
-module "s3_loader_raw" {
- source = "snowplow-devops/s3-loader-kinesis-ec2/aws"
- version = "0.5.0"
-
- accept_limited_use_license = var.accept_limited_use_license
-
- count = var.s3_raw_enabled ? 1 : 0
-
- name = "${var.prefix}-s3-loader-raw-server"
- vpc_id = var.vpc_id
- subnet_ids = var.public_subnet_ids
- in_stream_name = module.raw_stream.name
- bad_stream_name = module.bad_1_stream.name
- s3_bucket_name = local.s3_pipeline_bucket_name
- s3_object_prefix = "${var.s3_bucket_object_prefix}raw/"
-
- ssh_key_name = aws_key_pair.pipeline.key_name
- ssh_ip_allowlist = var.ssh_ip_allowlist
-
- telemetry_enabled = var.telemetry_enabled
- user_provided_id = var.user_provided_id
-
- iam_permissions_boundary = var.iam_permissions_boundary
-
- kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity
-
- tags = var.tags
-
- cloudwatch_logs_enabled = var.cloudwatch_logs_enabled
- cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days
-}
-
module "s3_loader_bad" {
source = "snowplow-devops/s3-loader-kinesis-ec2/aws"
- version = "0.5.0"
+ version = "0.6.0"
accept_limited_use_license = var.accept_limited_use_license
@@ -67,7 +35,7 @@ module "s3_loader_bad" {
module "s3_loader_enriched" {
source = "snowplow-devops/s3-loader-kinesis-ec2/aws"
- version = "0.5.0"
+ version = "0.6.0"
accept_limited_use_license = var.accept_limited_use_license
diff --git a/terraform/aws/pipeline/default/target_databricks.tf b/terraform/aws/pipeline/default/target_databricks.tf
index a92b8c8..f169f23 100644
--- a/terraform/aws/pipeline/default/target_databricks.tf
+++ b/terraform/aws/pipeline/default/target_databricks.tf
@@ -9,7 +9,7 @@ resource "aws_sqs_queue" "db_message_queue" {
module "db_transformer_wrp" {
source = "snowplow-devops/transformer-kinesis-ec2/aws"
- version = "0.4.0"
+ version = "0.5.0"
accept_limited_use_license = var.accept_limited_use_license
@@ -19,6 +19,8 @@ module "db_transformer_wrp" {
vpc_id = var.vpc_id
subnet_ids = var.public_subnet_ids
+ instance_type = "t3a.large"
+
stream_name = module.enriched_stream.name
s3_bucket_name = local.s3_pipeline_bucket_name
s3_bucket_object_prefix = "${var.s3_bucket_object_prefix}transformed/good/widerow/parquet"
@@ -48,7 +50,7 @@ module "db_transformer_wrp" {
module "db_loader" {
source = "snowplow-devops/databricks-loader-ec2/aws"
- version = "0.2.0"
+ version = "0.3.0"
accept_limited_use_license = var.accept_limited_use_license
@@ -58,6 +60,8 @@ module "db_loader" {
vpc_id = var.vpc_id
subnet_ids = var.public_subnet_ids
+ instance_type = "t3a.small"
+
sqs_queue_name = aws_sqs_queue.db_message_queue[0].name
deltalake_catalog = var.databricks_catalog
diff --git a/terraform/aws/pipeline/default/target_postgres.tf b/terraform/aws/pipeline/default/target_postgres.tf
deleted file mode 100644
index 7643831..0000000
--- a/terraform/aws/pipeline/default/target_postgres.tf
+++ /dev/null
@@ -1,104 +0,0 @@
-module "postgres_loader_rds" {
- source = "snowplow-devops/rds/aws"
- version = "0.5.0"
-
- count = var.postgres_db_enabled ? 1 : 0
-
- name = "${var.prefix}-pipeline-rds"
- vpc_id = var.vpc_id
- subnet_ids = var.public_subnet_ids
- db_name = var.postgres_db_name
- db_username = var.postgres_db_username
- db_password = var.postgres_db_password
-
- publicly_accessible = var.postgres_db_publicly_accessible
- additional_ip_allowlist = var.postgres_db_ip_allowlist
-
- ca_cert_identifier = "rds-ca-rsa2048-g1"
-
- tags = var.tags
-}
-
-module "postgres_loader_enriched" {
- source = "snowplow-devops/postgres-loader-kinesis-ec2/aws"
- version = "0.5.0"
-
- accept_limited_use_license = var.accept_limited_use_license
-
- count = var.postgres_db_enabled ? 1 : 0
-
- name = "${var.prefix}-postgres-loader-enriched-server"
- vpc_id = var.vpc_id
- subnet_ids = var.public_subnet_ids
-
- in_stream_name = module.enriched_stream.name
- purpose = "ENRICHED_EVENTS"
- schema_name = "atomic"
-
- ssh_key_name = aws_key_pair.pipeline.key_name
- ssh_ip_allowlist = var.ssh_ip_allowlist
-
- iam_permissions_boundary = var.iam_permissions_boundary
-
- telemetry_enabled = var.telemetry_enabled
- user_provided_id = var.user_provided_id
-
- # Linking in the custom Iglu Server here
- custom_iglu_resolvers = local.custom_iglu_resolvers
-
- db_sg_id = module.postgres_loader_rds[0].sg_id
- db_host = module.postgres_loader_rds[0].address
- db_port = module.postgres_loader_rds[0].port
- db_name = var.postgres_db_name
- db_username = var.postgres_db_username
- db_password = var.postgres_db_password
-
- kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity
-
- tags = var.tags
-
- cloudwatch_logs_enabled = var.cloudwatch_logs_enabled
- cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days
-}
-
-module "postgres_loader_bad" {
- source = "snowplow-devops/postgres-loader-kinesis-ec2/aws"
- version = "0.5.0"
-
- accept_limited_use_license = var.accept_limited_use_license
-
- count = var.postgres_db_enabled ? 1 : 0
-
- name = "${var.prefix}-postgres-loader-bad-server"
- vpc_id = var.vpc_id
- subnet_ids = var.public_subnet_ids
-
- in_stream_name = module.bad_1_stream.name
- purpose = "JSON"
- schema_name = "atomic_bad"
-
- ssh_key_name = aws_key_pair.pipeline.key_name
- ssh_ip_allowlist = var.ssh_ip_allowlist
-
- telemetry_enabled = var.telemetry_enabled
- user_provided_id = var.user_provided_id
-
- iam_permissions_boundary = var.iam_permissions_boundary
-
- # Linking in the custom Iglu Server here
- custom_iglu_resolvers = local.custom_iglu_resolvers
-
- db_sg_id = module.postgres_loader_rds[0].sg_id
- db_host = module.postgres_loader_rds[0].address
- db_port = module.postgres_loader_rds[0].port
- db_name = var.postgres_db_name
- db_username = var.postgres_db_username
- db_password = var.postgres_db_password
-
- kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity
-
- tags = var.tags
-
- cloudwatch_logs_enabled = var.cloudwatch_logs_enabled
- cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days
-}
diff --git a/terraform/aws/pipeline/default/target_redshift.tf b/terraform/aws/pipeline/default/target_redshift.tf
index f9944bc..9c34b19 100644
--- a/terraform/aws/pipeline/default/target_redshift.tf
+++ b/terraform/aws/pipeline/default/target_redshift.tf
@@ -9,7 +9,7 @@ resource "aws_sqs_queue" "rs_message_queue" {
module "rs_transformer_stsv" {
source = "snowplow-devops/transformer-kinesis-ec2/aws"
- version = "0.4.0"
+ version = "0.5.0"
accept_limited_use_license = var.accept_limited_use_license
@@ -19,6 +19,8 @@ module "rs_transformer_stsv" {
vpc_id = var.vpc_id
subnet_ids = var.public_subnet_ids
+ instance_type = "t3a.large"
+
stream_name = module.enriched_stream.name
s3_bucket_name = local.s3_pipeline_bucket_name
s3_bucket_object_prefix = "${var.s3_bucket_object_prefix}transformed/good/shredded/tsv"
@@ -48,7 +50,7 @@ module "rs_transformer_stsv" {
module "rs_loader" {
source = "snowplow-devops/redshift-loader-ec2/aws"
- version = "0.2.0"
+ version = "0.4.0"
accept_limited_use_license = var.accept_limited_use_license
@@ -58,6 +60,8 @@ module "rs_loader" {
vpc_id = var.vpc_id
subnet_ids = var.public_subnet_ids
+ instance_type = "t3a.small"
+
sqs_queue_name = aws_sqs_queue.rs_message_queue[0].name
redshift_host = var.redshift_host
diff --git a/terraform/aws/pipeline/default/target_snowflake.tf b/terraform/aws/pipeline/default/target_snowflake.tf
deleted file mode 100644
index e7acce4..0000000
--- a/terraform/aws/pipeline/default/target_snowflake.tf
+++ /dev/null
@@ -1,86 +0,0 @@
-resource "aws_sqs_queue" "sf_message_queue" {
- count = var.snowflake_enabled ? 1 : 0
-
- content_based_deduplication = true
- name = "${var.prefix}-sf-loader.fifo"
- fifo_queue = true
- kms_master_key_id = "alias/aws/sqs"
-}
-
-module "sf_transformer_wrj" {
- source = "snowplow-devops/transformer-kinesis-ec2/aws"
- version = "0.4.0"
-
- accept_limited_use_license = var.accept_limited_use_license
-
- count = var.snowflake_enabled ? 1 : 0
-
- name = "${var.prefix}-transformer-server-wrj"
- vpc_id = var.vpc_id
- subnet_ids = var.public_subnet_ids
-
- stream_name = module.enriched_stream.name
- s3_bucket_name = local.s3_pipeline_bucket_name
- s3_bucket_object_prefix = "${var.s3_bucket_object_prefix}transformed/good/widerow/json"
- window_period_min = var.snowflake_transformer_window_period_min
- sqs_queue_name = aws_sqs_queue.sf_message_queue[0].name
-
- transformation_type = "widerow"
- widerow_file_format = "json"
-
- ssh_key_name = aws_key_pair.pipeline.key_name
- ssh_ip_allowlist = var.ssh_ip_allowlist
-
- custom_iglu_resolvers = local.custom_iglu_resolvers
-
- telemetry_enabled = var.telemetry_enabled
- user_provided_id = var.user_provided_id
-
- iam_permissions_boundary = var.iam_permissions_boundary
-
- kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity
-
- tags = var.tags
-
- cloudwatch_logs_enabled = var.cloudwatch_logs_enabled
- cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days
-}
-
-module "sf_loader" {
- source = "snowplow-devops/snowflake-loader-ec2/aws"
- version = "0.3.0"
-
- accept_limited_use_license = var.accept_limited_use_license
-
- count = var.snowflake_enabled ? 1 : 0
-
- name = "${var.prefix}-sf-loader-server"
- vpc_id = var.vpc_id
- subnet_ids = var.public_subnet_ids
-
- sqs_queue_name = aws_sqs_queue.sf_message_queue[0].name
-
- snowflake_loader_user = var.snowflake_loader_user
- snowflake_password = var.snowflake_loader_password
- snowflake_warehouse = var.snowflake_warehouse
- snowflake_database = var.snowflake_database
- snowflake_schema = var.snowflake_schema
- snowflake_region = var.snowflake_region
- snowflake_account = var.snowflake_account
- snowflake_aws_s3_bucket_name = local.s3_pipeline_bucket_name
-
- ssh_key_name = aws_key_pair.pipeline.key_name
- ssh_ip_allowlist = var.ssh_ip_allowlist
-
- custom_iglu_resolvers = local.custom_iglu_resolvers
-
- telemetry_enabled = var.telemetry_enabled
- user_provided_id = var.user_provided_id
-
- iam_permissions_boundary = var.iam_permissions_boundary
-
- tags = var.tags
-
- cloudwatch_logs_enabled = var.cloudwatch_logs_enabled
- cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days
-}
diff --git a/terraform/aws/pipeline/default/target_snowflake_streaming.tf b/terraform/aws/pipeline/default/target_snowflake_streaming.tf
index 26f677d..dcf5633 100644
--- a/terraform/aws/pipeline/default/target_snowflake_streaming.tf
+++ b/terraform/aws/pipeline/default/target_snowflake_streaming.tf
@@ -1,6 +1,6 @@
module "snowflake_streaming_loader_enriched" {
source = "snowplow-devops/snowflake-streaming-loader-ec2/aws"
- version = "0.1.0"
+ version = "0.2.1"
accept_limited_use_license = var.accept_limited_use_license
diff --git a/terraform/aws/pipeline/default/terraform.tfvars b/terraform/aws/pipeline/default/terraform.tfvars
index 7f5506b..b48f062 100644
--- a/terraform/aws/pipeline/default/terraform.tfvars
+++ b/terraform/aws/pipeline/default/terraform.tfvars
@@ -40,38 +40,9 @@ ssl_information = {
# --- TARGETS CONFIGURATION ZONE --- #
# --- Target: Amazon S3
-s3_raw_enabled = false
s3_bad_enabled = true
s3_enriched_enabled = true
-# --- Target: PostgreSQL
-postgres_db_enabled = false
-
-postgres_db_name = "snowplow"
-postgres_db_username = "snowplow"
-# Change and keep this secret!
-postgres_db_password = "Hell0W0rld!2"
-# IP ranges that you want to query the Pipeline Postgres RDS from
-# Note: this exposes your data to the internet - take care to ensure your allowlist is strict enough
-# or provide a way to access the database through the VPC instead
-postgres_db_publicly_accessible = true
-postgres_db_ip_allowlist = ["999.999.999.999/32", "888.888.888.888/32"]
-
-# --- Target: Snowflake
-# Follow the guide to get input values for the loader:
-# https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start-aws
-snowflake_enabled = false
-
-snowflake_account = "object({
enabled = bool
certificate_arn = string
}) | {
"certificate_arn": "",
"enabled": false
} | no |
+| [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer | <pre>object({<br>  enabled = bool<br>  certificate_arn = string<br>})</pre> | <pre>{<br>  "certificate_arn": "",<br>  "enabled": false<br>}</pre> | no |
| [tags](#input\_tags) | The tags to append to the resources in this module | `map(string)` | `{}` | no |
| [telemetry\_enabled](#input\_telemetry\_enabled) | Whether or not to send telemetry information back to Snowplow Analytics Ltd | `bool` | `true` | no |
| [user\_provided\_id](#input\_user\_provided\_id) | An optional unique identifier to identify the telemetry events emitted by this stack | `string` | `""` | no |
@@ -116,6 +94,3 @@
| Name | Description |
|------|-------------|
| [collector\_dns\_name](#output\_collector\_dns\_name) | The ALB DNS name for the Pipeline Collector |
-| [postgres\_db\_address](#output\_postgres\_db\_address) | The RDS DNS name where your data is being streamed |
-| [postgres\_db\_id](#output\_postgres\_db\_id) | The ID of the RDS instance |
-| [postgres\_db\_port](#output\_postgres\_db\_port) | The RDS port where your data is being streamed |
diff --git a/terraform/aws/pipeline/secure/main.tf b/terraform/aws/pipeline/secure/main.tf
index da0836c..a2fd2a0 100644
--- a/terraform/aws/pipeline/secure/main.tf
+++ b/terraform/aws/pipeline/secure/main.tf
@@ -86,12 +86,10 @@ module "collector_lb" {
module "collector_kinesis" {
source = "snowplow-devops/collector-kinesis-ec2/aws"
- version = "0.9.0"
+ version = "0.10.1"
accept_limited_use_license = var.accept_limited_use_license
- app_version = "3.3.0"
-
name = "${var.prefix}-collector-server"
vpc_id = var.vpc_id
subnet_ids = var.private_subnet_ids
@@ -120,12 +118,10 @@ module "collector_kinesis" {
# 3. Deploy Enrichment
module "enrich_kinesis" {
source = "snowplow-devops/enrich-kinesis-ec2/aws"
- version = "0.6.0"
+ version = "0.7.1"
accept_limited_use_license = var.accept_limited_use_license
- app_version = "5.2.0"
-
name = "${var.prefix}-enrich-server"
vpc_id = var.vpc_id
subnet_ids = var.private_subnet_ids
diff --git a/terraform/aws/pipeline/secure/outputs.tf b/terraform/aws/pipeline/secure/outputs.tf
index e15163a..1d1e81a 100644
--- a/terraform/aws/pipeline/secure/outputs.tf
+++ b/terraform/aws/pipeline/secure/outputs.tf
@@ -2,20 +2,3 @@ output "collector_dns_name" {
description = "The ALB DNS name for the Pipeline Collector"
value = module.collector_lb.dns_name
}
-
-# --- Target: PostgreSQL
-
-output "postgres_db_address" {
- description = "The RDS DNS name where your data is being streamed"
- value = var.postgres_db_enabled ? module.postgres_loader_rds[0].address : null
-}
-
-output "postgres_db_port" {
- description = "The RDS port where your data is being streamed"
- value = var.postgres_db_enabled ? module.postgres_loader_rds[0].port : null
-}
-
-output "postgres_db_id" {
- description = "The ID of the RDS instance"
- value = var.postgres_db_enabled ? module.postgres_loader_rds[0].id : null
-}
diff --git a/terraform/aws/pipeline/secure/target_amazon_s3.tf b/terraform/aws/pipeline/secure/target_amazon_s3.tf
index 89254e4..d0412dc 100644
--- a/terraform/aws/pipeline/secure/target_amazon_s3.tf
+++ b/terraform/aws/pipeline/secure/target_amazon_s3.tf
@@ -1,40 +1,6 @@
-module "s3_loader_raw" {
- source = "snowplow-devops/s3-loader-kinesis-ec2/aws"
- version = "0.5.0"
-
- accept_limited_use_license = var.accept_limited_use_license
-
- count = var.s3_raw_enabled ? 1 : 0
-
- name = "${var.prefix}-s3-loader-raw-server"
- vpc_id = var.vpc_id
- subnet_ids = var.private_subnet_ids
- in_stream_name = module.raw_stream.name
- bad_stream_name = module.bad_1_stream.name
- s3_bucket_name = local.s3_pipeline_bucket_name
- s3_object_prefix = "${var.s3_bucket_object_prefix}raw/"
-
- ssh_key_name = aws_key_pair.pipeline.key_name
- ssh_ip_allowlist = var.ssh_ip_allowlist
-
- telemetry_enabled = var.telemetry_enabled
- user_provided_id = var.user_provided_id
-
- iam_permissions_boundary = var.iam_permissions_boundary
-
- kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity
-
- associate_public_ip_address = false
-
- tags = var.tags
-
- cloudwatch_logs_enabled = var.cloudwatch_logs_enabled
- cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days
-}
-
module "s3_loader_bad" {
source = "snowplow-devops/s3-loader-kinesis-ec2/aws"
- version = "0.5.0"
+ version = "0.6.0"
accept_limited_use_license = var.accept_limited_use_license
@@ -71,7 +37,7 @@ module "s3_loader_bad" {
module "s3_loader_enriched" {
source = "snowplow-devops/s3-loader-kinesis-ec2/aws"
- version = "0.5.0"
+ version = "0.6.0"
accept_limited_use_license = var.accept_limited_use_license
diff --git a/terraform/aws/pipeline/secure/target_databricks.tf b/terraform/aws/pipeline/secure/target_databricks.tf
index 33304c6..e712748 100644
--- a/terraform/aws/pipeline/secure/target_databricks.tf
+++ b/terraform/aws/pipeline/secure/target_databricks.tf
@@ -9,7 +9,7 @@ resource "aws_sqs_queue" "db_message_queue" {
module "db_transformer_wrp" {
source = "snowplow-devops/transformer-kinesis-ec2/aws"
- version = "0.4.0"
+ version = "0.5.0"
accept_limited_use_license = var.accept_limited_use_license
@@ -19,6 +19,8 @@ module "db_transformer_wrp" {
vpc_id = var.vpc_id
subnet_ids = var.private_subnet_ids
+ instance_type = "t3a.large"
+
stream_name = module.enriched_stream.name
s3_bucket_name = local.s3_pipeline_bucket_name
s3_bucket_object_prefix = "${var.s3_bucket_object_prefix}transformed/good/widerow/parquet"
@@ -50,7 +52,7 @@ module "db_transformer_wrp" {
module "db_loader" {
source = "snowplow-devops/databricks-loader-ec2/aws"
- version = "0.2.0"
+ version = "0.3.0"
accept_limited_use_license = var.accept_limited_use_license
@@ -60,6 +62,8 @@ module "db_loader" {
vpc_id = var.vpc_id
subnet_ids = var.private_subnet_ids
+ instance_type = "t3a.small"
+
sqs_queue_name = aws_sqs_queue.db_message_queue[0].name
deltalake_catalog = var.databricks_catalog
diff --git a/terraform/aws/pipeline/secure/target_postgres.tf b/terraform/aws/pipeline/secure/target_postgres.tf
deleted file mode 100644
index 78b39f0..0000000
--- a/terraform/aws/pipeline/secure/target_postgres.tf
+++ /dev/null
@@ -1,108 +0,0 @@
-module "postgres_loader_rds" {
- source = "snowplow-devops/rds/aws"
- version = "0.5.0"
-
- count = var.postgres_db_enabled ? 1 : 0
-
- name = "${var.prefix}-pipeline-rds"
- vpc_id = var.vpc_id
- subnet_ids = var.private_subnet_ids
- db_name = var.postgres_db_name
- db_username = var.postgres_db_username
- db_password = var.postgres_db_password
-
- publicly_accessible = false
- additional_ip_allowlist = var.postgres_db_ip_allowlist
-
- ca_cert_identifier = "rds-ca-rsa2048-g1"
-
- tags = var.tags
-}
-
-module "postgres_loader_enriched" {
- source = "snowplow-devops/postgres-loader-kinesis-ec2/aws"
- version = "0.5.0"
-
- accept_limited_use_license = var.accept_limited_use_license
-
- count = var.postgres_db_enabled ? 1 : 0
-
- name = "${var.prefix}-postgres-loader-enriched-server"
- vpc_id = var.vpc_id
- subnet_ids = var.private_subnet_ids
-
- in_stream_name = module.enriched_stream.name
- purpose = "ENRICHED_EVENTS"
- schema_name = "atomic"
-
- ssh_key_name = aws_key_pair.pipeline.key_name
- ssh_ip_allowlist = var.ssh_ip_allowlist
-
- iam_permissions_boundary = var.iam_permissions_boundary
-
- telemetry_enabled = var.telemetry_enabled
- user_provided_id = var.user_provided_id
-
- # Linking in the custom Iglu Server here
- custom_iglu_resolvers = local.custom_iglu_resolvers
-
- db_sg_id = module.postgres_loader_rds[0].sg_id
- db_host = module.postgres_loader_rds[0].address
- db_port = module.postgres_loader_rds[0].port
- db_name = var.postgres_db_name
- db_username = var.postgres_db_username
- db_password = var.postgres_db_password
-
- kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity
-
- associate_public_ip_address = false
-
- tags = var.tags
-
- cloudwatch_logs_enabled = var.cloudwatch_logs_enabled
- cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days
-}
-
-module "postgres_loader_bad" {
- source = "snowplow-devops/postgres-loader-kinesis-ec2/aws"
- version = "0.5.0"
-
- accept_limited_use_license = var.accept_limited_use_license
-
- count = var.postgres_db_enabled ? 1 : 0
-
- name = "${var.prefix}-postgres-loader-bad-server"
- vpc_id = var.vpc_id
- subnet_ids = var.private_subnet_ids
-
- in_stream_name = module.bad_1_stream.name
- purpose = "JSON"
- schema_name = "atomic_bad"
-
- ssh_key_name = aws_key_pair.pipeline.key_name
- ssh_ip_allowlist = var.ssh_ip_allowlist
-
- telemetry_enabled = var.telemetry_enabled
- user_provided_id = var.user_provided_id
-
- iam_permissions_boundary = var.iam_permissions_boundary
-
- # Linking in the custom Iglu Server here
- custom_iglu_resolvers = local.custom_iglu_resolvers
-
- db_sg_id = module.postgres_loader_rds[0].sg_id
- db_host = module.postgres_loader_rds[0].address
- db_port = module.postgres_loader_rds[0].port
- db_name = var.postgres_db_name
- db_username = var.postgres_db_username
- db_password = var.postgres_db_password
-
- kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity
-
- associate_public_ip_address = false
-
- tags = var.tags
-
- cloudwatch_logs_enabled = var.cloudwatch_logs_enabled
- cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days
-}
diff --git a/terraform/aws/pipeline/secure/target_redshift.tf b/terraform/aws/pipeline/secure/target_redshift.tf
index a4e8244..ff4a969 100644
--- a/terraform/aws/pipeline/secure/target_redshift.tf
+++ b/terraform/aws/pipeline/secure/target_redshift.tf
@@ -9,7 +9,7 @@ resource "aws_sqs_queue" "rs_message_queue" {
module "rs_transformer_stsv" {
source = "snowplow-devops/transformer-kinesis-ec2/aws"
- version = "0.4.0"
+ version = "0.5.0"
accept_limited_use_license = var.accept_limited_use_license
@@ -19,6 +19,8 @@ module "rs_transformer_stsv" {
vpc_id = var.vpc_id
subnet_ids = var.private_subnet_ids
+ instance_type = "t3a.large"
+
stream_name = module.enriched_stream.name
s3_bucket_name = local.s3_pipeline_bucket_name
s3_bucket_object_prefix = "${var.s3_bucket_object_prefix}transformed/good/shredded/tsv"
@@ -50,7 +52,7 @@ module "rs_transformer_stsv" {
module "rs_loader" {
source = "snowplow-devops/redshift-loader-ec2/aws"
- version = "0.2.0"
+ version = "0.4.0"
accept_limited_use_license = var.accept_limited_use_license
@@ -60,6 +62,8 @@ module "rs_loader" {
vpc_id = var.vpc_id
subnet_ids = var.private_subnet_ids
+ instance_type = "t3a.small"
+
sqs_queue_name = aws_sqs_queue.rs_message_queue[0].name
redshift_host = var.redshift_host
diff --git a/terraform/aws/pipeline/secure/target_snowflake.tf b/terraform/aws/pipeline/secure/target_snowflake.tf
deleted file mode 100644
index b7aa2fa..0000000
--- a/terraform/aws/pipeline/secure/target_snowflake.tf
+++ /dev/null
@@ -1,90 +0,0 @@
-resource "aws_sqs_queue" "sf_message_queue" {
- count = var.snowflake_enabled ? 1 : 0
-
- content_based_deduplication = true
- name = "${var.prefix}-sf-loader.fifo"
- fifo_queue = true
- kms_master_key_id = "alias/aws/sqs"
-}
-
-module "sf_transformer_wrj" {
- source = "snowplow-devops/transformer-kinesis-ec2/aws"
- version = "0.4.0"
-
- accept_limited_use_license = var.accept_limited_use_license
-
- count = var.snowflake_enabled ? 1 : 0
-
- name = "${var.prefix}-transformer-server-wrj"
- vpc_id = var.vpc_id
- subnet_ids = var.private_subnet_ids
-
- stream_name = module.enriched_stream.name
- s3_bucket_name = local.s3_pipeline_bucket_name
- s3_bucket_object_prefix = "${var.s3_bucket_object_prefix}transformed/good/widerow/json"
- window_period_min = var.snowflake_transformer_window_period_min
- sqs_queue_name = aws_sqs_queue.sf_message_queue[0].name
-
- transformation_type = "widerow"
- widerow_file_format = "json"
-
- ssh_key_name = aws_key_pair.pipeline.key_name
- ssh_ip_allowlist = var.ssh_ip_allowlist
-
- custom_iglu_resolvers = local.custom_iglu_resolvers
-
- telemetry_enabled = var.telemetry_enabled
- user_provided_id = var.user_provided_id
-
- iam_permissions_boundary = var.iam_permissions_boundary
-
- kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity
-
- associate_public_ip_address = false
-
- tags = var.tags
-
- cloudwatch_logs_enabled = var.cloudwatch_logs_enabled
- cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days
-}
-
-module "sf_loader" {
- source = "snowplow-devops/snowflake-loader-ec2/aws"
- version = "0.3.0"
-
- accept_limited_use_license = var.accept_limited_use_license
-
- count = var.snowflake_enabled ? 1 : 0
-
- name = "${var.prefix}-sf-loader-server"
- vpc_id = var.vpc_id
- subnet_ids = var.private_subnet_ids
-
- sqs_queue_name = aws_sqs_queue.sf_message_queue[0].name
-
- snowflake_loader_user = var.snowflake_loader_user
- snowflake_password = var.snowflake_loader_password
- snowflake_warehouse = var.snowflake_warehouse
- snowflake_database = var.snowflake_database
- snowflake_schema = var.snowflake_schema
- snowflake_region = var.snowflake_region
- snowflake_account = var.snowflake_account
- snowflake_aws_s3_bucket_name = local.s3_pipeline_bucket_name
-
- ssh_key_name = aws_key_pair.pipeline.key_name
- ssh_ip_allowlist = var.ssh_ip_allowlist
-
- custom_iglu_resolvers = local.custom_iglu_resolvers
-
- telemetry_enabled = var.telemetry_enabled
- user_provided_id = var.user_provided_id
-
- iam_permissions_boundary = var.iam_permissions_boundary
-
- associate_public_ip_address = false
-
- tags = var.tags
-
- cloudwatch_logs_enabled = var.cloudwatch_logs_enabled
- cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days
-}
diff --git a/terraform/aws/pipeline/secure/target_snowflake_streaming.tf b/terraform/aws/pipeline/secure/target_snowflake_streaming.tf
index 858d856..7fad6f6 100644
--- a/terraform/aws/pipeline/secure/target_snowflake_streaming.tf
+++ b/terraform/aws/pipeline/secure/target_snowflake_streaming.tf
@@ -1,6 +1,6 @@
module "snowflake_streaming_loader_enriched" {
source = "snowplow-devops/snowflake-streaming-loader-ec2/aws"
- version = "0.1.0"
+ version = "0.2.1"
accept_limited_use_license = var.accept_limited_use_license
diff --git a/terraform/aws/pipeline/secure/terraform.tfvars b/terraform/aws/pipeline/secure/terraform.tfvars
index b48e94f..b9f2a81 100644
--- a/terraform/aws/pipeline/secure/terraform.tfvars
+++ b/terraform/aws/pipeline/secure/terraform.tfvars
@@ -43,37 +43,9 @@ ssl_information = {
# --- TARGETS CONFIGURATION ZONE --- #
# --- Target: Amazon S3
-s3_raw_enabled = false
s3_bad_enabled = true
s3_enriched_enabled = true
-# --- Target: PostgreSQL
-postgres_db_enabled = false
-
-postgres_db_name = "snowplow"
-postgres_db_username = "snowplow"
-# Change and keep this secret!
-postgres_db_password = "Hell0W0rld!2"
-# IP ranges that you want to query the Pipeline Postgres RDS from
-# Note: this exposes your data to the internet - take care to ensure your allowlist is strict enough
-# or provide a way to access the database through the VPC instead
-postgres_db_ip_allowlist = ["999.999.999.999/32", "888.888.888.888/32"]
-
-# --- Target: Snowflake
-# Follow the guide to get input values for the loader:
-# https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start-aws
-snowflake_enabled = false
-
-snowflake_account = "object({
enabled = bool
data = string
password = string
}) | {
"data": "",
"enabled": false,
"password": ""
} | no |
+| [ssl\_information](#input\_ssl\_information) | SSL certificate information to optionally bind to the load balancer | <pre>object({<br>  enabled = bool<br>  data = string<br>  password = string<br>})</pre> | <pre>{<br>  "data": "",<br>  "enabled": false,<br>  "password": ""<br>}</pre> | no |
| [tags](#input\_tags) | The tags to append to the resources in this module | `map(string)` | `{}` | no |
| [telemetry\_enabled](#input\_telemetry\_enabled) | Whether or not to send telemetry information back to Snowplow Analytics Ltd | `bool` | `true` | no |
| [user\_provided\_id](#input\_user\_provided\_id) | An optional unique identifier to identify the telemetry events emitted by this stack | `string` | `""` | no |
diff --git a/terraform/azure/iglu_server/main.tf b/terraform/azure/iglu_server/main.tf
index bf9e0c3..48b8a3d 100644
--- a/terraform/azure/iglu_server/main.tf
+++ b/terraform/azure/iglu_server/main.tf
@@ -1,13 +1,12 @@
module "iglu_db" {
source = "snowplow-devops/postgresql-server/azurerm"
- version = "0.1.1"
+ version = "0.2.0"
name = "${var.prefix}-iglu-db"
resource_group_name = var.resource_group_name
- subnet_id = var.subnet_id_servers
-
- additional_ip_allowlist = var.iglu_db_ip_allowlist
+ vnet_id = var.vnet_id
+ subnet_id = var.subnet_id_database
db_name = var.iglu_db_name
db_username = var.iglu_db_username
@@ -35,12 +34,10 @@ module "iglu_lb" {
module "iglu_server" {
source = "snowplow-devops/iglu-server-vmss/azurerm"
- version = "0.2.0"
+ version = "0.3.0"
accept_limited_use_license = var.accept_limited_use_license
- app_version = "0.14.0"
-
name = "${var.prefix}-iglu-server"
resource_group_name = var.resource_group_name
subnet_id = var.subnet_id_servers
diff --git a/terraform/azure/iglu_server/terraform.tfvars b/terraform/azure/iglu_server/terraform.tfvars
index c3cbb1f..e21cc35 100644
--- a/terraform/azure/iglu_server/terraform.tfvars
+++ b/terraform/azure/iglu_server/terraform.tfvars
@@ -8,9 +8,15 @@ prefix = "snowplow"
# The name of the resource group to deploy Iglu into
resource_group_name = "object({
enabled = bool
data = string
password = string
}) | {
"data": "",
"enabled": false,
"password": ""
} | no |
+| [ssl\_information](#input\_ssl\_information) | SSL certificate information to optionally bind to the load balancer | <pre>object({<br>  enabled = bool<br>  data = string<br>  password = string<br>})</pre> | <pre>{<br>  "data": "",<br>  "enabled": false,<br>  "password": ""<br>}</pre> | no |
| [storage\_account\_deploy](#input\_storage\_account\_deploy) | Whether this module should create a new storage account with the specified name - if the account already exists set this to false | `bool` | `true` | no |
| [stream\_type](#input\_stream\_type) | The stream type to use as the Kafka Cluster between components (options: azure\_event\_hubs, confluent\_cloud) | `string` | `"azure_event_hubs"` | no |
| [tags](#input\_tags) | The tags to append to the resources in this module | `map(string)` | `{}` | no |
diff --git a/terraform/azure/pipeline/main.tf b/terraform/azure/pipeline/main.tf
index 8004ddd..df7c1c4 100644
--- a/terraform/azure/pipeline/main.tf
+++ b/terraform/azure/pipeline/main.tf
@@ -111,12 +111,10 @@ module "collector_lb" {
module "collector_eh" {
source = "snowplow-devops/collector-event-hub-vmss/azurerm"
- version = "0.3.0"
+ version = "0.4.0"
accept_limited_use_license = var.accept_limited_use_license
- app_version = "3.3.0"
-
name = "${var.prefix}-collector"
resource_group_name = var.resource_group_name
subnet_id = var.subnet_id_servers
@@ -147,12 +145,10 @@ module "collector_eh" {
# 4. Deploy Enrich stack
module "enrich_eh" {
source = "snowplow-devops/enrich-event-hub-vmss/azurerm"
- version = "0.3.0"
+ version = "0.4.0"
accept_limited_use_license = var.accept_limited_use_license
- app_version = "5.2.0"
-
name = "${var.prefix}-enrich"
resource_group_name = var.resource_group_name
subnet_id = var.subnet_id_servers
diff --git a/terraform/azure/pipeline/target_lake_loader.tf b/terraform/azure/pipeline/target_lake_loader.tf
index 81e1207..e44dda4 100644
--- a/terraform/azure/pipeline/target_lake_loader.tf
+++ b/terraform/azure/pipeline/target_lake_loader.tf
@@ -10,7 +10,7 @@ module "lake_storage_container" {
module "lake_loader" {
source = "snowplow-devops/lake-loader-vmss/azurerm"
- version = "0.3.0"
+ version = "0.4.0"
accept_limited_use_license = var.accept_limited_use_license
diff --git a/terraform/azure/pipeline/target_snowflake.tf b/terraform/azure/pipeline/target_snowflake.tf
deleted file mode 100644
index cfd802a..0000000
--- a/terraform/azure/pipeline/target_snowflake.tf
+++ /dev/null
@@ -1,110 +0,0 @@
-module "sf_message_queue_eh_topic" {
- source = "snowplow-devops/event-hub/azurerm"
- version = "0.1.1"
-
- count = local.use_azure_event_hubs && var.snowflake_enabled ? 1 : 0
-
- name = "snowflake-loader-topic"
- namespace_name = join("", module.eh_namespace.*.name)
- resource_group_name = var.resource_group_name
-}
-
-locals {
- snowflake_loader_topic_name = local.use_azure_event_hubs ? join("", module.sf_message_queue_eh_topic.*.name) : var.confluent_cloud_snowflake_loader_topic_name
-}
-
-module "sf_transformer_storage_container" {
- source = "snowplow-devops/storage-container/azurerm"
- version = "0.1.1"
-
- count = var.snowflake_enabled ? 1 : 0
-
- name = "snowflake-transformer-container"
- storage_account_name = local.storage_account_name
-}
-
-module "sf_transformer_wrj" {
- source = "snowplow-devops/transformer-event-hub-vmss/azurerm"
- version = "0.3.0"
-
- accept_limited_use_license = var.accept_limited_use_license
-
- count = var.snowflake_enabled ? 1 : 0
-
- name = "${var.prefix}-snowflake-transformer"
- resource_group_name = var.resource_group_name
- subnet_id = var.subnet_id_servers
-
- enriched_topic_name = local.enriched_topic_name
- enriched_topic_kafka_username = local.kafka_username
- enriched_topic_kafka_password = local.use_azure_event_hubs ? join("", module.enriched_eh_topic.*.read_only_primary_connection_string) : var.confluent_cloud_api_secret
- queue_topic_name = local.snowflake_loader_topic_name
- queue_topic_kafka_username = local.kafka_username
- queue_topic_kafka_password = local.use_azure_event_hubs ? join("", module.sf_message_queue_eh_topic.*.read_write_primary_connection_string) : var.confluent_cloud_api_secret
- eh_namespace_name = local.eh_namespace_name
- kafka_brokers = local.kafka_brokers
-
- kafka_source = var.stream_type
-
- storage_account_name = local.storage_account_name
- storage_container_name = module.sf_transformer_storage_container[0].name
- window_period_min = var.snowflake_transformer_window_period_min
-
- widerow_file_format = "json"
-
- ssh_public_key = var.ssh_public_key
- ssh_ip_allowlist = var.ssh_ip_allowlist
-
- telemetry_enabled = var.telemetry_enabled
- user_provided_id = var.user_provided_id
-
- custom_iglu_resolvers = local.custom_iglu_resolvers
-
- tags = var.tags
-
- depends_on = [module.sf_transformer_storage_container]
-}
-
-module "sf_loader" {
- source = "snowplow-devops/snowflake-loader-vmss/azurerm"
- version = "0.3.0"
-
- accept_limited_use_license = var.accept_limited_use_license
-
- count = var.snowflake_enabled ? 1 : 0
-
- name = "${var.prefix}-snowflake-loader"
- resource_group_name = var.resource_group_name
- subnet_id = var.subnet_id_servers
-
- queue_topic_name = local.snowflake_loader_topic_name
- queue_topic_kafka_username = local.kafka_username
- queue_topic_kafka_password = local.use_azure_event_hubs ? join("", module.sf_message_queue_eh_topic.*.read_only_primary_connection_string) : var.confluent_cloud_api_secret
- eh_namespace_name = local.eh_namespace_name
- kafka_brokers = local.kafka_brokers
-
- kafka_source = var.stream_type
-
- storage_account_name = local.storage_account_name
- storage_container_name_for_transformer_output = module.sf_transformer_storage_container[0].name
-
- snowflake_loader_user = var.snowflake_loader_user
- snowflake_password = var.snowflake_loader_password
- snowflake_warehouse = var.snowflake_warehouse
- snowflake_database = var.snowflake_database
- snowflake_schema = var.snowflake_schema
- snowflake_region = var.snowflake_region
- snowflake_account = var.snowflake_account
-
- ssh_public_key = var.ssh_public_key
- ssh_ip_allowlist = var.ssh_ip_allowlist
-
- telemetry_enabled = var.telemetry_enabled
- user_provided_id = var.user_provided_id
-
- custom_iglu_resolvers = local.custom_iglu_resolvers
-
- tags = var.tags
-
- depends_on = [module.sf_transformer_storage_container]
-}
diff --git a/terraform/azure/pipeline/terraform.tfvars b/terraform/azure/pipeline/terraform.tfvars
index 2bfd17d..19a2641 100644
--- a/terraform/azure/pipeline/terraform.tfvars
+++ b/terraform/azure/pipeline/terraform.tfvars
@@ -58,25 +58,9 @@ confluent_cloud_api_secret = ""
confluent_cloud_bootstrap_server = ""
# Names of the created topics within the deployed cluster
-confluent_cloud_raw_topic_name = "raw"
-confluent_cloud_enriched_topic_name = "enriched"
-confluent_cloud_bad_1_topic_name = "bad-1"
-confluent_cloud_snowflake_loader_topic_name = "snowflake-loader"
-
-# --- Target: Snowflake
-# Follow the guide to get input values for the loader:
-# https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start/
-snowflake_enabled = false
-
-snowflake_account = "list(object({
name = string
value = string
})) | `[]` | no |
-| [postgres\_db\_enabled](#input\_postgres\_db\_enabled) | Whether to enable loading into a Postgres Database | `bool` | `false` | no |
-| [postgres\_db\_tier](#input\_postgres\_db\_tier) | The instance type to assign to the deployed Cloud SQL instance | `string` | `"db-g1-small"` | no |
| [ssh\_key\_pairs](#input\_ssh\_key\_pairs) | The list of SSH key-pairs to add to the servers | <pre>list(object({<br>  user_name = string<br>  public_key = string<br>}))</pre> | `[]` | no |
| [ssl\_information](#input\_ssl\_information) | The ID of an Google Managed certificate to bind to the load balancer | <pre>object({<br>  enabled = bool<br>  certificate_id = string<br>})</pre> | <pre>{<br>  "certificate_id": "",<br>  "enabled": false<br>}</pre> | no |
| [telemetry\_enabled](#input\_telemetry\_enabled) | Whether or not to send telemetry information back to Snowplow Analytics Ltd | `bool` | `true` | no |
@@ -69,7 +57,4 @@
|------|-------------|
| [bigquery\_db\_dataset\_id](#output\_bigquery\_db\_dataset\_id) | The ID of the BigQuery dataset where your data is being streamed |
| [bq\_loader\_bad\_rows\_topic\_name](#output\_bq\_loader\_bad\_rows\_topic\_name) | The name of the topic for bad rows emitted from the BigQuery loader |
-| [bq\_loader\_dead\_letter\_bucket\_name](#output\_bq\_loader\_dead\_letter\_bucket\_name) | The name of the GCS bucket for dead letter events emitted from the BigQuery loader |
| [collector\_ip\_address](#output\_collector\_ip\_address) | The IP address for the Pipeline Collector |
-| [postgres\_db\_address](#output\_postgres\_db\_address) | The IP address of the database where your data is being streamed |
-| [postgres\_db\_port](#output\_postgres\_db\_port) | The port of the database where your data is being streamed |
diff --git a/terraform/gcp/pipeline/default/main.tf b/terraform/gcp/pipeline/default/main.tf
index 7d92e9c..57a71f1 100644
--- a/terraform/gcp/pipeline/default/main.tf
+++ b/terraform/gcp/pipeline/default/main.tf
@@ -46,7 +46,7 @@ module "enriched_topic" {
# 2. Deploy Collector stack
module "collector_pubsub" {
source = "snowplow-devops/collector-pubsub-ce/google"
- version = "0.6.0"
+ version = "0.7.0"
accept_limited_use_license = var.accept_limited_use_license
@@ -88,7 +88,7 @@ module "collector_lb" {
# 3. Deploy Enrichment
module "enrich_pubsub" {
source = "snowplow-devops/enrich-pubsub-ce/google"
- version = "0.4.0"
+ version = "0.5.0"
accept_limited_use_license = var.accept_limited_use_license
diff --git a/terraform/gcp/pipeline/default/outputs.tf b/terraform/gcp/pipeline/default/outputs.tf
index 6f80d33..9b29019 100644
--- a/terraform/gcp/pipeline/default/outputs.tf
+++ b/terraform/gcp/pipeline/default/outputs.tf
@@ -3,26 +3,11 @@ output "collector_ip_address" {
value = module.collector_lb.ip_address
}
-output "postgres_db_address" {
- description = "The IP address of the database where your data is being streamed"
- value = join("", module.postgres_db.*.first_ip_address)
-}
-
-output "postgres_db_port" {
- description = "The port of the database where your data is being streamed"
- value = join("", module.postgres_db.*.port)
-}
-
output "bigquery_db_dataset_id" {
description = "The ID of the BigQuery dataset where your data is being streamed"
value = join("", google_bigquery_dataset.bigquery_db.*.dataset_id)
}
-output "bq_loader_dead_letter_bucket_name" {
- description = "The name of the GCS bucket for dead letter events emitted from the BigQuery loader"
- value = join("", google_storage_bucket.bq_loader_dead_letter_bucket.*.name)
-}
-
output "bq_loader_bad_rows_topic_name" {
description = "The name of the topic for bad rows emitted from the BigQuery loader"
value = join("", module.bq_bad_rows_topic.*.name)
diff --git a/terraform/gcp/pipeline/default/target_bigquery.tf b/terraform/gcp/pipeline/default/target_bigquery.tf
index b610e59..97b2183 100644
--- a/terraform/gcp/pipeline/default/target_bigquery.tf
+++ b/terraform/gcp/pipeline/default/target_bigquery.tf
@@ -18,26 +18,9 @@ resource "google_bigquery_dataset" "bigquery_db" {
labels = var.labels
}
-resource "google_storage_bucket" "bq_loader_dead_letter_bucket" {
- count = var.bigquery_db_enabled && var.bigquery_loader_dead_letter_bucket_deploy ? 1 : 0
-
- name = var.bigquery_loader_dead_letter_bucket_name
- location = var.region
- force_destroy = true
-
- labels = var.labels
-}
-
-locals {
- bq_loader_dead_letter_bucket_name = coalesce(
- join("", google_storage_bucket.bq_loader_dead_letter_bucket.*.name),
- var.bigquery_loader_dead_letter_bucket_name,
- )
-}
-
module "bigquery_loader" {
source = "snowplow-devops/bigquery-loader-pubsub-ce/google"
- version = "0.4.0"
+ version = "0.5.0"
accept_limited_use_license = var.accept_limited_use_license
@@ -53,10 +36,9 @@ module "bigquery_loader" {
ssh_ip_allowlist = var.ssh_ip_allowlist
ssh_key_pairs = var.ssh_key_pairs
- input_topic_name = module.enriched_topic.name
- bad_rows_topic_name = join("", module.bq_bad_rows_topic.*.name)
- gcs_dead_letter_bucket_name = local.bq_loader_dead_letter_bucket_name
- bigquery_dataset_id = join("", google_bigquery_dataset.bigquery_db.*.dataset_id)
+ input_topic_name = module.enriched_topic.name
+ bad_rows_topic_id = join("", module.bq_bad_rows_topic.*.id)
+ bigquery_dataset_id = join("", google_bigquery_dataset.bigquery_db.*.dataset_id)
# Linking in the custom Iglu Server here
custom_iglu_resolvers = local.custom_iglu_resolvers
diff --git a/terraform/gcp/pipeline/default/target_postgres.tf b/terraform/gcp/pipeline/default/target_postgres.tf
deleted file mode 100644
index c64da23..0000000
--- a/terraform/gcp/pipeline/default/target_postgres.tf
+++ /dev/null
@@ -1,93 +0,0 @@
-module "postgres_db" {
- source = "snowplow-devops/cloud-sql/google"
- version = "0.4.1"
-
- count = var.postgres_db_enabled ? 1 : 0
-
- name = "${var.prefix}-postgres-db"
-
- region = var.region
- db_name = var.postgres_db_name
- db_username = var.postgres_db_username
- db_password = var.postgres_db_password
-
- authorized_networks = var.postgres_db_authorized_networks
-
- tier = var.postgres_db_tier
-
- labels = var.labels
-}
-
-module "postgres_loader_enriched" {
- source = "snowplow-devops/postgres-loader-pubsub-ce/google"
- version = "0.5.0"
-
- accept_limited_use_license = var.accept_limited_use_license
-
- count = var.postgres_db_enabled ? 1 : 0
-
- name = "${var.prefix}-pg-loader-enriched"
-
- network = var.network
- subnetwork = var.subnetwork
- region = var.region
- project_id = var.project_id
-
- ssh_ip_allowlist = var.ssh_ip_allowlist
- ssh_key_pairs = var.ssh_key_pairs
-
- in_topic_name = module.enriched_topic.name
- purpose = "ENRICHED_EVENTS"
- schema_name = "atomic"
-
- db_instance_name = join("", module.postgres_db.*.connection_name)
- db_port = join("", module.postgres_db.*.port)
- db_name = var.postgres_db_name
- db_username = var.postgres_db_username
- db_password = var.postgres_db_password
-
- # Linking in the custom Iglu Server here
- custom_iglu_resolvers = local.custom_iglu_resolvers
-
- telemetry_enabled = var.telemetry_enabled
- user_provided_id = var.user_provided_id
-
- labels = var.labels
-}
-
-module "postgres_loader_bad" {
- source = "snowplow-devops/postgres-loader-pubsub-ce/google"
- version = "0.5.0"
-
- accept_limited_use_license = var.accept_limited_use_license
-
- count = var.postgres_db_enabled ? 1 : 0
-
- name = "${var.prefix}-pg-loader-bad"
-
- network = var.network
- subnetwork = var.subnetwork
- region = var.region
- project_id = var.project_id
-
- ssh_ip_allowlist = var.ssh_ip_allowlist
- ssh_key_pairs = var.ssh_key_pairs
-
- in_topic_name = module.bad_1_topic.name
- purpose = "JSON"
- schema_name = "atomic_bad"
-
- db_instance_name = join("", module.postgres_db.*.connection_name)
- db_port = join("", module.postgres_db.*.port)
- db_name = var.postgres_db_name
- db_username = var.postgres_db_username
- db_password = var.postgres_db_password
-
- # Linking in the custom Iglu Server here
- custom_iglu_resolvers = local.custom_iglu_resolvers
-
- telemetry_enabled = var.telemetry_enabled
- user_provided_id = var.user_provided_id
-
- labels = var.labels
-}
diff --git a/terraform/gcp/pipeline/default/terraform.tfvars b/terraform/gcp/pipeline/default/terraform.tfvars
index cc38f66..4653650 100644
--- a/terraform/gcp/pipeline/default/terraform.tfvars
+++ b/terraform/gcp/pipeline/default/terraform.tfvars
@@ -42,41 +42,9 @@ ssl_information = {
# --- TARGETS CONFIGURATION ZONE --- #
-# --- Target: PostgreSQL
-postgres_db_enabled = false
-
-postgres_db_name = "snowplow"
-postgres_db_username = "snowplow"
-# Change and keep this secret!
-postgres_db_password = "Hell0W0rld!2"
-# IP ranges that you want to query the Pipeline Postgres Cloud SQL instance from directly over the internet. An alternative access method is to leverage
-# the Cloud SQL Proxy service which creates an IAM authenticated tunnel to the instance
-#
-# Details: https://cloud.google.com/sql/docs/postgres/sql-proxy
-#
-# Note: this exposes your data to the internet - take care to ensure your allowlist is strict enough
-postgres_db_authorized_networks = [
- {
- name = "foo"
- value = "999.999.999.999/32"
- },
- {
- name = "bar"
- value = "888.888.888.888/32"
- }
-]
-# Note: the size of the database instance determines the number of concurrent connections - each Postgres Loader instance creates 10 open connections so having
-# a sufficiently powerful database tier is important to not running out of connection slots
-postgres_db_tier = "db-g1-small"
-
# --- Target: BigQuery
bigquery_db_enabled = false
-# To use an existing bucket set this to false
-bigquery_loader_dead_letter_bucket_deploy = true
-# Must be globally unique so will need to be updated before applying
-bigquery_loader_dead_letter_bucket_name = "sp-bq-loader-dead-letter"
-
# --- ADVANCED CONFIGURATION ZONE --- #
# See for more information: https://registry.terraform.io/modules/snowplow-devops/collector-pubsub-ce/google/latest#telemetry
diff --git a/terraform/gcp/pipeline/default/variables.tf b/terraform/gcp/pipeline/default/variables.tf
index 498cf19..696d30c 100644
--- a/terraform/gcp/pipeline/default/variables.tf
+++ b/terraform/gcp/pipeline/default/variables.tf
@@ -64,61 +64,12 @@ variable "iglu_super_api_key" {
sensitive = true
}
-variable "postgres_db_enabled" {
- description = "Whether to enable loading into a Postgres Database"
- default = false
- type = bool
-}
-
-variable "postgres_db_name" {
- description = "The name of the database to connect to"
- type = string
-}
-
-variable "postgres_db_username" {
- description = "The username to use to connect to the database"
- type = string
-}
-
-variable "postgres_db_password" {
- description = "The password to use to connect to the database"
- type = string
- sensitive = true
-}
-
-variable "postgres_db_authorized_networks" {
- description = "The list of CIDR ranges to allow access to the Pipeline Database over"
- default = []
- type = list(object({
- name = string
- value = string
- }))
-}
-
-variable "postgres_db_tier" {
- description = "The instance type to assign to the deployed Cloud SQL instance"
- type = string
- default = "db-g1-small"
-}
-
variable "bigquery_db_enabled" {
description = "Whether to enable loading into a BigQuery Dataset"
default = false
type = bool
}
-variable "bigquery_loader_dead_letter_bucket_deploy" {
- description = "Whether this module should create a new bucket with the specified name - if the bucket already exists set this to false"
- default = true
- type = bool
-}
-
-variable "bigquery_loader_dead_letter_bucket_name" {
- description = "The name of the GCS bucket to use for dead-letter output of loader"
- default = ""
- type = string
-}
-
variable "telemetry_enabled" {
description = "Whether or not to send telemetry information back to Snowplow Analytics Ltd"
type = bool
diff --git a/terraform/gcp/pipeline/secure/README.md b/terraform/gcp/pipeline/secure/README.md
index 6a7f8c2..62c4df5 100644
--- a/terraform/gcp/pipeline/secure/README.md
+++ b/terraform/gcp/pipeline/secure/README.md
@@ -17,15 +17,12 @@
| Name | Source | Version |
|------|--------|---------|
| [bad\_1\_topic](#module\_bad\_1\_topic) | snowplow-devops/pubsub-topic/google | 0.3.0 |
-| [bigquery\_loader](#module\_bigquery\_loader) | snowplow-devops/bigquery-loader-pubsub-ce/google | 0.4.0 |
+| [bigquery\_loader](#module\_bigquery\_loader) | snowplow-devops/bigquery-loader-pubsub-ce/google | 0.5.0 |
| [bq\_bad\_rows\_topic](#module\_bq\_bad\_rows\_topic) | snowplow-devops/pubsub-topic/google | 0.3.0 |
| [collector\_lb](#module\_collector\_lb) | snowplow-devops/lb/google | 0.3.0 |
-| [collector\_pubsub](#module\_collector\_pubsub) | snowplow-devops/collector-pubsub-ce/google | 0.6.0 |
-| [enrich\_pubsub](#module\_enrich\_pubsub) | snowplow-devops/enrich-pubsub-ce/google | 0.4.0 |
+| [collector\_pubsub](#module\_collector\_pubsub) | snowplow-devops/collector-pubsub-ce/google | 0.7.0 |
+| [enrich\_pubsub](#module\_enrich\_pubsub) | snowplow-devops/enrich-pubsub-ce/google | 0.5.0 |
| [enriched\_topic](#module\_enriched\_topic) | snowplow-devops/pubsub-topic/google | 0.3.0 |
-| [postgres\_db](#module\_postgres\_db) | snowplow-devops/cloud-sql/google | 0.4.1 |
-| [postgres\_loader\_bad](#module\_postgres\_loader\_bad) | snowplow-devops/postgres-loader-pubsub-ce/google | 0.5.0 |
-| [postgres\_loader\_enriched](#module\_postgres\_loader\_enriched) | snowplow-devops/postgres-loader-pubsub-ce/google | 0.5.0 |
| [raw\_topic](#module\_raw\_topic) | snowplow-devops/pubsub-topic/google | 0.3.0 |
## Resources
@@ -33,7 +30,6 @@
| Name | Type |
|------|------|
| [google_bigquery_dataset.bigquery_db](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/bigquery_dataset) | resource |
-| [google_storage_bucket.bq_loader_dead_letter_bucket](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/storage_bucket) | resource |
## Inputs
@@ -42,9 +38,6 @@
| [iglu\_server\_dns\_name](#input\_iglu\_server\_dns\_name) | The DNS name of your Iglu Server | `string` | n/a | yes |
| [iglu\_super\_api\_key](#input\_iglu\_super\_api\_key) | A UUIDv4 string to use as the master API key for Iglu Server management | `string` | n/a | yes |
| [network](#input\_network) | The name of the network to deploy within | `string` | n/a | yes |
-| [postgres\_db\_name](#input\_postgres\_db\_name) | The name of the database to connect to | `string` | n/a | yes |
-| [postgres\_db\_password](#input\_postgres\_db\_password) | The password to use to connect to the database | `string` | n/a | yes |
-| [postgres\_db\_username](#input\_postgres\_db\_username) | The username to use to connect to the database | `string` | n/a | yes |
| [prefix](#input\_prefix) | Will be prefixed to all resource names. Use to easily identify the resources created | `string` | n/a | yes |
| [project\_id](#input\_project\_id) | The project ID in which the stack is being deployed | `string` | n/a | yes |
| [region](#input\_region) | The name of the region to deploy within | `string` | n/a | yes |
@@ -52,12 +45,7 @@
| [subnetwork](#input\_subnetwork) | The name of the sub-network to deploy within | `string` | n/a | yes |
| [accept\_limited\_use\_license](#input\_accept\_limited\_use\_license) | Acceptance of the SLULA terms (https://docs.snowplow.io/limited-use-license-1.0/) | `bool` | `false` | no |
| [bigquery\_db\_enabled](#input\_bigquery\_db\_enabled) | Whether to enable loading into a BigQuery Dataset | `bool` | `false` | no |
-| [bigquery\_loader\_dead\_letter\_bucket\_deploy](#input\_bigquery\_loader\_dead\_letter\_bucket\_deploy) | Whether this module should create a new bucket with the specified name - if the bucket already exists set this to false | `bool` | `true` | no |
-| [bigquery\_loader\_dead\_letter\_bucket\_name](#input\_bigquery\_loader\_dead\_letter\_bucket\_name) | The name of the GCS bucket to use for dead-letter output of loader | `string` | `""` | no |
| [labels](#input\_labels) | The labels to append to the resources in this module | `map(string)` | `{}` | no |
-| [postgres\_db\_authorized\_networks](#input\_postgres\_db\_authorized\_networks) | The list of CIDR ranges to allow access to the Pipeline Database over | list(object({
name = string
value = string
})) | `[]` | no |
-| [postgres\_db\_enabled](#input\_postgres\_db\_enabled) | Whether to enable loading into a Postgres Database | `bool` | `false` | no |
-| [postgres\_db\_tier](#input\_postgres\_db\_tier) | The instance type to assign to the deployed Cloud SQL instance | `string` | `"db-g1-small"` | no |
| [ssh\_key\_pairs](#input\_ssh\_key\_pairs) | The list of SSH key-pairs to add to the servers | list(object({
user_name = string
public_key = string
})) | `[]` | no |
| [ssl\_information](#input\_ssl\_information) | The ID of an Google Managed certificate to bind to the load balancer | object({
enabled = bool
certificate_id = string
}) | {
"certificate_id": "",
"enabled": false
} | no |
| [telemetry\_enabled](#input\_telemetry\_enabled) | Whether or not to send telemetry information back to Snowplow Analytics Ltd | `bool` | `true` | no |
@@ -69,7 +57,4 @@
|------|-------------|
| [bigquery\_db\_dataset\_id](#output\_bigquery\_db\_dataset\_id) | The ID of the BigQuery dataset where your data is being streamed |
| [bq\_loader\_bad\_rows\_topic\_name](#output\_bq\_loader\_bad\_rows\_topic\_name) | The name of the topic for bad rows emitted from the BigQuery loader |
-| [bq\_loader\_dead\_letter\_bucket\_name](#output\_bq\_loader\_dead\_letter\_bucket\_name) | The name of the GCS bucket for dead letter events emitted from the BigQuery loader |
| [collector\_ip\_address](#output\_collector\_ip\_address) | The IP address for the Pipeline Collector |
-| [postgres\_db\_address](#output\_postgres\_db\_address) | The IP address of the database where your data is being streamed |
-| [postgres\_db\_port](#output\_postgres\_db\_port) | The port of the database where your data is being streamed |
diff --git a/terraform/gcp/pipeline/secure/main.tf b/terraform/gcp/pipeline/secure/main.tf
index 4723074..c3d177e 100644
--- a/terraform/gcp/pipeline/secure/main.tf
+++ b/terraform/gcp/pipeline/secure/main.tf
@@ -46,7 +46,7 @@ module "enriched_topic" {
# 2. Deploy Collector stack
module "collector_pubsub" {
source = "snowplow-devops/collector-pubsub-ce/google"
- version = "0.6.0"
+ version = "0.7.0"
accept_limited_use_license = var.accept_limited_use_license
@@ -90,7 +90,7 @@ module "collector_lb" {
# 3. Deploy Enrichment
module "enrich_pubsub" {
source = "snowplow-devops/enrich-pubsub-ce/google"
- version = "0.4.0"
+ version = "0.5.0"
accept_limited_use_license = var.accept_limited_use_license
diff --git a/terraform/gcp/pipeline/secure/outputs.tf b/terraform/gcp/pipeline/secure/outputs.tf
index 6f80d33..9b29019 100644
--- a/terraform/gcp/pipeline/secure/outputs.tf
+++ b/terraform/gcp/pipeline/secure/outputs.tf
@@ -3,26 +3,11 @@ output "collector_ip_address" {
value = module.collector_lb.ip_address
}
-output "postgres_db_address" {
- description = "The IP address of the database where your data is being streamed"
- value = join("", module.postgres_db.*.first_ip_address)
-}
-
-output "postgres_db_port" {
- description = "The port of the database where your data is being streamed"
- value = join("", module.postgres_db.*.port)
-}
-
output "bigquery_db_dataset_id" {
description = "The ID of the BigQuery dataset where your data is being streamed"
value = join("", google_bigquery_dataset.bigquery_db.*.dataset_id)
}
-output "bq_loader_dead_letter_bucket_name" {
- description = "The name of the GCS bucket for dead letter events emitted from the BigQuery loader"
- value = join("", google_storage_bucket.bq_loader_dead_letter_bucket.*.name)
-}
-
output "bq_loader_bad_rows_topic_name" {
description = "The name of the topic for bad rows emitted from the BigQuery loader"
value = join("", module.bq_bad_rows_topic.*.name)
diff --git a/terraform/gcp/pipeline/secure/target_bigquery.tf b/terraform/gcp/pipeline/secure/target_bigquery.tf
index 85ae35a..b37bfb6 100644
--- a/terraform/gcp/pipeline/secure/target_bigquery.tf
+++ b/terraform/gcp/pipeline/secure/target_bigquery.tf
@@ -18,26 +18,9 @@ resource "google_bigquery_dataset" "bigquery_db" {
labels = var.labels
}
-resource "google_storage_bucket" "bq_loader_dead_letter_bucket" {
- count = var.bigquery_db_enabled && var.bigquery_loader_dead_letter_bucket_deploy ? 1 : 0
-
- name = var.bigquery_loader_dead_letter_bucket_name
- location = var.region
- force_destroy = true
-
- labels = var.labels
-}
-
-locals {
- bq_loader_dead_letter_bucket_name = coalesce(
- join("", google_storage_bucket.bq_loader_dead_letter_bucket.*.name),
- var.bigquery_loader_dead_letter_bucket_name,
- )
-}
-
module "bigquery_loader" {
source = "snowplow-devops/bigquery-loader-pubsub-ce/google"
- version = "0.4.0"
+ version = "0.5.0"
accept_limited_use_license = var.accept_limited_use_license
@@ -53,10 +36,9 @@ module "bigquery_loader" {
ssh_ip_allowlist = var.ssh_ip_allowlist
ssh_key_pairs = var.ssh_key_pairs
- input_topic_name = module.enriched_topic.name
- bad_rows_topic_name = join("", module.bq_bad_rows_topic.*.name)
- gcs_dead_letter_bucket_name = local.bq_loader_dead_letter_bucket_name
- bigquery_dataset_id = join("", google_bigquery_dataset.bigquery_db.*.dataset_id)
+ input_topic_name = module.enriched_topic.name
+ bad_rows_topic_id = join("", module.bq_bad_rows_topic.*.id)
+ bigquery_dataset_id = join("", google_bigquery_dataset.bigquery_db.*.dataset_id)
# Linking in the custom Iglu Server here
custom_iglu_resolvers = local.custom_iglu_resolvers
diff --git a/terraform/gcp/pipeline/secure/target_postgres.tf b/terraform/gcp/pipeline/secure/target_postgres.tf
deleted file mode 100644
index 7ef38bf..0000000
--- a/terraform/gcp/pipeline/secure/target_postgres.tf
+++ /dev/null
@@ -1,97 +0,0 @@
-module "postgres_db" {
- source = "snowplow-devops/cloud-sql/google"
- version = "0.4.1"
-
- count = var.postgres_db_enabled ? 1 : 0
-
- name = "${var.prefix}-postgres-db"
-
- region = var.region
- db_name = var.postgres_db_name
- db_username = var.postgres_db_username
- db_password = var.postgres_db_password
-
- authorized_networks = var.postgres_db_authorized_networks
-
- tier = var.postgres_db_tier
-
- labels = var.labels
-}
-
-module "postgres_loader_enriched" {
- source = "snowplow-devops/postgres-loader-pubsub-ce/google"
- version = "0.5.0"
-
- accept_limited_use_license = var.accept_limited_use_license
-
- count = var.postgres_db_enabled ? 1 : 0
-
- name = "${var.prefix}-pg-loader-enriched"
-
- network = var.network
- subnetwork = var.subnetwork
- region = var.region
- project_id = var.project_id
-
- ssh_ip_allowlist = var.ssh_ip_allowlist
- ssh_key_pairs = var.ssh_key_pairs
-
- in_topic_name = module.enriched_topic.name
- purpose = "ENRICHED_EVENTS"
- schema_name = "atomic"
-
- db_instance_name = join("", module.postgres_db.*.connection_name)
- db_port = join("", module.postgres_db.*.port)
- db_name = var.postgres_db_name
- db_username = var.postgres_db_username
- db_password = var.postgres_db_password
-
- # Linking in the custom Iglu Server here
- custom_iglu_resolvers = local.custom_iglu_resolvers
-
- telemetry_enabled = var.telemetry_enabled
- user_provided_id = var.user_provided_id
-
- associate_public_ip_address = false
-
- labels = var.labels
-}
-
-module "postgres_loader_bad" {
- source = "snowplow-devops/postgres-loader-pubsub-ce/google"
- version = "0.5.0"
-
- accept_limited_use_license = var.accept_limited_use_license
-
- count = var.postgres_db_enabled ? 1 : 0
-
- name = "${var.prefix}-pg-loader-bad"
-
- network = var.network
- subnetwork = var.subnetwork
- region = var.region
- project_id = var.project_id
-
- ssh_ip_allowlist = var.ssh_ip_allowlist
- ssh_key_pairs = var.ssh_key_pairs
-
- in_topic_name = module.bad_1_topic.name
- purpose = "JSON"
- schema_name = "atomic_bad"
-
- db_instance_name = join("", module.postgres_db.*.connection_name)
- db_port = join("", module.postgres_db.*.port)
- db_name = var.postgres_db_name
- db_username = var.postgres_db_username
- db_password = var.postgres_db_password
-
- # Linking in the custom Iglu Server here
- custom_iglu_resolvers = local.custom_iglu_resolvers
-
- telemetry_enabled = var.telemetry_enabled
- user_provided_id = var.user_provided_id
-
- associate_public_ip_address = false
-
- labels = var.labels
-}
diff --git a/terraform/gcp/pipeline/secure/terraform.tfvars b/terraform/gcp/pipeline/secure/terraform.tfvars
index 1edddb8..3a31494 100644
--- a/terraform/gcp/pipeline/secure/terraform.tfvars
+++ b/terraform/gcp/pipeline/secure/terraform.tfvars
@@ -41,41 +41,9 @@ ssl_information = {
# --- TARGETS CONFIGURATION ZONE --- #
-# --- Target: PostgreSQL
-postgres_db_enabled = false
-
-postgres_db_name = "snowplow"
-postgres_db_username = "snowplow"
-# Change and keep this secret!
-postgres_db_password = "Hell0W0rld!2"
-# IP ranges that you want to query the Pipeline Postgres Cloud SQL instance from directly over the internet. An alternative access method is to leverage
-# the Cloud SQL Proxy service which creates an IAM authenticated tunnel to the instance
-#
-# Details: https://cloud.google.com/sql/docs/postgres/sql-proxy
-#
-# Note: this exposes your data to the internet - take care to ensure your allowlist is strict enough
-postgres_db_authorized_networks = [
- {
- name = "foo"
- value = "999.999.999.999/32"
- },
- {
- name = "bar"
- value = "888.888.888.888/32"
- }
-]
-# Note: the size of the database instance determines the number of concurrent connections - each Postgres Loader instance creates 10 open connections so having
-# a sufficiently powerful database tier is important to not running out of connection slots
-postgres_db_tier = "db-g1-small"
-
# --- Target: BigQuery
bigquery_db_enabled = false
-# To use an existing bucket set this to false
-bigquery_loader_dead_letter_bucket_deploy = true
-# Must be globally unique so will need to be updated before applying
-bigquery_loader_dead_letter_bucket_name = "sp-bq-loader-dead-letter"
-
# --- ADVANCED CONFIGURATION ZONE --- #
# See for more information: https://registry.terraform.io/modules/snowplow-devops/collector-pubsub-ce/google/latest#telemetry
diff --git a/terraform/gcp/pipeline/secure/variables.tf b/terraform/gcp/pipeline/secure/variables.tf
index 498cf19..696d30c 100644
--- a/terraform/gcp/pipeline/secure/variables.tf
+++ b/terraform/gcp/pipeline/secure/variables.tf
@@ -64,61 +64,12 @@ variable "iglu_super_api_key" {
sensitive = true
}
-variable "postgres_db_enabled" {
- description = "Whether to enable loading into a Postgres Database"
- default = false
- type = bool
-}
-
-variable "postgres_db_name" {
- description = "The name of the database to connect to"
- type = string
-}
-
-variable "postgres_db_username" {
- description = "The username to use to connect to the database"
- type = string
-}
-
-variable "postgres_db_password" {
- description = "The password to use to connect to the database"
- type = string
- sensitive = true
-}
-
-variable "postgres_db_authorized_networks" {
- description = "The list of CIDR ranges to allow access to the Pipeline Database over"
- default = []
- type = list(object({
- name = string
- value = string
- }))
-}
-
-variable "postgres_db_tier" {
- description = "The instance type to assign to the deployed Cloud SQL instance"
- type = string
- default = "db-g1-small"
-}
-
variable "bigquery_db_enabled" {
description = "Whether to enable loading into a BigQuery Dataset"
default = false
type = bool
}
-variable "bigquery_loader_dead_letter_bucket_deploy" {
- description = "Whether this module should create a new bucket with the specified name - if the bucket already exists set this to false"
- default = true
- type = bool
-}
-
-variable "bigquery_loader_dead_letter_bucket_name" {
- description = "The name of the GCS bucket to use for dead-letter output of loader"
- default = ""
- type = string
-}
-
variable "telemetry_enabled" {
description = "Whether or not to send telemetry information back to Snowplow Analytics Ltd"
type = bool