diff --git a/README.md b/README.md index 03548b4..581b72d 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,6 @@ [![Release][release-badge]][release] [![License][license-image]][license] -[![Discourse posts][discourse-image]][discourse] Examples of how to automate creating a [Snowplow Community pipeline](https://github.com/snowplow/snowplow). @@ -13,15 +12,12 @@ These examples cover deploying an Iglu Server, for hosting your schemas, and a S | Tool | Cloud | Components | Status | Deployment Summary | |------------|-------|------------------------------|---------------------------------------------|----------------------------------------------| | Terraform | AWS | Iglu Server | [Published](terraform/aws/iglu_server) | | -| Terraform | AWS | Pipeline (PostgreSQL) | [Published](terraform/aws/pipeline) | [AWS PostgreSQL Summary][deploypgsum-aws] | | Terraform | AWS | Pipeline (Snowflake) | [Published](terraform/aws/pipeline) | [AWS Snowflake Summary][deploysfsum-aws] | | Terraform | AWS | Pipeline (Redshift) | [Published](terraform/aws/pipeline) | [AWS Redshift Summary][deployrssum-aws] | | Terraform | AWS | Pipeline (Databricks) | [Published](terraform/aws/pipeline) | [AWS Databricks Summary][deploydbsum-aws] | | Terraform | GCP | Iglu Server | [Published](terraform/gcp/iglu_server) | | -| Terraform | GCP | Pipeline (PostgreSQL) | [Published](terraform/gcp/pipeline) | [GCP PostgreSQL Summary][deploypgsum-gcp] | | Terraform | GCP | Pipeline (BigQuery) | [Published](terraform/gcp/pipeline) | [GCP BigQuery Summary][deploybqsum-gcp] | | Terraform | Azure | Iglu Server | [Published](terraform/azure/iglu_server) | | -| Terraform | Azure | Pipeline (Snowflake) | [Published](terraform/azure/pipeline) | [Azure Snowflake Summary][deploysfsum-azure] | | Terraform | Azure | Pipeline (Databricks) | [Published](terraform/azure/pipeline) | [Azure Databricks Summary][deploydbsum-azure] | | Terraform | Azure | Pipeline (Synapse Analytics) | [Published](terraform/azure/pipeline) | 
[Azure Synapse Analytics Summary][deploysasum-azure] | @@ -62,13 +58,10 @@ Licensed under the [Snowplow Limited Use License Agreement][license]. _(If you a [installguide]: https://docs.snowplow.io/docs/getting-started-on-community-edition/what-is-quick-start/ [faq]: https://docs.snowplow.io/docs/getting-started-on-community-edition/faq/ -[deploypgsum-aws]: https://docs.snowplow.io/docs/getting-started-on-community-edition/what-is-deployed/?warehouse=postgres&cloud=aws [deploysfsum-aws]: https://docs.snowplow.io/docs/getting-started-on-community-edition/what-is-deployed/?warehouse=snowflake&cloud=aws [deployrssum-aws]: https://docs.snowplow.io/docs/getting-started-on-community-edition/what-is-deployed/?warehouse=redshift&cloud=aws [deploydbsum-aws]: https://docs.snowplow.io/docs/getting-started-on-community-edition/what-is-deployed/?warehouse=databricks&cloud=aws -[deploypgsum-gcp]: https://docs.snowplow.io/docs/getting-started-on-community-edition/what-is-deployed/?warehouse=postgres&cloud=gcp [deploybqsum-gcp]: https://docs.snowplow.io/docs/getting-started-on-community-edition/what-is-deployed/?warehouse=bigquery&cloud=gcp -[deploysfsum-azure]: https://docs.snowplow.io/docs/getting-started-on-community-edition/what-is-deployed/?warehouse=snowflake&cloud=azure [deploydbsum-azure]: https://docs.snowplow.io/docs/getting-started-on-community-edition/what-is-deployed/?warehouse=databricks&cloud=azure [deploysasum-azure]: https://docs.snowplow.io/docs/getting-started-on-community-edition/what-is-deployed/?warehouse=synapse&cloud=azure @@ -76,10 +69,7 @@ Licensed under the [Snowplow Limited Use License Agreement][license]. 
_(If you a [license-image]: https://img.shields.io/badge/license-Snowplow--Limited--Use-blue.svg?style=flat [license-faq]: https://docs.snowplow.io/docs/contributing/limited-use-license-faq/ -[discourse-image]: https://img.shields.io/discourse/posts?server=https%3A%2F%2Fdiscourse.snowplow.io%2F -[discourse]: http://discourse.snowplow.io/ - [release]: https://github.com/snowplow/snowplow/releases -[release-badge]: https://img.shields.io/badge/Snowplow-25.06-6638b8 +[release-badge]: https://img.shields.io/badge/Snowplow-25.10-6638b8 [tf-docs]: https://github.com/terraform-docs/terraform-docs diff --git a/terraform/aws/iglu_server/default/README.md b/terraform/aws/iglu_server/default/README.md index e87a797..4fdb2c0 100644 --- a/terraform/aws/iglu_server/default/README.md +++ b/terraform/aws/iglu_server/default/README.md @@ -17,8 +17,8 @@ | Name | Source | Version | |------|--------|---------| | [iglu\_lb](#module\_iglu\_lb) | snowplow-devops/alb/aws | 0.2.0 | -| [iglu\_rds](#module\_iglu\_rds) | snowplow-devops/rds/aws | 0.4.0 | -| [iglu\_server](#module\_iglu\_server) | snowplow-devops/iglu-server-ec2/aws | 0.5.0 | +| [iglu\_rds](#module\_iglu\_rds) | snowplow-devops/rds/aws | 0.5.0 | +| [iglu\_server](#module\_iglu\_server) | snowplow-devops/iglu-server-ec2/aws | 0.6.1 | ## Resources @@ -43,7 +43,7 @@ | [cloudwatch\_logs\_enabled](#input\_cloudwatch\_logs\_enabled) | Whether application logs should be reported to CloudWatch | `bool` | `true` | no | | [cloudwatch\_logs\_retention\_days](#input\_cloudwatch\_logs\_retention\_days) | The length of time in days to retain logs for | `number` | `7` | no | | [iam\_permissions\_boundary](#input\_iam\_permissions\_boundary) | The permissions boundary ARN to set on IAM roles created | `string` | `""` | no | -| [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer |
object({
enabled = bool
certificate_arn = string
})
|
{
"certificate_arn": "",
"enabled": false
}
| no | +| [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer |
object({
enabled = bool
certificate_arn = string
})
|
{
"certificate_arn": "",
"enabled": false
}
| no | | [tags](#input\_tags) | The tags to append to the resources in this module | `map(string)` | `{}` | no | | [telemetry\_enabled](#input\_telemetry\_enabled) | Whether or not to send telemetry information back to Snowplow Analytics Ltd | `bool` | `true` | no | | [user\_provided\_id](#input\_user\_provided\_id) | An optional unique identifier to identify the telemetry events emitted by this stack | `string` | `""` | no | diff --git a/terraform/aws/iglu_server/default/main.tf b/terraform/aws/iglu_server/default/main.tf index 77b6a20..cae8158 100644 --- a/terraform/aws/iglu_server/default/main.tf +++ b/terraform/aws/iglu_server/default/main.tf @@ -38,12 +38,10 @@ module "iglu_lb" { module "iglu_server" { source = "snowplow-devops/iglu-server-ec2/aws" - version = "0.5.0" + version = "0.6.1" accept_limited_use_license = var.accept_limited_use_license - app_version = "0.14.0" - name = "${var.prefix}-iglu-server" vpc_id = var.vpc_id subnet_ids = var.public_subnet_ids diff --git a/terraform/aws/iglu_server/secure/README.md b/terraform/aws/iglu_server/secure/README.md index c8bef9a..e6d8148 100644 --- a/terraform/aws/iglu_server/secure/README.md +++ b/terraform/aws/iglu_server/secure/README.md @@ -17,8 +17,8 @@ | Name | Source | Version | |------|--------|---------| | [iglu\_lb](#module\_iglu\_lb) | snowplow-devops/alb/aws | 0.2.0 | -| [iglu\_rds](#module\_iglu\_rds) | snowplow-devops/rds/aws | 0.4.0 | -| [iglu\_server](#module\_iglu\_server) | snowplow-devops/iglu-server-ec2/aws | 0.5.0 | +| [iglu\_rds](#module\_iglu\_rds) | snowplow-devops/rds/aws | 0.5.0 | +| [iglu\_server](#module\_iglu\_server) | snowplow-devops/iglu-server-ec2/aws | 0.6.1 | ## Resources @@ -44,7 +44,7 @@ | [cloudwatch\_logs\_enabled](#input\_cloudwatch\_logs\_enabled) | Whether application logs should be reported to CloudWatch | `bool` | `true` | no | | [cloudwatch\_logs\_retention\_days](#input\_cloudwatch\_logs\_retention\_days) | The length of time in days to retain logs for | `number` | `7` 
| no | | [iam\_permissions\_boundary](#input\_iam\_permissions\_boundary) | The permissions boundary ARN to set on IAM roles created | `string` | `""` | no | -| [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer |
object({
enabled = bool
certificate_arn = string
})
|
{
"certificate_arn": "",
"enabled": false
}
| no | +| [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer |
object({
enabled = bool
certificate_arn = string
})
|
{
"certificate_arn": "",
"enabled": false
}
| no | | [tags](#input\_tags) | The tags to append to the resources in this module | `map(string)` | `{}` | no | | [telemetry\_enabled](#input\_telemetry\_enabled) | Whether or not to send telemetry information back to Snowplow Analytics Ltd | `bool` | `true` | no | | [user\_provided\_id](#input\_user\_provided\_id) | An optional unique identifier to identify the telemetry events emitted by this stack | `string` | `""` | no | diff --git a/terraform/aws/iglu_server/secure/main.tf b/terraform/aws/iglu_server/secure/main.tf index b7e407e..1d2eca3 100644 --- a/terraform/aws/iglu_server/secure/main.tf +++ b/terraform/aws/iglu_server/secure/main.tf @@ -38,12 +38,10 @@ module "iglu_lb" { module "iglu_server" { source = "snowplow-devops/iglu-server-ec2/aws" - version = "0.5.0" + version = "0.6.1" accept_limited_use_license = var.accept_limited_use_license - app_version = "0.14.0" - name = "${var.prefix}-iglu-server" vpc_id = var.vpc_id subnet_ids = var.private_subnet_ids diff --git a/terraform/aws/pipeline/default/README.md b/terraform/aws/pipeline/default/README.md index 95d2148..86d0524 100644 --- a/terraform/aws/pipeline/default/README.md +++ b/terraform/aws/pipeline/default/README.md @@ -18,25 +18,19 @@ |------|--------|---------| | [bad\_1\_stream](#module\_bad\_1\_stream) | snowplow-devops/kinesis-stream/aws | 0.3.0 | | [bad\_2\_stream](#module\_bad\_2\_stream) | snowplow-devops/kinesis-stream/aws | 0.3.0 | -| [collector\_kinesis](#module\_collector\_kinesis) | snowplow-devops/collector-kinesis-ec2/aws | 0.9.0 | +| [collector\_kinesis](#module\_collector\_kinesis) | snowplow-devops/collector-kinesis-ec2/aws | 0.10.1 | | [collector\_lb](#module\_collector\_lb) | snowplow-devops/alb/aws | 0.2.0 | -| [db\_loader](#module\_db\_loader) | snowplow-devops/databricks-loader-ec2/aws | 0.2.0 | -| [db\_transformer\_wrp](#module\_db\_transformer\_wrp) | snowplow-devops/transformer-kinesis-ec2/aws | 0.4.0 | -| [enrich\_kinesis](#module\_enrich\_kinesis) | 
snowplow-devops/enrich-kinesis-ec2/aws | 0.6.0 | +| [db\_loader](#module\_db\_loader) | snowplow-devops/databricks-loader-ec2/aws | 0.3.0 | +| [db\_transformer\_wrp](#module\_db\_transformer\_wrp) | snowplow-devops/transformer-kinesis-ec2/aws | 0.5.0 | +| [enrich\_kinesis](#module\_enrich\_kinesis) | snowplow-devops/enrich-kinesis-ec2/aws | 0.7.1 | | [enriched\_stream](#module\_enriched\_stream) | snowplow-devops/kinesis-stream/aws | 0.3.0 | -| [postgres\_loader\_bad](#module\_postgres\_loader\_bad) | snowplow-devops/postgres-loader-kinesis-ec2/aws | 0.5.0 | -| [postgres\_loader\_enriched](#module\_postgres\_loader\_enriched) | snowplow-devops/postgres-loader-kinesis-ec2/aws | 0.5.0 | -| [postgres\_loader\_rds](#module\_postgres\_loader\_rds) | snowplow-devops/rds/aws | 0.4.0 | | [raw\_stream](#module\_raw\_stream) | snowplow-devops/kinesis-stream/aws | 0.3.0 | -| [rs\_loader](#module\_rs\_loader) | snowplow-devops/redshift-loader-ec2/aws | 0.2.0 | -| [rs\_transformer\_stsv](#module\_rs\_transformer\_stsv) | snowplow-devops/transformer-kinesis-ec2/aws | 0.4.0 | -| [s3\_loader\_bad](#module\_s3\_loader\_bad) | snowplow-devops/s3-loader-kinesis-ec2/aws | 0.5.0 | -| [s3\_loader\_enriched](#module\_s3\_loader\_enriched) | snowplow-devops/s3-loader-kinesis-ec2/aws | 0.5.0 | -| [s3\_loader\_raw](#module\_s3\_loader\_raw) | snowplow-devops/s3-loader-kinesis-ec2/aws | 0.5.0 | +| [rs\_loader](#module\_rs\_loader) | snowplow-devops/redshift-loader-ec2/aws | 0.4.0 | +| [rs\_transformer\_stsv](#module\_rs\_transformer\_stsv) | snowplow-devops/transformer-kinesis-ec2/aws | 0.5.0 | +| [s3\_loader\_bad](#module\_s3\_loader\_bad) | snowplow-devops/s3-loader-kinesis-ec2/aws | 0.6.0 | +| [s3\_loader\_enriched](#module\_s3\_loader\_enriched) | snowplow-devops/s3-loader-kinesis-ec2/aws | 0.6.0 | | [s3\_pipeline\_bucket](#module\_s3\_pipeline\_bucket) | snowplow-devops/s3-bucket/aws | 0.2.0 | -| [sf\_loader](#module\_sf\_loader) | snowplow-devops/snowflake-loader-ec2/aws | 0.3.0 | -| 
[sf\_transformer\_wrj](#module\_sf\_transformer\_wrj) | snowplow-devops/transformer-kinesis-ec2/aws | 0.4.0 | -| [snowflake\_streaming\_loader\_enriched](#module\_snowflake\_streaming\_loader\_enriched) | snowplow-devops/snowflake-streaming-loader-ec2/aws | 0.1.0 | +| [snowflake\_streaming\_loader\_enriched](#module\_snowflake\_streaming\_loader\_enriched) | snowplow-devops/snowflake-streaming-loader-ec2/aws | 0.2.1 | ## Resources @@ -45,7 +39,6 @@ | [aws_key_pair.pipeline](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/key_pair) | resource | | [aws_sqs_queue.db_message_queue](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue) | resource | | [aws_sqs_queue.rs_message_queue](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue) | resource | -| [aws_sqs_queue.sf_message_queue](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue) | resource | ## Inputs @@ -73,12 +66,6 @@ | [databricks\_transformer\_window\_period\_min](#input\_databricks\_transformer\_window\_period\_min) | Frequency to emit transforming finished message - 5,10,15,20,30,60 etc minutes | `number` | `5` | no | | [iam\_permissions\_boundary](#input\_iam\_permissions\_boundary) | The permissions boundary ARN to set on IAM roles created | `string` | `""` | no | | [pipeline\_kcl\_write\_max\_capacity](#input\_pipeline\_kcl\_write\_max\_capacity) | Increasing this is important to increase throughput at very high pipeline volumes | `number` | `50` | no | -| [postgres\_db\_enabled](#input\_postgres\_db\_enabled) | Whether to enable loading into a Postgres Database | `bool` | `false` | no | -| [postgres\_db\_ip\_allowlist](#input\_postgres\_db\_ip\_allowlist) | An optional list of CIDR ranges to allow traffic from | `list(any)` | `[]` | no | -| [postgres\_db\_name](#input\_postgres\_db\_name) | The name of the database to connect to | `string` | `""` | no | -| 
[postgres\_db\_password](#input\_postgres\_db\_password) | The password to use to connect to the database | `string` | `""` | no | -| [postgres\_db\_publicly\_accessible](#input\_postgres\_db\_publicly\_accessible) | Whether to make the Postgres RDS instance accessible over the internet | `bool` | `false` | no | -| [postgres\_db\_username](#input\_postgres\_db\_username) | The username to use to connect to the database | `string` | `""` | no | | [redshift\_database](#input\_redshift\_database) | Redshift database name | `string` | `""` | no | | [redshift\_enabled](#input\_redshift\_enabled) | Whether to enable loading into a Redshift Database | `bool` | `false` | no | | [redshift\_host](#input\_redshift\_host) | Redshift cluster hostname | `string` | `""` | no | @@ -90,23 +77,13 @@ | [s3\_bucket\_deploy](#input\_s3\_bucket\_deploy) | Whether this module should create a new bucket with the specified name - if the bucket already exists set this to false | `bool` | `true` | no | | [s3\_bucket\_object\_prefix](#input\_s3\_bucket\_object\_prefix) | An optional prefix under which Snowplow data will be saved (Note: your prefix must end with a trailing '/') | `string` | `""` | no | | [s3\_enriched\_enabled](#input\_s3\_enriched\_enabled) | Whether to enable loading of enriched data into S3 from Kinesis | `bool` | `true` | no | -| [s3\_raw\_enabled](#input\_s3\_raw\_enabled) | Whether to enable loading of raw data into S3 from Kinesis | `bool` | `false` | no | -| [snowflake\_account](#input\_snowflake\_account) | Snowflake account to use | `string` | `""` | no | -| [snowflake\_database](#input\_snowflake\_database) | Snowflake database name | `string` | `""` | no | -| [snowflake\_enabled](#input\_snowflake\_enabled) | Whether to enable loading into a Snowflake Database | `bool` | `false` | no | -| [snowflake\_loader\_password](#input\_snowflake\_loader\_password) | The password to use for the loader user | `string` | `""` | no | -| 
[snowflake\_loader\_user](#input\_snowflake\_loader\_user) | The Snowflake user used by Snowflake Loader | `string` | `""` | no | -| [snowflake\_region](#input\_snowflake\_region) | Region of Snowflake account | `string` | `""` | no | -| [snowflake\_schema](#input\_snowflake\_schema) | Snowflake schema name | `string` | `""` | no | | [snowflake\_streaming\_account\_url](#input\_snowflake\_streaming\_account\_url) | Snowflake account URL to use | `string` | `""` | no | | [snowflake\_streaming\_database](#input\_snowflake\_streaming\_database) | Snowflake database name | `string` | `""` | no | | [snowflake\_streaming\_enabled](#input\_snowflake\_streaming\_enabled) | Whether to enable loading into a Snowflake Database with a Streaming Loader | `bool` | `false` | no | | [snowflake\_streaming\_loader\_private\_key](#input\_snowflake\_streaming\_loader\_private\_key) | The private key to use for the loader user | `string` | `""` | no | | [snowflake\_streaming\_loader\_user](#input\_snowflake\_streaming\_loader\_user) | The Snowflake user used by Snowflake Streaming Loader | `string` | `""` | no | | [snowflake\_streaming\_schema](#input\_snowflake\_streaming\_schema) | Snowflake schema name | `string` | `""` | no | -| [snowflake\_transformer\_window\_period\_min](#input\_snowflake\_transformer\_window\_period\_min) | Frequency to emit transforming finished message - 5,10,15,20,30,60 etc minutes | `number` | `5` | no | -| [snowflake\_warehouse](#input\_snowflake\_warehouse) | Snowflake warehouse name | `string` | `""` | no | -| [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer |
object({
enabled = bool
certificate_arn = string
})
|
{
"certificate_arn": "",
"enabled": false
}
| no | +| [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer |
object({
enabled = bool
certificate_arn = string
})
|
{
"certificate_arn": "",
"enabled": false
}
| no | | [tags](#input\_tags) | The tags to append to the resources in this module | `map(string)` | `{}` | no | | [telemetry\_enabled](#input\_telemetry\_enabled) | Whether or not to send telemetry information back to Snowplow Analytics Ltd | `bool` | `true` | no | | [user\_provided\_id](#input\_user\_provided\_id) | An optional unique identifier to identify the telemetry events emitted by this stack | `string` | `""` | no | @@ -116,6 +93,3 @@ | Name | Description | |------|-------------| | [collector\_dns\_name](#output\_collector\_dns\_name) | The ALB DNS name for the Pipeline Collector | -| [postgres\_db\_address](#output\_postgres\_db\_address) | The RDS DNS name where your data is being streamed | -| [postgres\_db\_id](#output\_postgres\_db\_id) | The ID of the RDS instance | -| [postgres\_db\_port](#output\_postgres\_db\_port) | The RDS port where your data is being streamed | diff --git a/terraform/aws/pipeline/default/main.tf b/terraform/aws/pipeline/default/main.tf index 160fedf..b5a47b0 100644 --- a/terraform/aws/pipeline/default/main.tf +++ b/terraform/aws/pipeline/default/main.tf @@ -86,12 +86,10 @@ module "collector_lb" { module "collector_kinesis" { source = "snowplow-devops/collector-kinesis-ec2/aws" - version = "0.9.0" + version = "0.10.1" accept_limited_use_license = var.accept_limited_use_license - app_version = "3.3.0" - name = "${var.prefix}-collector-server" vpc_id = var.vpc_id subnet_ids = var.public_subnet_ids @@ -118,12 +116,10 @@ module "collector_kinesis" { # 3. 
Deploy Enrichment module "enrich_kinesis" { source = "snowplow-devops/enrich-kinesis-ec2/aws" - version = "0.6.0" + version = "0.7.1" accept_limited_use_license = var.accept_limited_use_license - app_version = "5.2.0" - name = "${var.prefix}-enrich-server" vpc_id = var.vpc_id subnet_ids = var.public_subnet_ids diff --git a/terraform/aws/pipeline/default/outputs.tf b/terraform/aws/pipeline/default/outputs.tf index e15163a..1d1e81a 100644 --- a/terraform/aws/pipeline/default/outputs.tf +++ b/terraform/aws/pipeline/default/outputs.tf @@ -2,20 +2,3 @@ output "collector_dns_name" { description = "The ALB DNS name for the Pipeline Collector" value = module.collector_lb.dns_name } - -# --- Target: PostgreSQL - -output "postgres_db_address" { - description = "The RDS DNS name where your data is being streamed" - value = var.postgres_db_enabled ? module.postgres_loader_rds[0].address : null -} - -output "postgres_db_port" { - description = "The RDS port where your data is being streamed" - value = var.postgres_db_enabled ? module.postgres_loader_rds[0].port : null -} - -output "postgres_db_id" { - description = "The ID of the RDS instance" - value = var.postgres_db_enabled ? module.postgres_loader_rds[0].id : null -} diff --git a/terraform/aws/pipeline/default/target_amazon_s3.tf b/terraform/aws/pipeline/default/target_amazon_s3.tf index e6dae32..923efc4 100644 --- a/terraform/aws/pipeline/default/target_amazon_s3.tf +++ b/terraform/aws/pipeline/default/target_amazon_s3.tf @@ -1,38 +1,6 @@ -module "s3_loader_raw" { - source = "snowplow-devops/s3-loader-kinesis-ec2/aws" - version = "0.5.0" - - accept_limited_use_license = var.accept_limited_use_license - - count = var.s3_raw_enabled ? 
1 : 0 - - name = "${var.prefix}-s3-loader-raw-server" - vpc_id = var.vpc_id - subnet_ids = var.public_subnet_ids - in_stream_name = module.raw_stream.name - bad_stream_name = module.bad_1_stream.name - s3_bucket_name = local.s3_pipeline_bucket_name - s3_object_prefix = "${var.s3_bucket_object_prefix}raw/" - - ssh_key_name = aws_key_pair.pipeline.key_name - ssh_ip_allowlist = var.ssh_ip_allowlist - - telemetry_enabled = var.telemetry_enabled - user_provided_id = var.user_provided_id - - iam_permissions_boundary = var.iam_permissions_boundary - - kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity - - tags = var.tags - - cloudwatch_logs_enabled = var.cloudwatch_logs_enabled - cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days -} - module "s3_loader_bad" { source = "snowplow-devops/s3-loader-kinesis-ec2/aws" - version = "0.5.0" + version = "0.6.0" accept_limited_use_license = var.accept_limited_use_license @@ -67,7 +35,7 @@ module "s3_loader_bad" { module "s3_loader_enriched" { source = "snowplow-devops/s3-loader-kinesis-ec2/aws" - version = "0.5.0" + version = "0.6.0" accept_limited_use_license = var.accept_limited_use_license diff --git a/terraform/aws/pipeline/default/target_databricks.tf b/terraform/aws/pipeline/default/target_databricks.tf index a92b8c8..f169f23 100644 --- a/terraform/aws/pipeline/default/target_databricks.tf +++ b/terraform/aws/pipeline/default/target_databricks.tf @@ -9,7 +9,7 @@ resource "aws_sqs_queue" "db_message_queue" { module "db_transformer_wrp" { source = "snowplow-devops/transformer-kinesis-ec2/aws" - version = "0.4.0" + version = "0.5.0" accept_limited_use_license = var.accept_limited_use_license @@ -19,6 +19,8 @@ module "db_transformer_wrp" { vpc_id = var.vpc_id subnet_ids = var.public_subnet_ids + instance_type = "t3a.large" + stream_name = module.enriched_stream.name s3_bucket_name = local.s3_pipeline_bucket_name s3_bucket_object_prefix = "${var.s3_bucket_object_prefix}transformed/good/widerow/parquet" @@ 
-48,7 +50,7 @@ module "db_transformer_wrp" { module "db_loader" { source = "snowplow-devops/databricks-loader-ec2/aws" - version = "0.2.0" + version = "0.3.0" accept_limited_use_license = var.accept_limited_use_license @@ -58,6 +60,8 @@ module "db_loader" { vpc_id = var.vpc_id subnet_ids = var.public_subnet_ids + instance_type = "t3a.small" + sqs_queue_name = aws_sqs_queue.db_message_queue[0].name deltalake_catalog = var.databricks_catalog diff --git a/terraform/aws/pipeline/default/target_postgres.tf b/terraform/aws/pipeline/default/target_postgres.tf deleted file mode 100644 index 7643831..0000000 --- a/terraform/aws/pipeline/default/target_postgres.tf +++ /dev/null @@ -1,104 +0,0 @@ -module "postgres_loader_rds" { - source = "snowplow-devops/rds/aws" - version = "0.5.0" - - count = var.postgres_db_enabled ? 1 : 0 - - name = "${var.prefix}-pipeline-rds" - vpc_id = var.vpc_id - subnet_ids = var.public_subnet_ids - db_name = var.postgres_db_name - db_username = var.postgres_db_username - db_password = var.postgres_db_password - - publicly_accessible = var.postgres_db_publicly_accessible - additional_ip_allowlist = var.postgres_db_ip_allowlist - - ca_cert_identifier = "rds-ca-rsa2048-g1" - - tags = var.tags -} - -module "postgres_loader_enriched" { - source = "snowplow-devops/postgres-loader-kinesis-ec2/aws" - version = "0.5.0" - - accept_limited_use_license = var.accept_limited_use_license - - count = var.postgres_db_enabled ? 
1 : 0 - - name = "${var.prefix}-postgres-loader-enriched-server" - vpc_id = var.vpc_id - subnet_ids = var.public_subnet_ids - - in_stream_name = module.enriched_stream.name - purpose = "ENRICHED_EVENTS" - schema_name = "atomic" - - ssh_key_name = aws_key_pair.pipeline.key_name - ssh_ip_allowlist = var.ssh_ip_allowlist - - iam_permissions_boundary = var.iam_permissions_boundary - - telemetry_enabled = var.telemetry_enabled - user_provided_id = var.user_provided_id - - # Linking in the custom Iglu Server here - custom_iglu_resolvers = local.custom_iglu_resolvers - - db_sg_id = module.postgres_loader_rds[0].sg_id - db_host = module.postgres_loader_rds[0].address - db_port = module.postgres_loader_rds[0].port - db_name = var.postgres_db_name - db_username = var.postgres_db_username - db_password = var.postgres_db_password - - kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity - - tags = var.tags - - cloudwatch_logs_enabled = var.cloudwatch_logs_enabled - cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days -} - -module "postgres_loader_bad" { - source = "snowplow-devops/postgres-loader-kinesis-ec2/aws" - version = "0.5.0" - - accept_limited_use_license = var.accept_limited_use_license - - count = var.postgres_db_enabled ? 
1 : 0 - - name = "${var.prefix}-postgres-loader-bad-server" - vpc_id = var.vpc_id - subnet_ids = var.public_subnet_ids - - in_stream_name = module.bad_1_stream.name - purpose = "JSON" - schema_name = "atomic_bad" - - ssh_key_name = aws_key_pair.pipeline.key_name - ssh_ip_allowlist = var.ssh_ip_allowlist - - telemetry_enabled = var.telemetry_enabled - user_provided_id = var.user_provided_id - - iam_permissions_boundary = var.iam_permissions_boundary - - # Linking in the custom Iglu Server here - custom_iglu_resolvers = local.custom_iglu_resolvers - - db_sg_id = module.postgres_loader_rds[0].sg_id - db_host = module.postgres_loader_rds[0].address - db_port = module.postgres_loader_rds[0].port - db_name = var.postgres_db_name - db_username = var.postgres_db_username - db_password = var.postgres_db_password - - kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity - - tags = var.tags - - cloudwatch_logs_enabled = var.cloudwatch_logs_enabled - cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days -} diff --git a/terraform/aws/pipeline/default/target_redshift.tf b/terraform/aws/pipeline/default/target_redshift.tf index f9944bc..9c34b19 100644 --- a/terraform/aws/pipeline/default/target_redshift.tf +++ b/terraform/aws/pipeline/default/target_redshift.tf @@ -9,7 +9,7 @@ resource "aws_sqs_queue" "rs_message_queue" { module "rs_transformer_stsv" { source = "snowplow-devops/transformer-kinesis-ec2/aws" - version = "0.4.0" + version = "0.5.0" accept_limited_use_license = var.accept_limited_use_license @@ -19,6 +19,8 @@ module "rs_transformer_stsv" { vpc_id = var.vpc_id subnet_ids = var.public_subnet_ids + instance_type = "t3a.large" + stream_name = module.enriched_stream.name s3_bucket_name = local.s3_pipeline_bucket_name s3_bucket_object_prefix = "${var.s3_bucket_object_prefix}transformed/good/shredded/tsv" @@ -48,7 +50,7 @@ module "rs_transformer_stsv" { module "rs_loader" { source = "snowplow-devops/redshift-loader-ec2/aws" - version = "0.2.0" + version 
= "0.4.0" accept_limited_use_license = var.accept_limited_use_license @@ -58,6 +60,8 @@ module "rs_loader" { vpc_id = var.vpc_id subnet_ids = var.public_subnet_ids + instance_type = "t3a.small" + sqs_queue_name = aws_sqs_queue.rs_message_queue[0].name redshift_host = var.redshift_host diff --git a/terraform/aws/pipeline/default/target_snowflake.tf b/terraform/aws/pipeline/default/target_snowflake.tf deleted file mode 100644 index e7acce4..0000000 --- a/terraform/aws/pipeline/default/target_snowflake.tf +++ /dev/null @@ -1,86 +0,0 @@ -resource "aws_sqs_queue" "sf_message_queue" { - count = var.snowflake_enabled ? 1 : 0 - - content_based_deduplication = true - name = "${var.prefix}-sf-loader.fifo" - fifo_queue = true - kms_master_key_id = "alias/aws/sqs" -} - -module "sf_transformer_wrj" { - source = "snowplow-devops/transformer-kinesis-ec2/aws" - version = "0.4.0" - - accept_limited_use_license = var.accept_limited_use_license - - count = var.snowflake_enabled ? 1 : 0 - - name = "${var.prefix}-transformer-server-wrj" - vpc_id = var.vpc_id - subnet_ids = var.public_subnet_ids - - stream_name = module.enriched_stream.name - s3_bucket_name = local.s3_pipeline_bucket_name - s3_bucket_object_prefix = "${var.s3_bucket_object_prefix}transformed/good/widerow/json" - window_period_min = var.snowflake_transformer_window_period_min - sqs_queue_name = aws_sqs_queue.sf_message_queue[0].name - - transformation_type = "widerow" - widerow_file_format = "json" - - ssh_key_name = aws_key_pair.pipeline.key_name - ssh_ip_allowlist = var.ssh_ip_allowlist - - custom_iglu_resolvers = local.custom_iglu_resolvers - - telemetry_enabled = var.telemetry_enabled - user_provided_id = var.user_provided_id - - iam_permissions_boundary = var.iam_permissions_boundary - - kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity - - tags = var.tags - - cloudwatch_logs_enabled = var.cloudwatch_logs_enabled - cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days -} - -module 
"sf_loader" { - source = "snowplow-devops/snowflake-loader-ec2/aws" - version = "0.3.0" - - accept_limited_use_license = var.accept_limited_use_license - - count = var.snowflake_enabled ? 1 : 0 - - name = "${var.prefix}-sf-loader-server" - vpc_id = var.vpc_id - subnet_ids = var.public_subnet_ids - - sqs_queue_name = aws_sqs_queue.sf_message_queue[0].name - - snowflake_loader_user = var.snowflake_loader_user - snowflake_password = var.snowflake_loader_password - snowflake_warehouse = var.snowflake_warehouse - snowflake_database = var.snowflake_database - snowflake_schema = var.snowflake_schema - snowflake_region = var.snowflake_region - snowflake_account = var.snowflake_account - snowflake_aws_s3_bucket_name = local.s3_pipeline_bucket_name - - ssh_key_name = aws_key_pair.pipeline.key_name - ssh_ip_allowlist = var.ssh_ip_allowlist - - custom_iglu_resolvers = local.custom_iglu_resolvers - - telemetry_enabled = var.telemetry_enabled - user_provided_id = var.user_provided_id - - iam_permissions_boundary = var.iam_permissions_boundary - - tags = var.tags - - cloudwatch_logs_enabled = var.cloudwatch_logs_enabled - cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days -} diff --git a/terraform/aws/pipeline/default/target_snowflake_streaming.tf b/terraform/aws/pipeline/default/target_snowflake_streaming.tf index 26f677d..dcf5633 100644 --- a/terraform/aws/pipeline/default/target_snowflake_streaming.tf +++ b/terraform/aws/pipeline/default/target_snowflake_streaming.tf @@ -1,6 +1,6 @@ module "snowflake_streaming_loader_enriched" { source = "snowplow-devops/snowflake-streaming-loader-ec2/aws" - version = "0.1.0" + version = "0.2.1" accept_limited_use_license = var.accept_limited_use_license diff --git a/terraform/aws/pipeline/default/terraform.tfvars b/terraform/aws/pipeline/default/terraform.tfvars index 7f5506b..b48f062 100644 --- a/terraform/aws/pipeline/default/terraform.tfvars +++ b/terraform/aws/pipeline/default/terraform.tfvars @@ -40,38 +40,9 @@ 
ssl_information = { # --- TARGETS CONFIGURATION ZONE --- # # --- Target: Amazon S3 -s3_raw_enabled = false s3_bad_enabled = true s3_enriched_enabled = true -# --- Target: PostgreSQL -postgres_db_enabled = false - -postgres_db_name = "snowplow" -postgres_db_username = "snowplow" -# Change and keep this secret! -postgres_db_password = "Hell0W0rld!2" -# IP ranges that you want to query the Pipeline Postgres RDS from -# Note: this exposes your data to the internet - take care to ensure your allowlist is strict enough -# or provide a way to access the database through the VPC instead -postgres_db_publicly_accessible = true -postgres_db_ip_allowlist = ["999.999.999.999/32", "888.888.888.888/32"] - -# --- Target: Snowflake -# Follow the guide to get input values for the loader: -# https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start-aws -snowflake_enabled = false - -snowflake_account = "" -snowflake_region = "" -snowflake_loader_user = "" -snowflake_loader_password = "" -snowflake_database = "" -snowflake_schema = "" -snowflake_warehouse = "" -# This controls how often data will be loading into Snowflake -snowflake_transformer_window_period_min = 1 - # --- Target: Snowflake Streaming # Follow the guide to get input values for the loader: # https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start-aws diff --git a/terraform/aws/pipeline/default/variables.tf b/terraform/aws/pipeline/default/variables.tf index 1f5e675..36cee69 100644 --- a/terraform/aws/pipeline/default/variables.tf +++ b/terraform/aws/pipeline/default/variables.tf @@ -123,12 +123,6 @@ variable "cloudwatch_logs_retention_days" { # --- Target: Amazon S3 -variable "s3_raw_enabled" { - description = "Whether to enable loading of raw data into S3 from Kinesis" - default = false - type = bool -} - variable "s3_bad_enabled" { description = "Whether to enable loading of bad data into S3 from Kinesis" default = true @@ -141,102 +135,6 @@ variable 
"s3_enriched_enabled" { type = bool } -# --- Target: PostgreSQL - -variable "postgres_db_enabled" { - description = "Whether to enable loading into a Postgres Database" - default = false - type = bool -} - -variable "postgres_db_name" { - description = "The name of the database to connect to" - type = string - default = "" -} - -variable "postgres_db_username" { - description = "The username to use to connect to the database" - type = string - default = "" -} - -variable "postgres_db_password" { - description = "The password to use to connect to the database" - type = string - sensitive = true - default = "" -} - -variable "postgres_db_publicly_accessible" { - description = "Whether to make the Postgres RDS instance accessible over the internet" - type = bool - default = false -} - -variable "postgres_db_ip_allowlist" { - description = "An optional list of CIDR ranges to allow traffic from" - type = list(any) - default = [] -} - -# --- Target: SnowflakeDB - -variable "snowflake_enabled" { - description = "Whether to enable loading into a Snowflake Database" - default = false - type = bool -} - -variable "snowflake_account" { - description = "Snowflake account to use" - type = string - default = "" -} - -variable "snowflake_region" { - description = "Region of Snowflake account" - type = string - default = "" -} - -variable "snowflake_loader_password" { - description = "The password to use for the loader user" - type = string - sensitive = true - default = "" -} - -variable "snowflake_loader_user" { - description = "The Snowflake user used by Snowflake Loader" - type = string - default = "" -} - -variable "snowflake_database" { - description = "Snowflake database name" - type = string - default = "" -} - -variable "snowflake_schema" { - description = "Snowflake schema name" - type = string - default = "" -} - -variable "snowflake_warehouse" { - description = "Snowflake warehouse name" - type = string - default = "" -} - -variable 
"snowflake_transformer_window_period_min" { - description = "Frequency to emit transforming finished message - 5,10,15,20,30,60 etc minutes" - type = number - default = 5 -} - # --- Target: SnowflakeDB Streaming variable "snowflake_streaming_enabled" { diff --git a/terraform/aws/pipeline/secure/README.md b/terraform/aws/pipeline/secure/README.md index deaa851..7f14231 100644 --- a/terraform/aws/pipeline/secure/README.md +++ b/terraform/aws/pipeline/secure/README.md @@ -18,25 +18,19 @@ |------|--------|---------| | [bad\_1\_stream](#module\_bad\_1\_stream) | snowplow-devops/kinesis-stream/aws | 0.3.0 | | [bad\_2\_stream](#module\_bad\_2\_stream) | snowplow-devops/kinesis-stream/aws | 0.3.0 | -| [collector\_kinesis](#module\_collector\_kinesis) | snowplow-devops/collector-kinesis-ec2/aws | 0.9.0 | +| [collector\_kinesis](#module\_collector\_kinesis) | snowplow-devops/collector-kinesis-ec2/aws | 0.10.1 | | [collector\_lb](#module\_collector\_lb) | snowplow-devops/alb/aws | 0.2.0 | -| [db\_loader](#module\_db\_loader) | snowplow-devops/databricks-loader-ec2/aws | 0.2.0 | -| [db\_transformer\_wrp](#module\_db\_transformer\_wrp) | snowplow-devops/transformer-kinesis-ec2/aws | 0.4.0 | -| [enrich\_kinesis](#module\_enrich\_kinesis) | snowplow-devops/enrich-kinesis-ec2/aws | 0.6.0 | +| [db\_loader](#module\_db\_loader) | snowplow-devops/databricks-loader-ec2/aws | 0.3.0 | +| [db\_transformer\_wrp](#module\_db\_transformer\_wrp) | snowplow-devops/transformer-kinesis-ec2/aws | 0.5.0 | +| [enrich\_kinesis](#module\_enrich\_kinesis) | snowplow-devops/enrich-kinesis-ec2/aws | 0.7.1 | | [enriched\_stream](#module\_enriched\_stream) | snowplow-devops/kinesis-stream/aws | 0.3.0 | -| [postgres\_loader\_bad](#module\_postgres\_loader\_bad) | snowplow-devops/postgres-loader-kinesis-ec2/aws | 0.5.0 | -| [postgres\_loader\_enriched](#module\_postgres\_loader\_enriched) | snowplow-devops/postgres-loader-kinesis-ec2/aws | 0.5.0 | -| [postgres\_loader\_rds](#module\_postgres\_loader\_rds) 
| snowplow-devops/rds/aws | 0.4.0 | | [raw\_stream](#module\_raw\_stream) | snowplow-devops/kinesis-stream/aws | 0.3.0 | -| [rs\_loader](#module\_rs\_loader) | snowplow-devops/redshift-loader-ec2/aws | 0.2.0 | -| [rs\_transformer\_stsv](#module\_rs\_transformer\_stsv) | snowplow-devops/transformer-kinesis-ec2/aws | 0.4.0 | -| [s3\_loader\_bad](#module\_s3\_loader\_bad) | snowplow-devops/s3-loader-kinesis-ec2/aws | 0.5.0 | -| [s3\_loader\_enriched](#module\_s3\_loader\_enriched) | snowplow-devops/s3-loader-kinesis-ec2/aws | 0.5.0 | -| [s3\_loader\_raw](#module\_s3\_loader\_raw) | snowplow-devops/s3-loader-kinesis-ec2/aws | 0.5.0 | +| [rs\_loader](#module\_rs\_loader) | snowplow-devops/redshift-loader-ec2/aws | 0.4.0 | +| [rs\_transformer\_stsv](#module\_rs\_transformer\_stsv) | snowplow-devops/transformer-kinesis-ec2/aws | 0.5.0 | +| [s3\_loader\_bad](#module\_s3\_loader\_bad) | snowplow-devops/s3-loader-kinesis-ec2/aws | 0.6.0 | +| [s3\_loader\_enriched](#module\_s3\_loader\_enriched) | snowplow-devops/s3-loader-kinesis-ec2/aws | 0.6.0 | | [s3\_pipeline\_bucket](#module\_s3\_pipeline\_bucket) | snowplow-devops/s3-bucket/aws | 0.2.0 | -| [sf\_loader](#module\_sf\_loader) | snowplow-devops/snowflake-loader-ec2/aws | 0.3.0 | -| [sf\_transformer\_wrj](#module\_sf\_transformer\_wrj) | snowplow-devops/transformer-kinesis-ec2/aws | 0.4.0 | -| [snowflake\_streaming\_loader\_enriched](#module\_snowflake\_streaming\_loader\_enriched) | snowplow-devops/snowflake-streaming-loader-ec2/aws | 0.1.0 | +| [snowflake\_streaming\_loader\_enriched](#module\_snowflake\_streaming\_loader\_enriched) | snowplow-devops/snowflake-streaming-loader-ec2/aws | 0.2.1 | ## Resources @@ -45,7 +39,6 @@ | [aws_key_pair.pipeline](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/key_pair) | resource | | [aws_sqs_queue.db_message_queue](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue) | resource | | 
[aws_sqs_queue.rs_message_queue](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue) | resource | -| [aws_sqs_queue.sf_message_queue](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue) | resource | ## Inputs @@ -74,11 +67,6 @@ | [databricks\_transformer\_window\_period\_min](#input\_databricks\_transformer\_window\_period\_min) | Frequency to emit transforming finished message - 5,10,15,20,30,60 etc minutes | `number` | `5` | no | | [iam\_permissions\_boundary](#input\_iam\_permissions\_boundary) | The permissions boundary ARN to set on IAM roles created | `string` | `""` | no | | [pipeline\_kcl\_write\_max\_capacity](#input\_pipeline\_kcl\_write\_max\_capacity) | Increasing this is important to increase throughput at very high pipeline volumes | `number` | `50` | no | -| [postgres\_db\_enabled](#input\_postgres\_db\_enabled) | Whether to enable loading into a Postgres Database | `bool` | `false` | no | -| [postgres\_db\_ip\_allowlist](#input\_postgres\_db\_ip\_allowlist) | An optional list of CIDR ranges to allow traffic from | `list(any)` | `[]` | no | -| [postgres\_db\_name](#input\_postgres\_db\_name) | The name of the database to connect to | `string` | `""` | no | -| [postgres\_db\_password](#input\_postgres\_db\_password) | The password to use to connect to the database | `string` | `""` | no | -| [postgres\_db\_username](#input\_postgres\_db\_username) | The username to use to connect to the database | `string` | `""` | no | | [redshift\_database](#input\_redshift\_database) | Redshift database name | `string` | `""` | no | | [redshift\_enabled](#input\_redshift\_enabled) | Whether to enable loading into a Redshift Database | `bool` | `false` | no | | [redshift\_host](#input\_redshift\_host) | Redshift cluster hostname | `string` | `""` | no | @@ -90,23 +78,13 @@ | [s3\_bucket\_deploy](#input\_s3\_bucket\_deploy) | Whether this module should create a new bucket with the specified name - 
if the bucket already exists set this to false | `bool` | `true` | no | | [s3\_bucket\_object\_prefix](#input\_s3\_bucket\_object\_prefix) | An optional prefix under which Snowplow data will be saved (Note: your prefix must end with a trailing '/') | `string` | `""` | no | | [s3\_enriched\_enabled](#input\_s3\_enriched\_enabled) | Whether to enable loading of enriched data into S3 from Kinesis | `bool` | `true` | no | -| [s3\_raw\_enabled](#input\_s3\_raw\_enabled) | Whether to enable loading of raw data into S3 from Kinesis | `bool` | `false` | no | -| [snowflake\_account](#input\_snowflake\_account) | Snowflake account to use | `string` | `""` | no | -| [snowflake\_database](#input\_snowflake\_database) | Snowflake database name | `string` | `""` | no | -| [snowflake\_enabled](#input\_snowflake\_enabled) | Whether to enable loading into a Snowflake Database | `bool` | `false` | no | -| [snowflake\_loader\_password](#input\_snowflake\_loader\_password) | The password to use for the loader user | `string` | `""` | no | -| [snowflake\_loader\_user](#input\_snowflake\_loader\_user) | The Snowflake user used by Snowflake Loader | `string` | `""` | no | -| [snowflake\_region](#input\_snowflake\_region) | Region of Snowflake account | `string` | `""` | no | -| [snowflake\_schema](#input\_snowflake\_schema) | Snowflake schema name | `string` | `""` | no | | [snowflake\_streaming\_account\_url](#input\_snowflake\_streaming\_account\_url) | Snowflake account URL to use | `string` | `""` | no | | [snowflake\_streaming\_database](#input\_snowflake\_streaming\_database) | Snowflake database name | `string` | `""` | no | | [snowflake\_streaming\_enabled](#input\_snowflake\_streaming\_enabled) | Whether to enable loading into a Snowflake Database with a Streaming Loader | `bool` | `false` | no | | [snowflake\_streaming\_loader\_private\_key](#input\_snowflake\_streaming\_loader\_private\_key) | The private key to use for the loader user | `string` | `""` | no | | 
[snowflake\_streaming\_loader\_user](#input\_snowflake\_streaming\_loader\_user) | The Snowflake user used by Snowflake Streaming Loader | `string` | `""` | no | | [snowflake\_streaming\_schema](#input\_snowflake\_streaming\_schema) | Snowflake schema name | `string` | `""` | no | -| [snowflake\_transformer\_window\_period\_min](#input\_snowflake\_transformer\_window\_period\_min) | Frequency to emit transforming finished message - 5,10,15,20,30,60 etc minutes | `number` | `5` | no | -| [snowflake\_warehouse](#input\_snowflake\_warehouse) | Snowflake warehouse name | `string` | `""` | no | -| [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer |
object({
enabled = bool
certificate_arn = string
})
|
{
"certificate_arn": "",
"enabled": false
}
| no | +| [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer |
object({
enabled = bool
certificate_arn = string
})
|
{
"certificate_arn": "",
"enabled": false
}
| no | | [tags](#input\_tags) | The tags to append to the resources in this module | `map(string)` | `{}` | no | | [telemetry\_enabled](#input\_telemetry\_enabled) | Whether or not to send telemetry information back to Snowplow Analytics Ltd | `bool` | `true` | no | | [user\_provided\_id](#input\_user\_provided\_id) | An optional unique identifier to identify the telemetry events emitted by this stack | `string` | `""` | no | @@ -116,6 +94,3 @@ | Name | Description | |------|-------------| | [collector\_dns\_name](#output\_collector\_dns\_name) | The ALB DNS name for the Pipeline Collector | -| [postgres\_db\_address](#output\_postgres\_db\_address) | The RDS DNS name where your data is being streamed | -| [postgres\_db\_id](#output\_postgres\_db\_id) | The ID of the RDS instance | -| [postgres\_db\_port](#output\_postgres\_db\_port) | The RDS port where your data is being streamed | diff --git a/terraform/aws/pipeline/secure/main.tf b/terraform/aws/pipeline/secure/main.tf index da0836c..a2fd2a0 100644 --- a/terraform/aws/pipeline/secure/main.tf +++ b/terraform/aws/pipeline/secure/main.tf @@ -86,12 +86,10 @@ module "collector_lb" { module "collector_kinesis" { source = "snowplow-devops/collector-kinesis-ec2/aws" - version = "0.9.0" + version = "0.10.1" accept_limited_use_license = var.accept_limited_use_license - app_version = "3.3.0" - name = "${var.prefix}-collector-server" vpc_id = var.vpc_id subnet_ids = var.private_subnet_ids @@ -120,12 +118,10 @@ module "collector_kinesis" { # 3. 
Deploy Enrichment module "enrich_kinesis" { source = "snowplow-devops/enrich-kinesis-ec2/aws" - version = "0.6.0" + version = "0.7.1" accept_limited_use_license = var.accept_limited_use_license - app_version = "5.2.0" - name = "${var.prefix}-enrich-server" vpc_id = var.vpc_id subnet_ids = var.private_subnet_ids diff --git a/terraform/aws/pipeline/secure/outputs.tf b/terraform/aws/pipeline/secure/outputs.tf index e15163a..1d1e81a 100644 --- a/terraform/aws/pipeline/secure/outputs.tf +++ b/terraform/aws/pipeline/secure/outputs.tf @@ -2,20 +2,3 @@ output "collector_dns_name" { description = "The ALB DNS name for the Pipeline Collector" value = module.collector_lb.dns_name } - -# --- Target: PostgreSQL - -output "postgres_db_address" { - description = "The RDS DNS name where your data is being streamed" - value = var.postgres_db_enabled ? module.postgres_loader_rds[0].address : null -} - -output "postgres_db_port" { - description = "The RDS port where your data is being streamed" - value = var.postgres_db_enabled ? module.postgres_loader_rds[0].port : null -} - -output "postgres_db_id" { - description = "The ID of the RDS instance" - value = var.postgres_db_enabled ? module.postgres_loader_rds[0].id : null -} diff --git a/terraform/aws/pipeline/secure/target_amazon_s3.tf b/terraform/aws/pipeline/secure/target_amazon_s3.tf index 89254e4..d0412dc 100644 --- a/terraform/aws/pipeline/secure/target_amazon_s3.tf +++ b/terraform/aws/pipeline/secure/target_amazon_s3.tf @@ -1,40 +1,6 @@ -module "s3_loader_raw" { - source = "snowplow-devops/s3-loader-kinesis-ec2/aws" - version = "0.5.0" - - accept_limited_use_license = var.accept_limited_use_license - - count = var.s3_raw_enabled ? 
1 : 0 - - name = "${var.prefix}-s3-loader-raw-server" - vpc_id = var.vpc_id - subnet_ids = var.private_subnet_ids - in_stream_name = module.raw_stream.name - bad_stream_name = module.bad_1_stream.name - s3_bucket_name = local.s3_pipeline_bucket_name - s3_object_prefix = "${var.s3_bucket_object_prefix}raw/" - - ssh_key_name = aws_key_pair.pipeline.key_name - ssh_ip_allowlist = var.ssh_ip_allowlist - - telemetry_enabled = var.telemetry_enabled - user_provided_id = var.user_provided_id - - iam_permissions_boundary = var.iam_permissions_boundary - - kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity - - associate_public_ip_address = false - - tags = var.tags - - cloudwatch_logs_enabled = var.cloudwatch_logs_enabled - cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days -} - module "s3_loader_bad" { source = "snowplow-devops/s3-loader-kinesis-ec2/aws" - version = "0.5.0" + version = "0.6.0" accept_limited_use_license = var.accept_limited_use_license @@ -71,7 +37,7 @@ module "s3_loader_bad" { module "s3_loader_enriched" { source = "snowplow-devops/s3-loader-kinesis-ec2/aws" - version = "0.5.0" + version = "0.6.0" accept_limited_use_license = var.accept_limited_use_license diff --git a/terraform/aws/pipeline/secure/target_databricks.tf b/terraform/aws/pipeline/secure/target_databricks.tf index 33304c6..e712748 100644 --- a/terraform/aws/pipeline/secure/target_databricks.tf +++ b/terraform/aws/pipeline/secure/target_databricks.tf @@ -9,7 +9,7 @@ resource "aws_sqs_queue" "db_message_queue" { module "db_transformer_wrp" { source = "snowplow-devops/transformer-kinesis-ec2/aws" - version = "0.4.0" + version = "0.5.0" accept_limited_use_license = var.accept_limited_use_license @@ -19,6 +19,8 @@ module "db_transformer_wrp" { vpc_id = var.vpc_id subnet_ids = var.private_subnet_ids + instance_type = "t3a.large" + stream_name = module.enriched_stream.name s3_bucket_name = local.s3_pipeline_bucket_name s3_bucket_object_prefix = 
"${var.s3_bucket_object_prefix}transformed/good/widerow/parquet" @@ -50,7 +52,7 @@ module "db_transformer_wrp" { module "db_loader" { source = "snowplow-devops/databricks-loader-ec2/aws" - version = "0.2.0" + version = "0.3.0" accept_limited_use_license = var.accept_limited_use_license @@ -60,6 +62,8 @@ module "db_loader" { vpc_id = var.vpc_id subnet_ids = var.private_subnet_ids + instance_type = "t3a.small" + sqs_queue_name = aws_sqs_queue.db_message_queue[0].name deltalake_catalog = var.databricks_catalog diff --git a/terraform/aws/pipeline/secure/target_postgres.tf b/terraform/aws/pipeline/secure/target_postgres.tf deleted file mode 100644 index 78b39f0..0000000 --- a/terraform/aws/pipeline/secure/target_postgres.tf +++ /dev/null @@ -1,108 +0,0 @@ -module "postgres_loader_rds" { - source = "snowplow-devops/rds/aws" - version = "0.5.0" - - count = var.postgres_db_enabled ? 1 : 0 - - name = "${var.prefix}-pipeline-rds" - vpc_id = var.vpc_id - subnet_ids = var.private_subnet_ids - db_name = var.postgres_db_name - db_username = var.postgres_db_username - db_password = var.postgres_db_password - - publicly_accessible = false - additional_ip_allowlist = var.postgres_db_ip_allowlist - - ca_cert_identifier = "rds-ca-rsa2048-g1" - - tags = var.tags -} - -module "postgres_loader_enriched" { - source = "snowplow-devops/postgres-loader-kinesis-ec2/aws" - version = "0.5.0" - - accept_limited_use_license = var.accept_limited_use_license - - count = var.postgres_db_enabled ? 
1 : 0 - - name = "${var.prefix}-postgres-loader-enriched-server" - vpc_id = var.vpc_id - subnet_ids = var.private_subnet_ids - - in_stream_name = module.enriched_stream.name - purpose = "ENRICHED_EVENTS" - schema_name = "atomic" - - ssh_key_name = aws_key_pair.pipeline.key_name - ssh_ip_allowlist = var.ssh_ip_allowlist - - iam_permissions_boundary = var.iam_permissions_boundary - - telemetry_enabled = var.telemetry_enabled - user_provided_id = var.user_provided_id - - # Linking in the custom Iglu Server here - custom_iglu_resolvers = local.custom_iglu_resolvers - - db_sg_id = module.postgres_loader_rds[0].sg_id - db_host = module.postgres_loader_rds[0].address - db_port = module.postgres_loader_rds[0].port - db_name = var.postgres_db_name - db_username = var.postgres_db_username - db_password = var.postgres_db_password - - kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity - - associate_public_ip_address = false - - tags = var.tags - - cloudwatch_logs_enabled = var.cloudwatch_logs_enabled - cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days -} - -module "postgres_loader_bad" { - source = "snowplow-devops/postgres-loader-kinesis-ec2/aws" - version = "0.5.0" - - accept_limited_use_license = var.accept_limited_use_license - - count = var.postgres_db_enabled ? 
1 : 0 - - name = "${var.prefix}-postgres-loader-bad-server" - vpc_id = var.vpc_id - subnet_ids = var.private_subnet_ids - - in_stream_name = module.bad_1_stream.name - purpose = "JSON" - schema_name = "atomic_bad" - - ssh_key_name = aws_key_pair.pipeline.key_name - ssh_ip_allowlist = var.ssh_ip_allowlist - - telemetry_enabled = var.telemetry_enabled - user_provided_id = var.user_provided_id - - iam_permissions_boundary = var.iam_permissions_boundary - - # Linking in the custom Iglu Server here - custom_iglu_resolvers = local.custom_iglu_resolvers - - db_sg_id = module.postgres_loader_rds[0].sg_id - db_host = module.postgres_loader_rds[0].address - db_port = module.postgres_loader_rds[0].port - db_name = var.postgres_db_name - db_username = var.postgres_db_username - db_password = var.postgres_db_password - - kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity - - associate_public_ip_address = false - - tags = var.tags - - cloudwatch_logs_enabled = var.cloudwatch_logs_enabled - cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days -} diff --git a/terraform/aws/pipeline/secure/target_redshift.tf b/terraform/aws/pipeline/secure/target_redshift.tf index a4e8244..ff4a969 100644 --- a/terraform/aws/pipeline/secure/target_redshift.tf +++ b/terraform/aws/pipeline/secure/target_redshift.tf @@ -9,7 +9,7 @@ resource "aws_sqs_queue" "rs_message_queue" { module "rs_transformer_stsv" { source = "snowplow-devops/transformer-kinesis-ec2/aws" - version = "0.4.0" + version = "0.5.0" accept_limited_use_license = var.accept_limited_use_license @@ -19,6 +19,8 @@ module "rs_transformer_stsv" { vpc_id = var.vpc_id subnet_ids = var.private_subnet_ids + instance_type = "t3a.large" + stream_name = module.enriched_stream.name s3_bucket_name = local.s3_pipeline_bucket_name s3_bucket_object_prefix = "${var.s3_bucket_object_prefix}transformed/good/shredded/tsv" @@ -50,7 +52,7 @@ module "rs_transformer_stsv" { module "rs_loader" { source = 
"snowplow-devops/redshift-loader-ec2/aws" - version = "0.2.0" + version = "0.4.0" accept_limited_use_license = var.accept_limited_use_license @@ -60,6 +62,8 @@ module "rs_loader" { vpc_id = var.vpc_id subnet_ids = var.private_subnet_ids + instance_type = "t3a.small" + sqs_queue_name = aws_sqs_queue.rs_message_queue[0].name redshift_host = var.redshift_host diff --git a/terraform/aws/pipeline/secure/target_snowflake.tf b/terraform/aws/pipeline/secure/target_snowflake.tf deleted file mode 100644 index b7aa2fa..0000000 --- a/terraform/aws/pipeline/secure/target_snowflake.tf +++ /dev/null @@ -1,90 +0,0 @@ -resource "aws_sqs_queue" "sf_message_queue" { - count = var.snowflake_enabled ? 1 : 0 - - content_based_deduplication = true - name = "${var.prefix}-sf-loader.fifo" - fifo_queue = true - kms_master_key_id = "alias/aws/sqs" -} - -module "sf_transformer_wrj" { - source = "snowplow-devops/transformer-kinesis-ec2/aws" - version = "0.4.0" - - accept_limited_use_license = var.accept_limited_use_license - - count = var.snowflake_enabled ? 
1 : 0 - - name = "${var.prefix}-transformer-server-wrj" - vpc_id = var.vpc_id - subnet_ids = var.private_subnet_ids - - stream_name = module.enriched_stream.name - s3_bucket_name = local.s3_pipeline_bucket_name - s3_bucket_object_prefix = "${var.s3_bucket_object_prefix}transformed/good/widerow/json" - window_period_min = var.snowflake_transformer_window_period_min - sqs_queue_name = aws_sqs_queue.sf_message_queue[0].name - - transformation_type = "widerow" - widerow_file_format = "json" - - ssh_key_name = aws_key_pair.pipeline.key_name - ssh_ip_allowlist = var.ssh_ip_allowlist - - custom_iglu_resolvers = local.custom_iglu_resolvers - - telemetry_enabled = var.telemetry_enabled - user_provided_id = var.user_provided_id - - iam_permissions_boundary = var.iam_permissions_boundary - - kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity - - associate_public_ip_address = false - - tags = var.tags - - cloudwatch_logs_enabled = var.cloudwatch_logs_enabled - cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days -} - -module "sf_loader" { - source = "snowplow-devops/snowflake-loader-ec2/aws" - version = "0.3.0" - - accept_limited_use_license = var.accept_limited_use_license - - count = var.snowflake_enabled ? 
1 : 0 - - name = "${var.prefix}-sf-loader-server" - vpc_id = var.vpc_id - subnet_ids = var.private_subnet_ids - - sqs_queue_name = aws_sqs_queue.sf_message_queue[0].name - - snowflake_loader_user = var.snowflake_loader_user - snowflake_password = var.snowflake_loader_password - snowflake_warehouse = var.snowflake_warehouse - snowflake_database = var.snowflake_database - snowflake_schema = var.snowflake_schema - snowflake_region = var.snowflake_region - snowflake_account = var.snowflake_account - snowflake_aws_s3_bucket_name = local.s3_pipeline_bucket_name - - ssh_key_name = aws_key_pair.pipeline.key_name - ssh_ip_allowlist = var.ssh_ip_allowlist - - custom_iglu_resolvers = local.custom_iglu_resolvers - - telemetry_enabled = var.telemetry_enabled - user_provided_id = var.user_provided_id - - iam_permissions_boundary = var.iam_permissions_boundary - - associate_public_ip_address = false - - tags = var.tags - - cloudwatch_logs_enabled = var.cloudwatch_logs_enabled - cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days -} diff --git a/terraform/aws/pipeline/secure/target_snowflake_streaming.tf b/terraform/aws/pipeline/secure/target_snowflake_streaming.tf index 858d856..7fad6f6 100644 --- a/terraform/aws/pipeline/secure/target_snowflake_streaming.tf +++ b/terraform/aws/pipeline/secure/target_snowflake_streaming.tf @@ -1,6 +1,6 @@ module "snowflake_streaming_loader_enriched" { source = "snowplow-devops/snowflake-streaming-loader-ec2/aws" - version = "0.1.0" + version = "0.2.1" accept_limited_use_license = var.accept_limited_use_license diff --git a/terraform/aws/pipeline/secure/terraform.tfvars b/terraform/aws/pipeline/secure/terraform.tfvars index b48e94f..b9f2a81 100644 --- a/terraform/aws/pipeline/secure/terraform.tfvars +++ b/terraform/aws/pipeline/secure/terraform.tfvars @@ -43,37 +43,9 @@ ssl_information = { # --- TARGETS CONFIGURATION ZONE --- # # --- Target: Amazon S3 -s3_raw_enabled = false s3_bad_enabled = true s3_enriched_enabled = true -# --- 
Target: PostgreSQL -postgres_db_enabled = false - -postgres_db_name = "snowplow" -postgres_db_username = "snowplow" -# Change and keep this secret! -postgres_db_password = "Hell0W0rld!2" -# IP ranges that you want to query the Pipeline Postgres RDS from -# Note: this exposes your data to the internet - take care to ensure your allowlist is strict enough -# or provide a way to access the database through the VPC instead -postgres_db_ip_allowlist = ["999.999.999.999/32", "888.888.888.888/32"] - -# --- Target: Snowflake -# Follow the guide to get input values for the loader: -# https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start-aws -snowflake_enabled = false - -snowflake_account = "" -snowflake_region = "" -snowflake_loader_user = "" -snowflake_loader_password = "" -snowflake_database = "" -snowflake_schema = "" -snowflake_warehouse = "" -# This controls how often data will be loading into Snowflake -snowflake_transformer_window_period_min = 1 - # --- Target: Snowflake Streaming # Follow the guide to get input values for the loader: # https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start-aws diff --git a/terraform/aws/pipeline/secure/variables.tf b/terraform/aws/pipeline/secure/variables.tf index 912af35..4038d41 100644 --- a/terraform/aws/pipeline/secure/variables.tf +++ b/terraform/aws/pipeline/secure/variables.tf @@ -128,12 +128,6 @@ variable "cloudwatch_logs_retention_days" { # --- Target: Amazon S3 -variable "s3_raw_enabled" { - description = "Whether to enable loading of raw data into S3 from Kinesis" - default = false - type = bool -} - variable "s3_bad_enabled" { description = "Whether to enable loading of bad data into S3 from Kinesis" default = true @@ -146,96 +140,6 @@ variable "s3_enriched_enabled" { type = bool } -# --- Target: PostgreSQL - -variable "postgres_db_enabled" { - description = "Whether to enable loading into a Postgres Database" - default = false - type = bool -} - -variable 
"postgres_db_name" { - description = "The name of the database to connect to" - type = string - default = "" -} - -variable "postgres_db_username" { - description = "The username to use to connect to the database" - type = string - default = "" -} - -variable "postgres_db_password" { - description = "The password to use to connect to the database" - type = string - sensitive = true - default = "" -} - -variable "postgres_db_ip_allowlist" { - description = "An optional list of CIDR ranges to allow traffic from" - type = list(any) - default = [] -} - -# --- Target: SnowflakeDB - -variable "snowflake_enabled" { - description = "Whether to enable loading into a Snowflake Database" - default = false - type = bool -} - -variable "snowflake_account" { - description = "Snowflake account to use" - type = string - default = "" -} - -variable "snowflake_region" { - description = "Region of Snowflake account" - type = string - default = "" -} - -variable "snowflake_loader_password" { - description = "The password to use for the loader user" - type = string - sensitive = true - default = "" -} - -variable "snowflake_loader_user" { - description = "The Snowflake user used by Snowflake Loader" - type = string - default = "" -} - -variable "snowflake_database" { - description = "Snowflake database name" - type = string - default = "" -} - -variable "snowflake_schema" { - description = "Snowflake schema name" - type = string - default = "" -} - -variable "snowflake_warehouse" { - description = "Snowflake warehouse name" - type = string - default = "" -} - -variable "snowflake_transformer_window_period_min" { - description = "Frequency to emit transforming finished message - 5,10,15,20,30,60 etc minutes" - type = number - default = 5 -} - # --- Target: SnowflakeDB Streaming variable "snowflake_streaming_enabled" { diff --git a/terraform/azure/base/README.md b/terraform/azure/base/README.md index 5a75f29..3e636a6 100644 --- a/terraform/azure/base/README.md +++ 
b/terraform/azure/base/README.md @@ -15,7 +15,7 @@ | Name | Source | Version | |------|--------|---------| -| [vnet](#module\_vnet) | snowplow-devops/vnet/azurerm | 0.1.2 | +| [vnet](#module\_vnet) | snowplow-devops/vnet/azurerm | 0.2.0 | ## Resources diff --git a/terraform/azure/base/main.tf b/terraform/azure/base/main.tf index e586d3a..f3783b4 100644 --- a/terraform/azure/base/main.tf +++ b/terraform/azure/base/main.tf @@ -7,7 +7,7 @@ resource "azurerm_resource_group" "rg" { module "vnet" { source = "snowplow-devops/vnet/azurerm" - version = "0.1.2" + version = "0.2.0" name = "${var.prefix}-vnet" resource_group_name = azurerm_resource_group.rg.name diff --git a/terraform/azure/iglu_server/README.md b/terraform/azure/iglu_server/README.md index 1959c13..bc88de6 100644 --- a/terraform/azure/iglu_server/README.md +++ b/terraform/azure/iglu_server/README.md @@ -13,9 +13,9 @@ No providers. | Name | Source | Version | |------|--------|---------| -| [iglu\_db](#module\_iglu\_db) | snowplow-devops/postgresql-server/azurerm | 0.1.1 | +| [iglu\_db](#module\_iglu\_db) | snowplow-devops/postgresql-server/azurerm | 0.2.0 | | [iglu\_lb](#module\_iglu\_lb) | snowplow-devops/lb/azurerm | 0.2.0 | -| [iglu\_server](#module\_iglu\_server) | snowplow-devops/iglu-server-vmss/azurerm | 0.2.0 | +| [iglu\_server](#module\_iglu\_server) | snowplow-devops/iglu-server-vmss/azurerm | 0.3.0 | ## Resources @@ -33,11 +33,12 @@ No resources. | [resource\_group\_name](#input\_resource\_group\_name) | The name of the resource group to deploy resources within | `string` | n/a | yes | | [ssh\_ip\_allowlist](#input\_ssh\_ip\_allowlist) | The list of CIDR ranges to allow SSH traffic from | `list(any)` | n/a | yes | | [ssh\_public\_key](#input\_ssh\_public\_key) | The SSH public key to use for the deployment | `string` | n/a | yes | +| [subnet\_id\_database](#input\_subnet\_id\_database) | The ID of the subnet to deploy the database into (e.g. 
iglu1) | `string` | n/a | yes | | [subnet\_id\_lb](#input\_subnet\_id\_lb) | The ID of the subnet to deploy the load balancer into (e.g. iglu-agw1) | `string` | n/a | yes | -| [subnet\_id\_servers](#input\_subnet\_id\_servers) | The ID of the subnet to deploy the servers into (e.g. iglu1) | `string` | n/a | yes | +| [subnet\_id\_servers](#input\_subnet\_id\_servers) | The ID of the subnet to deploy the servers into (e.g. iglu-vmss1) | `string` | n/a | yes | +| [vnet\_id](#input\_vnet\_id) | The ID of the VNet being deployed into | `string` | n/a | yes | | [accept\_limited\_use\_license](#input\_accept\_limited\_use\_license) | Acceptance of the SLULA terms (https://docs.snowplow.io/limited-use-license-1.0/) | `bool` | `false` | no | -| [iglu\_db\_ip\_allowlist](#input\_iglu\_db\_ip\_allowlist) | An optional list of CIDR ranges to allow traffic from | `list(any)` | `[]` | no | -| [ssl\_information](#input\_ssl\_information) | SSL certificate information to optionally bind to the load balancer |
object({
enabled = bool
data = string
password = string
})
|
{
"data": "",
"enabled": false,
"password": ""
}
| no | +| [ssl\_information](#input\_ssl\_information) | SSL certificate information to optionally bind to the load balancer |
object({
enabled = bool
data = string
password = string
})
|
{
"data": "",
"enabled": false,
"password": ""
}
| no | | [tags](#input\_tags) | The tags to append to the resources in this module | `map(string)` | `{}` | no | | [telemetry\_enabled](#input\_telemetry\_enabled) | Whether or not to send telemetry information back to Snowplow Analytics Ltd | `bool` | `true` | no | | [user\_provided\_id](#input\_user\_provided\_id) | An optional unique identifier to identify the telemetry events emitted by this stack | `string` | `""` | no | diff --git a/terraform/azure/iglu_server/main.tf b/terraform/azure/iglu_server/main.tf index bf9e0c3..48b8a3d 100644 --- a/terraform/azure/iglu_server/main.tf +++ b/terraform/azure/iglu_server/main.tf @@ -1,13 +1,12 @@ module "iglu_db" { source = "snowplow-devops/postgresql-server/azurerm" - version = "0.1.1" + version = "0.2.0" name = "${var.prefix}-iglu-db" resource_group_name = var.resource_group_name - subnet_id = var.subnet_id_servers - - additional_ip_allowlist = var.iglu_db_ip_allowlist + vnet_id = var.vnet_id + subnet_id = var.subnet_id_database db_name = var.iglu_db_name db_username = var.iglu_db_username @@ -35,12 +34,10 @@ module "iglu_lb" { module "iglu_server" { source = "snowplow-devops/iglu-server-vmss/azurerm" - version = "0.2.0" + version = "0.3.0" accept_limited_use_license = var.accept_limited_use_license - app_version = "0.14.0" - name = "${var.prefix}-iglu-server" resource_group_name = var.resource_group_name subnet_id = var.subnet_id_servers diff --git a/terraform/azure/iglu_server/terraform.tfvars b/terraform/azure/iglu_server/terraform.tfvars index c3cbb1f..e21cc35 100644 --- a/terraform/azure/iglu_server/terraform.tfvars +++ b/terraform/azure/iglu_server/terraform.tfvars @@ -8,9 +8,15 @@ prefix = "snowplow" # The name of the resource group to deploy Iglu into resource_group_name = "" +# ID of the VNet being deployed into +vnet_id = "" + # ID of the dedicated subnet to deploy the load balancer into subnet_id_lb = "" +# ID of the subnet to deploy the actual Iglu Server database into +subnet_id_database = "" + # ID of the 
subnet to deploy the actual Iglu Server application into subnet_id_servers = "" diff --git a/terraform/azure/iglu_server/variables.tf b/terraform/azure/iglu_server/variables.tf index 7459a6b..2054416 100644 --- a/terraform/azure/iglu_server/variables.tf +++ b/terraform/azure/iglu_server/variables.tf @@ -19,13 +19,23 @@ variable "resource_group_name" { type = string } +variable "vnet_id" { + description = "The ID of the VNet being deployed into" + type = string +} + variable "subnet_id_lb" { description = "The ID of the subnet to deploy the load balancer into (e.g. iglu-agw1)" type = string } +variable "subnet_id_database" { + description = "The ID of the subnet to deploy the database into (e.g. iglu1)" + type = string +} + variable "subnet_id_servers" { - description = "The ID of the subnet to deploy the servers into (e.g. iglu1)" + description = "The ID of the subnet to deploy the servers into (e.g. iglu-vmss1)" type = string } @@ -66,12 +76,6 @@ variable "iglu_super_api_key" { sensitive = true } -variable "iglu_db_ip_allowlist" { - description = "An optional list of CIDR ranges to allow traffic from" - type = list(any) - default = [] -} - variable "ssl_information" { description = "SSL certificate information to optionally bind to the load balancer" type = object({ diff --git a/terraform/azure/pipeline/README.md b/terraform/azure/pipeline/README.md index 63199d5..0631ae7 100644 --- a/terraform/azure/pipeline/README.md +++ b/terraform/azure/pipeline/README.md @@ -3,6 +3,7 @@ | Name | Version | |------|---------| | [terraform](#requirement\_terraform) | >= 1.0.0 | +| [azuread](#requirement\_azuread) | >= 2.43.0, < 2.44.0 | | [azurerm](#requirement\_azurerm) | >= 3.58.0 | ## Providers @@ -14,19 +15,15 @@ No providers. 
| Name | Source | Version | |------|--------|---------| | [bad\_1\_eh\_topic](#module\_bad\_1\_eh\_topic) | snowplow-devops/event-hub/azurerm | 0.1.1 | -| [collector\_eh](#module\_collector\_eh) | snowplow-devops/collector-event-hub-vmss/azurerm | 0.3.0 | +| [collector\_eh](#module\_collector\_eh) | snowplow-devops/collector-event-hub-vmss/azurerm | 0.4.0 | | [collector\_lb](#module\_collector\_lb) | snowplow-devops/lb/azurerm | 0.2.0 | | [eh\_namespace](#module\_eh\_namespace) | snowplow-devops/event-hub-namespace/azurerm | 0.1.1 | -| [enrich\_eh](#module\_enrich\_eh) | snowplow-devops/enrich-event-hub-vmss/azurerm | 0.3.0 | +| [enrich\_eh](#module\_enrich\_eh) | snowplow-devops/enrich-event-hub-vmss/azurerm | 0.4.0 | | [enriched\_eh\_topic](#module\_enriched\_eh\_topic) | snowplow-devops/event-hub/azurerm | 0.1.1 | -| [lake\_loader](#module\_lake\_loader) | snowplow-devops/lake-loader-vmss/azurerm | 0.3.0 | +| [lake\_loader](#module\_lake\_loader) | snowplow-devops/lake-loader-vmss/azurerm | 0.4.0 | | [lake\_storage\_container](#module\_lake\_storage\_container) | snowplow-devops/storage-container/azurerm | 0.1.1 | | [raw\_eh\_topic](#module\_raw\_eh\_topic) | snowplow-devops/event-hub/azurerm | 0.1.1 | -| [sf\_loader](#module\_sf\_loader) | snowplow-devops/snowflake-loader-vmss/azurerm | 0.3.0 | -| [sf\_message\_queue\_eh\_topic](#module\_sf\_message\_queue\_eh\_topic) | snowplow-devops/event-hub/azurerm | 0.1.1 | -| [sf\_transformer\_storage\_container](#module\_sf\_transformer\_storage\_container) | snowplow-devops/storage-container/azurerm | 0.1.1 | -| [sf\_transformer\_wrj](#module\_sf\_transformer\_wrj) | snowplow-devops/transformer-event-hub-vmss/azurerm | 0.3.0 | -| [storage\_account](#module\_storage\_account) | snowplow-devops/storage-account/azurerm | 0.1.2 | +| [storage\_account](#module\_storage\_account) | snowplow-devops/storage-account/azurerm | 0.1.3 | ## Resources @@ -52,18 +49,8 @@ No resources. 
| [confluent\_cloud\_bootstrap\_server](#input\_confluent\_cloud\_bootstrap\_server) | Confluent Cloud cluster bootstrap server | `string` | `""` | no | | [confluent\_cloud\_enriched\_topic\_name](#input\_confluent\_cloud\_enriched\_topic\_name) | Confluent Cloud 'enriched' topic name | `string` | `"enriched"` | no | | [confluent\_cloud\_raw\_topic\_name](#input\_confluent\_cloud\_raw\_topic\_name) | Confluent Cloud 'raw' topic name | `string` | `"raw"` | no | -| [confluent\_cloud\_snowflake\_loader\_topic\_name](#input\_confluent\_cloud\_snowflake\_loader\_topic\_name) | Confluent Cloud 'snowflake-loader' topic name | `string` | `"snowflake-loader"` | no | | [lake\_enabled](#input\_lake\_enabled) | Whether to load all data into a Storage Container to build a data-lake based on Delta format | `bool` | `false` | no | -| [snowflake\_account](#input\_snowflake\_account) | Snowflake account to use | `string` | `""` | no | -| [snowflake\_database](#input\_snowflake\_database) | Snowflake database name | `string` | `""` | no | -| [snowflake\_enabled](#input\_snowflake\_enabled) | Whether to enable loading into a Snowflake Database | `bool` | `false` | no | -| [snowflake\_loader\_password](#input\_snowflake\_loader\_password) | The password to use for the loader user | `string` | `""` | no | -| [snowflake\_loader\_user](#input\_snowflake\_loader\_user) | The Snowflake user used by Snowflake Loader | `string` | `""` | no | -| [snowflake\_region](#input\_snowflake\_region) | Region of Snowflake account | `string` | `""` | no | -| [snowflake\_schema](#input\_snowflake\_schema) | Snowflake schema name | `string` | `""` | no | -| [snowflake\_transformer\_window\_period\_min](#input\_snowflake\_transformer\_window\_period\_min) | Frequency to emit transforming finished message - 5,10,15,20,30,60 etc minutes | `number` | `5` | no | -| [snowflake\_warehouse](#input\_snowflake\_warehouse) | Snowflake warehouse name | `string` | `""` | no | -| 
[ssl\_information](#input\_ssl\_information) | SSL certificate information to optionally bind to the load balancer |
object({
enabled = bool
data = string
password = string
})
|
{
"data": "",
"enabled": false,
"password": ""
}
| no | +| [ssl\_information](#input\_ssl\_information) | SSL certificate information to optionally bind to the load balancer |
object({
enabled = bool
data = string
password = string
})
|
{
"data": "",
"enabled": false,
"password": ""
}
| no | | [storage\_account\_deploy](#input\_storage\_account\_deploy) | Whether this module should create a new storage account with the specified name - if the account already exists set this to false | `bool` | `true` | no | | [stream\_type](#input\_stream\_type) | The stream type to use as the Kafka Cluster between components (options: azure\_event\_hubs, confluent\_cloud) | `string` | `"azure_event_hubs"` | no | | [tags](#input\_tags) | The tags to append to the resources in this module | `map(string)` | `{}` | no | diff --git a/terraform/azure/pipeline/main.tf b/terraform/azure/pipeline/main.tf index 8004ddd..df7c1c4 100644 --- a/terraform/azure/pipeline/main.tf +++ b/terraform/azure/pipeline/main.tf @@ -111,12 +111,10 @@ module "collector_lb" { module "collector_eh" { source = "snowplow-devops/collector-event-hub-vmss/azurerm" - version = "0.3.0" + version = "0.4.0" accept_limited_use_license = var.accept_limited_use_license - app_version = "3.3.0" - name = "${var.prefix}-collector" resource_group_name = var.resource_group_name subnet_id = var.subnet_id_servers @@ -147,12 +145,10 @@ module "collector_eh" { # 4. 
Deploy Enrich stack module "enrich_eh" { source = "snowplow-devops/enrich-event-hub-vmss/azurerm" - version = "0.3.0" + version = "0.4.0" accept_limited_use_license = var.accept_limited_use_license - app_version = "5.2.0" - name = "${var.prefix}-enrich" resource_group_name = var.resource_group_name subnet_id = var.subnet_id_servers diff --git a/terraform/azure/pipeline/target_lake_loader.tf b/terraform/azure/pipeline/target_lake_loader.tf index 81e1207..e44dda4 100644 --- a/terraform/azure/pipeline/target_lake_loader.tf +++ b/terraform/azure/pipeline/target_lake_loader.tf @@ -10,7 +10,7 @@ module "lake_storage_container" { module "lake_loader" { source = "snowplow-devops/lake-loader-vmss/azurerm" - version = "0.3.0" + version = "0.4.0" accept_limited_use_license = var.accept_limited_use_license diff --git a/terraform/azure/pipeline/target_snowflake.tf b/terraform/azure/pipeline/target_snowflake.tf deleted file mode 100644 index cfd802a..0000000 --- a/terraform/azure/pipeline/target_snowflake.tf +++ /dev/null @@ -1,110 +0,0 @@ -module "sf_message_queue_eh_topic" { - source = "snowplow-devops/event-hub/azurerm" - version = "0.1.1" - - count = local.use_azure_event_hubs && var.snowflake_enabled ? 1 : 0 - - name = "snowflake-loader-topic" - namespace_name = join("", module.eh_namespace.*.name) - resource_group_name = var.resource_group_name -} - -locals { - snowflake_loader_topic_name = local.use_azure_event_hubs ? join("", module.sf_message_queue_eh_topic.*.name) : var.confluent_cloud_snowflake_loader_topic_name -} - -module "sf_transformer_storage_container" { - source = "snowplow-devops/storage-container/azurerm" - version = "0.1.1" - - count = var.snowflake_enabled ? 
1 : 0 - - name = "snowflake-transformer-container" - storage_account_name = local.storage_account_name -} - -module "sf_transformer_wrj" { - source = "snowplow-devops/transformer-event-hub-vmss/azurerm" - version = "0.3.0" - - accept_limited_use_license = var.accept_limited_use_license - - count = var.snowflake_enabled ? 1 : 0 - - name = "${var.prefix}-snowflake-transformer" - resource_group_name = var.resource_group_name - subnet_id = var.subnet_id_servers - - enriched_topic_name = local.enriched_topic_name - enriched_topic_kafka_username = local.kafka_username - enriched_topic_kafka_password = local.use_azure_event_hubs ? join("", module.enriched_eh_topic.*.read_only_primary_connection_string) : var.confluent_cloud_api_secret - queue_topic_name = local.snowflake_loader_topic_name - queue_topic_kafka_username = local.kafka_username - queue_topic_kafka_password = local.use_azure_event_hubs ? join("", module.sf_message_queue_eh_topic.*.read_write_primary_connection_string) : var.confluent_cloud_api_secret - eh_namespace_name = local.eh_namespace_name - kafka_brokers = local.kafka_brokers - - kafka_source = var.stream_type - - storage_account_name = local.storage_account_name - storage_container_name = module.sf_transformer_storage_container[0].name - window_period_min = var.snowflake_transformer_window_period_min - - widerow_file_format = "json" - - ssh_public_key = var.ssh_public_key - ssh_ip_allowlist = var.ssh_ip_allowlist - - telemetry_enabled = var.telemetry_enabled - user_provided_id = var.user_provided_id - - custom_iglu_resolvers = local.custom_iglu_resolvers - - tags = var.tags - - depends_on = [module.sf_transformer_storage_container] -} - -module "sf_loader" { - source = "snowplow-devops/snowflake-loader-vmss/azurerm" - version = "0.3.0" - - accept_limited_use_license = var.accept_limited_use_license - - count = var.snowflake_enabled ? 
1 : 0 - - name = "${var.prefix}-snowflake-loader" - resource_group_name = var.resource_group_name - subnet_id = var.subnet_id_servers - - queue_topic_name = local.snowflake_loader_topic_name - queue_topic_kafka_username = local.kafka_username - queue_topic_kafka_password = local.use_azure_event_hubs ? join("", module.sf_message_queue_eh_topic.*.read_only_primary_connection_string) : var.confluent_cloud_api_secret - eh_namespace_name = local.eh_namespace_name - kafka_brokers = local.kafka_brokers - - kafka_source = var.stream_type - - storage_account_name = local.storage_account_name - storage_container_name_for_transformer_output = module.sf_transformer_storage_container[0].name - - snowflake_loader_user = var.snowflake_loader_user - snowflake_password = var.snowflake_loader_password - snowflake_warehouse = var.snowflake_warehouse - snowflake_database = var.snowflake_database - snowflake_schema = var.snowflake_schema - snowflake_region = var.snowflake_region - snowflake_account = var.snowflake_account - - ssh_public_key = var.ssh_public_key - ssh_ip_allowlist = var.ssh_ip_allowlist - - telemetry_enabled = var.telemetry_enabled - user_provided_id = var.user_provided_id - - custom_iglu_resolvers = local.custom_iglu_resolvers - - tags = var.tags - - depends_on = [module.sf_transformer_storage_container] -} diff --git a/terraform/azure/pipeline/terraform.tfvars b/terraform/azure/pipeline/terraform.tfvars index 2bfd17d..19a2641 100644 --- a/terraform/azure/pipeline/terraform.tfvars +++ b/terraform/azure/pipeline/terraform.tfvars @@ -58,25 +58,9 @@ confluent_cloud_api_secret = "" confluent_cloud_bootstrap_server = "" # Names of the created topics within the deployed cluster -confluent_cloud_raw_topic_name = "raw" -confluent_cloud_enriched_topic_name = "enriched" -confluent_cloud_bad_1_topic_name = "bad-1" -confluent_cloud_snowflake_loader_topic_name = "snowflake-loader" - -# --- Target: Snowflake -# Follow the guide to get input values for the loader: -# 
https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start/ -snowflake_enabled = false - -snowflake_account = "" -snowflake_region = "" -snowflake_loader_user = "" -snowflake_loader_password = "" -snowflake_database = "" -snowflake_schema = "" -snowflake_warehouse = "" -# This controls how often data will be loading into Snowflake -snowflake_transformer_window_period_min = 1 +confluent_cloud_raw_topic_name = "raw" +confluent_cloud_enriched_topic_name = "enriched" +confluent_cloud_bad_1_topic_name = "bad-1" # --- Target: Lake # Follow the guide to get input values for the loader: diff --git a/terraform/azure/pipeline/variables.tf b/terraform/azure/pipeline/variables.tf index a984885..bb8befd 100644 --- a/terraform/azure/pipeline/variables.tf +++ b/terraform/azure/pipeline/variables.tf @@ -152,69 +152,6 @@ variable "confluent_cloud_bad_1_topic_name" { type = string } -variable "confluent_cloud_snowflake_loader_topic_name" { - description = "Confluent Cloud 'snowflake-loader' topic name" - default = "snowflake-loader" - type = string -} - -# --- Target: SnowflakeDB - -variable "snowflake_enabled" { - description = "Whether to enable loading into a Snowflake Database" - default = false - type = bool -} - -variable "snowflake_account" { - description = "Snowflake account to use" - type = string - default = "" -} - -variable "snowflake_region" { - description = "Region of Snowflake account" - type = string - default = "" -} - -variable "snowflake_loader_password" { - description = "The password to use for the loader user" - type = string - sensitive = true - default = "" -} - -variable "snowflake_loader_user" { - description = "The Snowflake user used by Snowflake Loader" - type = string - default = "" -} - -variable "snowflake_database" { - description = "Snowflake database name" - type = string - default = "" -} - -variable "snowflake_schema" { - description = "Snowflake schema name" - type = string - default = "" -} - -variable 
"snowflake_warehouse" { - description = "Snowflake warehouse name" - type = string - default = "" -} - -variable "snowflake_transformer_window_period_min" { - description = "Frequency to emit transforming finished message - 5,10,15,20,30,60 etc minutes" - type = number - default = 5 -} - # --- Target: Lake variable "lake_enabled" { diff --git a/terraform/gcp/iglu_server/default/README.md b/terraform/gcp/iglu_server/default/README.md index f974971..9f6837e 100644 --- a/terraform/gcp/iglu_server/default/README.md +++ b/terraform/gcp/iglu_server/default/README.md @@ -16,7 +16,7 @@ No providers. |------|--------|---------| | [iglu\_db](#module\_iglu\_db) | snowplow-devops/cloud-sql/google | 0.4.1 | | [iglu\_lb](#module\_iglu\_lb) | snowplow-devops/lb/google | 0.3.0 | -| [iglu\_server](#module\_iglu\_server) | snowplow-devops/iglu-server-ce/google | 0.6.0 | +| [iglu\_server](#module\_iglu\_server) | snowplow-devops/iglu-server-ce/google | 0.7.0 | ## Resources diff --git a/terraform/gcp/iglu_server/default/main.tf b/terraform/gcp/iglu_server/default/main.tf index b4a390b..e678ef9 100644 --- a/terraform/gcp/iglu_server/default/main.tf +++ b/terraform/gcp/iglu_server/default/main.tf @@ -19,12 +19,10 @@ module "iglu_db" { module "iglu_server" { source = "snowplow-devops/iglu-server-ce/google" - version = "0.6.0" + version = "0.7.0" accept_limited_use_license = var.accept_limited_use_license - app_version = "0.14.0" - name = "${var.prefix}-iglu-server" project_id = var.project_id diff --git a/terraform/gcp/iglu_server/secure/README.md b/terraform/gcp/iglu_server/secure/README.md index f974971..9f6837e 100644 --- a/terraform/gcp/iglu_server/secure/README.md +++ b/terraform/gcp/iglu_server/secure/README.md @@ -16,7 +16,7 @@ No providers. 
|------|--------|---------| | [iglu\_db](#module\_iglu\_db) | snowplow-devops/cloud-sql/google | 0.4.1 | | [iglu\_lb](#module\_iglu\_lb) | snowplow-devops/lb/google | 0.3.0 | -| [iglu\_server](#module\_iglu\_server) | snowplow-devops/iglu-server-ce/google | 0.6.0 | +| [iglu\_server](#module\_iglu\_server) | snowplow-devops/iglu-server-ce/google | 0.7.0 | ## Resources diff --git a/terraform/gcp/iglu_server/secure/main.tf b/terraform/gcp/iglu_server/secure/main.tf index caee185..baf4d82 100644 --- a/terraform/gcp/iglu_server/secure/main.tf +++ b/terraform/gcp/iglu_server/secure/main.tf @@ -19,12 +19,10 @@ module "iglu_db" { module "iglu_server" { source = "snowplow-devops/iglu-server-ce/google" - version = "0.6.0" + version = "0.7.0" accept_limited_use_license = var.accept_limited_use_license - app_version = "0.14.0" - name = "${var.prefix}-iglu-server" project_id = var.project_id diff --git a/terraform/gcp/pipeline/default/README.md b/terraform/gcp/pipeline/default/README.md index 0e0b578..e733e73 100644 --- a/terraform/gcp/pipeline/default/README.md +++ b/terraform/gcp/pipeline/default/README.md @@ -17,15 +17,12 @@ | Name | Source | Version | |------|--------|---------| | [bad\_1\_topic](#module\_bad\_1\_topic) | snowplow-devops/pubsub-topic/google | 0.3.0 | -| [bigquery\_loader](#module\_bigquery\_loader) | snowplow-devops/bigquery-loader-pubsub-ce/google | 0.4.0 | +| [bigquery\_loader](#module\_bigquery\_loader) | snowplow-devops/bigquery-loader-pubsub-ce/google | 0.5.0 | | [bq\_bad\_rows\_topic](#module\_bq\_bad\_rows\_topic) | snowplow-devops/pubsub-topic/google | 0.3.0 | | [collector\_lb](#module\_collector\_lb) | snowplow-devops/lb/google | 0.3.0 | -| [collector\_pubsub](#module\_collector\_pubsub) | snowplow-devops/collector-pubsub-ce/google | 0.6.0 | -| [enrich\_pubsub](#module\_enrich\_pubsub) | snowplow-devops/enrich-pubsub-ce/google | 0.4.0 | +| [collector\_pubsub](#module\_collector\_pubsub) | snowplow-devops/collector-pubsub-ce/google | 0.7.0 | +| 
[enrich\_pubsub](#module\_enrich\_pubsub) | snowplow-devops/enrich-pubsub-ce/google | 0.5.0 | | [enriched\_topic](#module\_enriched\_topic) | snowplow-devops/pubsub-topic/google | 0.3.0 | -| [postgres\_db](#module\_postgres\_db) | snowplow-devops/cloud-sql/google | 0.4.1 | -| [postgres\_loader\_bad](#module\_postgres\_loader\_bad) | snowplow-devops/postgres-loader-pubsub-ce/google | 0.5.0 | -| [postgres\_loader\_enriched](#module\_postgres\_loader\_enriched) | snowplow-devops/postgres-loader-pubsub-ce/google | 0.5.0 | | [raw\_topic](#module\_raw\_topic) | snowplow-devops/pubsub-topic/google | 0.3.0 | ## Resources @@ -33,7 +30,6 @@ | Name | Type | |------|------| | [google_bigquery_dataset.bigquery_db](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/bigquery_dataset) | resource | -| [google_storage_bucket.bq_loader_dead_letter_bucket](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/storage_bucket) | resource | ## Inputs @@ -42,9 +38,6 @@ | [iglu\_server\_dns\_name](#input\_iglu\_server\_dns\_name) | The DNS name of your Iglu Server | `string` | n/a | yes | | [iglu\_super\_api\_key](#input\_iglu\_super\_api\_key) | A UUIDv4 string to use as the master API key for Iglu Server management | `string` | n/a | yes | | [network](#input\_network) | The name of the network to deploy within | `string` | n/a | yes | -| [postgres\_db\_name](#input\_postgres\_db\_name) | The name of the database to connect to | `string` | n/a | yes | -| [postgres\_db\_password](#input\_postgres\_db\_password) | The password to use to connect to the database | `string` | n/a | yes | -| [postgres\_db\_username](#input\_postgres\_db\_username) | The username to use to connect to the database | `string` | n/a | yes | | [prefix](#input\_prefix) | Will be prefixed to all resource names. 
Use to easily identify the resources created | `string` | n/a | yes | | [project\_id](#input\_project\_id) | The project ID in which the stack is being deployed | `string` | n/a | yes | | [region](#input\_region) | The name of the region to deploy within | `string` | n/a | yes | @@ -52,12 +45,7 @@ | [subnetwork](#input\_subnetwork) | The name of the sub-network to deploy within | `string` | n/a | yes | | [accept\_limited\_use\_license](#input\_accept\_limited\_use\_license) | Acceptance of the SLULA terms (https://docs.snowplow.io/limited-use-license-1.0/) | `bool` | `false` | no | | [bigquery\_db\_enabled](#input\_bigquery\_db\_enabled) | Whether to enable loading into a BigQuery Dataset | `bool` | `false` | no | -| [bigquery\_loader\_dead\_letter\_bucket\_deploy](#input\_bigquery\_loader\_dead\_letter\_bucket\_deploy) | Whether this module should create a new bucket with the specified name - if the bucket already exists set this to false | `bool` | `true` | no | -| [bigquery\_loader\_dead\_letter\_bucket\_name](#input\_bigquery\_loader\_dead\_letter\_bucket\_name) | The name of the GCS bucket to use for dead-letter output of loader | `string` | `""` | no | | [labels](#input\_labels) | The labels to append to the resources in this module | `map(string)` | `{}` | no | -| [postgres\_db\_authorized\_networks](#input\_postgres\_db\_authorized\_networks) | The list of CIDR ranges to allow access to the Pipeline Database over |
list(object({
name = string
value = string
}))
| `[]` | no | -| [postgres\_db\_enabled](#input\_postgres\_db\_enabled) | Whether to enable loading into a Postgres Database | `bool` | `false` | no | -| [postgres\_db\_tier](#input\_postgres\_db\_tier) | The instance type to assign to the deployed Cloud SQL instance | `string` | `"db-g1-small"` | no | | [ssh\_key\_pairs](#input\_ssh\_key\_pairs) | The list of SSH key-pairs to add to the servers |
list(object({
user_name = string
public_key = string
}))
| `[]` | no | | [ssl\_information](#input\_ssl\_information) | The ID of an Google Managed certificate to bind to the load balancer |
object({
enabled = bool
certificate_id = string
})
|
{
"certificate_id": "",
"enabled": false
}
| no | | [telemetry\_enabled](#input\_telemetry\_enabled) | Whether or not to send telemetry information back to Snowplow Analytics Ltd | `bool` | `true` | no | @@ -69,7 +57,4 @@ |------|-------------| | [bigquery\_db\_dataset\_id](#output\_bigquery\_db\_dataset\_id) | The ID of the BigQuery dataset where your data is being streamed | | [bq\_loader\_bad\_rows\_topic\_name](#output\_bq\_loader\_bad\_rows\_topic\_name) | The name of the topic for bad rows emitted from the BigQuery loader | -| [bq\_loader\_dead\_letter\_bucket\_name](#output\_bq\_loader\_dead\_letter\_bucket\_name) | The name of the GCS bucket for dead letter events emitted from the BigQuery loader | | [collector\_ip\_address](#output\_collector\_ip\_address) | The IP address for the Pipeline Collector | -| [postgres\_db\_address](#output\_postgres\_db\_address) | The IP address of the database where your data is being streamed | -| [postgres\_db\_port](#output\_postgres\_db\_port) | The port of the database where your data is being streamed | diff --git a/terraform/gcp/pipeline/default/main.tf b/terraform/gcp/pipeline/default/main.tf index 7d92e9c..57a71f1 100644 --- a/terraform/gcp/pipeline/default/main.tf +++ b/terraform/gcp/pipeline/default/main.tf @@ -46,7 +46,7 @@ module "enriched_topic" { # 2. Deploy Collector stack module "collector_pubsub" { source = "snowplow-devops/collector-pubsub-ce/google" - version = "0.6.0" + version = "0.7.0" accept_limited_use_license = var.accept_limited_use_license @@ -88,7 +88,7 @@ module "collector_lb" { # 3. 
Deploy Enrichment module "enrich_pubsub" { source = "snowplow-devops/enrich-pubsub-ce/google" - version = "0.4.0" + version = "0.5.0" accept_limited_use_license = var.accept_limited_use_license diff --git a/terraform/gcp/pipeline/default/outputs.tf b/terraform/gcp/pipeline/default/outputs.tf index 6f80d33..9b29019 100644 --- a/terraform/gcp/pipeline/default/outputs.tf +++ b/terraform/gcp/pipeline/default/outputs.tf @@ -3,26 +3,11 @@ output "collector_ip_address" { value = module.collector_lb.ip_address } -output "postgres_db_address" { - description = "The IP address of the database where your data is being streamed" - value = join("", module.postgres_db.*.first_ip_address) -} - -output "postgres_db_port" { - description = "The port of the database where your data is being streamed" - value = join("", module.postgres_db.*.port) -} - output "bigquery_db_dataset_id" { description = "The ID of the BigQuery dataset where your data is being streamed" value = join("", google_bigquery_dataset.bigquery_db.*.dataset_id) } -output "bq_loader_dead_letter_bucket_name" { - description = "The name of the GCS bucket for dead letter events emitted from the BigQuery loader" - value = join("", google_storage_bucket.bq_loader_dead_letter_bucket.*.name) -} - output "bq_loader_bad_rows_topic_name" { description = "The name of the topic for bad rows emitted from the BigQuery loader" value = join("", module.bq_bad_rows_topic.*.name) diff --git a/terraform/gcp/pipeline/default/target_bigquery.tf b/terraform/gcp/pipeline/default/target_bigquery.tf index b610e59..97b2183 100644 --- a/terraform/gcp/pipeline/default/target_bigquery.tf +++ b/terraform/gcp/pipeline/default/target_bigquery.tf @@ -18,26 +18,9 @@ resource "google_bigquery_dataset" "bigquery_db" { labels = var.labels } -resource "google_storage_bucket" "bq_loader_dead_letter_bucket" { - count = var.bigquery_db_enabled && var.bigquery_loader_dead_letter_bucket_deploy ? 
1 : 0 - - name = var.bigquery_loader_dead_letter_bucket_name - location = var.region - force_destroy = true - - labels = var.labels -} - -locals { - bq_loader_dead_letter_bucket_name = coalesce( - join("", google_storage_bucket.bq_loader_dead_letter_bucket.*.name), - var.bigquery_loader_dead_letter_bucket_name, - ) -} - module "bigquery_loader" { source = "snowplow-devops/bigquery-loader-pubsub-ce/google" - version = "0.4.0" + version = "0.5.0" accept_limited_use_license = var.accept_limited_use_license @@ -53,10 +36,9 @@ module "bigquery_loader" { ssh_ip_allowlist = var.ssh_ip_allowlist ssh_key_pairs = var.ssh_key_pairs - input_topic_name = module.enriched_topic.name - bad_rows_topic_name = join("", module.bq_bad_rows_topic.*.name) - gcs_dead_letter_bucket_name = local.bq_loader_dead_letter_bucket_name - bigquery_dataset_id = join("", google_bigquery_dataset.bigquery_db.*.dataset_id) + input_topic_name = module.enriched_topic.name + bad_rows_topic_id = join("", module.bq_bad_rows_topic.*.id) + bigquery_dataset_id = join("", google_bigquery_dataset.bigquery_db.*.dataset_id) # Linking in the custom Iglu Server here custom_iglu_resolvers = local.custom_iglu_resolvers diff --git a/terraform/gcp/pipeline/default/target_postgres.tf b/terraform/gcp/pipeline/default/target_postgres.tf deleted file mode 100644 index c64da23..0000000 --- a/terraform/gcp/pipeline/default/target_postgres.tf +++ /dev/null @@ -1,93 +0,0 @@ -module "postgres_db" { - source = "snowplow-devops/cloud-sql/google" - version = "0.4.1" - - count = var.postgres_db_enabled ? 
1 : 0 - - name = "${var.prefix}-postgres-db" - - region = var.region - db_name = var.postgres_db_name - db_username = var.postgres_db_username - db_password = var.postgres_db_password - - authorized_networks = var.postgres_db_authorized_networks - - tier = var.postgres_db_tier - - labels = var.labels -} - -module "postgres_loader_enriched" { - source = "snowplow-devops/postgres-loader-pubsub-ce/google" - version = "0.5.0" - - accept_limited_use_license = var.accept_limited_use_license - - count = var.postgres_db_enabled ? 1 : 0 - - name = "${var.prefix}-pg-loader-enriched" - - network = var.network - subnetwork = var.subnetwork - region = var.region - project_id = var.project_id - - ssh_ip_allowlist = var.ssh_ip_allowlist - ssh_key_pairs = var.ssh_key_pairs - - in_topic_name = module.enriched_topic.name - purpose = "ENRICHED_EVENTS" - schema_name = "atomic" - - db_instance_name = join("", module.postgres_db.*.connection_name) - db_port = join("", module.postgres_db.*.port) - db_name = var.postgres_db_name - db_username = var.postgres_db_username - db_password = var.postgres_db_password - - # Linking in the custom Iglu Server here - custom_iglu_resolvers = local.custom_iglu_resolvers - - telemetry_enabled = var.telemetry_enabled - user_provided_id = var.user_provided_id - - labels = var.labels -} - -module "postgres_loader_bad" { - source = "snowplow-devops/postgres-loader-pubsub-ce/google" - version = "0.5.0" - - accept_limited_use_license = var.accept_limited_use_license - - count = var.postgres_db_enabled ? 
1 : 0 - - name = "${var.prefix}-pg-loader-bad" - - network = var.network - subnetwork = var.subnetwork - region = var.region - project_id = var.project_id - - ssh_ip_allowlist = var.ssh_ip_allowlist - ssh_key_pairs = var.ssh_key_pairs - - in_topic_name = module.bad_1_topic.name - purpose = "JSON" - schema_name = "atomic_bad" - - db_instance_name = join("", module.postgres_db.*.connection_name) - db_port = join("", module.postgres_db.*.port) - db_name = var.postgres_db_name - db_username = var.postgres_db_username - db_password = var.postgres_db_password - - # Linking in the custom Iglu Server here - custom_iglu_resolvers = local.custom_iglu_resolvers - - telemetry_enabled = var.telemetry_enabled - user_provided_id = var.user_provided_id - - labels = var.labels -} diff --git a/terraform/gcp/pipeline/default/terraform.tfvars b/terraform/gcp/pipeline/default/terraform.tfvars index cc38f66..4653650 100644 --- a/terraform/gcp/pipeline/default/terraform.tfvars +++ b/terraform/gcp/pipeline/default/terraform.tfvars @@ -42,41 +42,9 @@ ssl_information = { # --- TARGETS CONFIGURATION ZONE --- # -# --- Target: PostgreSQL -postgres_db_enabled = false - -postgres_db_name = "snowplow" -postgres_db_username = "snowplow" -# Change and keep this secret! -postgres_db_password = "Hell0W0rld!2" -# IP ranges that you want to query the Pipeline Postgres Cloud SQL instance from directly over the internet. 
An alternative access method is to leverage -# the Cloud SQL Proxy service which creates an IAM authenticated tunnel to the instance -# -# Details: https://cloud.google.com/sql/docs/postgres/sql-proxy -# -# Note: this exposes your data to the internet - take care to ensure your allowlist is strict enough -postgres_db_authorized_networks = [ - { - name = "foo" - value = "999.999.999.999/32" - }, - { - name = "bar" - value = "888.888.888.888/32" - } -] -# Note: the size of the database instance determines the number of concurrent connections - each Postgres Loader instance creates 10 open connections so having -# a sufficiently powerful database tier is important to not running out of connection slots -postgres_db_tier = "db-g1-small" - # --- Target: BigQuery bigquery_db_enabled = false -# To use an existing bucket set this to false -bigquery_loader_dead_letter_bucket_deploy = true -# Must be globally unique so will need to be updated before applying -bigquery_loader_dead_letter_bucket_name = "sp-bq-loader-dead-letter" - # --- ADVANCED CONFIGURATION ZONE --- # # See for more information: https://registry.terraform.io/modules/snowplow-devops/collector-pubsub-ce/google/latest#telemetry diff --git a/terraform/gcp/pipeline/default/variables.tf b/terraform/gcp/pipeline/default/variables.tf index 498cf19..696d30c 100644 --- a/terraform/gcp/pipeline/default/variables.tf +++ b/terraform/gcp/pipeline/default/variables.tf @@ -64,61 +64,12 @@ variable "iglu_super_api_key" { sensitive = true } -variable "postgres_db_enabled" { - description = "Whether to enable loading into a Postgres Database" - default = false - type = bool -} - -variable "postgres_db_name" { - description = "The name of the database to connect to" - type = string -} - -variable "postgres_db_username" { - description = "The username to use to connect to the database" - type = string -} - -variable "postgres_db_password" { - description = "The password to use to connect to the database" - type = string - 
sensitive = true -} - -variable "postgres_db_authorized_networks" { - description = "The list of CIDR ranges to allow access to the Pipeline Database over" - default = [] - type = list(object({ - name = string - value = string - })) -} - -variable "postgres_db_tier" { - description = "The instance type to assign to the deployed Cloud SQL instance" - type = string - default = "db-g1-small" -} - variable "bigquery_db_enabled" { description = "Whether to enable loading into a BigQuery Dataset" default = false type = bool } -variable "bigquery_loader_dead_letter_bucket_deploy" { - description = "Whether this module should create a new bucket with the specified name - if the bucket already exists set this to false" - default = true - type = bool -} - -variable "bigquery_loader_dead_letter_bucket_name" { - description = "The name of the GCS bucket to use for dead-letter output of loader" - default = "" - type = string -} - variable "telemetry_enabled" { description = "Whether or not to send telemetry information back to Snowplow Analytics Ltd" type = bool diff --git a/terraform/gcp/pipeline/secure/README.md b/terraform/gcp/pipeline/secure/README.md index 6a7f8c2..62c4df5 100644 --- a/terraform/gcp/pipeline/secure/README.md +++ b/terraform/gcp/pipeline/secure/README.md @@ -17,15 +17,12 @@ | Name | Source | Version | |------|--------|---------| | [bad\_1\_topic](#module\_bad\_1\_topic) | snowplow-devops/pubsub-topic/google | 0.3.0 | -| [bigquery\_loader](#module\_bigquery\_loader) | snowplow-devops/bigquery-loader-pubsub-ce/google | 0.4.0 | +| [bigquery\_loader](#module\_bigquery\_loader) | snowplow-devops/bigquery-loader-pubsub-ce/google | 0.5.0 | | [bq\_bad\_rows\_topic](#module\_bq\_bad\_rows\_topic) | snowplow-devops/pubsub-topic/google | 0.3.0 | | [collector\_lb](#module\_collector\_lb) | snowplow-devops/lb/google | 0.3.0 | -| [collector\_pubsub](#module\_collector\_pubsub) | snowplow-devops/collector-pubsub-ce/google | 0.6.0 | -| 
[enrich\_pubsub](#module\_enrich\_pubsub) | snowplow-devops/enrich-pubsub-ce/google | 0.4.0 | +| [collector\_pubsub](#module\_collector\_pubsub) | snowplow-devops/collector-pubsub-ce/google | 0.7.0 | +| [enrich\_pubsub](#module\_enrich\_pubsub) | snowplow-devops/enrich-pubsub-ce/google | 0.5.0 | | [enriched\_topic](#module\_enriched\_topic) | snowplow-devops/pubsub-topic/google | 0.3.0 | -| [postgres\_db](#module\_postgres\_db) | snowplow-devops/cloud-sql/google | 0.4.1 | -| [postgres\_loader\_bad](#module\_postgres\_loader\_bad) | snowplow-devops/postgres-loader-pubsub-ce/google | 0.5.0 | -| [postgres\_loader\_enriched](#module\_postgres\_loader\_enriched) | snowplow-devops/postgres-loader-pubsub-ce/google | 0.5.0 | | [raw\_topic](#module\_raw\_topic) | snowplow-devops/pubsub-topic/google | 0.3.0 | ## Resources @@ -33,7 +30,6 @@ | Name | Type | |------|------| | [google_bigquery_dataset.bigquery_db](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/bigquery_dataset) | resource | -| [google_storage_bucket.bq_loader_dead_letter_bucket](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/storage_bucket) | resource | ## Inputs @@ -42,9 +38,6 @@ | [iglu\_server\_dns\_name](#input\_iglu\_server\_dns\_name) | The DNS name of your Iglu Server | `string` | n/a | yes | | [iglu\_super\_api\_key](#input\_iglu\_super\_api\_key) | A UUIDv4 string to use as the master API key for Iglu Server management | `string` | n/a | yes | | [network](#input\_network) | The name of the network to deploy within | `string` | n/a | yes | -| [postgres\_db\_name](#input\_postgres\_db\_name) | The name of the database to connect to | `string` | n/a | yes | -| [postgres\_db\_password](#input\_postgres\_db\_password) | The password to use to connect to the database | `string` | n/a | yes | -| [postgres\_db\_username](#input\_postgres\_db\_username) | The username to use to connect to the database | `string` | n/a | yes | | 
[prefix](#input\_prefix) | Will be prefixed to all resource names. Use to easily identify the resources created | `string` | n/a | yes | | [project\_id](#input\_project\_id) | The project ID in which the stack is being deployed | `string` | n/a | yes | | [region](#input\_region) | The name of the region to deploy within | `string` | n/a | yes | @@ -52,12 +45,7 @@ | [subnetwork](#input\_subnetwork) | The name of the sub-network to deploy within | `string` | n/a | yes | | [accept\_limited\_use\_license](#input\_accept\_limited\_use\_license) | Acceptance of the SLULA terms (https://docs.snowplow.io/limited-use-license-1.0/) | `bool` | `false` | no | | [bigquery\_db\_enabled](#input\_bigquery\_db\_enabled) | Whether to enable loading into a BigQuery Dataset | `bool` | `false` | no | -| [bigquery\_loader\_dead\_letter\_bucket\_deploy](#input\_bigquery\_loader\_dead\_letter\_bucket\_deploy) | Whether this module should create a new bucket with the specified name - if the bucket already exists set this to false | `bool` | `true` | no | -| [bigquery\_loader\_dead\_letter\_bucket\_name](#input\_bigquery\_loader\_dead\_letter\_bucket\_name) | The name of the GCS bucket to use for dead-letter output of loader | `string` | `""` | no | | [labels](#input\_labels) | The labels to append to the resources in this module | `map(string)` | `{}` | no | -| [postgres\_db\_authorized\_networks](#input\_postgres\_db\_authorized\_networks) | The list of CIDR ranges to allow access to the Pipeline Database over |
list(object({
name = string
value = string
}))
| `[]` | no | -| [postgres\_db\_enabled](#input\_postgres\_db\_enabled) | Whether to enable loading into a Postgres Database | `bool` | `false` | no | -| [postgres\_db\_tier](#input\_postgres\_db\_tier) | The instance type to assign to the deployed Cloud SQL instance | `string` | `"db-g1-small"` | no | | [ssh\_key\_pairs](#input\_ssh\_key\_pairs) | The list of SSH key-pairs to add to the servers |
list(object({
user_name = string
public_key = string
}))
| `[]` | no | | [ssl\_information](#input\_ssl\_information) | The ID of an Google Managed certificate to bind to the load balancer |
object({
enabled = bool
certificate_id = string
})
|
{
"certificate_id": "",
"enabled": false
}
| no | | [telemetry\_enabled](#input\_telemetry\_enabled) | Whether or not to send telemetry information back to Snowplow Analytics Ltd | `bool` | `true` | no | @@ -69,7 +57,4 @@ |------|-------------| | [bigquery\_db\_dataset\_id](#output\_bigquery\_db\_dataset\_id) | The ID of the BigQuery dataset where your data is being streamed | | [bq\_loader\_bad\_rows\_topic\_name](#output\_bq\_loader\_bad\_rows\_topic\_name) | The name of the topic for bad rows emitted from the BigQuery loader | -| [bq\_loader\_dead\_letter\_bucket\_name](#output\_bq\_loader\_dead\_letter\_bucket\_name) | The name of the GCS bucket for dead letter events emitted from the BigQuery loader | | [collector\_ip\_address](#output\_collector\_ip\_address) | The IP address for the Pipeline Collector | -| [postgres\_db\_address](#output\_postgres\_db\_address) | The IP address of the database where your data is being streamed | -| [postgres\_db\_port](#output\_postgres\_db\_port) | The port of the database where your data is being streamed | diff --git a/terraform/gcp/pipeline/secure/main.tf b/terraform/gcp/pipeline/secure/main.tf index 4723074..c3d177e 100644 --- a/terraform/gcp/pipeline/secure/main.tf +++ b/terraform/gcp/pipeline/secure/main.tf @@ -46,7 +46,7 @@ module "enriched_topic" { # 2. Deploy Collector stack module "collector_pubsub" { source = "snowplow-devops/collector-pubsub-ce/google" - version = "0.6.0" + version = "0.7.0" accept_limited_use_license = var.accept_limited_use_license @@ -90,7 +90,7 @@ module "collector_lb" { # 3. 
Deploy Enrichment module "enrich_pubsub" { source = "snowplow-devops/enrich-pubsub-ce/google" - version = "0.4.0" + version = "0.5.0" accept_limited_use_license = var.accept_limited_use_license diff --git a/terraform/gcp/pipeline/secure/outputs.tf b/terraform/gcp/pipeline/secure/outputs.tf index 6f80d33..9b29019 100644 --- a/terraform/gcp/pipeline/secure/outputs.tf +++ b/terraform/gcp/pipeline/secure/outputs.tf @@ -3,26 +3,11 @@ output "collector_ip_address" { value = module.collector_lb.ip_address } -output "postgres_db_address" { - description = "The IP address of the database where your data is being streamed" - value = join("", module.postgres_db.*.first_ip_address) -} - -output "postgres_db_port" { - description = "The port of the database where your data is being streamed" - value = join("", module.postgres_db.*.port) -} - output "bigquery_db_dataset_id" { description = "The ID of the BigQuery dataset where your data is being streamed" value = join("", google_bigquery_dataset.bigquery_db.*.dataset_id) } -output "bq_loader_dead_letter_bucket_name" { - description = "The name of the GCS bucket for dead letter events emitted from the BigQuery loader" - value = join("", google_storage_bucket.bq_loader_dead_letter_bucket.*.name) -} - output "bq_loader_bad_rows_topic_name" { description = "The name of the topic for bad rows emitted from the BigQuery loader" value = join("", module.bq_bad_rows_topic.*.name) diff --git a/terraform/gcp/pipeline/secure/target_bigquery.tf b/terraform/gcp/pipeline/secure/target_bigquery.tf index 85ae35a..b37bfb6 100644 --- a/terraform/gcp/pipeline/secure/target_bigquery.tf +++ b/terraform/gcp/pipeline/secure/target_bigquery.tf @@ -18,26 +18,9 @@ resource "google_bigquery_dataset" "bigquery_db" { labels = var.labels } -resource "google_storage_bucket" "bq_loader_dead_letter_bucket" { - count = var.bigquery_db_enabled && var.bigquery_loader_dead_letter_bucket_deploy ? 
1 : 0 - - name = var.bigquery_loader_dead_letter_bucket_name - location = var.region - force_destroy = true - - labels = var.labels -} - -locals { - bq_loader_dead_letter_bucket_name = coalesce( - join("", google_storage_bucket.bq_loader_dead_letter_bucket.*.name), - var.bigquery_loader_dead_letter_bucket_name, - ) -} - module "bigquery_loader" { source = "snowplow-devops/bigquery-loader-pubsub-ce/google" - version = "0.4.0" + version = "0.5.0" accept_limited_use_license = var.accept_limited_use_license @@ -53,10 +36,9 @@ module "bigquery_loader" { ssh_ip_allowlist = var.ssh_ip_allowlist ssh_key_pairs = var.ssh_key_pairs - input_topic_name = module.enriched_topic.name - bad_rows_topic_name = join("", module.bq_bad_rows_topic.*.name) - gcs_dead_letter_bucket_name = local.bq_loader_dead_letter_bucket_name - bigquery_dataset_id = join("", google_bigquery_dataset.bigquery_db.*.dataset_id) + input_topic_name = module.enriched_topic.name + bad_rows_topic_id = join("", module.bq_bad_rows_topic.*.id) + bigquery_dataset_id = join("", google_bigquery_dataset.bigquery_db.*.dataset_id) # Linking in the custom Iglu Server here custom_iglu_resolvers = local.custom_iglu_resolvers diff --git a/terraform/gcp/pipeline/secure/target_postgres.tf b/terraform/gcp/pipeline/secure/target_postgres.tf deleted file mode 100644 index 7ef38bf..0000000 --- a/terraform/gcp/pipeline/secure/target_postgres.tf +++ /dev/null @@ -1,97 +0,0 @@ -module "postgres_db" { - source = "snowplow-devops/cloud-sql/google" - version = "0.4.1" - - count = var.postgres_db_enabled ? 
1 : 0 - - name = "${var.prefix}-postgres-db" - - region = var.region - db_name = var.postgres_db_name - db_username = var.postgres_db_username - db_password = var.postgres_db_password - - authorized_networks = var.postgres_db_authorized_networks - - tier = var.postgres_db_tier - - labels = var.labels -} - -module "postgres_loader_enriched" { - source = "snowplow-devops/postgres-loader-pubsub-ce/google" - version = "0.5.0" - - accept_limited_use_license = var.accept_limited_use_license - - count = var.postgres_db_enabled ? 1 : 0 - - name = "${var.prefix}-pg-loader-enriched" - - network = var.network - subnetwork = var.subnetwork - region = var.region - project_id = var.project_id - - ssh_ip_allowlist = var.ssh_ip_allowlist - ssh_key_pairs = var.ssh_key_pairs - - in_topic_name = module.enriched_topic.name - purpose = "ENRICHED_EVENTS" - schema_name = "atomic" - - db_instance_name = join("", module.postgres_db.*.connection_name) - db_port = join("", module.postgres_db.*.port) - db_name = var.postgres_db_name - db_username = var.postgres_db_username - db_password = var.postgres_db_password - - # Linking in the custom Iglu Server here - custom_iglu_resolvers = local.custom_iglu_resolvers - - telemetry_enabled = var.telemetry_enabled - user_provided_id = var.user_provided_id - - associate_public_ip_address = false - - labels = var.labels -} - -module "postgres_loader_bad" { - source = "snowplow-devops/postgres-loader-pubsub-ce/google" - version = "0.5.0" - - accept_limited_use_license = var.accept_limited_use_license - - count = var.postgres_db_enabled ? 
1 : 0 - - name = "${var.prefix}-pg-loader-bad" - - network = var.network - subnetwork = var.subnetwork - region = var.region - project_id = var.project_id - - ssh_ip_allowlist = var.ssh_ip_allowlist - ssh_key_pairs = var.ssh_key_pairs - - in_topic_name = module.bad_1_topic.name - purpose = "JSON" - schema_name = "atomic_bad" - - db_instance_name = join("", module.postgres_db.*.connection_name) - db_port = join("", module.postgres_db.*.port) - db_name = var.postgres_db_name - db_username = var.postgres_db_username - db_password = var.postgres_db_password - - # Linking in the custom Iglu Server here - custom_iglu_resolvers = local.custom_iglu_resolvers - - telemetry_enabled = var.telemetry_enabled - user_provided_id = var.user_provided_id - - associate_public_ip_address = false - - labels = var.labels -} diff --git a/terraform/gcp/pipeline/secure/terraform.tfvars b/terraform/gcp/pipeline/secure/terraform.tfvars index 1edddb8..3a31494 100644 --- a/terraform/gcp/pipeline/secure/terraform.tfvars +++ b/terraform/gcp/pipeline/secure/terraform.tfvars @@ -41,41 +41,9 @@ ssl_information = { # --- TARGETS CONFIGURATION ZONE --- # -# --- Target: PostgreSQL -postgres_db_enabled = false - -postgres_db_name = "snowplow" -postgres_db_username = "snowplow" -# Change and keep this secret! -postgres_db_password = "Hell0W0rld!2" -# IP ranges that you want to query the Pipeline Postgres Cloud SQL instance from directly over the internet. 
An alternative access method is to leverage -# the Cloud SQL Proxy service which creates an IAM authenticated tunnel to the instance -# -# Details: https://cloud.google.com/sql/docs/postgres/sql-proxy -# -# Note: this exposes your data to the internet - take care to ensure your allowlist is strict enough -postgres_db_authorized_networks = [ - { - name = "foo" - value = "999.999.999.999/32" - }, - { - name = "bar" - value = "888.888.888.888/32" - } -] -# Note: the size of the database instance determines the number of concurrent connections - each Postgres Loader instance creates 10 open connections so having -# a sufficiently powerful database tier is important to not running out of connection slots -postgres_db_tier = "db-g1-small" - # --- Target: BigQuery bigquery_db_enabled = false -# To use an existing bucket set this to false -bigquery_loader_dead_letter_bucket_deploy = true -# Must be globally unique so will need to be updated before applying -bigquery_loader_dead_letter_bucket_name = "sp-bq-loader-dead-letter" - # --- ADVANCED CONFIGURATION ZONE --- # # See for more information: https://registry.terraform.io/modules/snowplow-devops/collector-pubsub-ce/google/latest#telemetry diff --git a/terraform/gcp/pipeline/secure/variables.tf b/terraform/gcp/pipeline/secure/variables.tf index 498cf19..696d30c 100644 --- a/terraform/gcp/pipeline/secure/variables.tf +++ b/terraform/gcp/pipeline/secure/variables.tf @@ -64,61 +64,12 @@ variable "iglu_super_api_key" { sensitive = true } -variable "postgres_db_enabled" { - description = "Whether to enable loading into a Postgres Database" - default = false - type = bool -} - -variable "postgres_db_name" { - description = "The name of the database to connect to" - type = string -} - -variable "postgres_db_username" { - description = "The username to use to connect to the database" - type = string -} - -variable "postgres_db_password" { - description = "The password to use to connect to the database" - type = string - sensitive 
= true -} - -variable "postgres_db_authorized_networks" { - description = "The list of CIDR ranges to allow access to the Pipeline Database over" - default = [] - type = list(object({ - name = string - value = string - })) -} - -variable "postgres_db_tier" { - description = "The instance type to assign to the deployed Cloud SQL instance" - type = string - default = "db-g1-small" -} - variable "bigquery_db_enabled" { description = "Whether to enable loading into a BigQuery Dataset" default = false type = bool } -variable "bigquery_loader_dead_letter_bucket_deploy" { - description = "Whether this module should create a new bucket with the specified name - if the bucket already exists set this to false" - default = true - type = bool -} - -variable "bigquery_loader_dead_letter_bucket_name" { - description = "The name of the GCS bucket to use for dead-letter output of loader" - default = "" - type = string -} - variable "telemetry_enabled" { description = "Whether or not to send telemetry information back to Snowplow Analytics Ltd" type = bool