diff --git a/CHANGELOG.md b/CHANGELOG.md index e6dfc650..1334c19b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## 8.1.13 - 2025-11-13 +### Changed +- Upgrade maven shade plugin to `3.6.1` (was `3.1.1`). +- Upgrade AWS Java SDK in `apiary-gluesync-listener` to `1.12.792` (was `1.12.276`). +- Send view original and expanded texts in `apiary-gluesync-listener`. +- Add possibility to only sync selected table types (e.g. only views) in `apiary-gluesync-listener` CLI via `--sync-types`. +- Shade micrometer and prometheus libraries into glue-sync CLI. + ## 8.1.12 - 2025-09-15 ### Added - GlueSyncCli with fat jar for on-demand syncing. diff --git a/hive-event-listeners/apiary-gluesync-listener/pom.xml b/hive-event-listeners/apiary-gluesync-listener/pom.xml index a9cc9bd7..d3634e9e 100644 --- a/hive-event-listeners/apiary-gluesync-listener/pom.xml +++ b/hive-event-listeners/apiary-gluesync-listener/pom.xml @@ -11,6 +11,11 @@ apiary-gluesync-listener Apiary GlueSync Listener Glue Sync Listener for Apiary that replays metadata events in AWS Glue catalog + + + 1.12.792 + + com.expediagroup.apiary @@ -137,7 +142,7 @@ org.apache.maven.plugins maven-shade-plugin - 3.1.1 + 3.6.1 shade-all @@ -153,6 +158,8 @@ com.amazonaws:* com.expediagroup.apiary:* org.apache.httpcomponents:* + io.micrometer:* + io.prometheus:* ch.qos.reload4j:reload4j @@ -170,6 +177,16 @@ org.apache.http ${shade.prefix}.org.apache.http + + + io.micrometer + ${shade.prefix}.io.micrometer + + + io.prometheus + ${shade.prefix}.io.prometheus + diff --git a/hive-event-listeners/apiary-gluesync-listener/src/main/java/com/expediagroup/apiary/extensions/gluesync/cli/GlueSyncCli.java b/hive-event-listeners/apiary-gluesync-listener/src/main/java/com/expediagroup/apiary/extensions/gluesync/cli/GlueSyncCli.java index 117c9b28..9423fcce 100644 --- 
a/hive-event-listeners/apiary-gluesync-listener/src/main/java/com/expediagroup/apiary/extensions/gluesync/cli/GlueSyncCli.java +++ b/hive-event-listeners/apiary-gluesync-listener/src/main/java/com/expediagroup/apiary/extensions/gluesync/cli/GlueSyncCli.java @@ -17,6 +17,7 @@ package com.expediagroup.apiary.extensions.gluesync.cli; import java.util.ArrayList; +import java.util.Arrays; import java.util.Iterator; import java.util.List; @@ -47,6 +48,7 @@ import com.hotels.hcommon.hive.metastore.iterator.PartitionIterator; public class GlueSyncCli { + private static final Logger logger = LoggerFactory.getLogger(GlueSyncCli.class); private static final String THRIFT_CONNECTION_URI = System.getenv("THRIFT_CONNECTION_URI"); @@ -109,8 +111,11 @@ public void syncAll(CommandLine cmd) throws TException { boolean continueOnError = cmd.hasOption("continueOnError"); boolean deleteGluePartitions = !cmd.hasOption("keep-glue-partitions"); - logger.debug("Additional parameters: continueOnError={}, deleteGluePartitions={}", continueOnError, - deleteGluePartitions); + String syncTypesFlag = cmd.getOptionValue("sync-types"); + List syncTypes = syncTypesFlag == null ? 
null : Arrays.asList(syncTypesFlag.split(",")); + + logger.debug("Additional parameters: continueOnError={}, deleteGluePartitions={}, syncTypes={}", + continueOnError, deleteGluePartitions, syncTypesFlag); boolean hadError = false; for (String dbName : metastoreClient.getAllDatabases()) { @@ -120,7 +125,7 @@ public void syncAll(CommandLine cmd) throws TException { if (tableName.matches(tableRegex)) { try { logger.info("Syncing table: {} in database: {}", tableName, dbName); - syncTable(dbName, tableName, deleteGluePartitions, verbose); + syncTable(dbName, tableName, deleteGluePartitions, syncTypes, verbose); } catch (Exception e) { hadError = true; logger.error("Error syncing table: {} in database: {}: {}", tableName, dbName, e.getMessage()); @@ -139,8 +144,8 @@ public void syncAll(CommandLine cmd) throws TException { } } - private void syncTable(String dbName, String tableName, boolean deleteGluePartitions, boolean verbose) - throws TException { + private void syncTable(String dbName, String tableName, boolean deleteGluePartitions, List syncTypes, + boolean verbose) throws TException { Database database = metastoreClient.getDatabase(dbName); if (!glueDatabaseService.exists(database)) { @@ -150,6 +155,14 @@ private void syncTable(String dbName, String tableName, boolean deleteGluePartit Table table = metastoreClient.getTable(dbName, tableName); + if (syncTypes != null) { + String type = table.getTableType(); + if (type != null && !syncTypes.contains(type)) { + logger.info("Table {}.{} is {}, skipping as syncTypes flag {} is active", dbName, tableName, type, syncTypes); + return; + } + } + CreateTableEvent createTableEvent = new CreateTableEvent(table, true, null); apiaryGlueSync.onCreateTable(createTableEvent); @@ -176,5 +189,4 @@ protected Iterator createPartitionIterator(IMetaStoreClient metastore throws org.apache.hadoop.hive.metastore.api.MetaException, org.apache.thrift.TException { return new PartitionIterator(metastoreClient, table, 
DEFAULT_PARTITION_BATCH_SIZE); } - } diff --git a/hive-event-listeners/apiary-gluesync-listener/src/main/java/com/expediagroup/apiary/extensions/gluesync/cli/GlueSyncCliParser.java b/hive-event-listeners/apiary-gluesync-listener/src/main/java/com/expediagroup/apiary/extensions/gluesync/cli/GlueSyncCliParser.java index 4bab4e5e..479d3a87 100644 --- a/hive-event-listeners/apiary-gluesync-listener/src/main/java/com/expediagroup/apiary/extensions/gluesync/cli/GlueSyncCliParser.java +++ b/hive-event-listeners/apiary-gluesync-listener/src/main/java/com/expediagroup/apiary/extensions/gluesync/cli/GlueSyncCliParser.java @@ -108,6 +108,8 @@ private static CommandLineParser getParser(Options options) { options.addOption(new Option("c", "continueOnError", false, "Continue on error (default: false)")); options.addOption(new Option(null, "keep-glue-partitions", false, "If true, will keep glue partitions even if there is no corresponding hive partition. If false will delete them (default: false)")); + options.addOption(new Option(null, "sync-types", true, + "List of table types to sync. If non specified it will be sync all table types. Example: sync-types=MANAGED_TABLE,EXTERNAL_TABLE. 
Possible types: VIRTUAL_VIEW,MANAGED_TABLE,EXTERNAL_TABLE")); CommandLineParser parser = new DefaultParser(); return parser; diff --git a/hive-event-listeners/apiary-gluesync-listener/src/main/java/com/expediagroup/apiary/extensions/gluesync/listener/service/HiveToGlueTransformer.java b/hive-event-listeners/apiary-gluesync-listener/src/main/java/com/expediagroup/apiary/extensions/gluesync/listener/service/HiveToGlueTransformer.java index 22c3835f..e3de4aae 100644 --- a/hive-event-listeners/apiary-gluesync-listener/src/main/java/com/expediagroup/apiary/extensions/gluesync/listener/service/HiveToGlueTransformer.java +++ b/hive-event-listeners/apiary-gluesync-listener/src/main/java/com/expediagroup/apiary/extensions/gluesync/listener/service/HiveToGlueTransformer.java @@ -101,7 +101,9 @@ public TableInput transformTable(final Table table) { .withPartitionKeys(partitionKeys) .withRetention(table.getRetention()) .withStorageDescriptor(sd) - .withTableType(table.getTableType()); + .withTableType(table.getTableType()) + .withViewOriginalText(table.getViewOriginalText()) + .withViewExpandedText(table.getViewExpandedText()); } public PartitionInput transformPartition(final Partition partition) { diff --git a/hive-event-listeners/apiary-gluesync-listener/src/test/java/com/expediagroup/apiary/extensions/gluesync/cli/GlueSyncCliTest.java b/hive-event-listeners/apiary-gluesync-listener/src/test/java/com/expediagroup/apiary/extensions/gluesync/cli/GlueSyncCliTest.java index 0c3b9d65..3e63749e 100644 --- a/hive-event-listeners/apiary-gluesync-listener/src/test/java/com/expediagroup/apiary/extensions/gluesync/cli/GlueSyncCliTest.java +++ b/hive-event-listeners/apiary-gluesync-listener/src/test/java/com/expediagroup/apiary/extensions/gluesync/cli/GlueSyncCliTest.java @@ -133,6 +133,7 @@ private Options createOptions() { options.addOption(new Option("h", "help", false, "Print usage information")); options.addOption(new Option(null, "continueOnError", false, "Continue processing on 
errors")); options.addOption(new Option(null, "keep-glue-partitions", false, "Keep existing Glue partitions")); + options.addOption(new Option(null, "sync-types", true, "Choose what table type to sync.")); return options; } @@ -170,7 +171,6 @@ public void testSyncAllWithMatchingDatabaseAndTable() throws Exception { verify(mockApiaryGlueSync, never()).onCreateDatabase(any(CreateDatabaseEvent.class)); verify(mockApiaryGlueSync, times(4)).onCreateTable(any(CreateTableEvent.class)); verify(mockGluePartitionService, times(4)).synchronizePartitions(any(), any(), anyBoolean(), anyBoolean()); - } @Test @@ -209,7 +209,6 @@ public void testSyncAllWithVerboseOption() throws Exception { verify(mockApiaryGlueSync, never()).onCreateDatabase(any(CreateDatabaseEvent.class)); verify(mockApiaryGlueSync, times(1)).onCreateTable(any(CreateTableEvent.class)); verify(mockGluePartitionService, times(1)).synchronizePartitions(any(), any(), anyBoolean(), eq(true)); - } @Test @@ -262,7 +261,6 @@ public void testSyncAllWithNoMatchingDatabases() throws Exception { // Assert verify(mockApiaryGlueSync, never()).onCreateDatabase(any(CreateDatabaseEvent.class)); verify(mockApiaryGlueSync, never()).onCreateTable(any(CreateTableEvent.class)); - } @Test(expected = RuntimeException.class) @@ -339,7 +337,7 @@ public void testSyncAllWithKeepGluePartitionsOption() throws Exception { // Arrange - Test that deleteGluePartitions is false when keep-glue-partitions // is set String[] args = { "--database-name-regex", "test_db.*", "--table-name-regex", "test_table.*", - "--keep-glue-partitions" }; + "--keep-glue-partitions" }; CommandLine cmd = new DefaultParser().parse(createOptions(), args); List databases = Arrays.asList("test_db1"); @@ -369,6 +367,104 @@ public void testSyncAllWithKeepGluePartitionsOption() throws Exception { verify(mockGluePartitionService, times(1)).synchronizePartitions(any(), any(), eq(false), anyBoolean()); } + @Test + public void testSyncAllWithSyncOnlyViewsOption_onView() throws 
Exception { + String[] args = { "--database-name-regex", "test_db.*", "--table-name-regex", "test_table.*", + "--sync-types", "VIRTUAL_VIEW" }; + CommandLine cmd = new DefaultParser().parse(createOptions(), args); + + List databases = Arrays.asList("test_db1"); + List tables = Arrays.asList("test_table1"); + + Database mockDatabase = new Database(); + mockDatabase.setName("test_db1"); + + Table mockTable = new Table(); + mockTable.setDbName("test_db1"); + mockTable.setTableName("test_table1"); + mockTable.setTableType("VIRTUAL_VIEW"); + Map tableParams = new HashMap<>(); + mockTable.setParameters(tableParams); + + when(mockMetastoreClient.getAllDatabases()).thenReturn(databases); + when(mockMetastoreClient.getAllTables("test_db1")).thenReturn(tables); + when(mockMetastoreClient.getDatabase(anyString())).thenReturn(mockDatabase); + when(mockMetastoreClient.getTable(anyString(), anyString())).thenReturn(mockTable); + when(mockIsIcebergTablePredicate.test(any())).thenReturn(false); + when(mockMetastoreClient.listPartitions(anyString(), anyString(), anyShort())).thenReturn(Arrays.asList()); + + // Act + glueSyncCli.syncAll(cmd); + + // Assert - Verify that synchronizePartitions is called with + // deleteGluePartitions=false + verify(mockGluePartitionService, times(1)).synchronizePartitions(any(), any(), anyBoolean(), anyBoolean()); + } + + @Test + public void testSyncAllWithSyncOnlyViewsOption_onNormalTable() throws Exception { + String[] args = { "--database-name-regex", "test_db.*", "--table-name-regex", "test_table.*", + "--sync-types", "VIRTUAL_VIEW" }; + CommandLine cmd = new DefaultParser().parse(createOptions(), args); + + List databases = Arrays.asList("test_db1"); + List tables = Arrays.asList("test_table1"); + + Database mockDatabase = new Database(); + mockDatabase.setName("test_db1"); + + Table mockTable = new Table(); + mockTable.setDbName("test_db1"); + mockTable.setTableName("test_table1"); + mockTable.setTableType("EXTERNAL_TABLE"); + Map tableParams = 
new HashMap<>(); + mockTable.setParameters(tableParams); + + when(mockMetastoreClient.getAllDatabases()).thenReturn(databases); + when(mockMetastoreClient.getAllTables("test_db1")).thenReturn(tables); + when(mockMetastoreClient.getDatabase(anyString())).thenReturn(mockDatabase); + when(mockMetastoreClient.getTable(anyString(), anyString())).thenReturn(mockTable); + + // Act + glueSyncCli.syncAll(cmd); + + verify(mockGluePartitionService, times(0)).synchronizePartitions(any(), any(), anyBoolean(), anyBoolean()); + } + + @Test + public void testSyncAllWithSyncOnlyViewsOption_onViewAndTable() throws Exception { + String[] args = { "--database-name-regex", "test_db.*", "--table-name-regex", "test_table.*", + "--sync-types", "VIRTUAL_VIEW,EXTERNAL_TABLE" }; + CommandLine cmd = new DefaultParser().parse(createOptions(), args); + + List databases = Arrays.asList("test_db1"); + List tables = Arrays.asList("test_table1"); + + Database mockDatabase = new Database(); + mockDatabase.setName("test_db1"); + + Table mockTable = new Table(); + mockTable.setDbName("test_db1"); + mockTable.setTableName("test_table1"); + mockTable.setTableType("VIRTUAL_VIEW"); + Map tableParams = new HashMap<>(); + mockTable.setParameters(tableParams); + + when(mockMetastoreClient.getAllDatabases()).thenReturn(databases); + when(mockMetastoreClient.getAllTables("test_db1")).thenReturn(tables); + when(mockMetastoreClient.getDatabase(anyString())).thenReturn(mockDatabase); + when(mockMetastoreClient.getTable(anyString(), anyString())).thenReturn(mockTable); + when(mockIsIcebergTablePredicate.test(any())).thenReturn(false); + when(mockMetastoreClient.listPartitions(anyString(), anyString(), anyShort())).thenReturn(Arrays.asList()); + + // Act + glueSyncCli.syncAll(cmd); + + // Assert - Verify that synchronizePartitions is called with + // deleteGluePartitions=false + verify(mockGluePartitionService, times(1)).synchronizePartitions(any(), any(), anyBoolean(), anyBoolean()); + } + @Test public void 
testSyncAllWithDefaultPartitionDeletion() throws Exception { // Arrange - Test that deleteGluePartitions is true by default (when @@ -443,7 +539,6 @@ public void testSyncAllWithLargePartitionBatchHandling() throws Exception { // Assert verify(mockGluePartitionService, times(1)).synchronizePartitions(any(), eq(largePartitionList), anyBoolean(), eq(true)); - } @Test(expected = RuntimeException.class) @@ -498,7 +593,6 @@ public void testSyncAllWithNonMatchingTableRegex() throws Exception { verify(mockApiaryGlueSync, never()).onCreateDatabase(any(CreateDatabaseEvent.class)); verify(mockApiaryGlueSync, never()).onCreateTable(any(CreateTableEvent.class)); verify(mockGluePartitionService, never()).synchronizePartitions(any(), any(), anyBoolean(), anyBoolean()); - } @Test @@ -516,7 +610,6 @@ public void testSyncAllWithEmptyDatabaseList() throws Exception { verify(mockApiaryGlueSync, never()).onCreateDatabase(any(CreateDatabaseEvent.class)); verify(mockApiaryGlueSync, never()).onCreateTable(any(CreateTableEvent.class)); verify(mockGluePartitionService, never()).synchronizePartitions(any(), any(), anyBoolean(), anyBoolean()); - } @Test diff --git a/hive-event-listeners/apiary-gluesync-listener/src/test/java/com/expediagroup/apiary/extensions/gluesync/listener/ApiaryGlueSyncTest.java b/hive-event-listeners/apiary-gluesync-listener/src/test/java/com/expediagroup/apiary/extensions/gluesync/listener/ApiaryGlueSyncTest.java index d0685968..6fcfa669 100644 --- a/hive-event-listeners/apiary-gluesync-listener/src/test/java/com/expediagroup/apiary/extensions/gluesync/listener/ApiaryGlueSyncTest.java +++ b/hive-event-listeners/apiary-gluesync-listener/src/test/java/com/expediagroup/apiary/extensions/gluesync/listener/ApiaryGlueSyncTest.java @@ -290,6 +290,30 @@ public void onCreateIcebergTable() throws MetaException { assertThat(toList(createTableRequest.getTableInput().getStorageDescriptor().getColumns()), is(asList(colNames))); } + @Test + public void onCreateHiveView() throws 
MetaException { + CreateTableEvent event = mock(CreateTableEvent.class); + when(event.getStatus()).thenReturn(true); + + Table table = simpleHiveTable(simpleSchema(), Collections.emptyList()); + table.setTableType("VIRTUAL_VIEW"); + table.setViewOriginalText("SELECT * FROM some_table"); + table.setViewExpandedText("SELECT * FROM some_table"); + when(event.getTable()).thenReturn(table); + + glueSync.onCreateTable(event); + + verify(glueClient).createTable(createTableRequestCaptor.capture()); + verify(metricService).incrementCounter(MetricConstants.LISTENER_TABLE_SUCCESS); + CreateTableRequest createTableRequest = createTableRequestCaptor.getValue(); + + assertThat(createTableRequest.getDatabaseName(), is(gluePrefix + dbName)); + assertThat(createTableRequest.getTableInput().getName(), is(tableName)); + assertThat(createTableRequest.getTableInput().getTableType(), is("VIRTUAL_VIEW")); + assertThat(createTableRequest.getTableInput().getViewOriginalText().isEmpty(), is(false)); + assertThat(createTableRequest.getTableInput().getViewExpandedText().isEmpty(), is(false)); + } + @Test public void onAlterHiveTable() throws MetaException { AlterTableEvent event = mock(AlterTableEvent.class);