|
21 | 21 | import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
|
22 | 22 | import org.apache.hadoop.hive.metastore.api.CompactionType;
|
23 | 23 | import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
|
| 24 | +import org.apache.hadoop.hive.metastore.txn.TxnUtils; |
| 25 | +import org.apache.hadoop.hive.metastore.txn.entities.CompactionState; |
24 | 26 | import org.junit.Assert;
|
| 27 | +import org.junit.Before; |
25 | 28 | import org.junit.Test;
|
26 | 29 |
|
27 | 30 | import java.util.ArrayList;
|
28 | 31 | import java.util.List;
|
| 32 | +import java.util.Objects; |
29 | 33 |
|
30 | 34 | import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriver;
|
31 | 35 |
|
32 | 36 | public class TestIcebergCompactorOnTez extends CompactorOnTezTest {
|
| 37 | + |
| 38 | + private static final String DB_NAME = "default"; |
| 39 | + private static final String TABLE_NAME = "ice_orc"; |
| 40 | + private static final String QUALIFIED_TABLE_NAME = TxnUtils.getFullTableName(DB_NAME, TABLE_NAME); |
| 41 | + |
| 42 | + @Override |
| 43 | + @Before |
| 44 | + public void setup() throws Exception { |
| 45 | + super.setup(); |
| 46 | + executeStatementOnDriver("drop table if exists " + QUALIFIED_TABLE_NAME, driver); |
| 47 | + } |
33 | 48 |
|
34 | 49 | @Test
|
35 | 50 | public void testIcebergCompactorWithAllPartitionFieldTypes() throws Exception{
|
36 | 51 | conf.setVar(HiveConf.ConfVars.COMPACTOR_JOB_QUEUE, CUSTOM_COMPACTION_QUEUE);
|
37 | 52 | msClient = new HiveMetaStoreClient(conf);
|
38 | 53 |
|
39 |
| - String dbName = "default"; |
40 |
| - String tableName = "ice_orc"; |
41 |
| - String qualifiedTableName = dbName + "." + tableName; |
42 |
| - |
43 |
| - executeStatementOnDriver("drop table if exists " + qualifiedTableName, driver); |
44 | 54 | executeStatementOnDriver(String.format("create table %s " +
|
45 | 55 | "(id int, a string, b int, c bigint, d float, e double, f decimal(4, 2), g boolean, h date, i date, j date, k timestamp) " +
|
46 | 56 | "partitioned by spec(a, truncate(3, a), bucket(4, a), b, c, d, e, f, g, h, year(h), month(i), day(j), k, hour(k)) stored by iceberg stored as orc " +
|
47 |
| - "tblproperties ('compactor.threshold.min.input.files'='1')", qualifiedTableName), driver); |
| 57 | + "tblproperties ('compactor.threshold.min.input.files'='1')", QUALIFIED_TABLE_NAME), driver); |
48 | 58 |
|
49 | 59 | // 6 records, one record per file --> 3 partitions, 2 files per partition
|
50 |
| - executeStatementOnDriver(String.format("INSERT INTO %s VALUES (1, 'aaa111', 1, 100, 1.0, 2.0, 4.00, true, DATE '2024-05-01', DATE '2024-05-01', DATE '2024-05-01', TIMESTAMP '2024-05-02 10:00:00')", qualifiedTableName), driver); |
51 |
| - executeStatementOnDriver(String.format("INSERT INTO %s VALUES (2, 'aaa111', 1, 100, 1.0, 2.0, 4.00, true, DATE '2024-05-01', DATE '2024-05-01', DATE '2024-05-01', TIMESTAMP '2024-05-02 10:00:00')", qualifiedTableName), driver); |
52 |
| - executeStatementOnDriver(String.format("INSERT INTO %s VALUES (3, 'bbb222', 2, 200, 2.0, 3.0, 8.00, false, DATE '2024-05-03', DATE '2024-05-03', DATE '2024-05-03', TIMESTAMP '2024-05-04 13:00:00')", qualifiedTableName), driver); |
53 |
| - executeStatementOnDriver(String.format("INSERT INTO %s VALUES (4, 'bbb222', 2, 200, 2.0, 3.0, 8.00, false, DATE '2024-05-03', DATE '2024-05-03', DATE '2024-05-03', TIMESTAMP '2024-05-04 13:00:00')", qualifiedTableName), driver); |
54 |
| - executeStatementOnDriver(String.format("INSERT INTO %s VALUES (5, null, null, null, null, null, null, null, null, null, null, null)", qualifiedTableName), driver); |
55 |
| - executeStatementOnDriver(String.format("INSERT INTO %s VALUES (6, null, null, null, null, null, null, null, null, null, null, null)", qualifiedTableName), driver); |
| 60 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (1, 'aaa111', 1, 100, 1.0, 2.0, 4.00, true, DATE '2024-05-01', DATE '2024-05-01', DATE '2024-05-01', TIMESTAMP '2024-05-02 10:00:00')", QUALIFIED_TABLE_NAME), driver); |
| 61 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (2, 'aaa111', 1, 100, 1.0, 2.0, 4.00, true, DATE '2024-05-01', DATE '2024-05-01', DATE '2024-05-01', TIMESTAMP '2024-05-02 10:00:00')", QUALIFIED_TABLE_NAME), driver); |
| 62 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (3, 'bbb222', 2, 200, 2.0, 3.0, 8.00, false, DATE '2024-05-03', DATE '2024-05-03', DATE '2024-05-03', TIMESTAMP '2024-05-04 13:00:00')", QUALIFIED_TABLE_NAME), driver); |
| 63 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (4, 'bbb222', 2, 200, 2.0, 3.0, 8.00, false, DATE '2024-05-03', DATE '2024-05-03', DATE '2024-05-03', TIMESTAMP '2024-05-04 13:00:00')", QUALIFIED_TABLE_NAME), driver); |
| 64 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (5, null, null, null, null, null, null, null, null, null, null, null)", QUALIFIED_TABLE_NAME), driver); |
| 65 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (6, null, null, null, null, null, null, null, null, null, null, null)", QUALIFIED_TABLE_NAME), driver); |
56 | 66 |
|
57 |
| - Assert.assertEquals(6, getFilesCount(qualifiedTableName)); |
58 |
| - List<String> recordsBefore = getAllRecords(qualifiedTableName); |
| 67 | + Assert.assertEquals(6, getFilesCount()); |
| 68 | + List<String> recordsBefore = getAllRecords(); |
59 | 69 |
|
60 |
| - CompactorTestUtil.runCompaction(conf, dbName, tableName, CompactionType.MINOR, false, |
| 70 | + CompactorTestUtil.runCompaction(conf, DB_NAME, TABLE_NAME, CompactionType.MINOR, false, |
61 | 71 | "a=aaa111/a_trunc=aaa/a_bucket=0/b=1/c=100/d=1.0/e=2.0/f=4.00/g=true/h=2024-05-01/h_year=2024/i_month=2024-05/j_day=2024-05-01/k=2024-05-02T10%3A00%3A00/k_hour=2024-05-02-10",
|
62 | 72 | "a=bbb222/a_trunc=bbb/a_bucket=3/b=2/c=200/d=2.0/e=3.0/f=8.00/g=false/h=2024-05-03/h_year=2024/i_month=2024-05/j_day=2024-05-03/k=2024-05-04T13%3A00%3A00/k_hour=2024-05-04-13",
|
63 | 73 | "a=null/a_trunc=null/a_bucket=null/b=null/c=null/d=null/e=null/f=null/g=null/h=null/h_year=null/i_month=null/j_day=null/k=null/k_hour=null"
|
64 | 74 | );
|
65 | 75 |
|
66 |
| - Assert.assertEquals(3, getFilesCount(qualifiedTableName)); |
| 76 | + Assert.assertEquals(3, getFilesCount()); |
67 | 77 | verifySuccessfulCompaction(3);
|
68 |
| - List<String> recordsAfter = getAllRecords(qualifiedTableName); |
| 78 | + List<String> recordsAfter = getAllRecords(); |
69 | 79 |
|
70 | 80 | Assert.assertEquals(recordsBefore, recordsAfter);
|
71 | 81 | }
|
72 |
| - |
73 |
| - private int getFilesCount(String qualifiedTableName) throws Exception { |
74 |
| - driver.run(String.format("select count(*) from %s.files", qualifiedTableName)); |
| 82 | + |
| 83 | + @Test |
| 84 | + public void testIcebergAutoCompactionPartitionEvolution() throws Exception { |
| 85 | + executeStatementOnDriver(String.format("create table %s " + |
| 86 | + "(id int, a string) " + |
| 87 | + "partitioned by spec(id) stored by iceberg stored as orc " + |
| 88 | + "tblproperties ('compactor.threshold.min.input.files'='1')", QUALIFIED_TABLE_NAME), driver); |
| 89 | + |
| 90 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (1, 'a')", QUALIFIED_TABLE_NAME), driver); |
| 91 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (2, 'b')", QUALIFIED_TABLE_NAME), driver); |
| 92 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (3, 'c')", QUALIFIED_TABLE_NAME), driver); |
| 93 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (4, 'd')", QUALIFIED_TABLE_NAME), driver); |
| 94 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (5, 'e')", QUALIFIED_TABLE_NAME), driver); |
| 95 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (6, 'd')", QUALIFIED_TABLE_NAME), driver); |
| 96 | + |
| 97 | + executeStatementOnDriver(String.format("alter table %s set partition spec(truncate(3, a))", QUALIFIED_TABLE_NAME), driver); |
| 98 | + |
| 99 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (7, 'aaa111')", QUALIFIED_TABLE_NAME), driver); |
| 100 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (8, 'aaa111')", QUALIFIED_TABLE_NAME), driver); |
| 101 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (9, 'bbb222')", QUALIFIED_TABLE_NAME), driver); |
| 102 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (10, 'bbb222')", QUALIFIED_TABLE_NAME), driver); |
| 103 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (11, null)", QUALIFIED_TABLE_NAME), driver); |
| 104 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (12, null)", QUALIFIED_TABLE_NAME), driver); |
| 105 | + |
| 106 | + startInitiator(); |
| 107 | + ShowCompactResponse rsp = msClient.showCompactions(); |
| 108 | + Assert.assertEquals(4, rsp.getCompactsSize()); |
| 109 | + |
| 110 | + // Compaction should be initiated for each partition from the latest spec |
| 111 | + Assert.assertTrue(isCompactExist(rsp, "a_trunc_3=aaa", CompactionType.MINOR, CompactionState.INITIATED)); |
| 112 | + Assert.assertTrue(isCompactExist(rsp, "a_trunc_3=bbb", CompactionType.MINOR, CompactionState.INITIATED)); |
| 113 | + Assert.assertTrue(isCompactExist(rsp, "a_trunc_3=null", CompactionType.MINOR, CompactionState.INITIATED)); |
| 114 | + |
| 115 | + // Additional compaction should be initiated for all partitions from past partition specs |
| 116 | + Assert.assertTrue(isCompactExist(rsp, null, CompactionType.MINOR, CompactionState.INITIATED)); |
| 117 | + } |
| 118 | + |
| 119 | + @Test |
| 120 | + public void testIcebergAutoCompactionUnpartitioned() throws Exception { |
| 121 | + executeStatementOnDriver(String.format("create table %s " + |
| 122 | + "(id int, a string) " + |
| 123 | + "stored by iceberg stored as orc " + |
| 124 | + "tblproperties ('compactor.threshold.min.input.files'='1')", QUALIFIED_TABLE_NAME), driver); |
| 125 | + |
| 126 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (7, 'aaa111')", QUALIFIED_TABLE_NAME), driver); |
| 127 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (8, 'aaa111')", QUALIFIED_TABLE_NAME), driver); |
| 128 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (9, 'bbb222')", QUALIFIED_TABLE_NAME), driver); |
| 129 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (10, 'bbb222')", QUALIFIED_TABLE_NAME), driver); |
| 130 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (11, null)", QUALIFIED_TABLE_NAME), driver); |
| 131 | + executeStatementOnDriver(String.format("INSERT INTO %s VALUES (12, null)", QUALIFIED_TABLE_NAME), driver); |
| 132 | + |
| 133 | + startInitiator(); |
| 134 | + ShowCompactResponse rsp = msClient.showCompactions(); |
| 135 | + Assert.assertEquals(1, rsp.getCompactsSize()); |
| 136 | + Assert.assertTrue(isCompactExist(rsp, null, CompactionType.MINOR, CompactionState.INITIATED)); |
| 137 | + } |
| 138 | + |
| 139 | + private int getFilesCount() throws Exception { |
| 140 | + driver.run(String.format("select count(*) from %s.files", QUALIFIED_TABLE_NAME)); |
75 | 141 | List<String> res = new ArrayList<>();
|
76 | 142 | driver.getFetchTask().fetch(res);
|
77 | 143 | return Integer.parseInt(res.get(0));
|
78 | 144 | }
|
79 | 145 |
|
80 |
| - private List<String> getAllRecords(String qualifiedTableName) throws Exception { |
81 |
| - driver.run(String.format("select * from %s order by id", qualifiedTableName)); |
| 146 | + private List<String> getAllRecords() throws Exception { |
| 147 | + driver.run(String.format("select * from %s order by id", QUALIFIED_TABLE_NAME)); |
82 | 148 | List<String> res = new ArrayList<>();
|
83 | 149 | driver.getFetchTask().fetch(res);
|
84 | 150 | return res;
|
85 | 151 | }
|
| 152 | + |
| 153 | + private boolean isCompactExist(ShowCompactResponse rsp, String partName, CompactionType type, CompactionState state) { |
| 154 | + return rsp.getCompacts().stream().anyMatch(c -> |
| 155 | + c.getDbname().equals(DB_NAME) && c.getTablename().equals(TABLE_NAME) && |
| 156 | + Objects.equals(c.getPartitionname(), partName) && c.getType().equals(type) && |
| 157 | + c.getState().equals(state.name().toLowerCase())); |
| 158 | + } |
| 159 | + |
| 160 | + @Override |
| 161 | + protected InitiatorBase getInitiator() { |
| 162 | + return new IcebergInitiator(); |
| 163 | + } |
86 | 164 | }
|
0 commit comments