-
Notifications
You must be signed in to change notification settings - Fork 4.8k
HIVE-29016: fixing cache handling for REST catalog; #5882
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,30 +24,45 @@ | |
import java.util.Map; | ||
import java.util.Set; | ||
import org.apache.iceberg.CachingCatalog; | ||
import org.apache.iceberg.Schema; | ||
import org.apache.iceberg.catalog.Catalog; | ||
import org.apache.iceberg.catalog.Namespace; | ||
import org.apache.iceberg.catalog.SupportsNamespaces; | ||
import org.apache.iceberg.catalog.TableIdentifier; | ||
import org.apache.iceberg.exceptions.NamespaceNotEmptyException; | ||
import org.apache.iceberg.exceptions.NoSuchNamespaceException; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
|
||
/** | ||
* Class that wraps an Iceberg Catalog to cache tables. | ||
* @param <CATALOG> the catalog class | ||
*/ | ||
public class HMSCachingCatalog<CATALOG extends Catalog & SupportsNamespaces> extends CachingCatalog implements SupportsNamespaces { | ||
private static final Logger LOG = LoggerFactory.getLogger(HMSCachingCatalog.class); | ||
protected final CATALOG nsCatalog; | ||
|
||
public HMSCachingCatalog(CATALOG catalog, long expiration) { | ||
super(catalog, true, expiration, Ticker.systemTicker()); | ||
super(catalog, false, expiration, Ticker.systemTicker()); | ||
nsCatalog = catalog; | ||
} | ||
|
||
public CATALOG hmsUnwrap() { | ||
return nsCatalog; | ||
} | ||
|
||
public void invalidateTable(String dbName, String tableName) { | ||
super.invalidateTable(TableIdentifier.of(dbName, tableName)); | ||
} | ||
|
||
@Override | ||
public void invalidateTable(TableIdentifier tableIdentifier) { | ||
if (LOG.isDebugEnabled()) { | ||
LOG.debug("Avoid invalidating table: {}", tableIdentifier); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can then do super.invalidateTable() here? Also, why not do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The idea is to try and have all actual invalidation performed through the listener; this override voids the default call on purpose. |
||
} | ||
|
||
@Override | ||
public void createNamespace(Namespace nmspc, Map<String, String> map) { | ||
nsCatalog.createNamespace(nmspc, map); | ||
|
@@ -65,10 +80,6 @@ public Map<String, String> loadNamespaceMetadata(Namespace nmspc) throws NoSuchN | |
|
||
@Override | ||
public boolean dropNamespace(Namespace nmspc) throws NamespaceNotEmptyException { | ||
List<TableIdentifier> tables = listTables(nmspc); | ||
for (TableIdentifier ident : tables) { | ||
invalidateTable(ident); | ||
} | ||
return nsCatalog.dropNamespace(nmspc); | ||
} | ||
|
||
|
@@ -81,5 +92,16 @@ public boolean setProperties(Namespace nmspc, Map<String, String> map) throws No | |
public boolean removeProperties(Namespace nmspc, Set<String> set) throws NoSuchNamespaceException { | ||
return nsCatalog.removeProperties(nmspc, set); | ||
} | ||
|
||
|
||
@Override | ||
public Catalog.TableBuilder buildTable(TableIdentifier identifier, Schema schema) { | ||
return nsCatalog.buildTable(identifier, schema); | ||
} | ||
|
||
public void invalidateNamespace(String namespace) { | ||
Namespace ns = Namespace.of(namespace); | ||
for (TableIdentifier table : listTables(ns)) { | ||
invalidateTable(table); | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.apache.iceberg.rest; | ||
|
||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.hadoop.hive.metastore.MetaStoreEventListener; | ||
import org.apache.hadoop.hive.metastore.api.MetaException; | ||
import org.apache.hadoop.hive.metastore.api.Table; | ||
import org.apache.hadoop.hive.metastore.events.AlterTableEvent; | ||
import org.apache.hadoop.hive.metastore.events.DropDatabaseEvent; | ||
import org.apache.hadoop.hive.metastore.events.DropTableEvent; | ||
import org.apache.hadoop.hive.metastore.events.ReloadEvent; | ||
import org.apache.iceberg.catalog.Catalog; | ||
import org.slf4j.Logger; | ||
|
||
/** | ||
* IcebergEventListener is a Hive Metastore event listener that invalidates the cache | ||
* of the HMSCachingCatalog when certain events occur, such as altering or dropping a table. | ||
*/ | ||
public class HMSEventListener extends MetaStoreEventListener { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm new to this API. Does this work even when we set up HMS with HA with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good question, I would hope so, let me try and seek a definite answer. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. HMS only triggers MetaStoreEventListener for 'local' events so HA would need to disable the cache (for now). A potential solution would be to actively poll events from each HMS instance (need a dedicated thread that polls the NotificationEventRequest and follows the same invalidation logic). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As I shared it with Jira, HiveCatalog might access two types of remote resources.
I'm wondering if we can update HiveCatalog to allow caching (B). The metadata.json is immutable. So, we can safely cache it. |
||
private static final Logger LOG = org.slf4j.LoggerFactory.getLogger(HMSEventListener.class); | ||
/** | ||
* Constructor for HMSEventListener. | ||
* | ||
* @param config the configuration to use for the listener | ||
*/ | ||
public HMSEventListener(Configuration config) { | ||
super(config); | ||
} | ||
|
||
@Override | ||
public void onAlterTable(AlterTableEvent event) { | ||
Catalog catalog = HMSCatalogFactory.getLastCatalog(); | ||
if (catalog instanceof HMSCachingCatalog) { | ||
HMSCachingCatalog<?> hmsCachingCatalog = (HMSCachingCatalog<?>) catalog; | ||
String dbName = event.getOldTable().getDbName(); | ||
String tableName = event.getOldTable().getTableName(); | ||
LOG.debug("onAlterTable: invalidating table cache for {}.{}", dbName, tableName); | ||
hmsCachingCatalog.invalidateTable(dbName, tableName); | ||
} | ||
} | ||
|
||
@Override | ||
public void onDropTable(DropTableEvent event) { | ||
Catalog catalog = HMSCatalogFactory.getLastCatalog(); | ||
if (catalog instanceof HMSCachingCatalog) { | ||
HMSCachingCatalog<?> hmsCachingCatalog = (HMSCachingCatalog<?>) catalog; | ||
String dbName = event.getTable().getDbName(); | ||
String tableName = event.getTable().getTableName(); | ||
LOG.debug("onDropTable: invalidating table cache for {}.{}", dbName, tableName); | ||
hmsCachingCatalog.invalidateTable(dbName, tableName); | ||
} | ||
} | ||
|
||
@Override | ||
public void onReload(ReloadEvent reloadEvent) { | ||
Catalog catalog = HMSCatalogFactory.getLastCatalog(); | ||
if (catalog instanceof HMSCachingCatalog) { | ||
HMSCachingCatalog<?> hmsCachingCatalog = (HMSCachingCatalog<?>) catalog; | ||
Table tableObj = reloadEvent.getTableObj(); | ||
String dbName = tableObj.getDbName(); | ||
String tableName = tableObj.getTableName(); | ||
LOG.debug("onReload: invalidating table cache for {}.{}", dbName, tableName); | ||
hmsCachingCatalog.invalidateTable(dbName, tableName); | ||
} | ||
} | ||
|
||
@Override | ||
public void onDropDatabase (DropDatabaseEvent dbEvent) throws MetaException { | ||
Catalog catalog = HMSCatalogFactory.getLastCatalog(); | ||
if (catalog instanceof HMSCachingCatalog) { | ||
HMSCachingCatalog<?> hmsCachingCatalog = (HMSCachingCatalog<?>) catalog; | ||
String dbName = dbEvent.getDatabase().getName(); | ||
LOG.debug("onDropDatabase: invalidating tables cache for {}", dbName); | ||
hmsCachingCatalog.invalidateNamespace(dbName); | ||
} | ||
} | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can call l#60 here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This version of invalidateTable() is called by the listener and we do want the invalidation performed here. The override at l#60 does nothing on purpose; we do want all invalidations performed through the listener.