diff --git a/.github/workflows/make_wheel_Linux_x86.sh b/.github/workflows/make_wheel_Linux_x86.sh index 47b0a2295..61718000e 100644 --- a/.github/workflows/make_wheel_Linux_x86.sh +++ b/.github/workflows/make_wheel_Linux_x86.sh @@ -13,7 +13,7 @@ fi # if tensorflow version >= 2.6.0 and <= 2.11.9 if [[ "$TF_VERSION" =~ ^2\.([6-9]|10|11)\.[0-9]$ ]] ; then - export BUILD_IMAGE="tfra/nosla-cuda11.2.1-cudnn8-ubuntu20.04-manylinux2014-python$PY_VERSION" + export BUILD_IMAGE="tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python$PY_VERSION" export TF_CUDA_VERSION="11.2" export TF_CUDNN_VERSION="8.1" elif [ $TF_VERSION == "2.4.1" ] ; then diff --git a/WORKSPACE b/WORKSPACE index 9e07792b9..7f7da070f 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -57,55 +57,9 @@ http_archive( http_archive( name = "hkv", build_file = "//build_deps/toolchains/hkv:hkv.BUILD", - # TODO(LinGeLin) remove this when update hkv - patch_cmds = [ - """sed -i.bak '1772i\\'$'\\n ThrustAllocator thrust_allocator_;\\n' include/merlin_hashtable.cuh""", - """sed -i.bak '225i\\'$'\\n thrust_allocator_.set_allocator(allocator_);\\n' include/merlin_hashtable.cuh""", - "sed -i.bak 's/thrust::sort_by_key(thrust_par.on(stream)/thrust::sort_by_key(thrust_par(thrust_allocator_).on(stream)/' include/merlin_hashtable.cuh", - "sed -i.bak 's/reduce(thrust_par.on(stream)/reduce(thrust_par(thrust_allocator_).on(stream)/' include/merlin_hashtable.cuh", - """sed -i.bak '125i\\'$'\\n template \\n' include/merlin/allocator.cuh""", - """sed -i.bak '126i\\'$'\\n struct ThrustAllocator : thrust::device_malloc_allocator {\\n' include/merlin/allocator.cuh""", - """sed -i.bak '127i\\'$'\\n public:\\n' include/merlin/allocator.cuh""", - """sed -i.bak '128i\\'$'\\n typedef thrust::device_malloc_allocator super_t;\\n' include/merlin/allocator.cuh""", - """sed -i.bak '129i\\'$'\\n typedef typename super_t::pointer pointer;\\n' include/merlin/allocator.cuh""", - """sed -i.bak '130i\\'$'\\n typedef typename super_t::size_type size_type;\\n' include/merlin/allocator.cuh""", - """sed -i.bak '131i\\'$'\\n public:\\n' include/merlin/allocator.cuh""", - """sed -i.bak '132i\\'$'\\n pointer allocate(size_type n) {\\n' include/merlin/allocator.cuh""", - """sed -i.bak '133i\\'$'\\n void* ptr = nullptr;\\n' include/merlin/allocator.cuh""", - """sed -i.bak '134i\\'$'\\n MERLIN_CHECK(\\n' include/merlin/allocator.cuh""", - """sed -i.bak '135i\\'$'\\n allocator_ != nullptr,\\n' include/merlin/allocator.cuh""", - """sed -i.bak '136i\\'$'\\n "[ThrustAllocator] set_allocator should be called in advance!");\\n' include/merlin/allocator.cuh""", - """sed -i.bak '137i\\'$'\\n allocator_->alloc(MemoryType::Device, &ptr, sizeof(T) * n);\\n' include/merlin/allocator.cuh""", - """sed -i.bak '138i\\'$'\\n return pointer(reinterpret_cast(ptr));\\n' include/merlin/allocator.cuh""", - """sed -i.bak '139i\\'$'\\n }\\n' include/merlin/allocator.cuh""", - """sed -i.bak '140i\\'$'\\n void deallocate(pointer p, size_type n) {\\n' include/merlin/allocator.cuh""", - """sed -i.bak '141i\\'$'\\n MERLIN_CHECK(\\n' include/merlin/allocator.cuh""", - """sed -i.bak '142i\\'$'\\n allocator_ != nullptr,\\n' include/merlin/allocator.cuh""", - """sed -i.bak '143i\\'$'\\n "[ThrustAllocator] set_allocator should be called in advance!");\\n' include/merlin/allocator.cuh""", - """sed -i.bak '144i\\'$'\\n allocator_->free(MemoryType::Device, reinterpret_cast(p.get()));\\n' include/merlin/allocator.cuh""", - """sed -i.bak '145i\\'$'\\n }\\n' include/merlin/allocator.cuh""", - """sed -i.bak '146i\\'$'\\n void 
set_allocator(BaseAllocator* allocator) { allocator_ = allocator; }\\n' include/merlin/allocator.cuh""", - """sed -i.bak '147i\\'$'\\n public:\\n' include/merlin/allocator.cuh""", - """sed -i.bak '148i\\'$'\\n BaseAllocator* allocator_ = nullptr;\\n' include/merlin/allocator.cuh""", - """sed -i.bak '149i\\'$'\\n };\\n' include/merlin/allocator.cuh""", - """sed -i.bak '20i\\'$'\\n #include \\n' include/merlin/allocator.cuh""", - """sed -i.bak '367i\\'$'\\n for (auto addr : (*table)->buckets_address) {\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '368i\\'$'\\n allocator->free(MemoryType::Device, addr);\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '369i\\'$'\\n }\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '370i\\'$'\\n /*\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '382i\\'$'\\n */\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '224i\\'$'\\n uint8_t* address = nullptr;\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '225i\\'$'\\n allocator->alloc(MemoryType::Device, (void**)&(address), bucket_memory_size * (end - start));\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '226i\\'$'\\n (*table)->buckets_address.push_back(address);\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '228i\\'$'\\n allocate_bucket_others<<<1, 1>>>((*table)->buckets, i, address + (bucket_memory_size * (i-start)), reserve_size, bucket_max_size);\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '229i\\'$'\\n /*\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '235i\\'$'\\n */\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '22i\\'$'\\n#include \\n' include/merlin/types.cuh""", - """sed -i.bak '143i\\'$'\\n std::vector buckets_address;\\n' include/merlin/types.cuh""", - ], - sha256 = "f8179c445a06a558262946cda4d8ae7252d313e73f792586be9b1bc0c993b1cf", - strip_prefix = "HierarchicalKV-0.1.0-beta.6", - url = "https://github.com/NVIDIA-Merlin/HierarchicalKV/archive/refs/tags/v0.1.0-beta.6.tar.gz", + sha256 = "3839f91b703b401fd6d2449c034662b6f8d6563e5b9b71b4c25b217cf1cd63fd", + strip_prefix = "HierarchicalKV-0.1.0-beta.8", + url = "https://github.com/NVIDIA-Merlin/HierarchicalKV/archive/refs/tags/v0.1.0-beta.8.tar.gz", ) tf_configure( diff --git a/tensorflow_recommenders_addons/dynamic_embedding/__init__.py b/tensorflow_recommenders_addons/dynamic_embedding/__init__.py index 3269c9171..e85aa8b41 100644 --- a/tensorflow_recommenders_addons/dynamic_embedding/__init__.py +++ b/tensorflow_recommenders_addons/dynamic_embedding/__init__.py @@ -18,6 +18,7 @@ 'CuckooHashTable', 'CuckooHashTableConfig', 'CuckooHashTableCreator', + 'HkvEvictStrategy', 'HkvHashTable', 'HkvHashTableConfig', 'HkvHashTableCreator', @@ -55,7 +56,7 @@ from tensorflow_recommenders_addons.dynamic_embedding.python.ops import data_flow_ops as data_flow from tensorflow_recommenders_addons.dynamic_embedding.python.ops.dynamic_embedding_creator import ( KVCreator, CuckooHashTableConfig, CuckooHashTableCreator, - HkvHashTableConfig, HkvHashTableCreator, RedisTableConfig, + HkvHashTableConfig, HkvHashTableCreator, HkvEvictStrategy, RedisTableConfig, RedisTableCreator, FileSystemSaver) from tensorflow_recommenders_addons.dynamic_embedding.python.ops.cuckoo_hashtable_ops import ( CuckooHashTable,) diff --git a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/hkv_hashtable_op_gpu.cu.cc b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/hkv_hashtable_op_gpu.cu.cc index 133992078..fbbb91633 100644 --- 
a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/hkv_hashtable_op_gpu.cu.cc +++ b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/hkv_hashtable_op_gpu.cu.cc @@ -42,6 +42,7 @@ limitations under the License. namespace tensorflow { using GPUDevice = Eigen::GpuDevice; +using NvEvictStrategy = nv::merlin::EvictStrategy; namespace recommenders_addons { namespace lookup { @@ -51,7 +52,7 @@ constexpr size_t kDefaultGpuInitCapacity = 1024 * 1024; using tensorflow::OpKernelContext; using tensorflow::lookup::LookupInterface; -template +template class HkvHashTableOfTensorsGpu final : public LookupInterface { private: std::unique_ptr allocator_ptr_; @@ -71,6 +72,7 @@ class HkvHashTableOfTensorsGpu final : public LookupInterface { int64 init_capacity_i64 = 0; int64 max_capacity_i64 = 0; int64 max_hbm_for_vectors_i64 = 0; + int64 evict_global_epoch = 0; OP_REQUIRES_OK( ctx, GetNodeAttr(kernel->def(), "init_capacity", &init_capacity_i64)); OP_REQUIRES_OK( @@ -81,9 +83,17 @@ class HkvHashTableOfTensorsGpu final : public LookupInterface { ctx, (max_hbm_for_vectors_i64 >= 0), errors::InvalidArgument("params max_hbm_for_vectors less than 0")); + OP_REQUIRES_OK(ctx, GetNodeAttr(kernel->def(), "evict_global_epoch", + &evict_global_epoch)); + + OP_REQUIRES( + ctx, (evict_global_epoch >= 0), + errors::InvalidArgument("params evict_global_epoch less than 0")); + options.init_capacity = static_cast(init_capacity_i64); options.max_capacity = static_cast(max_capacity_i64); options.max_hbm_for_vectors = static_cast(max_hbm_for_vectors_i64); + options.evict_global_epoch = static_cast(evict_global_epoch); if (options.max_capacity == 0) { char* env_max_capacity_str = @@ -136,7 +146,7 @@ class HkvHashTableOfTensorsGpu final : public LookupInterface { Status CreateTable(gpu::TableWrapperInitOptions& options, nv::merlin::BaseAllocator* allocator, - gpu::TableWrapper** pptable) { + gpu::TableWrapper** pptable) { return gpu::CreateTableImpl(pptable, options, allocator, runtime_dim_); } @@ -580,13 +590,13 @@ class HkvHashTableOfTensorsGpu final : public LookupInterface { TensorShape value_shape_; size_t runtime_dim_; mutable mutex mu_; - gpu::TableWrapper* table_ = nullptr GUARDED_BY(mu_); + gpu::TableWrapper* table_ = nullptr GUARDED_BY(mu_); }; } // namespace lookup // Table lookup op. Perform the lookup operation on the given table. -template +template class HashTableFindGpuOp : public OpKernel { public: explicit HashTableFindGpuOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -595,8 +605,8 @@ class HashTableFindGpuOp : public OpKernel { lookup::LookupInterface* table; OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); core::ScopedUnref unref_me(table); - lookup::HkvHashTableOfTensorsGpu* table_hkv = - (lookup::HkvHashTableOfTensorsGpu*)table; + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; // Input 0 could be a STRING_REF or a RESOURCE DataType expected_input_0 = DT_RESOURCE; @@ -621,13 +631,9 @@ class HashTableFindGpuOp : public OpKernel { } }; -// REGISTER_KERNEL_BUILDER( -// Name(PREFIX_OP_NAME(HkvHashTableFind)).Device(DEVICE_GPU), -// HashTableFindGpuOp); - // Table lookup op. Perform the lookup operation on the given table. 
-template +template class HashTableFindWithExistsGpuOp : public OpKernel { public: explicit HashTableFindWithExistsGpuOp(OpKernelConstruction* ctx) @@ -638,8 +644,8 @@ class HashTableFindWithExistsGpuOp : public OpKernel { OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); core::ScopedUnref unref_me(table); - lookup::HkvHashTableOfTensorsGpu* table_hkv = - (lookup::HkvHashTableOfTensorsGpu*)table; + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; // Input 0 could be a STRING_REF or a RESOURCE DataType expected_input_0 = DT_RESOURCE; @@ -669,6 +675,7 @@ class HashTableFindWithExistsGpuOp : public OpKernel { }; // Table insert op. +template class HashTableInsertGpuOp : public OpKernel { public: explicit HashTableInsertGpuOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -677,6 +684,8 @@ class HashTableInsertGpuOp : public OpKernel { lookup::LookupInterface* table; OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); core::ScopedUnref unref_me(table); + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; DataType expected_input_0 = DT_RESOURCE; DataTypeVector expected_inputs = {expected_input_0, table->key_dtype(), @@ -686,16 +695,12 @@ class HashTableInsertGpuOp : public OpKernel { const Tensor& keys = ctx->input(1); const Tensor& values = ctx->input(2); OP_REQUIRES_OK(ctx, table->CheckKeyAndValueTensorsForInsert(keys, values)); - OP_REQUIRES_OK(ctx, table->Insert(ctx, keys, values)); + OP_REQUIRES_OK(ctx, table_hkv->Insert(ctx, keys, values)); } }; -REGISTER_KERNEL_BUILDER( - Name(PREFIX_OP_NAME(HkvHashTableInsert)).Device(DEVICE_GPU), - HashTableInsertGpuOp); - // Table accum op. -template +template class HashTableAccumGpuOp : public OpKernel { public: explicit HashTableAccumGpuOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -704,8 +709,8 @@ class HashTableAccumGpuOp : public OpKernel { lookup::LookupInterface* table; OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); core::ScopedUnref unref_me(table); - lookup::HkvHashTableOfTensorsGpu* table_hkv = - (lookup::HkvHashTableOfTensorsGpu*)table; + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; DataType expected_input_0 = DT_RESOURCE; DataTypeVector expected_inputs = {expected_input_0, table->key_dtype(), @@ -723,7 +728,6 @@ class HashTableAccumGpuOp : public OpKernel { }; // Table remove op. -// template class HashTableRemoveGpuOp : public OpKernel { public: explicit HashTableRemoveGpuOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -748,7 +752,7 @@ REGISTER_KERNEL_BUILDER( HashTableRemoveGpuOp); // Table clear op. -template +template class HashTableClearGpuOp : public OpKernel { public: explicit HashTableClearGpuOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -757,14 +761,14 @@ class HashTableClearGpuOp : public OpKernel { lookup::LookupInterface* table; OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); core::ScopedUnref unref_me(table); - lookup::HkvHashTableOfTensorsGpu* table_hkv = - (lookup::HkvHashTableOfTensorsGpu*)table; + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; OP_REQUIRES_OK(ctx, table_hkv->Clear(ctx)); } }; // Op that returns the size of the given table. 
-template +template class HashTableSizeGpuOp : public OpKernel { public: explicit HashTableSizeGpuOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -773,8 +777,8 @@ class HashTableSizeGpuOp : public OpKernel { lookup::LookupInterface* table; OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); core::ScopedUnref unref_me(table); - lookup::HkvHashTableOfTensorsGpu* table_hkv = - (lookup::HkvHashTableOfTensorsGpu*)table; + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; Tensor* out; AllocatorAttributes attr; @@ -788,11 +792,8 @@ class HashTableSizeGpuOp : public OpKernel { } }; -// REGISTER_KERNEL_BUILDER( -// Name(PREFIX_OP_NAME(HkvHashTableSize)).Device(DEVICE_GPU), -// HashTableSizeGpuOp); - // Op that outputs tensors of all keys and all values. +template class HashTableExportGpuOp : public OpKernel { public: explicit HashTableExportGpuOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -801,17 +802,15 @@ class HashTableExportGpuOp : public OpKernel { lookup::LookupInterface* table; OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); core::ScopedUnref unref_me(table); + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; - OP_REQUIRES_OK(ctx, table->ExportValues(ctx)); + OP_REQUIRES_OK(ctx, table_hkv->ExportValues(ctx)); } }; -REGISTER_KERNEL_BUILDER( - Name(PREFIX_OP_NAME(HkvHashTableExport)).Device(DEVICE_GPU), - HashTableExportGpuOp); - // Op that export all keys and values to file. -template +template class HashTableExportWithScoresGpuOp : public OpKernel { public: explicit HashTableExportWithScoresGpuOp(OpKernelConstruction* ctx) @@ -821,13 +820,13 @@ class HashTableExportWithScoresGpuOp : public OpKernel { lookup::LookupInterface* table; OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); core::ScopedUnref unref_me(table); - lookup::HkvHashTableOfTensorsGpu* table_hkv = - (lookup::HkvHashTableOfTensorsGpu*)table; + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; OP_REQUIRES_OK(ctx, table_hkv->ExportValuesWithScores(ctx)); } }; -template +template class HashTableExportKeysAndScoresGpuOp : public OpKernel { public: explicit HashTableExportKeysAndScoresGpuOp(OpKernelConstruction* ctx) @@ -839,8 +838,8 @@ class HashTableExportKeysAndScoresGpuOp : public OpKernel { lookup::LookupInterface* table; OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); core::ScopedUnref unref_me(table); - lookup::HkvHashTableOfTensorsGpu* table_hkv = - (lookup::HkvHashTableOfTensorsGpu*)table; + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; OP_REQUIRES_OK(ctx, table_hkv->ExportKeysAndScores( ctx, static_cast(split_size_i64_))); } @@ -850,6 +849,7 @@ class HashTableExportKeysAndScoresGpuOp : public OpKernel { }; // Clear the table and insert data. 
+template class HashTableImportGpuOp : public OpKernel { public: explicit HashTableImportGpuOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -858,6 +858,8 @@ class HashTableImportGpuOp : public OpKernel { lookup::LookupInterface* table; OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); core::ScopedUnref unref_me(table); + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; DataType expected_input_0 = DT_RESOURCE; DataTypeVector expected_inputs = {expected_input_0, table->key_dtype(), @@ -867,16 +869,12 @@ class HashTableImportGpuOp : public OpKernel { const Tensor& keys = ctx->input(1); const Tensor& values = ctx->input(2); OP_REQUIRES_OK(ctx, table->CheckKeyAndValueTensorsForImport(keys, values)); - OP_REQUIRES_OK(ctx, table->ImportValues(ctx, keys, values)); + OP_REQUIRES_OK(ctx, table_hkv->ImportValues(ctx, keys, values)); } }; -REGISTER_KERNEL_BUILDER( - Name(PREFIX_OP_NAME(HkvHashTableImport)).Device(DEVICE_GPU), - HashTableImportGpuOp); - // Op that export all keys and values to FileSystem. -template +template class HashTableSaveToFileSystemGpuOp : public OpKernel { public: explicit HashTableSaveToFileSystemGpuOp(OpKernelConstruction* ctx) @@ -912,8 +910,8 @@ class HashTableSaveToFileSystemGpuOp : public OpKernel { errors::InvalidArgument("file name must be scalar.")); string file_name = string(fname_tensor.scalar()().data()); - lookup::HkvHashTableOfTensorsGpu* table_hkv = - (lookup::HkvHashTableOfTensorsGpu*)table; + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; LOG(INFO) << "c++ dirpath: " << dirpath << " filename: " << file_name; std::string filepath = io::JoinPath(dirpath, file_name); @@ -931,7 +929,7 @@ class HashTableSaveToFileSystemGpuOp : public OpKernel { }; // Clear the table and insert data from FileSystem. -template +template class HashTableLoadFromFileSystemGpuOp : public OpKernel { public: explicit HashTableLoadFromFileSystemGpuOp(OpKernelConstruction* ctx) @@ -969,8 +967,8 @@ class HashTableLoadFromFileSystemGpuOp : public OpKernel { LOG(INFO) << "c++ dirpath :" << dirpath << " filename: " << file_name; - lookup::HkvHashTableOfTensorsGpu* table_hkv = - (lookup::HkvHashTableOfTensorsGpu*)table; + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; OP_REQUIRES_OK( ctx, table_hkv->ImportValuesFromFile(ctx, dirpath, file_name, buffer_size_, load_entire_dir_)); @@ -982,65 +980,102 @@ class HashTableLoadFromFileSystemGpuOp : public OpKernel { size_t buffer_size_; }; +#define CONCAT_QUADRA_STRING(X, Y, Z, S) (#X #Y #Z #S) + +#define PREFIX_OP_NAME_X_IMPL(N, S) CONCAT_QUADRA_STRING(TFRA, >, N, S) +#define PREFIX_OP_NAME_X(N, ...) PREFIX_OP_NAME_X_IMPL(N, __VA_ARGS__) + // Register the HkvHashTableOfTensors op. 
-#define REGISTER_KERNEL(key_dtype, value_dtype) \ - REGISTER_KERNEL_BUILDER( \ - Name(PREFIX_OP_NAME(HkvHashTableOfTensors)) \ - .Device(DEVICE_GPU) \ - .TypeConstraint("key_dtype") \ - .TypeConstraint("value_dtype"), \ - HashTableGpuOp, \ - key_dtype, value_dtype>); \ - REGISTER_KERNEL_BUILDER(Name(PREFIX_OP_NAME(HkvHashTableClear)) \ - .Device(DEVICE_GPU) \ - .TypeConstraint("key_dtype") \ - .TypeConstraint("value_dtype"), \ - HashTableClearGpuOp); \ - REGISTER_KERNEL_BUILDER(Name(PREFIX_OP_NAME(HkvHashTableSize)) \ - .Device(DEVICE_GPU) \ - .TypeConstraint("key_dtype") \ - .TypeConstraint("value_dtype"), \ - HashTableSizeGpuOp); \ - REGISTER_KERNEL_BUILDER(Name(PREFIX_OP_NAME(HkvHashTableAccum)) \ - .Device(DEVICE_GPU) \ - .TypeConstraint("key_dtype") \ - .TypeConstraint("value_dtype"), \ - HashTableAccumGpuOp); \ - REGISTER_KERNEL_BUILDER( \ - Name(PREFIX_OP_NAME(HkvHashTableExportWithScores)) \ - .Device(DEVICE_GPU) \ - .TypeConstraint("key_dtype") \ - .TypeConstraint("value_dtype"), \ - HashTableExportWithScoresGpuOp); \ - REGISTER_KERNEL_BUILDER(Name(PREFIX_OP_NAME(HkvHashTableFind)) \ - .Device(DEVICE_GPU) \ - .TypeConstraint("Tin") \ - .TypeConstraint("Tout"), \ - HashTableFindGpuOp); \ - REGISTER_KERNEL_BUILDER( \ - Name(PREFIX_OP_NAME(HkvHashTableFindWithExists)) \ - .Device(DEVICE_GPU) \ - .TypeConstraint("Tin") \ - .TypeConstraint("Tout"), \ - HashTableFindWithExistsGpuOp); \ - REGISTER_KERNEL_BUILDER( \ - Name(PREFIX_OP_NAME(HkvHashTableSaveToFileSystem)) \ - .Device(DEVICE_GPU) \ - .TypeConstraint("key_dtype") \ - .TypeConstraint("value_dtype"), \ - HashTableSaveToFileSystemGpuOp); \ - REGISTER_KERNEL_BUILDER( \ - Name(PREFIX_OP_NAME(HkvHashTableLoadFromFileSystem)) \ - .Device(DEVICE_GPU) \ - .TypeConstraint("key_dtype") \ - .TypeConstraint("value_dtype"), \ - HashTableLoadFromFileSystemGpuOp); - -REGISTER_KERNEL(int64, float); -REGISTER_KERNEL(int64, int8); -REGISTER_KERNEL(int64, int32); -REGISTER_KERNEL(int64, int64); -REGISTER_KERNEL(int64, Eigen::half); +#define REGISTER_HKV_TABLE(key_dtype, value_dtype, Strategy, STRATEGY) \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableOfTensors, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableGpuOp< \ + lookup::HkvHashTableOfTensorsGpu, \ + key_dtype, value_dtype>); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableClear, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableClearGpuOp); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableSize, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableSizeGpuOp); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableInsert, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableInsertGpuOp); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableExport, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableExportGpuOp); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableImport, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableImportGpuOp); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableAccum, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), 
\ + HashTableAccumGpuOp); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableExportWithScores, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableExportWithScoresGpuOp); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableFind, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableFindGpuOp); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableFindWithExists, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableFindWithExistsGpuOp); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableSaveToFileSystem, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableSaveToFileSystemGpuOp); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableLoadFromFileSystem, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableLoadFromFileSystemGpuOp); + +#define REGISTER_STRATRGY(Strategy, STRATEGY) \ + REGISTER_HKV_TABLE(int64, float, Strategy, STRATEGY); \ + REGISTER_HKV_TABLE(int64, int8, Strategy, STRATEGY); \ + REGISTER_HKV_TABLE(int64, int32, Strategy, STRATEGY); \ + REGISTER_HKV_TABLE(int64, int64, Strategy, STRATEGY); \ + REGISTER_HKV_TABLE(int64, Eigen::half, Strategy, STRATEGY); + +REGISTER_STRATRGY(NvEvictStrategy::kLru, LRU); +REGISTER_STRATRGY(NvEvictStrategy::kLfu, LFU); +REGISTER_STRATRGY(NvEvictStrategy::kEpochLru, EPOCHLRU); +REGISTER_STRATRGY(NvEvictStrategy::kEpochLfu, EPOCHLFU); +REGISTER_STRATRGY(NvEvictStrategy::kCustomized, CUSTOMIZED); + +#undef REGISTER_STRATRGY #undef REGISTER_KERNEL diff --git a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv.h b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv.h index 149e2ba89..cfd44d1a5 100644 --- a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv.h +++ b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv.h @@ -49,6 +49,8 @@ namespace recommenders_addons { namespace lookup { namespace gpu { +using NvEvictStrategy = nv::merlin::EvictStrategy; + template class KVOnlyFile : public nv::merlin::BaseKVFile { public: @@ -297,6 +299,8 @@ struct TableWrapperInitOptions { size_t init_capacity; size_t max_hbm_for_vectors; size_t max_bucket_size; + size_t evict_global_epoch; + float max_load_factor; int block_size; int io_block_size; @@ -411,18 +415,16 @@ class TFOrDefaultAllocator : public nv::merlin::BaseAllocator { } }; -template +template class TableWrapper { private: - // using S = uint64_t; - using Table = nv::merlin::HashTable; + using Table = nv::merlin::HashTable; nv::merlin::HashTableOptions mkv_options_; public: TableWrapper(TableWrapperInitOptions& init_options, size_t dim) { max_capacity_ = init_options.max_capacity; dim_ = dim; - // nv::merlin::HashTableOptions mkv_options_; mkv_options_.init_capacity = std::min(init_options.init_capacity, max_capacity_); mkv_options_.max_capacity = max_capacity_; @@ -434,11 +436,14 @@ class TableWrapper { mkv_options_.max_load_factor = 0.5; mkv_options_.block_size = nv::merlin::SAFE_GET_BLOCK_SIZE(128); mkv_options_.dim = dim; - // mkv_options_.evict_strategy = nv::merlin::EvictStrategy::kCustomized; - mkv_options_.evict_strategy = 
nv::merlin::EvictStrategy::kLru; block_size_ = mkv_options_.block_size; table_ = new Table(); + nv::merlin::EvictStrategy::set_global_epoch( + init_options.evict_global_epoch); + LOG(INFO) << "Use Evict Strategy:" << Strategy + << ", [0:LRU, 1:LFU, 2:EPOCHLRU, 3:EPOCHLFU, 4:CUSTOMIZED]"; + LOG(INFO) << "Use Evict Global Epoch:" << init_options.evict_global_epoch; } Status init(nv::merlin::BaseAllocator* allocator) { @@ -674,12 +679,12 @@ class TableWrapper { bool dynamic_mode_; }; -template -Status CreateTableImpl(TableWrapper** pptable, +template +Status CreateTableImpl(TableWrapper** pptable, TableWrapperInitOptions& options, nv::merlin::BaseAllocator* allocator, size_t runtime_dim) { - *pptable = new TableWrapper(options, runtime_dim); + *pptable = new TableWrapper(options, runtime_dim); return (*pptable)->init(allocator); } diff --git a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv_impl.cu.cc b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv_impl.cu.cc deleted file mode 100644 index 3f529f5eb..000000000 --- a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv_impl.cu.cc +++ /dev/null @@ -1,38 +0,0 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv.h" - -namespace tensorflow { -namespace recommenders_addons { -namespace lookup { -namespace gpu { - -#define DEFINE_PURE_GPU_HASHTABLE(key_type, value_type) \ - template <> \ - class TableWrapper - -DEFINE_PURE_GPU_HASHTABLE(int64, float); -DEFINE_PURE_GPU_HASHTABLE(int64, int8); -DEFINE_PURE_GPU_HASHTABLE(int64, int32); -DEFINE_PURE_GPU_HASHTABLE(int64, int64); -DEFINE_PURE_GPU_HASHTABLE(int64, Eigen::half); - -#undef DEFINE_PURE_GPU_HASHTABLE - -} // namespace gpu -} // namespace lookup -} // namespace recommenders_addons -} // namespace tensorflow diff --git a/tensorflow_recommenders_addons/dynamic_embedding/core/ops/hkv_hashtable_ops.cc b/tensorflow_recommenders_addons/dynamic_embedding/core/ops/hkv_hashtable_ops.cc index 4830bae45..f24ab682a 100644 --- a/tensorflow_recommenders_addons/dynamic_embedding/core/ops/hkv_hashtable_ops.cc +++ b/tensorflow_recommenders_addons/dynamic_embedding/core/ops/hkv_hashtable_ops.cc @@ -131,200 +131,186 @@ Status HkvHashTableShape(InferenceContext* c, const ShapeHandle& key, return Status::OK(); } -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableFind)) - .Input("table_handle: resource") - .Input("keys: Tin") - .Input("default_value: Tout") - .Output("values: Tout") - .Attr("Tin: type") - .Attr("Tout: type") - .SetShapeFn([](InferenceContext* c) { - ShapeHandle handle; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); - - ShapeAndType value_shape_and_type; - TF_RETURN_IF_ERROR(ValidateTableResourceHandle( - c, - /*keys=*/c->input(1), - /*key_dtype_attr=*/"Tin", - /*value_dtype_attr=*/"Tout", - /*is_lookup=*/true, &value_shape_and_type)); - c->set_output(0, value_shape_and_type.shape); - - return Status::OK(); - }); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableFindWithExists)) - .Input("table_handle: resource") - .Input("keys: Tin") - .Input("default_value: Tout") - .Output("values: Tout") - .Output("exists: bool") - .Attr("Tin: type") - .Attr("Tout: type") - .SetShapeFn([](InferenceContext* c) { - ShapeHandle handle; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); - - ShapeHandle keys = c->UnknownShapeOfRank(1); - ShapeAndType value_shape_and_type; - TF_RETURN_IF_ERROR(ValidateTableResourceHandle( - c, - /*keys=*/c->input(1), - /*key_dtype_attr=*/"Tin", - /*value_dtype_attr=*/"Tout", - /*is_lookup=*/true, &value_shape_and_type)); - c->set_output(0, value_shape_and_type.shape); - c->set_output(1, keys); - - return Status::OK(); - }); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableInsert)) - .Input("table_handle: resource") - .Input("keys: Tin") - .Input("values: Tout") - .Attr("Tin: type") - .Attr("Tout: type") - .SetShapeFn([](InferenceContext* c) { - ShapeHandle handle; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); - - // TODO: Validate keys and values shape. - return Status::OK(); - }); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableAccum)) +REGISTER_OP(PREFIX_OP_NAME(HkvHashTableRemove)) .Input("table_handle: resource") .Input("keys: key_dtype") - .Input("values_or_deltas: value_dtype") - .Input("exists: bool") .Attr("key_dtype: type") - .Attr("value_dtype: type") - .SetShapeFn([](InferenceContext* c) { - ShapeHandle handle; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); - - // TODO: Validate keys and values shape. 
- return Status::OK(); - }); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableRemove)) - .Input("table_handle: resource") - .Input("keys: Tin") - .Attr("Tin: type") .SetShapeFn([](InferenceContext* c) { ShapeHandle handle; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(1), 1, &handle)); - - // TODO(turboale): Validate keys shape. return Status::OK(); }); -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableClear)) - .Input("table_handle: resource") - .Attr("key_dtype: type") - .Attr("value_dtype: type"); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableSize)) - .Input("table_handle: resource") - .Output("size: int64") - .Attr("key_dtype: type") - .Attr("value_dtype: type") - .SetShapeFn(ScalarAndTwoElementVectorInputsAndScalarOutputs); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableExport)) - .Input("table_handle: resource") - .Output("keys: Tkeys") - .Output("values: Tvalues") - .Attr("Tkeys: type") - .Attr("Tvalues: type") - .SetShapeFn([](InferenceContext* c) { - ShapeHandle handle; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); - ShapeHandle keys = c->UnknownShapeOfRank(1); - ShapeAndType value_shape_and_type; - TF_RETURN_IF_ERROR(ValidateTableResourceHandle( - c, - /*keys=*/keys, - /*key_dtype_attr=*/"Tkeys", - /*value_dtype_attr=*/"Tvalues", - /*is_lookup=*/false, &value_shape_and_type)); - c->set_output(0, keys); - c->set_output(1, value_shape_and_type.shape); - return Status::OK(); - }); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableSaveToFileSystem)) - .Input("table_handle: resource") - .Input("dirpath: string") - .Input("file_name: string") - .Attr("key_dtype: type") - .Attr("value_dtype: type") - .Attr("dirpath_env: string") - .Attr("append_to_file: bool") - .Attr("buffer_size: int >= 1"); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableExportKeysAndScores)) - .Input("table_handle: resource") - .Output("keys: Tkeys") - .Output("scores: int64") - .Attr("Tkeys: type") - .Attr("split_size: int") - .SetShapeFn([](InferenceContext* c) { - ShapeHandle handle; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); - ShapeHandle keys = c->UnknownShapeOfRank(1); - ShapeHandle scores = c->UnknownShapeOfRank(1); - c->set_output(0, keys); - c->set_output(1, scores); - return Status::OK(); - }); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableImport)) - .Input("table_handle: resource") - .Input("keys: Tin") - .Input("values: Tout") - .Attr("Tin: type") - .Attr("Tout: type") - .SetShapeFn([](InferenceContext* c) { - ShapeHandle handle; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); +#define CONCAT_QUADRA_STRING(X, Y, Z, S) (#X #Y #Z #S) + +#define PREFIX_OP_NAME_X_IMPL(N, S) CONCAT_QUADRA_STRING(TFRA, >, N, S) +#define PREFIX_OP_NAME_X(N, ...) 
PREFIX_OP_NAME_X_IMPL(N, __VA_ARGS__) + +#define REGISTER_HKV_TABLE(STRATEGY) \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableFind, STRATEGY)) \ + .Input("table_handle: resource") \ + .Input("keys: key_dtype") \ + .Input("default_value: value_dtype") \ + .Output("values: value_dtype") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type") \ + .SetShapeFn([](InferenceContext* c) { \ + ShapeHandle handle; \ + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); \ + ShapeAndType value_shape_and_type; \ + TF_RETURN_IF_ERROR(ValidateTableResourceHandle( \ + c, /*keys=*/c->input(1), /*key_dtype_attr=*/"key_dtype", \ + /*value_dtype_attr=*/"value_dtype", /*is_lookup=*/true, \ + &value_shape_and_type)); \ + c->set_output(0, value_shape_and_type.shape); \ + return Status::OK(); \ + }); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableFindWithExists, STRATEGY)) \ + .Input("table_handle: resource") \ + .Input("keys: key_dtype") \ + .Input("default_value: value_dtype") \ + .Output("values: value_dtype") \ + .Output("exists: bool") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type") \ + .SetShapeFn([](InferenceContext* c) { \ + ShapeHandle handle; \ + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); \ + ShapeHandle keys = c->UnknownShapeOfRank(1); \ + ShapeAndType value_shape_and_type; \ + TF_RETURN_IF_ERROR(ValidateTableResourceHandle( \ + c, /*keys=*/c->input(1), /*key_dtype_attr=*/"key_dtype", \ + /*value_dtype_attr=*/"value_dtype", /*is_lookup=*/true, \ + &value_shape_and_type)); \ + c->set_output(0, value_shape_and_type.shape); \ + c->set_output(1, keys); \ + return Status::OK(); \ + }); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableInsert, STRATEGY)) \ + .Input("table_handle: resource") \ + .Input("keys: key_dtype") \ + .Input("values: value_dtype") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type") \ + .SetShapeFn([](InferenceContext* c) { \ + ShapeHandle handle; \ + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); \ + return Status::OK(); \ + }); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableAccum, STRATEGY)) \ + .Input("table_handle: resource") \ + .Input("keys: key_dtype") \ + .Input("values_or_deltas: value_dtype") \ + .Input("exists: bool") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type") \ + .SetShapeFn([](InferenceContext* c) { \ + ShapeHandle handle; \ + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); \ + return Status::OK(); \ + }); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableClear, STRATEGY)) \ + .Input("table_handle: resource") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type"); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableSize, STRATEGY)) \ + .Input("table_handle: resource") \ + .Output("size: int64") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type") \ + .SetShapeFn(ScalarAndTwoElementVectorInputsAndScalarOutputs); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableExport, STRATEGY)) \ + .Input("table_handle: resource") \ + .Output("keys: key_dtype") \ + .Output("values: value_dtype") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type") \ + .SetShapeFn([](InferenceContext* c) { \ + ShapeHandle handle; \ + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); \ + ShapeHandle keys = c->UnknownShapeOfRank(1); \ + ShapeAndType value_shape_and_type; \ + TF_RETURN_IF_ERROR(ValidateTableResourceHandle( \ + c, /*keys=*/keys, /*key_dtype_attr=*/"key_dtype", \ + /*value_dtype_attr=*/"value_dtype", /*is_lookup=*/false, \ + &value_shape_and_type)); \ + c->set_output(0, keys); \ + c->set_output(1, 
value_shape_and_type.shape); \ + return Status::OK(); \ + }); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableSaveToFileSystem, STRATEGY)) \ + .Input("table_handle: resource") \ + .Input("dirpath: string") \ + .Input("file_name: string") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type") \ + .Attr("dirpath_env: string") \ + .Attr("append_to_file: bool") \ + .Attr("buffer_size: int >= 1"); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableExportKeysAndScores, STRATEGY)) \ + .Input("table_handle: resource") \ + .Output("keys: Tkeys") \ + .Output("scores: int64") \ + .Attr("Tkeys: type") \ + .Attr("split_size: int") \ + .SetShapeFn([](InferenceContext* c) { \ + ShapeHandle handle; \ + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); \ + ShapeHandle keys = c->UnknownShapeOfRank(1); \ + ShapeHandle scores = c->UnknownShapeOfRank(1); \ + c->set_output(0, keys); \ + c->set_output(1, scores); \ + return Status::OK(); \ + }); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableImport, STRATEGY)) \ + .Input("table_handle: resource") \ + .Input("keys: key_dtype") \ + .Input("values: value_dtype") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type") \ + .SetShapeFn([](InferenceContext* c) { \ + ShapeHandle handle; \ + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); \ + ShapeHandle keys; \ + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &keys)); \ + TF_RETURN_IF_ERROR(c->Merge(keys, c->input(2), &keys)); \ + return Status::OK(); \ + }); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableLoadFromFileSystem, STRATEGY)) \ + .Input("table_handle: resource") \ + .Input("dirpath: string") \ + .Input("file_name: string") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type") \ + .Attr("dirpath_env: string") \ + .Attr("load_entire_dir: bool") \ + .Attr("buffer_size: int >= 1"); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableOfTensors, STRATEGY)) \ + .Output("table_handle: resource") \ + .Attr("container: string = ''") \ + .Attr("shared_name: string = ''") \ + .Attr("use_node_name_sharing: bool = false") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type") \ + .Attr("value_shape: shape = {}") \ + .Attr("init_capacity: int = 0") \ + .Attr("max_capacity: int = 0") \ + .Attr("max_hbm_for_vectors: int = 0") \ + .Attr("evict_global_epoch: int = 0") \ + .SetIsStateful() \ + .SetShapeFn([](InferenceContext* c) { \ + PartialTensorShape value_p; \ + TF_RETURN_IF_ERROR(c->GetAttr("value_shape", &value_p)); \ + ShapeHandle value_s; \ + TF_RETURN_IF_ERROR( \ + c->MakeShapeFromPartialTensorShape(value_p, &value_s)); \ + return HkvHashTableShape(c, /*key=*/c->Scalar(), /*value=*/value_s); \ + }); +REGISTER_HKV_TABLE(LRU) +REGISTER_HKV_TABLE(LFU) +REGISTER_HKV_TABLE(EPOCHLRU) +REGISTER_HKV_TABLE(EPOCHLFU) +REGISTER_HKV_TABLE(CUSTOMIZED) - ShapeHandle keys; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &keys)); - TF_RETURN_IF_ERROR(c->Merge(keys, c->input(2), &keys)); - return Status::OK(); - }); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableLoadFromFileSystem)) - .Input("table_handle: resource") - .Input("dirpath: string") - .Input("file_name: string") - .Attr("key_dtype: type") - .Attr("value_dtype: type") - .Attr("dirpath_env: string") - .Attr("load_entire_dir: bool") - .Attr("buffer_size: int >= 1"); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableOfTensors)) - .Output("table_handle: resource") - .Attr("container: string = ''") - .Attr("shared_name: string = ''") - .Attr("use_node_name_sharing: bool = false") - .Attr("key_dtype: type") - .Attr("value_dtype: type") - .Attr("value_shape: shape = {}") - 
.Attr("init_capacity: int = 0") - .Attr("max_capacity: int = 0") - .Attr("max_hbm_for_vectors: int = 0") - .SetIsStateful() - .SetShapeFn([](InferenceContext* c) { - PartialTensorShape value_p; - TF_RETURN_IF_ERROR(c->GetAttr("value_shape", &value_p)); - ShapeHandle value_s; - TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(value_p, &value_s)); - return HkvHashTableShape(c, /*key=*/c->Scalar(), /*value=*/value_s); - }); } // namespace tensorflow diff --git a/tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/hkv_hashtable_evict_test.py b/tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/hkv_hashtable_evict_test.py new file mode 100644 index 000000000..e2cf274be --- /dev/null +++ b/tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/hkv_hashtable_evict_test.py @@ -0,0 +1,111 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""unit tests of hkv hashtable ops +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys +import os +import itertools +import numpy as np + +from tensorflow_recommenders_addons import dynamic_embedding as de +from tensorflow_recommenders_addons.utils.check_platform import is_windows, is_macos, is_arm64, is_linux, is_raspi_arm + +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables +from tensorflow.python.platform import test +from tensorflow.python.training import server_lib +from tensorflow.python.client import session + +import tensorflow as tf +try: + import tensorflow_io +except: + print() + + +def _type_converter(tf_type): + mapper = { + dtypes.int32: np.int32, + dtypes.int64: np.int64, + dtypes.float32: float, + dtypes.float64: np.float64, + dtypes.string: str, + dtypes.half: np.float16, + dtypes.int8: np.int8, + dtypes.bool: bool, + } + return mapper[tf_type] + + +default_config = config_pb2.ConfigProto( + allow_soft_placement=False, + gpu_options=config_pb2.GPUOptions(allow_growth=True)) + + +def _get_devices(): + return ["/gpu:0" if test_util.is_gpu_available() else "/cpu:0"] + + +is_gpu_available = test_util.is_gpu_available() + + +def convert(v, t): + return np.array(v).astype(_type_converter(t)) + + +class HkvHashtableTest(test.TestCase): + + @test_util.run_in_graph_and_eager_modes() + def test_evict_strategy(self): + if not is_gpu_available: + self.skipTest('Only test when gpu is available.') + strategy_i = 0 + for strategy in de.HkvEvictStrategy: + with self.session(use_gpu=True, config=default_config): + with self.captureWritesToStream(sys.stderr) as 
printed: + table = de.get_variable( + str(strategy), + key_dtype=dtypes.int64, + value_dtype=dtypes.int32, + initializer=0, + dim=8, + init_size=1024, + kv_creator=de.HkvHashTableCreator( + config=de.HkvHashTableConfig(init_capacity=1024, + max_capacity=1024, + max_hbm_for_values=1024 * 4 * 8 * + 2, + evict_strategy=strategy))) + self.evaluate(table.size()) + + content = "Use Evict Strategy:" + str(strategy_i) + self.assertTrue(content in printed.contents()) + strategy_i = strategy_i + 1 + + del table + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/hkv_hashtable_ops_test.py b/tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/hkv_hashtable_ops_test.py index 5efa574d2..83e5e1822 100644 --- a/tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/hkv_hashtable_ops_test.py +++ b/tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/hkv_hashtable_ops_test.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensorflow_recommenders_addons/dynamic_embedding/python/ops/cuckoo_hashtable_ops.py b/tensorflow_recommenders_addons/dynamic_embedding/python/ops/cuckoo_hashtable_ops.py index fd9a4394e..e0c1b298e 100644 --- a/tensorflow_recommenders_addons/dynamic_embedding/python/ops/cuckoo_hashtable_ops.py +++ b/tensorflow_recommenders_addons/dynamic_embedding/python/ops/cuckoo_hashtable_ops.py @@ -154,7 +154,7 @@ def _create_resource(self): if self._device_type == "GPU": with ops.device(self._device): - table_ref = hkv_ops.tfra_hkv_hash_table_of_tensors( + table_ref = hkv_ops.tfra_hkv_hash_table_of_tensors_lru( shared_name=self._shared_name, use_node_name_sharing=use_node_name_sharing, key_dtype=self._key_dtype, @@ -213,9 +213,10 @@ def size(self, name=None): with ops.name_scope(name, "%s_Size" % self.name, [self.resource_handle]): with ops.colocate_with(self.resource_handle): if self._device_type == "GPU": - return hkv_ops.tfra_hkv_hash_table_size(self.resource_handle, - key_dtype=self._key_dtype, - value_dtype=self._value_dtype) + return hkv_ops.tfra_hkv_hash_table_size_lru( + self.resource_handle, + key_dtype=self._key_dtype, + value_dtype=self._value_dtype) else: return cuckoo_ops.tfra_cuckoo_hash_table_size(self.resource_handle) @@ -262,9 +263,10 @@ def clear(self, name=None): with ops.name_scope(name, "%s_lookup_table_clear" % self.name, (self.resource_handle, self._default_value)): if self._device_type == "GPU": - return hkv_ops.tfra_hkv_hash_table_clear(self.resource_handle, - key_dtype=self._key_dtype, - value_dtype=self._value_dtype) + return hkv_ops.tfra_hkv_hash_table_clear_lru( + self.resource_handle, + key_dtype=self._key_dtype, + value_dtype=self._value_dtype) else: return cuckoo_ops.tfra_cuckoo_hash_table_clear( self.resource_handle, @@ -310,14 +312,14 @@ def lookup(self, with ops.colocate_with(self.resource_handle, ignore_existing=True): if self._device_type == "GPU": if return_exists: - values, exists = hkv_ops.tfra_hkv_hash_table_find_with_exists( + values, exists = hkv_ops.tfra_hkv_hash_table_find_with_exists_lru( self.resource_handle, keys, dynamic_default_values if dynamic_default_values is not None else self._default_value, ) else: - values = hkv_ops.tfra_hkv_hash_table_find( + values = 
hkv_ops.tfra_hkv_hash_table_find_lru( self.resource_handle, keys, dynamic_default_values @@ -368,8 +370,8 @@ def insert(self, keys, values, name=None): with ops.colocate_with(self.resource_handle, ignore_existing=True): # pylint: disable=protected-access if self._device_type == "GPU": - return hkv_ops.tfra_hkv_hash_table_insert(self.resource_handle, keys, - values) + return hkv_ops.tfra_hkv_hash_table_insert_lru(self.resource_handle, + keys, values) else: return cuckoo_ops.tfra_cuckoo_hash_table_insert( self.resource_handle, keys, values) @@ -406,8 +408,9 @@ def accum(self, keys, values_or_deltas, exists, name=None): with ops.colocate_with(self.resource_handle, ignore_existing=True): # pylint: disable=protected-access if self._device_type == "GPU": - return hkv_ops.tfra_hkv_hash_table_accum(self.resource_handle, keys, - values_or_deltas, exists) + return hkv_ops.tfra_hkv_hash_table_accum_lru(self.resource_handle, + keys, values_or_deltas, + exists) else: return cuckoo_ops.tfra_cuckoo_hash_table_accum( self.resource_handle, keys, values_or_deltas, exists) @@ -426,7 +429,8 @@ def export(self, name=None): [self.resource_handle]): with ops.colocate_with(self.resource_handle): if self._device_type == "GPU": - keys, values = hkv_ops.tfra_hkv_hash_table_export( + + keys, values = hkv_ops.tfra_hkv_hash_table_export_lru( self.resource_handle, self._key_dtype, self._value_dtype) else: keys, values = cuckoo_ops.tfra_cuckoo_hash_table_export( @@ -458,7 +462,7 @@ def save_to_file_system(self, [self.resource_handle]): with ops.colocate_with(None, ignore_existing=True): if self._device_type == "GPU": - return hkv_ops.tfra_hkv_hash_table_save_to_file_system( + return hkv_ops.tfra_hkv_hash_table_save_to_file_system_lru( self.resource_handle, dirpath=dirpath, file_name=file_name if file_name else self._name, @@ -502,7 +506,7 @@ def load_from_file_system(self, [self.resource_handle]): with ops.colocate_with(None, ignore_existing=True): if self._device_type == "GPU": - return hkv_ops.tfra_hkv_hash_table_load_from_file_system( + return hkv_ops.tfra_hkv_hash_table_load_from_file_system_lru( self.resource_handle, dirpath=dirpath, file_name=file_name if file_name else self._name, @@ -559,7 +563,7 @@ def restore(self, restored_tensors, restored_shapes, name=None): with ops.name_scope(name, "%s_table_restore" % self._restore_name): with ops.colocate_with(self.op.resource_handle): if self.op.resource_handle.device.count('GPU'): - return hkv_ops.tfra_hkv_hash_table_import( + return hkv_ops.tfra_hkv_hash_table_import_lru( self.op.resource_handle, restored_tensors[0], restored_tensors[1], diff --git a/tensorflow_recommenders_addons/dynamic_embedding/python/ops/dynamic_embedding_creator.py b/tensorflow_recommenders_addons/dynamic_embedding/python/ops/dynamic_embedding_creator.py index c8379b7e4..64b540170 100644 --- a/tensorflow_recommenders_addons/dynamic_embedding/python/ops/dynamic_embedding_creator.py +++ b/tensorflow_recommenders_addons/dynamic_embedding/python/ops/dynamic_embedding_creator.py @@ -15,6 +15,7 @@ # lint-as: python3 from abc import ABCMeta +from enum import IntEnum, unique from tensorflow.python.eager import context from tensorflow.python.framework import constant_op @@ -136,17 +137,30 @@ def get_config(self): return config +@unique +class HkvEvictStrategy(IntEnum): + LRU = 0 + LFU = 1 + EPOCHLRU = 2 + EPOCHLFU = 3 + CUSTOMIZED = 4 + + class HkvHashTableConfig(object): def __init__(self, init_capacity=KHkvHashTableInitCapacity, max_capacity=KHkvHashTableMaxCapacity, - 
max_hbm_for_values=KHkvHashTableMaxHbmForValuesByBytes): + max_hbm_for_values=KHkvHashTableMaxHbmForValuesByBytes, + evict_strategy=HkvEvictStrategy.LRU, + evict_global_epoch=0): """ CuckooHashTableConfig include nothing for parameter default satisfied. """ self.init_capacity = init_capacity self.max_capacity = max_capacity self.max_hbm_for_values = max_hbm_for_values + self.evict_strategy = evict_strategy + self.evict_global_epoch = evict_global_epoch class HkvHashTableCreator(KVCreator): @@ -171,10 +185,14 @@ def create( self.init_capacity = init_size self.max_capacity = KHkvHashTableMaxCapacity self.max_hbm_for_values = KHkvHashTableMaxHbmForValuesByBytes + self.evict_strategy = HkvEvictStrategy.LRU + self.evict_global_epoch = 0 if self.config and isinstance(self.config, de.HkvHashTableConfig): self.init_capacity = self.config.init_capacity self.max_capacity = self.config.max_capacity self.max_hbm_for_values = self.config.max_hbm_for_values + self.evict_strategy = self.config.evict_strategy + self.evict_global_epoch = self.config.evict_global_epoch self.device = device self.shard_saveable_object_fn = shard_saveable_object_fn @@ -187,6 +205,8 @@ def create( init_capacity=self.init_capacity, max_capacity=self.max_capacity, max_hbm_for_values=self.max_hbm_for_values, + evict_strategy=self.evict_strategy, + evict_global_epoch=self.evict_global_epoch, config=self.config, device=self.device, shard_saveable_object_fn=self.shard_saveable_object_fn) diff --git a/tensorflow_recommenders_addons/dynamic_embedding/python/ops/hkv_hashtable_ops.py b/tensorflow_recommenders_addons/dynamic_embedding/python/ops/hkv_hashtable_ops.py index efcee7b4d..cc50fe85a 100644 --- a/tensorflow_recommenders_addons/dynamic_embedding/python/ops/hkv_hashtable_ops.py +++ b/tensorflow_recommenders_addons/dynamic_embedding/python/ops/hkv_hashtable_ops.py @@ -32,6 +32,8 @@ from tensorflow_recommenders_addons.utils.resource_loader import LazySO from tensorflow_recommenders_addons.utils.resource_loader import prefix_op_name +from tensorflow_recommenders_addons.dynamic_embedding.python.ops.dynamic_embedding_creator import HkvEvictStrategy + try: hkv_ops = LazySO("dynamic_embedding/core/_hkv_ops.so").ops except: @@ -73,6 +75,8 @@ def __init__( config=None, device='', shard_saveable_object_fn=None, + evict_strategy=HkvEvictStrategy.LRU, + evict_global_epoch=0, ): """Creates an empty `HkvHashTable` object. 
@@ -104,6 +108,7 @@ def __init__( self._checkpoint = checkpoint self._key_dtype = key_dtype self._value_dtype = value_dtype + self._scores_dtype = dtypes.int64 self._init_capacity = init_capacity self._max_capacity = max_capacity self._max_hbm_for_values = max_hbm_for_values
@@ -113,11 +118,15 @@ def __init__( if not self._device or self._device == '': self._device = ['/GPU:0'] self._new_obj_trackable = None + self._evict_strategy = evict_strategy + self._evict_global_epoch = evict_global_epoch if self._config: self._init_capacity = self._config.init_capacity self._max_capacity = self._config.max_capacity self._max_hbm_for_values = self._config.max_hbm_for_values + self._evict_strategy = self._config.evict_strategy + self._evict_global_epoch = self._config.evict_global_epoch self._shared_name = None if context.executing_eagerly():
@@ -150,14 +159,33 @@ def __init__( if not context.executing_eagerly(): ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, self.saveable) + def _get_op_interface(self, base_interface_name): + interface_name = base_interface_name + "_lru" + if self._evict_strategy == HkvEvictStrategy.LRU: + interface_name = base_interface_name + "_lru" + elif self._evict_strategy == HkvEvictStrategy.LFU: + interface_name = base_interface_name + "_lfu" + elif self._evict_strategy == HkvEvictStrategy.EPOCHLRU: + interface_name = base_interface_name + "_epochlru" + elif self._evict_strategy == HkvEvictStrategy.EPOCHLFU: + interface_name = base_interface_name + "_epochlfu" + elif self._evict_strategy == HkvEvictStrategy.CUSTOMIZED: + interface_name = base_interface_name + "_customized" + else: + print("unsupported strategy: {}, use default LRU".format( + self._evict_strategy)) + return getattr(hkv_ops, interface_name) + def _create_resource(self): # The table must be shared if checkpointing is requested for multi-worker # training to work correctly. Use the node name if no shared_name has been # explicitly specified. use_node_name_sharing = self._checkpoint and self._shared_name is None + hkv_table = self._get_op_interface("tfra_hkv_hash_table_of_tensors") + with ops.device(self._device): - table_ref = hkv_ops.tfra_hkv_hash_table_of_tensors( + table_ref = hkv_table( shared_name=self._shared_name, use_node_name_sharing=use_node_name_sharing, key_dtype=self._key_dtype,
@@ -166,6 +194,7 @@ def _create_resource(self): init_capacity=self._init_capacity, max_capacity=self._max_capacity, max_hbm_for_vectors=self._max_hbm_for_values, + evict_global_epoch=self._evict_global_epoch, name=self._name, )
@@ -202,11 +231,12 @@ def size(self, name=None): Returns: A scalar tensor containing the number of elements in this table. """ + hkv_size = self._get_op_interface("tfra_hkv_hash_table_size") with ops.name_scope(name, "%s_Size" % self.name, [self.resource_handle]): with ops.colocate_with(self.resource_handle): - return hkv_ops.tfra_hkv_hash_table_size(self.resource_handle, - key_dtype=self._key_dtype, - value_dtype=self._value_dtype) + return hkv_size(self.resource_handle, + key_dtype=self._key_dtype, + value_dtype=self._value_dtype) def remove(self, keys, name=None): """Removes `keys` and its associated values from the table. @@ -246,11 +276,12 @@ def clear(self, name=None): Returns: The created Operation.
""" + hkv_clear = self._get_op_interface("tfra_hkv_hash_table_clear") with ops.name_scope(name, "%s_lookup_table_clear" % self.name, (self.resource_handle, self._default_value)): - op = hkv_ops.tfra_hkv_hash_table_clear(self.resource_handle, - key_dtype=self._key_dtype, - value_dtype=self._value_dtype) + op = hkv_clear(self.resource_handle, + key_dtype=self._key_dtype, + value_dtype=self._value_dtype) return op @@ -295,14 +326,17 @@ def lookup(self, keys = ops.convert_to_tensor(keys, dtype=self._key_dtype, name="keys") with ops.colocate_with(self.resource_handle, ignore_existing=True): if return_exists: - values, exists = hkv_ops.tfra_hkv_hash_table_find_with_exists( + hkv_find_with_exists = self._get_op_interface( + "tfra_hkv_hash_table_find_with_exists") + values, exists = hkv_find_with_exists( self.resource_handle, keys, dynamic_default_values if dynamic_default_values is not None else self._default_value, ) else: - values = hkv_ops.tfra_hkv_hash_table_find( + hkv_find = self._get_op_interface("tfra_hkv_hash_table_find") + values = hkv_find( self.resource_handle, keys, dynamic_default_values @@ -310,7 +344,7 @@ def lookup(self, ) return (values, exists) if return_exists else values - def insert(self, keys, values, name=None): + def insert(self, keys, values, scores=None, name=None): """Associates `keys` with `values`. Args: @@ -334,13 +368,16 @@ def insert(self, keys, values, name=None): ): keys = ops.convert_to_tensor(keys, self._key_dtype, name="keys") values = ops.convert_to_tensor(values, self._value_dtype, name="values") + if scores: + scores = ops.convert_to_tensor(scores, + self._scores_dtype, + name="scores") + hkv_insert = self._get_op_interface("tfra_hkv_hash_table_insert") with ops.colocate_with(self.resource_handle, ignore_existing=True): - # pylint: disable=protected-access - op = hkv_ops.tfra_hkv_hash_table_insert(self.resource_handle, keys, - values) + op = hkv_insert(self.resource_handle, keys, values, scores) return op - def accum(self, keys, values_or_deltas, exists, name=None): + def accum(self, keys, values_or_deltas, exists, scores=None, name=None): """Associates `keys` with `values`. Args: @@ -369,10 +406,14 @@ def accum(self, keys, values_or_deltas, exists, name=None): self._value_dtype, name="values_or_deltas") exists = ops.convert_to_tensor(exists, dtypes.bool, name="exists") + if scores: + scores = ops.convert_to_tensor(scores, + self._scores_dtype, + name="scores") + hkv_accum = self._get_op_interface("tfra_hkv_hash_table_accum") with ops.colocate_with(self.resource_handle, ignore_existing=True): - # pylint: disable=protected-access - op = hkv_ops.tfra_hkv_hash_table_accum(self.resource_handle, keys, - values_or_deltas, exists) + op = hkv_accum(self.resource_handle, keys, values_or_deltas, exists, + scores) return op def export(self, name=None): @@ -385,22 +426,26 @@ def export(self, name=None): A pair of tensors with the first tensor containing all keys and the second tensors containing all values in the table. 
""" + hkv_export = self._get_op_interface("tfra_hkv_hash_table_export") with ops.name_scope(name, "%s_lookup_table_export_values" % self.name, [self.resource_handle]): with ops.colocate_with(self.resource_handle): - keys, values = hkv_ops.tfra_hkv_hash_table_export( - self.resource_handle, self._key_dtype, self._value_dtype) + keys, values = hkv_export(self.resource_handle, self._key_dtype, + self._value_dtype) return keys, values def export_keys_and_scores(self, split_size, name=None): if not (split_size > 0 and isinstance(split_size, int)): raise ValueError(f'split_size must be positive integer.') + hkv_export_keys_and_scores = self._get_op_interface( + "tfra_hkv_hash_table_export_keys_and_scores") with ops.name_scope(name, "%s_lookup_table_export_keys_and_scores" % self.name, [self.resource_handle]): with ops.colocate_with(self.resource_handle): - keys, scores = hkv_ops.tfra_hkv_hash_table_export_keys_and_scores( - self.resource_handle, Tkeys=self._key_dtype, split_size=split_size) + keys, scores = hkv_export_keys_and_scores(self.resource_handle, + Tkeys=self._key_dtype, + split_size=split_size) return keys, scores def save_to_file_system(self, @@ -423,11 +468,12 @@ def save_to_file_system(self, Returns: An operation to save the table. """ - + hkv_save_to_file_system = self._get_op_interface( + "tfra_hkv_hash_table_save_to_file_system") with ops.name_scope(name, "%s_save_table" % self.name, [self.resource_handle]): with ops.colocate_with(None, ignore_existing=True): - return hkv_ops.tfra_hkv_hash_table_save_to_file_system( + return hkv_save_to_file_system( self.resource_handle, dirpath=dirpath, file_name=file_name if file_name else self._name, @@ -457,10 +503,12 @@ def load_from_file_system(self, Returns: An operation to load keys and values to table from FileSystem. 
""" + hkv_load_from_file_system = self._get_op_interface( + "tfra_hkv_hash_table_load_from_file_system") with ops.name_scope(name, "%s_load_table" % self.name, [self.resource_handle]): with ops.colocate_with(None, ignore_existing=True): - return hkv_ops.tfra_hkv_hash_table_load_from_file_system( + return hkv_load_from_file_system( self.resource_handle, dirpath=dirpath, file_name=file_name if file_name else self._name, @@ -504,9 +552,10 @@ def __init__(self, table, name, full_name=""): def restore(self, restored_tensors, restored_shapes, name=None): del restored_shapes # unused # pylint: disable=protected-access + hkv_import = self._get_op_interface("tfra_hkv_hash_table_import") with ops.name_scope(name, "%s_table_restore" % self._restore_name): with ops.colocate_with(self.op.resource_handle): - return hkv_ops.tfra_hkv_hash_table_import( + return hkv_import( self.op.resource_handle, restored_tensors[0], restored_tensors[1], diff --git a/tools/docker/build_wheel.Dockerfile b/tools/docker/build_wheel.Dockerfile index c6fca9e42..a38bf3de3 100644 --- a/tools/docker/build_wheel.Dockerfile +++ b/tools/docker/build_wheel.Dockerfile @@ -21,12 +21,12 @@ RUN mv /usr/bin/lsb_release2 /usr/bin/lsb_release ARG PY_VERSION RUN ln -sf /usr/local/bin/python$PY_VERSION /usr/bin/python -ENV PATH=/dt7/usr/bin:${PATH} +ENV PATH=/dt8/usr/bin:${PATH} ENV LD_LIBRARY_PATH=/usr/local/lib:${LD_LIBRARY_PATH} -ENV LD_LIBRARY_PATH=/dt7/user/lib64:${LD_LIBRARY_PATH} -ENV LD_LIBRARY_PATH=/dt7/user/lib:${LD_LIBRARY_PATH} -ENV MANPATH=/dt7/user/share/man:${LD_LIBRARY_PATH} -ENV INFOPATH=/dt7/user/share/info +ENV LD_LIBRARY_PATH=/dt8/user/lib64:${LD_LIBRARY_PATH} +ENV LD_LIBRARY_PATH=/dt8/user/lib:${LD_LIBRARY_PATH} +ENV MANPATH=/dt8/user/share/man:${LD_LIBRARY_PATH} +ENV INFOPATH=/dt8/user/share/info ARG TF_VERSION ARG TF_NAME diff --git a/tools/docker/cuda11.2.1-cudnn8-ubuntu20.04-manylinux2014-python3.9.Dockerfile b/tools/docker/cuda11.2.1-cudnn8-ubuntu20.04-manylinux2014-python3.9.Dockerfile index e46e02fe3..3cbd33457 100644 --- a/tools/docker/cuda11.2.1-cudnn8-ubuntu20.04-manylinux2014-python3.9.Dockerfile +++ b/tools/docker/cuda11.2.1-cudnn8-ubuntu20.04-manylinux2014-python3.9.Dockerfile @@ -8,7 +8,7 @@ # --tag "tfra/nosla-cuda11.2.1-cudnn8-ubuntu20.04-manylinux2014-python3.9" # $ docker push tfra/nosla-cuda11.2.1-cudnn8-ubuntu20.04-manylinux2014-python3.9 -FROM nvidia/cuda:11.2.1-cudnn8-devel-ubuntu20.04 as devtoolset +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 as devtoolset RUN chmod 777 /tmp/ ENV DEBIAN_FRONTEND=noninteractive @@ -39,13 +39,11 @@ ADD devtoolset/build_devtoolset.sh build_devtoolset.sh ADD devtoolset/rpm-patch.sh rpm-patch.sh # Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-7 in /dt7. -RUN /build_devtoolset.sh devtoolset-7 /dt7 # Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-8 in /dt8. RUN /build_devtoolset.sh devtoolset-8 /dt8 # TODO(klimek): Split up into two different docker images. -FROM nvidia/cuda:11.2.1-cudnn8-devel-ubuntu20.04 -COPY --from=devtoolset /dt7 /dt7 +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 COPY --from=devtoolset /dt8 /dt8 # Install TensorRT. 
@@ -96,8 +94,8 @@ RUN /install/build_and_install_python.sh "3.9.7" COPY install/install_pip_packages_by_version.sh /install/ RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.9" -COPY install/use_devtoolset_7.sh /install/ -RUN /install/use_devtoolset_7.sh +COPY install/use_devtoolset_8.sh /install/ +RUN /install/use_devtoolset_8.sh COPY install/install_openmpi.sh /install/ RUN /install/install_openmpi.sh "4.1.1" diff --git a/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-multipython.Dockerfile b/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-multipython.Dockerfile new file mode 100644 index 000000000..629faaa32 --- /dev/null +++ b/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-multipython.Dockerfile @@ -0,0 +1,107 @@ +# Dockerfile to build a manylinux 2010 compliant cross-compiler. +# +# Builds a devtoolset gcc/libstdc++ that targets manylinux 2010 compatible +# glibc (2.12) and system libstdc++ (4.4). +# +# To push a new version, run: +# $ docker build -f cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-multipython.Dockerfile . \ +# --tag "tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-multipython" +# $ docker push tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-multipython + +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 as devtoolset + +RUN chmod 777 /tmp/ +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y \ + cpio \ + file \ + flex \ + g++ \ + make \ + patch \ + rpm2cpio \ + unar \ + wget \ + xz-utils \ + libjpeg-dev \ + zlib1g-dev \ + libgflags-dev \ + libsnappy-dev \ + libbz2-dev \ + liblz4-dev \ + libzstd-dev \ + openssh-client \ + && \ + rm -rf /var/lib/apt/lists/* + +ADD devtoolset/fixlinks.sh fixlinks.sh +ADD devtoolset/build_devtoolset.sh build_devtoolset.sh +ADD devtoolset/rpm-patch.sh rpm-patch.sh + +# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-8 in /dt8. +RUN /build_devtoolset.sh devtoolset-8 /dt8 + +# TODO(klimek): Split up into two different docker images. +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 +COPY --from=devtoolset /dt8 /dt8 + +# Install TensorRT. +RUN echo \ + deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 / \ + > /etc/apt/sources.list.d/nvidia-ml.list \ + && \ + apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F60F4B3D7FA2AF80 && \ + apt-get update && \ + rm -rf /var/lib/apt/lists/* + +# Copy and run the install scripts. 
+ARG DEBIAN_FRONTEND=noninteractive + +COPY install/install_bootstrap_deb_packages.sh /install/ +RUN /install/install_bootstrap_deb_packages.sh + +COPY install/install_deb_packages.sh /install/ +RUN /install/install_deb_packages.sh + +# Install additional packages needed for this image: +# - dependencies to build Python from source +# - patchelf, as it is required by auditwheel +RUN apt-get update && apt-get install -y \ + libbz2-dev \ + libffi-dev \ + libgdbm-dev \ + libncurses5-dev \ + libnss3-dev \ + libreadline-dev \ + patchelf \ + gcc-multilib \ + && \ + rm -rf /var/lib/apt/lists/* + +RUN chmod 777 /tmp/ +WORKDIR /tmp/ + +COPY install/install_nccl.sh /install/ +RUN /install/install_nccl.sh "2.8.4-1+cuda11.2" + +COPY install/install_bazel.sh /install/ +RUN /install/install_bazel.sh "5.1.1" + +COPY install/build_and_install_python.sh /install/ +RUN /install/build_and_install_python.sh "3.7.7" +RUN /install/build_and_install_python.sh "3.8.2" +RUN /install/build_and_install_python.sh "3.9.7" + +COPY install/install_pip_packages_by_version.sh /install/ +RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.9" +RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.8" +RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.7" + +COPY install/use_devtoolset_8.sh /install/ +RUN /install/use_devtoolset_8.sh + +COPY install/install_openmpi.sh /install/ +RUN /install/install_openmpi.sh "4.1.1" + +# clean +RUN rm -rf /tmp/* diff --git a/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.10.Dockerfile b/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.10.Dockerfile new file mode 100644 index 000000000..be8fee2c3 --- /dev/null +++ b/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.10.Dockerfile @@ -0,0 +1,103 @@ +# Dockerfile to build a manylinux 2010 compliant cross-compiler. +# +# Builds a devtoolset gcc/libstdc++ that targets manylinux 2010 compatible +# glibc (2.12) and system libstdc++ (4.4). +# +# To push a new version, run: +# $ docker build -f cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.10.Dockerfile . \ +# --tag "tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.10" +# $ docker push tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.10 + +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 as devtoolset + +RUN chmod 777 /tmp/ +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y \ + cpio \ + file \ + flex \ + g++ \ + make \ + patch \ + rpm2cpio \ + unar \ + wget \ + xz-utils \ + libjpeg-dev \ + zlib1g-dev \ + libgflags-dev \ + libsnappy-dev \ + libbz2-dev \ + liblz4-dev \ + libzstd-dev \ + openssh-client \ + && \ + rm -rf /var/lib/apt/lists/* + +ADD devtoolset/fixlinks.sh fixlinks.sh +ADD devtoolset/build_devtoolset.sh build_devtoolset.sh +ADD devtoolset/rpm-patch.sh rpm-patch.sh + +# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-8 in /dt8. +RUN /build_devtoolset.sh devtoolset-8 /dt8 + +# TODO(klimek): Split up into two different docker images. +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 +COPY --from=devtoolset /dt8 /dt8 + +# Install TensorRT. +RUN echo \ + deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 / \ + > /etc/apt/sources.list.d/nvidia-ml.list \ + && \ + apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F60F4B3D7FA2AF80 && \ + apt-get update && \ + rm -rf /var/lib/apt/lists/* + +# Copy and run the install scripts. 
+ARG DEBIAN_FRONTEND=noninteractive + +COPY install/install_bootstrap_deb_packages.sh /install/ +RUN /install/install_bootstrap_deb_packages.sh + +COPY install/install_deb_packages.sh /install/ +RUN /install/install_deb_packages.sh + +# Install additional packages needed for this image: +# - dependencies to build Python from source +# - patchelf, as it is required by auditwheel +RUN apt-get update && apt-get install -y \ + libbz2-dev \ + libffi-dev \ + libgdbm-dev \ + libncurses5-dev \ + libnss3-dev \ + libreadline-dev \ + patchelf \ + gcc-multilib \ + && \ + rm -rf /var/lib/apt/lists/* + +RUN chmod 777 /tmp/ +WORKDIR /tmp/ + +COPY install/install_nccl.sh /install/ +RUN /install/install_nccl.sh "2.8.4-1+cuda11.2" + +COPY install/install_bazel.sh /install/ +RUN /install/install_bazel.sh "5.1.1" + +COPY install/build_and_install_python.sh /install/ +RUN /install/build_and_install_python.sh "3.10.6" + +COPY install/install_pip_packages_by_version.sh /install/ +RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.10" + +COPY install/use_devtoolset_8.sh /install/ +RUN /install/use_devtoolset_8.sh + +COPY install/install_openmpi.sh /install/ +RUN /install/install_openmpi.sh "4.1.1" + +# clean +RUN rm -rf /tmp/* diff --git a/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.7.Dockerfile b/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.7.Dockerfile new file mode 100644 index 000000000..84581ca09 --- /dev/null +++ b/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.7.Dockerfile @@ -0,0 +1,103 @@ +# Dockerfile to build a manylinux 2010 compliant cross-compiler. +# +# Builds a devtoolset gcc/libstdc++ that targets manylinux 2010 compatible +# glibc (2.12) and system libstdc++ (4.4). +# +# To push a new version, run: +# $ docker build -f cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.7.Dockerfile . \ +# --tag "tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.7" +# $ docker push tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.7 + +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 as devtoolset + +RUN chmod 777 /tmp/ +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y \ + cpio \ + file \ + flex \ + g++ \ + make \ + patch \ + rpm2cpio \ + unar \ + wget \ + xz-utils \ + libjpeg-dev \ + zlib1g-dev \ + libgflags-dev \ + libsnappy-dev \ + libbz2-dev \ + liblz4-dev \ + libzstd-dev \ + openssh-client \ + && \ + rm -rf /var/lib/apt/lists/* + +ADD devtoolset/fixlinks.sh fixlinks.sh +ADD devtoolset/build_devtoolset.sh build_devtoolset.sh +ADD devtoolset/rpm-patch.sh rpm-patch.sh + +# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-8 in /dt8. +RUN /build_devtoolset.sh devtoolset-8 /dt8 + +# TODO(klimek): Split up into two different docker images. +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 +COPY --from=devtoolset /dt8 /dt8 + +# Install TensorRT. +RUN echo \ + deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 / \ + > /etc/apt/sources.list.d/nvidia-ml.list \ + && \ + apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F60F4B3D7FA2AF80 && \ + apt-get update && \ + rm -rf /var/lib/apt/lists/* + +# Copy and run the install scripts. 
+ARG DEBIAN_FRONTEND=noninteractive + +COPY install/install_bootstrap_deb_packages.sh /install/ +RUN /install/install_bootstrap_deb_packages.sh + +COPY install/install_deb_packages.sh /install/ +RUN /install/install_deb_packages.sh + +# Install additional packages needed for this image: +# - dependencies to build Python from source +# - patchelf, as it is required by auditwheel +RUN apt-get update && apt-get install -y \ + libbz2-dev \ + libffi-dev \ + libgdbm-dev \ + libncurses5-dev \ + libnss3-dev \ + libreadline-dev \ + patchelf \ + gcc-multilib \ + && \ + rm -rf /var/lib/apt/lists/* + +RUN chmod 777 /tmp/ +WORKDIR /tmp/ + +COPY install/install_nccl.sh /install/ +RUN /install/install_nccl.sh "2.8.4-1+cuda11.2" + +COPY install/install_bazel.sh /install/ +RUN /install/install_bazel.sh "5.1.1" + +COPY install/build_and_install_python.sh /install/ +RUN /install/build_and_install_python.sh "3.7.7" + +COPY install/install_pip_packages_by_version.sh /install/ +RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.7" + +COPY install/use_devtoolset_8.sh /install/ +RUN /install/use_devtoolset_8.sh + +COPY install/install_openmpi.sh /install/ +RUN /install/install_openmpi.sh "4.1.1" + +# clean +RUN rm -rf /tmp/* diff --git a/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.8.Dockerfile b/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.8.Dockerfile new file mode 100644 index 000000000..748d60e40 --- /dev/null +++ b/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.8.Dockerfile @@ -0,0 +1,103 @@ +# Dockerfile to build a manylinux 2010 compliant cross-compiler. +# +# Builds a devtoolset gcc/libstdc++ that targets manylinux 2010 compatible +# glibc (2.12) and system libstdc++ (4.4). +# +# To push a new version, run: +# $ docker build -f cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.8.Dockerfile . \ +# --tag "tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.8" +# $ docker push tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.8 + +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 as devtoolset + +RUN chmod 777 /tmp/ +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y \ + cpio \ + file \ + flex \ + g++ \ + make \ + patch \ + rpm2cpio \ + unar \ + wget \ + xz-utils \ + libjpeg-dev \ + zlib1g-dev \ + libgflags-dev \ + libsnappy-dev \ + libbz2-dev \ + liblz4-dev \ + libzstd-dev \ + openssh-client \ + && \ + rm -rf /var/lib/apt/lists/* + +ADD devtoolset/fixlinks.sh fixlinks.sh +ADD devtoolset/build_devtoolset.sh build_devtoolset.sh +ADD devtoolset/rpm-patch.sh rpm-patch.sh + +# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-8 in /dt8. +RUN /build_devtoolset.sh devtoolset-8 /dt8 + +# TODO(klimek): Split up into two different docker images. +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 +COPY --from=devtoolset /dt8 /dt8 + +# Install TensorRT. +RUN echo \ + deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 / \ + > /etc/apt/sources.list.d/nvidia-ml.list \ + && \ + apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F60F4B3D7FA2AF80 && \ + apt-get update && \ + rm -rf /var/lib/apt/lists/* + +# Copy and run the install scripts. 
+ARG DEBIAN_FRONTEND=noninteractive + +COPY install/install_bootstrap_deb_packages.sh /install/ +RUN /install/install_bootstrap_deb_packages.sh + +COPY install/install_deb_packages.sh /install/ +RUN /install/install_deb_packages.sh + +# Install additional packages needed for this image: +# - dependencies to build Python from source +# - patchelf, as it is required by auditwheel +RUN apt-get update && apt-get install -y \ + libbz2-dev \ + libffi-dev \ + libgdbm-dev \ + libncurses5-dev \ + libnss3-dev \ + libreadline-dev \ + patchelf \ + gcc-multilib \ + && \ + rm -rf /var/lib/apt/lists/* + +RUN chmod 777 /tmp/ +WORKDIR /tmp/ + +COPY install/install_nccl.sh /install/ +RUN /install/install_nccl.sh "2.8.4-1+cuda11.2" + +COPY install/install_bazel.sh /install/ +RUN /install/install_bazel.sh "5.1.1" + +COPY install/build_and_install_python.sh /install/ +RUN /install/build_and_install_python.sh "3.8.2" + +COPY install/install_pip_packages_by_version.sh /install/ +RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.8" + +COPY install/use_devtoolset_8.sh /install/ +RUN /install/use_devtoolset_8.sh + +COPY install/install_openmpi.sh /install/ +RUN /install/install_openmpi.sh "4.1.1" + +# clean +RUN rm -rf /tmp/* diff --git a/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.9.Dockerfile b/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.9.Dockerfile new file mode 100644 index 000000000..2c9fb4e81 --- /dev/null +++ b/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.9.Dockerfile @@ -0,0 +1,103 @@ +# Dockerfile to build a manylinux 2010 compliant cross-compiler. +# +# Builds a devtoolset gcc/libstdc++ that targets manylinux 2010 compatible +# glibc (2.12) and system libstdc++ (4.4). +# +# To push a new version, run: +# $ docker build -f cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.9.Dockerfile . \ +# --tag "tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.9" +# $ docker push tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.9 + +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 as devtoolset + +RUN chmod 777 /tmp/ +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y \ + cpio \ + file \ + flex \ + g++ \ + make \ + patch \ + rpm2cpio \ + unar \ + wget \ + xz-utils \ + libjpeg-dev \ + zlib1g-dev \ + libgflags-dev \ + libsnappy-dev \ + libbz2-dev \ + liblz4-dev \ + libzstd-dev \ + openssh-client \ + && \ + rm -rf /var/lib/apt/lists/* + +ADD devtoolset/fixlinks.sh fixlinks.sh +ADD devtoolset/build_devtoolset.sh build_devtoolset.sh +ADD devtoolset/rpm-patch.sh rpm-patch.sh + +# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-8 in /dt8. +RUN /build_devtoolset.sh devtoolset-8 /dt8 + +# TODO(klimek): Split up into two different docker images. +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 +COPY --from=devtoolset /dt8 /dt8 + +# Install TensorRT. +RUN echo \ + deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 / \ + > /etc/apt/sources.list.d/nvidia-ml.list \ + && \ + apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F60F4B3D7FA2AF80 && \ + apt-get update && \ + rm -rf /var/lib/apt/lists/* + +# Copy and run the install scripts. 
+ARG DEBIAN_FRONTEND=noninteractive + +COPY install/install_bootstrap_deb_packages.sh /install/ +RUN /install/install_bootstrap_deb_packages.sh + +COPY install/install_deb_packages.sh /install/ +RUN /install/install_deb_packages.sh + +# Install additional packages needed for this image: +# - dependencies to build Python from source +# - patchelf, as it is required by auditwheel +RUN apt-get update && apt-get install -y \ + libbz2-dev \ + libffi-dev \ + libgdbm-dev \ + libncurses5-dev \ + libnss3-dev \ + libreadline-dev \ + patchelf \ + gcc-multilib \ + && \ + rm -rf /var/lib/apt/lists/* + +RUN chmod 777 /tmp/ +WORKDIR /tmp/ + +COPY install/install_nccl.sh /install/ +RUN /install/install_nccl.sh "2.8.4-1+cuda11.2" + +COPY install/install_bazel.sh /install/ +RUN /install/install_bazel.sh "5.1.1" + +COPY install/build_and_install_python.sh /install/ +RUN /install/build_and_install_python.sh "3.9.7" + +COPY install/install_pip_packages_by_version.sh /install/ +RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.9" + +COPY install/use_devtoolset_8.sh /install/ +RUN /install/use_devtoolset_8.sh + +COPY install/install_openmpi.sh /install/ +RUN /install/install_openmpi.sh "4.1.1" + +# clean +RUN rm -rf /tmp/* diff --git a/tools/docker/install/use_devtoolset_8.sh b/tools/docker/install/use_devtoolset_8.sh new file mode 100755 index 000000000..0a6b3cbfd --- /dev/null +++ b/tools/docker/install/use_devtoolset_8.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# Use devtoolset-8 as tool chain +rm -r /usr/bin/gcc* +export PATH=/dt8/usr/bin:${PATH} +export PATH=/usr/bin/:/usr/local/bin/:${PATH} +export LD_LIBRARY_PATH=/usr/local/lib:${LD_LIBRARY_PATH} +export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/:${LD_LIBRARY_PATH} +ln -sf /dt8/usr/bin/cc /usr/bin/gcc +ln -sf /dt8/usr/bin/gcc /usr/bin/gcc +ln -sf /dt8/usr/bin/g++ /usr/bin/g++ +
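Putting the pieces together: the point of the strategy-specific kernels, the config plumbing and the optional scores is to let a dynamic embedding variable pick its HKV eviction policy at construction time. A closing sketch; `de.get_variable` and its `kv_creator` argument are standard TFRA usage rather than part of this diff, and the EPOCHLFU comment reflects HKV's documented scoring scheme, so treat both as assumptions.

```python
from tensorflow_recommenders_addons import dynamic_embedding as de
from tensorflow_recommenders_addons.dynamic_embedding.python.ops.dynamic_embedding_creator import (
    HkvEvictStrategy,)

# evict_global_epoch is forwarded to the table-creation kernel; under
# EPOCHLRU/EPOCHLFU it is combined with recency/frequency in the score.
config = de.HkvHashTableConfig(evict_strategy=HkvEvictStrategy.EPOCHLFU,
                               evict_global_epoch=3)

user_embeddings = de.get_variable(
    name="user_embeddings",
    dim=16,
    devices=["/GPU:0"],
    kv_creator=de.HkvHashTableCreator(config=config),
)
```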