diff --git a/.github/workflows/make_wheel_Linux_x86.sh b/.github/workflows/make_wheel_Linux_x86.sh index 47b0a2295..61718000e 100644 --- a/.github/workflows/make_wheel_Linux_x86.sh +++ b/.github/workflows/make_wheel_Linux_x86.sh @@ -13,7 +13,7 @@ fi # if tensorflow version >= 2.6.0 and <= 2.11.9 if [[ "$TF_VERSION" =~ ^2\.([6-9]|10|11)\.[0-9]$ ]] ; then - export BUILD_IMAGE="tfra/nosla-cuda11.2.1-cudnn8-ubuntu20.04-manylinux2014-python$PY_VERSION" + export BUILD_IMAGE="tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python$PY_VERSION" export TF_CUDA_VERSION="11.2" export TF_CUDNN_VERSION="8.1" elif [ $TF_VERSION == "2.4.1" ] ; then diff --git a/WORKSPACE b/WORKSPACE index 9e07792b9..7f7da070f 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -57,55 +57,9 @@ http_archive( http_archive( name = "hkv", build_file = "//build_deps/toolchains/hkv:hkv.BUILD", - # TODO(LinGeLin) remove this when update hkv - patch_cmds = [ - """sed -i.bak '1772i\\'$'\\n ThrustAllocator thrust_allocator_;\\n' include/merlin_hashtable.cuh""", - """sed -i.bak '225i\\'$'\\n thrust_allocator_.set_allocator(allocator_);\\n' include/merlin_hashtable.cuh""", - "sed -i.bak 's/thrust::sort_by_key(thrust_par.on(stream)/thrust::sort_by_key(thrust_par(thrust_allocator_).on(stream)/' include/merlin_hashtable.cuh", - "sed -i.bak 's/reduce(thrust_par.on(stream)/reduce(thrust_par(thrust_allocator_).on(stream)/' include/merlin_hashtable.cuh", - """sed -i.bak '125i\\'$'\\n template \\n' include/merlin/allocator.cuh""", - """sed -i.bak '126i\\'$'\\n struct ThrustAllocator : thrust::device_malloc_allocator {\\n' include/merlin/allocator.cuh""", - """sed -i.bak '127i\\'$'\\n public:\\n' include/merlin/allocator.cuh""", - """sed -i.bak '128i\\'$'\\n typedef thrust::device_malloc_allocator super_t;\\n' include/merlin/allocator.cuh""", - """sed -i.bak '129i\\'$'\\n typedef typename super_t::pointer pointer;\\n' include/merlin/allocator.cuh""", - """sed -i.bak '130i\\'$'\\n typedef typename super_t::size_type size_type;\\n' include/merlin/allocator.cuh""", - """sed -i.bak '131i\\'$'\\n public:\\n' include/merlin/allocator.cuh""", - """sed -i.bak '132i\\'$'\\n pointer allocate(size_type n) {\\n' include/merlin/allocator.cuh""", - """sed -i.bak '133i\\'$'\\n void* ptr = nullptr;\\n' include/merlin/allocator.cuh""", - """sed -i.bak '134i\\'$'\\n MERLIN_CHECK(\\n' include/merlin/allocator.cuh""", - """sed -i.bak '135i\\'$'\\n allocator_ != nullptr,\\n' include/merlin/allocator.cuh""", - """sed -i.bak '136i\\'$'\\n "[ThrustAllocator] set_allocator should be called in advance!");\\n' include/merlin/allocator.cuh""", - """sed -i.bak '137i\\'$'\\n allocator_->alloc(MemoryType::Device, &ptr, sizeof(T) * n);\\n' include/merlin/allocator.cuh""", - """sed -i.bak '138i\\'$'\\n return pointer(reinterpret_cast(ptr));\\n' include/merlin/allocator.cuh""", - """sed -i.bak '139i\\'$'\\n }\\n' include/merlin/allocator.cuh""", - """sed -i.bak '140i\\'$'\\n void deallocate(pointer p, size_type n) {\\n' include/merlin/allocator.cuh""", - """sed -i.bak '141i\\'$'\\n MERLIN_CHECK(\\n' include/merlin/allocator.cuh""", - """sed -i.bak '142i\\'$'\\n allocator_ != nullptr,\\n' include/merlin/allocator.cuh""", - """sed -i.bak '143i\\'$'\\n "[ThrustAllocator] set_allocator should be called in advance!");\\n' include/merlin/allocator.cuh""", - """sed -i.bak '144i\\'$'\\n allocator_->free(MemoryType::Device, reinterpret_cast(p.get()));\\n' include/merlin/allocator.cuh""", - """sed -i.bak '145i\\'$'\\n }\\n' include/merlin/allocator.cuh""", - """sed -i.bak '146i\\'$'\\n void 
set_allocator(BaseAllocator* allocator) { allocator_ = allocator; }\\n' include/merlin/allocator.cuh""", - """sed -i.bak '147i\\'$'\\n public:\\n' include/merlin/allocator.cuh""", - """sed -i.bak '148i\\'$'\\n BaseAllocator* allocator_ = nullptr;\\n' include/merlin/allocator.cuh""", - """sed -i.bak '149i\\'$'\\n };\\n' include/merlin/allocator.cuh""", - """sed -i.bak '20i\\'$'\\n #include \\n' include/merlin/allocator.cuh""", - """sed -i.bak '367i\\'$'\\n for (auto addr : (*table)->buckets_address) {\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '368i\\'$'\\n allocator->free(MemoryType::Device, addr);\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '369i\\'$'\\n }\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '370i\\'$'\\n /*\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '382i\\'$'\\n */\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '224i\\'$'\\n uint8_t* address = nullptr;\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '225i\\'$'\\n allocator->alloc(MemoryType::Device, (void**)&(address), bucket_memory_size * (end - start));\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '226i\\'$'\\n (*table)->buckets_address.push_back(address);\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '228i\\'$'\\n allocate_bucket_others<<<1, 1>>>((*table)->buckets, i, address + (bucket_memory_size * (i-start)), reserve_size, bucket_max_size);\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '229i\\'$'\\n /*\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '235i\\'$'\\n */\\n' include/merlin/core_kernels.cuh""", - """sed -i.bak '22i\\'$'\\n#include \\n' include/merlin/types.cuh""", - """sed -i.bak '143i\\'$'\\n std::vector buckets_address;\\n' include/merlin/types.cuh""", - ], - sha256 = "f8179c445a06a558262946cda4d8ae7252d313e73f792586be9b1bc0c993b1cf", - strip_prefix = "HierarchicalKV-0.1.0-beta.6", - url = "https://github.com/NVIDIA-Merlin/HierarchicalKV/archive/refs/tags/v0.1.0-beta.6.tar.gz", + sha256 = "3839f91b703b401fd6d2449c034662b6f8d6563e5b9b71b4c25b217cf1cd63fd", + strip_prefix = "HierarchicalKV-0.1.0-beta.8", + url = "https://github.com/NVIDIA-Merlin/HierarchicalKV/archive/refs/tags/v0.1.0-beta.8.tar.gz", ) tf_configure( diff --git a/tensorflow_recommenders_addons/dynamic_embedding/__init__.py b/tensorflow_recommenders_addons/dynamic_embedding/__init__.py index 3269c9171..e85aa8b41 100644 --- a/tensorflow_recommenders_addons/dynamic_embedding/__init__.py +++ b/tensorflow_recommenders_addons/dynamic_embedding/__init__.py @@ -18,6 +18,7 @@ 'CuckooHashTable', 'CuckooHashTableConfig', 'CuckooHashTableCreator', + 'HkvEvictStrategy', 'HkvHashTable', 'HkvHashTableConfig', 'HkvHashTableCreator', @@ -55,7 +56,7 @@ from tensorflow_recommenders_addons.dynamic_embedding.python.ops import data_flow_ops as data_flow from tensorflow_recommenders_addons.dynamic_embedding.python.ops.dynamic_embedding_creator import ( KVCreator, CuckooHashTableConfig, CuckooHashTableCreator, - HkvHashTableConfig, HkvHashTableCreator, RedisTableConfig, + HkvHashTableConfig, HkvHashTableCreator, HkvEvictStrategy, RedisTableConfig, RedisTableCreator, FileSystemSaver) from tensorflow_recommenders_addons.dynamic_embedding.python.ops.cuckoo_hashtable_ops import ( CuckooHashTable,) diff --git a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/hkv_hashtable_op_gpu.cu.cc b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/hkv_hashtable_op_gpu.cu.cc index 133992078..fbbb91633 100644 --- 
a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/hkv_hashtable_op_gpu.cu.cc +++ b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/hkv_hashtable_op_gpu.cu.cc @@ -42,6 +42,7 @@ limitations under the License. namespace tensorflow { using GPUDevice = Eigen::GpuDevice; +using NvEvictStrategy = nv::merlin::EvictStrategy; namespace recommenders_addons { namespace lookup { @@ -51,7 +52,7 @@ constexpr size_t kDefaultGpuInitCapacity = 1024 * 1024; using tensorflow::OpKernelContext; using tensorflow::lookup::LookupInterface; -template +template class HkvHashTableOfTensorsGpu final : public LookupInterface { private: std::unique_ptr allocator_ptr_; @@ -71,6 +72,7 @@ class HkvHashTableOfTensorsGpu final : public LookupInterface { int64 init_capacity_i64 = 0; int64 max_capacity_i64 = 0; int64 max_hbm_for_vectors_i64 = 0; + int64 evict_global_epoch = 0; OP_REQUIRES_OK( ctx, GetNodeAttr(kernel->def(), "init_capacity", &init_capacity_i64)); OP_REQUIRES_OK( @@ -81,9 +83,17 @@ class HkvHashTableOfTensorsGpu final : public LookupInterface { ctx, (max_hbm_for_vectors_i64 >= 0), errors::InvalidArgument("params max_hbm_for_vectors less than 0")); + OP_REQUIRES_OK(ctx, GetNodeAttr(kernel->def(), "evict_global_epoch", + &evict_global_epoch)); + + OP_REQUIRES( + ctx, (evict_global_epoch >= 0), + errors::InvalidArgument("params evict_global_epoch less than 0")); + options.init_capacity = static_cast(init_capacity_i64); options.max_capacity = static_cast(max_capacity_i64); options.max_hbm_for_vectors = static_cast(max_hbm_for_vectors_i64); + options.evict_global_epoch = static_cast(evict_global_epoch); if (options.max_capacity == 0) { char* env_max_capacity_str = @@ -136,7 +146,7 @@ class HkvHashTableOfTensorsGpu final : public LookupInterface { Status CreateTable(gpu::TableWrapperInitOptions& options, nv::merlin::BaseAllocator* allocator, - gpu::TableWrapper** pptable) { + gpu::TableWrapper** pptable) { return gpu::CreateTableImpl(pptable, options, allocator, runtime_dim_); } @@ -580,13 +590,13 @@ class HkvHashTableOfTensorsGpu final : public LookupInterface { TensorShape value_shape_; size_t runtime_dim_; mutable mutex mu_; - gpu::TableWrapper* table_ = nullptr GUARDED_BY(mu_); + gpu::TableWrapper* table_ = nullptr GUARDED_BY(mu_); }; } // namespace lookup // Table lookup op. Perform the lookup operation on the given table. -template +template class HashTableFindGpuOp : public OpKernel { public: explicit HashTableFindGpuOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -595,8 +605,8 @@ class HashTableFindGpuOp : public OpKernel { lookup::LookupInterface* table; OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); core::ScopedUnref unref_me(table); - lookup::HkvHashTableOfTensorsGpu* table_hkv = - (lookup::HkvHashTableOfTensorsGpu*)table; + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; // Input 0 could be a STRING_REF or a RESOURCE DataType expected_input_0 = DT_RESOURCE; @@ -621,13 +631,9 @@ class HashTableFindGpuOp : public OpKernel { } }; -// REGISTER_KERNEL_BUILDER( -// Name(PREFIX_OP_NAME(HkvHashTableFind)).Device(DEVICE_GPU), -// HashTableFindGpuOp); - // Table lookup op. Perform the lookup operation on the given table. 
-template +template class HashTableFindWithExistsGpuOp : public OpKernel { public: explicit HashTableFindWithExistsGpuOp(OpKernelConstruction* ctx) @@ -638,8 +644,8 @@ class HashTableFindWithExistsGpuOp : public OpKernel { OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); core::ScopedUnref unref_me(table); - lookup::HkvHashTableOfTensorsGpu* table_hkv = - (lookup::HkvHashTableOfTensorsGpu*)table; + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; // Input 0 could be a STRING_REF or a RESOURCE DataType expected_input_0 = DT_RESOURCE; @@ -669,6 +675,7 @@ class HashTableFindWithExistsGpuOp : public OpKernel { }; // Table insert op. +template class HashTableInsertGpuOp : public OpKernel { public: explicit HashTableInsertGpuOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -677,6 +684,8 @@ class HashTableInsertGpuOp : public OpKernel { lookup::LookupInterface* table; OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); core::ScopedUnref unref_me(table); + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; DataType expected_input_0 = DT_RESOURCE; DataTypeVector expected_inputs = {expected_input_0, table->key_dtype(), @@ -686,16 +695,12 @@ class HashTableInsertGpuOp : public OpKernel { const Tensor& keys = ctx->input(1); const Tensor& values = ctx->input(2); OP_REQUIRES_OK(ctx, table->CheckKeyAndValueTensorsForInsert(keys, values)); - OP_REQUIRES_OK(ctx, table->Insert(ctx, keys, values)); + OP_REQUIRES_OK(ctx, table_hkv->Insert(ctx, keys, values)); } }; -REGISTER_KERNEL_BUILDER( - Name(PREFIX_OP_NAME(HkvHashTableInsert)).Device(DEVICE_GPU), - HashTableInsertGpuOp); - // Table accum op. -template +template class HashTableAccumGpuOp : public OpKernel { public: explicit HashTableAccumGpuOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -704,8 +709,8 @@ class HashTableAccumGpuOp : public OpKernel { lookup::LookupInterface* table; OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); core::ScopedUnref unref_me(table); - lookup::HkvHashTableOfTensorsGpu* table_hkv = - (lookup::HkvHashTableOfTensorsGpu*)table; + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; DataType expected_input_0 = DT_RESOURCE; DataTypeVector expected_inputs = {expected_input_0, table->key_dtype(), @@ -723,7 +728,6 @@ class HashTableAccumGpuOp : public OpKernel { }; // Table remove op. -// template class HashTableRemoveGpuOp : public OpKernel { public: explicit HashTableRemoveGpuOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -748,7 +752,7 @@ REGISTER_KERNEL_BUILDER( HashTableRemoveGpuOp); // Table clear op. -template +template class HashTableClearGpuOp : public OpKernel { public: explicit HashTableClearGpuOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -757,14 +761,14 @@ class HashTableClearGpuOp : public OpKernel { lookup::LookupInterface* table; OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); core::ScopedUnref unref_me(table); - lookup::HkvHashTableOfTensorsGpu* table_hkv = - (lookup::HkvHashTableOfTensorsGpu*)table; + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; OP_REQUIRES_OK(ctx, table_hkv->Clear(ctx)); } }; // Op that returns the size of the given table. 
-template +template class HashTableSizeGpuOp : public OpKernel { public: explicit HashTableSizeGpuOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -773,8 +777,8 @@ class HashTableSizeGpuOp : public OpKernel { lookup::LookupInterface* table; OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); core::ScopedUnref unref_me(table); - lookup::HkvHashTableOfTensorsGpu* table_hkv = - (lookup::HkvHashTableOfTensorsGpu*)table; + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; Tensor* out; AllocatorAttributes attr; @@ -788,11 +792,8 @@ class HashTableSizeGpuOp : public OpKernel { } }; -// REGISTER_KERNEL_BUILDER( -// Name(PREFIX_OP_NAME(HkvHashTableSize)).Device(DEVICE_GPU), -// HashTableSizeGpuOp); - // Op that outputs tensors of all keys and all values. +template class HashTableExportGpuOp : public OpKernel { public: explicit HashTableExportGpuOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -801,17 +802,15 @@ class HashTableExportGpuOp : public OpKernel { lookup::LookupInterface* table; OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); core::ScopedUnref unref_me(table); + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; - OP_REQUIRES_OK(ctx, table->ExportValues(ctx)); + OP_REQUIRES_OK(ctx, table_hkv->ExportValues(ctx)); } }; -REGISTER_KERNEL_BUILDER( - Name(PREFIX_OP_NAME(HkvHashTableExport)).Device(DEVICE_GPU), - HashTableExportGpuOp); - // Op that export all keys and values to file. -template +template class HashTableExportWithScoresGpuOp : public OpKernel { public: explicit HashTableExportWithScoresGpuOp(OpKernelConstruction* ctx) @@ -821,13 +820,13 @@ class HashTableExportWithScoresGpuOp : public OpKernel { lookup::LookupInterface* table; OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); core::ScopedUnref unref_me(table); - lookup::HkvHashTableOfTensorsGpu* table_hkv = - (lookup::HkvHashTableOfTensorsGpu*)table; + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; OP_REQUIRES_OK(ctx, table_hkv->ExportValuesWithScores(ctx)); } }; -template +template class HashTableExportKeysAndScoresGpuOp : public OpKernel { public: explicit HashTableExportKeysAndScoresGpuOp(OpKernelConstruction* ctx) @@ -839,8 +838,8 @@ class HashTableExportKeysAndScoresGpuOp : public OpKernel { lookup::LookupInterface* table; OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); core::ScopedUnref unref_me(table); - lookup::HkvHashTableOfTensorsGpu* table_hkv = - (lookup::HkvHashTableOfTensorsGpu*)table; + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; OP_REQUIRES_OK(ctx, table_hkv->ExportKeysAndScores( ctx, static_cast(split_size_i64_))); } @@ -850,6 +849,7 @@ class HashTableExportKeysAndScoresGpuOp : public OpKernel { }; // Clear the table and insert data. 
+template class HashTableImportGpuOp : public OpKernel { public: explicit HashTableImportGpuOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -858,6 +858,8 @@ class HashTableImportGpuOp : public OpKernel { lookup::LookupInterface* table; OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table)); core::ScopedUnref unref_me(table); + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; DataType expected_input_0 = DT_RESOURCE; DataTypeVector expected_inputs = {expected_input_0, table->key_dtype(), @@ -867,16 +869,12 @@ class HashTableImportGpuOp : public OpKernel { const Tensor& keys = ctx->input(1); const Tensor& values = ctx->input(2); OP_REQUIRES_OK(ctx, table->CheckKeyAndValueTensorsForImport(keys, values)); - OP_REQUIRES_OK(ctx, table->ImportValues(ctx, keys, values)); + OP_REQUIRES_OK(ctx, table_hkv->ImportValues(ctx, keys, values)); } }; -REGISTER_KERNEL_BUILDER( - Name(PREFIX_OP_NAME(HkvHashTableImport)).Device(DEVICE_GPU), - HashTableImportGpuOp); - // Op that export all keys and values to FileSystem. -template +template class HashTableSaveToFileSystemGpuOp : public OpKernel { public: explicit HashTableSaveToFileSystemGpuOp(OpKernelConstruction* ctx) @@ -912,8 +910,8 @@ class HashTableSaveToFileSystemGpuOp : public OpKernel { errors::InvalidArgument("file name must be scalar.")); string file_name = string(fname_tensor.scalar()().data()); - lookup::HkvHashTableOfTensorsGpu* table_hkv = - (lookup::HkvHashTableOfTensorsGpu*)table; + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; LOG(INFO) << "c++ dirpath: " << dirpath << " filename: " << file_name; std::string filepath = io::JoinPath(dirpath, file_name); @@ -931,7 +929,7 @@ class HashTableSaveToFileSystemGpuOp : public OpKernel { }; // Clear the table and insert data from FileSystem. -template +template class HashTableLoadFromFileSystemGpuOp : public OpKernel { public: explicit HashTableLoadFromFileSystemGpuOp(OpKernelConstruction* ctx) @@ -969,8 +967,8 @@ class HashTableLoadFromFileSystemGpuOp : public OpKernel { LOG(INFO) << "c++ dirpath :" << dirpath << " filename: " << file_name; - lookup::HkvHashTableOfTensorsGpu* table_hkv = - (lookup::HkvHashTableOfTensorsGpu*)table; + lookup::HkvHashTableOfTensorsGpu* table_hkv = + (lookup::HkvHashTableOfTensorsGpu*)table; OP_REQUIRES_OK( ctx, table_hkv->ImportValuesFromFile(ctx, dirpath, file_name, buffer_size_, load_entire_dir_)); @@ -982,65 +980,102 @@ class HashTableLoadFromFileSystemGpuOp : public OpKernel { size_t buffer_size_; }; +#define CONCAT_QUADRA_STRING(X, Y, Z, S) (#X #Y #Z #S) + +#define PREFIX_OP_NAME_X_IMPL(N, S) CONCAT_QUADRA_STRING(TFRA, >, N, S) +#define PREFIX_OP_NAME_X(N, ...) PREFIX_OP_NAME_X_IMPL(N, __VA_ARGS__) + // Register the HkvHashTableOfTensors op. 
-#define REGISTER_KERNEL(key_dtype, value_dtype) \ - REGISTER_KERNEL_BUILDER( \ - Name(PREFIX_OP_NAME(HkvHashTableOfTensors)) \ - .Device(DEVICE_GPU) \ - .TypeConstraint("key_dtype") \ - .TypeConstraint("value_dtype"), \ - HashTableGpuOp, \ - key_dtype, value_dtype>); \ - REGISTER_KERNEL_BUILDER(Name(PREFIX_OP_NAME(HkvHashTableClear)) \ - .Device(DEVICE_GPU) \ - .TypeConstraint("key_dtype") \ - .TypeConstraint("value_dtype"), \ - HashTableClearGpuOp); \ - REGISTER_KERNEL_BUILDER(Name(PREFIX_OP_NAME(HkvHashTableSize)) \ - .Device(DEVICE_GPU) \ - .TypeConstraint("key_dtype") \ - .TypeConstraint("value_dtype"), \ - HashTableSizeGpuOp); \ - REGISTER_KERNEL_BUILDER(Name(PREFIX_OP_NAME(HkvHashTableAccum)) \ - .Device(DEVICE_GPU) \ - .TypeConstraint("key_dtype") \ - .TypeConstraint("value_dtype"), \ - HashTableAccumGpuOp); \ - REGISTER_KERNEL_BUILDER( \ - Name(PREFIX_OP_NAME(HkvHashTableExportWithScores)) \ - .Device(DEVICE_GPU) \ - .TypeConstraint("key_dtype") \ - .TypeConstraint("value_dtype"), \ - HashTableExportWithScoresGpuOp); \ - REGISTER_KERNEL_BUILDER(Name(PREFIX_OP_NAME(HkvHashTableFind)) \ - .Device(DEVICE_GPU) \ - .TypeConstraint("Tin") \ - .TypeConstraint("Tout"), \ - HashTableFindGpuOp); \ - REGISTER_KERNEL_BUILDER( \ - Name(PREFIX_OP_NAME(HkvHashTableFindWithExists)) \ - .Device(DEVICE_GPU) \ - .TypeConstraint("Tin") \ - .TypeConstraint("Tout"), \ - HashTableFindWithExistsGpuOp); \ - REGISTER_KERNEL_BUILDER( \ - Name(PREFIX_OP_NAME(HkvHashTableSaveToFileSystem)) \ - .Device(DEVICE_GPU) \ - .TypeConstraint("key_dtype") \ - .TypeConstraint("value_dtype"), \ - HashTableSaveToFileSystemGpuOp); \ - REGISTER_KERNEL_BUILDER( \ - Name(PREFIX_OP_NAME(HkvHashTableLoadFromFileSystem)) \ - .Device(DEVICE_GPU) \ - .TypeConstraint("key_dtype") \ - .TypeConstraint("value_dtype"), \ - HashTableLoadFromFileSystemGpuOp); - -REGISTER_KERNEL(int64, float); -REGISTER_KERNEL(int64, int8); -REGISTER_KERNEL(int64, int32); -REGISTER_KERNEL(int64, int64); -REGISTER_KERNEL(int64, Eigen::half); +#define REGISTER_HKV_TABLE(key_dtype, value_dtype, Strategy, STRATEGY) \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableOfTensors, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableGpuOp< \ + lookup::HkvHashTableOfTensorsGpu, \ + key_dtype, value_dtype>); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableClear, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableClearGpuOp); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableSize, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableSizeGpuOp); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableInsert, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableInsertGpuOp); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableExport, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableExportGpuOp); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableImport, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableImportGpuOp); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableAccum, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), 
\ + HashTableAccumGpuOp); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableExportWithScores, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableExportWithScoresGpuOp); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableFind, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableFindGpuOp); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableFindWithExists, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableFindWithExistsGpuOp); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableSaveToFileSystem, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableSaveToFileSystemGpuOp); \ + REGISTER_KERNEL_BUILDER( \ + Name(PREFIX_OP_NAME_X(HkvHashTableLoadFromFileSystem, STRATEGY)) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("key_dtype") \ + .TypeConstraint("value_dtype"), \ + HashTableLoadFromFileSystemGpuOp); + +#define REGISTER_STRATRGY(Strategy, STRATEGY) \ + REGISTER_HKV_TABLE(int64, float, Strategy, STRATEGY); \ + REGISTER_HKV_TABLE(int64, int8, Strategy, STRATEGY); \ + REGISTER_HKV_TABLE(int64, int32, Strategy, STRATEGY); \ + REGISTER_HKV_TABLE(int64, int64, Strategy, STRATEGY); \ + REGISTER_HKV_TABLE(int64, Eigen::half, Strategy, STRATEGY); + +REGISTER_STRATRGY(NvEvictStrategy::kLru, LRU); +REGISTER_STRATRGY(NvEvictStrategy::kLfu, LFU); +REGISTER_STRATRGY(NvEvictStrategy::kEpochLru, EPOCHLRU); +REGISTER_STRATRGY(NvEvictStrategy::kEpochLfu, EPOCHLFU); +REGISTER_STRATRGY(NvEvictStrategy::kCustomized, CUSTOMIZED); + +#undef REGISTER_STRATRGY #undef REGISTER_KERNEL diff --git a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv.h b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv.h index 149e2ba89..cfd44d1a5 100644 --- a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv.h +++ b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv.h @@ -49,6 +49,8 @@ namespace recommenders_addons { namespace lookup { namespace gpu { +using NvEvictStrategy = nv::merlin::EvictStrategy; + template class KVOnlyFile : public nv::merlin::BaseKVFile { public: @@ -297,6 +299,8 @@ struct TableWrapperInitOptions { size_t init_capacity; size_t max_hbm_for_vectors; size_t max_bucket_size; + size_t evict_global_epoch; + float max_load_factor; int block_size; int io_block_size; @@ -411,18 +415,16 @@ class TFOrDefaultAllocator : public nv::merlin::BaseAllocator { } }; -template +template class TableWrapper { private: - // using S = uint64_t; - using Table = nv::merlin::HashTable; + using Table = nv::merlin::HashTable; nv::merlin::HashTableOptions mkv_options_; public: TableWrapper(TableWrapperInitOptions& init_options, size_t dim) { max_capacity_ = init_options.max_capacity; dim_ = dim; - // nv::merlin::HashTableOptions mkv_options_; mkv_options_.init_capacity = std::min(init_options.init_capacity, max_capacity_); mkv_options_.max_capacity = max_capacity_; @@ -434,11 +436,14 @@ class TableWrapper { mkv_options_.max_load_factor = 0.5; mkv_options_.block_size = nv::merlin::SAFE_GET_BLOCK_SIZE(128); mkv_options_.dim = dim; - // mkv_options_.evict_strategy = nv::merlin::EvictStrategy::kCustomized; - mkv_options_.evict_strategy = 
nv::merlin::EvictStrategy::kLru; block_size_ = mkv_options_.block_size; table_ = new Table(); + nv::merlin::EvictStrategy::set_global_epoch( + init_options.evict_global_epoch); + LOG(INFO) << "Use Evict Strategy:" << Strategy + << ", [0:LRU, 1:LFU, 2:EPOCHLRU, 3:EPOCHLFU, 4:CUSTOMIZED]"; + LOG(INFO) << "Use Evict Global Epoch:" << init_options.evict_global_epoch; } Status init(nv::merlin::BaseAllocator* allocator) { @@ -674,12 +679,12 @@ class TableWrapper { bool dynamic_mode_; }; -template -Status CreateTableImpl(TableWrapper** pptable, +template +Status CreateTableImpl(TableWrapper** pptable, TableWrapperInitOptions& options, nv::merlin::BaseAllocator* allocator, size_t runtime_dim) { - *pptable = new TableWrapper(options, runtime_dim); + *pptable = new TableWrapper(options, runtime_dim); return (*pptable)->init(allocator); } diff --git a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv_impl.cu.cc b/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv_impl.cu.cc deleted file mode 100644 index 3f529f5eb..000000000 --- a/tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv_impl.cu.cc +++ /dev/null @@ -1,38 +0,0 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_hkv.h" - -namespace tensorflow { -namespace recommenders_addons { -namespace lookup { -namespace gpu { - -#define DEFINE_PURE_GPU_HASHTABLE(key_type, value_type) \ - template <> \ - class TableWrapper - -DEFINE_PURE_GPU_HASHTABLE(int64, float); -DEFINE_PURE_GPU_HASHTABLE(int64, int8); -DEFINE_PURE_GPU_HASHTABLE(int64, int32); -DEFINE_PURE_GPU_HASHTABLE(int64, int64); -DEFINE_PURE_GPU_HASHTABLE(int64, Eigen::half); - -#undef DEFINE_PURE_GPU_HASHTABLE - -} // namespace gpu -} // namespace lookup -} // namespace recommenders_addons -} // namespace tensorflow diff --git a/tensorflow_recommenders_addons/dynamic_embedding/core/ops/hkv_hashtable_ops.cc b/tensorflow_recommenders_addons/dynamic_embedding/core/ops/hkv_hashtable_ops.cc index 4830bae45..f24ab682a 100644 --- a/tensorflow_recommenders_addons/dynamic_embedding/core/ops/hkv_hashtable_ops.cc +++ b/tensorflow_recommenders_addons/dynamic_embedding/core/ops/hkv_hashtable_ops.cc @@ -131,200 +131,186 @@ Status HkvHashTableShape(InferenceContext* c, const ShapeHandle& key, return Status::OK(); } -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableFind)) - .Input("table_handle: resource") - .Input("keys: Tin") - .Input("default_value: Tout") - .Output("values: Tout") - .Attr("Tin: type") - .Attr("Tout: type") - .SetShapeFn([](InferenceContext* c) { - ShapeHandle handle; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); - - ShapeAndType value_shape_and_type; - TF_RETURN_IF_ERROR(ValidateTableResourceHandle( - c, - /*keys=*/c->input(1), - /*key_dtype_attr=*/"Tin", - /*value_dtype_attr=*/"Tout", - /*is_lookup=*/true, &value_shape_and_type)); - c->set_output(0, value_shape_and_type.shape); - - return Status::OK(); - }); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableFindWithExists)) - .Input("table_handle: resource") - .Input("keys: Tin") - .Input("default_value: Tout") - .Output("values: Tout") - .Output("exists: bool") - .Attr("Tin: type") - .Attr("Tout: type") - .SetShapeFn([](InferenceContext* c) { - ShapeHandle handle; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); - - ShapeHandle keys = c->UnknownShapeOfRank(1); - ShapeAndType value_shape_and_type; - TF_RETURN_IF_ERROR(ValidateTableResourceHandle( - c, - /*keys=*/c->input(1), - /*key_dtype_attr=*/"Tin", - /*value_dtype_attr=*/"Tout", - /*is_lookup=*/true, &value_shape_and_type)); - c->set_output(0, value_shape_and_type.shape); - c->set_output(1, keys); - - return Status::OK(); - }); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableInsert)) - .Input("table_handle: resource") - .Input("keys: Tin") - .Input("values: Tout") - .Attr("Tin: type") - .Attr("Tout: type") - .SetShapeFn([](InferenceContext* c) { - ShapeHandle handle; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); - - // TODO: Validate keys and values shape. - return Status::OK(); - }); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableAccum)) +REGISTER_OP(PREFIX_OP_NAME(HkvHashTableRemove)) .Input("table_handle: resource") .Input("keys: key_dtype") - .Input("values_or_deltas: value_dtype") - .Input("exists: bool") .Attr("key_dtype: type") - .Attr("value_dtype: type") - .SetShapeFn([](InferenceContext* c) { - ShapeHandle handle; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); - - // TODO: Validate keys and values shape. 
- return Status::OK(); - }); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableRemove)) - .Input("table_handle: resource") - .Input("keys: Tin") - .Attr("Tin: type") .SetShapeFn([](InferenceContext* c) { ShapeHandle handle; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(1), 1, &handle)); - - // TODO(turboale): Validate keys shape. return Status::OK(); }); -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableClear)) - .Input("table_handle: resource") - .Attr("key_dtype: type") - .Attr("value_dtype: type"); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableSize)) - .Input("table_handle: resource") - .Output("size: int64") - .Attr("key_dtype: type") - .Attr("value_dtype: type") - .SetShapeFn(ScalarAndTwoElementVectorInputsAndScalarOutputs); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableExport)) - .Input("table_handle: resource") - .Output("keys: Tkeys") - .Output("values: Tvalues") - .Attr("Tkeys: type") - .Attr("Tvalues: type") - .SetShapeFn([](InferenceContext* c) { - ShapeHandle handle; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); - ShapeHandle keys = c->UnknownShapeOfRank(1); - ShapeAndType value_shape_and_type; - TF_RETURN_IF_ERROR(ValidateTableResourceHandle( - c, - /*keys=*/keys, - /*key_dtype_attr=*/"Tkeys", - /*value_dtype_attr=*/"Tvalues", - /*is_lookup=*/false, &value_shape_and_type)); - c->set_output(0, keys); - c->set_output(1, value_shape_and_type.shape); - return Status::OK(); - }); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableSaveToFileSystem)) - .Input("table_handle: resource") - .Input("dirpath: string") - .Input("file_name: string") - .Attr("key_dtype: type") - .Attr("value_dtype: type") - .Attr("dirpath_env: string") - .Attr("append_to_file: bool") - .Attr("buffer_size: int >= 1"); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableExportKeysAndScores)) - .Input("table_handle: resource") - .Output("keys: Tkeys") - .Output("scores: int64") - .Attr("Tkeys: type") - .Attr("split_size: int") - .SetShapeFn([](InferenceContext* c) { - ShapeHandle handle; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); - ShapeHandle keys = c->UnknownShapeOfRank(1); - ShapeHandle scores = c->UnknownShapeOfRank(1); - c->set_output(0, keys); - c->set_output(1, scores); - return Status::OK(); - }); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableImport)) - .Input("table_handle: resource") - .Input("keys: Tin") - .Input("values: Tout") - .Attr("Tin: type") - .Attr("Tout: type") - .SetShapeFn([](InferenceContext* c) { - ShapeHandle handle; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); +#define CONCAT_QUADRA_STRING(X, Y, Z, S) (#X #Y #Z #S) + +#define PREFIX_OP_NAME_X_IMPL(N, S) CONCAT_QUADRA_STRING(TFRA, >, N, S) +#define PREFIX_OP_NAME_X(N, ...) 
PREFIX_OP_NAME_X_IMPL(N, __VA_ARGS__) + +#define REGISTER_HKV_TABLE(STRATEGY) \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableFind, STRATEGY)) \ + .Input("table_handle: resource") \ + .Input("keys: key_dtype") \ + .Input("default_value: value_dtype") \ + .Output("values: value_dtype") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type") \ + .SetShapeFn([](InferenceContext* c) { \ + ShapeHandle handle; \ + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); \ + ShapeAndType value_shape_and_type; \ + TF_RETURN_IF_ERROR(ValidateTableResourceHandle( \ + c, /*keys=*/c->input(1), /*key_dtype_attr=*/"key_dtype", \ + /*value_dtype_attr=*/"value_dtype", /*is_lookup=*/true, \ + &value_shape_and_type)); \ + c->set_output(0, value_shape_and_type.shape); \ + return Status::OK(); \ + }); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableFindWithExists, STRATEGY)) \ + .Input("table_handle: resource") \ + .Input("keys: key_dtype") \ + .Input("default_value: value_dtype") \ + .Output("values: value_dtype") \ + .Output("exists: bool") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type") \ + .SetShapeFn([](InferenceContext* c) { \ + ShapeHandle handle; \ + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); \ + ShapeHandle keys = c->UnknownShapeOfRank(1); \ + ShapeAndType value_shape_and_type; \ + TF_RETURN_IF_ERROR(ValidateTableResourceHandle( \ + c, /*keys=*/c->input(1), /*key_dtype_attr=*/"key_dtype", \ + /*value_dtype_attr=*/"value_dtype", /*is_lookup=*/true, \ + &value_shape_and_type)); \ + c->set_output(0, value_shape_and_type.shape); \ + c->set_output(1, keys); \ + return Status::OK(); \ + }); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableInsert, STRATEGY)) \ + .Input("table_handle: resource") \ + .Input("keys: key_dtype") \ + .Input("values: value_dtype") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type") \ + .SetShapeFn([](InferenceContext* c) { \ + ShapeHandle handle; \ + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); \ + return Status::OK(); \ + }); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableAccum, STRATEGY)) \ + .Input("table_handle: resource") \ + .Input("keys: key_dtype") \ + .Input("values_or_deltas: value_dtype") \ + .Input("exists: bool") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type") \ + .SetShapeFn([](InferenceContext* c) { \ + ShapeHandle handle; \ + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); \ + return Status::OK(); \ + }); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableClear, STRATEGY)) \ + .Input("table_handle: resource") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type"); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableSize, STRATEGY)) \ + .Input("table_handle: resource") \ + .Output("size: int64") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type") \ + .SetShapeFn(ScalarAndTwoElementVectorInputsAndScalarOutputs); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableExport, STRATEGY)) \ + .Input("table_handle: resource") \ + .Output("keys: key_dtype") \ + .Output("values: value_dtype") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type") \ + .SetShapeFn([](InferenceContext* c) { \ + ShapeHandle handle; \ + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); \ + ShapeHandle keys = c->UnknownShapeOfRank(1); \ + ShapeAndType value_shape_and_type; \ + TF_RETURN_IF_ERROR(ValidateTableResourceHandle( \ + c, /*keys=*/keys, /*key_dtype_attr=*/"key_dtype", \ + /*value_dtype_attr=*/"value_dtype", /*is_lookup=*/false, \ + &value_shape_and_type)); \ + c->set_output(0, keys); \ + c->set_output(1, 
value_shape_and_type.shape); \ + return Status::OK(); \ + }); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableSaveToFileSystem, STRATEGY)) \ + .Input("table_handle: resource") \ + .Input("dirpath: string") \ + .Input("file_name: string") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type") \ + .Attr("dirpath_env: string") \ + .Attr("append_to_file: bool") \ + .Attr("buffer_size: int >= 1"); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableExportKeysAndScores, STRATEGY)) \ + .Input("table_handle: resource") \ + .Output("keys: Tkeys") \ + .Output("scores: int64") \ + .Attr("Tkeys: type") \ + .Attr("split_size: int") \ + .SetShapeFn([](InferenceContext* c) { \ + ShapeHandle handle; \ + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); \ + ShapeHandle keys = c->UnknownShapeOfRank(1); \ + ShapeHandle scores = c->UnknownShapeOfRank(1); \ + c->set_output(0, keys); \ + c->set_output(1, scores); \ + return Status::OK(); \ + }); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableImport, STRATEGY)) \ + .Input("table_handle: resource") \ + .Input("keys: key_dtype") \ + .Input("values: value_dtype") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type") \ + .SetShapeFn([](InferenceContext* c) { \ + ShapeHandle handle; \ + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); \ + ShapeHandle keys; \ + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &keys)); \ + TF_RETURN_IF_ERROR(c->Merge(keys, c->input(2), &keys)); \ + return Status::OK(); \ + }); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableLoadFromFileSystem, STRATEGY)) \ + .Input("table_handle: resource") \ + .Input("dirpath: string") \ + .Input("file_name: string") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type") \ + .Attr("dirpath_env: string") \ + .Attr("load_entire_dir: bool") \ + .Attr("buffer_size: int >= 1"); \ + REGISTER_OP(PREFIX_OP_NAME_X(HkvHashTableOfTensors, STRATEGY)) \ + .Output("table_handle: resource") \ + .Attr("container: string = ''") \ + .Attr("shared_name: string = ''") \ + .Attr("use_node_name_sharing: bool = false") \ + .Attr("key_dtype: type") \ + .Attr("value_dtype: type") \ + .Attr("value_shape: shape = {}") \ + .Attr("init_capacity: int = 0") \ + .Attr("max_capacity: int = 0") \ + .Attr("max_hbm_for_vectors: int = 0") \ + .Attr("evict_global_epoch: int = 0") \ + .SetIsStateful() \ + .SetShapeFn([](InferenceContext* c) { \ + PartialTensorShape value_p; \ + TF_RETURN_IF_ERROR(c->GetAttr("value_shape", &value_p)); \ + ShapeHandle value_s; \ + TF_RETURN_IF_ERROR( \ + c->MakeShapeFromPartialTensorShape(value_p, &value_s)); \ + return HkvHashTableShape(c, /*key=*/c->Scalar(), /*value=*/value_s); \ + }); +REGISTER_HKV_TABLE(LRU) +REGISTER_HKV_TABLE(LFU) +REGISTER_HKV_TABLE(EPOCHLRU) +REGISTER_HKV_TABLE(EPOCHLFU) +REGISTER_HKV_TABLE(CUSTOMIZED) - ShapeHandle keys; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &keys)); - TF_RETURN_IF_ERROR(c->Merge(keys, c->input(2), &keys)); - return Status::OK(); - }); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableLoadFromFileSystem)) - .Input("table_handle: resource") - .Input("dirpath: string") - .Input("file_name: string") - .Attr("key_dtype: type") - .Attr("value_dtype: type") - .Attr("dirpath_env: string") - .Attr("load_entire_dir: bool") - .Attr("buffer_size: int >= 1"); - -REGISTER_OP(PREFIX_OP_NAME(HkvHashTableOfTensors)) - .Output("table_handle: resource") - .Attr("container: string = ''") - .Attr("shared_name: string = ''") - .Attr("use_node_name_sharing: bool = false") - .Attr("key_dtype: type") - .Attr("value_dtype: type") - .Attr("value_shape: shape = {}") - 
.Attr("init_capacity: int = 0") - .Attr("max_capacity: int = 0") - .Attr("max_hbm_for_vectors: int = 0") - .SetIsStateful() - .SetShapeFn([](InferenceContext* c) { - PartialTensorShape value_p; - TF_RETURN_IF_ERROR(c->GetAttr("value_shape", &value_p)); - ShapeHandle value_s; - TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(value_p, &value_s)); - return HkvHashTableShape(c, /*key=*/c->Scalar(), /*value=*/value_s); - }); } // namespace tensorflow diff --git a/tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/hkv_hashtable_evict_test.py b/tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/hkv_hashtable_evict_test.py new file mode 100644 index 000000000..e2cf274be --- /dev/null +++ b/tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/hkv_hashtable_evict_test.py @@ -0,0 +1,111 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""unit tests of hkv hashtable ops +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys +import os +import itertools +import numpy as np + +from tensorflow_recommenders_addons import dynamic_embedding as de +from tensorflow_recommenders_addons.utils.check_platform import is_windows, is_macos, is_arm64, is_linux, is_raspi_arm + +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables +from tensorflow.python.platform import test +from tensorflow.python.training import server_lib +from tensorflow.python.client import session + +import tensorflow as tf +try: + import tensorflow_io +except: + print() + + +def _type_converter(tf_type): + mapper = { + dtypes.int32: np.int32, + dtypes.int64: np.int64, + dtypes.float32: float, + dtypes.float64: np.float64, + dtypes.string: str, + dtypes.half: np.float16, + dtypes.int8: np.int8, + dtypes.bool: bool, + } + return mapper[tf_type] + + +default_config = config_pb2.ConfigProto( + allow_soft_placement=False, + gpu_options=config_pb2.GPUOptions(allow_growth=True)) + + +def _get_devices(): + return ["/gpu:0" if test_util.is_gpu_available() else "/cpu:0"] + + +is_gpu_available = test_util.is_gpu_available() + + +def convert(v, t): + return np.array(v).astype(_type_converter(t)) + + +class HkvHashtableTest(test.TestCase): + + @test_util.run_in_graph_and_eager_modes() + def test_evict_strategy(self): + if not is_gpu_available: + self.skipTest('Only test when gpu is available.') + strategy_i = 0 + for strategy in de.HkvEvictStrategy: + with self.session(use_gpu=True, config=default_config): + with self.captureWritesToStream(sys.stderr) as 
printed: + table = de.get_variable( + str(strategy), + key_dtype=dtypes.int64, + value_dtype=dtypes.int32, + initializer=0, + dim=8, + init_size=1024, + kv_creator=de.HkvHashTableCreator( + config=de.HkvHashTableConfig(init_capacity=1024, + max_capacity=1024, + max_hbm_for_values=1024 * 4 * 8 * + 2, + evict_strategy=strategy))) + self.evaluate(table.size()) + + content = "Use Evict Strategy:" + str(strategy_i) + self.assertTrue(content in printed.contents()) + strategy_i = strategy_i + 1 + + del table + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/hkv_hashtable_ops_test.py b/tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/hkv_hashtable_ops_test.py index 5efa574d2..83e5e1822 100644 --- a/tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/hkv_hashtable_ops_test.py +++ b/tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/hkv_hashtable_ops_test.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensorflow_recommenders_addons/dynamic_embedding/python/ops/cuckoo_hashtable_ops.py b/tensorflow_recommenders_addons/dynamic_embedding/python/ops/cuckoo_hashtable_ops.py index fd9a4394e..e0c1b298e 100644 --- a/tensorflow_recommenders_addons/dynamic_embedding/python/ops/cuckoo_hashtable_ops.py +++ b/tensorflow_recommenders_addons/dynamic_embedding/python/ops/cuckoo_hashtable_ops.py @@ -154,7 +154,7 @@ def _create_resource(self): if self._device_type == "GPU": with ops.device(self._device): - table_ref = hkv_ops.tfra_hkv_hash_table_of_tensors( + table_ref = hkv_ops.tfra_hkv_hash_table_of_tensors_lru( shared_name=self._shared_name, use_node_name_sharing=use_node_name_sharing, key_dtype=self._key_dtype, @@ -213,9 +213,10 @@ def size(self, name=None): with ops.name_scope(name, "%s_Size" % self.name, [self.resource_handle]): with ops.colocate_with(self.resource_handle): if self._device_type == "GPU": - return hkv_ops.tfra_hkv_hash_table_size(self.resource_handle, - key_dtype=self._key_dtype, - value_dtype=self._value_dtype) + return hkv_ops.tfra_hkv_hash_table_size_lru( + self.resource_handle, + key_dtype=self._key_dtype, + value_dtype=self._value_dtype) else: return cuckoo_ops.tfra_cuckoo_hash_table_size(self.resource_handle) @@ -262,9 +263,10 @@ def clear(self, name=None): with ops.name_scope(name, "%s_lookup_table_clear" % self.name, (self.resource_handle, self._default_value)): if self._device_type == "GPU": - return hkv_ops.tfra_hkv_hash_table_clear(self.resource_handle, - key_dtype=self._key_dtype, - value_dtype=self._value_dtype) + return hkv_ops.tfra_hkv_hash_table_clear_lru( + self.resource_handle, + key_dtype=self._key_dtype, + value_dtype=self._value_dtype) else: return cuckoo_ops.tfra_cuckoo_hash_table_clear( self.resource_handle, @@ -310,14 +312,14 @@ def lookup(self, with ops.colocate_with(self.resource_handle, ignore_existing=True): if self._device_type == "GPU": if return_exists: - values, exists = hkv_ops.tfra_hkv_hash_table_find_with_exists( + values, exists = hkv_ops.tfra_hkv_hash_table_find_with_exists_lru( self.resource_handle, keys, dynamic_default_values if dynamic_default_values is not None else self._default_value, ) else: - values = hkv_ops.tfra_hkv_hash_table_find( + values = 
hkv_ops.tfra_hkv_hash_table_find_lru( self.resource_handle, keys, dynamic_default_values @@ -368,8 +370,8 @@ def insert(self, keys, values, name=None): with ops.colocate_with(self.resource_handle, ignore_existing=True): # pylint: disable=protected-access if self._device_type == "GPU": - return hkv_ops.tfra_hkv_hash_table_insert(self.resource_handle, keys, - values) + return hkv_ops.tfra_hkv_hash_table_insert_lru(self.resource_handle, + keys, values) else: return cuckoo_ops.tfra_cuckoo_hash_table_insert( self.resource_handle, keys, values) @@ -406,8 +408,9 @@ def accum(self, keys, values_or_deltas, exists, name=None): with ops.colocate_with(self.resource_handle, ignore_existing=True): # pylint: disable=protected-access if self._device_type == "GPU": - return hkv_ops.tfra_hkv_hash_table_accum(self.resource_handle, keys, - values_or_deltas, exists) + return hkv_ops.tfra_hkv_hash_table_accum_lru(self.resource_handle, + keys, values_or_deltas, + exists) else: return cuckoo_ops.tfra_cuckoo_hash_table_accum( self.resource_handle, keys, values_or_deltas, exists) @@ -426,7 +429,8 @@ def export(self, name=None): [self.resource_handle]): with ops.colocate_with(self.resource_handle): if self._device_type == "GPU": - keys, values = hkv_ops.tfra_hkv_hash_table_export( + + keys, values = hkv_ops.tfra_hkv_hash_table_export_lru( self.resource_handle, self._key_dtype, self._value_dtype) else: keys, values = cuckoo_ops.tfra_cuckoo_hash_table_export( @@ -458,7 +462,7 @@ def save_to_file_system(self, [self.resource_handle]): with ops.colocate_with(None, ignore_existing=True): if self._device_type == "GPU": - return hkv_ops.tfra_hkv_hash_table_save_to_file_system( + return hkv_ops.tfra_hkv_hash_table_save_to_file_system_lru( self.resource_handle, dirpath=dirpath, file_name=file_name if file_name else self._name, @@ -502,7 +506,7 @@ def load_from_file_system(self, [self.resource_handle]): with ops.colocate_with(None, ignore_existing=True): if self._device_type == "GPU": - return hkv_ops.tfra_hkv_hash_table_load_from_file_system( + return hkv_ops.tfra_hkv_hash_table_load_from_file_system_lru( self.resource_handle, dirpath=dirpath, file_name=file_name if file_name else self._name, @@ -559,7 +563,7 @@ def restore(self, restored_tensors, restored_shapes, name=None): with ops.name_scope(name, "%s_table_restore" % self._restore_name): with ops.colocate_with(self.op.resource_handle): if self.op.resource_handle.device.count('GPU'): - return hkv_ops.tfra_hkv_hash_table_import( + return hkv_ops.tfra_hkv_hash_table_import_lru( self.op.resource_handle, restored_tensors[0], restored_tensors[1], diff --git a/tensorflow_recommenders_addons/dynamic_embedding/python/ops/dynamic_embedding_creator.py b/tensorflow_recommenders_addons/dynamic_embedding/python/ops/dynamic_embedding_creator.py index c8379b7e4..64b540170 100644 --- a/tensorflow_recommenders_addons/dynamic_embedding/python/ops/dynamic_embedding_creator.py +++ b/tensorflow_recommenders_addons/dynamic_embedding/python/ops/dynamic_embedding_creator.py @@ -15,6 +15,7 @@ # lint-as: python3 from abc import ABCMeta +from enum import IntEnum, unique from tensorflow.python.eager import context from tensorflow.python.framework import constant_op @@ -136,17 +137,30 @@ def get_config(self): return config +@unique +class HkvEvictStrategy(IntEnum): + LRU = 0 + LFU = 1 + EPOCHLRU = 2 + EPOCHLFU = 3 + CUSTOMIZED = 4 + + class HkvHashTableConfig(object): def __init__(self, init_capacity=KHkvHashTableInitCapacity, max_capacity=KHkvHashTableMaxCapacity, - 
max_hbm_for_values=KHkvHashTableMaxHbmForValuesByBytes): + max_hbm_for_values=KHkvHashTableMaxHbmForValuesByBytes, + evict_strategy=HkvEvictStrategy.LRU, + evict_global_epoch=0): """ CuckooHashTableConfig include nothing for parameter default satisfied. """ self.init_capacity = init_capacity self.max_capacity = max_capacity self.max_hbm_for_values = max_hbm_for_values + self.evict_strategy = evict_strategy + self.evict_global_epoch = evict_global_epoch class HkvHashTableCreator(KVCreator): @@ -171,10 +185,14 @@ def create( self.init_capacity = init_size self.max_capacity = KHkvHashTableMaxCapacity self.max_hbm_for_values = KHkvHashTableMaxHbmForValuesByBytes + self.evict_strategy = HkvEvictStrategy.LRU + self.evict_global_epoch = 0 if self.config and isinstance(self.config, de.HkvHashTableConfig): self.init_capacity = self.config.init_capacity self.max_capacity = self.config.max_capacity self.max_hbm_for_values = self.config.max_hbm_for_values + self.evict_strategy = self.config.evict_strategy + self.evict_global_epoch = self.config.evict_global_epoch self.device = device self.shard_saveable_object_fn = shard_saveable_object_fn @@ -187,6 +205,8 @@ def create( init_capacity=self.init_capacity, max_capacity=self.max_capacity, max_hbm_for_values=self.max_hbm_for_values, + evict_strategy=self.evict_strategy, + evict_global_epoch=self.evict_global_epoch, config=self.config, device=self.device, shard_saveable_object_fn=self.shard_saveable_object_fn) diff --git a/tensorflow_recommenders_addons/dynamic_embedding/python/ops/hkv_hashtable_ops.py b/tensorflow_recommenders_addons/dynamic_embedding/python/ops/hkv_hashtable_ops.py index efcee7b4d..cc50fe85a 100644 --- a/tensorflow_recommenders_addons/dynamic_embedding/python/ops/hkv_hashtable_ops.py +++ b/tensorflow_recommenders_addons/dynamic_embedding/python/ops/hkv_hashtable_ops.py @@ -32,6 +32,8 @@ from tensorflow_recommenders_addons.utils.resource_loader import LazySO from tensorflow_recommenders_addons.utils.resource_loader import prefix_op_name +from tensorflow_recommenders_addons.dynamic_embedding.python.ops.dynamic_embedding_creator import HkvEvictStrategy + try: hkv_ops = LazySO("dynamic_embedding/core/_hkv_ops.so").ops except: @@ -73,6 +75,8 @@ def __init__( config=None, device='', shard_saveable_object_fn=None, + evict_strategy=HkvEvictStrategy.LRU, + evict_global_epoch=0, ): """Creates an empty `HkvHashTable` object. 
@@ -104,6 +108,7 @@ def __init__( self._checkpoint = checkpoint self._key_dtype = key_dtype self._value_dtype = value_dtype + self._scores_dtype = dtypes.int64 self._init_capacity = init_capacity self._max_capacity = max_capacity self._max_hbm_for_values = max_hbm_for_values
@@ -113,11 +118,15 @@ def __init__( if not self._device or self._device == '': self._device = ['/GPU:0'] self._new_obj_trackable = None + self._evict_strategy = evict_strategy + self._evict_global_epoch = evict_global_epoch if self._config: self._init_capacity = self._config.init_capacity self._max_capacity = self._config.max_capacity self._max_hbm_for_values = self._config.max_hbm_for_values + self._evict_strategy = self._config.evict_strategy + self._evict_global_epoch = self._config.evict_global_epoch self._shared_name = None if context.executing_eagerly():
@@ -150,14 +159,33 @@ def __init__( if not context.executing_eagerly(): ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, self.saveable) + def _get_op_interface(self, base_interface_name): + interface_name = base_interface_name + "_lru" + if self._evict_strategy == HkvEvictStrategy.LRU: + interface_name = base_interface_name + "_lru" + elif self._evict_strategy == HkvEvictStrategy.LFU: + interface_name = base_interface_name + "_lfu" + elif self._evict_strategy == HkvEvictStrategy.EPOCHLRU: + interface_name = base_interface_name + "_epochlru" + elif self._evict_strategy == HkvEvictStrategy.EPOCHLFU: + interface_name = base_interface_name + "_epochlfu" + elif self._evict_strategy == HkvEvictStrategy.CUSTOMIZED: + interface_name = base_interface_name + "_customized" + else: + print("unsupported strategy: {}, use default LRU".format( + self._evict_strategy)) + return getattr(hkv_ops, interface_name) + def _create_resource(self): # The table must be shared if checkpointing is requested for multi-worker # training to work correctly. Use the node name if no shared_name has been # explicitly specified. use_node_name_sharing = self._checkpoint and self._shared_name is None + hkv_table = self._get_op_interface("tfra_hkv_hash_table_of_tensors") + with ops.device(self._device): - table_ref = hkv_ops.tfra_hkv_hash_table_of_tensors( + table_ref = hkv_table( shared_name=self._shared_name, use_node_name_sharing=use_node_name_sharing, key_dtype=self._key_dtype,
@@ -166,6 +194,7 @@ def _create_resource(self): init_capacity=self._init_capacity, max_capacity=self._max_capacity, max_hbm_for_vectors=self._max_hbm_for_values, + evict_global_epoch=self._evict_global_epoch, name=self._name, )
@@ -202,11 +231,12 @@ def size(self, name=None): Returns: A scalar tensor containing the number of elements in this table. """ + hkv_size = self._get_op_interface("tfra_hkv_hash_table_size") with ops.name_scope(name, "%s_Size" % self.name, [self.resource_handle]): with ops.colocate_with(self.resource_handle): - return hkv_ops.tfra_hkv_hash_table_size(self.resource_handle, - key_dtype=self._key_dtype, - value_dtype=self._value_dtype) + return hkv_size(self.resource_handle, + key_dtype=self._key_dtype, + value_dtype=self._value_dtype) def remove(self, keys, name=None): """Removes `keys` and its associated values from the table. @@ -246,11 +276,12 @@ def clear(self, name=None): Returns: The created Operation.
""" + hkv_clear = self._get_op_interface("tfra_hkv_hash_table_clear") with ops.name_scope(name, "%s_lookup_table_clear" % self.name, (self.resource_handle, self._default_value)): - op = hkv_ops.tfra_hkv_hash_table_clear(self.resource_handle, - key_dtype=self._key_dtype, - value_dtype=self._value_dtype) + op = hkv_clear(self.resource_handle, + key_dtype=self._key_dtype, + value_dtype=self._value_dtype) return op @@ -295,14 +326,17 @@ def lookup(self, keys = ops.convert_to_tensor(keys, dtype=self._key_dtype, name="keys") with ops.colocate_with(self.resource_handle, ignore_existing=True): if return_exists: - values, exists = hkv_ops.tfra_hkv_hash_table_find_with_exists( + hkv_find_with_exists = self._get_op_interface( + "tfra_hkv_hash_table_find_with_exists") + values, exists = hkv_find_with_exists( self.resource_handle, keys, dynamic_default_values if dynamic_default_values is not None else self._default_value, ) else: - values = hkv_ops.tfra_hkv_hash_table_find( + hkv_find = self._get_op_interface("tfra_hkv_hash_table_find") + values = hkv_find( self.resource_handle, keys, dynamic_default_values @@ -310,7 +344,7 @@ def lookup(self, ) return (values, exists) if return_exists else values - def insert(self, keys, values, name=None): + def insert(self, keys, values, scores=None, name=None): """Associates `keys` with `values`. Args: @@ -334,13 +368,16 @@ def insert(self, keys, values, name=None): ): keys = ops.convert_to_tensor(keys, self._key_dtype, name="keys") values = ops.convert_to_tensor(values, self._value_dtype, name="values") + if scores: + scores = ops.convert_to_tensor(scores, + self._scores_dtype, + name="scores") + hkv_insert = self._get_op_interface("tfra_hkv_hash_table_insert") with ops.colocate_with(self.resource_handle, ignore_existing=True): - # pylint: disable=protected-access - op = hkv_ops.tfra_hkv_hash_table_insert(self.resource_handle, keys, - values) + op = hkv_insert(self.resource_handle, keys, values, scores) return op - def accum(self, keys, values_or_deltas, exists, name=None): + def accum(self, keys, values_or_deltas, exists, scores=None, name=None): """Associates `keys` with `values`. Args: @@ -369,10 +406,14 @@ def accum(self, keys, values_or_deltas, exists, name=None): self._value_dtype, name="values_or_deltas") exists = ops.convert_to_tensor(exists, dtypes.bool, name="exists") + if scores: + scores = ops.convert_to_tensor(scores, + self._scores_dtype, + name="scores") + hkv_accum = self._get_op_interface("tfra_hkv_hash_table_accum") with ops.colocate_with(self.resource_handle, ignore_existing=True): - # pylint: disable=protected-access - op = hkv_ops.tfra_hkv_hash_table_accum(self.resource_handle, keys, - values_or_deltas, exists) + op = hkv_accum(self.resource_handle, keys, values_or_deltas, exists, + scores) return op def export(self, name=None): @@ -385,22 +426,26 @@ def export(self, name=None): A pair of tensors with the first tensor containing all keys and the second tensors containing all values in the table. 
""" + hkv_export = self._get_op_interface("tfra_hkv_hash_table_export") with ops.name_scope(name, "%s_lookup_table_export_values" % self.name, [self.resource_handle]): with ops.colocate_with(self.resource_handle): - keys, values = hkv_ops.tfra_hkv_hash_table_export( - self.resource_handle, self._key_dtype, self._value_dtype) + keys, values = hkv_export(self.resource_handle, self._key_dtype, + self._value_dtype) return keys, values def export_keys_and_scores(self, split_size, name=None): if not (split_size > 0 and isinstance(split_size, int)): raise ValueError(f'split_size must be positive integer.') + hkv_export_keys_and_scores = self._get_op_interface( + "tfra_hkv_hash_table_export_keys_and_scores") with ops.name_scope(name, "%s_lookup_table_export_keys_and_scores" % self.name, [self.resource_handle]): with ops.colocate_with(self.resource_handle): - keys, scores = hkv_ops.tfra_hkv_hash_table_export_keys_and_scores( - self.resource_handle, Tkeys=self._key_dtype, split_size=split_size) + keys, scores = hkv_export_keys_and_scores(self.resource_handle, + Tkeys=self._key_dtype, + split_size=split_size) return keys, scores def save_to_file_system(self, @@ -423,11 +468,12 @@ def save_to_file_system(self, Returns: An operation to save the table. """ - + hkv_save_to_file_system = self._get_op_interface( + "tfra_hkv_hash_table_save_to_file_system") with ops.name_scope(name, "%s_save_table" % self.name, [self.resource_handle]): with ops.colocate_with(None, ignore_existing=True): - return hkv_ops.tfra_hkv_hash_table_save_to_file_system( + return hkv_save_to_file_system( self.resource_handle, dirpath=dirpath, file_name=file_name if file_name else self._name, @@ -457,10 +503,12 @@ def load_from_file_system(self, Returns: An operation to load keys and values to table from FileSystem. 
""" + hkv_load_from_file_system = self._get_op_interface( + "tfra_hkv_hash_table_load_from_file_system") with ops.name_scope(name, "%s_load_table" % self.name, [self.resource_handle]): with ops.colocate_with(None, ignore_existing=True): - return hkv_ops.tfra_hkv_hash_table_load_from_file_system( + return hkv_load_from_file_system( self.resource_handle, dirpath=dirpath, file_name=file_name if file_name else self._name, @@ -504,9 +552,10 @@ def __init__(self, table, name, full_name=""): def restore(self, restored_tensors, restored_shapes, name=None): del restored_shapes # unused # pylint: disable=protected-access + hkv_import = self._get_op_interface("tfra_hkv_hash_table_import") with ops.name_scope(name, "%s_table_restore" % self._restore_name): with ops.colocate_with(self.op.resource_handle): - return hkv_ops.tfra_hkv_hash_table_import( + return hkv_import( self.op.resource_handle, restored_tensors[0], restored_tensors[1], diff --git a/tools/docker/build_wheel.Dockerfile b/tools/docker/build_wheel.Dockerfile index c6fca9e42..a38bf3de3 100644 --- a/tools/docker/build_wheel.Dockerfile +++ b/tools/docker/build_wheel.Dockerfile @@ -21,12 +21,12 @@ RUN mv /usr/bin/lsb_release2 /usr/bin/lsb_release ARG PY_VERSION RUN ln -sf /usr/local/bin/python$PY_VERSION /usr/bin/python -ENV PATH=/dt7/usr/bin:${PATH} +ENV PATH=/dt8/usr/bin:${PATH} ENV LD_LIBRARY_PATH=/usr/local/lib:${LD_LIBRARY_PATH} -ENV LD_LIBRARY_PATH=/dt7/user/lib64:${LD_LIBRARY_PATH} -ENV LD_LIBRARY_PATH=/dt7/user/lib:${LD_LIBRARY_PATH} -ENV MANPATH=/dt7/user/share/man:${LD_LIBRARY_PATH} -ENV INFOPATH=/dt7/user/share/info +ENV LD_LIBRARY_PATH=/dt8/user/lib64:${LD_LIBRARY_PATH} +ENV LD_LIBRARY_PATH=/dt8/user/lib:${LD_LIBRARY_PATH} +ENV MANPATH=/dt8/user/share/man:${LD_LIBRARY_PATH} +ENV INFOPATH=/dt8/user/share/info ARG TF_VERSION ARG TF_NAME diff --git a/tools/docker/cuda11.2.1-cudnn8-ubuntu20.04-manylinux2014-python3.9.Dockerfile b/tools/docker/cuda11.2.1-cudnn8-ubuntu20.04-manylinux2014-python3.9.Dockerfile index e46e02fe3..3cbd33457 100644 --- a/tools/docker/cuda11.2.1-cudnn8-ubuntu20.04-manylinux2014-python3.9.Dockerfile +++ b/tools/docker/cuda11.2.1-cudnn8-ubuntu20.04-manylinux2014-python3.9.Dockerfile @@ -8,7 +8,7 @@ # --tag "tfra/nosla-cuda11.2.1-cudnn8-ubuntu20.04-manylinux2014-python3.9" # $ docker push tfra/nosla-cuda11.2.1-cudnn8-ubuntu20.04-manylinux2014-python3.9 -FROM nvidia/cuda:11.2.1-cudnn8-devel-ubuntu20.04 as devtoolset +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 as devtoolset RUN chmod 777 /tmp/ ENV DEBIAN_FRONTEND=noninteractive @@ -39,13 +39,11 @@ ADD devtoolset/build_devtoolset.sh build_devtoolset.sh ADD devtoolset/rpm-patch.sh rpm-patch.sh # Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-7 in /dt7. -RUN /build_devtoolset.sh devtoolset-7 /dt7 # Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-8 in /dt8. RUN /build_devtoolset.sh devtoolset-8 /dt8 # TODO(klimek): Split up into two different docker images. -FROM nvidia/cuda:11.2.1-cudnn8-devel-ubuntu20.04 -COPY --from=devtoolset /dt7 /dt7 +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 COPY --from=devtoolset /dt8 /dt8 # Install TensorRT. 
@@ -96,8 +94,8 @@ RUN /install/build_and_install_python.sh "3.9.7" COPY install/install_pip_packages_by_version.sh /install/ RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.9" -COPY install/use_devtoolset_7.sh /install/ -RUN /install/use_devtoolset_7.sh +COPY install/use_devtoolset_8.sh /install/ +RUN /install/use_devtoolset_8.sh COPY install/install_openmpi.sh /install/ RUN /install/install_openmpi.sh "4.1.1" diff --git a/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-multipython.Dockerfile b/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-multipython.Dockerfile new file mode 100644 index 000000000..629faaa32 --- /dev/null +++ b/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-multipython.Dockerfile @@ -0,0 +1,107 @@ +# Dockerfile to build a manylinux 2010 compliant cross-compiler. +# +# Builds a devtoolset gcc/libstdc++ that targets manylinux 2010 compatible +# glibc (2.12) and system libstdc++ (4.4). +# +# To push a new version, run: +# $ docker build -f cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-multipython.Dockerfile . \ +# --tag "tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-multipython" +# $ docker push tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-multipython + +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 as devtoolset + +RUN chmod 777 /tmp/ +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y \ + cpio \ + file \ + flex \ + g++ \ + make \ + patch \ + rpm2cpio \ + unar \ + wget \ + xz-utils \ + libjpeg-dev \ + zlib1g-dev \ + libgflags-dev \ + libsnappy-dev \ + libbz2-dev \ + liblz4-dev \ + libzstd-dev \ + openssh-client \ + && \ + rm -rf /var/lib/apt/lists/* + +ADD devtoolset/fixlinks.sh fixlinks.sh +ADD devtoolset/build_devtoolset.sh build_devtoolset.sh +ADD devtoolset/rpm-patch.sh rpm-patch.sh + +# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-8 in /dt8. +RUN /build_devtoolset.sh devtoolset-8 /dt8 + +# TODO(klimek): Split up into two different docker images. +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 +COPY --from=devtoolset /dt8 /dt8 + +# Install TensorRT. +RUN echo \ + deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 / \ + > /etc/apt/sources.list.d/nvidia-ml.list \ + && \ + apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F60F4B3D7FA2AF80 && \ + apt-get update && \ + rm -rf /var/lib/apt/lists/* + +# Copy and run the install scripts. 
+ARG DEBIAN_FRONTEND=noninteractive + +COPY install/install_bootstrap_deb_packages.sh /install/ +RUN /install/install_bootstrap_deb_packages.sh + +COPY install/install_deb_packages.sh /install/ +RUN /install/install_deb_packages.sh + +# Install additional packages needed for this image: +# - dependencies to build Python from source +# - patchelf, as it is required by auditwheel +RUN apt-get update && apt-get install -y \ + libbz2-dev \ + libffi-dev \ + libgdbm-dev \ + libncurses5-dev \ + libnss3-dev \ + libreadline-dev \ + patchelf \ + gcc-multilib \ + && \ + rm -rf /var/lib/apt/lists/* + +RUN chmod 777 /tmp/ +WORKDIR /tmp/ + +COPY install/install_nccl.sh /install/ +RUN /install/install_nccl.sh "2.8.4-1+cuda11.2" + +COPY install/install_bazel.sh /install/ +RUN /install/install_bazel.sh "5.1.1" + +COPY install/build_and_install_python.sh /install/ +RUN /install/build_and_install_python.sh "3.7.7" +RUN /install/build_and_install_python.sh "3.8.2" +RUN /install/build_and_install_python.sh "3.9.7" + +COPY install/install_pip_packages_by_version.sh /install/ +RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.9" +RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.8" +RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.7" + +COPY install/use_devtoolset_8.sh /install/ +RUN /install/use_devtoolset_8.sh + +COPY install/install_openmpi.sh /install/ +RUN /install/install_openmpi.sh "4.1.1" + +# clean +RUN rm -rf /tmp/* diff --git a/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.10.Dockerfile b/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.10.Dockerfile new file mode 100644 index 000000000..be8fee2c3 --- /dev/null +++ b/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.10.Dockerfile @@ -0,0 +1,103 @@ +# Dockerfile to build a manylinux 2010 compliant cross-compiler. +# +# Builds a devtoolset gcc/libstdc++ that targets manylinux 2010 compatible +# glibc (2.12) and system libstdc++ (4.4). +# +# To push a new version, run: +# $ docker build -f cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.10.Dockerfile . \ +# --tag "tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.10" +# $ docker push tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.10 + +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 as devtoolset + +RUN chmod 777 /tmp/ +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y \ + cpio \ + file \ + flex \ + g++ \ + make \ + patch \ + rpm2cpio \ + unar \ + wget \ + xz-utils \ + libjpeg-dev \ + zlib1g-dev \ + libgflags-dev \ + libsnappy-dev \ + libbz2-dev \ + liblz4-dev \ + libzstd-dev \ + openssh-client \ + && \ + rm -rf /var/lib/apt/lists/* + +ADD devtoolset/fixlinks.sh fixlinks.sh +ADD devtoolset/build_devtoolset.sh build_devtoolset.sh +ADD devtoolset/rpm-patch.sh rpm-patch.sh + +# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-8 in /dt8. +RUN /build_devtoolset.sh devtoolset-8 /dt8 + +# TODO(klimek): Split up into two different docker images. +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 +COPY --from=devtoolset /dt8 /dt8 + +# Install TensorRT. +RUN echo \ + deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 / \ + > /etc/apt/sources.list.d/nvidia-ml.list \ + && \ + apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F60F4B3D7FA2AF80 && \ + apt-get update && \ + rm -rf /var/lib/apt/lists/* + +# Copy and run the install scripts. 
+ARG DEBIAN_FRONTEND=noninteractive + +COPY install/install_bootstrap_deb_packages.sh /install/ +RUN /install/install_bootstrap_deb_packages.sh + +COPY install/install_deb_packages.sh /install/ +RUN /install/install_deb_packages.sh + +# Install additional packages needed for this image: +# - dependencies to build Python from source +# - patchelf, as it is required by auditwheel +RUN apt-get update && apt-get install -y \ + libbz2-dev \ + libffi-dev \ + libgdbm-dev \ + libncurses5-dev \ + libnss3-dev \ + libreadline-dev \ + patchelf \ + gcc-multilib \ + && \ + rm -rf /var/lib/apt/lists/* + +RUN chmod 777 /tmp/ +WORKDIR /tmp/ + +COPY install/install_nccl.sh /install/ +RUN /install/install_nccl.sh "2.8.4-1+cuda11.2" + +COPY install/install_bazel.sh /install/ +RUN /install/install_bazel.sh "5.1.1" + +COPY install/build_and_install_python.sh /install/ +RUN /install/build_and_install_python.sh "3.10.6" + +COPY install/install_pip_packages_by_version.sh /install/ +RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.10" + +COPY install/use_devtoolset_8.sh /install/ +RUN /install/use_devtoolset_8.sh + +COPY install/install_openmpi.sh /install/ +RUN /install/install_openmpi.sh "4.1.1" + +# clean +RUN rm -rf /tmp/* diff --git a/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.7.Dockerfile b/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.7.Dockerfile new file mode 100644 index 000000000..84581ca09 --- /dev/null +++ b/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.7.Dockerfile @@ -0,0 +1,103 @@ +# Dockerfile to build a manylinux 2010 compliant cross-compiler. +# +# Builds a devtoolset gcc/libstdc++ that targets manylinux 2010 compatible +# glibc (2.12) and system libstdc++ (4.4). +# +# To push a new version, run: +# $ docker build -f cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.7.Dockerfile . \ +# --tag "tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.7" +# $ docker push tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.7 + +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 as devtoolset + +RUN chmod 777 /tmp/ +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y \ + cpio \ + file \ + flex \ + g++ \ + make \ + patch \ + rpm2cpio \ + unar \ + wget \ + xz-utils \ + libjpeg-dev \ + zlib1g-dev \ + libgflags-dev \ + libsnappy-dev \ + libbz2-dev \ + liblz4-dev \ + libzstd-dev \ + openssh-client \ + && \ + rm -rf /var/lib/apt/lists/* + +ADD devtoolset/fixlinks.sh fixlinks.sh +ADD devtoolset/build_devtoolset.sh build_devtoolset.sh +ADD devtoolset/rpm-patch.sh rpm-patch.sh + +# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-8 in /dt8. +RUN /build_devtoolset.sh devtoolset-8 /dt8 + +# TODO(klimek): Split up into two different docker images. +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 +COPY --from=devtoolset /dt8 /dt8 + +# Install TensorRT. +RUN echo \ + deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 / \ + > /etc/apt/sources.list.d/nvidia-ml.list \ + && \ + apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F60F4B3D7FA2AF80 && \ + apt-get update && \ + rm -rf /var/lib/apt/lists/* + +# Copy and run the install scripts. 
+ARG DEBIAN_FRONTEND=noninteractive + +COPY install/install_bootstrap_deb_packages.sh /install/ +RUN /install/install_bootstrap_deb_packages.sh + +COPY install/install_deb_packages.sh /install/ +RUN /install/install_deb_packages.sh + +# Install additional packages needed for this image: +# - dependencies to build Python from source +# - patchelf, as it is required by auditwheel +RUN apt-get update && apt-get install -y \ + libbz2-dev \ + libffi-dev \ + libgdbm-dev \ + libncurses5-dev \ + libnss3-dev \ + libreadline-dev \ + patchelf \ + gcc-multilib \ + && \ + rm -rf /var/lib/apt/lists/* + +RUN chmod 777 /tmp/ +WORKDIR /tmp/ + +COPY install/install_nccl.sh /install/ +RUN /install/install_nccl.sh "2.8.4-1+cuda11.2" + +COPY install/install_bazel.sh /install/ +RUN /install/install_bazel.sh "5.1.1" + +COPY install/build_and_install_python.sh /install/ +RUN /install/build_and_install_python.sh "3.7.7" + +COPY install/install_pip_packages_by_version.sh /install/ +RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.7" + +COPY install/use_devtoolset_8.sh /install/ +RUN /install/use_devtoolset_8.sh + +COPY install/install_openmpi.sh /install/ +RUN /install/install_openmpi.sh "4.1.1" + +# clean +RUN rm -rf /tmp/* diff --git a/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.8.Dockerfile b/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.8.Dockerfile new file mode 100644 index 000000000..748d60e40 --- /dev/null +++ b/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.8.Dockerfile @@ -0,0 +1,103 @@ +# Dockerfile to build a manylinux 2010 compliant cross-compiler. +# +# Builds a devtoolset gcc/libstdc++ that targets manylinux 2010 compatible +# glibc (2.12) and system libstdc++ (4.4). +# +# To push a new version, run: +# $ docker build -f cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.8.Dockerfile . \ +# --tag "tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.8" +# $ docker push tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.8 + +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 as devtoolset + +RUN chmod 777 /tmp/ +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y \ + cpio \ + file \ + flex \ + g++ \ + make \ + patch \ + rpm2cpio \ + unar \ + wget \ + xz-utils \ + libjpeg-dev \ + zlib1g-dev \ + libgflags-dev \ + libsnappy-dev \ + libbz2-dev \ + liblz4-dev \ + libzstd-dev \ + openssh-client \ + && \ + rm -rf /var/lib/apt/lists/* + +ADD devtoolset/fixlinks.sh fixlinks.sh +ADD devtoolset/build_devtoolset.sh build_devtoolset.sh +ADD devtoolset/rpm-patch.sh rpm-patch.sh + +# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-8 in /dt8. +RUN /build_devtoolset.sh devtoolset-8 /dt8 + +# TODO(klimek): Split up into two different docker images. +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 +COPY --from=devtoolset /dt8 /dt8 + +# Install TensorRT. +RUN echo \ + deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 / \ + > /etc/apt/sources.list.d/nvidia-ml.list \ + && \ + apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F60F4B3D7FA2AF80 && \ + apt-get update && \ + rm -rf /var/lib/apt/lists/* + +# Copy and run the install scripts. 
+ARG DEBIAN_FRONTEND=noninteractive + +COPY install/install_bootstrap_deb_packages.sh /install/ +RUN /install/install_bootstrap_deb_packages.sh + +COPY install/install_deb_packages.sh /install/ +RUN /install/install_deb_packages.sh + +# Install additional packages needed for this image: +# - dependencies to build Python from source +# - patchelf, as it is required by auditwheel +RUN apt-get update && apt-get install -y \ + libbz2-dev \ + libffi-dev \ + libgdbm-dev \ + libncurses5-dev \ + libnss3-dev \ + libreadline-dev \ + patchelf \ + gcc-multilib \ + && \ + rm -rf /var/lib/apt/lists/* + +RUN chmod 777 /tmp/ +WORKDIR /tmp/ + +COPY install/install_nccl.sh /install/ +RUN /install/install_nccl.sh "2.8.4-1+cuda11.2" + +COPY install/install_bazel.sh /install/ +RUN /install/install_bazel.sh "5.1.1" + +COPY install/build_and_install_python.sh /install/ +RUN /install/build_and_install_python.sh "3.8.2" + +COPY install/install_pip_packages_by_version.sh /install/ +RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.8" + +COPY install/use_devtoolset_8.sh /install/ +RUN /install/use_devtoolset_8.sh + +COPY install/install_openmpi.sh /install/ +RUN /install/install_openmpi.sh "4.1.1" + +# clean +RUN rm -rf /tmp/* diff --git a/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.9.Dockerfile b/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.9.Dockerfile new file mode 100644 index 000000000..2c9fb4e81 --- /dev/null +++ b/tools/docker/cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.9.Dockerfile @@ -0,0 +1,103 @@ +# Dockerfile to build a manylinux 2010 compliant cross-compiler. +# +# Builds a devtoolset gcc/libstdc++ that targets manylinux 2010 compatible +# glibc (2.12) and system libstdc++ (4.4). +# +# To push a new version, run: +# $ docker build -f cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.9.Dockerfile . \ +# --tag "tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.9" +# $ docker push tfra/nosla-cuda11.2.2-cudnn8-ubuntu20.04-manylinux2014-python3.9 + +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 as devtoolset + +RUN chmod 777 /tmp/ +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y \ + cpio \ + file \ + flex \ + g++ \ + make \ + patch \ + rpm2cpio \ + unar \ + wget \ + xz-utils \ + libjpeg-dev \ + zlib1g-dev \ + libgflags-dev \ + libsnappy-dev \ + libbz2-dev \ + liblz4-dev \ + libzstd-dev \ + openssh-client \ + && \ + rm -rf /var/lib/apt/lists/* + +ADD devtoolset/fixlinks.sh fixlinks.sh +ADD devtoolset/build_devtoolset.sh build_devtoolset.sh +ADD devtoolset/rpm-patch.sh rpm-patch.sh + +# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-8 in /dt8. +RUN /build_devtoolset.sh devtoolset-8 /dt8 + +# TODO(klimek): Split up into two different docker images. +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 +COPY --from=devtoolset /dt8 /dt8 + +# Install TensorRT. +RUN echo \ + deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 / \ + > /etc/apt/sources.list.d/nvidia-ml.list \ + && \ + apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F60F4B3D7FA2AF80 && \ + apt-get update && \ + rm -rf /var/lib/apt/lists/* + +# Copy and run the install scripts. 
+ARG DEBIAN_FRONTEND=noninteractive + +COPY install/install_bootstrap_deb_packages.sh /install/ +RUN /install/install_bootstrap_deb_packages.sh + +COPY install/install_deb_packages.sh /install/ +RUN /install/install_deb_packages.sh + +# Install additional packages needed for this image: +# - dependencies to build Python from source +# - patchelf, as it is required by auditwheel +RUN apt-get update && apt-get install -y \ + libbz2-dev \ + libffi-dev \ + libgdbm-dev \ + libncurses5-dev \ + libnss3-dev \ + libreadline-dev \ + patchelf \ + gcc-multilib \ + && \ + rm -rf /var/lib/apt/lists/* + +RUN chmod 777 /tmp/ +WORKDIR /tmp/ + +COPY install/install_nccl.sh /install/ +RUN /install/install_nccl.sh "2.8.4-1+cuda11.2" + +COPY install/install_bazel.sh /install/ +RUN /install/install_bazel.sh "5.1.1" + +COPY install/build_and_install_python.sh /install/ +RUN /install/build_and_install_python.sh "3.9.7" + +COPY install/install_pip_packages_by_version.sh /install/ +RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.9" + +COPY install/use_devtoolset_8.sh /install/ +RUN /install/use_devtoolset_8.sh + +COPY install/install_openmpi.sh /install/ +RUN /install/install_openmpi.sh "4.1.1" + +# clean +RUN rm -rf /tmp/* diff --git a/tools/docker/install/use_devtoolset_8.sh b/tools/docker/install/use_devtoolset_8.sh new file mode 100755 index 000000000..0a6b3cbfd --- /dev/null +++ b/tools/docker/install/use_devtoolset_8.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# Use devtoolset-8 as tool chain +rm -r /usr/bin/gcc* +export PATH=/dt8/usr/bin:${PATH} +export PATH=/usr/bin/:/usr/local/bin/:${PATH} +export LD_LIBRARY_PATH=/usr/local/lib:${LD_LIBRARY_PATH} +export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/:${LD_LIBRARY_PATH} +ln -sf /dt8/usr/bin/cc /usr/bin/gcc +ln -sf /dt8/usr/bin/gcc /usr/bin/gcc +ln -sf /dt8/usr/bin/g++ /usr/bin/g++ +
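Putting the pieces together: the point of the strategy-specific kernels, the config plumbing and the optional scores is to let a dynamic embedding variable pick its HKV eviction policy at construction time. A closing sketch; `de.get_variable` and its `kv_creator` argument are standard TFRA usage rather than part of this diff, and the EPOCHLFU comment reflects HKV's documented scoring scheme, so treat both as assumptions.

```python
from tensorflow_recommenders_addons import dynamic_embedding as de
from tensorflow_recommenders_addons.dynamic_embedding.python.ops.dynamic_embedding_creator import (
    HkvEvictStrategy,)

# evict_global_epoch is forwarded to the table-creation kernel; under
# EPOCHLRU/EPOCHLFU it is combined with recency/frequency in the score.
config = de.HkvHashTableConfig(evict_strategy=HkvEvictStrategy.EPOCHLFU,
                               evict_global_epoch=3)

user_embeddings = de.get_variable(
    name="user_embeddings",
    dim=16,
    devices=["/GPU:0"],
    kv_creator=de.HkvHashTableCreator(config=config),
)
```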