@@ -564,58 +564,61 @@ <h2>Functions<a class="headerlink" href="#functions" title="Link to this heading
564564< tr  class ="row-even "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.AnnotateDeviceRegions " title ="tilelang.transform.AnnotateDeviceRegions "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> AnnotateDeviceRegions</ span > </ code > </ a > ()</ p > </ td > 
565565< td > < p > AnnotateDeviceRegions</ p > </ td > 
566566</ tr > 
567- < tr  class ="row-odd "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.VectorizeLoop " title ="tilelang.transform.VectorizeLoop "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> VectorizeLoop</ span > </ code > </ a > ([enable_vectorize])</ p > </ td > 
567+ < tr  class ="row-odd "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.SplitHostDevice " title ="tilelang.transform.SplitHostDevice "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> SplitHostDevice</ span > </ code > </ a > ()</ p > </ td > 
568+ < td > < p > Split host/device functions even for empty kernels.</ p > </ td > 
569+ </ tr > 
570+ < tr  class ="row-even "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.VectorizeLoop " title ="tilelang.transform.VectorizeLoop "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> VectorizeLoop</ span > </ code > </ a > ([enable_vectorize])</ p > </ td > 
568571< td > < p > VectorizeLoop</ p > </ td > 
569572</ tr > 
570- < tr  class ="row-even  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.InjectPTXAsyncCopy " title ="tilelang.transform.InjectPTXAsyncCopy "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> InjectPTXAsyncCopy</ span > </ code > </ a > ()</ p > </ td > 
573+ < tr  class ="row-odd  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.InjectPTXAsyncCopy " title ="tilelang.transform.InjectPTXAsyncCopy "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> InjectPTXAsyncCopy</ span > </ code > </ a > ()</ p > </ td > 
571574< td > < p > Rewrite global to shared memory copy on CUDA with asynchronous copy.</ p > </ td > 
572575</ tr > 
573- < tr  class ="row-odd  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.LowerDeviceStorageAccessInfo " title ="tilelang.transform.LowerDeviceStorageAccessInfo "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> LowerDeviceStorageAccessInfo</ span > </ code > </ a > ()</ p > </ td > 
576+ < tr  class ="row-even  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.LowerDeviceStorageAccessInfo " title ="tilelang.transform.LowerDeviceStorageAccessInfo "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> LowerDeviceStorageAccessInfo</ span > </ code > </ a > ()</ p > </ td > 
574577< td > < p > Lower attached storage access information on device.</ p > </ td > 
575578</ tr > 
576- < tr  class ="row-even  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.LoopVectorizeDynamic " title ="tilelang.transform.LoopVectorizeDynamic "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> LoopVectorizeDynamic</ span > </ code > </ a > ()</ p > </ td > 
579+ < tr  class ="row-odd  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.LoopVectorizeDynamic " title ="tilelang.transform.LoopVectorizeDynamic "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> LoopVectorizeDynamic</ span > </ code > </ a > ()</ p > </ td > 
577580< td > < p > Try to vectorize loop with dynamic shape.</ p > </ td > 
578581</ tr > 
579- < tr  class ="row-odd  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.ConfigIndexBitwidth " title ="tilelang.transform.ConfigIndexBitwidth "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> ConfigIndexBitwidth</ span > </ code > </ a > ()</ p > </ td > 
582+ < tr  class ="row-even  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.ConfigIndexBitwidth " title ="tilelang.transform.ConfigIndexBitwidth "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> ConfigIndexBitwidth</ span > </ code > </ a > ()</ p > </ td > 
580583< td > < p > Config index bitwidth.</ p > </ td > 
581584</ tr > 
582- < tr  class ="row-even  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.FlattenBuffer " title ="tilelang.transform.FlattenBuffer "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> FlattenBuffer</ span > </ code > </ a > ()</ p > </ td > 
585+ < tr  class ="row-odd  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.FlattenBuffer " title ="tilelang.transform.FlattenBuffer "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> FlattenBuffer</ span > </ code > </ a > ()</ p > </ td > 
583586< td > < p > FlattenBuffer</ p > </ td > 
584587</ tr > 
585- < tr  class ="row-odd  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.EliminateStorageSyncForMBarrier " title ="tilelang.transform.EliminateStorageSyncForMBarrier "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> EliminateStorageSyncForMBarrier</ span > </ code > </ a > ()</ p > </ td > 
588+ < tr  class ="row-even  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.EliminateStorageSyncForMBarrier " title ="tilelang.transform.EliminateStorageSyncForMBarrier "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> EliminateStorageSyncForMBarrier</ span > </ code > </ a > ()</ p > </ td > 
586589< td > < p > EliminateStorageSyncForMBarrier</ p > </ td > 
587590</ tr > 
588- < tr  class ="row-even  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.MergeSharedMemoryAllocations " title ="tilelang.transform.MergeSharedMemoryAllocations "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> MergeSharedMemoryAllocations</ span > </ code > </ a > ([...])</ p > </ td > 
591+ < tr  class ="row-odd  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.MergeSharedMemoryAllocations " title ="tilelang.transform.MergeSharedMemoryAllocations "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> MergeSharedMemoryAllocations</ span > </ code > </ a > ([...])</ p > </ td > 
589592< td > < p > MergeSharedMemoryAllocations</ p > </ td > 
590593</ tr > 
591- < tr  class ="row-odd  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.LowerL2Persistent " title ="tilelang.transform.LowerL2Persistent "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> LowerL2Persistent</ span > </ code > </ a > ()</ p > </ td > 
594+ < tr  class ="row-even  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.LowerL2Persistent " title ="tilelang.transform.LowerL2Persistent "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> LowerL2Persistent</ span > </ code > </ a > ()</ p > </ td > 
592595< td > < p > LowerL2Persistent</ p > </ td > 
593596</ tr > 
594- < tr  class ="row-even  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.PersistThreadblock " title ="tilelang.transform.PersistThreadblock "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> PersistThreadblock</ span > </ code > </ a > ()</ p > </ td > 
597+ < tr  class ="row-odd  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.PersistThreadblock " title ="tilelang.transform.PersistThreadblock "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> PersistThreadblock</ span > </ code > </ a > ()</ p > </ td > 
595598< td > < p > PersistThreadblock</ p > </ td > 
596599</ tr > 
597- < tr  class ="row-odd  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.AlignDynamicSharedMemoryAllocations " title ="tilelang.transform.AlignDynamicSharedMemoryAllocations "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> AlignDynamicSharedMemoryAllocations</ span > </ code > </ a > ([align_bytes])</ p > </ td > 
600+ < tr  class ="row-even  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.AlignDynamicSharedMemoryAllocations " title ="tilelang.transform.AlignDynamicSharedMemoryAllocations "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> AlignDynamicSharedMemoryAllocations</ span > </ code > </ a > ([align_bytes])</ p > </ td > 
598601< td > < p > AlignDynamicSharedMemoryAllocations</ p > </ td > 
599602</ tr > 
600- < tr  class ="row-even  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.LowerSharedBarrier " title ="tilelang.transform.LowerSharedBarrier "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> LowerSharedBarrier</ span > </ code > </ a > ()</ p > </ td > 
603+ < tr  class ="row-odd  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.LowerSharedBarrier " title ="tilelang.transform.LowerSharedBarrier "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> LowerSharedBarrier</ span > </ code > </ a > ()</ p > </ td > 
601604< td > < p > LowerSharedBarrier</ p > </ td > 
602605</ tr > 
603- < tr  class ="row-odd  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.StorageRewrite " title ="tilelang.transform.StorageRewrite "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> StorageRewrite</ span > </ code > </ a > ()</ p > </ td > 
606+ < tr  class ="row-even  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.StorageRewrite " title ="tilelang.transform.StorageRewrite "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> StorageRewrite</ span > </ code > </ a > ()</ p > </ td > 
604607< td > < p > StorageRewrite</ p > </ td > 
605608</ tr > 
606- < tr  class ="row-even  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.LowerOpaqueBlock " title ="tilelang.transform.LowerOpaqueBlock "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> LowerOpaqueBlock</ span > </ code > </ a > ()</ p > </ td > 
609+ < tr  class ="row-odd  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.LowerOpaqueBlock " title ="tilelang.transform.LowerOpaqueBlock "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> LowerOpaqueBlock</ span > </ code > </ a > ()</ p > </ td > 
607610< td > < p > LowerOpaqueBlock</ p > </ td > 
608611</ tr > 
609- < tr  class ="row-odd  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.LowerThreadAllreduce " title ="tilelang.transform.LowerThreadAllreduce "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> LowerThreadAllreduce</ span > </ code > </ a > ()</ p > </ td > 
612+ < tr  class ="row-even  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.LowerThreadAllreduce " title ="tilelang.transform.LowerThreadAllreduce "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> LowerThreadAllreduce</ span > </ code > </ a > ()</ p > </ td > 
610613< td > < p > LowerThreadAllreduce</ p > </ td > 
611614</ tr > 
612- < tr  class ="row-even  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.LowerDeviceKernelLaunch " title ="tilelang.transform.LowerDeviceKernelLaunch "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> LowerDeviceKernelLaunch</ span > </ code > </ a > ()</ p > </ td > 
615+ < tr  class ="row-odd  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.LowerDeviceKernelLaunch " title ="tilelang.transform.LowerDeviceKernelLaunch "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> LowerDeviceKernelLaunch</ span > </ code > </ a > ()</ p > </ td > 
613616< td > < p > Create and return a transform pass that lowers device kernel launch constructs to target-specific IR.</ p > </ td > 
614617</ tr > 
615- < tr  class ="row-odd  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.LowerSharedTmem " title ="tilelang.transform.LowerSharedTmem "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> LowerSharedTmem</ span > </ code > </ a > ()</ p > </ td > 
618+ < tr  class ="row-even  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.LowerSharedTmem " title ="tilelang.transform.LowerSharedTmem "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> LowerSharedTmem</ span > </ code > </ a > ()</ p > </ td > 
616619< td > < p > LowerSharedTmem</ p > </ td > 
617620</ tr > 
618- < tr  class ="row-even  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.LayoutReducer " title ="tilelang.transform.LayoutReducer "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> LayoutReducer</ span > </ code > </ a > ()</ p > </ td > 
621+ < tr  class ="row-odd  "> < td > < p > < a  class ="reference internal " href ="#tilelang.transform.LayoutReducer " title ="tilelang.transform.LayoutReducer "> < code  class ="xref py py-obj docutils literal notranslate "> < span  class ="pre "> LayoutReducer</ span > </ code > </ a > ()</ p > </ td > 
619622< td > < p > Return a TVM transform pass that performs layout reduction/normalization.</ p > </ td > 
620623</ tr > 
621624</ tbody > 
@@ -960,6 +963,20 @@ <h3>Returns:<a class="headerlink" href="#returns" title="Link to this heading">
960963</ dl > 
961964</ dd > </ dl > 
962965
966+ < dl  class ="py function "> 
967+ < dt  class ="sig sig-object py " id ="tilelang.transform.SplitHostDevice "> 
968+ < span  class ="sig-prename descclassname "> < span  class ="pre "> tilelang.transform.</ span > </ span > < span  class ="sig-name descname "> < span  class ="pre "> SplitHostDevice</ span > </ span > < span  class ="sig-paren "> (</ span > < span  class ="sig-paren "> )</ span > < a  class ="headerlink " href ="#tilelang.transform.SplitHostDevice " title ="Link to this definition "> ¶</ a > </ dt > 
969+ < dd > < p > Split host/device functions even for empty kernels.</ p > 
970+ < dl  class ="field-list simple "> 
971+ < dt  class ="field-odd "> Returns< span  class ="colon "> :</ span > </ dt > 
972+ < dd  class ="field-odd "> < p > < strong > fpass</ strong >  – The result pass</ p > 
973+ </ dd > 
974+ < dt  class ="field-even "> Return type< span  class ="colon "> :</ span > </ dt > 
975+ < dd  class ="field-even "> < p > tvm.transform.Pass</ p > 
976+ </ dd > 
977+ </ dl > 
978+ </ dd > </ dl > 
979+ 
963980< dl  class ="py function "> 
964981< dt  class ="sig sig-object py " id ="tilelang.transform.VectorizeLoop "> 
965982< span  class ="sig-prename descclassname "> < span  class ="pre "> tilelang.transform.</ span > </ span > < span  class ="sig-name descname "> < span  class ="pre "> VectorizeLoop</ span > </ span > < span  class ="sig-paren "> (</ span > < em  class ="sig-param "> < span  class ="n "> < span  class ="pre "> enable_vectorize</ span > </ span > < span  class ="o "> < span  class ="pre "> =</ span > </ span > < span  class ="default_value "> < span  class ="pre "> True</ span > </ span > </ em > < span  class ="sig-paren "> )</ span > < a  class ="headerlink " href ="#tilelang.transform.VectorizeLoop " title ="Link to this definition "> ¶</ a > </ dt > 
@@ -1254,6 +1271,7 @@ <h3>Returns:<a class="headerlink" href="#returns" title="Link to this heading">
12541271< li > < a  class ="reference internal " href ="#tilelang.transform.LegalizeSafeMemoryAccess "> < code  class ="docutils literal notranslate "> < span  class ="pre "> LegalizeSafeMemoryAccess()</ span > </ code > </ a > </ li > 
12551272< li > < a  class ="reference internal " href ="#tilelang.transform.MakePackedAPI "> < code  class ="docutils literal notranslate "> < span  class ="pre "> MakePackedAPI()</ span > </ code > </ a > </ li > 
12561273< li > < a  class ="reference internal " href ="#tilelang.transform.AnnotateDeviceRegions "> < code  class ="docutils literal notranslate "> < span  class ="pre "> AnnotateDeviceRegions()</ span > </ code > </ a > </ li > 
1274+ < li > < a  class ="reference internal " href ="#tilelang.transform.SplitHostDevice "> < code  class ="docutils literal notranslate "> < span  class ="pre "> SplitHostDevice()</ span > </ code > </ a > </ li > 
12571275< li > < a  class ="reference internal " href ="#tilelang.transform.VectorizeLoop "> < code  class ="docutils literal notranslate "> < span  class ="pre "> VectorizeLoop()</ span > </ code > </ a > </ li > 
12581276< li > < a  class ="reference internal " href ="#tilelang.transform.InjectPTXAsyncCopy "> < code  class ="docutils literal notranslate "> < span  class ="pre "> InjectPTXAsyncCopy()</ span > </ code > </ a > </ li > 
12591277< li > < a  class ="reference internal " href ="#tilelang.transform.LowerDeviceStorageAccessInfo "> < code  class ="docutils literal notranslate "> < span  class ="pre "> LowerDeviceStorageAccessInfo()</ span > </ code > </ a > </ li > 
0 commit comments