bheisler · LutzCle · Jan 15, 2020
diff --git a/src/function.rs b/src/function.rs
@@ -149,8 +149,15 @@ pub enum FunctionAttribute {
     /// option "-Xptxas --dlcm=ca" set.
     CacheModeCa = 7,
 
+    /// The maximum size of dynamically allocated shared memory in bytes.
+    MaxDynamicSharedSizeBytes = 8,
+
+    /// The percentage of cache resources used for shared memory on architectures on
+    /// which the L1 cache and shared memory are unified.
+    PreferredSharedMemoryCarveout = 9,
+
     #[doc(hidden)]
-    __Nonexhaustive = 8,
+    __Nonexhaustive = 10,
 }
 
 /// Handle to a global kernel function.
@@ -202,6 +209,20 @@ impl<'a> Function<'a> {
         }
     }
 
+    /// Sets function attributes.
+    fn set_attribute(&mut self, attr: FunctionAttribute, value: i32) -> CudaResult<()> {
+        unsafe {
+            cuda_driver_sys::cuFuncSetAttribute(
+                self.inner,
+                // This should be safe, as the repr and values of FunctionAttribute should match.
+                ::std::mem::transmute(attr),
+                value,
+            )
+            .to_result()?;
+            Ok(())
+        }
+    }
+
     /// Sets the preferred cache configuration for this function.
     ///
     /// On devices where L1 cache and shared memory use the same hardware resources, this sets the
@@ -263,6 +284,61 @@ impl<'a> Function<'a> {
         unsafe { cuda_driver_sys::cuFuncSetSharedMemConfig(self.inner, transmute(cfg)).to_result() }
     }
 
+    /// Sets the maximum amount of dynamically allocated shared memory in bytes.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use rustacuda::*;
+    /// # use std::error::Error;
+    /// # fn main() -> Result<(), Box<dyn Error>> {
+    /// # let _ctx = quick_init()?;
+    /// # use rustacuda::module::Module;
+    /// # use std::ffi::CString;
+    /// # let ptx = CString::new(include_str!("../resources/add.ptx"))?;
+    /// # let module = Module::load_from_string(&ptx)?;
+    /// # let name = CString::new("sum")?;
+    /// let mut function = module.get_function(&name)?;
+    /// function.set_max_dynamic_shared_size_bytes(32768)?;
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub fn set_max_dynamic_shared_size_bytes(&mut self, max_bytes: u32) -> CudaResult<()> {
+        self.set_attribute(
+            FunctionAttribute::MaxDynamicSharedSizeBytes,
+            max_bytes as i32,
+        )
+    }
+
+    /// Sets the percentage of cache resources used for shared memory.
+    ///
+    /// On devices on which the L1 cache and shared memory are unified, this function
+    /// sets the percentage of cache resources that will be used for shared memory. The remaining
+    /// cache resources will used for the L1 cache. The setting is a preference that
+    /// can be ignored by the driver.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use rustacuda::*;
+    /// # use std::error::Error;
+    /// # fn main() -> Result<(), Box<dyn Error>> {
+    /// # let _ctx = quick_init()?;
+    /// # use rustacuda::module::Module;
+    /// # use std::ffi::CString;
+    /// # let ptx = CString::new(include_str!("../resources/add.ptx"))?;
+    /// # let module = Module::load_from_string(&ptx)?;
+    /// # let name = CString::new("sum")?;
+    /// let mut function = module.get_function(&name)?;
+    /// function.set_preferred_shared_memory_carveout(50)?;
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub fn set_preferred_shared_memory_carveout(&mut self, percent: u8) -> CudaResult<()> {
+        let percent_i32: i32 = percent.into();
+        self.set_attribute(FunctionAttribute::MaxDynamicSharedSizeBytes, percent_i32)
+    }
+
     pub(crate) fn to_inner(&self) -> CUfunction {
         self.inner
     }