From 4dfa8a542b5575825115d9d69dd5e74318a847ed Mon Sep 17 00:00:00 2001
From: JaredSchwartz <75581425+JaredSchwartz@users.noreply.github.com>
Date: Sun, 27 Oct 2024 16:20:22 -0600
Subject: [PATCH] Optimize memory usage with bitmatrix operations

---
 src/RuleMiner.jl                  |   1 +
 src/association_rules/apriori.jl  |   3 +-
 src/data_structures/txnutils.jl   |  40 ++++++++++
 src/itemsets/closed/carpenter.jl  |  65 ++++++----------
 src/itemsets/closed/charm.jl      |  90 +++++++++++------------
 src/itemsets/closed/fpclose.jl    |  11 +--
 src/itemsets/closed/lcm.jl        |  80 +++++++++-----------
 src/itemsets/frequent/eclat.jl    |  44 +++--------
 src/itemsets/frequent/fpgrowth.jl |  12 +--
 src/itemsets/itemset_utils.jl     | 118 ++++++++++++++++++++++++++++++
 src/itemsets/maximal/fpmax.jl     |  12 +--
 src/itemsets/maximal/genmax.jl    |  98 ++++++++++++-------------
 12 files changed, 325 insertions(+), 249 deletions(-)
 create mode 100644 src/itemsets/itemset_utils.jl

diff --git a/src/RuleMiner.jl b/src/RuleMiner.jl
index 144bdc9..bf2f6cc 100644
--- a/src/RuleMiner.jl
+++ b/src/RuleMiner.jl
@@ -62,6 +62,7 @@ export apriori
 include("itemsets/frequent/eclat.jl")
 include("itemsets/frequent/fpgrowth.jl")
 include("itemsets/frequent/recovery.jl")
+include("itemsets/itemset_utils.jl")
 
 export eclat
 export fpgrowth
diff --git a/src/association_rules/apriori.jl b/src/association_rules/apriori.jl
index c1bd135..ccee7a9 100644
--- a/src/association_rules/apriori.jl
+++ b/src/association_rules/apriori.jl
@@ -92,8 +92,7 @@ function apriori(txns::Transactions, min_support::Union{Int,Float64}, min_confid
     basenum = vec(count(txns.matrix, dims=1))
     min_support = min_support isa Float64 ? ceil(Int, min_support * n_transactions) : min_support
 
-    items = findall(basenum .>= min_support)
-    subtxns = BitMatrix(txns.matrix[:, items])
+    subtxns, items = RuleMiner.prune_matrix(txns.matrix,min_support)
     rules = Vector{Arule}()
 
     initials = Vector{Arule}()
diff --git a/src/data_structures/txnutils.jl b/src/data_structures/txnutils.jl
index dc3aeab..dd99bca 100644
--- a/src/data_structures/txnutils.jl
+++ b/src/data_structures/txnutils.jl
@@ -299,4 +299,44 @@ function txns_to_df(txns::SeqTxns, index::Bool = true)::DataFrame
     end
     
     return df
+end
+
+"""
+    prune_matrix(matrix::SparseMatrixCSC, min_support::Int) -> Tuple{BitMatrix, Vector{Int}}
+
+Filter and sort sparse matrix columns based on minimum support threshold.
+
+# Arguments
+- `matrix::SparseMatrixCSC`: A sparse boolean matrix where rows represent transactions and columns
+   represent items. A true value at position (i,j) indicates item j is present in transaction i.
+- `min_support::Int`: The minimum absolute support threshold. Columns with fewer than this number
+   of true values will be removed.
+
+# Returns
+A tuple containing:
+- `BitMatrix`: A pruned copy of the matrix (as a BitMatrix) containing only the frequent columns, in sorted order, and only the rows that contain at least one frequent item
+- `Vector{Int}`: A vector of column indices corresponding to the frequent columns, sorted by their sums
+
+# Description
+This helper function performs two key preprocessing steps for frequent pattern mining:
+1. Removes infrequent columns (pruning): Filters out columns whose sum is less than the minimum
+   support threshold
+2. Sorts columns by frequency: Reorders the remaining columns based on their sums in ascending order
+
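+The pruned matrix is returned as a BitMatrix for efficient boolean operations in pattern mining algorithms. Rows that contain none of the frequent items are dropped from it, so it may have fewer rows than the input.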
+
+# Example
+```julia
+txns = Txns(sparse([1 1 0; 1 0 1; 0 1 1]), ["A", "B", "C"], ["I1", "I2", "I3"])
+matrix, indices = prune_matrix(txns.matrix, 2)
+```
+"""
+function prune_matrix(matrix::SparseMatrixCSC, min_support::Int)
+    supports = sum(matrix, dims=1)  # 1×n row of column sums (indexed as supports[1, i] below)
+    sorted_items = [i for i in axes(matrix,2) if supports[1,i] >= min_support]  # keep only columns meeting min_support
+    sort!(sorted_items, by= x -> supports[1,x])  # ascending by column sum
+    
+    matrix = view(matrix,:, sorted_items) |> BitMatrix  # materialize the kept columns, in sorted order, as a BitMatrix
+
+    return matrix[vec(any(matrix, dims=2)), :], sorted_items  # row filter drops rows with no true value in any kept column
 end
\ No newline at end of file
diff --git a/src/itemsets/closed/carpenter.jl b/src/itemsets/closed/carpenter.jl
index d7a9708..e96db19 100644
--- a/src/itemsets/closed/carpenter.jl
+++ b/src/itemsets/closed/carpenter.jl
@@ -66,14 +66,8 @@ function carpenter(txns::Transactions, min_support::Union{Int,Float64})
     # Handle min_support as a float value
     min_support = min_support isa Float64 ? ceil(Int, min_support * n_transactions) : min_support
     
-    # Create tidsets (transaction ID sets) for each item
-    tidsets = [BitSet(findall(txns.matrix[:,col])) for col in 1:n_items]
-    supports = vec(sum(txns.matrix, dims=1))
-
-    # Create vectors of all items and all frequent items for mining
-    allitems = collect(1:n_items)
-    frequent_items = findall(supports .>= min_support)
-
+    matrix, sorted_items = RuleMiner.prune_matrix(txns.matrix, min_support)
+    
     # Initialize results dictionary and threading lock
     Results = Dict{Vector{Int}, Int}()
     ThreadLock = ReentrantLock()
@@ -82,50 +76,39 @@ function carpenter(txns::Transactions, min_support::Union{Int,Float64})
         # Pruning 3: Early return if itemset is already present in the output
         haskey(closed_itemsets, X) && return
         
-        # Find transactions with the itemset and calculate support
-        tidset_X = length(X) == 1 ? tidsets[X[1]] : intersect(tidsets[X]...)
-        support_X = length(tidset_X)
+        # Get closure of current itemset and map back to original indices
+        X_pos = Vector{Int}(findall(in(X), sorted_items))
+        closed_pos = RuleMiner.closure(matrix, X_pos)
+        closed = sorted_items[closed_pos]
         
-        # Pruning 1: Early return if the itemset is not frequent
-        support_X < min_support && return
-    
-        # Pruning 2: Find items that can be added without changing support
-        Y = filter(i -> length(intersect(tidset_X, tidsets[i])) == support_X, R)
-
-        # Add X to itemsets if it's closed (Y is empty)
-        if isempty(Y) 
-            lock(Lock) do
-                closed_itemsets[X] = support_X
-            end
-        # If Y is not empty, add the itemset's closure (X ∪ Y)
-        else 
+        # Calculate support
+        rows = vec(all(view(matrix, :, X_pos), dims=2))
+        support = count(rows)
+        
+        # Pruning 1: Early return if not frequent
+        support < min_support && return
+        
+        # Pruning 2: Add closure to results if not empty
+        if !isempty(closed)
             lock(Lock) do
-                closed_itemsets[sort(vcat(X, Y))] = support_X
+                closed_itemsets[closed] = support
             end
         end
         
         # Recursive enumeration
-        for i in setdiff(R, Y)
-            carpenter!(closed_itemsets, sort(vcat(X, i)), setdiff(R, [i]), Lock)
+        remaining = filter(i -> i ∉ closed, R)
+        for i in remaining
+            carpenter!(closed_itemsets, sort(vcat(X, i)), filter(>(i), remaining), Lock)
         end
     end
-
+    
     # Parallel Processing of initial itemsets
     @sync begin
-        for item in frequent_items
-            Threads.@spawn carpenter!(Results, [item], setdiff(allitems, [item]), ThreadLock)
+        for item in sorted_items
+            remaining_items = filter(x -> x > item, sorted_items)
+            Threads.@spawn carpenter!(Results, [item], remaining_items, ThreadLock)
         end
     end
     
-    # Create the result DataFrame
-    result_df = DataFrame(
-        Itemset = [RuleMiner.getnames(itemset, txns) for itemset in keys(Results)],
-        Support = [support / n_transactions for support in values(Results)],
-        N = collect(values(Results)),
-        Length = [length(itemset) for itemset in keys(Results)]
-    )
-    
-    # Sort results by support in descending order
-    sort!(result_df, :N, rev=true)
-    return result_df
+    return RuleMiner.make_itemset_df(Results, txns)
 end
\ No newline at end of file
diff --git a/src/itemsets/closed/charm.jl b/src/itemsets/closed/charm.jl
index 31c692f..4377d00 100644
--- a/src/itemsets/closed/charm.jl
+++ b/src/itemsets/closed/charm.jl
@@ -58,70 +58,72 @@ Zaki, Mohammed, and Ching-Jui Hsiao. “CHARM: An Efficient Algorithm for Closed
 """
 function charm(txns::Transactions, min_support::Union{Int,Float64})::DataFrame
     n_transactions, n_items = size(txns.matrix)
-
+    
     # Handle min_support as a float value
     min_support = min_support isa Float64 ? ceil(Int, min_support * n_transactions) : min_support
-    
-    # Create tidsets (transaction ID sets) for each item
-    tidsets = [BitSet(findall(txns.matrix[:,col])) for col in 1:n_items]
-    supports = vec(sum(txns.matrix,dims=1))
 
-    # Sort items by support in ascending order, keeping only frequent items
-    item_order = sort(findall(s -> s >= min_support, supports), by=i -> supports[i])
+    # Get pruned matrix and sorted items
+    matrix, sorted_items = RuleMiner.prune_matrix(txns.matrix, min_support)
     
     # Initialize results dictionary and threading lock
     Results = Dict{Vector{Int}, Int}()
     ThreadLock = ReentrantLock()
     
-    function charm!(closed_itemsets::Dict{Vector{Int}, Int}, prefix::Vector{Int}, eq_class::Vector{Int})
-        for (i, item) in enumerate(eq_class)
-            
+    function charm!(closed_itemsets::Dict{Vector{Int}, Int}, prefix::Vector{Int}, eq_class::Vector{Int}, rows::BitVector)
+        for (i, pos) in enumerate(eq_class)
             # Create new itemset by adding current item to prefix
-            new_itemset = vcat(prefix, item)
-            new_tidset = intersect(tidsets[new_itemset]...)
-            support = length(new_tidset)
+            new_itemset = vcat(prefix, pos)
+            new_rows = rows .& matrix[:, pos]
+            support = count(new_rows)
             
             # Skip infrequent itemsets
             support < min_support && continue
             
+            # Initialize new equivalence class
             new_eq_class = Int[]
+            
+            # Process remaining items in current equivalence class
             for j in (i+1):length(eq_class)
-
-                # Generate itemset, tidset, and support for new items in the next eq class
-                other_item = eq_class[j]
-                other_tidset = intersect(new_tidset, tidsets[other_item])
-                other_support = length(other_tidset)
+                other_pos = eq_class[j]
+                
+                # Calculate intersection with the other item
+                other_rows = new_rows .& matrix[:, other_pos]
+                other_support = count(other_rows)
                 
                 # Skip infrequent items
                 other_support < min_support && continue
-
+                
                 if support == other_support
                     # If supports are equal, add item to current itemset
-                    push!(new_itemset, other_item)
+                    push!(new_itemset, other_pos)
                 else
-                    # Otherwise, add to new equivalence class for further processing
-                    push!(new_eq_class, other_item)
+                    # Otherwise, add to new equivalence class
+                    push!(new_eq_class, other_pos)
                 end
             end
             
-            # Update closed itemsets list, ensuring thread safety
+            # Map positions back to original item indices
+            orig_itemset = sorted_items[new_itemset]
+            
+            # Update closed itemsets list with thread safety
             lock(ThreadLock) do
-                update_closed_itemsets!(closed_itemsets, new_itemset, support)
+                update_closed_itemsets!(closed_itemsets, orig_itemset, support)
             end
             
             # Recursively process new equivalence class if non-empty
-            !isempty(new_eq_class) && charm!(closed_itemsets, new_itemset, new_eq_class)
+            !isempty(new_eq_class) && charm!(closed_itemsets, new_itemset, new_eq_class, new_rows)
         end
     end
-
+    
     # Helper function to update closed itemsets
-    function update_closed_itemsets!(closed_itemsets, new_itemset, support)
+    function update_closed_itemsets!(closed_itemsets::Dict{Vector{Int}, Int}, new_itemset::Vector{Int}, support::Int)
         new_set = Set(new_itemset)
+        
+        # Check against existing closed itemsets
         for (existing_itemset, existing_support) in closed_itemsets
-            
             # Only compare itemsets with equal support
             support != existing_support && continue
-
+            
             existing_set = Set(existing_itemset)
             
             # If new itemset is a subset of an existing one, it's not closed
@@ -137,27 +139,25 @@ function charm(txns::Transactions, min_support::Union{Int,Float64})::DataFrame
         closed_itemsets[new_itemset] = support
     end
     
-    # Add single-item frequent itemsets
-    for item in item_order
-        Results[[item]] = supports[item]
+    # Process single items and add to results
+    for (pos, item) in enumerate(sorted_items)
+        Results[[item]] = count(matrix[:, pos])
     end
     
     # Parallel processing of top-level equivalence classes
     @sync begin
-        for (i, item) in enumerate(item_order)
-            Threads.@spawn charm!(Results, [item], item_order[i+1:end])
+        for (i, pos) in enumerate(1:length(sorted_items))
+            Threads.@spawn begin
+                # Get initial rows for this item
+                initial_rows = matrix[:, pos]
+                
+                # Only process if it meets minimum support
+                if count(initial_rows) >= min_support
+                    charm!(Results, [pos], collect((i+1):length(sorted_items)), initial_rows)
+                end
+            end
         end
     end
     
-    # Create the result DataFrame
-    result_df = DataFrame(
-        Itemset = [RuleMiner.getnames(itemset, txns) for itemset in keys(Results)],
-        Support = [support / n_transactions for support in values(Results)],
-        N = collect(values(Results)),
-        Length = [length(itemset) for itemset in keys(Results)]
-    )
-    
-    # Sort results by support in descending order
-    sort!(result_df, :N, rev=true)
-    return result_df
+    return RuleMiner.make_itemset_df(Results, txns)
 end
\ No newline at end of file
diff --git a/src/itemsets/closed/fpclose.jl b/src/itemsets/closed/fpclose.jl
index c188a90..51fe44d 100644
--- a/src/itemsets/closed/fpclose.jl
+++ b/src/itemsets/closed/fpclose.jl
@@ -126,14 +126,5 @@ function fpclose(data::Union{Transactions,FPTree}, min_support::Union{Int,Float6
     # Start the mining process
     fpclose!(Results, tree, Int[], min_support)
 
-    df = DataFrame(
-        Itemset = [data.colkeys[itemset] for itemset in keys(Results)], 
-        Support = [support / n_transactions for support in values(Results)],
-        N = collect(values(Results)),
-        Length = [length(itemset) for itemset in keys(Results)]
-    )
-    
-    sort!(df, [:Length, :N], rev=[false, true])
-    
-    return df
+    return RuleMiner.make_itemset_df(Results, data)
 end
\ No newline at end of file
diff --git a/src/itemsets/closed/lcm.jl b/src/itemsets/closed/lcm.jl
index 83774e0..2f36cb9 100644
--- a/src/itemsets/closed/lcm.jl
+++ b/src/itemsets/closed/lcm.jl
@@ -66,69 +66,57 @@ function LCM(txns::Transactions, min_support::Union{Int,Float64})::DataFrame
     # Handle min_support as a float value
     min_support = min_support isa Float64 ? ceil(Int, min_support * n_transactions) : min_support
 
-    # Create tidsets (transaction ID sets) for each item
-    tidsets = [BitSet(findall(txns.matrix[:,col])) for col in 1:n_items]
-    supports = vec(sum(txns.matrix, dims=1))
-
-    # Sort items by support in descending order, keeping only frequent items
-    sorted_items = sort(findall(s -> s >= min_support, supports), by=i -> supports[i], rev=true)
-
+    matrix, sorted_items = prune_matrix(txns.matrix, min_support)
+    
     # Dictionary to store closed itemsets and their supports
-    Results = Dict{Vector{Int}, Int}()
-
+    results = Dict{Vector{Int}, Int}()
     ThreadLock = ReentrantLock()
-
-    function lcm!(closed_itemsets::Dict{Vector{Int}, Int}, current::Vector{Int}, tidset::BitSet, dict_lock::ReentrantLock)
-        closure = findall(i -> length(intersect(tidset, tidsets[i])) == length(tidset), 1:n_items)
-        support = length(tidset)
+    
+    function lcm!(closed_itemsets::Dict{Vector{Int}, Int}, current::Vector{Int}, rows::BitVector, dict_lock::ReentrantLock)
+        # Get closure of current itemset
+        closed = sorted_items[closure(matrix, current)]  # Map back to original indices
+        support = count(rows)
         
         lock(dict_lock) do
             # If we've seen this closure with equal or higher support, skip it
-            (haskey(closed_itemsets, closure) && closed_itemsets[closure] >= support) && return
-
+            (haskey(closed_itemsets, closed) && closed_itemsets[closed] >= support) && return
+            
             # Add Closure to Dict
-            if !isempty(closure)
-                closed_itemsets[closure] = support
+            if !isempty(closed)
+                closed_itemsets[closed] = support
             end
         end
         
+        # Get current item's position in sorted_items for comparison
+        curr_pos = isempty(current) ? 0 : findfirst(==(current[end]), 1:size(matrix, 2))
+        
         # Try extending the itemset with each frequent item
-        for item in sorted_items
-
+        for new_pos in eachindex(sorted_items)
+            orig_item = sorted_items[new_pos]
+            
             # Skip if the item is already in the closure
-            item ∈ closure && continue
-
+            orig_item ∈ closed && continue
+            
             # Skip if the item comes before the last item in the current itemset
-            item <= (isempty(current) ? 0 : current[end]) && continue
+            new_pos <= curr_pos && continue
             
-            # Compute the new tidset for the extended itemset
-            new_tidset = intersect(tidset, tidsets[item])
-
-            # Skip if the new tidset doesn't meet minimum support
-            length(new_tidset) < min_support && continue
+            # Compute the new rows that contain both the current itemset and the new item
+            new_rows = rows .& matrix[:, new_pos]
             
-            # Recurse with new tidset and itemset
-            lcm!(closed_itemsets, vcat(current, item), new_tidset, dict_lock)
+            # Skip if the new rows don't meet minimum support
+            count(new_rows) < min_support && continue
+            
+            # Recurse with new rows and itemset
+            lcm!(closed_itemsets, vcat(current, new_pos), new_rows, dict_lock)
         end
     end
-
+    
     # Start the LCM process with size-1 itemsets
     @sync begin
-        for item in sorted_items
-            Threads.@spawn lcm!(Results, [item], tidsets[item], ThreadLock)
+        for pos in 1:length(sorted_items)
+            Threads.@spawn lcm!(results, [pos], matrix[:, pos], ThreadLock)
         end
     end
-
-    # Convert results to a DataFrame
-    result = DataFrame(
-        Itemset = [RuleMiner.getnames(itemset, txns) for itemset in keys(Results)],
-        Support = [support / n_transactions for support in values(Results)],
-        N = collect(values(Results)),
-        Length = [length(itemset) for itemset in keys(Results)]
-    )
-
-    # Sort results by support in descending order
-    sort!(result, :N, rev=true)
-
-    return result
-end
+    
+    return make_itemset_df(results, txns)
+end
\ No newline at end of file
diff --git a/src/itemsets/frequent/eclat.jl b/src/itemsets/frequent/eclat.jl
index 9a4bc53..95460c8 100644
--- a/src/itemsets/frequent/eclat.jl
+++ b/src/itemsets/frequent/eclat.jl
@@ -62,40 +62,30 @@ result = eclat(txns, 5_000)
 # References
 Zaki, Mohammed. “Scalable Algorithms for Association Mining.” Knowledge and Data Engineering, IEEE Transactions On 12 (June 1, 2000): 372–90. https://doi.org/10.1109/69.846291.
 """
-function eclat(txns::Transactions, min_support::Union{Int,Float64})::DataFrame
+function eclat(txns::Transactions, min_support::Union{Int,Float64})#::DataFrame
     n_transactions = size(txns.matrix, 1)
     
     # Handle min_support as a float value
     min_support = min_support isa Float64 ? ceil(Int, min_support * n_transactions) : min_support
 
-    # Calculate initial supports and sort the columns
-    item_index = collect(1:size(txns.matrix, 2))
-    item_supports = Dict(zip(item_index, vec(sum(txns.matrix, dims=1))))
-    
-    frequent_items = [item for item in item_index if item_supports[item] >= min_support]
-    sorted_items = sort(frequent_items, by= x -> item_supports[x])
+    matrix, sorted_items = prune_matrix(txns.matrix, min_support)
 
     # Initialize results dictionary and threading lock
-    Results = Dict{Vector{Int}, Int}()
-    ThreadLock = ReentrantLock()
-
-    # Add single-item frequent itemsets to results
-    for item in sorted_items
-        Results[[item]] = item_supports[item]
-    end
+    results = Dict(zip([[i] for i in sorted_items], vec(sum(matrix,dims=1))))
+    thread_lock = ReentrantLock()
 
     # Define recursive eclat function and run it on the data
-    function eclat!(lineage::Vector{Int}, items::Vector{Int}, trans::Transactions, min_support::Int)
+    function eclat!(results::Dict{Vector{Int}, Int}, lineage::Vector{Int}, items::Vector{Int}, matrix::BitMatrix, min_support::Int)
         for (i, item) in enumerate(items)
             new_lineage = vcat(lineage, item)
-            support = sum(all(trans.matrix[:, new_lineage], dims=2))
+            support = sum(all(view(matrix, :, new_lineage), dims=2))
     
             # Skip this itemset if it does not meet minimum suppot
             support < min_support && continue
 
             # Add the Itemset to results
-            lock(ThreadLock) do
-                Results[new_lineage] = support
+            lock(thread_lock) do
+                results[sorted_items[new_lineage]] = support
             end
 
             # Generate new possible items
@@ -105,25 +95,15 @@ function eclat(txns::Transactions, min_support::Union{Int,Float64})::DataFrame
             isempty(new_items) && continue
             
             # Recurse with new items
-            eclat!(new_lineage, new_items, trans, min_support)
+            eclat!(results, new_lineage, new_items, matrix, min_support)
         end
     end
 
     @sync begin
-        for (i, item) in enumerate(sorted_items)
-            Threads.@spawn eclat!([item], sorted_items[i+1:end], txns, min_support)
+        for item in eachindex(sorted_items)
+            Threads.@spawn eclat!(results, [item], collect(item+1:length(sorted_items)), matrix, min_support)
         end
     end
     
-    # Create the result DataFrame
-    result_df = DataFrame(
-        Itemset = [RuleMiner.getnames(itemset, txns) for itemset in keys(Results)],
-        Support = [support / n_transactions for support in values(Results)],
-        N = collect(values(Results)),
-        Length = [length(itemset) for itemset in keys(Results)]
-    )
-    
-    # Sort results by support in descending order
-    sort!(result_df, :N, rev=true)
-    return result_df
+    return make_itemset_df(results,txns)
 end
\ No newline at end of file
diff --git a/src/itemsets/frequent/fpgrowth.jl b/src/itemsets/frequent/fpgrowth.jl
index 47f8c2e..c229e8b 100644
--- a/src/itemsets/frequent/fpgrowth.jl
+++ b/src/itemsets/frequent/fpgrowth.jl
@@ -108,15 +108,5 @@ function fpgrowth(data::Union{Transactions,FPTree}, min_support::Union{Int,Float
     # Mine frequent sets
     fpgrowth!(Results,tree, Int[], min_support)
     
-    # Create the result DataFrame
-    result_df = DataFrame(
-        Itemset = [data.colkeys[itemset] for itemset in keys(Results)],
-        Support = [support / n_transactions for support in values(Results)],
-        N = collect(values(Results)),
-        Length = [length(itemset) for itemset in keys(Results)]
-    )
-    
-    # Sort results by support in descending order
-    sort!(result_df, :N, rev=true)
-    return result_df
+    return RuleMiner.make_itemset_df(Results, data)
 end
\ No newline at end of file
diff --git a/src/itemsets/itemset_utils.jl b/src/itemsets/itemset_utils.jl
new file mode 100644
index 0000000..4204fec
--- /dev/null
+++ b/src/itemsets/itemset_utils.jl
@@ -0,0 +1,118 @@
+# itemset_utils.jl
+# Utilities for mining frequent itemsets
+#=
+Copyright (c) 2024 Jared Schwartz
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+=#
+
+"""
+    make_itemset_df(results::Dict{Vector{Int}, Int}, txns::Union{Transactions,FPTree})::DataFrame
+
+Convert a dictionary of frequent itemsets and their supports into a formatted DataFrame.
+
+# Arguments
+- `results::Dict{Vector{Int}, Int}`: Dictionary mapping itemsets (as vectors of integer indices) to
+   their absolute support counts. Keys are vectors representing itemsets and values are 
+   the number of transactions containing that itemset.
+- `txns::Union{Transactions,FPTree}`: The Transactions or FPTree object used for mining, providing the
+   item names and the total number of transactions (`n_transactions`).
+
+# Returns
+A DataFrame with the following columns:
+- `Itemset`: Vector{String} - The items in each frequent itemset, with integer indices converted 
+   to their original item names
+- `Support`: Float64 - The relative support of each itemset (proportion of transactions containing it)
+- `N`: Int - The absolute support count (number of transactions containing the itemset)
+- `Length`: Int - The number of items in each itemset
+
+The DataFrame is sorted by absolute support (N) in descending order.
+
+# Example
+```julia
+# Assuming we have mined results and a transactions object
+results = Dict(
+    [1, 2] => 50,  # Itemset of items 1 and 2 appears in 50 transactions
+    [1] => 75      # Item 1 appears in 75 transactions
+)
+txns = Txns(...)   # Transactions object with item names "A" and "B"
+
+df = make_itemset_df(results, txns)
+
+# Returns DataFrame:
+# Itemset        Support    N    Length
+# ["A"]         0.75      75      1
+# ["A", "B"]    0.50      50      2
+```
+"""
+function make_itemset_df(results::Dict{Vector{Int}, Int}, txns::Union{Transactions,FPTree})::DataFrame
+    result_df = DataFrame(
+        Itemset = [RuleMiner.getnames(itemset, txns) for itemset in keys(results)],  # NOTE(review): getnames is defined elsewhere; presumably maps item indices to names
+        Support = [support / txns.n_transactions for support in values(results)],  # count divided by the object's n_transactions field
+        N = collect(values(results)),  # raw counts; keys()/values() of an unmodified Dict iterate in the same order, so columns line up
+        Length = [length(itemset) for itemset in keys(results)]  # number of items in each itemset
+    )
+    sort!(result_df, :N, rev=true)  # largest count first
+    return result_df
+end
+
+
+"""
+    closure(matrix::BitMatrix, itemset::Vector{Int}) -> Vector{Int}
+
+Calculate the closure of an itemset in a binary transaction matrix.
+
+# Arguments
+- `matrix::BitMatrix`: A binary matrix where rows represent transactions and columns represent items.
+   True values indicate item presence in a transaction.
+- `itemset::Vector{Int}`: Vector of column indices representing the itemset whose closure 
+   should be computed.
+
+# Returns
+- `Vector{Int}`: Column indices of the closure - all items that appear in every transaction 
+   containing the input itemset.
+
+# Description
+The closure operation finds all items that are functionally implied by a given itemset
+in the transaction data. It works by:
+1. Finding all transactions that contain the input itemset
+2. Identifying which items appear in all of these transactions
+
+An item is in the closure if it appears in every transaction that contains the input itemset.
+The input itemset is always a subset of its closure.
+
+# Example
+```julia
+# Create a binary matrix with 3 transactions and 4 items
+matrix = BitMatrix([
+    1 1 1 0;  # Transaction 1 contains items 1, 2, and 3
+    1 1 1 0;  # Transaction 2 contains items 1, 2, and 3
+    0 0 0 1   # Transaction 3 contains only item 4
+])
+
+# Find closure of itemset [1]
+closed = closure(matrix, [1])  # Returns [1, 2, 3]
+# Items 2 and 3 are in the closure because they appear in
+# all transactions containing item 1
+```
+"""
+function closure(matrix::BitMatrix, itemset::Vector{Int})
+        rows = vec(all(view(matrix,:, itemset), dims=2))  # row mask: true where every column in itemset is set
+        return findall(vec(all(view(matrix, rows, :), dims=1)))  # columns set in all masked rows (every column if no row matches)
+end
\ No newline at end of file
diff --git a/src/itemsets/maximal/fpmax.jl b/src/itemsets/maximal/fpmax.jl
index f6cdef1..82ca63f 100644
--- a/src/itemsets/maximal/fpmax.jl
+++ b/src/itemsets/maximal/fpmax.jl
@@ -136,15 +136,5 @@ function fpmax(data::Union{Transactions,FPTree}, min_support::Union{Int,Float64}
         is_maximal && (Results[itemset] = support)
     end
 
-    # Create the result DataFrame
-    result_df = DataFrame(
-        Itemset = [data.colkeys[itemset] for itemset in keys(Results)],
-        Support = [support / n_transactions for support in values(Results)],
-        N = collect(values(Results)),
-        Length = [length(itemset) for itemset in keys(Results)]
-    )
-    
-    # Sort results by length in descending order, then by support
-    sort!(result_df, [:Length, :Support], rev=true)
-    return result_df
+    return RuleMiner.make_itemset_df(Results, data)
 end
\ No newline at end of file
diff --git a/src/itemsets/maximal/genmax.jl b/src/itemsets/maximal/genmax.jl
index c4ea47c..1d5b26b 100644
--- a/src/itemsets/maximal/genmax.jl
+++ b/src/itemsets/maximal/genmax.jl
@@ -63,84 +63,80 @@ result = genmax(txns, 5_000)
 Gouda, Karam, and Mohammed J. Zaki. “GenMax: An Efficient Algorithm for Mining Maximal Frequent Itemsets.” Data Mining and Knowledge Discovery 11, no. 3 (November 1, 2005): 223–42. https://doi.org/10.1007/s10618-005-0002-x.
 """
 function genmax(txns::Transactions, min_support::Union{Int,Float64})::DataFrame
-    n_transactions, n_items = size(txns.matrix)
     
     # Handle min_support as a float value
-    min_support = min_support isa Float64 ? ceil(Int, min_support * n_transactions) : min_support
+    min_support = min_support isa Float64 ? ceil(Int, min_support * txns.n_transactions) : min_support
 
-    # Calculate initial supports for each item
-    item_supports = Dict(i => sum(txns.matrix[:, i]) for i in 1:n_items)
+    # Get pruned matrix and sorted items; sorted_items maps pruned columns to original item indices
+    matrix, sorted_items = RuleMiner.prune_matrix(txns.matrix, min_support)
     
-    # Sort items by support in descending order and filter for frequent items
-    sorted_items = sort(collect(keys(item_supports)), by=i -> item_supports[i], rev=true)
-    frequent_items = filter(i -> item_supports[i] >= min_support, sorted_items)
+    # Search output (candidates, writes guarded by thread_lock) and final maximal itemsets (results)
+    results = Dict{Vector{Int}, Int}()
+    candidates = Dict{Vector{Int}, Int}()
+    thread_lock = ReentrantLock()
 
-    # Create BitSets for each frequent item's transactions
-    item_bitsets = [BitSet(findall(txns.matrix[:, i])) for i in frequent_items]
-
-    # Initialize the Maximal Frequent Itemsets (MFI) list and threading lock
-    Results = Vector{Vector{Int}}()
-    ThreadLock = ReentrantLock()
-
-    # Depth-First Search to find maximal frequent itemsets
-    function genmax!(itemset::Vector{Int}, start_idx::Int, tidset::BitSet)
+    function genmax!(itemset::Vector{Int}, start_idx::Int, rows::BitVector)
         local_maximal = true
         
-        for i in start_idx:length(frequent_items)
-            item = frequent_items[i]
-            new_tidset = intersect(tidset, item_bitsets[i])
+        for i in start_idx:size(matrix, 2)
+            # Rows that still contain the itemset once column i is added, and their count
+            new_rows = rows .& matrix[:, i]
+            new_support = count(new_rows)
             
             # Skip if the new itemset is not frequent
-            length(new_tidset) < min_support && continue
+            new_support < min_support && continue
             
             local_maximal = false
-            new_itemset = push!(copy(itemset), item)
-            genmax!(new_itemset, i + 1, new_tidset)
+            new_itemset = push!(copy(itemset), i)
+            genmax!(new_itemset, i + 1, new_rows)
         end
         
         # If itemset is empty or not locally maximal, return
         (isempty(itemset) || !local_maximal) && return
         
-        lock(ThreadLock) do
-            push!(Results, itemset)
+        # Map positions back to original item indices
+        orig_itemset = sorted_items[itemset]
+        support = count(rows)
+        
+        lock(thread_lock) do
+            candidates[orig_itemset] = support
         end
     end
 
     # Start the depth-first search in parallel
     @sync begin
-        for (i, item) in enumerate(frequent_items)
-            Threads.@spawn genmax!([item], i + 1, item_bitsets[i])
+        for i in 1:length(sorted_items)
+            Threads.@spawn begin
+                initial_rows = matrix[:, i]
+                initial_support = count(initial_rows)
+                
+                # Only process if it meets minimum support
+                if initial_support >= min_support
+                    genmax!([i], i + 1, initial_rows)
+                end
+            end
         end
     end
 
-    # Filter candidates to get final maximal sets
-    sort!(Results, by=length, rev=true)
-    maximal = trues(length(Results))
-    
-    for i in 1:length(Results)
-        #Skip if the item has already been marked as non-maximal
-        !maximal[i] && continue
+    # Keep only the candidates that no other candidate properly contains
+    for (itemset, support) in candidates
+        is_maximal = true
+        itemset_set = Set(itemset)
         
-        for j in 1:length(Results)
-            # Skip if item is being compared to its self or if [j] has been marked as non-maximal
-            (i == j || !maximal[j]) && continue
+        for other_itemset in keys(candidates)
+            length(other_itemset) > length(itemset) || continue  # only a longer itemset can be a proper superset
             
-            # Check if Results[j] is a subset of Results[i] and mark it not maximal if it is
-            Results[j] ⊊ Results[i] && (maximal[j] = false)
+            if issubset(itemset_set, Set(other_itemset))
+                is_maximal = false
+                break
+            end
+        end
+        
+        # Add to results if maximal
+        if is_maximal
+            results[itemset] = support
         end
     end
 
-    result = Results[maximal]
-
-    # Create output DataFrame
-    df = DataFrame(
-        Itemset = [RuleMiner.getnames(itemset, txns) for itemset in result],
-        Support = [length(intersect([item_bitsets[findfirst(==(item), frequent_items)] for item in itemset]...)) / n_transactions for itemset in result],
-        N = [length(intersect([item_bitsets[findfirst(==(item), frequent_items)] for item in itemset]...)) for itemset in result],
-        Length = [length(itemset) for itemset in result]
-    )
-
-    # Sort by length (descending) and then by support (descending)
-    sort!(df, [:Length, :Support], rev=true)
-    return df
+    return RuleMiner.make_itemset_df(results, txns)
 end
\ No newline at end of file