Help language development. Donate to The Perl Foundation
use v6.d; use Data::Reshapers; use ML::AssociationRuleLearning::Preprocessing; use ML::TriesWithFrequencies; class ML::AssociationRuleLearning::Apriori does ML::AssociationRuleLearning::Preprocessing { has @.transactions; has @.freqSets; has $!freqEnough; has ML::TriesWithFrequencies::Trie $!trTrans; has $.nTransactions is rw = Whatever; has @.result is rw; ##------------------------------------------------------- ## Scan transaction ##------------------------------------------------------- method scan-transaction($trans, Int $k, Str :$sep = '∩', Bool :$candidate-sort = False) { my @candidates = $trans.combinations($k); # It looks like the function combinations produces lists in which # the elements appear in the same order as in the original list. # I do not think that is guaranteed, so there is an option to sort the lists # after they are generated. if $candidate-sort { @candidates = @candidates.map({ $_.sort.List }).List } return @candidates.grep({ $_[^($_.elems-1)].join($sep) ∈ $!freqEnough && $_.tail ∈ $!freqEnough}).List; } ##------------------------------------------------------- ## Preprocess ##------------------------------------------------------- method preprocess($transData) { if self.is-map-of-sets($transData) { # Assuming the we are given an incidence matrix in "row-wise" form. $!nTransactions = $transData.elems; @!transactions = $transData.map({ $_.keys }).sort.Array; } elsif self.is-list-of-lists($transData) { # List of lists -- "primary" use case $!nTransactions = $transData.elems; @!transactions = $transData.map({ $_.unique.sort.Array }).Array; } else { die 'Do not know how to process the transactions argument.' } return @!transactions; } ##------------------------------------------------------- ## Apriori ##------------------------------------------------------- method frequent-sets(Numeric :$min-support!, Numeric :$min-number-of-items = 1, Numeric :$max-number-of-items = Inf, Bool :$counts = False, Str :$sep = '∩', Bool :$candidate-sort = False) { if !($!nTransactions ~~ Numeric && $!nTransactions > 0) { die 'No pre-processed transactions. ($!nTransactions is not a positive number).'; } if ! @!transactions { die 'No pre-processed transactions. (%!transactions is empty).'; } # Reset accumulated frequent sets holder @!freqSets = (); # Find initial, single frequent sets # Make single items baskets trie my $trBase = trie-create(self.really-flat(@!transactions).map({[$_,]}).Array); # Remove the items that are not frequent enough $trBase = $trBase.remove-by-threshold($min-support * $!nTransactions); # Verify early stop if $trBase.leafQ { warn "All items have support less than $min-support (≈{ ceiling($min-support * self.nTransactions) } transactions.)"; return Empty; } # Initial set of frequent sets $!freqEnough = Set($trBase.words.grep({ $_.elems == 1 }).List); # Gather the first trie my %allTries = 1 => $trBase; # Main Apriori loop for (2...$max-number-of-items) -> $k { # Scan the transactions and get viable candidates my @candidates = flatten( @!transactions.map({ self.scan-transaction($_, $k, :$sep, :$candidate-sort) }), max-level=>1); # Check should exit the loop last if [email protected]; # Make trie with viable candidates my $trSets = trie-create(@candidates); # Remove transactions that are not frequent enough my $trSets2 = $trSets.remove-by-threshold($min-support * $!nTransactions); # Get frequent sets from the trie my @new = $trSets2.words.grep({ $_.elems == $k }).map({ $_.join($sep) }).Array; # Exit loop if no new frequent sets last if [email protected]; # Update frequent sets $!freqEnough (|)= @new; # Add to gathered tries %allTries.push( $k => $trSets2 ); } # Get frequent sets @!freqSets = $!freqEnough.keys.map({ $_.split($sep) }).Array; my @res = @!freqSets.sort.List; # Filter by min length @res = @res.grep({ $_.elems ≥ $min-number-of-items }).List; # Get counts from tries @res = @res.map({ $_.List => %allTries{$_.elems}.retrieve($_).value }).Array; # Counts to supports if !$counts { @res = @res.map({ $_.key => $_.value / $!nTransactions }).Array; } # Result @!result = @res; return @res; } }