The data need to be prepared as a Pandas dataframe. Here we have 10 transactions with three items called A, B and C. True means that a transaction contains the item.
import pandas as pd
# One row per transaction, one boolean column per item (A, B, C).
df = pd.DataFrame(
    [
        [True, True, True],
        [True, False, False],
        [True, True, True],
        [True, False, False],
        [True, True, True],
        [True, False, True],
        [True, True, True],
        [False, False, True],
        [False, True, True],
        [True, False, True],
    ],
    columns=["A", "B", "C"],
)
df
Next, we need to set up the R package arules and rpy2 to connect to R. To install arules, open R and run `install.packages("arules")`. To install rpy2, run `pip install rpy2`.
# rpy2 provides the bridge from Python to an embedded R session.
from rpy2.robjects import pandas2ri
# Enable rpy2's pandas conversion so the DataFrame `df` can be passed
# straight to R functions below (see the rpy2 pandas2ri documentation).
pandas2ri.activate()
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
# Load the R package arules; its functions are then reachable as attributes
# of this object (e.g. arules.apriori, arules.DATAFRAME).
arules = importr("arules")
# some helper functions
def arules_as_matrix(x, what="items"):
    """Coerce part of an arules object to a matrix on the R side.

    Builds and calls the R function ``function(x) as(<what>(x), "matrix")``.

    Args:
        x: An R object from arules (for example the result of
            ``arules.apriori``).
        what: Name of the R accessor applied to ``x`` before the coercion,
            such as ``"items"``, ``"lhs"`` or ``"rhs"``.

    Returns:
        Whatever rpy2 hands back for the R matrix under the conversion
        rules that are active at call time.
    """
    # NOTE: `what` is spliced verbatim into R source code, so it must come
    # from trusted code and never from external input.
    to_matrix = ro.r(f'function(x) as({what}(x), "matrix")')
    return to_matrix(x)
def arules_as_dict(x, what="items"):
    """Coerce part of an arules object to a Python dict of lists.

    Builds and calls the R function ``function(x) as(<what>(x), "list")``,
    then re-keys the resulting list by 0-based position.

    Args:
        x: An R object from arules (for example the result of
            ``arules.apriori``).
        what: Name of the R accessor applied to ``x`` before the coercion,
            such as ``"items"``, ``"lhs"`` or ``"rhs"``.

    Returns:
        A dict whose keys are the positional names 0..n-1 and whose values
        are the elements of the R list, each turned into a Python list.
    """
    # NOTE: `what` is spliced verbatim into R source code, so it must come
    # from trusted code and never from external input.
    r_list = ro.r(f'function(x) as({what}(x), "list")')(x)
    # Overwrite the R-side names with integer positions; these names become
    # the dictionary keys below.
    r_list.names = list(range(len(r_list)))
    return dict(zip(r_list.names, map(list, r_list)))
def arules_quality(x):
    """Return the ``quality`` slot of an arules object.

    Args:
        x: An object exposing a ``slots`` mapping (in this file, an rpy2
            wrapper around arules itemsets or rules).

    Returns:
        The value stored under ``x.slots["quality"]``.
    """
    return x.slots["quality"]
# Mine frequent itemsets from the transactions with support threshold 0.1.
itsets = arules.apriori(
    df,
    parameter=ro.ListVector({"supp": 0.1, "target": "frequent itemsets"}),
)
print(arules.DATAFRAME(itsets))
The frequent itemsets can be accessed as a binary matrix.
# Coerce the mined itemsets to a matrix with the helper and print it.
its = arules_as_matrix(itsets)
print(its)
Access the itemsets as a dictionary.
# Same itemsets as a dict of item lists keyed by position.
# Note: this rebinds `its`, replacing the matrix assigned earlier.
its = arules_as_dict(itsets)
print(its)
Accessing the quality measures
# Bare expression: a notebook displays the returned quality slot as the
# cell output; in a plain script the value is simply discarded.
arules_quality(itsets)
# Mine association rules from the same transactions, using a support
# threshold of 0.1 and a confidence threshold of 0.8.
rules = arules.apriori(
    df,
    parameter=ro.ListVector({"supp": 0.1, "conf": 0.8}),
)
print(arules.DATAFRAME(rules))
Get the left-hand-side, the right-hand-side and the rule quality.
# Left- and right-hand sides of the rules, coerced to matrices.
lhs = arules_as_matrix(rules, what="lhs")
print(lhs)
rhs = arules_as_matrix(rules, what="rhs")
print(rhs)

# The same two sides as position-keyed dictionaries (rebinds lhs and rhs).
lhs = arules_as_dict(rules, what="lhs")
print(lhs)
rhs = arules_as_dict(rules, what="rhs")
print(rhs)

# Quality slot of the rules; shown as cell output when run in a notebook.
arules_quality(rules)