Hello,
as stated in
Associative Classification in R: arc,
arulesCBA, and rCBA
by Michael Hahsler, Ian Johnson, Tomáš Kliegr and Jaroslav Kuchař
https://journal.r-project.org/archive/2019/RJ-2019-048/RJ-2019-048.pdf
'''
Automatic threshold tuning. Association rule learning is notorious for how difficult it is to set the
minimum support and minimum confidence thresholds. The necessity to set these thresholds applies
also to CBA. The arc package contains an optional procedure for automatic setting of these thresholds
detailed in (Kliegr and Kuchar, 2019). The package contains a wrapper for the apriori function from
the arules package that iteratively changes mining parameters (maximum antecedent length, minimum
support threshold and minimum confidence threshold) until a desired number of rules is obtained, all
options are exhausted or a preset time limit is reached. The desired number of rules can be specified
by the target_rule_count parameter.
'''
Does the Python code (pyarc) support this: "The desired number of rules can be specified
by the target_rule_count parameter"?
I am trying to get the best prediction / best rules when the number of rules is limited to the target_rule_count value.
Can CBA be used for this, or top_rules ("Function for finding the best n (target_rule_count) rules from transaction list")?
I tried setting target_rule_count = 2, but I still get many rules.
Here is an example:
'''
from pyarc import CBA
from pyarc.data_structures import (
TransactionDB
)
from pyarc.algorithms import (
top_rules,
createCARs,
M1Algorithm,
generateCARs,
)
from sklearn.metrics import confusion_matrix
# Column names: eight binary feature columns F1..F8 followed by the class column.
header1 = [f"F{i}" for i in range(1, 9)] + ["Target"]

# One row per transaction; the last value of each row is the Target value.
rows1 = [
    [1, 1, 0, 0, 0, 0, 1, 0, 1],
    [1, 1, 0, 0, 0, 0, 1, 1, 0],
    [1, 1, 0, 0, 0, 0, 1, 1, 0],
    [0, 1, 1, 0, 0, 0, 1, 1, 0],
    [1, 1, 1, 0, 0, 0, 1, 0, 1],
    [1, 1, 1, 0, 0, 0, 1, 1, 0],
    [0, 1, 1, 0, 0, 0, 1, 1, 1],
    [1, 1, 1, 0, 0, 0, 1, 1, 0],
    [1, 1, 0, 1, 0, 1, 1, 0, 0],
    [1, 0, 0, 1, 1, 1, 1, 1, 0],
    [1, 0, 1, 1, 1, 1, 1, 0, 1],
    [1, 1, 0, 1, 1, 1, 1, 1, 0],
    [0, 1, 1, 1, 1, 1, 1, 1, 0],
    [1, 0, 1, 1, 0, 1, 1, 1, 0],
    [1, 0, 0, 1, 0, 1, 1, 1, 0],
    [0, 0, 0, 1, 0, 1, 1, 1, 0],
    [1, 0, 0, 1, 0, 1, 1, 1, 0],
    [0, 1, 0, 0, 0, 0, 1, 1, 0],
    [0, 1, 0, 0, 1, 0, 0, 0, 1],
    [1, 0, 0, 1, 1, 1, 0, 0, 1],
    [0, 1, 0, 1, 1, 1, 0, 0, 1],
    [0, 1, 0, 1, 0, 1, 0, 0, 1],
]

# Ground-truth labels: the last column of every row, in row order.
target = [row[-1] for row in rows1]
# Wrap the raw rows and their column names in a pyarc TransactionDB.
transactions = TransactionDB(rows1, header1)

# Other parameter sets that were tried (kept for reference):
#cba = CBA(confidence= 0.7)
#cba = CBA(support=0.60, confidence=0.8, algorithm="m1" , maxlen = 3)
cba = CBA(support=0.20, confidence=0.8, algorithm="m1", maxlen=6)  # good
cba.fit(transactions)
print(cba.predict(transactions))

# Print every rule held by the fitted classifier, one per line.
print('rules')
for _rule in cba.clf.rules:
    print(_rule)

# Bare attribute reads below show nothing when run as a script
# (presumably leftovers from an interactive session).
cba.clf.rules
print('cba.clf.default_class')
print(cba.clf.default_class)
cba.clf.default_class_attribute
cba.clf.default_class_support
print('default_class_confidence', cba.clf.default_class_confidence)

#print('\n *** predict_matched_rules ***')
#[print(x) for x in cba.predict_matched_rules(transactions) ]

# Show the predicted probabilities scaled by 100 and truncated to integers.
print('\n predict_probability')
print([int(prob * 100) for prob in cba.predict_probability(transactions)])
# Run generateCARs with several maxlen settings (same support/confidence).
# Each call rebinds `cars`, so only the result of the last call stays bound.
for _maxlen in (5, 2, 1, 1):
    cars = generateCARs(transactions, maxlen=_maxlen, support=20, confidence=30)

# NOTE(review): the top_rules excerpt quoted further down in this file stops
# searching once `rule_count >= target_rule_count`, which suggests the parameter
# is a minimum to reach rather than a cap on the number of returned rules --
# confirm against the pyarc source.
rules = top_rules(
    transactions.string_representation,
    init_conf=0.4,
    conf_step=0.1,
    init_support=20,
    supp_step=5,
    minlen=2,
    init_maxlen=4,
    target_rule_count=2,
    total_timeout=10000.,
)
'''
len(rules)
1085
'''
# Convert the mined rules into class association rule objects.
cars = createCARs(rules)
'''
len(cars)
1085
'''
print('createCARs : number of Class Association Rules after optimization = ', len(cars))

# Build a classifier from the CARs with the M1 algorithm and score it on the
# same transactions it was built from.
ARC_classifier = M1Algorithm(cars, transactions).build()
train_ARC_accuracy = ARC_classifier.test_transactions(transactions)
#m1clf = classifier.build()

# Predictions are cast to int before being compared with `target`.
train_predicted_ARC = list(map(int, ARC_classifier.predict_all(transactions)))
train_ARC_classifier_predict_probability = ARC_classifier.predict_probability_all(transactions)

print('TRAIN confusion_matrix for ARC)')
print(confusion_matrix(target, train_predicted_ARC))
'''
example
rules[22]
('Target:=:0', ('F5:=:0', 'F2:=:1', 'F7:=:1'), 0.3181818181818182, 0.7)
rules[23]
('F2:=:1', ('Target:=:0', 'F5:=:0'), 0.3181818181818182, 0.6363636363636364)
'''
'''
def top_rules(transactions,
appearance={},
target_rule_count=1000,
init_support=0.,
init_conf=0.5,
conf_step=0.05,
supp_step=0.05,
minlen=2,
init_maxlen=3,
total_timeout=100.,
max_iterations=30):
"""Function for finding the best n (target_rule_count)
rules from transaction list
(my question: should the comparison below be < or > ?)
if (rule_count >= target_rule_count):
flag = False
print("Target rule count satisfied:", target_rule_count)
'''
q=0  # NOTE(review): `q` is not used anywhere in the visible snippet; presumably a leftover debugging anchor -- confirm
'''
Thank you very much in advance