@comment{{Bibliography on Association Rule Mining Maintained by: Michael Hahsler Contribute at: https://github.com/mhahsler/arules/blob/master/docs/association_rules.bib }}
@comment{NOTE(review): the citation key keeps its historical 1988 suffix so existing citations still resolve; the article itself appeared in Machine Learning volume 30 in 1998.}
@article{arules:Kohavi:1988,
  author   = {Ron Kohavi and Foster Provost},
  title    = {Glossary of Terms},
  journal  = {Machine Learning},
  year     = {1998},
  volume   = {30},
  number   = {2--3},
  pages    = {271--274},
  abstract = {Definition of measures from Machine Learning. These are especially interesting for comparison and evaluation of association rule algorithms.},
  keywords = {evaluation}
}
@inproceedings{arules:Sebag:1988,
  author    = {Sebag, M. and Schoenauer, M.},
  title     = {Generation of rules with certainty and confidence factors from incomplete and incoherent learning bases},
  booktitle = {Proceedings of the European Knowledge Acquisition Workshop (EKAW'88)},
  publisher = {Gesellschaft fuer Mathematik und Datenverarbeitung mbH},
  year      = {1988},
  abstract  = {Defines the Sebag measure for rules.},
  keywords  = {measure}
}
@incollection{arules:Piatetsky-Shapiro:1991,
  author    = {G. Piatetsky-Shapiro},
  title     = {Discovery, Analysis, and Presentation of Strong Rules},
  editor    = {G. Piatetsky-Shapiro and W.J. Frawley},
  booktitle = {Knowledge Discovery in Databases},
  publisher = {AAAI/MIT Press},
  address   = {Cambridge, MA},
  year      = {1991},
  keywords  = {kdd, measure},
  abstract  = {Introduces the measure LEVERAGE which is the simplest function which satisfies his principles for rule-interest functions (0 if the variables are statistically independent; monotonically increasing if the variables occur more often together; monotonically decreasing if one of the variables alone occurs more often).}
}
@incollection{arules:Smyth:1991,
  author    = {Padhraic Smyth and R. Goodman},
  title     = {Rule Induction Using Information Theory},
  booktitle = {Knowledge Discovery in Databases},
  editor    = {G. Piatetsky-Shapiro and W.J. Frawley},
  publisher = {AAAI/MIT Press},
  address   = {Cambridge, MA},
  year      = {1991},
  abstract  = {Introduces the J-Measure as a scaled measure of cross entropy for the information content of a rule.},
  keywords  = {measure}
}
@inproceedings{arules:Agrawal:1993,
  author    = {R. Agrawal and T. Imielinski and A. Swami},
  title     = {Mining Association Rules between Sets of Items in Large Databases},
  booktitle = {Proceedings of the ACM SIGMOD International Conference on Management of Data},
  year      = {1993},
  month     = may,
  address   = {Washington D.C.},
  pages     = {207--216},
  abstract  = {Introduces association rules and the SUPPORT-CONFIDENCE framework and an algorithm to mine large itemsets. The algorithm is sometimes called AIS after the authors' initials.},
  keywords  = {algorithm},
  doi       = {10.1145/170036.170072}
}
@article{arules:Agrawal:1993b,
  author   = {Rakesh Agrawal and Tomasz Imielinski and Arun Swami},
  title    = {Database Mining: {A} Performance Perspective},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  year     = {1993},
  volume   = {5},
  number   = {6},
  pages    = {914--925},
  abstract = {Places association rule mining together with classification and sequence mining into the context of rule discovery in database mining. The authors present basic operations and an algorithm to discover classification rules. For the evaluation they generate artificial survey data using different classification functions.},
  keywords = {evaluation},
  doi      = {10.1109/69.250074}
}
@inproceedings{arules:Agrawal:1994,
  author    = {Rakesh Agrawal and Ramakrishnan Srikant},
  title     = {Fast Algorithms for Mining Association Rules in Large Databases},
  booktitle = {Proceedings of the 20th International Conference on Very Large Data Bases, {VLDB}},
  editor    = {Jorge B. Bocca and Matthias Jarke and Carlo Zaniolo},
  year      = {1994},
  month     = sep,
  address   = {Santiago, Chile},
  pages     = {487--499},
  abstract  = {Introduction of the APRIORI algorithm (the best-known algorithm; it uses a breadth-first search strategy to count the support of itemsets). The algorithm uses an improved candidate generation function which exploits the downward closure property of support and makes it more efficient than AIS. Also an algorithm to generate synthetic transaction data is presented. Such synthetic transaction data are widely used for the evaluation and comparison of new algorithms.},
  keywords  = {algorithm, evaluation}
}
@inproceedings{arules:Klemettinen:1994,
  title     = {Finding interesting rules from large sets of discovered association rules},
  author    = {Mika Klemettinen and Heikki Mannila and Pirjo Ronkainen and Hannu Toivonen and A. Inkeri Verkamo},
  editor    = {Nabil R. Adam and Bharat K. Bhargava and Yelena Yesha},
  booktitle = {Third International Conference on Information and Knowledge Management ({CIKM}'94)},
  publisher = {ACM Press},
  year      = {1994},
  pages     = {401--407},
  doi       = {10.1145/191246.191314},
  keywords  = {constraint},
  abstract  = {Introduce the usage of rule templates.}
}
@inproceedings{arules:Mannila:1994,
  title     = {Efficient algorithms for discovering association rules},
  author    = {Heikki Mannila and Hannu Toivonen and A. Inkeri Verkamo},
  editor    = {Usama M. Fayyad and Ramasamy Uthurusamy},
  booktitle = {{AAAI} Workshop on Knowledge Discovery in Databases (KDD-94)},
  publisher = {AAAI Press},
  address   = {Seattle, Washington},
  year      = {1994},
  pages     = {181--192},
  keywords  = {algorithm,sampling},
  abstract  = {Develop similar improvements to the candidate generation as APRIORI. Itemsets with support are called covering sets. The paper also introduces sampling from the database and gives bounds for the resulting estimate of support.}
}
@inproceedings{arules:Agrawal:1995,
  author    = {Rakesh Agrawal and Ramakrishnan Srikant},
  title     = {Mining Sequential Patterns},
  booktitle = {Eleventh International Conference on Data Engineering},
  editor    = {Philip S. Yu and Arbee S. P. Chen},
  publisher = {IEEE Computer Society Press},
  address   = {Taipei, Taiwan},
  year      = {1995},
  pages     = {3--14},
  abstract  = {Introduces mining sequential patterns. A sequential pattern is a maximal sequence that exceeds minimum support (a minimum number of customers). The algorithms AprioriSome and AprioriAll (based on Apriori) are presented.},
  keywords  = {sequential}
}
@inproceedings{arules:Savasere:1995,
  title     = {An efficient algorithm for mining association rules in large databases},
  author    = {Ashok Savasere and Edward Omiecinski and Shamkant Navathe},
  booktitle = {Proceedings of the 21st VLDB Conference},
  address   = {Zurich, Switzerland},
  year      = {1995},
  pages     = {432--443},
  keywords  = {algorithm},
  abstract  = {Introduction of the PARTITION algorithm. The database is scanned only twice. For the first scan the DB is partitioned and in each partition support is counted. Then the counts are merged to generate potential large itemsets. In the second scan the potential large itemsets are counted to find the actual large itemsets.}
}
@comment{NOTE(review): the DOI below appears to belong to the later journal version of this paper (Future Generation Computer Systems, 1997) rather than to the VLDB 1995 proceedings paper; confirm before relying on it.}
@inproceedings{arules:Srikant:1995,
  author    = {Ramakrishnan Srikant and Rakesh Agrawal},
  title     = {Mining generalized association rules},
  booktitle = {Proceedings of the 21st VLDB Conference},
  address   = {Zurich, Switzerland},
  year      = {1995},
  abstract  = {Generalized association rules use a taxonomy (is-a hierarchy) on items. The paper introduces R-interesting rules as rules with a support which is R-times higher than the support of its closest ancestor (a rule with at least one item generalized). Algorithms that use R-interesting in addition to support and confidence are presented and evaluated.},
  keywords  = {generalized},
  doi       = {10.1016/s0167-739x(97)00019-8}
}
@article{arules:Bernard:1996,
  author   = {Jean-Marc Bernard and Camilo Charron},
  title    = {L'analyse implicative bay{\'e}sienne : une m{\'e}thode pour l'{\'e}tude des d{\'e}pendances orient{\'e}es. 2. mod{\`e}le logique sur un tableau de contingence},
  journal  = {Math{\'e}matiques et sciences humaines},
  year     = {1996},
  volume   = {134},
  pages    = {5--18},
  doi      = {10.4000/msh.2734},
  abstract = {Introduces Varying Rates Liaison.},
  keywords = {measure}
}
@incollection{arules:Fayyad:1996,
  author    = {Usama M. Fayyad and Gregory Piatetsky-Shapiro and Padhraic Smyth},
  title     = {From Data Mining to Knowledge Discovery: An Overview},
  booktitle = {Advances in Knowledge Discovery and Data Mining},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1996},
  pages     = {1--36},
  abstract  = {Introduction to the KDD process.},
  keywords  = {kdd}
}
@comment{NOTE(review): the DOI below appears to belong to the later journal version of this paper (Journal of Computer and System Sciences) rather than to the PODS 96 proceedings paper; confirm before relying on it.}
@inproceedings{arules:Fukuda:1996,
  author    = {Takeshi Fukuda and Yasuhiko Morimoto and Shinichi Morishita and Takeshi Tokuyama},
  title     = {Mining optimized association rules for numeric attributes},
  booktitle = {PODS '96 Proceedings of the fifteenth ACM SIGACT-SIGMOD-SIGART symposium on Principles of database systems},
  publisher = {ACM Press},
  year      = {1996},
  pages     = {182--191},
  abstract  = {Finds appropriate ranges for quantitative attributes automatically by maximizing the support on the condition that the confidence ratio is at least a given threshold value or by maximizing the confidence ratio on the condition that the support is at least a given threshold number. The paper also introduces the measure gain: gain(R) = sup(R) - minConf * sup(lhs(R)) = sup(lhs(R)) * (conf(R) - minConf).},
  keywords  = {quantitative},
  doi       = {10.1006/jcss.1998.1595}
}
@book{arules:Gras:1996,
  author    = {Gras, R{\'e}gis and Ag Almouloud, Saddo and Bailleul, Marc and Larher, Annie and Polo, Maria and Ratsimba-Rajohn, Harrisson and Totohasina, Andr{\'e}},
  title     = {L'implication statistique, nouvelle m{\'e}thode exploratoire de donn{\'e}es},
  publisher = {ARDM},
  year      = {1996},
  month     = jan,
  isbn      = {2-85919-129-0},
  abstract  = {Introduces the implication index.},
  keywords  = {measure}
}
@inproceedings{arules:Mannila:1996,
  author    = {Heikki Mannila and Hannu Toivonen},
  title     = {Multiple Uses of Frequent Sets and Condensed Representations},
  booktitle = {Proceedings of the Second International Conference on Knowledge Discovery and Data Mining (KDD-96)},
  editor    = {Evangelos Simoudis and Jiawei Han and Usama M. Fayyad},
  publisher = {AAAI Press},
  year      = {1996},
  pages     = {189--194},
  abstract  = {Introduces general rules with disjunctions and negations in the antecedent and the consequent. The confidence of any such rules can be approximated by using the support of frequent itemsets only (applying the inclusion-exclusion principle). Using the negative border, an error bound for the estimates can be calculated. The authors also show that frequent itemsets with a support of epsilon are a concise representation (epsilon-adequate representation) which can approximate the confidence of any itemset with an error of at most epsilon.},
  keywords  = {concise}
}
@inproceedings{arules:Toivonen:1996,
  title     = {Sampling Large Databases for Association Rules},
  author    = {Hannu Toivonen},
  booktitle = {VLDB '96: Proceedings of the 22th International Conference on Very Large Data Bases},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  year      = {1996},
  pages     = {134--145},
  isbn      = {1-55860-382-4},
  keywords  = {algorithm,sampling},
  abstract  = {Find frequent itemsets in a random sample of a database (that fits into main memory) and then verify the found frequent itemsets in the database.}
}
@inproceedings{arules:Brin:1997,
  author    = {Sergey Brin and Rajeev Motwani and Jeffrey D. Ullman and Shalom Tsur},
  title     = {Dynamic Itemset Counting and Implication Rules for Market Basket Data},
  booktitle = {SIGMOD 1997, Proceedings ACM SIGMOD International Conference on Management of Data},
  year      = {1997},
  month     = may,
  address   = {Tucson, Arizona, USA},
  pages     = {255--264},
  abstract  = {Introduces CONVICTION (as an improvement to confidence based on implication rules) and INTEREST (later called LIFT).},
  keywords  = {measure},
  doi       = {10.1145/253262.253325}
}
@inproceedings{arules:Brin:1997b,
  author    = {Sergey Brin and Rajeev Motwani and Craig Silverstein},
  title     = {Beyond Market Baskets: Generalizing Association Rules to Correlations},
  booktitle = {SIGMOD 1997, Proceedings ACM SIGMOD International Conference on Management of Data},
  year      = {1997},
  month     = may,
  address   = {Tucson, Arizona, USA},
  pages     = {265--276},
  abstract  = {Proposes to use the chi-square test for correlation. For an itemset of length l, the test is carried out on an l-dimensional contingency table. A problem is cells with low counts and multiple tests.},
  keywords  = {no-support},
  doi       = {10.1145/253262.253327}
}
@inproceedings{arules:Lent:1996,
  title     = {Clustering Association Rules},
  author    = {Brian Lent and Arun N. Swami and Jennifer Widom},
  booktitle = {Proceedings of the Thirteenth International Conference on Data Engineering, April 7--11, 1997 Birmingham U.K.},
  publisher = {IEEE Computer Society},
  year      = {1997},
  pages     = {220--231},
  keywords  = {clustering,quantitative},
  abstract  = {Join adjacent intervals for quantitative association rules to produce more general rules.}
}
@article{arules:Mannila:1997,
  author   = {Heikki Mannila and Hannu Toivonen and A. Inkeri Verkamo},
  title    = {Discovery of Frequent Episodes in Event Sequences},
  journal  = {Data Mining and Knowledge Discovery},
  year     = {1997},
  volume   = {1},
  number   = {3},
  pages    = {259--289},
  keywords = {sequential}
}
@inproceedings{arules:Srikant:1997,
  author    = {Ramakrishnan Srikant and Quoc Vu and Rakesh Agrawal},
  title     = {Mining Association Rules with Item Constraints},
  booktitle = {Proceedings of the Third International Conference on Knowledge Discovery and Data Mining (KDD-97)},
  editor    = {David Heckerman and Heikki Mannila and Daryl Pregibon and Ramasamy Uthurusamy},
  publisher = {AAAI Press},
  year      = {1997},
  pages     = {67--73},
  abstract  = {Integrates BOOLEAN CONSTRAINTS on items (absence, presence) into the mining algorithm to reduce the search space. Algorithms are discussed.},
  keywords  = {constraint}
}
@techreport{arules:Zaki:1997,
  author      = {Mohammed J. Zaki and Srinivasan Parthasarathy and Mitsunori Ogihara and Wei Li},
  title       = {New Algorithms for Fast Discovery of Association Rules},
  institution = {Computer Science Department, University of Rochester},
  number      = {651},
  address     = {Rochester, NY 14627},
  year        = {1997},
  month       = jul,
  abstract    = {Quickly identify MAXIMAL FREQUENT ITEMSETS (a frequent itemset is maximal if it is no proper subset of any other frequent itemset) using different database layout schemes (regular, inverted) and clustering techniques (equivalence class ECLAT, max. clique). See also Zaki 2000, Scalable Algorithms for Association Mining.},
  keywords    = {maximal}
}
@inproceedings{arules:Zaki:1997b,
  author    = {Mohammed Javeed Zaki and Srinivasan Parthasarathy and Wei Li and Mitsunori Ogihara},
  title     = {Evaluation of Sampling for Data Mining of Association Rules},
  booktitle = {Proceedings of the 7th International Workshop on Research Issues in Data Engineering (RIDE '97) High Performance Database Management for Large-Scale Applications},
  publisher = {{IEEE} Computer Society},
  year      = {1997},
  pages     = {42--50},
  abstract  = {Evaluates random sampling with replacement as presented in Mannila et al. 1994 using several datasets. The experiments show that Chernoff bounds overestimate the needed sample size and that sampling seems an effective tool for practical purposes.},
  keywords  = {sampling}
}
@inproceedings{arules:Aggarwal:1998,
  title     = {A New Framework For Itemset Generation},
  author    = {C. C. Aggarwal and P. S. Yu},
  booktitle = {PODS 98, Symposium on Principles of Database Systems},
  address   = {Seattle, WA, USA},
  year      = {1998},
  pages     = {18--24},
  doi       = {10.1145/275487.275490},
  keywords  = {measure},
  abstract  = {Points out weaknesses of the large frequent itemset method using support (spuriousness, dense datasets) and that lift gives only values close to one for items which are very frequent, even if they are perfectly positive correlated. COLLECTIVE STRENGTH is introduced. Collective strength uses the violation rate for an itemset which is the fraction of transactions which contains some, but not all items of the itemset. The violation rate is compared to the expected violation rate under independence. Collective strength is downward closed.}
}
@inproceedings{arules:Liu:1998,
  author    = {Bing Liu and Wynne Hsu and Yiming Ma},
  title     = {Integrating Classification and Association Rule Mining},
  booktitle = {Proceedings of the Fourth International Conference on Knowledge Discovery and Data Mining (KDD-98)},
  publisher = {AAAI Press},
  year      = {1998},
  pages     = {80--86},
  abstract  = {Mines only the subset of association rules with the classification class attribute in the right-hand side (CARs). From these CARs a classifier is built by using the rules with the highest confidence to cover the whole database. The presented algorithm is called Classification Based on Associations (CBA). In experiments the resulting classifiers are more accurate than C4.5.},
  keywords  = {classification}
}
@inproceedings{arules:Megiddo:1998,
  author    = {Nimrod Megiddo and Ramakrishnan Srikant},
  title     = {Discovering Predictive Association Rules},
  booktitle = {Proceedings of the Fourth International Conference on Knowledge Discovery and Data Mining (KDD-98)},
  editor    = {Rakesh Agrawal and Paul E. Stolorz and Gregory Piatetsky-Shapiro},
  publisher = {AAAI Press},
  year      = {1998},
  pages     = {274--278},
  abstract  = {Introduces several STATISTICAL TESTS: Test if the observed support count is sig. greater than a support threshold, Chi-square test of independence (see also Brin et al. 1997). Also deals with the Bonferroni effect (multiple-comparison problem) by finding an upper bound of the number of tested hypotheses and proposing a resampling procedure using an independence model. The paper introduced confidence intervals for support and confidence. Finally, the authors find that the support-confidence framework does a good job to eliminate statistically insignificant rules (on market basket data).},
  keywords  = {theory}
}
@inproceedings{arules:Ng:1998,
  title     = {Exploratory mining and pruning optimizations of constrained associations rules},
  author    = {Raymond T. Ng and Laks V.S. Lakshmanan and Jiawei Han and Alex Pang},
  booktitle = {Proceedings of the ACM SIGMOD Conference, Seattle, WA},
  year      = {1998},
  pages     = {13--24},
  doi       = {10.1145/276305.276307},
  keywords  = {constraint},
  abstract  = {Characterizes various constraints (contains, minimum, maximum, count, sum, avg) according to anti-monotonicity and succinctness. Anti-monotonicity is the property which allows iterative pruning (generate and test candidates) used e.g., on support by Apriori. Succinctness is a property that enables us to generate only those itemsets which satisfy the constraint without the need to test them.}
}
@article{arules:Silverstein:1998,
  title    = {Beyond Market Baskets: Generalizing Association Rules to Dependence Rules},
  author   = {Craig Silverstein and Sergey Brin and Rajeev Motwani},
  journal  = {Data Mining and Knowledge Discovery},
  volume   = {2},
  pages    = {39--68},
  year     = {1998},
  keywords = {no-support},
  abstract = {Journal version of Brin et al. (1997).}
}
@inproceedings{arules:Zaki:1998,
  author    = {M. J. Zaki and M. Ogihara},
  title     = {Theoretical foundation of association rules},
  booktitle = {SIGMOD'98 Workshop on Research Issues in Data Mining and Knowledge Discovery (SIGMOD-DMKD'98), Seattle, Friday, June 5, 1998},
  year      = {1998},
  abstract  = {Presents the lattice-theoretic foundations of mining associations based on FORMAL CONCEPT ANALYSIS and shows that frequent itemsets are determined by the set of frequent concepts. The paper studies the generation of a minimal set of rules (called base) from which all other association rules can be inferred. The paper also presents some complexity considerations using the connection between frequent itemsets and maximal bipartite cliques. It is shown that for very sparse databases association rule algorithms should scale linearly in the number of items.},
  keywords  = {theory}
}
@inproceedings{arules:Bayardo:1999,
  title     = {Mining the most interesting rules},
  author    = {Robert J. {Bayardo Jr.} and Rakesh Agrawal},
  booktitle = {Proceedings of the fifth ACM SIGKDD international conference on Knowledge discovery and data mining (KDD-99)},
  publisher = {ACM Press},
  year      = {1999},
  pages     = {145--154},
  doi       = {10.1145/312129.312219},
  keywords  = {theory},
  abstract  = {Shows that for all rules with the same antecedent, the best (optimal, most interesting) rules according to measures as confidence, support, gain, chi-square value, gini, entropy gain, laplace, lift, conviction all must reside along a support/confidence border. The paper also shows that many measures are monotone functions of support and confidence.}
}
@inproceedings{arules:Li:1999,
  author    = {Jinyan Li and Xiuzhen Zhang and Guozhu Dong and Kotagiri Ramamohanarao and Qun Sun},
  title     = {Efficient Mining of High Confidence Association Rules without Support Thresholds},
  booktitle = {Principles of Data Mining and Knowledge Discovery PKDD'99, LNAI 1704, Prague, Czech Republic},
  editor    = {J. Zytkow and J. Rauch},
  publisher = {Springer-Verlag},
  year      = {1999},
  pages     = {406--411},
  abstract  = {This paper used JUMPING EMERGING PATTERNS to mine a border for top rules (rules with 100\% confidence) for a given consequent. The drawbacks are that only one consequent is mined at a time and that finding rules with other than 100\% confidence is difficult.},
  keywords  = {no-support},
  doi       = {10.1007/978-3-540-48247-5_50}
}
@inproceedings{arules:Liu:1999,
  title     = {Mining Association Rules with Multiple Minimum Supports},
  author    = {Bing Liu and Wynne Hsu and Yiming Ma},
  booktitle = {Proceedings of the fifth ACM SIGKDD international conference on Knowledge discovery and data mining (KDD-99)},
  publisher = {ACM Press},
  year      = {1999},
  pages     = {337--341},
  doi       = {10.1145/312129.312274},
  keywords  = {var-support},
  abstract  = {Adapts APRIORI to work with different minimum support thresholds assigned to different items (minimum item supports, MIS). To preserve the downward closure property of support item sorting using the MIS values is used.}
}
@inproceedings{arules:Liu:1999b,
  author    = {Bing Liu and Wynne Hsu and Yiming Ma},
  title     = {Pruning and summarizing the discovered associations},
  booktitle = {Proceedings of the fifth ACM SIGKDD international conference on Knowledge discovery and data mining (KDD-99)},
  publisher = {ACM Press},
  year      = {1999},
  pages     = {125--134},
  abstract  = {Remove insignificant rules using the chi-square test to test for correlation between the antecedent and the consequent of a rule. Also DIRECTION SETTING (DS) RULES are introduced. A DS rule has a pos. correlated antecedent and consequent and is not built from a rule with a shorter antecedent which is a DS rule. Normally, only a small and concise fraction of rules are DS rules.},
  keywords  = {measure,theory},
  doi       = {10.1145/312129.312216}
}
@inproceedings{arules:Nada:1999,
  title     = {Rule Evaluation Measures: A Unifying View},
  author    = {Lavra{\v{c}}, Nada and Flach, Peter and Zupan, Blaz},
  editor    = {D{\v{z}}eroski, Sa{\v{s}}o and Flach, Peter},
  booktitle = {Inductive Logic Programming},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  year      = {1999},
  pages     = {174--185},
  isbn      = {978-3-540-48751-7},
  doi       = {10.1007/3-540-48751-4_17},
  keywords  = {measure},
  abstract  = {Introduces relative accuracy/gain.}
}
@inproceedings{arules:Pasquier:1999,
  author    = {Nicolas Pasquier and Yves Bastide and Rafik Taouil and Lotfi Lakhal},
  title     = {Discovering Frequent Closed Itemsets for Association Rules},
  booktitle = {Proceedings of the 7th International Conference on Database Theory, Lecture Notes in Computer Science (LNCS 1540)},
  publisher = {Springer},
  year      = {1999},
  pages     = {398--416},
  abstract  = {Introduces CLOSED ITEMSETS. An itemset X is closed if no proper super set of X is contained in every transaction in which X is contained. Which means there exists no super set of X with the same support count as X.},
  keywords  = {closed},
  doi       = {10.1007/3-540-49257-7_25}
}
@article{arules:Pasquier:1999b,
  author    = {Nicolas Pasquier and Yves Bastide and Rafik Taouil and Lotfi Lakhal},
  title     = {Efficient mining of association rules using closed itemset lattices},
  journal   = {Information Systems},
  year      = {1999},
  volume    = {24},
  number    = {1},
  pages     = {25--46},
  publisher = {Elsevier Science Ltd.},
  address   = {Oxford, UK},
  abstract  = {Present the CLOSE algorithm to mine frequent closed itemsets.},
  keywords  = {closed},
  doi       = {10.1016/s0306-4379(99)00003-4}
}
@article{arules:2000,
  author   = {Y. Bastide and R. Taouil and N. Pasquier and G. Stumme and L. Lakhal},
  title    = {Mining frequent patterns with counting inference},
  journal  = {SIGKDD Explorations},
  year     = {2000},
  volume   = {2},
  number   = {2},
  pages    = {66--75},
  abstract = {Proposes the algorithm PASCAL (an APRIORI optimization) to mine closed and frequent itemsets. This approach uses frequent key-patterns to infer counts of frequent non-key patterns.},
  keywords = {closed},
  doi      = {10.1145/380995.381017}
}
@inproceedings{arules:Agarwal:2000,
  author    = {Ramesh C. Agarwal and Charu C. Aggarwal and V. V. V. Prasad},
  title     = {Depth First Generation of Long Patterns},
  booktitle = {Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD-2000)},
  year      = {2000},
  pages     = {108--118},
  abstract  = {Introduces the algorithm DepthProject which builds a lexicographic tree in a depth first order.},
  keywords  = {maximal},
  doi       = {10.1145/347090.347114}
}
@article{arules:Ahmed:2000,
  author    = {Khalil M. Ahmed and Nagwa M. El-Makky and Yousry Taha},
  title     = {A note on ``{Beyond} market baskets: {Generalizing} association rules to correlations''},
  journal   = {SIGKDD Explorations},
  year      = {2000},
  volume    = {1},
  number    = {2},
  pages     = {46--48},
  publisher = {ACM Press},
  abstract  = {A reply to Brin et al. (1997). The authors state that the chi-square test tests the whole contingency table, but for larger than 2x2 tables we want to test dependence for single cells.},
  keywords  = {no-support}
}
@article{arules:Bayardo:2000,
  title    = {Constraint-based rule mining in large, dense databases},
  author   = {R. Bayardo and R. Agrawal and D. Gunopulos},
  journal  = {Data Mining and Knowledge Discovery},
  volume   = {4},
  number   = {2/3},
  pages    = {217--240},
  year     = {2000},
  keywords = {constraint},
  abstract = {Introduces the MINIMUM IMPROVEMENT constraint for confidence (mine only rules with a confidence which is minimp greater than the confidence of any of its proper subset-rules). DenseMiner, an algorithm that enforces minimum support, minimum confidence and minimum improvement already during a breadth-first search for all rules for a given consequent C is presented.}
}
@article{arules:Freitas:2000,
  title    = {Understanding the Crucial Differences Between Classification and Discovery of Association Rules -- A Position Paper},
  author   = {Alex A. Freitas},
  journal  = {SIGKDD Explorations},
  volume   = {2},
  number   = {1},
  pages    = {65--69},
  year     = {2000},
  doi      = {10.1145/360402.360423},
  keywords = {classification}
}
@article{arules:Hipp:2000,
  author   = {Jochen Hipp and Ulrich G{\"u}ntzer and Gholamreza Nakhaeizadeh},
  title    = {Algorithms for Association Rule Mining -- {A} General Survey and Comparison},
  journal  = {SIGKDD Explorations},
  year     = {2000},
  volume   = {2},
  number   = {1},
  pages    = {58--64},
  abstract = {Describes the fundamentals of association rule mining and presents a systematization of existing algorithms.},
  keywords = {algorithm},
  doi      = {10.1145/360402.360421}
}
@article{arules:kddcup:2000,
  title    = {{KDD-Cup} 2000 Organizers' Report: Peeling the Onion},
  author   = {Ron Kohavi and Carla Brodley and Brian Frasca and Llew Mason and Zijian Zheng},
  journal  = {SIGKDD Explorations},
  volume   = {2},
  number   = {2},
  pages    = {86--98},
  year     = {2000},
  doi      = {10.1145/380995.381033},
  keywords = {evaluation},
  abstract = {Introduces also some freely available data sets for algorithm performance evaluation.}
}
@inproceedings{arules:Pei:2000,
  author    = {Jian Pei and Jiawei Han and Runying Mao},
  title     = {{CLOSET}: An Efficient Algorithm for Mining Frequent Closed Itemsets},
  booktitle = {ACM SIGMOD Workshop on Research Issues in Data Mining and Knowledge Discovery},
  year      = {2000},
  abstract  = {Introduces the algorithm CLOSET which mines frequent closed itemsets using FP-growth (a depth-first search using support counting).},
  keywords  = {closed}
}
@article{arules:Silverstein:2000,
  title    = {Scalable Techniques for Mining Causal Structures},
  author   = {Craig Silverstein and Sergey Brin and Rajeev Motwani and Jeffrey D. Ullman},
  journal  = {Data Mining and Knowledge Discovery},
  volume   = {4},
  number   = {2/3},
  pages    = {163--192},
  year     = {2000},
  keywords = {causal},
  abstract = {Explores the applicability of constraint-based causal discovery (known from Bayesian learning) to discover causal relationships in market basket data.}
}
@inproceedings{arules:Webb:2000,
  title     = {Efficient search for association rules},
  author    = {Geoffrey I. Webb},
  booktitle = {Proceedings of the 6th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, Boston, Massachusetts, United States, August 20 -- 23, 2000},
  pages     = {99--107},
  year      = {2000},
  doi       = {10.1145/347090.347112}
}
@inproceedings{arules:Wirth:2000,
  author    = {R{\"u}diger Wirth and Jochen Hipp},
  title     = {CRISP-DM: Towards a Standard Process Model for Data Mining},
  booktitle = {Proceedings of the 4th International Conference on the Practical Applications of Knowledge Discovery and Data Mining},
  address   = {Manchester, UK},
  year      = {2000},
  month     = apr,
  keywords  = {kdd}
}
@article{arules:Zaki:2000,
  author   = {Mohammed J. Zaki},
  title    = {Scalable Algorithms for Association Mining},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  year     = {2000},
  month    = may # "/" # jun,
  volume   = {12},
  number   = {3},
  pages    = {372--390},
  abstract = {Introduces six new algorithms combining several features (database format, the decomposition technique, and the search procedure). Includes Eclat (Equivalence CLAss Transformation), MaxEclat, Clique, MaxClique, TopDown, and AprClique. ECLAT is a well known depth-first search algorithm using set intersection.},
  keywords = {algorithm},
  doi      = {10.1109/69.846291}
}
@book{arules:Adamo:2001,
  title     = {Data Mining for Association Rules and Sequential Patterns},
  author    = {Jean-Marc Adamo},
  publisher = {Springer},
  address   = {New York},
  year      = {2001},
  doi       = {10.1007/978-1-4613-0085-4},
  keywords  = {sequential},
  abstract  = {Introduction to association rules and mining sequential patterns.}
}
@article{arules:Bay:2001,
  title     = {Detecting Group Differences: Mining Contrast Sets},
  author    = {Stephen D. Bay and Michael J. Pazzani},
  journal   = {Data Mining and Knowledge Discovery},
  publisher = {Kluwer Academic Publishers},
  volume    = {5},
  number    = {3},
  pages     = {213--246},
  year      = {2001},
  keywords  = {theory},
  abstract  = {Finds sets with substantially different support in different groups. Uses interest based pruning and statistical surprise for filtering (summarizing) contrast sets. The search error is controlled using different (Bonferroni) correction for sets of different size.}
}
@article{arules:Bruzzese:2001,
  author    = {Dario Bruzzese and Cristina Davino},
  title     = {Pruning of Discovered Association Rules},
  journal   = {Computational Statistics},
  publisher = {Physica-Verlag},
  year      = {2001},
  volume    = {16},
  number    = {3},
  pages     = {387--398},
  abstract  = {The authors construct several statistical tests to evaluate the significance of discovered associations.},
  keywords  = {measure}
}
@inproceedings{arules:Burdick:2001,
  author    = {Douglas Burdick and Manuel Calimlim and Johannes Gehrke},
  title     = {{MAFIA}: A Maximal Frequent Itemset Algorithm for Transactional Databases},
  booktitle = {Proceedings of the 17th International Conference on Data Engineering},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC},
  year      = {2001},
  pages     = {443--452},
  abstract  = {MAFIA (MAximal Frequent Itemset Algorithm) finds maximal itemsets using a depth-first traversal of the itemset lattice, a compressed vertical bitmap representation of the database, additional pruning techniques (Parent Equivalence Pruning, Frequent Head Union Tail pruning) and dynamic reordering. The authors claim that MAFIA outperforms DepthProject (Agarwal et al., 2000) by a factor of 3 to 5 on average.},
  keywords  = {maximal}
}
@inproceedings{arules:Cadez:2001,
  author    = {Igor V. Cadez and Padhraic Smyth and Heikki Mannila},
  title     = {Probabilistic modeling of transaction data with applications to profiling, visualization, and prediction},
  booktitle = {Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery in Databases and Data Mining (KDD-01)},
  editor    = {F. Provost and R. Srikant},
  publisher = {ACM Press},
  year      = {2001},
  pages     = {37--45},
  abstract  = {The authors construct a model (profile with weights) for each individual's behavior as a mixture of several components. This mixture provides the probabilities for a multinomial probability model (each item has a constant probability to be chosen for a transaction). Finally, the authors compare several estimation methods and model variants empirically using store choice data.},
  keywords  = {evaluation},
  doi       = {10.1145/502512.502523}
}
@article{arules:Cohen:2001,
  author   = {Cohen, Edith and Datar, Mayur and Fujiwara, Shinji and Gionis, Aristides and Indyk, Piotr and Motwani, Rajeev and Ullman, Jeffrey D. and Yang, Cheng},
  title    = {Finding Interesting Associations without Support Pruning},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  year     = {2001},
  volume   = {13},
  number   = {1},
  pages    = {64--78},
  doi      = {10.1109/69.908981},
  abstract = {Uses similarity measures between hashed values of rows in a transaction database. The approach in the paper was only shown for associations between two items.},
  keywords = {no-support}
}
@inproceedings{arules:DuMouchel:2001,
  author    = {DuMouchel, William and Pregibon, Daryl},
  editor    = {Provost, F. and Srikant, R.},
  title     = {Empirical {Bayes} Screening for Multi-Item Associations},
  booktitle = {Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery in Databases and Data Mining (KDD-01)},
  year      = {2001},
  pages     = {67--76},
  publisher = {ACM Press},
  doi       = {10.1145/502512.502526},
  abstract  = {Search for unusually frequent itemsets using statistical methods. First, the authors propose stratification of the data to avoid finding spurious associations within strata. Then the deviation of the observed frequency over a baseline frequency (based on independence) is used. Since the deviation is unreliable for low counts, an empirical Bayes model (its 95\% confidence limit) is used to produce a posterior distribution of the true ratio of actual to baseline frequencies. The Bayes model gives ratios close to the observed ratios for large samples and reduces (shrinks) the ratio if the sample size gets small (to smooth away noise). For multi-item associations log-linear models are proposed to find higher order associations which cannot be explained by pairwise associations.},
  keywords  = {no-support,theory}
}
@article{arules:Hofmann:2001,
  author   = {Hofmann, Heike and Wilhelm, Adalbert F. X.},
  title    = {Visual Comparison of Association Rules},
  journal  = {Computational Statistics},
  year     = {2001},
  volume   = {16},
  number   = {3},
  pages    = {399--415},
  doi      = {10.1007/s001800100075},
  abstract = {Introduces difference of confidence.},
  keywords = {measure}
}
@inproceedings{arules:Kodratoff:2001,
  author    = {Kodratoff, Yves},
  editor    = {Paliouras, Georgios and Karkaletsis, Vangelis and Spyropoulos, Constantine D.},
  title     = {Comparing Machine Learning and Knowledge Discovery in DataBases: An Application to Knowledge Discovery in Texts},
  booktitle = {Machine Learning and Its Applications, Advanced Lectures},
  series    = {Lecture Notes in Computer Science},
  year      = {2001},
  volume    = {2049},
  pages     = {1--21},
  publisher = {Springer},
  doi       = {10.1007/3-540-44673-7},
  abstract  = {Introduces casual support and casual confidence informed by negatives.},
  keywords  = {measure}
}
@inproceedings{arules:Li:2001,
  author    = {Li, Yingjiu and Ning, Peng and Wang, X. Sean and Jajodia, Sushil},
  title     = {Generating Market Basket Data with Temporal Information},
  booktitle = {ACM KDD Workshop on Temporal Data Mining},
  year      = {2001},
  month     = {August},
  abstract  = {Develop a generator for synthetic data with temporal patterns based on the generator by Agrawal and Srikant (1994).},
  keywords  = {sequential,evaluation}
}
@inproceedings{arules:Pei:2001,
  author    = {Pei, Jian and Han, Jiawei and Lakshmanan, Laks V.S.},
  title     = {Mining Frequent Itemsets with Convertible Constraints},
  booktitle = {Proceedings of the 17th International Conference on Data Engineering, April 2--6, 2001, Heidelberg, Germany},
  year      = {2001},
  pages     = {433--442},
  abstract  = {Develops a technique of how constraints on avg, median and sum can be converted so that they can be used already during the search phase of the FP-growth algorithm. The constraints are classified into constraints that are: convertible anti-monotone, convertible monotone and strongly convertible.},
  keywords  = {constraint}
}
@inproceedings{arules:Seno:2001,
  author    = {Seno, Masakazu and Karypis, George},
  editor    = {Cercone, Nick and Lin, Tsau Young and Wu, Xindong},
  title     = {{LPMiner}: An Algorithm for Finding Frequent Itemsets Using Length Decreasing Support Constraint},
  booktitle = {Proceedings of the 2001 IEEE International Conference on Data Mining, 29 November -- 2 December 2001, San Jose, California, USA},
  year      = {2001},
  pages     = {505--512},
  publisher = {IEEE Computer Society},
  abstract  = {To find longer frequent itemsets, the minimal support requirement decreases as a function of the itemset length. An algorithm based on the FP-tree is presented and a property called small valid extension (SVE) is introduced which makes mining efficient in absence of downward closure.},
  keywords  = {var-support}
}
@inproceedings{arules:Song:2001,
  author    = {Song, Hee Seok and Kim, Soung Hie and Kim, Jae Kyeong},
  title     = {A Methodology for Detecting the Change of Customer Behavior based on Association Rule Mining},
  booktitle = {Proceedings of the Pacific Asia Conference on Information Systems},
  year      = {2001},
  pages     = {871--885},
  publisher = {PACIS},
  abstract  = {Develops a methodology to detect changes of customer behavior automatically by comparing association rules between different time snapshots of data. Defines emerging pattern, unexpected change and the added/perished rule based on similarity and difference measures for rule matching.},
  keywords  = {changing}
}
@inproceedings{arules:Wang:2001,
  author    = {Wang, Ke and He, Yu and Cheung, David W.},
  title     = {Mining confident rules without support requirement},
  booktitle = {Proceedings of the tenth international conference on Information and knowledge management},
  year      = {2001},
  pages     = {89--96},
  address   = {New York, NY},
  publisher = {ACM Press},
  doi       = {10.1145/502585.502601},
  abstract  = {The paper shows that for data with categorical attributes a UNIVERSAL-EXISTENTIAL UPWARD CLOSURE exists for confidence. With this property algorithms with confidence-based pruning are possible that use a level-wise (from k to k-1) candidate generation. The paper also discusses a disk-based implementation.},
  keywords  = {no-support}
}
@inproceedings{arules:Zheng:2001,
  author    = {Zheng, Zijian and Kohavi, Ron and Mason, Llew},
  editor    = {Provost, F. and Srikant, R.},
  title     = {Real World Performance of Association Rule Algorithms},
  booktitle = {Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery in Databases and Data Mining (KDD-01)},
  year      = {2001},
  pages     = {401--406},
  publisher = {ACM Press},
  doi       = {10.1145/502512.502572},
  abstract  = {Compares the performance of association rule algorithms (APRIORI, CHARM, FP-growth, CLOSET, MagnumOpus) using one IBM-Artificial dataset and three real-world e-commerce datasets. It shows that some improvements demonstrated on artificial datasets do not carry over to real-world datasets.},
  keywords  = {evaluation}
}
@article{arules:Aggarwal:2002,
  author   = {Aggarwal, Charu C. and Procopiuc, Cecilia Magdalena and Yu, Philip S.},
  title    = {Finding Localized Associations in Market Basket Data},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  year     = {2002},
  volume   = {14},
  number   = {1},
  pages    = {51--62},
  doi      = {10.1109/69.979972},
  abstract = {Proposes to cluster transactions using a similarity measure based on the new affinity measure (measures similarity between pairs of items). Then mine association rules in the identified clusters.},
  keywords = {sampling}
}
@incollection{arules:Aze:2002,
  author    = {Az{\'e}, J. and Kodratoff, Y.},
  editor    = {H{\'e}rin, D. and Zighed, D. A.},
  title     = {Evaluation de la r{\'e}sistance au bruit de quelques mesures d'extraction de r{\`e}gles d'association},
  booktitle = {Extraction des connaissances et apprentissage},
  year      = {2002},
  volume    = {1},
  pages     = {143--154},
  publisher = {Hermes},
  abstract  = {Introduces least contradiction.},
  keywords  = {measure}
}
@inproceedings{arules:Borgelt:2002,
  author    = {Borgelt, Christian and Kruse, Rudolf},
  title     = {Induction of Association Rules: Apriori Implementation},
  booktitle = {15th Conference on Computational Statistics (Compstat 2002)},
  year      = {2002},
  address   = {Heidelberg, Germany},
  publisher = {Physica Verlag},
  doi       = {10.1007/978-3-642-57489-4_59},
  abstract  = {An efficient implementation of APRIORI.},
  keywords  = {implementation}
}
@inproceedings{arules:Calders:2002,
  author     = {Calders, Toon and Goethals, Bart},
  editor     = {Elomaa, Tapio and Mannila, Heikki and Toivonen, Hannu},
  title      = {Mining all non-derivable frequent itemsets},
  booktitle  = {Proceedings of the 6th European Conference on Principles of Data Mining and Knowledge Discovery},
  series     = {Lecture Notes in Computer Science},
  year       = {2002},
  volume     = {2431},
  pages      = {74--85},
  publisher  = {Springer-Verlag},
  doi        = {10.1007/3-540-45681-3_7},
  eprint     = {cs/0206004},
  eprinttype = {arXiv},
  abstract   = {Introduce NON-DERIVABLE ITEMSETS (NDIs). The support of all frequent NDIs allows for computing the support of all frequent itemsets using deduction rules based on the inclusion-exclusion principle.},
  keywords   = {concise}
}
@article{arules:Galiano:2002,
  author   = {Galiano, F. and Blanco, I. J. and S{\'a}nchez, D. and Vila, M.},
  title    = {Measuring the accuracy and interest of association rules: A new framework},
  journal  = {Intelligent Data Analysis},
  year     = {2002},
  volume   = {6},
  number   = {3},
  pages    = {221--235},
  doi      = {10.3233/ida-2002-6303},
  abstract = {Introduces the certainty factor as an alternative to confidence for assessing association rules.},
  keywords = {measure}
}
@article{arules:Stumme:2002,
  author   = {Stumme, Gerd and Taouil, Rafik and Bastide, Yves and Pasquier, Nicolas and Lakhal, Lotfi},
  title    = {Computing iceberg concept lattices with {TITANIC}},
  journal  = {Data \& Knowledge Engineering},
  year     = {2002},
  volume   = {42},
  number   = {2},
  pages    = {189--222},
  doi      = {10.1016/s0169-023x(02)00057-5},
  abstract = {The paper shows how iceberg concept lattices can be used as a condensed method to represent and visualize frequent (closed) itemsets. Iceberg concept lattices only show the top-most part of a concept lattice (known from Formal Concept Analysis). To compute iceberg concept lattices the algorithm TITANIC is presented which computes closed sets (a closure system) in a level-wise approach using weights (e.g., support), equivalence classes and key sets (minimal sets in an equivalence class). TITANIC is compared experimentally to Next-Closure and performs better. PASCAL (Bastide et al. 2000) is a modified version of TITANIC to mine all frequent itemsets.},
  keywords = {theory}
}
@inproceedings{arules:Zaki:2002,
  author    = {Zaki, Mohammed J. and Hsiao, Ching-Jui},
  title     = {{CHARM}: An Efficient Algorithm for Closed Itemset Mining},
  booktitle = {Proceedings of the Second SIAM International Conference on Data Mining},
  year      = {2002},
  address   = {Arlington, VA},
  publisher = {SIAM},
  doi       = {10.1137/1.9781611972726.27},
  abstract  = {The algorithm CHARM enumerates all frequent closed itemsets and uses a number of improvements: (a) It uses an IT-tree (itemset-tidset tree based on equivalence classes) to search simultaneously the itemset space and the transaction space. (b) It uses a fast hash-based elimination of non-closed itemsets. (c) It uses diffsets which represents the database in a compact way which should fit into main memory. (d) It uses efficient intersection operations. The performance testing shows that CHARM can provide significant improvement over algorithms as Apriori, Close, Pascal, Mafia, and Closet.},
  keywords  = {closed}
}
@article{arules:Aumann:2003,
  author   = {Aumann, Y. and Lindell, Y.},
  title    = {Statistical Theory for Quantitative Association Rules},
  journal  = {Journal of Intelligent Information Systems},
  year     = {2003},
  volume   = {20},
  number   = {3},
  pages    = {255--283},
  abstract = {Defines QUANTITATIVE ASSOCIATION RULES using statistical measures (e.g., mean and variance) of continuous data. Also algorithms are discussed.},
  keywords = {quantitative}
}
@article{arules:Barber:2003,
  author   = {Barber, Brock and Hamilton, Howard J.},
  title    = {Extracting share frequent itemsets with infrequent subsets},
  journal  = {Data Mining and Knowledge Discovery},
  year     = {2003},
  volume   = {7},
  pages    = {153--185},
  abstract = {ITEMSET SHARE is the fraction of some measure (e.g., sales, profit) contributed by the items in the set. An itemset is share frequent if it exceeds a threshold. Share frequency is not downward closed! The article presents several algorithms and heuristics to mine share frequent itemsets.},
  keywords = {measure}
}
@inproceedings{arules:Borgelt:2003,
  author    = {Borgelt, Christian},
  editor    = {Goethals, Bart and Zaki, Mohammed J.},
  title     = {Efficient Implementations of Apriori and Eclat},
  booktitle = {Proceedings of the IEEE ICDM Workshop on Frequent Itemset Mining Implementations},
  year      = {2003},
  month     = {November},
  address   = {Melbourne, FL, USA},
  abstract  = {Discusses the efficient implementation of APRIORI (with prefix tree) and ECLAT.},
  keywords  = {implementation}
}
@article{arules:Boulicaut:2003,
  author    = {Boulicaut, Jean-Francois and Bykowski, Artur and Rigotti, Christophe},
  title     = {Free-Sets: A Condensed Representation of Boolean Data for the Approximation of Frequency Queries},
  journal   = {Data Mining and Knowledge Discovery},
  year      = {2003},
  volume    = {7},
  number    = {1},
  pages     = {5--22},
  address   = {Hingham, MA, USA},
  publisher = {Kluwer Academic Publishers},
  abstract  = {Presents a new epsilon-adequate representation for frequent itemsets called frequent FREE-SETS. An itemset is a free-set if it has no subset with (almost) the same support thus the items in the itemset cannot be used to form a (nearly) exact rule.},
  keywords  = {concise}
}
@inproceedings{arules:Hollmen:2003,
  author    = {Hollm{\'e}n, Jaakko and Sepp{\"a}nen, Jouni K. and Mannila, Heikki},
  title     = {Mixture Models and Frequent Sets: Combining Global and Local Methods for 0--1 Data},
  booktitle = {SIAM International Conference on Data Mining (SDM'03)},
  year      = {2003},
  month     = {May},
  address   = {San Francisco},
  doi       = {10.1137/1.9781611972733.32},
  abstract  = {Clusters binary data first using the EM-algorithms (looks like LCA; Cadez et al. (2001) seem to do the same to find profiles). Then the authors mine frequent itemsets in each cluster. Finally, they use the maximum entropy technique to obtain local models from the frequent itemsets and combine these models to approximate the joint distribution.},
  keywords  = {clustering}
}
@article{arules:Omiecinski:2003,
  author   = {Omiecinski, Edward R.},
  title    = {Alternative Interest Measures for Mining Associations in Databases},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  year     = {2003},
  month    = {Jan/Feb},
  volume   = {15},
  number   = {1},
  pages    = {57--69},
  doi      = {10.1109/tkde.2003.1161582},
  abstract = {Omiecinski introduced several alternatives to support. The first measure, ANY-CONFIDENCE, is defined as the confidence of the rule with the largest confidence which can be generated from an itemset. The author states that although finding all itemsets with a set any-confidence would enable us to find all rules with a given minimum confidence, any-confidence cannot be used efficiently as a measure of interestingness since confidence is not downward closed. The second introduced measure is ALL-CONFIDENCE. This measure is defined as the smallest confidence of all rules which can be produced from an itemset, i.e., all rules produced from an itemset will have a confidence greater or equal to its all-confidence value. BOND, the last measure, is defined as the ratio of the number of transactions which contain all items of an itemset to the number of transactions which contain at least one of these items. Omiecinski showed that bond and all-confidence are downward closed and, therefore, can be used for efficient mining algorithms.},
  keywords = {no-support}
}
@inproceedings{arules:Orlando:2003,
  author    = {Orlando, Salvatore and Lucchese, Claudio and Palmerini, Paolo and Perego, Raffaele and Silvestri, Fabrizio},
  editor    = {Goethals, Bart and Zaki, Mohammed J.},
  title     = {{kDCI}: a Multi-Strategy Algorithm for Mining Frequent Sets},
  booktitle = {FIMI'03: Proceedings of the IEEE ICDM Workshop on Frequent Itemset Mining Implementations},
  year      = {2003},
  month     = {November},
  abstract  = {Introduces the kDCI algorithm.},
  keywords  = {algorithm,implementation}
}
@article{arules:Pavlov:2003,
  author   = {Pavlov, Dmitry and Mannila, Heikki and Smyth, Padhraic},
  title    = {Beyond Independence: Probabilistic Models for Query Approximation on Binary Transaction Data},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  year     = {2003},
  volume   = {15},
  number   = {6},
  pages    = {1409--1421},
  doi      = {10.1109/tkde.2003.1245281},
  abstract = {Investigates the use of probabilistic models (independence model, pair-wise interactions stored in a Chow-Liu Tree, mixtures of independence models, itemset inclusion-exclusion model, and the maximum entropy method) for the problem of generating fast approx. answers to queries for large sparse binary data sets.},
  keywords = {theory}
}
@inproceedings{arules:Ramesh:2003,
  author    = {Ramesh, Ganesh and Maniatty, William A. and Zaki, Mohammed J.},
  title     = {Feasible itemset distributions in data mining: theory and application},
  booktitle = {Symposium on Principles of Database Systems, PODS 2003},
  year      = {2003},
  address   = {San Diego, CA, USA},
  publisher = {ACM Press},
  doi       = {10.1145/773153.773181},
  abstract  = {Studies the length distributions of frequent and frequent maximal itemsets (the number of frequent itemsets with the same length). The length distribution determines the algorithms' performance and is important to generate realistic synthetic datasets.},
  keywords  = {theory}
}
@article{arules:Sung:2003,
  author   = {Sung, Sam Y. and Li, Zhao and Tan, Chew L. and Ng, Peter A.},
  title    = {Forecasting Association Rules Using Existing Data Sets},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  year     = {2003},
  month    = {Nov/Dec},
  volume   = {15},
  number   = {6},
  pages    = {1448--1459},
  doi      = {10.1109/tkde.2003.1245284},
  abstract = {Resample datasets proportional to background attributes (e.g., distribution of customers' sex) to forecast rules in a new situation (e.g., a new store at a new location).},
  keywords = {sampling}
}
@inproceedings{arules:Tao:2003,
  author    = {Tao, Feng and Murtagh, Fionn and Farid, Mohsen},
  title     = {Weighted Association Rule Mining using Weighted Support and Significance Framework},
  booktitle = {Proceedings of The Ninth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD-2003)},
  year      = {2003},
  address   = {Washington, DC},
  publisher = {ACM Press},
  doi       = {10.1145/956750.956836},
  abstract  = {Uses attributes of the items (e.g., price, page dwelling time) to WEIGHT SUPPORT. A support and significance framework is presented which possesses a weighted downward closure property important for pruning the search space.},
  keywords  = {var-support}
}
@inproceedings{arules:Xiong:2003,
  author    = {Xiong, Hui and Tan, Pang-Ning and Kumar, Vipin},
  editor    = {Goethals, Bart and Zaki, Mohammed J.},
  title     = {Mining Strong Affinity Association Patterns in Data Sets with Skewed Support Distribution},
  booktitle = {Proceedings of the IEEE International Conference on Data Mining, November 19--22, 2003, Melbourne, Florida},
  year      = {2003},
  month     = {November},
  pages     = {387--394},
  abstract  = {Support-based pruning strategies are not effective for data sets with skewed support distributions. The authors propose the concept of hyperclique pattern, which uses an objective measure called h-confidence (equal to all-confidence by Omiecinski, 2003) to identify strong affinity patterns. The generation of so-called cross-support patterns (patterns with items with substantially different support) is avoided by h-confidence's cross-support property.},
  keywords  = {no-support}
}
@article{arules:Coenen:2004,
  author   = {Coenen, Frans and Goulbourne, Graham and Leng, Paul},
  title    = {Tree structures for mining association rules},
  journal  = {Data Mining and Knowledge Discovery},
  year     = {2004},
  volume   = {8},
  pages    = {25--51},
  doi      = {10.1023/b:dami.0000005257.93780.3b},
  abstract = {Describes how to compute PARTIAL SUPPORT COUNTS in one DB-pass and how to store them in an enumeration tree (P-Tree).},
  keywords = {algorithm}
}
@article{arules:Coenen:2004a,
  author   = {Coenen, Frans and Leng, Paul and Ahmed, Shakil},
  title    = {Data Structures for Association Rule Mining: {T-Trees} and {P-Trees}},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  year     = {2004},
  volume   = {16},
  number   = {6},
  pages    = {774--778},
  doi      = {10.1109/tkde.2004.8},
  abstract = {Describes two new structures for association rule mining: T-trees (total support trees) and P-trees (partial support trees). The T-tree is a data structure (a compressed set enumeration tree) to store itemsets. The P-tree is a compressed way to represent a database in memory for mining.},
  keywords = {implementation}
}
@article{arules:Goethals:2004,
  author   = {Goethals, Bart and Zaki, Mohammed J.},
  title    = {Advances in Frequent Itemset Mining Implementations: Report on {FIMI}'03},
  journal  = {SIGKDD Explorations},
  year     = {2004},
  volume   = {6},
  number   = {1},
  pages    = {109--117},
  doi      = {10.1145/1007730.1007744},
  abstract = {This paper reports on the performance of different frequent itemset mining implementations on several real-world and artificial databases. The authors conclude that the latest algorithms (patricia, kdci, fpclose, fpmax*) outperform older ones but that currently no tested algorithm gracefully scales up to very large databases with millions of transactions.},
  keywords = {implementation}
}
@article{arules:Han:2004,
  author   = {Han, Jiawei and Pei, Jian and Yin, Yiwen and Mao, Runying},
  title    = {Mining frequent patterns without candidate generation},
  journal  = {Data Mining and Knowledge Discovery},
  year     = {2004},
  volume   = {8},
  pages    = {53--87},
  abstract = {Describes the data mining method FP-growth (frequent pattern growth) which uses an extended prefix-tree (FP-tree) structure to store the database in a compressed form. FP-growth adopts a divide-and-conquer approach to decompose both the mining tasks and the databases. It uses a pattern fragment growth method to avoid the costly process of candidate generation and testing.},
  keywords = {algorithm}
}
@article{arules:Sistrom:2004,
  author   = {Sistrom, C. L. and Garvan, C. W.},
  title    = {Proportions, odds, and risk},
  journal  = {Radiology},
  year     = {2004},
  volume   = {230},
  number   = {1},
  pages    = {12--19},
  doi      = {10.1148/radiol.2301031028},
  abstract = {Introduces relative risk.},
  keywords = {measure}
}
@article{arules:Tan:2004,
  author    = {Tan, Pang-Ning and Kumar, Vipin and Srivastava, Jaideep},
  title     = {Selecting the right objective measure for association analysis},
  journal   = {Information Systems},
  year      = {2004},
  volume    = {29},
  number    = {4},
  pages     = {293--313},
  publisher = {Elsevier Science Ltd.},
  doi       = {10.1016/s0306-4379(03)00072-3},
  abstract  = {Compare the properties of 21 objective measures (of interest). The measures in general lack to agree with each other. However, the authors show that if support-based pruning or table standardization (of the contingency tables) is used, the measures become highly correlated.},
  keywords  = {measures}
}
@inproceedings{arules:Yang:2004,
  author    = {Yang, Guizhen},
  title     = {The complexity of mining maximal frequent itemsets and maximal frequent patterns},
  booktitle = {Proceedings of the 2004 ACM SIGKDD international conference on Knowledge discovery and data mining},
  year      = {2004},
  address   = {Seattle, WA, USA},
  publisher = {ACM Press},
  doi       = {10.1145/1014052.1014091},
  abstract  = {Shows that enumerating all maximal frequent itemsets is NP-hard and the associated counting problem is \#P-complete.},
  keywords  = {theory}
}
@article{arules:Zaki:2004,
  author   = {Zaki, Mohammed J.},
  title    = {Mining Non-Redundant Association Rules},
  journal  = {Data Mining and Knowledge Discovery},
  year     = {2004},
  volume   = {9},
  pages    = {223--248},
  doi      = {10.1023/b:dami.0000040429.96086.c7},
  abstract = {Compares frequent itemsets and frequent closed itemsets and shows that frequent closed itemsets can be used to generate NON-REDUNDANT association rules. Non-Redundant rules are a set of rules with the most general rules (smallest antecedent and consequent) without loss of information.},
  keywords = {closed}
}
@inproceedings{arules:Blanchard:2005,
  author    = {Blanchard, Julien and Guillet, Fabrice and Briand, Henri and Gras, Regis},
  title     = {Assessing rule interestingness with a probabilistic measure of deviation from equilibrium},
  booktitle = {Proceedings of the 11th international symposium on Applied Stochastic Models and Data Analysis ASMDA-2005},
  year      = {2005},
  pages     = {191--200},
  publisher = {ENST},
  abstract  = {Presents a statistical test for the deviation from the equilibrium of a rule. The equilibrium for rule a -> b is defined as: the number of transactions which contain a and b together is equal to the number of transactions which contain a and not b.},
  keywords  = {measure}
}
@article{arules:Bonchi:2005,
  author   = {Bonchi, Francesco and Giannotti, Fosca and Mazzanti, Alessio and Pedreschi, Dino},
  title    = {{ExAnte}: {A} Preprocessing Method for Frequent-Pattern Mining},
  journal  = {IEEE Intelligent Systems},
  year     = {2005},
  volume   = {20},
  number   = {3},
  pages    = {25--31},
  doi      = {10.1109/mis.2005.45},
  abstract = {Reduces the database size before mining by iteratively applying mu-reduction and alpha-reduction. Mu-reduction removes transactions which do not meet monotone constraints. Alpha-reduction removes infrequent items from the transactions.},
  keywords = {constraint}
}
@article{arules:Gouda:2005,
  author   = {Gouda, Karam and Zaki, Mohammed J.},
  title    = {{GenMax}: {A}n Efficient Algorithm for Mining Maximal Frequent Itemsets},
  journal  = {Data Mining and Knowledge Discovery},
  year     = {2005},
  volume   = {11},
  pages    = {1--20},
  doi      = {10.1007/s10618-005-0002-x},
  abstract = {Presents a backtrack search based algorithm for mining maximal frequent itemsets. Uses: progressive focusing for maximality checking, and diffset propagation for frequency computation.},
  keywords = {maximal}
}
@inproceedings{arules:Jeske:2005,
  author    = {Jeske, Daniel R. and Samadi, Behrokh and Lin, Pengyue J. and Ye, Lan and Cox, Sean and Xiao, Rui and Younglove, Ted and Ly, Minh and Holt, Douglas and Rich, Ryan},
  title     = {Generation of synthetic data sets for evaluating the accuracy of knowledge discovery systems},
  booktitle = {Proceeding of the eleventh ACM SIGKDD international conference on Knowledge discovery in data mining},
  year      = {2005},
  pages     = {756--762},
  address   = {New York, NY, USA},
  publisher = {ACM Press},
  location  = {Chicago, Illinois, USA},
  doi       = {10.1145/1081870.1081969},
  isbn      = {1-59593-135-X},
  abstract  = {Generate synthetic data (e.g., credit card transaction data) for accuracy evaluation using semantic graphs.},
  keywords  = {evaluation}
}
@article{arules:Scheffer:2005,
  author    = {Scheffer, Tobias},
  title     = {Finding association rules that trade support optimally against confidence},
  journal   = {Intelligent Data Analysis},
  year      = {2005},
  volume    = {9},
  number    = {4},
  pages     = {381--395},
  publisher = {IOS Press},
  doi       = {10.3233/ida-2005-9405},
  abstract  = {Introduces predictive accuracy which is the expected value of the confidence of a rule with respect to the process underlying the database. The author shows how predictive accuracy can be calculated from confidence and support measured on a data set using a Bayesian frequency correction (very simplified: confidence is discounted for rules with low supports). Also an algorithm is presented which finds the top n most predictive association rules (redundant rules with a 0 predictive accuracy improvement are removed) and shows how to estimate the prior distribution needed for the correction.},
  keywords  = {theory,measures}
}
@article{arules:Seno:2005,
  author   = {Seno, Masakazu and Karypis, George},
  title    = {Finding Frequent Itemsets Using Length-Decreasing Support Constraint},
  journal  = {Data Mining and Knowledge Discovery},
  year     = {2005},
  volume   = {10},
  pages    = {197--228},
  abstract = {See Seno and Karypis 2001.},
  keywords = {var-support}
}
@article{arules:Webb:2003,
  author   = {Webb, Geoffrey I. and Zhang, Songmao S.},
  title    = {k-Optimal-Rule-Discovery},
  journal  = {Data Mining and Knowledge Discovery},
  year     = {2005},
  volume   = {10},
  number   = {1},
  pages    = {39--79},
  doi      = {10.1007/s10618-005-0255-4},
  abstract = {Develops GRD (based on the OPUS search strategy) which discovers all rules satisfying a set of constraints (max. number of rules, min support, min confidence, max coverage, max leverage) in a depth-first search. (An early draft of the paper was called: Beyond association rules: Generalized rule discovery)},
  keywords = {constraint}
}
@article{arules:Zaki:2005,
  author   = {Zaki, Mohammed J. and Hsiao, Ching-Jui},
  title    = {Efficient Algorithms for Mining Closed Itemsets and Their Lattice Structure},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  year     = {2005},
  volume   = {17},
  number   = {4},
  pages    = {462--478},
  doi      = {10.1109/tkde.2005.60},
  abstract = {Describes the algorithm CHARM.},
  keywords = {closed}
}
@article{arules:Bonchi:2006,
  author    = {Bonchi, Francesco and Lucchese, Claudio},
  title     = {On condensed representations of constrained frequent patterns},
  journal   = {Knowledge and Information Systems},
  year      = {2006},
  volume    = {9},
  number    = {2},
  pages     = {180--201},
  publisher = {Springer-Verlag New York, Inc.},
  abstract  = {Presents an algorithm to efficiently mine closed and constrained frequent itemsets.},
  keywords  = {closed,constraint}
}
@inproceedings{arules:Calders:2006,
  author    = {Calders, Toon and Rigotti, Christophe and Boulicaut, Jean-Francois},
  editor    = {Boulicaut, Jean-Francois and De Raedt, Luc and Mannila, Heikki},
  title     = {A Survey on Condensed Representations for Frequent Sets},
  booktitle = {Constraint-Based Mining and Inductive Databases: European Workshop on Inductive Databases and Constraint Based Mining, Hinterzarten, Germany, March 11--13, 2004, Revised Selected Papers},
  series    = {Lecture Notes in Computer Science},
  year      = {2006},
  month     = {February},
  volume    = {3848},
  pages     = {64--80},
  doi       = {10.1007/11615576_4},
  keywords  = {concise}
}
@article{arules:Geng:2006,
  author    = {Geng, Liqiang and Hamilton, Howard J.},
  title     = {Interestingness measures for data mining: A survey},
  journal   = {ACM Computing Surveys},
  year      = {2006},
  volume    = {38},
  number    = {3},
  pages     = {9},
  address   = {New York, NY, USA},
  publisher = {ACM},
  doi       = {10.1145/1132960.1132963},
  keywords  = {measures}
}
@article{arules:Hahsler:2006,
  author   = {Hahsler, Michael},
  title    = {A Model-Based Frequency Constraint for Mining Associations from Transaction Data},
  journal  = {Data Mining and Knowledge Discovery},
  year     = {2006},
  month    = {September},
  volume   = {13},
  number   = {2},
  pages    = {137--166},
  issn     = {1384-5810},
  doi      = {10.1007/s10618-005-0026-2},
  abstract = {Develops a novel model-based frequency constraint as an alternative to a single, user-specified minimum support. The constraint utilizes knowledge of the process generating transaction data by applying a simple stochastic mixture model (the NB model) and uses a user-specified precision threshold to find local frequency thresholds for groups of itemsets (NB-frequent itemsets). The new constraint provides improvements over a single minimum support threshold and that the precision threshold is more robust and easier to set and interpret by the user.},
  keywords = {no-support}
}
@article{arules:Li:2006,
  author    = {Li, Jiuyong},
  title     = {On Optimal Rule Discovery},
  journal   = {IEEE Transactions on Knowledge and Data Engineering},
  year      = {2006},
  volume    = {18},
  number    = {4},
  pages     = {460--471},
  address   = {Piscataway, NJ, USA},
  publisher = {IEEE Educational Activities Department},
  issn      = {1041-4347},
  doi       = {10.1109/tkde.2006.1599385},
  abstract  = {An optimal rule set (with respect to a metric of interestingness) contains all rules except those with no greater interestingness than one of its more general rules. An optimal rule set is a subset of a nonredundant rule set. The authors present an algorithm called ORD to find an optimal rule set. Classifiers built on optimal class association rules are at least as accurate as those built from CBA and C4.5 rule.},
  keywords  = {measures,classification}
}
@inproceedings{arules:Webb:2006,
  author    = {Webb, Geoffrey I.},
  title     = {Discovering significant rules},
  booktitle = {KDD '06: Proceedings of the 12th ACM SIGKDD international conference on Knowledge discovery and data mining},
  year      = {2006},
  pages     = {434--443},
  address   = {New York, NY, USA},
  publisher = {ACM Press},
  location  = {Philadelphia, PA, USA},
  doi       = {10.1145/1150402.1150451},
  isbn      = {1-59593-339-5},
  abstract  = {Compares two approaches (the well-known Bonferroni adjustment and a new evaluation using holdout data) to control the experimentwise risk of false discoveries for statistical hypothesis tests. Experimental results indicate that neither of the two approaches dominates the other.},
  keywords  = {theory}
}
@incollection{arules:Diatta:2007,
  author    = {Diatta, Jean and Ralambondrainy, Henri and Totohasina, Andr{\'e}},
  title     = {Towards a Unifying Probabilistic Implicative Normalized Quality Measure for Association Rules},
  booktitle = {Quality Measures in Data Mining},
  editor    = {Guillet, Fabrice J. and Hamilton, Howard J.},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  year      = {2007},
  pages     = {237--250},
  isbn      = {978-3-540-44918-8},
  doi       = {10.1007/978-3-540-44918-8_10},
  abstract  = {Introduces the Ralambondrainy measure.},
  keywords  = {measure}
}
@article{arules:Hahsler:2007,
  author        = {Michael Hahsler and Kurt Hornik},
  title         = {New Probabilistic Interest Measures for Association Rules},
  journal       = {Intelligent Data Analysis},
  year          = {2007},
  volume        = {11},
  number        = {5},
  pages         = {437--455},
  issn          = {1088-467X},
  doi           = {10.3233/IDA-2007-11502},
  eprint        = {0803.0966},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/0803.0966},
  pdf           = {http://michael.hahsler.net/research/hyperConfidence_IDA2007/hyperConfidence.pdf},
  abstract      = {Develops the interest measures, hyper-lift and hyper-confidence, which can be used to filter or order mined association rules. The measures are related to Fisher's exact test and show significantly better performance than lift for applications where spurious rules are problematic.},
  keywords      = {measures}
}
@article{arules:Han:2007,
  author   = {Han, J. and Cheng, H. and Xin, D. and Yan, X.},
  title    = {Frequent Pattern Mining: Current Status and Future Directions},
  journal  = {Data Mining and Knowledge Discovery},
  year     = {2007},
  volume   = {15},
  number   = {1},
  pages    = {55--86},
  doi      = {10.1007/s10618-006-0059-1},
  abstract = {Complete overview of the state of the art in frequent pattern mining and identifies future research directions.},
  keywords = {algorithm, concise, sequential}
}
@incollection{arules:Lenca:2007,
  author    = {Lenca, Philippe and Vaillant, Beno{\^i}t and Meyer, Patrick and Lallich, Stephane},
  title     = {Association Rule Interestingness Measures: Experimental and Theoretical Studies},
  booktitle = {Quality Measures in Data Mining},
  editor    = {Guillet, Fabrice J. and Hamilton, Howard J.},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  year      = {2007},
  pages     = {51--76},
  isbn      = {978-3-540-44918-8},
  doi       = {10.1007/978-3-540-44918-8_3},
  abstract  = {Compares interest measures.},
  keywords  = {measure}
}
@inproceedings{arules:Kennett:2008,
  author    = {Kenett, Ron and Salini, Silvia},
  title     = {Relative Linkage Disequilibrium: A New Measure for Association Rules},
  booktitle = {Advances in Data Mining. Medical Applications, E-Commerce, Marketing, and Theoretical Aspects},
  editor    = {Perner, Petra},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  year      = {2008},
  pages     = {189--199},
  isbn      = {978-3-540-70720-2},
  abstract  = {Introduces Relative Linkage Disequilibrium. RLD is an association measure motivated by indices used in population genetics. It evaluates the deviation of the support of the whole rule from the support expected under independence given the supports of X and Y.},
  keywords  = {measure}
}
@article{arules:McNicholas:2008,
  author   = {P.D. McNicholas and T.B. Murphy and M. O'Regan},
  title    = {Standardising the lift of an association rule},
  journal  = {Computational Statistics \& Data Analysis},
  year     = {2008},
  volume   = {52},
  number   = {10},
  pages    = {4712--4721},
  issn     = {0167-9473},
  doi      = {10.1016/j.csda.2008.03.013},
  abstract = {Standardized lift uses the minimum and maximum lift that can be reached for each rule to standardize lift between 0 and 1.},
  keywords = {measure}
}
@inproceedings{arules:Jalali-Heravi:2010,
  author    = {Jalali-Heravi, Mojdeh and Za{\"\i}ane, Osmar R.},
  title     = {A study on interestingness measures for associative classifiers},
  booktitle = {Proceedings of the 2010 ACM Symposium on Applied Computing},
  series    = {SAC '10},
  year      = {2010},
  pages     = {1039--1046},
  publisher = {ACM},
  doi       = {10.1145/1774088.1774306},
  abstract  = {Compares associative classifiers using 53 different objective measures for association rules.},
  keywords  = {classification}
}
@article{arules:Wu:2010,
  author   = {Wu, Tianyi and Chen, Yuguo and Han, Jiawei},
  title    = {Re-examination of interestingness measures in pattern mining: a unified framework},
  journal  = {Data Mining and Knowledge Discovery},
  year     = {2010},
  month    = jan,
  issn     = {1384-5810},
  doi      = {10.1007/s10618-009-0161-2},
  abstract = {Re-examines a set of null-invariant interestingness measures (AllConf, Coherence, Cosine, Kulc, MaxConf) and shows that they can be expressed as the generalized mathematical mean, leading to a total ordering of them. Also proposes a new measure called Imbalance Ratio.},
  keywords = {measure}
}
@article{arules:Balcazar:2013,
  author     = {Balc{\'a}zar, Jos{\'e} L.},
  title      = {Formal and Computational Properties of the Confidence Boost of Association Rules},
  journal    = {ACM Transactions on Knowledge Discovery from Data},
  year       = {2013},
  month      = dec,
  issue_date = {November 2013},
  volume     = {7},
  number     = {4},
  articleno  = {19},
  numpages   = {41},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  issn       = {1556-4681},
  doi        = {10.1145/2541268.2541272},
  abstract   = {Introduces the measure confidence boost to help to obtain small and crisp sets of mined association rules.},
  keywords   = {measure}
}
@article{arules:Li:2014,
  author    = {Li, Jiuyong and Liu, Jixue and Toivonen, Hannu and Satou, Kenji and Sun, Youqiang and Sun, Bingyu},
  title     = {Discovering Statistically Non-Redundant Subgroups},
  journal   = {Knowledge-Based Systems},
  year      = {2014},
  volume    = {67},
  pages     = {315--327},
  publisher = {Elsevier Science Publishers B. V.},
  issn      = {0950-7051},
  doi       = {10.1016/j.knosys.2014.04.030},
  abstract  = {Uses a confidence interval around the rule's odds ratio to define redundant rules. Following this definition, the paper presents an efficient algorithm to mine non-redundant rules.},
  keywords  = {measure}
}
@inproceedings{arules:Ochin:2008,
  author    = {Ochin and Kumar, Suresh and Joshi, Nisheeth},
  title     = {Rule Power Factor: A New Interest Measure in Associative Classification},
  booktitle = {6th International Conference On Advances In Computing and Communications, ICACC 2016},
  year      = {2016},
  address   = {Cochin, India},
  doi       = {10.1016/j.procs.2016.07.175},
  abstract  = {The rule power factor weights the confidence of a rule by its support.},
  keywords  = {measure}
}
@article{arules:Lopez:2014,
  author   = {L{\'o}pez, Griselda and Abell{\'a}n, Joaqu{\'\i}n and Montella, Alfonso and de O{\~n}a, Juan},
  title    = {Patterns of Single-Vehicle Crashes on Two-Lane Rural Highways in Granada Province, Spain: In-Depth Analysis Through Decision Rules},
  journal  = {Transportation Research Record},
  year     = {2014},
  volume   = {2432},
  number   = {1},
  pages    = {133--141},
  doi      = {10.3141/2432-16},
  abstract = {Introduces lift increase (LIC).},
  keywords = {measure}
}
@manual{arules:Hahsler:2023,
  author   = {Michael Hahsler and Christian Buchta and Bettina Gruen and Kurt Hornik},
  title    = {arules: Mining Association Rules and Frequent Itemsets},
  year     = {2023},
  note     = {R package version 1.7-7},
  url      = {https://github.com/mhahsler/arules},
  abstract = {R infrastructure for association rule mining. Implements several algorithms and generalizations of measures.},
  keywords = {measure, algorithm}
}
@comment{{jabref-meta: databaseType:bibtex;}}
@comment{{jabref-meta: saveActions:disabled; all-text-fields[identity] date[normalize_date] month[normalize_month] pages[normalize_page_numbers] ;}}
@comment{{jabref-meta: saveOrderConfig:specified;year;false;citationkey;false;author;false;title;false;}}
This file was generated by bibtex2html 1.99.