apriori算法
发布时间:2020-05-25 17:03:51 所属栏目:Python 来源:互联网
导读:apriori算法
|
下面是脚本之家 jb51.cc 通过网络收集整理的代码片段。 脚本之家小编现在分享给大家,也给大家做个参考。 """
Description : Simple Python implementation of the Apriori Algorithm
Usage:
$python apriori.py -f DATASET.csv -s minSupport -c minConfidence
$python apriori.py -f DATASET.csv -s 0.15 -c 0.6
"""
import sys
from itertools import chain,combinations
from collections import defaultdict
from optparse import OptionParser
def subsets(arr):
    """Return an iterator over every non-empty subset of ``arr``.

    Subsets are produced as tuples, grouped by size (all 1-element
    subsets first, then all 2-element subsets, and so on), in the order
    ``itertools.combinations`` emits them.

    ``arr`` may be any iterable.  It is materialised into a tuple once so
    that one-shot iterators (generators, file objects) are handled
    correctly instead of being consumed part-way through.
    """
    pool = tuple(arr)
    return chain.from_iterable(
        combinations(pool, size) for size in range(1, len(pool) + 1))
def returnItemsWithMinSupport(itemSet, transactionList, minSupport, freqSet):
    """Filter candidate itemsets down to those meeting ``minSupport``.

    For each candidate in ``itemSet`` the number of transactions in
    ``transactionList`` that contain it is counted.  That count is added
    to ``freqSet[candidate]`` (``freqSet`` is updated in place; candidates
    that occur in no transaction are not touched).  A candidate is kept
    when ``count / len(transactionList) >= minSupport``.

    Returns a new set holding the candidates that were kept.
    """
    hit_counts = {}
    for candidate in itemSet:
        hits = sum(1 for transaction in transactionList
                   if candidate.issubset(transaction))
        if hits:
            freqSet[candidate] += hits
            hit_counts[candidate] = hits

    total = len(transactionList)
    return set(candidate
               for candidate, hits in hit_counts.items()
               if float(hits) / total >= minSupport)
def joinSet(itemSet, length):
    """Join ``itemSet`` with itself, keeping unions of exactly ``length`` items.

    Every ordered pair of members is unioned; the result is the set of
    those unions whose size equals ``length``.
    """
    joined = set()
    for left in itemSet:
        for right in itemSet:
            merged = left.union(right)
            if len(merged) == length:
                joined.add(merged)
    return joined
def getItemSetTransactionList(data_iterator):
    """Split raw records into 1-itemsets and a list of transactions.

    Each record from ``data_iterator`` is frozen into a ``frozenset`` and
    collected, in order, into the transaction list.  Every distinct item
    seen in any transaction becomes a single-element ``frozenset`` in the
    returned item set.

    Returns ``(itemSet, transactionList)``.
    """
    transactionList = [frozenset(record) for record in data_iterator]
    # Generate the 1-itemsets from every item of every transaction.
    itemSet = set(frozenset([item])
                  for transaction in transactionList
                  for item in transaction)
    return itemSet, transactionList
def runApriori(data_iter, minConfidence, minSupport=0.15):
    """Run the Apriori algorithm over an iterable of transaction records.

    Parameters:
        data_iter: iterable of records; each record is an iterable of
            hashable items (see ``getItemSetTransactionList``).
        minConfidence: a rule is reported when its confidence is greater
            than or equal to this value.
        minSupport: an itemset is kept when its support is greater than or
            equal to this value.  Defaults to 0.15, the same default the
            command-line ``-s`` option uses.

    Returns a pair ``(items, rules)``:
        - items: list of ``(itemset_tuple, support)``
        - rules: list of ``((antecedent_tuple, consequent_tuple), confidence)``
    """
    itemSet, transactionList = getItemSetTransactionList(data_iter)

    # Raw occurrence count of every candidate itemset that matched at
    # least one transaction; filled in by returnItemsWithMinSupport.
    freqSet = defaultdict(int)
    # Maps itemset size k to the set of k-itemsets that met minSupport.
    largeSet = dict()

    currentLSet = returnItemsWithMinSupport(
        itemSet, transactionList, minSupport, freqSet)
    k = 2
    while currentLSet:
        largeSet[k - 1] = currentLSet
        candidates = joinSet(currentLSet, k)
        currentLSet = returnItemsWithMinSupport(
            candidates, transactionList, minSupport, freqSet)
        k += 1

    def getSupport(item):
        """Return the support of ``item`` as a fraction of all transactions."""
        return float(freqSet[item]) / len(transactionList)

    toRetItems = []
    for size in sorted(largeSet):
        toRetItems.extend(
            (tuple(item), getSupport(item)) for item in largeSet[size])

    toRetRules = []
    for size in sorted(largeSet):
        # A rule needs a non-empty antecedent and consequent, so itemsets
        # of size 1 cannot produce any; skip them explicitly rather than
        # relying on dict ordering.
        if size < 2:
            continue
        for item in largeSet[size]:
            for element in map(frozenset, subsets(item)):
                remain = item.difference(element)
                if not remain:
                    continue
                confidence = getSupport(item) / getSupport(element)
                if confidence >= minConfidence:
                    toRetRules.append(
                        ((tuple(element), tuple(remain)), confidence))
    return toRetItems, toRetRules


def printResults(items, rules):
    """Print the frequent itemsets followed by the association rules.

    ``items`` is an iterable of ``(itemset, support)`` pairs and ``rules``
    an iterable of ``((antecedent, consequent), confidence)`` pairs, i.e.
    the two values returned by ``runApriori``.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    for item, support in items:
        print("item: %s,%.3f" % (str(item), support))
    print("\n------------------------ RULES:")
    for rule, confidence in rules:
        pre, post = rule
        print("Rule: %s ==> %s,%.3f" % (str(pre), str(post), confidence))


def _recordsFromLines(lines):
    """Yield one frozenset of comma-separated fields per text line."""
    for line in lines:
        line = line.strip().rstrip(',')  # Remove trailing comma
        yield frozenset(line.split(','))


def dataFromFile(fname):
    """Read ``fname`` line by line and yield each line as a record.

    Surrounding whitespace and trailing commas are stripped from each
    line, which is then split on ``,`` and yielded as a ``frozenset``.
    The file is closed once the generator is exhausted or closed.
    """
    with open(fname, 'r') as file_iter:
        for record in _recordsFromLines(file_iter):
            yield record


if __name__ == "__main__":
    optparser = OptionParser()
    optparser.add_option('-f', '--inputFile', dest='input',
                         help='filename containing csv', default=None)
    optparser.add_option('-s', '--minSupport', dest='minS',
                         help='minimum support value', default=0.15,
                         type='float')
    optparser.add_option('-c', '--minConfidence', dest='minC',
                         help='minimum confidence value', default=0.6,
                         type='float')
    (options, args) = optparser.parse_args()

    if options.input is None:
        # No file given: read comma-separated records from standard input,
        # parsed the same way as lines of a file.
        inFile = _recordsFromLines(sys.stdin)
    else:
        inFile = dataFromFile(options.input)

    minSupport = options.minS
    minConfidence = options.minC
    items, rules = runApriori(inFile, minConfidence, minSupport=minSupport)
    printResults(items, rules)
以上是脚本之家(jb51.cc)为你收集整理的全部代码内容,希望文章能够帮你解决所遇到的程序开发问题。 如果觉得脚本之家网站内容还不错,欢迎将脚本之家网站推荐给程序员好友。 (编辑:安卓应用网) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容! |
推荐文章
站长推荐
热点阅读
