Python Codes Arules

Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1 of 17

Python Codes :

Books data set :


=>Importing pandas library
from collections import Counter

import matplotlib.pyplot as plt
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# => Loading the data set
# The file is read as plain text because the transactions are split by hand
# below; a DataFrame (what pd.read_csv returns) has no .split() method.
# NOTE(review): assumes one comma-separated transaction per line and no
# header row -- confirm against the data file.
with open("C:/Users/NEHAL RAJ/Downloads/Datasets (6)/book.csv", encoding="utf-8") as book_file:
    book = book_file.read()

# => Splitting the data
# splitting the data into separate transactions using separator as "\n"
book = book.split("\n")

print(book)

# Split every transaction into its items wherever a comma (,) occurs.
book_list = [transaction.split(",") for transaction in book]

# Flatten: walk through every transaction and collect each of its items
# into one long list.
all_book_list = [i for item in book_list for i in item]

# Count every item in the flattened book list and keep the counts in
# item_frequencies.
item_frequencies = Counter(all_book_list)

# after sorting (ascending by count)
item_frequencies = sorted(item_frequencies.items(), key=lambda x: x[1])

# Storing frequencies and items in separate variables, most frequent first
frequencies = list(reversed([i[1] for i in item_frequencies]))
items = list(reversed([i[0] for i in item_frequencies]))

# barplot of the most frequent items (at most 11 bars; fewer when the data
# set has fewer distinct items, so bars and tick labels always match)
top_n = min(11, len(items))
# The colours are passed as a list: a multi-letter string such as 'rgbkymc'
# is not accepted as a colour sequence by current matplotlib.
plt.bar(height=frequencies[:top_n], x=list(range(top_n)), color=list('rgbkymc'))
plt.xticks(list(range(top_n)), items[:top_n])
plt.xlabel("items")
plt.ylabel("Count")
plt.show()

# Creating Data Frame for the transactions data
book_series = pd.DataFrame(pd.Series(book_list))
# Keeps the first 2000 rows; per the original note this removes the last
# empty transaction (assumes the file holds exactly 2000 transactions).
book_series = book_series.iloc[:2000, :]

# => Giving the column name
book_series.columns = ["trans"]

# creating a dummy column for each item in each transaction ... using the
# item names as column names
X = book_series['trans'].str.join(sep='*').str.get_dummies(sep='*')
frequent_itemsets = apriori(X, min_support=0.0075, max_len=4, use_colnames=True)

# Most frequent item sets based on support
frequent_itemsets.sort_values('support', ascending=False, inplace=True)

top_sets = min(11, len(frequent_itemsets))
plt.bar(x=list(range(top_sets)), height=frequent_itemsets.support.iloc[:top_sets], color=list('rgmyk'))
plt.xticks(list(range(top_sets)), frequent_itemsets.itemsets.iloc[:top_sets])
plt.xlabel('item-sets')
plt.ylabel('support')
plt.show()

# Association rules with lift of at least 1
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
# Printed explicitly: a bare expression shows nothing when run as a script.
print(rules.head(10))
print(rules.sort_values('lift', ascending=False).head(10))

Python codes used :


import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from collections import Counter
import matplotlib.pyplot as plt

Movie data set :

=>Importing pandas library


from collections import Counter

import matplotlib.pyplot as plt
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# => Loading the data set
# The file is read as plain text because the transactions are split by hand
# below; a DataFrame (what pd.read_csv returns) has no .split() method.
# NOTE(review): assumes one comma-separated transaction per line and no
# header row -- confirm against the data file.
with open("C:\\Users\\NEHAL RAJ\\Downloads\\Datasets (6)\\my_movies.csv", encoding="utf-8") as movie_file:
    movie = movie_file.read()

# => Splitting the data
# splitting the data into separate transactions using separator as "\n"
movie = movie.split("\n")

# Split every transaction into its items wherever a comma (,) occurs.
movie_list = [transaction.split(",") for transaction in movie]

# Flatten: walk through every transaction and collect each of its items
# into one long list.
all_movie_list = [i for item in movie_list for i in item]

# Count every item in the flattened movie list (the original counted
# all_book_list, i.e. the books data) and keep the counts in item_frequencies.
item_frequencies = Counter(all_movie_list)

# after sorting (ascending by count)
item_frequencies = sorted(item_frequencies.items(), key=lambda x: x[1])

# Storing frequencies and items in separate variables, most frequent first
frequencies = list(reversed([i[1] for i in item_frequencies]))
items = list(reversed([i[0] for i in item_frequencies]))

# barplot of the most frequent items (at most 11 bars; fewer when the data
# set has fewer distinct items, so bars and tick labels always match)
top_n = min(11, len(items))
# The colours are passed as a list: a multi-letter string such as 'rgbkymc'
# is not accepted as a colour sequence by current matplotlib.
plt.bar(height=frequencies[:top_n], x=list(range(top_n)), color=list('rgbkymc'))
plt.xticks(list(range(top_n)), items[:top_n], rotation=30)
plt.xlabel("items")
plt.ylabel("Count")
plt.show()

# Creating Data Frame for the transactions data
movie_series = pd.DataFrame(pd.Series(movie_list))

# => Giving the column name (on movie_series; the original renamed
# book_series, leaving no 'trans' column to read from below)
movie_series.columns = ["trans"]

# creating a dummy column for each item in each transaction ... using the
# item names as column names
X = movie_series['trans'].str.join(sep='*').str.get_dummies(sep='*')

frequent_itemsets = apriori(X, min_support=0.0075, max_len=4, use_colnames=True)

# Most frequent item sets based on support
frequent_itemsets.sort_values('support', ascending=False, inplace=True)

top_sets = min(11, len(frequent_itemsets))
plt.bar(x=list(range(top_sets)), height=frequent_itemsets.support.iloc[:top_sets], color=list('rgmyk'))
plt.xticks(list(range(top_sets)), frequent_itemsets.itemsets.iloc[:top_sets], rotation=15)
plt.xlabel('item-sets')
plt.ylabel('support')
plt.show()

# NOTE(review): no min_threshold is passed here, so the library default
# applies, whereas the other data sets in this document use
# min_threshold=1 -- confirm which is intended.
rules = association_rules(frequent_itemsets, metric="lift")
# Printed explicitly: a bare expression shows nothing when run as a script.
print(rules.head(10))
print(rules.sort_values('lift', ascending=False).head(10))

Python codes used :


import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from collections import Counter
import matplotlib.pyplot as plt

Phone data set :

from collections import Counter

import matplotlib.pyplot as plt
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Loading the data set
# Iterating over a DataFrame yields its column labels, not its rows, so the
# file is read as plain text and every non-empty line becomes one transaction.
# NOTE(review): assumes one comma-separated transaction per line and no
# header row -- confirm against the data file.
with open("C:\\Users\\NEHAL RAJ\\Downloads\\Datasets (6)\\myphonedata.csv", encoding="utf-8") as data_file:
    data = data_file.read().splitlines()

# Split every transaction into its items wherever a comma (,) occurs.
data_list = [transaction.split(",") for transaction in data if transaction]

# Flatten: walk through every transaction and collect each of its items
# into one long list.
all_data_list = [i for item in data_list for i in item]

# Count how often each item occurs across all transactions.
item_frequencies = Counter(all_data_list)

# after sorting (ascending by count)
item_frequencies = sorted(item_frequencies.items(), key=lambda x: x[1])

# Storing frequencies and items in separate variables, most frequent first
frequencies = list(reversed([i[1] for i in item_frequencies]))
items = list(reversed([i[0] for i in item_frequencies]))

# barplot of the top 5 items (fewer when the data set has fewer distinct
# items). The labels are sliced to the same length as the tick positions;
# the original paired 5 positions with up to 11 labels.
top_n = min(5, len(items))
# The colours are passed as a list: a multi-letter string such as 'rgbkymc'
# is not accepted as a colour sequence by current matplotlib.
plt.bar(height=frequencies[:top_n], x=list(range(top_n)), color=list('rgbkymc'))
plt.xticks(list(range(top_n)), items[:top_n], rotation=30)
plt.xlabel("items")
plt.ylabel("Count")
plt.show()

# Creating Data Frame for the transactions data
data_series = pd.DataFrame(pd.Series(data_list))

data_series.columns = ["trans"]

# creating a dummy column for each item in each transaction ... using the
# item names as column names
X = data_series['trans'].str.join(sep='*').str.get_dummies(sep='*')

frequent_itemsets = apriori(X, min_support=0.0075, max_len=4, use_colnames=True)

# Most frequent item sets based on support
frequent_itemsets.sort_values('support', ascending=False, inplace=True)

top_sets = min(5, len(frequent_itemsets))
plt.bar(x=list(range(top_sets)), height=frequent_itemsets.support.iloc[:top_sets], color=list('rgmyk'))
plt.xticks(list(range(top_sets)), frequent_itemsets.itemsets.iloc[:top_sets], rotation=15)
plt.xlabel('item-sets')
plt.ylabel('support')
plt.show()

# Association rules with lift of at least 1
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

# Printed explicitly: a bare expression shows nothing when run as a script.
print(rules.head(5))
print(rules.sort_values('lift', ascending=False).head(5))
=>Python codes used :
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from collections import Counter
import matplotlib.pyplot as plt

Transactions data set :

from collections import Counter

import matplotlib.pyplot as plt
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# => Loading the data set
# Iterating over a DataFrame yields its column labels, not its rows, so the
# file is read as plain text and every non-empty line becomes one transaction.
# NOTE(review): assumes one comma-separated transaction per line and no
# header row -- confirm against the data file.
with open("C:\\Users\\NEHAL RAJ\\Downloads\\Datasets (6)\\transactions_retail1.csv", encoding="utf-8") as tr_file:
    tr = tr_file.read().splitlines()

# Split every transaction into its items wherever a comma (,) occurs.
tr_list = [transaction.split(",") for transaction in tr if transaction]

# Flatten: walk through every transaction and collect each of its items
# into one long list. (The original built all_data_list from data_list, the
# phone data, while the counter below reads all_tr_list.)
all_tr_list = [i for item in tr_list for i in item]

# Count how often each item occurs across all transactions.
item_frequencies = Counter(all_tr_list)

# after sorting (ascending by count)
item_frequencies = sorted(item_frequencies.items(), key=lambda x: x[1])

# Storing frequencies and items in separate variables, most frequent first
frequencies = list(reversed([i[1] for i in item_frequencies]))
items = list(reversed([i[0] for i in item_frequencies]))

# barplot of the top 5 items (fewer when the data set has fewer distinct
# items, so bars and tick labels always match)
top_n = min(5, len(items))
# The colours are passed as a list: a multi-letter string such as 'rgbkymc'
# is not accepted as a colour sequence by current matplotlib.
plt.bar(height=frequencies[:top_n], x=list(range(top_n)), color=list('rgbkymc'))
plt.xticks(list(range(top_n)), items[:top_n], rotation=30)
plt.xlabel("items")
plt.ylabel("Count")
plt.show()

# Creating Data Frame for the transactions data
tr_series = pd.DataFrame(pd.Series(tr_list))

tr_series.columns = ["trans"]

# creating a dummy column for each item in each transaction ... using the
# item names as column names
X = tr_series['trans'].str.join(sep='*').str.get_dummies(sep='*')

frequent_itemsets = apriori(X, min_support=0.0075, max_len=4, use_colnames=True)

# Most frequent item sets based on support
frequent_itemsets.sort_values('support', ascending=False, inplace=True)

top_sets = min(5, len(frequent_itemsets))
plt.bar(x=list(range(top_sets)), height=frequent_itemsets.support.iloc[:top_sets], color=list('rgmyk'))
plt.xticks(list(range(top_sets)), frequent_itemsets.itemsets.iloc[:top_sets], rotation=15)
plt.xlabel('item-sets')
plt.ylabel('support')
plt.show()

# Association rules with lift of at least 1
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

# Printed explicitly: a bare expression shows nothing when run as a script.
print(rules.head(20))
print(rules.sort_values('lift', ascending=False).head(10))
=>Python codes used :
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from collections import Counter
import matplotlib.pyplot as plt

You might also like