import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
# ---------------------------------------------------------------------------
# Market-basket demo: mine frequent itemsets with Apriori, then derive
# association rules from them.
# ---------------------------------------------------------------------------

# Tunable thresholds, gathered in one place.
# An itemset is kept only if its support is at least this value
# (0.5 -> it occurs in at least half of the transactions).
min_support_threshold = 0.5
# A rule is kept only if its confidence is at least this value.
min_confidence_threshold = 0.7

# Toy transaction log: one inner list per transaction, one string per
# purchased item. The last transaction lists 'Onion' twice; it is kept as-is.
dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
]

# Encode the transactions into the one-hot table that apriori() consumes:
# learn the item vocabulary first, then transform the same data with it.
te = TransactionEncoder()
te.fit(dataset)
te_ary = te.transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)

# Uncomment to inspect the encoded table:
# print("One-hot encoded DataFrame:")
# print(df)

# --- Step 1: frequent itemsets ---------------------------------------------
# use_colnames=True reports itemsets by item name rather than column index.
frequent_itemsets = apriori(
    df,
    min_support=min_support_threshold,
    use_colnames=True,
)
print("\nFrequent Itemsets (min_support=", min_support_threshold, "):")
print(frequent_itemsets)

# --- Step 2: association rules ---------------------------------------------
# Rules are filtered on the chosen metric (confidence here); other metrics
# such as lift can be used for filtering as well.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=min_confidence_threshold,
)
print("\nAssociation Rules (min_confidence=", min_confidence_threshold, "):")

# Show only a readable subset of the rule columns.
display_columns = [
    'antecedents',
    'consequents',
    'support',
    'confidence',
    'lift',
    'leverage',
    'conviction',
]
print(rules[display_columns])

# Optional follow-ups (uncomment to try them).
#
# Keep only the rules whose lift exceeds 1:
# high_lift_rules = rules[rules['lift'] > 1]
# print("\nHigh Lift Association Rules (lift > 1):")
# print(high_lift_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])
#
# Keep only the rules whose consequent is exactly 'Kidney Beans':
# rules_for_beans = rules[rules['consequents'] == {'Kidney Beans'}]
# print("\nRules leading to Kidney Beans:")
# print(rules_for_beans)