写在之前
本书涉及的源程序和数据都可以在以下网站中找到:http://guidetodatamining.com/ 这本书理论比较简单,书中错误较少,动手锻炼较多,如果每个代码都自己写出来,收获不少。总结:适合入门。欢迎转载,转载请注明出处,如有问题欢迎指正。合集地址:https://www.zybuluo.com/hainingwyx/note/559139
协同过滤
相似用户评判标准:曼哈顿距离、欧氏距离、明氏距离。
-
# Manhattan.py
-
# Sample data: user name -> {band name: rating}.
users = {
    "Angelica": {"Blues Traveler": 3.5, "Broken Bells": 2.0, "Norah Jones": 4.5,
                 "Phoenix": 5.0, "Slightly Stoopid": 1.5, "The Strokes": 2.5,
                 "Vampire Weekend": 2.0},
    "Bill": {"Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0,
             "Phoenix": 2.0, "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0},
    "Chan": {"Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0,
             "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0},
    "Dan": {"Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5,
            "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
            "Vampire Weekend": 2.0},
    "Hailey": {"Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0,
               "The Strokes": 4.0, "Vampire Weekend": 1.0},
    "Jordyn": {"Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0,
               "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
               "Vampire Weekend": 4.0},
    "Sam": {"Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0,
            "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0},
    "Veronica": {"Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0,
                 "Slightly Stoopid": 2.5, "The Strokes": 3.0},
}
-
-
def manhattan(rating1, rating2):
    """Compute the Manhattan distance between two rating dictionaries.

    Both rating1 and rating2 are dictionaries of the form
    {'The Strokes': 3.0, 'Slightly Stoopid': 2.5}. Only keys present in
    both dictionaries contribute to the distance.

    Returns the sum of absolute rating differences over the shared keys,
    or -1 when the two dictionaries have no key in common.
    """
    shared = [key for key in rating1 if key in rating2]
    if not shared:
        return -1  # Indicates no ratings in common
    return sum(abs(rating1[key] - rating2[key]) for key in shared)
-
-
def computeNearestNeighbor(username, users):
    """Create a list of the other users sorted by their distance to username.

    users maps a user name to that user's rating dictionary. The result is
    a list of (distance, user) tuples for every user except username,
    closest first. Distances come from manhattan().
    """
    target = users[username]
    distances = [(manhattan(users[user], target), user)
                 for user in users if user != username]
    # manhattan() returns -1 for "no ratings in common". A plain sort would
    # rank those users as the closest ones, so order them after every user
    # that has a real (non-negative) distance.
    distances.sort(key=lambda pair: (pair[0] < 0, pair))
    return distances
-
-
def recommend(username, users):
    """Give a list of recommendations for username.

    Finds the nearest neighbor with computeNearestNeighbor(), prints that
    neighbor's name, and returns the (artist, rating) pairs the neighbor
    rated but username did not, highest rating first. Returns an empty
    list when there is no other user to compare against.
    """
    # first find nearest neighbor
    neighbors = computeNearestNeighbor(username, users)
    if not neighbors:
        return []
    nearest = neighbors[0][1]
    # Function-call form of print: valid on both Python 2 and Python 3.
    print(nearest)

    # now find bands neighbor rated that user didn't
    neighborRatings = users[nearest]
    userRatings = users[username]
    recommendations = [(artist, rating)
                       for artist, rating in neighborRatings.items()
                       if artist not in userRatings]
    # using the fn sorted for variety - sort is more efficient
    return sorted(recommendations,
                  key=lambda artistTuple: artistTuple[1], reverse=True)
-
# -*- coding: utf-8 -*-
-
from math import sqrt
-
-
# Sample data: user name -> {band name: rating}.
users = {
    "Angelica": {"Blues Traveler": 3.5, "Broken Bells": 2.0, "Norah Jones": 4.5,
                 "Phoenix": 5.0, "Slightly Stoopid": 1.5, "The Strokes": 2.5,
                 "Vampire Weekend": 2.0},
    "Bill": {"Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0,
             "Phoenix": 2.0, "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0},
    "Chan": {"Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0,
             "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0},
    "Dan": {"Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5,
            "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
            "Vampire Weekend": 2.0},
    "Hailey": {"Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0,
               "The Strokes": 4.0, "Vampire Weekend": 1.0},
    "Jordyn": {"Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0,
               "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
               "Vampire Weekend": 4.0},
    "Sam": {"Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0,
            "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0},
    "Veronica": {"Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0,
                 "Slightly Stoopid": 2.5, "The Strokes": 3.0},
}
-
-
# Minkowski distance
def minkowski(rating1, rating2, r):
    """Compute the Minkowski distance of order r between two rating dicts.

    rating1 and rating2 are dictionaries mapping an item to a rating; only
    items present in both contribute. r=1 gives the Manhattan distance and
    r=2 the Euclidean distance.

    Returns the distance, or -1 when the dictionaries share no item.
    """
    total = 0
    commonRatings = False
    for key in rating1:
        if key in rating2:
            total += pow(abs(rating1[key] - rating2[key]), r)
            commonRatings = True
    if not commonRatings:
        return -1  # Indicates no ratings in common
    # Take the r-th root only once there is something to measure.
    return pow(total, 1.0 / r)
-
-
def computeNearestNeighbor(username, users):
    """Create a list of the other users sorted by their distance to username.

    users maps a user name to that user's rating dictionary. The result is
    a list of (distance, user) tuples for every user except username,
    closest first. Distances come from minkowski() with r=3.
    """
    target = users[username]
    distances = []
    for user in users:
        if user != username:
            distances.append((minkowski(users[user], target, 3), user))
    # minkowski() returns -1 for "no ratings in common". A plain sort would
    # rank those users as the closest ones, so order them after every user
    # that has a real (non-negative) distance.
    distances.sort(key=lambda pair: (pair[0] < 0, pair))
    return distances
-
-
def recommend(username, users):
    """Give a list of recommendations for username.

    Finds the nearest neighbor with computeNearestNeighbor(), prints that
    neighbor's name, and returns the (artist, rating) pairs the neighbor
    rated but username did not, highest rating first. Returns an empty
    list when there is no other user to compare against.
    """
    # first find nearest neighbor
    neighbors = computeNearestNeighbor(username, users)
    if not neighbors:
        return []
    nearest = neighbors[0][1]
    # Function-call form of print: valid on both Python 2 and Python 3.
    print(nearest)

    # now find bands neighbor rated that user didn't
    neighborRatings = users[nearest]
    userRatings = users[username]
    recommendations = []
    for artist in neighborRatings:
        if artist not in userRatings:
            recommendations.append((artist, neighborRatings[artist]))
    # using the fn sorted for variety - sort is more efficient
    return sorted(recommendations,
                  key=lambda artistTuple: artistTuple[1], reverse=True)
但是可能存在两个用户的评分只相差一个常数、实际爱好却相同的问题。这时可以使用皮尔逊相关系数:
-
# Pearson.py
-
from math import sqrt
-
-
# Sample data: user name -> {band name: rating}.
users = {
    "Angelica": {"Blues Traveler": 3.5, "Broken Bells": 2.0, "Norah Jones": 4.5,
                 "Phoenix": 5.0, "Slightly Stoopid": 1.5, "The Strokes": 2.5,
                 "Vampire Weekend": 2.0},
    "Bill": {"Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0,
             "Phoenix": 2.0, "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0},
    "Chan": {"Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0,
             "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0},
    "Dan": {"Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5,
            "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
            "Vampire Weekend": 2.0},
    "Hailey": {"Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0,
               "The Strokes": 4.0, "Vampire Weekend": 1.0},
    "Jordyn": {"Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0,
               "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
               "Vampire Weekend": 4.0},
    "Sam": {"Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0,
            "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0},
    "Veronica": {"Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0,
                 "Slightly Stoopid": 2.5, "The Strokes": 3.0},
}
-
-
# For simplicity the single-pass approximation formula is used here.
def pearson(rating1, rating2):
    """Compute the Pearson correlation of two rating dictionaries.

    rating1 and rating2 map an item to a rating; only items rated in both
    are considered. The sums needed by the formula are accumulated in one
    pass over the shared items.

    Returns the correlation, or 0 when there are no shared items or when
    either side has zero variance over the shared items.
    """
    sum_xy = 0
    sum_x = 0
    sum_y = 0
    sum_x2 = 0
    sum_y2 = 0
    n = 0
    for key in rating1:
        if key in rating2:
            n += 1
            x = rating1[key]
            y = rating2[key]
            sum_xy += x * y
            sum_x += x
            sum_y += y
            sum_x2 += x ** 2
            sum_y2 += y ** 2
    # Without shared items the divisions by n below would raise.
    if n == 0:
        return 0
    # Rounding can push a zero variance slightly below 0, which would make
    # sqrt() raise ValueError; clamp at 0.
    spread_x = max(sum_x2 - (sum_x ** 2) / n, 0)
    spread_y = max(sum_y2 - (sum_y ** 2) / n, 0)
    denominator = sqrt(spread_x) * sqrt(spread_y)
    if denominator == 0:
        return 0
    return (sum_xy - (sum_x * sum_y) / n) / denominator
-
-
def cos_like(rating1, rating2):
    """Compute a cosine-style similarity of two rating dictionaries.

    rating1 and rating2 map an item to a rating. Only items present in
    both dictionaries enter the inner product and the two vector norms.

    Returns the similarity, or 0 when the denominator is 0 (for example
    when no item is shared).
    """
    innerProd = 0
    norm_x = 0
    norm_y = 0
    for key in rating1:
        if key in rating2:
            x = rating1[key]
            y = rating2[key]
            innerProd += x * y
            norm_x += x ** 2
            norm_y += y ** 2
    denominator = sqrt(norm_x) * sqrt(norm_y)
    if denominator == 0:
        return 0
    return innerProd / denominator
余弦相似度:$$\cos(x, y)=\frac{x \cdot y}{\|x\| \times \|y\|}$$ 总结:如果数据稠密,使用欧氏距离;如果数据稀疏,使用余弦相似度;如果用户评级范围不同,使用皮尔逊相关系数。但是如果仅仅基于一个用户进行推荐,个别用户的怪癖也会被推荐。
k近邻:利用k个最相似的用户确定推荐结果,k的取值和具体应用有关。利用皮尔逊系数来确定每个人的影响因子。
-
# A dictionary of movie critics and their ratings of a small
-
# set of movies
-
critics = {
    'Lisa Rose': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.5,
                  'Just My Luck': 3.0, 'Superman Returns': 3.5,
                  'You, Me and Dupree': 2.5, 'The Night Listener': 3.0},
    'Gene Seymour': {'Lady in the Water': 3.0, 'Snakes on a Plane': 3.5,
                     'Just My Luck': 1.5, 'Superman Returns': 5.0,
                     'The Night Listener': 3.0, 'You, Me and Dupree': 3.5},
    'Michael Phillips': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.0,
                         'Superman Returns': 3.5, 'The Night Listener': 4.0},
    'Claudia Puig': {'Snakes on a Plane': 3.5, 'Just My Luck': 3.0,
                     'The Night Listener': 4.5, 'Superman Returns': 4.0,
                     'You, Me and Dupree': 2.5},
    'Mick LaSalle': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0,
                     'Just My Luck': 2.0, 'Superman Returns': 3.0,
                     'The Night Listener': 3.0, 'You, Me and Dupree': 2.0},
    'Jack Matthews': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0,
                      'The Night Listener': 3.0, 'Superman Returns': 5.0,
                      'You, Me and Dupree': 3.5},
    'Toby': {'Snakes on a Plane': 4.5, 'You, Me and Dupree': 1.0,
             'Superman Returns': 4.0},
}
-
-
-
from math import sqrt
-
-
# Returns a distance-based similarity score for person1 and person2
-
def sim_distance(prefs, person1, person2):
    """Return a distance-based similarity score for person1 and person2.

    prefs maps a person to an {item: rating} dictionary. The score is
    1 / (1 + sum of squared rating differences over the shared items), so
    identical ratings give 1. Returns 0 when no item is shared.
    """
    ratings1 = prefs[person1]
    ratings2 = prefs[person2]
    shared = [item for item in ratings1 if item in ratings2]

    # if they have no ratings in common, return 0
    if not shared:
        return 0

    # Add up the squares of all the differences
    sum_of_squares = sum(pow(ratings1[item] - ratings2[item], 2)
                         for item in shared)

    # 1.0 keeps this a true division even for integer ratings on Python 2.
    return 1.0 / (1 + sum_of_squares)
-
-
# Returns the Pearson correlation coefficient for p1 and p2
-
def sim_pearson(prefs, p1, p2):
    """Return the Pearson correlation coefficient for p1 and p2.

    prefs maps a person to an {item: rating} dictionary; only items rated
    by both people are used. Returns 0 when no item is shared or when the
    denominator of the formula is 0.
    """
    ratings1 = prefs[p1]
    ratings2 = prefs[p2]

    # Get the list of mutually rated items
    si = [item for item in ratings1 if item in ratings2]

    # if there are no ratings in common, return 0
    n = len(si)
    if n == 0:
        return 0

    # Sums of all the preferences
    sum1 = sum(ratings1[it] for it in si)
    sum2 = sum(ratings2[it] for it in si)

    # Sums of the squares
    sum1Sq = sum(pow(ratings1[it], 2) for it in si)
    sum2Sq = sum(pow(ratings2[it], 2) for it in si)

    # Sum of the products
    pSum = sum(ratings1[it] * ratings2[it] for it in si)

    # Calculate r (Pearson score)
    num = pSum - (sum1 * sum2 / n)
    # Rounding can push a zero spread slightly below 0, which would make
    # sqrt() raise ValueError; clamp each factor at 0.
    spread1 = max(sum1Sq - pow(sum1, 2) / n, 0)
    spread2 = max(sum2Sq - pow(sum2, 2) / n, 0)
    den = sqrt(spread1 * spread2)
    if den == 0:
        return 0

    return num / den
-
-
# Returns the best matches for person from the prefs dictionary.
-
# Number of results and similarity function are optional params.
-
def topMatches(prefs, person, n=5, similarity=sim_pearson):
    """Return the best matches for person from the prefs dictionary.

    Every other key of prefs is scored with
    similarity(prefs, person, other). The result is a list of at most n
    (score, other) tuples, highest score first.
    """
    scores = [(similarity(prefs, person, other), other)
              for other in prefs if other != person]
    scores.sort(reverse=True)
    return scores[:n]
-
-
# Gets recommendations for a person by using a weighted average
-
# of every other user's rankings
-
def getRecommendations(prefs, person, similarity=sim_pearson):
    """Get recommendations for person from a weighted average of other users.

    Each other user's ratings are weighted by
    similarity(prefs, person, other); users whose similarity is zero or
    lower are ignored. Only items that person has not rated (or has rated
    0) are scored.

    Returns a list of (predicted rating, item) tuples, highest first.
    """
    totals = {}
    simSums = {}
    mine = prefs[person]
    for other in prefs:
        # don't compare me to myself
        if other == person:
            continue
        sim = similarity(prefs, person, other)

        # ignore scores of zero or lower
        if sim <= 0:
            continue

        for item, rating in prefs[other].items():
            # only score movies I haven't seen yet
            if item not in mine or mine[item] == 0:
                # Similarity * Score
                totals[item] = totals.get(item, 0) + rating * sim
                # Sum of similarities
                simSums[item] = simSums.get(item, 0) + sim

    # Create the normalized list
    rankings = [(total / simSums[item], item)
                for item, total in totals.items()]

    # Return the sorted list
    rankings.sort(reverse=True)
    return rankings
-
-
def transformPrefs(prefs):
    """Swap the two key levels of a nested rating dictionary.

    Given {person: {item: rating}} returns {item: {person: rating}}. The
    input dictionary is not modified.
    """
    result = {}
    for person, ratings in prefs.items():
        for item, rating in ratings.items():
            # Flip item and person
            result.setdefault(item, {})[person] = rating
    return result
-
-
-
def calculateSimilarItems(prefs, n=10):
    """Create a dictionary of items showing their most similar other items.

    prefs is inverted with transformPrefs() so that it is item-centric;
    then each item is mapped to the result of topMatches() using
    sim_distance and at most n matches. A progress line is printed after
    every 100 items.
    """
    result = {}
    # Invert the preference matrix to be item-centric
    itemPrefs = transformPrefs(prefs)
    total = len(itemPrefs)
    for count, item in enumerate(itemPrefs, 1):
        # Status updates for large datasets
        if count % 100 == 0:
            # Function-call form of print: valid on Python 2 and Python 3.
            print("%d / %d" % (count, total))
        # Find the most similar items to this one
        result[item] = topMatches(itemPrefs, item, n=n,
                                  similarity=sim_distance)
    return result
-
-
def getRecommendedItems(prefs, itemMatch, user):
    """Recommend items to user from precomputed item-to-item similarities.

    prefs maps a user to an {item: rating} dictionary. itemMatch maps an
    item to a list of (similarity, other item) tuples. For every item the
    user has not rated, the user's ratings of similar items are averaged,
    weighted by similarity.

    Returns a list of (predicted rating, item) tuples, highest first.
    Candidates whose similarities sum to 0 cannot be averaged and are left
    out.
    """
    userRatings = prefs[user]
    scores = {}
    totalSim = {}
    # Loop over items rated by this user
    for item, rating in userRatings.items():
        # Loop over items similar to this one
        for similarity, item2 in itemMatch[item]:
            # Ignore if this user has already rated this item
            if item2 in userRatings:
                continue
            # Weighted sum of rating times similarity
            scores[item2] = scores.get(item2, 0) + similarity * rating
            # Sum of all the similarities
            totalSim[item2] = totalSim.get(item2, 0) + similarity

    # Divide each total score by total weighting to get an average; a zero
    # total weighting would raise ZeroDivisionError, so skip those items.
    rankings = [(score / totalSim[item], item)
                for item, score in scores.items()
                if totalSim[item] != 0]

    # Return the rankings from highest to lowest
    rankings.sort(reverse=True)
    return rankings
-
-
def loadMovieLens(path=r'C:\Users\WangYixin\Desktop\PCI_Code\PCI_Code Folder\chapter2\data'):
    """Load MovieLens ratings from the directory path.

    Reads path + '/u.item' ('|'-separated, first two fields are movie id
    and title) and path + '/u.data' (tab-separated user, movie id, rating,
    timestamp).

    Returns {user: {movie title: rating as float}}.

    The default path is written as a raw string: as a normal literal the
    sequence backslash-U is an invalid unicode escape on Python 3.
    """
    # Get movie titles
    movies = {}
    with open(path + '/u.item') as itemFile:
        for line in itemFile:
            (movieId, title) = line.split('|')[0:2]
            movies[movieId] = title

    # Load data
    prefs = {}
    with open(path + '/u.data') as dataFile:
        for line in dataFile:
            (user, movieid, rating, ts) = line.split('\t')
            prefs.setdefault(user, {})[movies[movieid]] = float(rating)
    return prefs
-
# -*- coding: utf-8 -*-
-
# Recommender class
-
import codecs
-
from math import sqrt
-
-
# Sample data: user name -> {band name: rating}.
users = {
    "Angelica": {"Blues Traveler": 3.5, "Broken Bells": 2.0,
                 "Norah Jones": 4.5, "Phoenix": 5.0,
                 "Slightly Stoopid": 1.5,
                 "The Strokes": 2.5, "Vampire Weekend": 2.0},

    "Bill": {"Blues Traveler": 2.0, "Broken Bells": 3.5,
             "Deadmau5": 4.0, "Phoenix": 2.0,
             "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0},

    "Chan": {"Blues Traveler": 5.0, "Broken Bells": 1.0,
             "Deadmau5": 1.0, "Norah Jones": 3.0, "Phoenix": 5,
             "Slightly Stoopid": 1.0},

    "Dan": {"Blues Traveler": 3.0, "Broken Bells": 4.0,
            "Deadmau5": 4.5, "Phoenix": 3.0,
            "Slightly Stoopid": 4.5, "The Strokes": 4.0,
            "Vampire Weekend": 2.0},

    "Hailey": {"Broken Bells": 4.0, "Deadmau5": 1.0,
               "Norah Jones": 4.0, "The Strokes": 4.0,
               "Vampire Weekend": 1.0},

    "Jordyn": {"Broken Bells": 4.5, "Deadmau5": 4.0,
               "Norah Jones": 5.0, "Phoenix": 5.0,
               "Slightly Stoopid": 4.5, "The Strokes": 4.0,
               "Vampire Weekend": 4.0},

    "Sam": {"Blues Traveler": 5.0, "Broken Bells": 2.0,
            "Norah Jones": 3.0, "Phoenix": 5.0,
            "Slightly Stoopid": 4.0, "The Strokes": 5.0},

    "Veronica": {"Blues Traveler": 3.0, "Norah Jones": 5.0,
                 "Phoenix": 4.0, "Slightly Stoopid": 2.5,
                 "The Strokes": 3.0},
}
-
-
-
-
class recommender:
    """k-nearest-neighbor recommender over a dictionary of user ratings.

    self.data maps a user id to a {product id: rating} dictionary. It is
    either the dictionary passed to the constructor or the ratings read by
    loadBookDB().
    """

    def __init__(self, data, k=1, metric='pearson', n=5):
        """Initialize the recommender.

        data   -- if it is a dictionary the recommender data is set to it;
                  for all other types no data initialization occurs
        k      -- the k value for k nearest neighbor
        metric -- which similarity formula to use; only 'pearson' is wired
                  up here, any other value leaves self.fn unset
        n      -- the maximum number of recommendations to make
        """
        self.k = k
        self.n = n
        self.username2id = {}
        self.userid2name = {}
        self.productid2name = {}
        # the name of the metric is kept alongside the function itself
        self.metric = metric
        if self.metric == 'pearson':
            self.fn = self.pearson
        # if data is a dictionary set recommender data to it
        if isinstance(data, dict):
            self.data = data

    def convertProductID2name(self, id):
        """Given a product id return the product name.

        The id itself is returned when no name is known for it.
        """
        return self.productid2name.get(id, id)

    def userRatings(self, id, n):
        """Print the n top ratings for the user with this id."""
        print("Ratings for " + self.userid2name[id])
        ratings = self.data[id]
        print(len(ratings))
        named = [(self.convertProductID2name(k), v)
                 for (k, v) in ratings.items()]
        # sort by rating, highest first, and show the first n
        named.sort(key=lambda artistTuple: artistTuple[1], reverse=True)
        for rating in named[:n]:
            print("%s\t%i" % (rating[0], rating[1]))

    def loadBookDB(self, path=''):
        """Load the BX book dataset.

        path is the prefix under which BX-Book-Ratings.csv, BX-Books.csv
        and BX-Users.csv are located; it is concatenated directly with the
        file names. All three files are split on ';'.

        Replaces self.data and fills self.productid2name,
        self.userid2name and self.username2id. Prints the total number of
        lines processed.
        """
        self.data = {}
        i = 0
        #
        # First load book ratings into self.data
        #
        with codecs.open(path + "BX-Book-Ratings.csv", 'r', 'utf8') as f:
            f.readline()  # skip the title line
            for line in f:
                i += 1
                # separate line into fields (still wrapped in quotes)
                fields = line.split(';')
                user = fields[0].strip('"')
                book = fields[1].strip('"')
                rating = int(fields[2].strip().strip('"'))
                self.data.setdefault(user, {})[book] = rating
        #
        # Now load books into self.productid2name
        # Books contains isbn, title, and author among other fields
        #
        with codecs.open(path + "BX-Books.csv", 'r', 'utf8') as f:
            for line in f:
                i += 1
                fields = line.split(';')
                isbn = fields[0].strip('"')
                title = fields[1].strip('"')
                author = fields[2].strip().strip('"')
                self.productid2name[isbn] = title + ' by ' + author
        #
        # Now load user info into both self.userid2name and
        # self.username2id
        #
        with codecs.open(path + "BX-Users.csv", 'r', 'utf8') as f:
            for line in f:
                i += 1
                fields = line.split(';')
                userid = fields[0].strip('"')
                location = fields[1].strip('"')
                # The age is the third field, so it exists as soon as the
                # line has three fields.
                if len(fields) >= 3:
                    age = fields[2].strip().strip('"')
                else:
                    age = 'NULL'
                if age != 'NULL':
                    value = location + ' (age: ' + age + ')'
                else:
                    value = location
                self.userid2name[userid] = value
                self.username2id[location] = userid
        print(i)

    def pearson(self, rating1, rating2):
        """Compute the Pearson correlation of two rating dictionaries.

        Only keys present in both dictionaries are used. Returns 0 when
        there are no shared keys or when the denominator is 0.
        """
        sum_xy = 0
        sum_x = 0
        sum_y = 0
        sum_x2 = 0
        sum_y2 = 0
        n = 0
        for key in rating1:
            if key in rating2:
                n += 1
                x = rating1[key]
                y = rating2[key]
                sum_xy += x * y
                sum_x += x
                sum_y += y
                sum_x2 += pow(x, 2)
                sum_y2 += pow(y, 2)
        if n == 0:
            return 0
        # now compute denominator; rounding can push a zero spread slightly
        # below 0, which would make sqrt() raise ValueError, so clamp at 0
        spread_x = max(sum_x2 - pow(sum_x, 2) / n, 0)
        spread_y = max(sum_y2 - pow(sum_y, 2) / n, 0)
        denominator = sqrt(spread_x) * sqrt(spread_y)
        if denominator == 0:
            return 0
        return (sum_xy - (sum_x * sum_y) / n) / denominator

    def computeNearestNeighbor(self, username):
        """Create a list of the other users sorted by similarity to username.

        Returns (user, score) tuples for every other key of self.data,
        where score is self.fn applied to the two rating dictionaries,
        highest score first.
        """
        target = self.data[username]
        distances = [(instance, self.fn(target, self.data[instance]))
                     for instance in self.data if instance != username]
        # sort based on score -- most similar first
        distances.sort(key=lambda artistTuple: artistTuple[1], reverse=True)
        return distances

    def recommend(self, user):
        """Give a list of recommendations for user.

        The ratings of the k most similar users are combined, each weighted
        by that user's share of the summed similarity scores. Only items
        user has not rated are considered.

        Returns at most self.n (product name, score) tuples, highest score
        first. Returns an empty list when the similarity scores of the
        selected neighbors sum to 0.
        """
        recommendations = {}
        # first get list of users ordered by nearness
        nearest = self.computeNearestNeighbor(user)
        # now get the ratings for the user
        userRatings = self.data[user]
        # never index past the neighbors that actually exist
        neighbors = nearest[:self.k] if self.k > 0 else []
        # determine the total distance
        totalDistance = 0.0
        for _, score in neighbors:
            totalDistance += score
        # a zero total gives no way to compute each neighbor's share
        if totalDistance == 0:
            return []
        # now iterate through the k nearest neighbors
        # accumulating their ratings
        for name, score in neighbors:
            # compute slice of pie
            weight = score / totalDistance
            # get the ratings for this person
            neighborRatings = self.data[name]
            # now find bands neighbor rated that user didn't
            for artist in neighborRatings:
                if artist not in userRatings:
                    recommendations[artist] = (
                        recommendations.get(artist, 0)
                        + neighborRatings[artist] * weight)
        # now make list from dictionary
        ranked = [(self.convertProductID2name(k), v)
                  for (k, v) in recommendations.items()]
        # finally sort and return
        ranked.sort(key=lambda artistTuple: artistTuple[1], reverse=True)
        # Return the first n items
        return ranked[:self.n]
-
-
############test code############
-
#r = recommender(users)
-
#r.recommend('Jordyn')
-
#r.recommend('Hailey')
-
#r.loadBookDB('BX-CSV-Dump/')
-
#r.recommend('171118')
-
#r.userRatings('171118', 5)