LoginSignup
7
11

More than 5 years have passed since last update.

pythonでユーザベース協調フィルタリング

Posted at

「Pythonでアイテムベース協調フィルタリングを実装する - MovieLensを例に」の続きとして、ユーザベース協調フィルタリングをやってみた。

import numpy as np
import pandas as pd
from  scipy.spatial.distance import cosine
from scipy.stats import pearsonr

# Load the ratings file: tab-separated, no header row, one rating per line.
df = pd.read_csv('u.data', sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'])

# The matrix is sized by the largest user/item id; ids are used as 1-based
# indices below (id - 1 is the row/column).
# NOTE: `.ix` was removed from pandas, so plain column access is used instead.
shape = (int(df['user_id'].max()), int(df['item_id'].max()))
R = np.zeros(shape)

# Fill the user-by-item rating matrix in a single vectorized assignment
# instead of a Python-level loop over every row. Cells that have no rating
# keep the value 0.
R[df['user_id'].values - 1, df['item_id'].values - 1] = df['rating'].values


class CollaborativeFiltering:
    """User-based collaborative filtering over a dense rating matrix.

    Rows of the rating matrix are users and columns are items. Entries
    greater than zero are treated as observed ratings; everything else is
    treated as "not rated".
    """

    def fit(self, rating_matrix):
        """Store the rating matrix and the per-user mean of observed ratings.

        Args:
            rating_matrix: 2-D array of shape (users, items).
        """
        rating_matrix = np.asarray(rating_matrix)

        # 1 where a rating is present, 0 elsewhere.
        rated = (rating_matrix > 0) * 1
        # Mean of the observed ratings for each user (0.0 if none).
        mean_ratings = np.array(
            [self.evaluated_mean(row) for row in rating_matrix], dtype=float
        )

        self.rating_matrix = rating_matrix
        # Each user's mean, placed only in the cells that user has rated, so
        # that (rating_matrix - rating_matrix_mean) is zero for unrated cells.
        self.rating_matrix_mean = rated * mean_ratings[:, np.newaxis]

    def predict(self, x):
        """Predict a rating for every item for the user vector ``x``.

        Args:
            x: 1-D array of the target user's ratings (0 = not rated), with
                one entry per item of the fitted matrix.

        Returns:
            1-D array of predicted ratings. Items for which the prediction
            is undefined (zero total similarity weight) get 0.0.
        """
        x = np.asarray(x)
        sims = self.user_similarities(x)

        # Similarity-weighted sum of mean-centred ratings per item.
        scores = sims.dot(self.rating_matrix - self.rating_matrix_mean)
        # Sum of |similarity| over the users who rated each item.
        norms = np.absolute(sims).dot((self.rating_matrix > 0) * 1)
        x_mean = self.evaluated_mean(x)

        # 0/0 is expected for items without any weighted rater; silence the
        # warning and map the resulting NaNs to 0.0 below.
        with np.errstate(divide='ignore', invalid='ignore'):
            p = scores / norms + x_mean

        p[np.isnan(p)] = 0.0
        return p

    def evaluated_mean(self, v):
        """Return the mean of the entries of ``v`` that are > 0, or 0.0 if none."""
        v = np.asarray(v)
        ev = v[v > 0]
        if ev.size > 0:
            return np.mean(ev)
        return 0.

    def user_similarities(self, x):
        """Return the similarity between ``x`` and every fitted user row."""
        # Was a bare ``similarity(...)`` call, which raised NameError.
        return np.array(
            [self.similarity(x, row) for row in self.rating_matrix], dtype=float
        )

    def similarity(self, v1, v2):
        """Pearson correlation of ``v1`` and ``v2`` over their co-rated items.

        Only positions where both vectors are non-zero are compared. Returns
        0.0 when the correlation is undefined: fewer than two co-rated items,
        or either side constant over the co-rated items.
        """
        v1 = np.asarray(v1)
        v2 = np.asarray(v2)

        # Items that both users have a non-zero rating for.
        idx = np.logical_and(v1 != 0, v2 != 0)
        a = v1[idx]
        b = v2[idx]

        # pearsonr needs at least two points (it raises otherwise).
        if a.size < 2:
            return 0.0
        # A constant input has zero variance: the coefficient would be NaN.
        if a.max() == a.min() or b.max() == b.min():
            return 0.0

        coef, _ = pearsonr(a, b)
        if np.isnan(coef):
            return 0.0
        return float(coef)

# Create the recommender and fit it on the rating matrix R built above.
cf = CollaborativeFiltering()
cf.fit(rating_matrix=R)

参考

推薦システムのアルゴリズム

7
11
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
7
11