# 基于 MF 的推荐系统：FunkSVD 算法及实现

花了点时间重写了基于内存的矩阵分解推荐算法（用两个低秩矩阵拟合原始评分矩阵），下一篇将会给出 BiasSVD 算法。整个算法的难点在于梯度的计算以及梯度的更新；对于批量梯度下降算法，可以参照相应的数学公式实现。

```python
from numpy import *
import os
import socket
import time
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import progressbar
import ray
from sklearn import preprocessing
from sklearn.utils import shuffle

# Tell NumPy to ignore (rather than warn about) divide-by-zero and invalid
# floating-point operations from this point on.
np.seterr(divide='ignore', invalid='ignore')
'''
This module implements an SVD model, named the svd++ algorithm, using the Ray framework.
email: kenny13141314@163.com
time: 2021/11/17
'''

'''
=================================================common function area==============================================================
'''

'''
Returns: matrix, userno and videono
'''
# NOTE(review): these statements look like the body of a data-loading function
# whose `def` line is missing from this listing, and `o_data` is never assigned
# in the visible code (presumably the CSV at `path` is read into it) — TODO
# restore both before running.
# Directory that contains this source file.
dictionary = os.path.dirname(os.path.abspath(__file__))
# Location of the CSV data file, relative to this source file.
path = os.path.join(dictionary, 'data/00000005.csv')
# Matrix dimensions are the largest id plus one
# (presumably ids start at 0 — TODO confirm).
userno = o_data['userid'].max() + 1
videono = o_data['videoid'].max() + 1
return [o_data, userno, videono]

def build_score_matrix_R(data, userno, videono):
    '''
    Build the dense user x item score matrix from the raw rating records.

    This is a common helper for every model in the file. Cells that have no
    rating hold ``None`` (not 0), because the training code selects the
    observed entries with ``matrix != None``.

    Args:
        data: table with 'userid', 'videoid' and 'score' columns, accessed by
            column name (e.g. a pandas DataFrame).
        userno: number of rows to allocate; must be larger than every user id.
        videono: number of columns to allocate; must be larger than every
            item id.
    Returns:
        object-dtype ndarray of shape (userno, videono) holding each score as
        a float at [userid, videoid] and ``None`` everywhere else.
    '''
    # Allocate the object array directly instead of going through a Python
    # list of lists; this also keeps the 2-D shape when a dimension is 0.
    matrix = np.full((userno, videono), None, dtype=object)

    # Walk the three columns in lockstep (same order and same int()/float()
    # conversions as a row-by-row loop, without building a Series per row).
    # When a (user, item) pair occurs more than once, the last record wins.
    for user_id, video_id, score in zip(data['userid'], data['videoid'], data['score']):
        matrix[int(user_id), int(video_id)] = float(score)
    return matrix

def L2Norm(a, vector):
    '''
    Scale ``np.dot(vector, vector)`` by ``a`` and return its [0][0] entry.

    NOTE(review): the scaled product must be at least 2-D for the [0][0]
    lookup to work, so a plain 1-D vector raises here — confirm the
    intended input shape with the callers.

    Args:
        a: scalar factor applied to the product.
        vector: array-like passed as both operands of ``np.dot``.
    Returns:
        the first element of the first row of the scaled product.
    '''
    scaled_product = np.dot(vector, vector) * a
    first_row = list(scaled_product)[0]
    return first_row[0]

'''
=================================================funck svd==============================================================
'''

def init_P_Q_matrix(user_disms=(3, 3), item_disms=(3, 3), init_method='quadrature'):
    '''
    Create the two randomly initialised factor matrices used for SGD training.

    Args:
        user_disms: two-element sequence giving the shape of the user matrix P
            as (rows, columns).
        item_disms: two-element sequence describing the item matrix; Q is
            created transposed, with shape (item_disms[1], item_disms[0]).
        init_method: accepted for interface compatibility but currently
            ignored; both matrices are always drawn from a standard normal
            distribution.
    Returns:
        [P, Q] as a two-element list of ndarrays.
    '''
    # `random` is numpy.random, brought in by the file's `from numpy import *`.
    P = random.randn(user_disms[0], user_disms[1])
    Q = random.randn(item_disms[1], item_disms[0])
    return [P, Q]

def calculate_error(P_matrix, Q_matrix, y_matrix):
    '''
    Compute the residual for every entry of y_matrix that is not None.

    Args:
        P_matrix: factor matrix indexed by row of y_matrix.
        Q_matrix: factor matrix whose columns are indexed by column of
            y_matrix (it is transposed before the lookup).
        y_matrix: score matrix in which missing entries are None.
    Returns:
        1-D array with, for each non-None entry in row-major order, the
        score minus the dot product of the matching P row and Q column.
    '''
    # Element-wise comparison on purpose: `is not None` would test the array
    # object itself instead of each cell.
    observed_mask = y_matrix != None  # noqa: E711
    user_idx, item_idx = np.nonzero(observed_mask)

    user_factors = P_matrix[user_idx]
    item_factors = Q_matrix.T[item_idx]
    predicted = np.sum(user_factors * item_factors, axis=1)

    return y_matrix[user_idx, item_idx] - predicted

def gradient(P_matrix, Q_matrix, rows, cols, a, index, error, learning_rate=0.001):
    '''
    Apply one regularised gradient-descent step for a single observed entry.

    The update works on the matrices passed in (the previous body referred to
    the undefined names ``P``, ``Q`` and ``learning_rate``).

    Args:
        P_matrix: float factor matrix; row ``rows[index]`` is updated in place.
        Q_matrix: float factor matrix; column ``cols[index]`` is updated in
            place.
        rows: row indices of the observed entries.
        cols: column indices of the observed entries.
        a: regularisation coefficient.
        index: position in ``rows``/``cols`` of the entry being processed.
        error: residual for that entry.
        learning_rate: step size; defaults to 0.001, the value used by
            ``funck_svd``.
    Returns:
        [P_matrix, Q_matrix], the same (now updated) objects that were passed.
    '''
    or_row, or_col = rows[index], cols[index]
    user_vec = P_matrix[or_row, :]
    item_vec = Q_matrix[:, or_col]

    # Evaluate both gradients before touching either matrix so that the Q
    # step still sees the pre-update P row (and vice versa).
    P_gradient = -2 * error * item_vec + 2 * a * user_vec
    Q_gradient = -2 * error * user_vec + 2 * a * item_vec

    P_matrix[or_row, :] -= learning_rate * P_gradient
    Q_matrix[:, or_col] -= learning_rate * Q_gradient

    return [P_matrix, Q_matrix]

def funck_svd():
'''
train function is ford training svd++ algorithm.
defined two matrix to fit the orginal rating-matrix.
Returns: cost and iters count.
'''
learning_rate = 0.001
iters = 50000
a = 0.005

[P, Q] = init_P_Q_matrix(user_disms=[userno, 2], item_disms=[videono, 2], init_method='quadrature')
y_matirx = build_score_matrix_R(data, userno, videono)

if not isinstance(P, np.ndarray):
P = np.array(P).around(decimals=4)
if not isinstance(Q, np.ndarray):
Q = np.array(Q).around(decimals=4)
if not isinstance(y_matirx, np.ndarray):
y_matirx = np.array(y_matirx).around(decimals=4)

rows, cols = np.nonzero(y_matirx != None)
cost_arr = []
count = 0
bar = progressbar
for i in bar.progressbar(range(iters)):
errors_matrix = calculate_error(P, Q, y_matirx)
cost = np.sum(np.square(errors_matrix))
if cost <= 0.00001:
break

for index in range(len(rows)):