Source code for recommenders.matrix_factorization

import numpy as np
from tensorflow.keras.layers import (
    Dropout,
    Dense,
    Dot,
    Reshape,
    Flatten,
    Input,
    Embedding,
    Concatenate,
)
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping

from typing import List
from .utils.logger import get_logger

logger = get_logger(__name__)


class MatrixFactorization:
    # Fallback used by ``train`` when ``epochs`` was left as None; it mirrors
    # the default of ``keras.Model.fit`` instead of passing None through.
    _DEFAULT_EPOCHS = 1

    def __init__(
        self,
        unique_users: np.ndarray,
        unique_items: np.ndarray,
        embedding_dim: int = None,
        epochs: int = None,
        batch_size: int = None,
    ):
        """
        Class supporting Generalized Matrix Factorization (GMF).

        GMF exploits explicit feedback by applying an inner product on the
        latent features of users and items.

        Parameters
        ----------
        unique_users: np.ndarray
            array of unique users; only its length (``shape[0]``) is used,
            as the vocabulary size of the user embedding
        unique_items: np.ndarray
            array of unique items; only its length (``shape[0]``) is used,
            as the vocabulary size of the item embedding
        embedding_dim: int
            number of dimensions for the embedding layers; must be set
            before ``build_model`` / ``train`` is called
        epochs: int
            number of epochs to train the model. An epoch is an iteration
            over the entire x and y data provided. When left as None,
            ``train`` runs a single epoch.
        batch_size: int
            number of samples per batch of computation; passed unchanged
            to ``Model.fit``
        """
        self.unique_users = unique_users
        self.unique_items = unique_items
        self.batch_size = batch_size
        self.epochs = epochs
        self.embedding_dim = embedding_dim
        self.loss = "mse"

    def __repr__(self):
        return """
        Matrix Factorization Recommendation Engine
        """

    def _resolve_epochs(self) -> int:
        """Return the epoch count handed to ``Model.fit`` (never None)."""
        return self._DEFAULT_EPOCHS if self.epochs is None else self.epochs

    def build_model(self, x, y):
        """Build and compile the Generalized Matrix Factorization model

        The network has two single-id inputs ("user" and "item"), one
        embedding per input, and outputs the dot product of the two
        flattened embeddings. The compiled model is stored in ``self.model``.

        Parameters
        ----------
        x: np.ndarray
            input training data; not used while building the graph
        y: np.ndarray
            input target; not used while building the graph

        Raises
        ------
        ValueError
            if ``embedding_dim`` was never configured
        """
        # Fail early with a readable message instead of an error raised from
        # inside the Embedding layer.
        if self.embedding_dim is None:
            raise ValueError("embedding_dim must be set before building the model")

        # User Embeddings
        self.user_id_input = Input(shape=[1], name="user")
        self.user_embedding = Embedding(
            output_dim=self.embedding_dim,
            input_dim=self.unique_users.shape[0],
            input_length=1,
            embeddings_regularizer=l2(1e-6),
            name="user_embedding",
        )

        # Item Embeddings
        self.item_id_input = Input(shape=[1], name="item")
        self.item_embedding = Embedding(
            output_dim=self.embedding_dim,
            input_dim=self.unique_items.shape[0],
            input_length=1,
            embeddings_regularizer=l2(1e-6),
            name="item_embedding",
        )

        # Flatten the embedding vectors as latent features in GMF
        self.user_latent = Flatten()(self.user_embedding(self.user_id_input))
        self.item_latent = Flatten()(self.item_embedding(self.item_id_input))

        # Predicted rating = inner product of the two latent vectors
        self.output = Dot(1, normalize=False)([self.user_latent, self.item_latent])

        self.model = Model(
            inputs=[self.user_id_input, self.item_id_input], outputs=self.output
        )
        self.model.compile(loss=self.loss, optimizer="adam")

    def _evaluate(self, X, Y, name):
        """Evaluate the model on (X, Y) and log the loss under label ``name``"""
        # The model is compiled with a loss only, so evaluate() yields a
        # single value that can be formatted directly.
        mse = self.model.evaluate(X, Y)
        logger.info(f"{name} mean squared error: {mse:.4f}")

    def train(self, X_train: np.ndarray, Y_train: np.ndarray):
        """Helper function to build, fit and evaluate the model

        Parameters
        ----------
        X_train: np.ndarray
            input training data. The model takes two inputs, so this is
            presumably ``[user_ids, item_ids]`` -- confirm against callers.
        Y_train: np.ndarray
            input target; example movie ratings

        Returns
        -------
        dict
            the ``history`` attribute of the object returned by ``Model.fit``
        """
        self.build_model(X_train, Y_train)

        # Stop once the validation loss has not improved for 5 epochs
        early_stop = EarlyStopping(
            monitor="val_loss", mode="auto", verbose=0, patience=5
        )

        history = self.model.fit(
            X_train,
            Y_train,
            batch_size=self.batch_size,
            epochs=self._resolve_epochs(),
            validation_split=0.15,
            shuffle=True,
            verbose=1,
            callbacks=[early_stop],
        ).history

        self._evaluate(X_train, Y_train, "train")
        return history
class DeepMatrixFactorization:
    # Fallback used by ``train`` when ``epochs`` was left as None; it mirrors
    # the default of ``keras.Model.fit`` instead of passing None through.
    _DEFAULT_EPOCHS = 1

    def __init__(
        self,
        unique_users: np.ndarray,
        unique_items: np.ndarray,
        embedding_dim: int = None,
        epochs: int = None,
        batch_size: int = None,
        layers: List[int] = None,
        dense_units: int = None,
        dropout: float = None,
    ):
        """
        This class is an extension of Matrix Factorization for building
        recommendation engines using explicit data. The modification is to
        simply apply a non-linear kernel (a stack of dense layers) to model
        the latent feature interactions.

        Parameters
        ----------
        unique_users: np.ndarray
            array of unique users; only its length (``shape[0]``) is used,
            as the vocabulary size of the user embedding
        unique_items: np.ndarray
            array of unique items; only its length (``shape[0]``) is used,
            as the vocabulary size of the item embedding
        embedding_dim: int
            number of dimensions for the embedding layers; must be set
            before ``build_model`` / ``train`` is called
        epochs: int
            number of epochs to train the model. An epoch is an iteration
            over the entire x and y data provided. When left as None,
            ``train`` runs a single epoch.
        batch_size: int
            number of samples per batch of computation; passed unchanged
            to ``Model.fit``
        layers: List[int]
            one entry per hidden dense layer, each entry being the number
            of units of that layer. None is treated as no hidden layers.
        dense_units: int
            stored on the instance but not read by any method of this class
        dropout: float
            rate of the Dropout layer placed after every hidden dense
            layer: input units are randomly set to 0 with this frequency
            during training to help prevent overfitting. None disables
            dropout.
        """
        self.unique_users = unique_users
        self.unique_items = unique_items
        self.layers = layers
        self.dense_units = dense_units
        self.dropout = dropout
        self.batch_size = batch_size
        self.epochs = epochs
        self.embedding_dim = embedding_dim
        self.loss = "mse"

    def __repr__(self):
        return """
        Deep Matrix Factorization Recommendation Engine
        """

    def _resolve_epochs(self) -> int:
        """Return the epoch count handed to ``Model.fit`` (never None)."""
        return self._DEFAULT_EPOCHS if self.epochs is None else self.epochs

    def build_model(self, x, y):
        """Build and compile the Deep Matrix Factorization model

        User and item embeddings are concatenated, passed through the
        configured hidden dense layers (each optionally followed by
        dropout) and reduced to one prediction by a final dense unit.
        The compiled model is stored in ``self.model``.

        Parameters
        ----------
        x: np.ndarray
            input training data; not used while building the graph
        y: np.ndarray
            input target; not used while building the graph

        Raises
        ------
        ValueError
            if ``embedding_dim`` was never configured
        """
        # Fail early with a readable message instead of an error raised from
        # inside the Embedding / Reshape layers.
        if self.embedding_dim is None:
            raise ValueError("embedding_dim must be set before building the model")

        # User Embeddings
        self.user_id_input = Input(shape=[1], name="user")
        self.user_embedding = Embedding(
            output_dim=self.embedding_dim,
            input_dim=self.unique_users.shape[0],
            input_length=1,
            embeddings_regularizer=l2(1e-6),
            name="user_embedding",
        )(self.user_id_input)
        self.user_vector = Reshape([self.embedding_dim])(self.user_embedding)

        # Item Embeddings
        self.item_id_input = Input(shape=[1], name="item")
        self.item_embedding = Embedding(
            output_dim=self.embedding_dim,
            input_dim=self.unique_items.shape[0],
            input_length=1,
            embeddings_regularizer=l2(1e-6),
            name="item_embedding",
        )(self.item_id_input)
        self.item_vector = Reshape([self.embedding_dim])(self.item_embedding)

        # Concat user/item vectors
        self.vector = Concatenate()([self.user_vector, self.item_vector])

        # Add dense layers; a missing ``layers`` setting means "no hidden layers"
        hidden_sizes = self.layers if self.layers is not None else []
        for position, units in enumerate(hidden_sizes, start=1):
            hidden = Dense(
                units,
                kernel_regularizer="l2",
                activation="relu",
                name=f"layer{position}",
            )(self.vector)
            # Dropout needs a numeric rate, so it is skipped when unset
            if self.dropout is None:
                self.vector = hidden
            else:
                self.vector = Dropout(self.dropout)(hidden)

        # Output layer
        self.output = Dense(1, name="prediction")(self.vector)

        # Defined model
        self.model = Model(
            inputs=[self.user_id_input, self.item_id_input], outputs=self.output
        )
        self.model.compile(loss=self.loss, optimizer="adam")

    def _evaluate(self, X, Y, name):
        """Evaluate the model on (X, Y) and log the loss under label ``name``"""
        # The model is compiled with a loss only, so evaluate() yields a
        # single value that can be formatted directly.
        mse = self.model.evaluate(X, Y)
        logger.info(f"{name} mean squared error: {mse:.4f}")

    def train(self, X_train: np.ndarray, Y_train: np.ndarray):
        """Helper function to build, fit and evaluate the model

        Parameters
        ----------
        X_train: np.ndarray
            input training data. The model takes two inputs, so this is
            presumably ``[user_ids, item_ids]`` -- confirm against callers.
        Y_train: np.ndarray
            input target; example movie ratings

        Returns
        -------
        dict
            the ``history`` attribute of the object returned by ``Model.fit``
        """
        self.build_model(X_train, Y_train)

        # Stop once the validation loss has not improved for 5 epochs
        early_stop = EarlyStopping(
            monitor="val_loss", mode="auto", verbose=0, patience=5
        )

        history = self.model.fit(
            X_train,
            Y_train,
            batch_size=self.batch_size,
            epochs=self._resolve_epochs(),
            validation_split=0.15,
            shuffle=True,
            verbose=1,
            callbacks=[early_stop],
        ).history

        self._evaluate(X_train, Y_train, "train")
        return history
def get_recommendations(
    user_id: int,
    items: List[int],
    items_lookup: dict,
    model: "Model",
    topk: int = None,
) -> list:
    """
    Helper function to predict ratings for given input user and return top
    recommended items

    Parameters
    ----------
    user_id: int
        input user id
    items: List[int]
        input (encoded) item ids to score for the user
    items_lookup: dict
        lookup items to map encodings back to unique name/id; it is
        indexed with the values of ``items``
    model: tf.keras.models.Model
        input trained matrix factorization model; called as
        ``model.predict([user_ids, item_ids])``
    topk: int
        maximum recommended items to user; None returns every item

    Returns
    -------
    list
        looked-up names/ids of the candidate items, ordered from the
        highest predicted rating to the lowest and truncated to ``topk``
    """
    item_ids = np.asarray(items)
    # Nothing to rank: skip the model call entirely
    if item_ids.size == 0:
        return []

    user_ids = np.full(item_ids.shape[0], user_id)
    scores = np.asarray(model.predict([user_ids, item_ids])).ravel()

    # Negate the scores so the best rated item comes first; a stable sort
    # keeps the input order between items with equal scores.
    best_first = np.argsort(-scores, kind="stable")[:topk]

    # argsort yields positions inside ``items``; translate them back to the
    # item ids before consulting the lookup table.
    return [items_lookup[item_id] for item_id in item_ids[best_first].tolist()]