# Source code for recommenders.factorization_machines

# Factorization Machines using Turicreate
import pandas as pd
from typing import Optional
import turicreate as tc


class RankingFactorizationRecommender:
    """
    Ranking Factorization Recommender Class

    Thin wrapper around Turi Create's ranking factorization recommender.

    Source: https://apple.github.io/turicreate/docs/api/generated/turicreate.recommender.factorization_recommender.FactorizationRecommender.html

    Note: get_similar_users currently not supported for item similarity
    models. As a workaround, to get the neighborhood of users, train a model
    with the items and users reversed, then call get_similar_items.

    Parameters
    ----------
    name: str
        name of input data source
    users_col: str
        name of the column in the observation data that corresponds to the
        user id.
    items_col: str
        name of the column in the observation data that corresponds to the
        item id.
    extra_cols: Optional[str]
        name of the column (a list of names is also accepted) holding side
        information for the items. At fit time these columns are combined
        with `items_col` into the `item_data` table handed to Turi Create.
    """

    def __init__(
        self,
        name: str,
        users_col: str,
        items_col: str,
        extra_cols: Optional[str] = None,
    ):
        self.name = name
        self.users_col = users_col
        self.items_col = items_col
        self.extra_cols = extra_cols

    def convert_dataframe(self, df: pd.DataFrame) -> "tc.SFrame":
        """Convert a pandas DataFrame to an SFrame, Turi Create's "scalable,
        tabular, column-mutable dataframe object that can scale to big data".

        Every column is cast to `str` before the conversion.

        Parameters
        ----------
        df: pd.DataFrame
            input pandas dataframe

        Returns
        -------
        tc.SFrame
            SFrame with the same columns as `df`, all values as strings.
        """
        # A single SFrame constructor call is enough; wrapping the result in
        # a second SFrame adds nothing.
        return tc.SFrame(df.astype(str))

    def _item_data_columns(self) -> list:
        """Return the column names that make up the item side-data table.

        The result starts with `items_col` (Turi Create requires the item id
        column in `item_data`) followed by the configured extra columns. An
        empty list means no item side data is configured.
        """
        if not self.extra_cols:
            return []
        if isinstance(self.extra_cols, str):
            extras = [self.extra_cols]
        else:
            extras = list(self.extra_cols)
        # Avoid selecting the id column twice if it was also listed as extra.
        extras = [col for col in extras if col != self.items_col]
        if not extras:
            return []
        return [self.items_col] + extras

    def fit(self, data: pd.DataFrame):
        """Fit ranking factorization recommender to learn a set of latent
        factors for each user and item and uses them to rank recommended
        items according to the likelihood of observing those pairs.

        Assumption: implicit data (e.g. solver = implicit Alternating Least
        Squares)

        The converted observations are stored on `self.sdf` and the trained
        model on `self.matrix`. `self.extra_cols` is left untouched so the
        recommender can be fitted again.

        Parameters
        ----------
        data: pd.DataFrame
            input pandas dataframe; must contain `users_col`, `items_col`
            and any configured extra columns.
        """
        self.sdf = self.convert_dataframe(data)

        # Build the side-data table locally instead of overwriting the
        # configured column name(s) on the instance.
        side_columns = self._item_data_columns()
        item_data = None
        if side_columns:
            # Selecting with a list yields an SFrame that includes the item
            # id column; unique() drops rows repeated across observations.
            item_data = self.sdf[side_columns].unique()

        self.matrix = tc.ranking_factorization_recommender.create(
            self.sdf,
            user_id=self.users_col,
            item_id=self.items_col,
            item_data=item_data,
            solver="ials",
        )

    def _rank_users(self, n_top: int) -> pd.DataFrame:
        """Factorization_recommender will return the nearest users based on
        the cosine similarity between latent user factors.

        Parameters
        ----------
        n_top: int
            number of similar users to request for each user.

        Returns
        -------
        pd.DataFrame
            similar-user table with duplicate rows removed.
        """
        similar_users = self.matrix.get_similar_users(
            self.sdf[self.users_col], n_top
        )
        return similar_users.to_dataframe().drop_duplicates()

    def _rank_items(self, n_top: int) -> pd.DataFrame:
        """Factorization_recommender will return the nearest items based on
        the cosine similarity between latent item factors.

        Parameters
        ----------
        n_top: int
            number of similar items to request for each item.

        Returns
        -------
        pd.DataFrame
            similar-item table with duplicate rows removed.
        """
        similar_items = self.matrix.get_similar_items(
            self.sdf[self.items_col], n_top
        )
        return similar_items.to_dataframe().drop_duplicates()