Source code for recommenders.factorization_machines

# Factorization machines using Turi Create
import pandas as pd
from typing import Optional
import turicreate as tc


class RankingFactorizationRecommender:
    """Thin wrapper around Turi Create's ranking factorization recommender.

    The model is trained with the implicit Alternating Least Squares solver
    (``solver="ials"``) on a pandas DataFrame of user/item observations.

    Source:
    https://apple.github.io/turicreate/docs/api/generated/turicreate.recommender.ranking_factorization_recommender.RankingFactorizationRecommender.html

    Note: get_similar_users currently not supported for item similarity
    models. As a workaround, to get the neighborhood of users, train a
    model with the items and users reversed, then call get_similar_items.
    """

    def __init__(
        self,
        name: str,
        users_col: str,
        items_col: str,
        extra_cols: Optional[str] = None,
    ):
        """Store the column configuration; no data is touched until ``fit``.

        Parameters
        ----------
        name: str
            name of input data source
        users_col: str
            name of the column in the fitted data that corresponds to the
            user id.
        items_col: str
            name of the column in the fitted data that corresponds to
            the item id.
        extra_cols: Optional[str]
            name of a column in the fitted data holding item side
            information (a list of column names is also tolerated).
            During ``fit`` these columns are combined with ``items_col``
            to build the ``item_data`` SFrame handed to Turi Create.
        """
        self.name = name
        self.users_col = users_col
        self.items_col = items_col
        self.extra_cols = extra_cols

    def convert_dataframe(self, df: pd.DataFrame) -> "tc.SFrame":
        """Convert a pandas DataFrame to a Turi Create SFrame.

        Every column is cast to ``str`` before conversion, so ids and any
        extra columns reach Turi Create as strings.

        Parameters
        ----------
        df: pd.DataFrame
            input pandas dataframe

        Returns
        -------
        tc.SFrame
            SFrame with the same columns as ``df``, all of string type.
        """
        return tc.SFrame(df.astype(str))

    def _extra_column_names(self) -> list:
        """Return the configured item side-information columns as a list."""
        if not self.extra_cols:
            return []
        if isinstance(self.extra_cols, str):
            return [self.extra_cols]
        return list(self.extra_cols)

    def _require_fitted(self) -> None:
        """Raise a descriptive AttributeError when ``fit`` has not run yet."""
        if not (hasattr(self, "sdf") and hasattr(self, "matrix")):
            raise AttributeError(
                f"{type(self).__name__} has not been fitted; call fit() first."
            )

    def fit(self, data: pd.DataFrame):
        """Fit ranking factorization recommender to learn a set of latent
        factors for each user and item and uses them to rank recommended
        items according to the likelihood of observing those pairs.

        Assumption: implicit data (e.g. solver = implicit Alternating Least Squares)

        The converted SFrame is stored on ``self.sdf`` and the trained model
        on ``self.matrix``. ``self.extra_cols`` is left untouched so the
        recommender can be re-fitted on new data.

        Parameters
        ----------
        data - pd.DataFrame
            input pandas dataframe containing ``users_col``, ``items_col``
            and, when configured, the ``extra_cols`` columns.
        """
        self.sdf = self.convert_dataframe(data)

        id_cols = (self.users_col, self.items_col)
        extras = [c for c in self._extra_column_names() if c not in id_cols]

        observations = self.sdf
        item_data = None
        if extras:
            # Turi Create requires item side data to be an SFrame that carries
            # the item id column; one row per distinct (item, extras) combination.
            item_data = self.sdf[[self.items_col, *extras]].unique()
            # Keep the side columns out of the observation data so the same
            # column is not supplied twice.
            # NOTE(review): confirm this matches the intended feature set.
            kept = [c for c in self.sdf.column_names() if c not in extras]
            observations = self.sdf[kept]

        self.matrix = tc.ranking_factorization_recommender.create(
            observations,
            user_id=self.users_col,
            item_id=self.items_col,
            item_data=item_data,
            solver="ials",
        )

    def _rank_users(self, n_top: int) -> pd.DataFrame:
        """Return the ``n_top`` nearest users for every user seen in ``fit``.

        Factorization_recommender will return the nearest users based on
        the cosine similarity between latent user factors. Duplicate rows
        (users occurring several times in the fitted data) are dropped.
        """
        self._require_fitted()
        return (
            self.matrix.get_similar_users(self.sdf[self.users_col], n_top)
            .to_dataframe()
            .drop_duplicates()
        )

    def _rank_items(self, n_top: int) -> pd.DataFrame:
        """Return the ``n_top`` nearest items for every item seen in ``fit``.

        Factorization_recommender will return the nearest items based on
        the cosine similarity between latent item factors. Duplicate rows
        (items occurring several times in the fitted data) are dropped.
        """
        self._require_fitted()
        return (
            self.matrix.get_similar_items(self.sdf[self.items_col], n_top)
            .to_dataframe()
            .drop_duplicates()
        )