diff --git a/run_ranker_dataset.py b/run_ranker_dataset.py
new file mode 100644
index 0000000..22e8dfb
--- /dev/null
+++ b/run_ranker_dataset.py
@@ -0,0 +1,444 @@
+import pickle
+from pathlib import Path
+from typing import Literal, Optional
+
+import numpy as np
+import pandas as pd
+import scipy.sparse as sps
+from loguru import logger
+from scipy.stats import kurtosis, skew
+from sklearn.model_selection import KFold, ShuffleSplit
+from tqdm import tqdm, trange
+
+from Data_manager.competition import load, load_raw
+from Recommenders.BaseRecommender import BaseRecommender
+from Recommenders.Hybrid import (
+    ScoresMultipleHybridRecommender,
+    UserWideHybridRecommender,
+)
+from Recommenders.Similarity.Compute_Similarity import Compute_Similarity
+from Recommenders.MatrixFactorization.PureSVDRecommender import PureSVDRecommender
+
+# Send loguru output through tqdm.write so that log lines are printed via
+# tqdm instead of directly to the stream the progress bars are drawn on.
+logger.remove()
+logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True)
+
+# Which dataset to build; forwarded to compute_dataset ("training" or
+# "submission").
+USE = "training"
+# Free-form tag that only appears in the output file name.
+EXPERIMENT = "final"
+# Number of KFold splits used for the training dataset.
+NUMBER_FOLDS = 10
+# Number of candidates requested from each recommendation model per user.
+CUTOFF = 50
+
+# For each k, add_features counts how many of a user's interactions fall in
+# the k most popular items.
+TOP_POPULAR_THRESHOLDS = (10, 100, 1000)
+
+# How many leading SVD factor columns are exported as item / user features.
+ITEM_LATENT_DIMENSIONS = 10
+USER_LATENT_DIMENSIONS = 10
+
+# Ids (pickle file stems) of the models that generate candidates.
+RECOMMENDATION_MODELS_TO_USE = (
+    70,
+    71,
+    72,
+    73,
+)
+# Ids (pickle file stems) of the models whose scores become features.
+SCORE_MODELS_TO_USE = (
+    20,
+    21,
+    22,
+    23,
+)
+# Whether the two hybrids are added to the score models.
+USE_SCORE_HYBRID = True
+USE_USER_HYBRID = True
+# Id of the model serving the first group of the user-wide hybrid; the
+# following groups use the consecutive ids.
+USER_WIDE_HYBRID_BEGIN = 30
+
+OUTPUT_PATH = Path() / f"ranker_{USE}_data_{EXPERIMENT}.parquet"
+
+MODELS_BASE_DIR = Path() / "models"
+TRAIN_MODELS_BASE_DIR = MODELS_BASE_DIR / "train"
+TRAIN_MODELS_MAP_DIR = TRAIN_MODELS_BASE_DIR / "map"
+TRAIN_MODELS_RECALL_DIR = TRAIN_MODELS_BASE_DIR / "recall"
+SUBMISSION_MODELS_MAP_DIR = MODELS_BASE_DIR / "all" / "map" / "renamed"
+# Built with pathlib like the paths above rather than by replacing the
+# substring "map" in the stringified path, which would also rewrite any other
+# "map" occurring in a parent directory name.
+SUBMISSION_MODELS_RECALL_DIR = MODELS_BASE_DIR / "all" / "recall" / "renamed"
+
+# The user-wide hybrid uses this many equally sized user blocks plus one
+# trailing group (see build_user_wide_hybrid).
+NUMBER_GROUPS_USER_WIDE_HYBRID = 10
+
+# Model id -> weight of that model inside the score hybrid.
+MULTIPLE_SCORE_HYBRID_WEIGHTS = {
+    50: 0.253770701546336,
+    51: 0.10324855050317669,
+}
+
+# Keyword arguments for PureSVDRecommender.fit in add_features.
+SVD_FIT_PARAMS = {
+    "num_factors": 350,
+}
+
+# Upper bound on labelled rows kept per fold (see add_labels).
+ROWS_PER_FOLD = 900_000
+
+# Seeded generator used for the negative subsampling in add_labels.
+RNG = np.random.default_rng(42)
+
+
+def build_user_wide_hybrid(urm: sps.csr_matrix, models: dict[str, BaseRecommender]):
+    """Assemble the user-wide hybrid from models stored under consecutive ids.
+
+    Users are ordered by profile length (number of stored entries in their
+    row of ``urm``) and cut into consecutive blocks of
+    ``len(users) // NUMBER_GROUPS_USER_WIDE_HYBRID`` users. One additional,
+    final group takes the slice that follows the last full block. Group ``g``
+    is served by the model stored under ``str(USER_WIDE_HYBRID_BEGIN + g)``.
+
+    Note: the selected models are popped from ``models``, so the caller's
+    dictionary is mutated.
+    """
+    group_ids = range(NUMBER_GROUPS_USER_WIDE_HYBRID + 1)
+
+    users_by_profile_length = np.argsort(np.ediff1d(urm.indptr))
+    users_per_group = len(users_by_profile_length) // NUMBER_GROUPS_USER_WIDE_HYBRID
+    group_users = {
+        group: users_by_profile_length[
+            group * users_per_group : (group + 1) * users_per_group
+        ]
+        for group in group_ids
+    }
+
+    group_recommenders = {}
+    for group in group_ids:
+        model_key = str(USER_WIDE_HYBRID_BEGIN + group)
+        group_recommenders[group] = models.pop(model_key)
+
+    return UserWideHybridRecommender(urm, group_users, group_recommenders)
+
+
+def build_score_hybrid(urm: sps.csr_matrix, models: dict[str, BaseRecommender]):
+    """Assemble the score hybrid described by MULTIPLE_SCORE_HYBRID_WEIGHTS.
+
+    For every ``model id -> weight`` entry the model stored under
+    ``str(model id)`` is popped from ``models`` (mutating the caller's
+    dictionary) and paired with its weight, in the mapping's iteration order.
+    """
+    recommenders = []
+    weights = []
+    for model_id, weight in MULTIPLE_SCORE_HYBRID_WEIGHTS.items():
+        recommenders.append(models.pop(str(model_id)))
+        weights.append(weight)
+    return ScoresMultipleHybridRecommender(urm, recommenders, weights)
+
+
+def urm_df_to_csr(
+    urm_df: pd.DataFrame, number_users: int, number_items: int
+) -> sps.csr_matrix:
+    """Convert an interaction table into a ``number_users x number_items`` CSR matrix.
+
+    ``urm_df`` must provide the columns ``user_id`` (row index), ``item_id``
+    (column index) and ``data`` (stored value).
+    """
+    values = urm_df["data"]
+    coordinates = (urm_df["user_id"], urm_df["item_id"])
+    matrix_shape = (number_users, number_items)
+    return sps.csr_matrix((values, coordinates), shape=matrix_shape)
+
+
+def row_statistics(df: pd.DataFrame, like: str) -> pd.DataFrame:
+    """Compute per-row summary statistics over the columns whose name contains ``like``.
+
+    Returns a frame sharing ``df``'s index with the columns ``{like}_mean``,
+    ``{like}_std``, ``{like}_min``, ``{like}_max``, ``{like}_kurtosis`` and
+    ``{like}_skew``, in that order.
+    """
+    selected = df.filter(like=like)
+
+    # Insertion order of this mapping defines the output column order.
+    statistics = {
+        "mean": selected.mean(axis="columns"),
+        "std": selected.std(axis="columns"),
+        "min": selected.min(axis="columns"),
+        "max": selected.max(axis="columns"),
+        "kurtosis": kurtosis(selected, axis=1),
+        "skew": skew(selected, axis=1),
+    }
+
+    statistics_df = pd.DataFrame([], index=selected.index)
+    for suffix, values in statistics.items():
+        statistics_df[f"{like}_{suffix}"] = values
+    return statistics_df
+
+
+def compute_base_dataset(
+    number_users: int,
+    recommendation_models: dict[str, BaseRecommender],
+    score_models: dict[str, BaseRecommender],
+    cutoff: int,
+    fold: Optional[int] = None,
+) -> pd.DataFrame:
+    """Build one row per (user, candidate item, proposing recommender).
+
+    Args:
+        number_users: Candidates are generated for user ids
+            ``0 .. number_users - 1``.
+        recommendation_models: ``name -> recommender``; each one is asked for
+            up to ``cutoff`` unseen items per user.
+        score_models: ``label -> recommender``; each one contributes a
+            ``score_<label>`` column holding its score for the candidate.
+        cutoff: Number of items requested from every recommendation model.
+        fold: When given, stored in a constant ``fold`` column.
+
+    Returns:
+        A frame with the columns ``UserID``, ``ItemID``, ``Recommender``
+        (name of the proposing model), ``Ranking`` (0-based position in that
+        model's list), ``recommender_agreement`` (number of rows sharing the
+        same user/item pair), one ``score_<label>`` column per score model,
+        the statistics produced by ``row_statistics(dataset, "score")`` and,
+        optionally, ``fold``.
+    """
+    dataset = pd.DataFrame(index=range(0, number_users), columns=["ItemID"])
+    dataset.index.name = "UserID"
+
+    recommendations_list = []
+    recommenders_list = []
+    rank_list = []
+    for user_id in trange(number_users, desc="User (candidate)"):
+        user_recommendations = []
+        user_recommenders = []
+        user_rankings = []
+        for name, recommender in recommendation_models.items():
+            recommended_items = recommender.recommend(
+                user_id,
+                cutoff=cutoff,
+                remove_seen_flag=True,
+            )
+            # Label exactly as many entries as were returned. Assuming a
+            # fixed ``cutoff`` items would misalign the three per-user lists
+            # (and therefore the exploded columns) whenever a recommender
+            # returns a shorter list.
+            returned = len(recommended_items)
+            user_recommendations.extend(recommended_items)
+            user_recommenders.extend([name] * returned)
+            user_rankings.extend(range(returned))
+        recommendations_list.append(user_recommendations)
+        recommenders_list.append(user_recommenders)
+        rank_list.append(user_rankings)
+
+    dataset["ItemID"] = recommendations_list
+    dataset["Recommender"] = recommenders_list
+    dataset["Ranking"] = rank_list
+
+    # The three list columns have equal per-user lengths, so exploding them
+    # separately yields row-aligned results.
+    exploded_recommender = dataset["Recommender"].explode()
+    exploded_ranking = dataset["Ranking"].explode()
+    dataset = dataset.explode("ItemID")
+    dataset["Recommender"] = exploded_recommender
+    dataset["Ranking"] = exploded_ranking.astype("int")
+
+    # How many rows (i.e. recommenders) proposed the same item to the user.
+    recommender_agreement = (
+        dataset.reset_index()[["UserID", "ItemID"]]
+        .groupby(["UserID", "ItemID"])
+        .value_counts()
+    )
+    dataset["recommender_agreement"] = recommender_agreement.loc[
+        list(zip(dataset.index, dataset["ItemID"]))
+    ].to_numpy()
+
+    for user_id in tqdm(dataset.index.unique(), desc="User (score)"):
+        # The candidate list depends only on the user, so it is looked up
+        # once instead of once per score model. The list indexer keeps the
+        # result a Series even when the user has a single row.
+        item_list = dataset.loc[[user_id], "ItemID"].to_list()
+        for rec_label, rec_instance in score_models.items():
+            all_item_scores = rec_instance._compute_item_score(
+                [user_id], items_to_compute=item_list
+            )
+
+            dataset.loc[user_id, f"score_{rec_label}"] = all_item_scores[0, item_list]
+
+    score_statistics = row_statistics(dataset, "score")
+    dataset = pd.concat([dataset, score_statistics], axis="columns")
+
+    dataset = dataset.reset_index()
+    # Only has an effect if the index lost its "UserID" name along the way.
+    dataset = dataset.rename(columns={"index": "UserID"})
+
+    if fold is not None:
+        dataset["fold"] = fold
+
+    return dataset
+
+
+def add_labels(training_df: pd.DataFrame, correct_recommendations_df: pd.DataFrame):
+    """Attach a boolean ``Label`` column and cap the frame at ROWS_PER_FOLD rows.
+
+    A row is labelled ``True`` when its ``(UserID, ItemID)`` pair also occurs
+    in ``correct_recommendations_df``. If the labelled frame has more than
+    ``ROWS_PER_FOLD`` rows, all positive rows are kept and negative rows are
+    sampled without replacement (using the module-level ``RNG``) to fill the
+    remaining budget.
+
+    Returns:
+        The labelled, possibly subsampled frame. The result is a new frame;
+        the merge does not modify the inputs.
+    """
+    training_df = training_df.merge(
+        correct_recommendations_df,
+        on=["UserID", "ItemID"],
+        how="left",
+        indicator="Exist",
+    )
+    # "both" marks rows whose pair was found in the right-hand frame.
+    training_df["Label"] = training_df["Exist"] == "both"
+    training_df = training_df.drop(columns=["Exist"])
+
+    total_rows = training_df.shape[0]
+    if total_rows > ROWS_PER_FOLD:
+        logger.debug("Reducing dataset from {} to {} rows", total_rows, ROWS_PER_FOLD)
+        logger.debug("Original positive rate {}", training_df["Label"].mean())
+        positive_indices = training_df[training_df["Label"]].index
+        negative_indices = training_df[~training_df["Label"]].index
+        # Clamp at zero: if the positives alone exceed the budget, a negative
+        # sample size would make RNG.choice raise. In that case every
+        # positive is kept and no negative is sampled.
+        negatives_to_keep = max(ROWS_PER_FOLD - len(positive_indices), 0)
+        negative_sampled_indices = RNG.choice(
+            negative_indices, negatives_to_keep, replace=False
+        )
+        training_df = training_df.loc[positive_indices.union(negative_sampled_indices)]
+        logger.debug("New positive rate {}", training_df["Label"].mean())
+
+    return training_df
+
+
+def compute_correct_recommendations(
+    urm_val: sps.csr_matrix,
+) -> pd.DataFrame:
+    """List the stored entries of ``urm_val`` as ``UserID`` / ``ItemID`` pairs.
+
+    Each stored entry of the matrix yields one row: ``UserID`` is its row
+    index and ``ItemID`` its column index.
+    """
+    interactions = sps.coo_matrix(urm_val)
+    user_ids, item_ids = interactions.row, interactions.col
+    return pd.DataFrame({"UserID": user_ids, "ItemID": item_ids})
+
+
+def load_models_fold(
+    urm: sps.csr_matrix,
+    fold_dir: Path,
+    use_only: Optional[list[str]] = None,
+    with_user_hybrid: bool = False,
+    with_score_hybrid: bool = False,
+):
+    """Load the pickled models of one fold and optionally add the hybrids.
+
+    Args:
+        urm: Matrix handed to the hybrid constructors.
+        fold_dir: Directory read by ``load_models_all``.
+        use_only: Ids of the models to return; each entry is converted with
+            ``str()`` before the lookup. ``None`` returns every loaded model.
+        with_user_hybrid: Add a ``"user_wide_hybrid"`` entry unless present.
+        with_score_hybrid: Add a ``"score_hybrid"`` entry unless present.
+
+    Returns:
+        ``name -> recommender``. The hybrids are always built from the full
+        set of loaded models, and building them pops their components from
+        that set; with ``use_only=None`` the returned dict is that same set.
+    """
+    all_models = load_models_all(fold_dir)
+
+    if use_only is None:
+        models = all_models
+    else:
+        wanted_keys = [str(index) for index in use_only]
+        models = {key: all_models[key] for key in wanted_keys}
+
+    if with_user_hybrid and "user_wide_hybrid" not in models:
+        models["user_wide_hybrid"] = build_user_wide_hybrid(urm, all_models)
+
+    if with_score_hybrid and "score_hybrid" not in models:
+        models["score_hybrid"] = build_score_hybrid(urm, all_models)
+
+    return models
+
+
+def load_models_all(dir_: Path) -> dict[str, BaseRecommender]:
+    """Unpickle every ``*.pkl`` file directly inside ``dir_``.
+
+    Returns:
+        ``file stem -> unpickled object``, inserted in sorted path order so
+        that the iteration order does not depend on the file system.
+
+    NOTE(security): ``pickle.load`` can execute arbitrary code. This must
+    only be pointed at directories containing files produced by this
+    project's own training scripts.
+    """
+    models: dict[str, BaseRecommender] = {}
+    for path in sorted(dir_.glob("*.pkl")):
+        # The context manager closes each file handle right after loading.
+        with path.open("rb") as file:
+            models[path.stem] = pickle.load(file)
+    return models
+
+
+def compute_training_dataset(
+    number_users: int,
+    number_items: int,
+    urm_df: pd.DataFrame,
+    folds: Optional[int] = None,
+):
+    """Build the labelled ranker training set, one part per split.
+
+    With ``folds=None`` a single 80/20 shuffle split is used and the models
+    are read directly from the train model directories. Otherwise a shuffled
+    ``KFold(folds)`` is used and split ``i`` reads its models from the
+    ``<dir>/<i>`` sub-directories. Both splitters are seeded with 42.
+
+    For every split, candidates and scores are computed with
+    ``compute_base_dataset`` and labelled against the validation part of the
+    interactions with ``add_labels``.
+
+    Returns:
+        The concatenation of all per-split frames.
+    """
+    if folds is not None:
+        splitter = KFold(folds, shuffle=True, random_state=42)
+    else:
+        splitter = ShuffleSplit(1, test_size=0.2, random_state=42)
+
+    per_fold_datasets: list[pd.DataFrame] = []
+    fold_iterator = tqdm(
+        enumerate(splitter.split(urm_df)),
+        total=folds,
+        desc="Fold",
+    )
+    for fold_index, (train_rows, val_rows) in fold_iterator:
+        urm_train = urm_df_to_csr(urm_df.iloc[train_rows], number_users, number_items)
+        urm_val = urm_df_to_csr(urm_df.iloc[val_rows], number_users, number_items)
+
+        # Candidate generators come from the "recall" tree, scorers from the
+        # "map" tree; with folds, each split has its own sub-directory.
+        recall_dir = TRAIN_MODELS_RECALL_DIR
+        map_dir = TRAIN_MODELS_MAP_DIR
+        if folds is not None:
+            recall_dir = recall_dir / str(fold_index)
+            map_dir = map_dir / str(fold_index)
+
+        recommendation_models = load_models_fold(
+            urm_train,
+            recall_dir,
+            use_only=RECOMMENDATION_MODELS_TO_USE,
+            with_user_hybrid=False,
+            with_score_hybrid=False,
+        )
+        logger.debug("Recommendation models: {}", recommendation_models.keys())
+
+        score_models = load_models_fold(
+            urm_train,
+            map_dir,
+            use_only=SCORE_MODELS_TO_USE,
+            with_user_hybrid=USE_USER_HYBRID,
+            with_score_hybrid=USE_SCORE_HYBRID,
+        )
+        logger.debug("Score models: {}", score_models.keys())
+
+        candidates = compute_base_dataset(
+            number_users,
+            recommendation_models,
+            score_models,
+            CUTOFF,
+            fold=fold_index,
+        )
+        held_out_pairs = compute_correct_recommendations(urm_val)
+        per_fold_datasets.append(add_labels(candidates, held_out_pairs))
+
+    return pd.concat(per_fold_datasets)
+
+
+def compute_submission_dataset(number_users: int):
+    """Build the unlabelled dataset from the submission model directories.
+
+    Every model found in SUBMISSION_MODELS_RECALL_DIR generates candidates
+    and every model found in SUBMISSION_MODELS_MAP_DIR contributes a score
+    column; no fold column is added.
+    """
+    candidate_generators = load_models_all(SUBMISSION_MODELS_RECALL_DIR)
+    scorers = load_models_all(SUBMISSION_MODELS_MAP_DIR)
+    return compute_base_dataset(number_users, candidate_generators, scorers, CUTOFF)
+
+
+def compute_dataset(
+    use: Literal["training", "submission"],
+    number_users: int,
+    number_items: int,
+    urm_df: pd.DataFrame,
+    folds: Optional[int] = None,
+):
+    """Dispatch to the training or submission dataset builder.
+
+    Args:
+        use: ``"training"`` or ``"submission"``.
+        number_users: Forwarded to the selected builder.
+        number_items: Used by the training builder only.
+        urm_df: Used by the training builder only.
+        folds: Used by the training builder only.
+
+    Raises:
+        ValueError: If ``use`` is neither of the two supported values.
+            Previously such a value made the function return ``None``.
+    """
+    if use == "training":
+        return compute_training_dataset(number_users, number_items, urm_df, folds)
+    if use == "submission":
+        return compute_submission_dataset(number_users)
+    raise ValueError(f'use must be "training" or "submission", got {use!r}')
+
+
+def add_features(
+    dataset: pd.DataFrame,
+    urm: sps.csr_matrix,
+    icm: sps.csr_matrix,
+    interactions_df: Optional[pd.DataFrame] = None,
+):
+    """Append item-level and user-level features to the candidate dataset.
+
+    Args:
+        dataset: Candidate rows with ``UserID`` and ``ItemID`` columns. The
+            first feature column is written into this frame in place before
+            the joins start producing new frames.
+        urm: User x item matrix used for popularity, profile length, SVD
+            factors and user similarity.
+        icm: Item x feature matrix used for item similarity.
+        interactions_df: Interaction table with ``user_id`` and ``item_id``
+            columns used for the top-k popularity counts. When omitted, the
+            module-level ``urm_df`` (defined by the ``__main__`` block) is
+            used, which is what this function always read before the
+            parameter existed.
+
+    Returns:
+        ``dataset`` extended with ``item_popularity``, ``item_similarity``,
+        ``item_svd_<i>``, ``item_autoencoder_<column>``, ``user_profile_len``,
+        ``top_<k>``, ``user_similarity`` and ``user_svd_<i>`` columns.
+
+    Reads ``reduced_features.csv`` from the current working directory.
+    """
+    if interactions_df is None:
+        # Backward-compatible fallback to the script-level global. Resolved
+        # first so that a missing global fails before the SVD is fitted.
+        interactions_df = urm_df
+
+    svd = PureSVDRecommender(urm)
+    svd.fit(**SVD_FIT_PARAMS)
+
+    # ---- Item features ----
+    # None of the joins in this section is expected to change the row count
+    # before the last use of item_ids, so the ids are converted once.
+    item_ids = dataset["ItemID"].to_numpy().astype(int)
+
+    ## Item popularity: number of stored entries per column of the URM
+    item_popularity = np.ediff1d(sps.csc_matrix(urm).indptr)
+    dataset["item_popularity"] = item_popularity[item_ids]
+
+    ## Mean of each row of the item similarity computed from the ICM
+    # NOTE(review): assumes compute_similarity() returns an object that
+    # iterates row by row - confirm against Compute_Similarity.
+    item_similarity = Compute_Similarity(icm.T).compute_similarity()
+    mean_item_similarity = pd.Series(
+        {i: row.mean() for i, row in enumerate(item_similarity)}
+    ).to_frame(name="item_similarity")
+    dataset = dataset.join(mean_item_similarity, on="ItemID")
+
+    ## Singular vectors
+    for i in range(ITEM_LATENT_DIMENSIONS):
+        dataset[f"item_svd_{i}"] = svd.ITEM_factors[item_ids, i]
+
+    ## Autoencoder embeddings (first CSV column is used as the item index)
+    encoder_embeddings = pd.read_csv("reduced_features.csv", index_col=0).rename(
+        columns=lambda x: f"item_autoencoder_{x}"
+    )
+    dataset = dataset.join(encoder_embeddings, on="ItemID")
+
+    # ---- User features ----
+    # Converted after the CSV join, whose effect on the row count depends on
+    # the file's index.
+    user_ids = dataset["UserID"].to_numpy().astype(int)
+
+    ## User profile length: number of stored entries per row of the URM
+    user_profile_len = np.ediff1d(sps.csr_matrix(urm).indptr)
+    dataset["user_profile_len"] = user_profile_len[user_ids]
+
+    ## User popularity bias
+    # (how many of the user's interactions hit the k most popular items)
+    item_popularity_ranking = item_popularity.argsort()[::-1]
+    # Work on a copy so the caller's interaction table is never written to.
+    item_id_df = interactions_df[["user_id", "item_id"]].copy()
+    for k in TOP_POPULAR_THRESHOLDS:
+        top_k_popular = item_popularity_ranking[:k]
+        item_id_df[f"top_{k}"] = item_id_df["item_id"].isin(top_k_popular).astype(float)
+    user_top_k_df = item_id_df.groupby("user_id").aggregate(
+        {f"top_{k}": "sum" for k in TOP_POPULAR_THRESHOLDS}
+    )
+    dataset = dataset.join(user_top_k_df, on="UserID")
+
+    ## Mean of each row of the user similarity computed from the URM
+    user_similarity = Compute_Similarity(urm.T).compute_similarity()
+    mean_user_similarity = pd.Series(
+        {i: row.mean() for i, row in enumerate(user_similarity)}
+    ).to_frame(name="user_similarity")
+    dataset = dataset.join(mean_user_similarity, on="UserID")
+
+    ## Singular vectors
+    for i in range(USER_LATENT_DIMENSIONS):
+        dataset[f"user_svd_{i}"] = svd.USER_factors[user_ids, i]
+
+    return dataset
+
+
+if __name__ == "__main__":
+    # Raw tables give the matrix dimensions and the interaction table that is
+    # split into folds; load() provides the matrices used for the features.
+    icm_df, urm_df = load_raw()
+    icm_matrix, urm_all, *_ = load()
+
+    number_users = urm_df["user_id"].nunique()
+    number_items = icm_df["item_id"].nunique()
+
+    dataset = add_features(
+        compute_dataset(
+            use=USE,
+            number_users=number_users,
+            number_items=number_items,
+            urm_df=urm_df,
+            folds=NUMBER_FOLDS,
+        ),
+        urm_all,
+        icm_matrix,
+    )
+
+    # Store the identifier columns as categoricals in the parquet file.
+    categorical_columns = ("UserID", "ItemID", "Recommender")
+    dataset = dataset.astype({column: "category" for column in categorical_columns})
+
+    dataset.to_parquet(OUTPUT_PATH)
diff --git a/run_train_kfold.py b/run_train_kfold.py
new file mode 100644
index 0000000..df62f80
--- /dev/null
+++ b/run_train_kfold.py
@@ -0,0 +1,410 @@
+import pickle
+from pathlib import Path
+from multiprocessing import cpu_count
+
+import scipy.sparse as sps
+from sklearn.model_selection import KFold
+
+from Data_manager.competition import load_raw
+from Recommenders.BaseRecommender import BaseRecommender
+from Recommenders.NonPersonalizedRecommender import TopPop
+from Recommenders.GraphBased.P3alphaRecommender import P3alphaRecommender
+from Recommenders.GraphBased.RP3betaRecommenderICM import RP3betaRecommenderICM
+from Recommenders.KNN.ItemKNN_CFCBF_Hybrid_Recommender import (
+    ItemKNN_CFCBF_Hybrid_Recommender,
+)
+from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
+from Recommenders.SLIM.SLIMElasticNetRecommender import (
+    SLIMElasticNetRecommender,
+)
+
+# Number of KFold splits; the models of split i are written to MODEL_DIR / "<i>".
+N_FOLDS = 10
+# Root directory under which one sub-directory per fold is created.
+MODEL_DIR = Path() / "models" / "train" / "recall"
+
+# Keyword arguments passed to ``fit`` for each model id. The active
+# (uncommented) ids must have a matching entry in ``recommender_factory``,
+# because every recommender it returns is fitted with HYPERPARAMETERS[id].
+# The commented-out entries are the parameter sets of the other model groups;
+# enable them together with the corresponding factory lines.
+HYPERPARAMETERS: dict[int, dict] = {
+    # User-wide hybrid 1 (0-10)
+    # 0: {
+    #     "topK": 22,
+    #     "alpha": 0.015137951778257512,
+    #     "normalize_similarity": True,
+    #     "implicit": True,
+    # },
+    # 1: {
+    #     "topK": 5,
+    #     "shrink": 774,
+    #     "similarity": "asymmetric",
+    #     "normalize": True,
+    #     "asymmetric_alpha": 0.0,
+    #     "feature_weighting": "TF-IDF",
+    #     "ICM_weight": 0.01,
+    # },
+    # 2: {
+    #     "topK": 5,
+    #     "shrink": 1000,
+    #     "similarity": "asymmetric",
+    #     "normalize": True,
+    #     "asymmetric_alpha": 0.2626851799303072,
+    #     "feature_weighting": "TF-IDF",
+    #     "ICM_weight": 0.1560410093044209,
+    # },
+    # 3: {
+    #     "topK": 1000,
+    #     "alpha": 1.8920160119169898,
+    #     "beta": 0.4950301468130674,
+    #     "delta": 0.30908791366521954,
+    #     "normalize_similarity": True,
+    # },
+    # 4: {
+    #     "topK": 5,
+    #     "shrink": 1000,
+    #     "similarity": "asymmetric",
+    #     "normalize": True,
+    #     "asymmetric_alpha": 0.0,
+    #     "feature_weighting": "TF-IDF",
+    #     "ICM_weight": 0.06864228467890522,
+    # },
+    # 5: {
+    #     "topK": 7,
+    #     "shrink": 293,
+    #     "similarity": "asymmetric",
+    #     "normalize": True,
+    #     "asymmetric_alpha": 0.0732688773175534,
+    #     "feature_weighting": "TF-IDF",
+    #     "ICM_weight": 0.23668747670276377,
+    # },
+    # 6: {
+    #     "topK": 5,
+    #     "shrink": 1000,
+    #     "similarity": "asymmetric",
+    #     "normalize": True,
+    #     "asymmetric_alpha": 0.0,
+    #     "feature_weighting": "TF-IDF",
+    #     "ICM_weight": 0.06565478344525211,
+    # },
+    # 7: {
+    #     "topK": 5,
+    #     "shrink": 0,
+    #     "similarity": "asymmetric",
+    #     "normalize": True,
+    #     "asymmetric_alpha": 0.0329315091653946,
+    #     "feature_weighting": "BM25",
+    #     "ICM_weight": 0.16124267891305158,
+    # },
+    # 8: {
+    #     "topK": 5,
+    #     "shrink": 1000,
+    #     "similarity": "asymmetric",
+    #     "normalize": True,
+    #     "asymmetric_alpha": 0.0,
+    #     "feature_weighting": "TF-IDF",
+    #     "ICM_weight": 0.171628301912052,
+    # },
+    # 9: {
+    #     "topK": 10,
+    #     "alpha": 0.35225624527493254,
+    #     "normalize_similarity": True,
+    #     "implicit": True,
+    # },
+    # 10: {
+    #     "topK": 44,
+    #     "shrink": 473,
+    #     "similarity": "asymmetric",
+    #     "normalize": True,
+    #     "asymmetric_alpha": 0.35983197418129564,
+    #     "feature_weighting": "TF-IDF",
+    #     "ICM_weight": 0.12542629369630146,
+    # },
+    # 20: {  # Item KNN CF+CBF
+    #     "topK": 96,
+    #     "shrink": 966,
+    #     "similarity": "cosine",
+    #     "normalize": True,
+    #     "feature_weighting": "BM25",
+    #     "ICM_weight": 0.015154282137075726,
+    # },
+    # 21: {  # SLIM ElasticNet
+    #     "l1_ratio": 0.4408355927953408,
+    #     "alpha": 0.00013519978876092592,
+    #     "positive_only": False,
+    #     "topK": 59,
+    #     "do_feature_selection": True,
+    # },
+    # 22: {  # RP3 ICM
+    #     "topK": 11,
+    #     "alpha": 1.9811525250064195,
+    #     "beta": 0.6832513917848906,
+    #     "delta": 0.0037274512973076712,
+    #     "normalize_similarity": True,
+    #     "implicit": True,
+    #     "min_rating": 1.0,
+    # },
+    # 23: {  # Item KNN CF
+    #     "topK": 5,
+    #     "shrink": 224,
+    #     "similarity": "asymmetric",
+    #     "normalize": True,
+    #     "asymmetric_alpha": 0.0,
+    #     "feature_weighting": "TF-IDF",
+    # },
+    # User-wide hybrid 2 (30-40)
+    # 30: {
+    #     "topK": 1000,
+    #     "l1_ratio": 0.0036552968571563925,
+    #     "alpha": 0.001,
+    #     "positive_only": True,
+    #     "do_feature_selection": True,
+    # },
+    # 31: {
+    #     "topK": 1000,
+    #     "l1_ratio": 0.0036439600383419896,
+    #     "alpha": 0.001,
+    #     "positive_only": True,
+    #     "do_feature_selection": True,
+    # },
+    # 32: {
+    #     "topK": 1000,
+    #     "l1_ratio": 0.01294361044706415,
+    #     "alpha": 0.001,
+    #     "positive_only": True,
+    #     "do_feature_selection": True,
+    # },
+    # 33: {
+    #     "topK": 469,
+    #     "l1_ratio": 0.0025724182700638666,
+    #     "alpha": 0.001,
+    #     "positive_only": True,
+    #     "do_feature_selection": True,
+    # },
+    # 34: {
+    #     "topK": 1000,
+    #     "l1_ratio": 0.012451061879323577,
+    #     "alpha": 0.001,
+    #     "positive_only": True,
+    #     "do_feature_selection": True,
+    # },
+    # 35: {
+    #     "topK": 1000,
+    #     "l1_ratio": 0.0037651439623475717,
+    #     "alpha": 0.001,
+    #     "positive_only": False,
+    #     "do_feature_selection": True,
+    # },
+    # 36: {
+    #     "topK": 1000,
+    #     "l1_ratio": 0.009466188626970398,
+    #     "alpha": 0.001,
+    #     "positive_only": True,
+    #     "do_feature_selection": True,
+    # },
+    # 37: {
+    #     "topK": 196,
+    #     "l1_ratio": 0.019833595367995636,
+    #     "alpha": 0.001,
+    #     "positive_only": True,
+    #     "do_feature_selection": True,
+    # },
+    # 38: {
+    #     "topK": 145,
+    #     "l1_ratio": 2.6489644774823373e-05,
+    #     "alpha": 0.001,
+    #     "positive_only": True,
+    #     "do_feature_selection": True,
+    # },
+    # 39: {
+    #     "topK": 866,
+    #     "l1_ratio": 0.019729118757762613,
+    #     "alpha": 0.001,
+    #     "positive_only": True,
+    #     "do_feature_selection": True,
+    # },
+    # 40: {
+    #     "topK": 44,
+    #     "shrink": 473,
+    #     "similarity": "asymmetric",
+    #     "normalize": True,
+    #     "asymmetric_alpha": 0.35983197418129564,
+    #     "feature_weighting": "TF-IDF",
+    #     "ICM_weight": 0.12542629369630146,
+    # },
+    # Score hybrid
+    # 50: {  # RP3 ICM
+    #     "topK": 79,
+    #     "alpha": 0.7864757238135991,
+    #     "beta": 0.443333110568691,
+    #     "delta": 0.7593249588588719,
+    #     "min_rating": 0.008553401844836345,
+    #     "implicit": True,
+    #     "normalize_similarity": True,
+    # },
+    # 51: {  # SLIM ElasticNet
+    #     "l1_ratio": 0.04077479852537514,
+    #     "alpha": 0.0004098922954204119,
+    #     "positive_only": True,
+    #     "topK": 144,
+    #     "do_feature_selection": True,
+    # },
+    # Recall@10 optimised models
+    # 60: {  # SLIM ElasticNet
+    #     "topK": 1000,
+    #     "l1_ratio": 0.009196376132404047,
+    #     "alpha": 0.001,
+    #     "positive_only": True,
+    #     "do_feature_selection": True,
+    # },
+    # 61: {  # Item KNN CF+CBF
+    #     "topK": 5,
+    #     "shrink": 1000,
+    #     "similarity": "asymmetric",
+    #     "normalize": True,
+    #     "asymmetric_alpha": 0.0,
+    #     "feature_weighting": "TF-IDF",
+    #     "ICM_weight": 0.1918507776404466,
+    # },
+    # 62: {  # Item KNN CF
+    #     "topK": 5,
+    #     "shrink": 1000,
+    #     "similarity": "asymmetric",
+    #     "normalize": True,
+    #     "asymmetric_alpha": 0.12250234857130494,
+    #     "feature_weighting": "TF-IDF",
+    # },
+    # 63: {  # RP3 ICM
+    #     "topK": 556,
+    #     "alpha": 2.0,
+    #     "beta": 0.43088991464943555,
+    #     "delta": 0.0,
+    #     "normalize_similarity": True,
+    # },
+    # 64: {},  # Top Popular
+    # Recall@50 optimised models
+    70: {  # Item KNN CF
+        "topK": 152,
+        "shrink": 1000,
+        "similarity": "asymmetric",
+        "normalize": True,
+        "asymmetric_alpha": 0.2696194971486583,
+        "feature_weighting": "TF-IDF",
+    },
+    71: {  # Item KNN CF+CBF
+        "topK": 43,
+        "shrink": 937,
+        "similarity": "asymmetric",
+        "normalize": True,
+        "asymmetric_alpha": 0.45165257781514373,
+        "feature_weighting": "TF-IDF",
+        "ICM_weight": 0.3749251701759684,
+    },
+    72: {  # SLIM ElasticNet
+        "l1_ratio": 0.14747318214902194,
+        "alpha": 0.00043480530562990655,
+        "positive_only": False,
+        "topK": 75,
+        "do_feature_selection": True,
+    },
+    73: {  # RP3 ICM
+        "topK": 351,
+        "alpha": 1.8421852767137328,
+        "beta": 0.26321333084561177,
+        "delta": 0.27195701300859715,
+        "normalize_similarity": True,
+    },
+}
+
+
+def recommender_factory(urm, icm) -> dict[int, BaseRecommender]:
+    """Instantiate the (not yet fitted) recommenders of the active model group.
+
+    Returns a ``model id -> recommender`` mapping. Each id is expected to
+    have a matching entry in ``HYPERPARAMETERS``, which supplies the ``fit``
+    arguments. ``icm`` is only passed to the constructors that take it.
+    """
+    # Inactive model groups, kept for reference as "id: constructor":
+    #
+    # User-wide hybrid 1 (0-10)
+    #   0: P3alphaRecommender(urm)
+    #   1: ItemKNN_CFCBF_Hybrid_Recommender(urm, icm)
+    #   2: ItemKNN_CFCBF_Hybrid_Recommender(urm, icm)
+    #   3: RP3betaRecommenderICM(urm, icm)
+    #   4: ItemKNN_CFCBF_Hybrid_Recommender(urm, icm)
+    #   5: ItemKNN_CFCBF_Hybrid_Recommender(urm, icm)
+    #   6: ItemKNN_CFCBF_Hybrid_Recommender(urm, icm)
+    #   7: ItemKNN_CFCBF_Hybrid_Recommender(urm, icm)
+    #   8: ItemKNN_CFCBF_Hybrid_Recommender(urm, icm)
+    #   9: P3alphaRecommender(urm)
+    #   10: ItemKNN_CFCBF_Hybrid_Recommender(urm, icm)
+    #   20: ItemKNN_CFCBF_Hybrid_Recommender(urm, icm)
+    #   21: SLIMElasticNetRecommender(urm)
+    #   22: RP3betaRecommenderICM(urm, icm)
+    #   23: ItemKNNCFRecommender(urm)
+    # User-wide hybrid 2 (30-40)
+    #   30: SLIMElasticNetRecommender(urm)
+    #   31: SLIMElasticNetRecommender(urm)
+    #   32: SLIMElasticNetRecommender(urm)
+    #   33: SLIMElasticNetRecommender(urm)
+    #   34: SLIMElasticNetRecommender(urm)
+    #   35: SLIMElasticNetRecommender(urm)
+    #   36: SLIMElasticNetRecommender(urm)
+    #   37: SLIMElasticNetRecommender(urm)
+    #   38: SLIMElasticNetRecommender(urm)
+    #   39: SLIMElasticNetRecommender(urm)
+    #   40: ItemKNN_CFCBF_Hybrid_Recommender(urm, icm)
+    # Score hybrid
+    #   50: RP3betaRecommenderICM(urm, icm)
+    #   51: SLIMElasticNetRecommender(urm)
+    # Recall@10 optimised models
+    #   60: SLIMElasticNetRecommender(urm)
+    #   61: ItemKNN_CFCBF_Hybrid_Recommender(urm, icm)
+    #   62: ItemKNNCFRecommender(urm)
+    #   63: RP3betaRecommenderICM(urm, icm)
+    #   64: TopPop(urm)
+    recommenders: dict[int, BaseRecommender] = {}
+
+    # Recall@50 optimised models
+    recommenders[70] = ItemKNNCFRecommender(urm)
+    recommenders[71] = ItemKNN_CFCBF_Hybrid_Recommender(urm, icm)
+    recommenders[72] = SLIMElasticNetRecommender(urm)
+    recommenders[73] = RP3betaRecommenderICM(urm, icm)
+
+    return recommenders
+
+
+def train_fold(i, train_indices, icm, num_users, num_items, urm_df):
+    """Fit every recommender of ``recommender_factory`` on one fold and pickle it.
+
+    Defined at module level (not inside the ``__main__`` guard) so that
+    worker processes can resolve it by name: under the "spawn" start method
+    a worker re-imports this module without executing the guarded block.
+
+    Args:
+        i: Fold index; also the name of the output sub-directory.
+        train_indices: Positional row indices of ``urm_df`` used for training.
+        icm: Item-content matrix handed to the recommenders that take one.
+        num_users: Number of rows of the fold URM.
+        num_items: Number of columns of the fold URM.
+        urm_df: Interaction table with ``user_id``, ``item_id``, ``data``.
+
+    Side effects:
+        Writes ``MODEL_DIR / str(i) / "<model id>.pkl"`` for every model and
+        prints one progress line per model.
+    """
+    from datetime import datetime
+
+    fold_dir = MODEL_DIR / str(i)
+    # parents=True: MODEL_DIR itself may not exist yet on a fresh checkout.
+    fold_dir.mkdir(parents=True, exist_ok=True)
+
+    fold_urm_df = urm_df.iloc[train_indices]
+    fold_urm = sps.csr_matrix(
+        (fold_urm_df["data"], (fold_urm_df["user_id"], fold_urm_df["item_id"])),
+        shape=(num_users, num_items),
+    )
+
+    fold_recommenders = recommender_factory(fold_urm, icm)
+    for j, (key, recommender) in enumerate(fold_recommenders.items()):
+        print(
+            f"Fold {str(i).zfill(2)} Recommender {str(j).zfill(2)} {datetime.now()}"
+        )
+        recommender.fit(**HYPERPARAMETERS[key])
+        with (fold_dir / f"{key}.pkl").open("wb") as f:
+            pickle.dump(recommender, f)
+
+
+if __name__ == "__main__":
+    from concurrent.futures import ProcessPoolExecutor
+
+    icm_df, urm_df = load_raw()
+    # NOTE(review): the matrix shapes are taken from the number of distinct
+    # ids, which presumably relies on ids being contiguous from 0 - confirm.
+    num_users = urm_df["user_id"].nunique()
+    num_items = urm_df["item_id"].nunique()
+    num_features = icm_df["feature_id"].nunique()
+
+    icm = sps.csr_matrix(
+        (icm_df.data, (icm_df.item_id, icm_df.feature_id)),
+        shape=(num_items, num_features),
+    )
+
+    # At least one worker, even when cpu_count() // 2 evaluates to 0.
+    worker_count = max(1, cpu_count() // 2)
+    with ProcessPoolExecutor(max_workers=worker_count) as executor:
+        # Arguments are pickled when a task is sent to a worker, so every
+        # worker already gets its own copy; no explicit .copy() is needed.
+        futures = [
+            executor.submit(
+                train_fold,
+                i,
+                train_indices,
+                icm,
+                num_users,
+                num_items,
+                urm_df,
+            )
+            for i, (train_indices, _) in enumerate(
+                KFold(N_FOLDS, shuffle=True, random_state=42).split(urm_df)
+            )
+        ]
+        # result() re-raises any exception that occurred in a worker.
+        for future in futures:
+            future.result()