# Name of the annotation column that labels each sample for this model.
model_column = 'Tumor_model_annot'

# Tumor samples: rows whose model annotation is exactly 'cancer_cells'.
samples = data_annot.loc[data_annot[model_column] == 'cancer_cells'].index
cancer_expr = data_expr[samples]
# Copy before adding a column so the assignment never targets a selection of
# data_annot (avoids pandas' SettingWithCopyWarning).
cancer_annot = data_annot.loc[samples].copy()
cancer_annot['Tumor_type'] = cancer_annot['Dataset']
cancer_annot = cancer_annot[['Tumor_type', 'Dataset']]

# Remaining samples: rows that carry a model annotation other than
# 'cancer_cells'. Rows with a missing annotation are excluded.
samples = data_annot.loc[data_annot[model_column].notna() & (data_annot[model_column] != 'cancer_cells')].index
cells_expr = data_expr[samples]
cells_annot = data_annot.loc[samples, [model_column, 'Dataset']].copy()
cells_annot.columns = ['Cell_type', 'Dataset']

# Combine with lab_annot / lab_expr, which are defined outside this section
# (presumably in-house samples with the same column layout -- TODO confirm).
cells_annot = pd.concat([lab_annot, cells_annot])
# Samples without a 'Dataset' value get their own sample ID as dataset name.
missing_dataset = cells_annot['Dataset'].isna()
cells_annot.loc[missing_dataset, 'Dataset'] = cells_annot.loc[missing_dataset].index
cells_expr = pd.concat([lab_expr, cells_expr], axis=1)

# To make sure that there are no repeated samples: keep only the sample IDs
# found in both the annotation index and the expression columns, in sorted
# order, and select both tables by that same list.
# NOTE(review): label-based selection returns every match, so labels that are
# duplicated inside either table would still be repeated -- confirm the IDs
# are unique upstream.
samples = sorted(set(cells_annot.index) & set(cells_expr.columns))
cells_expr = cells_expr.loc[:, samples]
cells_annot = cells_annot.loc[samples]
print(cells_expr.shape, cells_annot.shape)
print(cancer_expr.shape, cancer_annot.shape)

# Adding missing cell types: for every subtype of 'General_cells' that has no
# annotated sample, duplicate the samples of its direct subtypes under new
# sample IDs suffixed with the missing type's name.
cell_types = CellTypes.load('configs/cell_types.yaml')
# Build the lookup once instead of re-evaluating .unique() per candidate.
present_cts = set(cells_annot['Cell_type'].unique())
missing_cts = [x for x in cell_types.get_all_subtypes('General_cells') if x not in present_cts]
for ct in missing_cts:
    subtypes = cell_types.get_direct_subtypes(ct)
    # Work on copies: the index, a column and the column labels are modified
    # below, and doing that on a selection of the original frames triggers
    # pandas' SettingWithCopyWarning.
    annot = cells_annot.loc[cells_annot['Cell_type'].isin(subtypes)].copy()
    expr = cells_expr[annot.index].copy()
    annot.index = annot.index + f'_{ct}'
    annot['Dataset'] = annot.index
    # NOTE(review): the duplicated rows keep the 'Cell_type' of the subtype
    # they were copied from; nothing here relabels them as `ct`. If the intent
    # is for `ct` itself to appear in 'Cell_type', that assignment is missing
    # -- confirm against the original notebook.
    expr.columns = expr.columns + f'_{ct}'
    # Concatenate inside the loop so later iterations select from the tables
    # that already contain the rows added by earlier iterations.
    cells_expr = pd.concat([cells_expr, expr], axis=1)
    cells_annot = pd.concat([cells_annot, annot])

# To make sure that there are no repeated samples: keep only the sample IDs
# found in both the annotation index and the expression columns, in sorted
# order, and select both tables by that same list.
# NOTE(review): label-based selection returns every match, so labels that are
# duplicated inside either table would still be repeated -- confirm the IDs
# are unique upstream.
samples = sorted(set(cells_annot.index) & set(cells_expr.columns))
cells_expr = cells_expr.loc[:, samples]
cells_annot = cells_annot.loc[samples]
print(cells_expr.shape, cells_annot.shape)

# Model training
# The mixer receives the cell-type tree, the non-tumor expression/annotation
# tables and the tumor expression/annotation tables.
# NOTE(review): the meaning of num_av and num_points is defined by Mixer,
# which is not visible here -- check its documentation before tuning them.
mixer = Mixer(
    cell_types=cell_types,
    cells_expr=cells_expr,
    cells_annot=cells_annot,
    tumor_expr=cancer_expr,
    tumor_annot=cancer_annot,
    num_av=3,
    num_points=300000,
)
# Boosting parameters for the two steps are passed as paths to TSV files.
model = DeconvolutionModel(
    cell_types,
    boosting_params_first_step='configs/boosting_params/lgb_parameters_first_step.tsv',
    boosting_params_second_step='configs/boosting_params/lgb_parameters_second_step.tsv',
)
model.fit(mixer)
Hello, I want to generate a tumor model. In the code above, I changed `Blood_model_annot` on line 1 to `Tumor_model_annot`, modified line 8, and changed the configuration file on line 27 to `cell_types.yaml`. I did not change anything else. Is this acceptable? When I run the code, an error occurs at the `model.fit(mixer)` step, as shown in the figure. Could you please tell me what causes the error and how I should modify the code?
to make sure that there is no repeated samples
adding missing cell types
to make sure that there is no repeated samples
Model training
Hello, I want to generate a tumor model. In the code above, I changed `Blood_model_annot` on line 1 to `Tumor_model_annot`, modified line 8, and changed the configuration file on line 27 to `cell_types.yaml`. I did not change anything else. Is this acceptable? When I run the code, an error occurs at the `model.fit(mixer)` step, as shown in the figure. Could you please tell me what causes the error and how I should modify the code?