diff --git a/.github/workflows/build-windows-executable-app.yaml b/.github/workflows/build-windows-executable-app.yaml index feb31e08..eee5ecbd 100644 --- a/.github/workflows/build-windows-executable-app.yaml +++ b/.github/workflows/build-windows-executable-app.yaml @@ -313,6 +313,7 @@ jobs: cp -r src streamlit_exe cp -r content streamlit_exe cp -r assets streamlit_exe + cp -r scripts streamlit_exe cp -r static streamlit_exe cp -r example-data streamlit_exe cp openms-bin/*.dll streamlit_exe diff --git a/.streamlit/config.toml b/.streamlit/config.toml index a68c7cb8..7f7fb9b2 100644 --- a/.streamlit/config.toml +++ b/.streamlit/config.toml @@ -6,7 +6,7 @@ developmentMode = false [server] maxUploadSize = 2000 #MB -port = 8501 # should be same as configured in deployment repo +port = 8502 # should be same as configured in deployment repo [theme] # The preset Streamlit theme that your custom theme inherits from. One of "light" or "dark". diff --git a/app.py b/app.py index c19c1e36..36b6d935 100644 --- a/app.py +++ b/app.py @@ -35,5 +35,15 @@ ], } + with open('settings.json', 'r') as f: + settings = json.load(f) + if settings['enable_flashida']: + pages["🧠 FLASHIda"] = ([ + st.Page(Path("content", "FLASHIda", "FLASHIdaWorkflow.py"), title="Workflow", icon="⚙️"), + st.Page(Path("content", "FLASHIda", "FLASHIdaViewer.py"), title="Viewer", icon="👀"), + st.Page(Path("content", "FLASHIda", "FLASHIdaSimulatorWorkflow.py"), title="Simulator", icon="⚙️"), + st.Page(Path("content", "FLASHIda", "FLASHIdaSimulatorViewer.py"), title="Viewer", icon="👀"), + ]) + pg = st.navigation(pages, expanded=True) pg.run() \ No newline at end of file diff --git a/content/FLASHIda/FLASHIdaSimulatorViewer.py b/content/FLASHIda/FLASHIdaSimulatorViewer.py new file mode 100644 index 00000000..27b19831 --- /dev/null +++ b/content/FLASHIda/FLASHIdaSimulatorViewer.py @@ -0,0 +1,126 @@ +import streamlit as st + +from pathlib import Path + +from src.common.common import page_setup, save_params +from src.workflow.FileManager import FileManager +from src.render.render import render_grid + +DEFAULT_LAYOUT = [['ms1_deconv_heat_map']] + +def select_experiment(): + st.session_state.selected_experiment0_ida = st.session_state.selected_experiment_dropdown_ida + print(st.session_state.selected_experiment0_ida) + if len(layout) > 1: + for exp_index in range(1, len(layout)): + if st.session_state[f'selected_experiment_dropdown_{exp_index}_ida'] is None: + continue + st.session_state[f"selected_experiment{exp_index}_ida"] = st.session_state[f'selected_experiment_dropdown_{exp_index}_ida'] + +def validate_selected_index(file_manager, selected_experiment): + results = file_manager.get_results_list(['simulation_dfs']) + if selected_experiment in st.session_state: + if st.session_state[selected_experiment] in results: + return name_to_index[st.session_state[selected_experiment]] + else: + del st.session_state[selected_experiment] + return None + +# page initialization +params = page_setup() + +# Get available results +file_manager = FileManager( + st.session_state["workspace"], + Path(st.session_state['workspace'], 'flashidasimulator', 'cache') +) + +results = file_manager.get_results_list(['simulation_dfs']) + +if file_manager.result_exists('layout', 'layout'): + layout = file_manager.get_results('layout', 'layout')['layout'] + side_by_side = layout['side_by_side'] + layout = layout['layout'] + +else: + layout = [DEFAULT_LAYOUT] + side_by_side = False + +### if no input file is given, show blank page +if len(results) == 0: + st.error('No 
results to show yet. Please run a workflow first!') + st.stop() + +# Map names to index +name_to_index = {n : i for i, n in enumerate(results)} + +if len(layout) == 2 and side_by_side: + c1, c2 = st.columns(2) + with c1: + st.selectbox( + "choose experiment", results, + key="selected_experiment_dropdown_ida", + index=validate_selected_index(file_manager, 'selected_experiment0_ida'), + on_change=select_experiment + ) + if 'selected_experiment0_ida' in st.session_state: + render_grid( + st.session_state.selected_experiment0_ida, layout[0], file_manager, + 'flashidasimulator', "selected_experiment0_ida", 'flash_viewer_grid_0_ida' + ) + with c2: + st.selectbox( + "choose experiment", results, + key=f'selected_experiment_dropdown_1_ida', + index=validate_selected_index(file_manager, 'selected_experiment1_ida'), + on_change=select_experiment + ) + if f"selected_experiment1_ida" in st.session_state: + with st.spinner('Loading component...'): + render_grid( + st.session_state["selected_experiment1_ida"], layout[1], + file_manager, 'flashidasimulator', 'selected_experiment1_ida', + 'flash_viewer_grid_1_ida' + ) + +else: + ### for only single experiment on one view + st.selectbox( + "choose experiment", results, + key="selected_experiment_dropdown_ida", + index=validate_selected_index(file_manager, 'selected_experiment0_ida'), + on_change=select_experiment + ) + + + if 'selected_experiment0_ida' in st.session_state: + print('Lets go!') + render_grid( + st.session_state.selected_experiment0_ida, layout[0], file_manager, + 'flashidasimulator', 'selected_experiment0_ida' + ) + + ### for multiple experiments on one view + if len(layout) > 1: + + for exp_index, exp_layout in enumerate(layout): + if exp_index == 0: continue # skip the first experiment + + st.divider() # horizontal line + + st.selectbox( + "choose experiment", results, + key=f'selected_experiment_dropdown_{exp_index}_ida', + index=validate_selected_index(file_manager, f'selected_experiment{exp_index}_ida'), + on_change=select_experiment + ) + # if #experiment input files are less than #layouts, all the pre-selection will be the first experiment + if f"selected_experiment{exp_index}_ida" in st.session_state: + render_grid( + st.session_state["selected_experiment%d_ida" % exp_index], + layout[exp_index], file_manager, 'flashidasimulator', + "selected_experiment%d_ida" % exp_index, + 'flash_viewer_grid_%d_ida' % exp_index + ) + +save_params(params) diff --git a/content/FLASHIda/FLASHIdaSimulatorWorkflow.py b/content/FLASHIda/FLASHIdaSimulatorWorkflow.py new file mode 100644 index 00000000..4e3b85c2 --- /dev/null +++ b/content/FLASHIda/FLASHIdaSimulatorWorkflow.py @@ -0,0 +1,21 @@ +import streamlit as st + +from src.Workflow import IdaSimulatorWorkflow +from src.common.common import page_setup + + +params = page_setup() + +wf = IdaSimulatorWorkflow() + +st.title('FLASHIda - Intelligent Data Acquisition') + +t = st.tabs(["📁 **File Upload**", "⚙️ **Configure**", "🚀 **Run**"]) +with t[0]: + wf.show_file_upload_section() + +with t[1]: + wf.show_parameter_section() + +with t[2]: + wf.show_execution_section() \ No newline at end of file diff --git a/content/FLASHIda/FLASHIdaViewer.py b/content/FLASHIda/FLASHIdaViewer.py new file mode 100644 index 00000000..d5976e71 --- /dev/null +++ b/content/FLASHIda/FLASHIdaViewer.py @@ -0,0 +1,119 @@ +import streamlit as st + +from pathlib import Path + +from src.common.common import page_setup, save_params +from src.workflow.FileManager import FileManager +from src.render.render import render_grid + + 
+DEFAULT_LAYOUT = [ + ['ms2_table'], + ['combined_spectrum'], + ['anno_spectrum', 'deconv_spectrum'] +] + + + +def select_experiment(): + st.session_state.selected_experiment0_ida = st.session_state.selected_experiment_dropdown_ida + if len(layout) > 1: + for exp_index in range(1, len(layout)): + if st.session_state[f'selected_experiment_dropdown_{exp_index}_ida'] is None: + continue + st.session_state[f"selected_experiment{exp_index}_ida"] = st.session_state[f'selected_experiment_dropdown_{exp_index}_ida'] + +def validate_selected_index(file_manager, selected_experiment): + results = file_manager.get_results_list( + ['deconv_dfs', 'anno_dfs', 'ms2_dfs'] + ) + if selected_experiment in st.session_state: + if st.session_state[selected_experiment] in results: + return name_to_index[st.session_state[selected_experiment]] + else: + del st.session_state[selected_experiment] + return None + +# page initialization +params = page_setup("IdaViewer") + +# Get available results +file_manager = FileManager( + st.session_state["workspace"], + Path(st.session_state['workspace'], 'flashida', 'cache') +) +results = file_manager.get_results_list( + ['deconv_dfs', 'anno_dfs', 'ms2_dfs'] +) + +if file_manager.result_exists('layout', 'layout'): + layout = file_manager.get_results('layout', 'layout')['layout'] + side_by_side = layout['side_by_side'] + layout = layout['layout'] + +else: + layout = [DEFAULT_LAYOUT] + side_by_side = False + +### if no input file is given, show blank page +if len(results) == 0: + st.error('No results to show yet. Please run a workflow first!') + st.stop() + +# Map names to index +name_to_index = {n : i for i, n in enumerate(results)} + +if len(layout) == 2 and side_by_side: + c1, c2 = st.columns(2) + with c1: + st.selectbox( + "choose experiment", results, + key="selected_experiment_dropdown_ida", + index=validate_selected_index(file_manager, 'selected_experiment0_ida'), + on_change=select_experiment + ) + if 'selected_experiment0_ida' in st.session_state: + render_grid(st.session_state.selected_experiment0_ida, layout[0], file_manager, 'flashida', 'selected_experiment0_ida') + with c2: + st.selectbox( + "choose experiment", results, + key=f'selected_experiment_dropdown_1_ida', + index=validate_selected_index(file_manager, 'selected_experiment1_ida'), + on_change=select_experiment + ) + if f"selected_experiment1_ida" in st.session_state: + render_grid(st.session_state.selected_experiment1_ida, layout[1], file_manager, 'flashida', 'selected_experiment1_ida', 'flash_viewer_grid_1') + + +else: + ### for only single experiment on one view + st.selectbox( + "choose experiment", results, + key="selected_experiment_dropdown_ida", + index=validate_selected_index(file_manager, 'selected_experiment0_ida'), + on_change=select_experiment + ) + + if 'selected_experiment0_ida' in st.session_state: + render_grid(st.session_state.selected_experiment0_ida, layout[0], file_manager, 'flashida', 'selected_experiment0_ida') + + ### for multiple experiments on one view + if len(layout) > 1: + + for exp_index, exp_layout in enumerate(layout): + if exp_index == 0: continue # skip the first experiment + + st.divider() # horizontal line + + st.selectbox( + "choose experiment", results, + key=f'selected_experiment_dropdown_{exp_index}_ida', + index=validate_selected_index(file_manager, f'selected_experiment{exp_index}_ida'), + on_change=select_experiment + ) + + # if #experiment input files are less than #layouts, all the pre-selection will be the first experiment + if f"selected_experiment{exp_index}_ida" 
in st.session_state:
+            render_grid(st.session_state["selected_experiment%d_ida" % exp_index], layout[exp_index], file_manager, 'flashida', f"selected_experiment{exp_index}_ida", 'flash_viewer_grid_%d' % exp_index)
+
+save_params(params)
\ No newline at end of file
diff --git a/content/FLASHIda/FLASHIdaWorkflow.py b/content/FLASHIda/FLASHIdaWorkflow.py
new file mode 100644
index 00000000..6180dc2e
--- /dev/null
+++ b/content/FLASHIda/FLASHIdaWorkflow.py
@@ -0,0 +1,180 @@
+import streamlit as st
+import pandas as pd
+
+from src.Workflow import IdaWorkflow
+from src.parse.deconv import parseDeconv
+from src.parse.ida import parseIda
+from src.common.common import page_setup, save_params
+
+
+params = page_setup()
+
+wf = IdaWorkflow()
+
+st.title('FLASHIda - Intelligent Data Acquisition')
+
+t = st.tabs(["⚙️ **Configure**", "🚀 **Run**", "💡 **Manual Result Upload**"])
+
+
+with t[0]:
+    wf.show_parameter_section()
+
+with t[1]:
+    wf.show_execution_section()
+
+with t[2]:
+    def process_uploaded_files(uploaded_files):
+
+        # Store all uploaded files
+        for file in uploaded_files:
+            if file.name.endswith("mzML"):
+                if file.name.endswith('_deconv.mzML'):
+                    wf.file_manager.store_file(
+                        file.name.split('_deconv.mzML')[0], 'out_deconv_mzML',
+                        file, file_name=file.name
+                    )
+                elif file.name.endswith('_annotated.mzML'):
+                    wf.file_manager.store_file(
+                        file.name.split('_annotated.mzML')[0], 'anno_annotated_mzML',
+                        file, file_name=file.name
+                    )
+                else:
+                    st.warning(f'Invalid file: {file.name}')
+            elif file.name.endswith("tsv"):
+                if file.name.endswith('_spec2.tsv'):
+                    wf.file_manager.store_file(
+                        file.name.split('_spec2.tsv')[0], 'spec2_tsv',
+                        file, file_name=file.name
+                    )
+                elif file.name.endswith('_ms2_toppic_prsm_single.tsv'):
+                    wf.file_manager.store_file(
+                        file.name.split('_ms2_toppic_prsm_single.tsv')[0], 'id_tsv',
+                        file, file_name=file.name
+                    )
+                else:
+                    st.warning(f'Invalid file: {file.name}')
+            else:
+                st.warning(f'Invalid file: {file.name}')
+
+        # Get the unparsed files
+        input_files = set(wf.file_manager.get_results_list(
+            ['out_deconv_mzML', 'anno_annotated_mzML', 'spec2_tsv', 'id_tsv'],
+            partial=False
+        ))
+        parsed_files = set(wf.file_manager.get_results_list(
+            ['deconv_dfs', 'anno_dfs', 'ms2_dfs'],
+            partial=False
+        ))
+        unparsed_files = input_files - parsed_files
+        print(input_files)
+        print(parsed_files)
+        print(unparsed_files)
+
+        # Process unparsed datasets
+        for unparsed_dataset in unparsed_files:
+            results = wf.file_manager.get_results(
+                unparsed_dataset,
+                ['out_deconv_mzML', 'anno_annotated_mzML',
+                 'spec2_tsv', 'id_tsv'],
+                partial=False
+            )
+            id_tsv = results.pop('id_tsv')
+            parseDeconv(wf.file_manager, unparsed_dataset, **results)
+            parseIda(wf.file_manager, unparsed_dataset, results['spec2_tsv'], id_tsv)
+
+    # Table with MS2 -> New
+    # MS1 plot with Isolation Window (FI Precursor) -> From Tag View
+    # MS2 Plot -> Exists
+
+    # MS1 plot with Isolation Window (Actual Precursor)
+
+
+    st.subheader("**Upload FLASHIda output files (\*_annotated.mzML, \*_deconv.mzML, \*_spec2.tsv & \*_ms2_toppic_prsm_single.tsv)**")
+    st.info(
+        """
+    **💡 How to upload files**
+
+    1. Browse files on your computer or drag and drop files
+    2. Click the **Add files to workspace** button to use them in the workflows
+
+    Select data for analysis from the uploaded files shown below.
+
+    **💡 Make sure that the same number of deconvolved and annotated mzML files are uploaded!**
+    """
+    )
+    with st.form('input_files', clear_on_submit=True):
+        uploaded_files = st.file_uploader(
+            "FLASHDeconv/TopPIC output mzML files and TSV files", accept_multiple_files=True, type=["mzML", "tsv"]
+        )
+        _, c2, _ = st.columns(3)
+        if c2.form_submit_button("Add files to workspace", type="primary"):
+            if uploaded_files:
+                # A list of files is required, since online allows only single upload, create a list
+                if type(uploaded_files) != list:
+                    uploaded_files = [uploaded_files]
+
+                # opening file dialog and closing without choosing a file results in None upload
+                process_uploaded_files(uploaded_files)
+                st.success("Successfully added uploaded files!")
+            else:
+                st.warning("Upload some files before adding them.")
+
+    # File Upload Table
+    experiments = (
+        set(wf.file_manager.get_results_list(['spec2_tsv']))
+        | set(wf.file_manager.get_results_list(['out_deconv_mzML']))
+        | set(wf.file_manager.get_results_list(['anno_annotated_mzML']))
+        | set(wf.file_manager.get_results_list(['id_tsv']))
+    )
+    table = {
+        'Experiment Name' : [],
+        'Deconvolved Files' : [],
+        'Annotated Files' : [],
+        'MS2 TSV Files' : [],
+        'ID TSV Files' : [],
+    }
+    for experiment in experiments:
+        table['Experiment Name'].append(experiment)
+
+        if wf.file_manager.result_exists(experiment, 'out_deconv_mzML'):
+            table['Deconvolved Files'].append(True)
+        else:
+            table['Deconvolved Files'].append(False)
+
+        if wf.file_manager.result_exists(experiment, 'anno_annotated_mzML'):
+            table['Annotated Files'].append(True)
+        else:
+            table['Annotated Files'].append(False)
+
+        if wf.file_manager.result_exists(experiment, 'spec2_tsv'):
+            table['MS2 TSV Files'].append(True)
+        else:
+            table['MS2 TSV Files'].append(False)
+
+        if wf.file_manager.result_exists(experiment, 'id_tsv'):
+            table['ID TSV Files'].append(True)
+        else:
+            table['ID TSV Files'].append(False)
+
+    st.markdown('**Uploaded experiments in current workspace**')
+    st.dataframe(pd.DataFrame(table))
+
+    # Remove files
+    with st.expander("🗑️ Remove mzML files"):
+        to_remove = st.multiselect(
+            "select files", options=experiments
+        )
+        c1, c2 = st.columns(2)
+        if c2.button(
+            "Remove **selected**", type="primary", disabled=not any(to_remove)
+        ):
+            for dataset_id in to_remove:
+                wf.file_manager.remove_results(dataset_id)
+            st.rerun()
+
+        if c1.button("⚠️ Remove **all**"):
+            wf.file_manager.clear_cache()
+            st.success("All files removed!")
+            st.rerun()
+
+# save_params(params)
diff --git a/scripts/flashida/write_method.py b/scripts/flashida/write_method.py
new file mode 100644
index 00000000..47fceb9a
--- /dev/null
+++ b/scripts/flashida/write_method.py
@@ -0,0 +1,107 @@
+import json
+import sys
+
+import xml.etree.ElementTree as ET
+
+############################
+# default parameter values #
+############################
+#
+# Mandatory keys for each parameter
+# key: a unique identifier
+# value: the default value
+#
+# Optional keys for each parameter
+# name: the name of the parameter
+# hide: don't show the parameter in the parameter section (e.g.
for input/output files) +# options: a list of valid options for the parameter +# min: the minimum value for the parameter (int and float) +# max: the maximum value for the parameter (int and float) +# step_size: the step size for the parameter (int and float) +# help: a description of the parameter +# widget_type: the type of widget to use for the parameter (default: auto) +# advanced: whether or not the parameter is advanced (default: False) + +DEFAULTS = [ + {"key": "in", "value": [], "help": "Input files for Python Script.", "hide": True}, + {'key': 'TopN', 'name': 'TopN', 'value': 3}, + {'key': 'Duration', 'name': 'Duration', 'value': 67}, + 'MS1', + {'key': 'Analyzer', 'name': 'Analyzer', 'value': 'Orbitrap'}, + {'key': 'FirstMass', 'name': 'FirstMass', 'value': 400}, + {'key': 'LastMass', 'name': 'LastMass', 'value': 2000}, + {'key': 'OrbitrapResolution', 'name': 'OrbitrapResolution', 'value': 120000}, + {'key': 'AGCTarget', 'name': 'AGCTarget', 'value': 800000}, + {'key': 'MaxIT', 'name': 'MaxIT', 'value': 50}, + {'key': 'Microscans', 'name': 'Microscans', 'value': 1}, + {'key': 'DataType', 'name': 'DataType', 'value': 'Centroid'}, + {'key': 'RFLens', 'name': 'RFLens', 'value': 30}, + {'key': 'SourceCID', 'name': 'SourceCID', 'value': 0}, + 'MS2', + {'key': 'Analyzer', 'name': 'Analyzer', 'value': 'Orbitrap'}, + {'key': 'FirstMass', 'name': 'FirstMass', 'value': 200}, + {'key': 'OrbitrapResolution', 'name': 'OrbitrapResolution', 'value': 60000}, + {'key': 'AGCTarget', 'name': 'AGCTarget', 'value': 500000}, + {'key': 'MaxIT', 'name': 'MaxIT', 'value': 118}, + {'key': 'Microscans', 'name': 'Microscans', 'value': 1}, + {'key': 'DataType', 'name': 'DataType', 'value': 'Centroid'}, + {'key': 'Activation', 'name': 'Activation', 'value': 'HCD'}, + {'key': 'CollisionEnergy', 'name': 'CollisionEnergy', 'value': 29}, + 'IDA', + {'key': 'MaxMs2CountPerMs1', 'name': 'MaxMs2CountPerMs1','value': 4}, + {'key': 'QScoreThreshold', 'name': 'QScoreThreshold', 'value': 0.2}, + {'key': 'TQScoreThreshold', 'name': 'TQScoreThreshold', 'value': 0.99}, + {'key': 'MinCharge', 'name': 'MinCharge', 'value': 4}, + {'key': 'MaxCharge', 'name': 'MaxCharge', 'value': 50}, + {'key': 'MinMass', 'name': 'MinMass', 'value': 500}, + {'key': 'MaxMass', 'name': 'MaxMass', 'value': 50000}, + {'key': 'Tolerances', 'name': 'Tolerances', 'value': [10.0, 10.0]}, + {'key': 'TargetLogs', 'name': 'TargetLogs', 'value': [r'C:\Users\KyowonJeong\Desktop\FLASHIdaTmp\test1.log']}, + {'key': 'RTWindow', 'name': 'RTWindow', 'value': 180}, + {'key': 'TargetMode', 'name': 'TargetMode', 'value': 0}, + {'key': 'UseFAIMS', 'name': 'UseFAIMS', 'value': False}, + {'key': 'UseCVQScore', 'name': 'UseCVQScore', 'value': False}, + {'key': 'CycleTime', 'name': 'CycleTime', 'value': 180}, + {'key': 'CVValues', 'name': 'CVValues', 'value': [-10.0, -30.0, -40.0, -50.0, -60.0]}, +] + +def get_params(): + if len(sys.argv) > 1: + with open(sys.argv[1], "r") as f: + return json.load(f) + else: + return {} + +if __name__ == "__main__": + params = get_params() + + input_xml = params.pop('input_xml') + + # Create method.xml + root = ET.Element("MethodParameters") + subsections = {} + for key, value in params.items(): + if ':' in key: + section, param = key.split(':') + if section not in subsections: + subsections[section] = ET.SubElement(root, section) + section = subsections[section] + tag = ET.SubElement(section, param) + else: + tag = ET.SubElement(root, key) + # Handle list inputs + if key in ['IDA:Tolerances', 'IDA:CVValues']: + for item in 
value.split('\n'): + element = ET.SubElement(tag, 'double') + element.text = item.strip() + elif key in ['IDA:TargetLogs']: + for item in value.split('\n'): + element = ET.SubElement(tag, 'string') + element.text = item.strip().replace('\\\\', '\\') + elif isinstance(value, bool): + tag.text = str(value).lower() + else: + tag.text = str(value) + tree = ET.ElementTree(root) + ET.indent(tree, space=" ") + tree.write(input_xml, encoding="utf-8", xml_declaration=True) \ No newline at end of file diff --git a/settings.json b/settings.json index 58bbb506..aaebdaa0 100644 --- a/settings.json +++ b/settings.json @@ -14,6 +14,7 @@ } }, "online_deployment": false, + "enable_flashida": true, "enable_workspaces": true, "test": true, "workspaces_dir": ".." diff --git a/src/Workflow.py b/src/Workflow.py index 5daa3c8d..a53043dd 100644 --- a/src/Workflow.py +++ b/src/Workflow.py @@ -1,16 +1,20 @@ +import re import json import time import multiprocessing import streamlit as st +import pyopenms as oms +from time import sleep from pathlib import Path from os import makedirs, listdir from shutil import copyfile, rmtree -from os.path import join, splitext, basename, exists, dirname +from os.path import join, splitext, basename, exists, dirname, abspath from src.parse.tnt import parseTnT from src.parse.deconv import parseDeconv +from src.parse.ida import parseIdaSimulator from src.workflow.WorkflowManager import WorkflowManager DEFAULT_THREADS = 8 @@ -410,6 +414,260 @@ def execution(self) -> None: rmtree(folder_path) + +class IdaWorkflow(WorkflowManager): + + def __init__(self) -> None: + # Initialize the parent class with the workflow name. + super().__init__("FLASHIda", st.session_state["workspace"]) + self.script_path = join('src', 'FLASHIda', 'run.py') + self.tool_name = 'FLASHIdaRunner' + + def configure(self) -> None: + self.ui.input_widget( + key="executable", name="Path to Flash.exe", default='', + widget_type="text" + ) + self.ui.input_widget( + key="executable-secondary", name="Path to secondary Flash.exe", default='', + widget_type="text" + ) + self.ui.input_widget( + key="raw-files", name="Path to raw files", default='', + widget_type="text" + ) + self.ui.input_widget( + key="method-files", name="Path to method files", default='', + widget_type="text" + ) + + def execution(self) -> None: + params = self.parameter_manager.get_parameters_from_json() + + # Validate primary FLASHIda executable input + flashida_path = Path(params['executable']) + if flashida_path.suffix.lower() != '.exe': + self.logger.log( + f'FLASHIda executable was provided with extension ' + f'\'{flashida_path.suffix}\'. Expected \'.exe\'' + ) + return + if flashida_path.is_file(): + self.logger.log(f'Found FLASHIda executable!') + else: + self.logger.log(f'{flashida_path} is not a file.') + return + + # Validate secondary FLASHIda executable input + flashida_secondary_path = Path(params['executable-secondary']) + if flashida_secondary_path.suffix.lower() != '.exe': + self.logger.log( + f'Secondary FLASHIda executable was provided with extension ' + f'\'{flashida_secondary_path.suffix}\'. 
Expected \'.exe\'' + ) + return + if flashida_secondary_path.is_file(): + self.logger.log(f'Found Secondary FLASHIda executable!') + else: + self.logger.log(f'{flashida_secondary_path} is not a file.') + return + + # Validate method file input + methods_folder_path = Path(params['method-files']) + if methods_folder_path.is_dir() and (params['method-files'] != ''): + self.logger.log(f'Found methods folder!') + else: + self.logger.log( + f'Method folder \'{methods_folder_path}\' ' + f'is not a folder.' + ) + return + + # Validate raw file input + raw_folder_path = Path(params['raw-files']) + if raw_folder_path.is_dir() and (params['raw-files'] != ''): + self.logger.log(f'Found raw file folder!') + else: + self.logger.log( + f'Raw folder \'{raw_folder_path}\' ' + f'is not a folder.' + ) + return + + # Find existing raw files + ign_raws, ign_methods, ign_secondary_flags = self._find_raws( + raw_folder_path + ) + if len(ign_raws) > 0: + self.logger.log( + 'Found the following existing raw files that match the scheme:' + ) + for i, (file, method, secondary) in enumerate( + zip(ign_raws, ign_methods, ign_secondary_flags) + ): + self.logger.log( + f"{i+1}:\t{file}\t({method}.xml\t" + f"{'secondary' if secondary else 'primary'})" + ) + self.logger.log('Ignoring these files!') + + self.logger.log('Listening for new raw files...') + while(True): + # Scan every 1s + sleep(1) + + # Search for new raws + new_raws, new_methods, new_secs = self._find_raws(raw_folder_path) + for raw, method, secondary in zip(new_raws, new_methods, new_secs): + if raw not in ign_raws: + break + else: + continue + + self.logger.log(f'Detected new raw \'{raw}\'') + + # Ignore raw in future cycles + ign_raws.append(raw) + ign_methods.append(method) + ign_secondary_flags.append(secondary) + + # Validate method + method_path = Path(methods_folder_path, f'{method}.xml') + raw_path = Path(raw_folder_path, raw) + exe_path = flashida_secondary_path if secondary else flashida_path + if method_path.is_file(): + self.logger.log(f'Found method \'{method_path}\'!') + self.logger.log(f'Starting FLASHDeconv...') + self.executor.run_command( + [exe_path, '-m', method_path, '-r', raw_path], + cwd = exe_path.parent + ) + self.logger.log('Listening for new raw files...') + + else: + self.logger.log( + f'Method \'{method_path}\' is not valid. Ignoring...' + ) + self.logger.log('Listening for new raw files...') + + + def _find_raws(self, raw_path): + # Find existing raw files + raws = [] + methods = [] + secondary_flags = [] + method_pattern = r'.*FLASHIda_([^.]+)\.raw' + for file in listdir(raw_path): + if not Path(raw_path, file).is_file(): + continue + match = re.search(method_pattern, file) + if match: + raws.append(str(file)) + full_suffix = match.group(1) + parts = full_suffix.split('_', 1) + method = parts[0] + methods.append(method) + suffix = parts[1] if len(parts) > 1 else None + if suffix is not None: + suffix = suffix.split('_')[0] if '_' in suffix else suffix + suffix = suffix.split('.')[0] if '.' in suffix else suffix + secondary = suffix == '2' if suffix else False + secondary_flags.append(secondary) + return raws, methods, secondary_flags + + + +class IdaSimulatorWorkflow(WorkflowManager): + + def __init__(self) -> None: + # Initialize the parent class with the workflow name. 
+ super().__init__("FLASHIdaSimulator", st.session_state["workspace"]) + self.script_path = join('scripts', 'flashida', 'write_method.py') + self.tool_name = 'FLASHIdaRunner' + + def upload(self)-> None: + self.ui.upload_widget(key="mzML-files", name="MS data", file_types="mzML") + + def configure(self) -> None: + + self.ui.select_input_file("mzML-files", name='Dataset') + + self.ui.input_widget( + key="executable", name="Path to Flash.exe", default='', + widget_type="text" + ) + + self.ui.input_python(self.script_path) + + def execution(self) -> None: + params = self.parameter_manager.get_parameters_from_json() + + # Make sure output directory exists + base_path = dirname(self.workflow_dir) + + # Get input files + in_mzml = self.file_manager.get_files(self.params["mzML-files"])[0] + + # Generate output folder + current_base = splitext(basename(in_mzml))[0] + current_time = time.strftime("%Y%m%d-%H%M%S") + dataset_id = '%s_%s'%(current_base, current_time) + folder_path = join(base_path, 'FLASHIdaOutput', '%s_%s'%(current_base, current_time)) + makedirs(folder_path) + + # Generate temp paths for output files + input_txt = join(folder_path, 'simulation_data.txt') + input_xml = join(folder_path, 'method.xml') + output_tsv = join(folder_path, 'simulation_results.tsv') + + + # Convert input mzML to input format + self.logger.log('Converting mzML to simulation input format...') + exp = oms.MSExperiment() + oms.MzMLFile().load(in_mzml, exp) + output = [] + for s in exp.getSpectra(): + if s.getMSLevel() > 1: + continue + output.append(f'Spec\t{s.getRT()}\n') + output += [f'{mz}\t{intensity}\n' for mz, intensity in zip(*s.get_peaks())] + with open(input_txt, 'w') as f: + f.writelines(output) + + # Write method.xml + self.logger.log('Generating parameter file...') + self.executor.run_python( + self.script_path, {'input_xml' : join(folder_path, 'method.xml')} + ) + + # Run simulator + self.logger.log('Running FLASHIda simulator...') + self.executor.run_command( + [params['executable'], abspath(input_txt), abspath(output_tsv), abspath(input_xml)], + cwd = Path(params['executable']).parent + ) + + # Store all files + for file in listdir(folder_path): + self.file_manager.store_file( + dataset_id, str(file).replace('.', '_'), + Path(folder_path, file), file_name=file + ) + + # Fetch results + results = self.file_manager.get_results( + dataset_id, ['simulation_results_tsv'] + ) + + # Parse data + parseIdaSimulator( + self.file_manager, dataset_id, results['simulation_results_tsv'] + ) + + # Remove temporary folder + rmtree(folder_path) + + class QuantWorkflow(WorkflowManager): def __init__(self) -> None: diff --git a/src/parse/ida.py b/src/parse/ida.py new file mode 100644 index 00000000..1a0cb70c --- /dev/null +++ b/src/parse/ida.py @@ -0,0 +1,50 @@ +import pandas as pd +import numpy as np + +from src.parse.masstable import parseFLASHDeconvOutput, getMSSignalDF, getSpectraTableDF +from src.render.compression import downsample_heatmap, compute_compression_levels +from scipy.stats import gaussian_kde + +def parseIdaSimulator( + file_manager, dataset_id, out_simulation, logger=None +): + + simulation_data = pd.read_csv(out_simulation, sep='\t') + file_manager.store_data( + dataset_id, 'simulation_dfs', simulation_data + ) + + heatmap = simulation_data.loc[:,['monoMasses', 'rt', 'precursorIntensity']] + heatmap = heatmap.reset_index() + heatmap = simulation_data.rename(columns={ + 'monoMasses' : 'mass', 'precursorIntensity' : 'intensity', + 'index' : 'scan_idx' + }) + + # Store full sized version + 
file_manager.store_data( + dataset_id, f'ms1_deconv_heatmap', heatmap + ) + + # Store compressed versions + for size in reversed(compute_compression_levels(20000, len(heatmap), logger=logger)): + + + # Downsample iteratively + heatmap = downsample_heatmap(heatmap, max_datapoints=size) + # Store compressed version + file_manager.store_data( + dataset_id, f'ms1_deconv_heatmap_{size}', heatmap + ) + +def parseIda( + file_manager, dataset_id, spec2_tsv, id_tsv +): + ids = pd.read_csv(id_tsv, skiprows=25, sep='\t') + scans = pd.read_csv(spec2_tsv, sep='\t') + scans = scans.drop_duplicates(subset='ScanNum', keep='first') + scans['identified'] = scans['ScanNum'].isin(ids['Scan(s)']) + file_manager.store_data( + dataset_id, 'ms2_dfs', scans + ) + diff --git a/src/render/components.py b/src/render/components.py index 2469c1de..5b5daba3 100644 --- a/src/render/components.py +++ b/src/render/components.py @@ -6,7 +6,7 @@ # Create a _RELEASE constant. We'll set this to False while we're developing # the component, and True when we're ready to package and distribute it. -_RELEASE = True +_RELEASE = False _component_func = None @@ -50,6 +50,9 @@ def __init__(self, table_type): if table_type == 'ScanTable': self.title = 'Scan Table' self.componentName = "TabulatorScanTable" + elif table_type == 'MS2Table': + self.title = 'MS2 Table' + self.componentName = "TabulatorFragmentScanTable" elif table_type == 'MassTable': self.title = 'Mass Table' self.componentName = "TabulatorMassTable" diff --git a/src/render/initialize.py b/src/render/initialize.py index b8988934..68ad866c 100644 --- a/src/render/initialize.py +++ b/src/render/initialize.py @@ -67,7 +67,11 @@ def initialize_data(comp_name, selected_data, file_manager, tool): elif comp_name == 'combined_spectrum': data = file_manager.get_results(selected_data, ['combined_spectrum']) data_to_send['per_scan_data'] = data['combined_spectrum'] - component_arguments = PlotlyLineplotTagger(title="Augmented Deconvolved Spectrum") + if tool == 'flashida': + title = "Augmented Annotated Spectrum" + else: + title = "Augmented Deconvolved Spectrum" + component_arguments = PlotlyLineplotTagger(title=title) elif comp_name == 'anno_spectrum': data = file_manager.get_results(selected_data, ['anno_spectrum']) data_to_send['per_scan_data'] = data['anno_spectrum'] @@ -117,6 +121,13 @@ def initialize_data(comp_name, selected_data, file_manager, tool): data = file_manager.get_results(selected_data, ['protein_dfs']) data_to_send['protein_table'] = data['protein_dfs'] component_arguments = Tabulator('ProteinTable') + elif comp_name == 'ms2_table': + # TODO: Unify lookup or remove in vue + data = file_manager.get_results(selected_data, ['scan_table']) + data_to_send['per_scan_data'] = data['scan_table'] + data = file_manager.get_results(selected_data, ['ms2_dfs']) + data_to_send['id_table'] = data['ms2_dfs'] + component_arguments = Tabulator('MS2Table') elif comp_name == 'tag_table': data = file_manager.get_results(selected_data, ['tag_dfs']) data_to_send['tag_table'] = data['tag_dfs'] diff --git a/src/render/update.py b/src/render/update.py index c696ee02..b23c1b81 100644 --- a/src/render/update.py +++ b/src/render/update.py @@ -86,7 +86,12 @@ def filter_data(data, out_components, selection_store, additional_data, tool): component = out_components[0][0]['componentArgs']['title'] # Filter data if possible - if component in [ + if (component == 'Augmented Annotated Spectrum' and tool == 'flashida'): + if 'precursorScanIndex' not in selection_store: + data['per_scan_data'] = 
data['per_scan_data'].iloc[0:0,:]
+        else:
+            data['per_scan_data'] = data['per_scan_data'].iloc[selection_store['precursorScanIndex']:selection_store['precursorScanIndex']+1,:]
+    elif component in [
         'Annotated Spectrum', 'Deconvolved Spectrum',
         'Augmented Deconvolved Spectrum', 'Mass Table',
         'Sequence View', 'Internal Fragment Map'
diff --git a/src/workflow/CommandExecutor.py b/src/workflow/CommandExecutor.py
index 383dadc5..aa2cd9b8 100644
--- a/src/workflow/CommandExecutor.py
+++ b/src/workflow/CommandExecutor.py
@@ -60,7 +60,7 @@ def run_multiple_commands(
         end_time = time.time()
         self.logger.log(f"Total time to run {len(commands)} commands: {end_time - start_time:.2f} seconds", 1)
 
-    def run_command(self, command: list[str]) -> None:
+    def run_command(self, command: list[str], cwd=None) -> None:
         """
         Executes a specified shell command and logs its execution details.
 
@@ -73,12 +73,15 @@ def run_command(self, command: list[str]) -> None:
         # Ensure all command parts are strings
         command = [str(c) for c in command]
 
+        # Log cwd
+        if cwd is not None:
+            self.logger.log(f'Preparing command... cwd is set to \'{cwd}\'')
         # Log the execution start
         self.logger.log(f"Running command:\n"+' '.join(command)+"\nWaiting for command to finish...", 1)
         start_time = time.time()
 
         # Execute the command
-        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
         child_pid = process.pid
 
         # Record the PID to keep track of running processes associated with this workspace/workflow
@@ -254,7 +257,13 @@ def run_python(self, script_file: str, input_output: dict = {}) -> None:
             # run command without params
             self.run_command(["python", str(path)])
         elif isinstance(defaults, list):
-            defaults = {entry["key"]: entry["value"] for entry in defaults}
+            subsection = ''
+            entries, defaults = defaults, {}  # keep the original entry list before rebuilding the dict
+            for entry in entries:
+                if isinstance(entry, str):
+                    subsection = entry
+                    continue
+                defaults[f'{subsection}:{entry["key"]}' if subsection else entry["key"]] = entry["value"]
             # load paramters from JSON file
             params = {k: v for k, v in self.parameter_manager.get_parameters_from_json().items() if path.name in k}
             # update defaults
diff --git a/src/workflow/FileManager.py b/src/workflow/FileManager.py
index 0b2e6e1f..ab36b529 100644
--- a/src/workflow/FileManager.py
+++ b/src/workflow/FileManager.py
@@ -379,7 +379,12 @@ def get_results_list(self, name_tags: List[str], partial=False) -> List[str]:
             set(self._get_column_list('stored_data')) |
             set(self._get_column_list('stored_files'))
         )
-        name_tags = [n for n in name_tags if n in available_columns]
+
+        name_tags_available = [n for n in name_tags if n in available_columns]
+        if not partial and len(name_tags) != len(name_tags_available):
+            return []
+        name_tags = name_tags_available
+
         if len(name_tags) == 0:
             return []
 
diff --git a/src/workflow/StreamlitUI.py b/src/workflow/StreamlitUI.py
index a8f7cf51..191b89dd 100644
--- a/src/workflow/StreamlitUI.py
+++ b/src/workflow/StreamlitUI.py
@@ -841,10 +841,18 @@ def input_python(
         elif isinstance(defaults, list):
             # display input widget for every entry in defaults
             # input widgets in n number of columns
+            subsection = ''
             cols = st.columns(num_cols)
             i = 0
             for entry in defaults:
-                key = f"{path.name}:{entry['key']}" if "key" in entry else None
+                if isinstance(entry, str):
+                    subsection = entry
+                    st.markdown(f'**{entry}**')
+                    continue
+                if len(subsection) > 0:
+                    key = f"{path.name}:{subsection}:{entry['key']}" if "key" in entry else None
+                else:
+                    key = f"{path.name}:{entry['key']}" if
"key" in entry else None if key is None: st.error("Key not specified for parameter.") continue @@ -852,6 +860,9 @@ def input_python( if value is None: st.error("Value not specified for parameter.") continue + if isinstance(value, list): + value = '\n'.join(map(str, value)) + entry['widget_type'] = 'textarea' hide = entry["hide"] if "hide" in entry else False # no need to display input and output files widget or hidden parameters if hide: @@ -1166,8 +1177,10 @@ def export_parameters_markdown(self): tool_text.append(tool) if len(tool_text) > 1: tool_text = ", ".join(tool_text[:-1]) + " and " + tool_text[-1] - else: + elif len(tool_text) == 1: tool_text = tool_text[0] + else: + tool_text = '' result = subprocess.run( "FileFilter --help", shell=True, text=True, capture_output=True