diff --git a/app/callbacks.py b/app/callbacks.py
index 268a3d2..f6b945d 100644
--- a/app/callbacks.py
+++ b/app/callbacks.py
@@ -1,3 +1,4 @@
+import io
 import json
 import os
 import pickle
@@ -13,6 +14,8 @@
 import plotly.graph_objects as go
 from config import GM_FILTER_DROPDOWN_BGC_CLASS_OPTIONS
 from config import GM_FILTER_DROPDOWN_MENU_OPTIONS
+from config import GM_RESULTS_TABLE_MANDATORY_COLUMNS
+from config import GM_RESULTS_TABLE_OPTIONAL_COLUMNS
 from config import GM_SCORING_DROPDOWN_MENU_OPTIONS
 from dash import ALL
 from dash import MATCH
@@ -24,6 +27,7 @@
 from dash import clientside_callback
 from dash import dcc
 from dash import html
+from nplinker.metabolomics.spectrum import Spectrum
 
 dash._dash_renderer._set_react_version("18.2.0")  # type: ignore
 
@@ -113,22 +117,21 @@ def process_bgc_class(bgc_class: tuple[str, ...] | None) -> list[str]:
         processed_data: dict[str, Any] = {"n_bgcs": {}, "gcf_data": []}
 
         for gcf in gcfs:
-            gcf_bgc_classes = [cls for bgc in gcf.bgcs for cls in bgc_to_class[bgc.id]]
-            bgc_data = [
-                (bgc.id, bgc.smiles[0] if bgc.smiles and bgc.smiles[0] is not None else "N/A")
-                for bgc in gcf.bgcs
-            ]
-            bgc_data.sort(key=lambda x: x[0])
-            bgc_ids, bgc_smiles = zip(*bgc_data)
-            strains = [s.id for s in gcf.strains._strains]
-            strains.sort()
+            # Create pairs of (bgc_id, bgc) and sort by ID to maintain correspondence
+            bgc_pairs = [(bgc.id, bgc) for bgc in gcf.bgcs]
+            bgc_pairs.sort(key=lambda x: x[0])  # Sort by BGC ID
+
+            bgc_ids = [pair[0] for pair in bgc_pairs]  # Get sorted IDs
+            bgc_classes = [bgc_to_class[pair[0]] for pair in bgc_pairs]  # Get corresponding classes
+
+            strains = sorted([s.id for s in gcf.strains._strains])
+
             processed_data["gcf_data"].append(
                 {
                     "GCF ID": gcf.id,
                     "# BGCs": len(gcf.bgcs),
-                    "BGC Classes": list(set(gcf_bgc_classes)),  # Using set to get unique classes
-                    "BGC IDs": list(bgc_ids),
-                    "BGC smiles": list(bgc_smiles),
+                    "BGC Classes": bgc_classes,
+                    "BGC IDs": bgc_ids,
                     "strains": strains,
                 }
             )
@@ -137,25 +140,34 @@ def process_bgc_class(bgc_class: tuple[str, ...] | None) -> list[str]:
                 processed_data["n_bgcs"][len(gcf.bgcs)] = []
             processed_data["n_bgcs"][len(gcf.bgcs)].append(gcf.id)
 
-        processed_links: dict[str, Any] = {
-            "gcf_id": [],
-            "spectrum_id": [],
-            "strains": [],
-            "method": [],
-            "score": [],
-            "cutoff": [],
-            "standardised": [],
-        }
+        if links is not None:
+            processed_links: dict[str, Any] = {
+                "gcf_id": [],
+                "spectrum": [],
+                "method": [],
+                "score": [],
+                "cutoff": [],
+                "standardised": [],
+            }
 
-        for link in links.links:
-            for method, data in link[2].items():
-                processed_links["gcf_id"].append(link[0].id)
-                processed_links["spectrum_id"].append(link[1].id)
-                processed_links["strains"].append([s.id for s in link[1].strains._strains])
-                processed_links["method"].append(method)
-                processed_links["score"].append(data.value)
-                processed_links["cutoff"].append(data.parameter["cutoff"])
-                processed_links["standardised"].append(data.parameter["standardised"])
+            for link in links.links:
+                if isinstance(link[1], Spectrum):  # Then link[0] is a GCF (GCF -> Spectrum)
+                    processed_links["gcf_id"].append(link[0].id)
+                    processed_links["spectrum"].append(
+                        {
+                            "id": link[1].id,
+                            "strains": sorted([s.id for s in link[1].strains._strains]),
+                            "precursor_mz": link[1].precursor_mz,
+                            "gnps_id": link[1].gnps_id,
+                        }
+                    )
+                    for method, data in link[2].items():
+                        processed_links["method"].append(method)
+                        processed_links["score"].append(data.value)
+                        processed_links["cutoff"].append(data.parameter["cutoff"])
+                        processed_links["standardised"].append(data.parameter["standardised"])
+        else:
+            processed_links = {}
 
         return json.dumps(processed_data), json.dumps(processed_links)
     except Exception as e:
@@ -169,11 +181,12 @@ def process_bgc_class(bgc_class: tuple[str, ...] | None) -> list[str]:
         Output("gm-filter-accordion-control", "disabled"),
         Output("gm-filter-blocks-id", "data", allow_duplicate=True),
         Output("gm-filter-blocks-container", "children", allow_duplicate=True),
+        Output("gm-table-card-header", "style"),
+        Output("gm-table-card-body", "style", allow_duplicate=True),
         Output("gm-scoring-accordion-control", "disabled"),
         Output("gm-scoring-blocks-id", "data", allow_duplicate=True),
         Output("gm-scoring-blocks-container", "children", allow_duplicate=True),
-        Output("gm-table-card-header", "style"),
-        Output("gm-table-card-body", "style", allow_duplicate=True),
+        Output("gm-results-button", "disabled"),
         Output("mg-tab", "disabled"),
     ],
     [Input("file-store", "data")],
@@ -186,11 +199,12 @@ def disable_tabs_and_reset_blocks(
     bool,
     list[str],
     list[dmc.Grid],
+    dict,
+    dict[str, str],
     bool,
     list[str],
     list[dmc.Grid],
-    dict,
-    dict[str, str],
+    bool,
     bool,
 ]:
     """Manage tab states and reset blocks based on file upload status.
@@ -203,7 +217,7 @@ def disable_tabs_and_reset_blocks(
     """
     if file_path is None:
         # Disable the tabs, don't change blocks
-        return True, True, [], [], True, [], [], {}, {"display": "block"}, True
+        return True, True, [], [], {}, {"display": "block"}, True, [], [], True, True
 
     # Enable the tabs and reset blocks
     gm_filter_initial_block_id = [str(uuid.uuid4())]
@@ -216,11 +230,12 @@ def disable_tabs_and_reset_blocks(
         False,
         gm_filter_initial_block_id,
         gm_filter_new_blocks,
+        {},
+        {"display": "block"},
         False,
         gm_scoring_initial_block_id,
         gm_scoring_new_blocks,
-        {},
-        {"display": "block"},
+        False,
         False,
     )
 
@@ -509,8 +524,12 @@ def gm_filter_apply(
             mask = df["GCF ID"].astype(str).isin(gcf_ids)
             masks.append(mask)
         elif menu == "BGC_CLASS" and bgc_classes:
+            # Get unique classes for filtering
            mask = df["BGC Classes"].apply(
-                lambda x: any(bc.lower() in [y.lower() for y in x] for bc in bgc_classes)
+                lambda x: any(
+                    bc.lower() in {item.lower() for sublist in x for item in sublist}
+                    for bc in bgc_classes
+                )
            )
            masks.append(mask)
 
@@ -577,15 +596,14 @@ def gm_table_update_datatable(
         filtered_df = df
         new_checkbox_value = checkbox_value if checkbox_value is not None else []
 
-    # Prepare the data for display
-    display_df = filtered_df[["GCF ID", "# BGCs", "BGC IDs", "BGC smiles", "strains"]]
-    display_data = display_df[["GCF ID", "# BGCs"]].to_dict("records")
-
     # Prepare tooltip data
     tooltip_data = []
-    for _, row in display_df.iterrows():
-        bgc_ids_smiles_markdown = "| BGC IDs | SMILES |\n|---------|--------|\n" + "\n".join(
-            [f"| {id} | {smiles} |" for id, smiles in zip(row["BGC IDs"], row["BGC smiles"])]
+    for _, row in filtered_df.iterrows():
+        bgc_tooltip_markdown = "| BGC ID | Class |\n|---------|--------|\n" + "\n".join(
+            [
+                f"| {bgc_id} | {', '.join(bgc_class)} |"
+                for bgc_id, bgc_class in zip(row["BGC IDs"], row["BGC Classes"])
+            ]
         )
         strains_markdown = "| Strains |\n|----------|\n" + "\n".join(
             [f"| {strain} |" for strain in row["strains"]]
@@ -593,17 +611,36 @@ def gm_table_update_datatable(
 
         tooltip_data.append(
             {
-                "# BGCs": {"value": bgc_ids_smiles_markdown, "type": "markdown"},
+                "# BGCs": {"value": bgc_tooltip_markdown, "type": "markdown"},
                 "GCF ID": {"value": strains_markdown, "type": "markdown"},
             }
         )
 
+    # Prepare the data for display
+    filtered_df["BGC IDs"] = filtered_df["BGC IDs"].apply(", ".join)
+    filtered_df["BGC Classes"] = filtered_df["BGC Classes"].apply(
+        lambda x: ", ".join({item for sublist in x for item in sublist})  # Unique flattened classes
+    )
+    filtered_df["MiBIG IDs"] = filtered_df["strains"].apply(
+        lambda x: ", ".join([s for s in x if s.startswith("BGC")]) or "None"
+    )
+    filtered_df["strains"] = filtered_df["strains"].apply(", ".join)
+
     columns = [
         {"name": "GCF ID", "id": "GCF ID"},
         {"name": "# BGCs", "id": "# BGCs", "type": "numeric"},
+        {"name": "BGC Classes", "id": "BGC Classes"},
+        {"name": "MiBIG IDs", "id": "MiBIG IDs"},
     ]
 
-    return display_data, columns, tooltip_data, {"display": "block"}, [], new_checkbox_value
+    return (
+        filtered_df.to_dict("records"),
+        columns,
+        tooltip_data,
+        {"display": "block"},
+        [],
+        new_checkbox_value,
+    )
 
 
 @app.callback(
@@ -668,7 +705,6 @@ def gm_table_select_rows(
 
     selected_rows_data = df.iloc[selected_rows]
 
-    # TODO: to be removed later when the scoring part will be implemented
     output1 = f"Total rows: {len(df)}"
     output2 = f"Selected rows: {len(selected_rows)}\nSelected GCF IDs: {', '.join(selected_rows_data['GCF ID'].astype(str))}"
 
@@ -731,7 +767,7 @@ def gm_scoring_create_initial_block(block_id: str) -> dmc.Grid:
id={"type": "gm-scoring-dropdown-ids-cutoff-met", "index": block_id}, label="Cutoff", placeholder="Insert cutoff value as a number", - value="1", + value="0.05", className="custom-textinput", ) ], @@ -850,7 +886,7 @@ def gm_scoring_display_blocks( }, label="Cutoff", placeholder="Insert cutoff value as a number", - value="1", + value="0.05", className="custom-textinput", ), ], @@ -896,7 +932,7 @@ def gm_scoring_update_placeholder( # Callback was not triggered by user interaction, don't change anything raise dash.exceptions.PreventUpdate if selected_value == "METCALF": - return ({"display": "block"}, "Cutoff", "1") + return ({"display": "block"}, "Cutoff", "0.05") else: # This case should never occur due to the Literal type, but it satisfies mypy return ({"display": "none"}, "", "") @@ -933,9 +969,77 @@ def gm_scoring_apply( return df -# TODO: add the logic for outputing data in the results table, issue #33 @app.callback( - Input("gm-scoring-apply-button", "n_clicks"), + Output("gm-results-table-column-settings-modal", "is_open"), + [ + Input("gm-results-table-column-settings-button", "n_clicks"), + Input("gm-results-table-column-settings-close", "n_clicks"), + ], + [State("gm-results-table-column-settings-modal", "is_open")], +) +def toggle_column_settings_modal(n1, n2, is_open): + """Toggle the visibility of the column settings modal. + + Args: + n1: Number of clicks on the open button. + n2: Number of clicks on the close button. + is_open: Current state of the modal (open or closed). + + Returns: + The new state of the modal (open or closed). + """ + if n1 or n2: + return not is_open + return is_open + + +@app.callback( + Output("gm-results-table", "columns"), + [ + Input("gm-results-table-column-toggle", "value"), + Input("gm-results-button", "n_clicks"), + ], +) +def update_columns(selected_columns: list[str] | None, n_clicks: int | None) -> list[dict]: + """Update the columns of the results table based on user selections. + + Args: + selected_columns: List of selected columns to display. + n_clicks: Number of times the "Show Results" button has been clicked. + + Returns: + List of column definitions for the results table. 
+ """ + # Start with mandatory columns + columns: list[dict] = GM_RESULTS_TABLE_MANDATORY_COLUMNS.copy() + + # Create a dictionary for optional columns lookup + optional_columns_dict = {col["id"]: col for col in GM_RESULTS_TABLE_OPTIONAL_COLUMNS} + + # Add the selected columns in the order they appear in selected_columns + if selected_columns: + columns.extend( + [ + optional_columns_dict[col_id] + for col_id in selected_columns + if col_id in optional_columns_dict + ] + ) + + return columns + + +@app.callback( + Output("gm-results-alert", "children"), + Output("gm-results-alert", "is_open"), + Output("gm-results-table", "data"), + Output("gm-results-table", "tooltip_data"), + Output("gm-results-table-card-body", "style"), + Output("gm-results-table-card-header", "style"), + Output("gm-results-table-column-settings-button", "disabled"), + Input("gm-results-button", "n_clicks"), + Input("gm-table", "derived_virtual_data"), + Input("gm-table", "derived_virtual_selected_rows"), State("processed-links-store", "data"), State({"type": "gm-scoring-dropdown-menu", "index": ALL}, "value"), State({"type": "gm-scoring-radio-items", "index": ALL}, "value"), @@ -943,27 +1047,258 @@ def gm_scoring_apply( ) def gm_update_results_datatable( n_clicks: int | None, - filtered_data: str, + virtual_data: list[dict] | None, + selected_rows: list[int] | None, + processed_links: str, dropdown_menus: list[str], radiobuttons: list[str], cutoffs_met: list[str], -): +) -> tuple[str, bool, list[dict], list[dict], dict, dict, bool]: """Update the results DataTable based on scoring filters. Args: - n_clicks: Number of times the "Show Spectra" button has been clicked. - filtered_data: JSON string of filtered data. + n_clicks: Number of times the "Show Results" button has been clicked. + virtual_data: Current filtered data from the GCF table. + selected_rows: Indices of selected rows in the GCF table. + processed_links: JSON string of processed links data. dropdown_menus: List of selected dropdown menu options. radiobuttons: List of selected radio button options. cutoffs_met: List of cutoff values for METCALF method. Returns: - None + Tuple containing alert message, visibility state, table data and settings, and header style. """ + triggered_id = ctx.triggered_id + + if triggered_id in ["gm-table-select-all-checkbox", "gm-table"]: + return "", False, [], [], {"display": "none"}, {"color": "#888888"}, True + + if n_clicks is None: + return "", False, [], [], {"display": "none"}, {"color": "#888888"}, True + + if not selected_rows: + return ( + "No GCFs selected. 
Please select GCFs and try again.", + True, + [], + [], + {"display": "none"}, + {"color": "#888888"}, + True, + ) + + if not virtual_data: + return "No data available.", True, [], [], {"display": "none"}, {"color": "#888888"}, True + try: - data = json.loads(filtered_data) - df = pd.DataFrame(data) - except (json.JSONDecodeError, KeyError, pd.errors.EmptyDataError): - return - df_results = gm_scoring_apply(df, dropdown_menus, radiobuttons, cutoffs_met) - print(df_results.head()) + links_data = json.loads(processed_links) + if len(links_data) == 0: + return ( + "No processed links available.", + True, + [], + [], + {"display": "none"}, + {"color": "#888888"}, + True, + ) + + # Get selected GCF IDs and their corresponding data + selected_gcfs = { + row["GCF ID"]: { + "MiBIG IDs": row["MiBIG IDs"], + "BGC Classes": row["BGC Classes"], + } + for i, row in enumerate(virtual_data) + if i in selected_rows + } + + # Convert links data to DataFrame + links_df = pd.DataFrame(links_data) + + # Apply scoring filters + filtered_df = gm_scoring_apply(links_df, dropdown_menus, radiobuttons, cutoffs_met) + + # Filter for selected GCFs and aggregate results + results = [] + for gcf_id in selected_gcfs: + gcf_links = filtered_df[filtered_df["gcf_id"] == gcf_id] + if not gcf_links.empty: + # Sort by score in descending order + gcf_links = gcf_links.sort_values("score", ascending=False) + + top_spectrum = gcf_links.iloc[0] + result = { + # Mandatory fields + "GCF ID": int(gcf_id), + "# Links": len(gcf_links), + "Average Score": round(gcf_links["score"].mean(), 2), + # Optional fields with None handling + "Top Spectrum ID": int(top_spectrum["spectrum"].get("id", float("nan"))), + "Top Spectrum Precursor m/z": round( + top_spectrum["spectrum"].get("precursor_mz", float("nan")), 4 + ) + if top_spectrum["spectrum"].get("precursor_mz") is not None + else float("nan"), + "Top Spectrum GNPS ID": top_spectrum["spectrum"].get("gnps_id", "None") + if top_spectrum["spectrum"].get("gnps_id") is not None + else "None", + "Top Spectrum Score": round(top_spectrum.get("score", float("nan")), 4) + if top_spectrum.get("score") is not None + else float("nan"), + "MiBIG IDs": selected_gcfs[gcf_id]["MiBIG IDs"], + "BGC Classes": selected_gcfs[gcf_id]["BGC Classes"], + # Store all spectrum data for later use (download, etc.) 
+ "spectrum_ids_str": "|".join( + [str(s.get("id", "")) for s in gcf_links["spectrum"]] + ), + "spectrum_scores_str": "|".join( + [str(score) for score in gcf_links["score"].tolist()] + ), + } + results.append(result) + + if not results: + return ( + "No matching links found for selected GCFs.", + True, + [], + [], + {"display": "none"}, + {"color": "#888888"}, + True, + ) + + # Prepare tooltip data + tooltip_data = [] + for result in results: + spectrum_ids = ( + result["spectrum_ids_str"].split("|") if result["spectrum_ids_str"] else [] + ) + spectrum_scores = ( + [float(s) for s in result["spectrum_scores_str"].split("|")] + if result["spectrum_scores_str"] + else [] + ) + # Show only top 5 spectrums in tooltip + max_tooltip_entries = 5 + total_entries = len(result["spectrum_ids_str"]) + + spectrums_table = "| Spectrum ID | Score |\n|------------|--------|\n" + + # Add top entries + for spectrum_id, score in zip( + spectrum_ids[:max_tooltip_entries], + spectrum_scores[:max_tooltip_entries], + ): + spectrums_table += f"| {spectrum_id} | {round(score, 4)} |\n" + + # Add indication of more entries if applicable + if total_entries > max_tooltip_entries: + remaining = total_entries - max_tooltip_entries + spectrums_table += f"\n... {remaining} more entries ..." + + row_tooltip = { + "# Links": {"value": spectrums_table, "type": "markdown"}, + } + tooltip_data.append(row_tooltip) + + return ( + "", + False, + results, + tooltip_data, + {"display": "block"}, + {}, + False, + ) + + except Exception as e: + return ( + f"Error processing results: {str(e)}", + True, + [], + [], + {"display": "none"}, + {"color": "#888888"}, + True, + ) + + +@app.callback( + [ + Output("gm-download-button", "disabled"), + Output("gm-download-alert", "is_open"), + Output("gm-download-alert", "children"), + ], + [ + Input("gm-results-table", "data"), + ], +) +def toggle_download_button(table_data): + """Enable/disable download button based on data availability.""" + if not table_data: + return True, False, "" + return False, False, "" + + +@app.callback( + [ + Output("download-excel", "data"), + Output("gm-download-alert", "is_open", allow_duplicate=True), + Output("gm-download-alert", "children", allow_duplicate=True), + ], + Input("gm-download-button", "n_clicks"), + [ + State("gm-results-table", "data"), + ], + prevent_initial_call=True, +) +def generate_excel(n_clicks, table_data): + """Generate Excel file with two sheets: full results and detailed spectrum data.""" + if not ctx.triggered or not table_data: + return None, False, "" + + try: + output = io.BytesIO() + with pd.ExcelWriter(output, engine="xlsxwriter") as writer: + # Sheet 1: Best candidate links table + results_df = pd.DataFrame(table_data) + + # Filter out only the internal fields used for tooltips and processing + internal_fields = ["spectrum_ids_str", "spectrum_scores_str"] + export_columns = [col for col in results_df.columns if col not in internal_fields] + + # Use all non-internal columns + results_df = results_df[export_columns] + results_df.to_excel(writer, sheet_name="Best Candidate Links", index=False) + + # Sheet 2: Detailed spectrum data + detailed_data = [] + for row in table_data: + gcf_id = row["GCF ID"] + spectrum_ids = ( + row.get("spectrum_ids_str", "").split("|") + if row.get("spectrum_ids_str") + else [] + ) + scores = ( + [float(s) for s in row.get("spectrum_scores_str", "").split("|")] + if row.get("spectrum_scores_str") + else [] + ) + + # Add all spectrum entries without truncation + for spectrum_id, score in zip(spectrum_ids, 
scores): + detailed_data.append( + {"GCF ID": gcf_id, "Spectrum ID": int(spectrum_id), "Score": score} + ) + + detailed_df = pd.DataFrame(detailed_data) + detailed_df.to_excel(writer, sheet_name="All Candidate Links", index=False) + + # Prepare the file for download + excel_data = output.getvalue() + return dcc.send_bytes(excel_data, "nplinker_genom_to_metabol.xlsx"), False, "" + except Exception as e: + return None, True, f"Error generating Excel file: {str(e)}" diff --git a/app/config.py b/app/config.py index ad955a4..e59923b 100644 --- a/app/config.py +++ b/app/config.py @@ -3,8 +3,6 @@ {"label": "BGC Class", "value": "BGC_CLASS"}, ] -GM_SCORING_DROPDOWN_MENU_OPTIONS = [{"label": "Metcalf", "value": "METCALF"}] - GM_FILTER_DROPDOWN_BGC_CLASS_OPTIONS = [ {"label": "Alkaloid", "value": "ALKALOID"}, {"label": "NRP", "value": "NRP"}, @@ -15,3 +13,29 @@ {"label": "Other", "value": "OTHER"}, {"label": "Unknown", "value": "UNKNOWN"}, ] + +GM_SCORING_DROPDOWN_MENU_OPTIONS = [{"label": "Metcalf", "value": "METCALF"}] + +GM_RESULTS_TABLE_MANDATORY_COLUMNS = [ + {"name": "GCF ID", "id": "GCF ID", "type": "numeric"}, + {"name": "# Links", "id": "# Links", "type": "numeric"}, + {"name": "Average Score", "id": "Average Score", "type": "numeric"}, +] + +GM_RESULTS_TABLE_OPTIONAL_COLUMNS = [ + {"name": "Top Spectrum ID", "id": "Top Spectrum ID", "type": "numeric"}, + {"name": "Top Spectrum Precursor m/z", "id": "Top Spectrum Precursor m/z", "type": "numeric"}, + {"name": "Top Spectrum GNPS ID", "id": "Top Spectrum GNPS ID", "type": "text"}, + {"name": "Top Spectrum Score", "id": "Top Spectrum Score", "type": "numeric"}, + {"name": "MiBIG IDs", "id": "MiBIG IDs", "type": "text"}, + {"name": "BGC Classes", "id": "BGC Classes", "type": "text"}, +] + +GM_RESULTS_TABLE_CHECKL_OPTIONAL_COLUMNS = [ + "Top Spectrum ID", + "Top Spectrum Precursor m/z", + "Top Spectrum GNPS ID", + "Top Spectrum Score", + "MiBIG IDs", + "BGC Classes", +] diff --git a/app/layouts.py b/app/layouts.py index f176e4c..c819505 100644 --- a/app/layouts.py +++ b/app/layouts.py @@ -2,6 +2,7 @@ import dash_bootstrap_components as dbc import dash_mantine_components as dmc import dash_uploader as du +from config import GM_RESULTS_TABLE_CHECKL_OPTIONAL_COLUMNS from dash import dash_table from dash import dcc from dash import html @@ -119,7 +120,7 @@ gm_graph = dcc.Graph(id="gm-graph", className="mt-5 mb-3", style={"display": "none"}) # gm_table gm_table = dbc.Card( - [ + [ # TODO: Reset the selection table when a new file is uploaded dbc.CardHeader( [ "Data", @@ -225,15 +226,6 @@ dmc.AccordionPanel( [ gm_scoring_input_group, - html.Div( - dbc.Button( - "Set Scoring", - id="gm-scoring-apply-button", - color="primary", - className="mt-3", - ), - className="d-flex justify-content-center", - ), ] ), ], @@ -242,6 +234,187 @@ ], className="mt-5 mb-3", ) +gm_results = html.Div( + [ + html.Div( + dbc.Button( + "Show Results", + id="gm-results-button", + color="primary", + className="mt-3", + disabled=True, + ), + className="d-flex justify-content-center", + ), + html.Div( + dbc.Alert( + "Your alert message here", + id="gm-results-alert", + color="warning", + className="mt-3 text-center w-75 mx-auto", + is_open=False, + ), + className="d-flex justify-content-center", + ), + ] +) + +gm_results_table = dbc.Card( + [ + dbc.CardHeader( + [ + "Candidate Links", + dbc.Button( + "Columns settings", + id="gm-results-table-column-settings-button", + color="secondary", + size="sm", + className="float-end", + ), + dbc.Modal( + [ + dbc.ModalHeader("Select columns 
to display"), + dbc.ModalBody( + dbc.Checklist( + id="gm-results-table-column-toggle", + options=GM_RESULTS_TABLE_CHECKL_OPTIONAL_COLUMNS, + value=[GM_RESULTS_TABLE_CHECKL_OPTIONAL_COLUMNS[0]], + switch=True, + ) + ), + dbc.ModalFooter( + dbc.Button( + "Close", + id="gm-results-table-column-settings-close", + className="ms-auto", + ) + ), + ], + id="gm-results-table-column-settings-modal", + is_open=False, + ), + ], + id="gm-results-table-card-header", + style={"color": "#888888"}, + ), + dbc.CardBody( + [ + dash_table.DataTable( + id="gm-results-table", + columns=[], + data=[], + editable=False, + filter_action="none", + sort_action="native", + sort_mode="single", # Allow sorting by one column at a time + sort_as_null=["None", ""], # Treat these values as null for sorting + sort_by=[], + page_action="native", + page_current=0, + page_size=10, + style_table={"width": "100%"}, + style_cell={ + "textAlign": "left", + "padding": "5px", + "overflow": "hidden", + "textOverflow": "ellipsis", + "minWidth": "80px", + "width": "auto", + "maxWidth": "auto", + }, + style_header={ + "backgroundColor": "#FF6E42", + "fontWeight": "bold", + "color": "white", + "whiteSpace": "normal", + "height": "auto", + }, + style_data={ + "border": "1px solid #ddd", + "whiteSpace": "normal", + "height": "auto", + }, + style_data_conditional=[ + { + "if": {"state": "selected"}, + "backgroundColor": "white", + "border": "1px solid #ddd", + } + ], + tooltip_delay=0, + tooltip_duration=None, + css=( + [ + { + "selector": ".dash-table-tooltip", + "rule": """ + background-color: #ffd8cc; + font-family: monospace; + font-size: 12px; + max-width: none !important; + white-space: pre-wrap; + padding: 8px; + border: 1px solid #FF6E42; + box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1); + """, + } + ] + + [ + { + "selector": f'th[data-dash-column="{col}"] span.column-header--sort', + "rule": "display: none", + } + for col in [ + "GCF ID", + "# Links", + "Average Score", + "Top Spectrum ID", + "Top Spectrum GNPS ID", + "MiBIG IDs", + "BGC Classes", + ] + ] + + [ + # Style sort arrow hover state + { + "selector": ".column-header--sort:hover", + "rule": "color: white !important;", + } + ] + ), + tooltip={"type": "markdown"}, + ), + ], + id="gm-results-table-card-body", + style={"display": "none"}, + ), + ] +) +gm_results_download = html.Div( + [ + html.Div( + dbc.Button( + "Download Results (Excel)", + id="gm-download-button", + color="primary", + className="mt-3", + disabled=True, + ), + className="d-flex justify-content-center", + ), + html.Div( + dbc.Alert( + "Error downloading results", + id="gm-download-alert", + color="warning", + className="mt-3 text-center w-75 mx-auto", + is_open=False, + ), + className="d-flex justify-content-center", + ), + dcc.Download(id="download-excel"), + ] +) # gm tab content gm_content = dbc.Row( [ @@ -249,6 +422,9 @@ dbc.Col(gm_graph, width=10, className="mx-auto"), dbc.Col(gm_table, width=10, className="mx-auto"), dbc.Col(gm_scoring_accordion, width=10, className="mx-auto dbc"), + dbc.Col(gm_results, width=10, className="mx-auto"), + dbc.Col(gm_results_table, width=10, className="mt-3 mx-auto"), + dbc.Col(gm_results_download, width=10, className="mt-3 mx-auto"), ] ) # mg tab content diff --git a/requirements.txt b/requirements.txt index ff79347..3396050 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ dash-mantine-components dash_bootstrap_templates numpy dash-uploader==0.7.0a1 -packaging==21.3.0 \ No newline at end of file +packaging==21.3.0 +XlsxWriter \ No newline at end of file 
diff --git a/tests/data/mock_obj_data_no_links.pkl b/tests/data/mock_obj_data_no_links.pkl
new file mode 100644
index 0000000..b1179db
Binary files /dev/null and b/tests/data/mock_obj_data_no_links.pkl differ
diff --git a/tests/test_callbacks.py b/tests/test_callbacks.py
index cc32d1b..987d65c 100644
--- a/tests/test_callbacks.py
+++ b/tests/test_callbacks.py
@@ -8,6 +8,7 @@ import pytest
 from dash_uploader import UploadStatus
 
 from app.callbacks import disable_tabs_and_reset_blocks
+from app.callbacks import generate_excel
 from app.callbacks import gm_filter_add_block
 from app.callbacks import gm_filter_apply
 from app.callbacks import gm_scoring_apply
@@ -15,11 +16,13 @@ from app.callbacks import gm_table_toggle_selection
 from app.callbacks import gm_table_update_datatable
 from app.callbacks import process_uploaded_data
+from app.callbacks import toggle_download_button
 from app.callbacks import upload_data
 
 from . import DATA_DIR
 
 
 MOCK_FILE_PATH = DATA_DIR / "mock_obj_data.pkl"
+MOCK_FILE_PATH_NO_LINKS = DATA_DIR / "mock_obj_data_no_links.pkl"
 
 
 @pytest.fixture
@@ -43,17 +46,19 @@ def sample_processed_data():
         {
             "GCF ID": "GCF_1",
             "# BGCs": 3,
-            "BGC Classes": ["NRPS", "PKS"],
+            "BGC Classes": [
+                ["NRPS"],
+                ["PKS"],
+                ["NRPS"],
+            ],
             "BGC IDs": ["BGC_1", "BGC_2", "BGC_3"],
-            "BGC smiles": ["CCO", "CCN", "N/A"],
             "strains": ["Strain_1", "Strain_2", "Strain_3"],
         },
         {
             "GCF ID": "GCF_2",
             "# BGCs": 2,
-            "BGC Classes": ["RiPP", "Terpene"],
+            "BGC Classes": [["RiPP"], ["Terpene"]],
             "BGC IDs": ["BGC_1", "BGC_3"],
-            "BGC smiles": ["CCO", "N/A"],
             "strains": ["Strain_3"],
         },
     ]
@@ -82,9 +87,14 @@ def test_process_uploaded_data_invalid_input(input_path):
 def test_process_uploaded_data_structure():
     processed_data, processed_links = process_uploaded_data(MOCK_FILE_PATH)
+    processed_data_no_links, processed_links_no_links = process_uploaded_data(
+        MOCK_FILE_PATH_NO_LINKS
+    )
 
     assert processed_data is not None
     assert processed_links is not None
+    assert processed_data_no_links == processed_data
+    assert len(json.loads(processed_links_no_links)) == 0  # type: ignore
 
     processed_data = json.loads(processed_data)
     processed_links = json.loads(processed_links)
 
@@ -118,12 +128,16 @@ def test_process_uploaded_data_structure():
         assert isinstance(gcf["GCF ID"], str)
         assert isinstance(gcf["# BGCs"], int)
         assert isinstance(gcf["BGC Classes"], list)
+        # Verify nested list structure for BGC Classes
+        for bgc_class in gcf["BGC Classes"]:
+            assert isinstance(bgc_class, list)
+            for cls in bgc_class:
+                assert isinstance(cls, str)
 
     # Check processed_links structure
     expected_link_keys = [
         "gcf_id",
-        "spectrum_id",
-        "strains",
+        "spectrum",
         "method",
         "score",
         "cutoff",
@@ -143,7 +157,7 @@ def test_disable_tabs(mock_uuid):
     # Test with None as input
     result = disable_tabs_and_reset_blocks(None)
-    assert result == (True, True, [], [], True, [], [], {}, {"display": "block"}, True)
+    assert result == (True, True, [], [], {}, {"display": "block"}, True, [], [], True, True)
 
     # Test with a string as input
     result = disable_tabs_and_reset_blocks(MOCK_FILE_PATH)
 
@@ -154,26 +168,28 @@ def test_disable_tabs(mock_uuid):
         gm_filter_accordion_disabled,
         gm_filter_block_ids,
         gm_filter_blocks,
+        table_header_style,
+        table_body_style,
         gm_scoring_accordion_disabled,
         gm_scoring_block_ids,
         gm_scoring_blocks,
-        table_header_style,
-        table_body_style,
+        gm_results_disabled,
         mg_tab_disabled,
     ) = result
 
     assert gm_tab_disabled is False
     assert gm_filter_accordion_disabled is False
-    assert gm_scoring_accordion_disabled is False
-    assert table_header_style == {}
-    assert table_body_style == {"display": "block"}
-    assert mg_tab_disabled is False
     assert gm_filter_block_ids == ["test-uuid"]
     assert len(gm_filter_blocks) == 1
     assert isinstance(gm_filter_blocks[0], dmc.Grid)
+    assert table_header_style == {}
+    assert table_body_style == {"display": "block"}
+    assert gm_scoring_accordion_disabled is False
     assert gm_scoring_block_ids == ["test-uuid"]
     assert len(gm_scoring_blocks) == 1
    assert isinstance(gm_scoring_blocks[0], dmc.Grid)
+    assert gm_results_disabled is False
+    assert mg_tab_disabled is False
 
 
 @pytest.mark.parametrize(
@@ -208,10 +224,13 @@ def test_gm_filter_apply(sample_processed_data):
     assert set(filtered_df["GCF ID"]) == set(gcf_ids)
 
     # Test BGC_CLASS filter
-    bgc_class = df["BGC Classes"].iloc[0][0]  # Get the first BGC class from the first row
+    bgc_class = df["BGC Classes"].iloc[0][0][0]  # Get first class from nested structure
     filtered_df = gm_filter_apply(df, ["BGC_CLASS"], [""], [[bgc_class]])
     assert len(filtered_df) > 0
-    assert all(bgc_class in classes for classes in filtered_df["BGC Classes"])
+    assert any(
+        bgc_class in [cls for sublist in classes for cls in sublist]
+        for classes in filtered_df["BGC Classes"]
+    )
 
     # Test no filter
     filtered_df = gm_filter_apply(df, [], [], [])
@@ -240,9 +259,11 @@ def test_gm_table_update_datatable(sample_processed_data):
     assert data[1]["GCF ID"] == "GCF_2"
 
     # Check columns
-    assert len(columns) == 2
+    assert len(columns) == 4
     assert columns[0]["name"] == "GCF ID"
     assert columns[1]["name"] == "# BGCs"
+    assert columns[2]["name"] == "BGC Classes"
+    assert columns[3]["name"] == "MiBIG IDs"
 
     # Check style
     assert style == {"display": "block"}
@@ -371,3 +392,35 @@ def test_gm_scoring_apply_empty_inputs():
 
     assert len(result) == 1, "Should return original DataFrame"
     assert result.equals(df), "Should return unmodified DataFrame"
+
+
+def test_toggle_download_button():
+    """Test the toggle_download_button function with different inputs."""
+    # Test with empty table data - should disable the button
+    result = toggle_download_button([])
+    assert result == (True, False, "")
+
+    # Test with populated table data - should enable the button
+    sample_data = [{"GCF ID": 1, "# Links": 5}]
+    result = toggle_download_button(sample_data)
+    assert result == (False, False, "")
+
+
+def test_generate_excel_error_handling():
+    """Test the generate_excel function error handling."""
+    table_data = [{"GCF ID": 1, "spectrum_ids_str": "123"}]
+
+    with (
+        patch("app.callbacks.ctx") as mock_ctx,
+        patch("app.callbacks.pd.ExcelWriter") as mock_writer,
+    ):
+        mock_ctx.triggered = True
+        # Simulate an error during Excel generation
+        mock_writer.side_effect = Exception("Excel write error")
+
+        result = generate_excel(1, table_data)
+
+        # Should return an error message
+        assert result[0] is None
+        assert result[1] is True  # Alert is open
+        assert "Error generating Excel file" in result[2]