Source code for spatialmeta.data._dataloader

import os
from pathlib import Path
import scanpy as sc
import pandas as pd
import warnings
import subprocess
from typing import Literal, Union
from pyimzml.ImzMLParser import ImzMLParser
from ..util._classes import AnnDataST, AnnDataSM, AnnDataJointSMST
MODULE_PATH = Path(__file__).parent
warnings.filterwarnings("ignore")

zenodo_accession = '14986870'
zenodo_file_path = pd.read_csv(os.path.join(MODULE_PATH, 'zenodo_url.txt'), sep='\t')

[docs]def list_datasets():
    """
    List all available datasets in the package.
    """
    return zenodo_file_path


[docs]def load_imzML_and_ibd(sample_name: str) -> ImzMLParser:
    """
    Load the imzML file for the given sample name.

    :param sample_name: str
        The name of the sample. Use `list_datasets` to get the list of all available datasets.
    """
    valid_sample_names = set(
        list(
            map(
                lambda x: x.split(".ibd")[0],
                filter(lambda x: x.endswith("ibd"), zenodo_file_path["file_name"]),
            )
        )
    )
    if sample_name not in valid_sample_names:
        raise ValueError(
            f"Invalid sample name. Valid sample names are {valid_sample_names}"
        )
    default_path_imzml = os.path.join(MODULE_PATH, f"./datasets/{sample_name}.imzML")
    default_path_ibd = os.path.join(MODULE_PATH, f"./datasets/{sample_name}.ibd")

    if os.path.exists(default_path_imzml) and os.path.exists(default_path_ibd):
        return ImzMLParser(default_path_imzml)
    else:
        import subprocess
        print(f"Downloading from https://zenodo.org/records/{zenodo_accession}/files/{sample_name}.imzML?download=1")
        ret1 = subprocess.run(
            [
                "curl",
                "-L",
                "-o",
                default_path_imzml,
                f"https://zenodo.org/records/{zenodo_accession}/files/{sample_name}.imzML?download=1",
            ],
            check=True,
        )
        print(f"Downloading from https://zenodo.org/records/{zenodo_accession}/files/{sample_name}.ibd?download=1")
        ret2 = subprocess.run(
            [
                "curl",
                "-L",
                "-o",
                default_path_ibd,
                f"https://zenodo.org/records/{zenodo_accession}/files/{sample_name}.ibd?download=1",
            ],
            check=True,
        )
        if ret1.returncode == 0 and ret2.returncode == 0:
            try:
                return ImzMLParser(default_path_imzml)
            except Exception as e:
                raise RuntimeError("Failed to download the dataset.")
        else:
            raise RuntimeError("Failed to download the dataset.")


[docs]def load_adata(
    sample_name: str,
    modality: Literal["ST", "SM", "joint"],
) -> Union[AnnDataST, AnnDataSM, AnnDataJointSMST]:
    """
    Load the AnnData object for the given sample name and modality.

    :param sample_name: str
        The name of the sample. Use `list_datasets` to get the list of all available datasets.
    :param modality: Literal["ST", "SM", "joint"]
        The modality of the dataset. Choose from "ST", "SM", or "joint".
    """
    valid_sample_names = set(
        list(
            map(
                lambda x: "_".join(x.split(".h5ad")[0].split("_")[2:]),
                filter(lambda x: x.endswith("h5ad"), zenodo_file_path["file_name"]),
            )
        )
    )
    if sample_name not in valid_sample_names:
        raise ValueError(
            f"Invalid sample name. Valid sample names are {valid_sample_names}"
        )

    default_path_h5ad = os.path.join(
        MODULE_PATH, f"./datasets/adata_{modality}_{sample_name}_raw.h5ad"
    )
    
    print(default_path_h5ad)
    if os.path.exists(default_path_h5ad):
        return sc.read(default_path_h5ad)
    else:
        import subprocess
        print(f"Downloading from https://zenodo.org/records/{zenodo_accession}/files/adata_{modality}_{sample_name}.h5ad?download=1")
        ret = subprocess.run(
            [
                "curl",
                "-L",
                "-o",
                default_path_h5ad,
                f"https://zenodo.org/records/{zenodo_accession}/files/adata_{modality}_{sample_name}.h5ad?download=1",
            ],
            check=True,
        )
        if ret.returncode == 0:
            try:
                if modality == "ST":
                    return AnnDataST.from_anndata(sc.read(default_path_h5ad))
                elif modality == "SM":
                    return AnnDataSM.from_anndata(sc.read(default_path_h5ad))
                elif modality == "joint":
                    return AnnDataJointSMST.from_anndata(sc.read(default_path_h5ad))
            except Exception as e:
                raise RuntimeError("Failed to download the dataset.")
        else:
            raise RuntimeError("Failed to download the dataset.")


[docs]def load_Vicari_2024_msi() -> pd.DataFrame:
    """
    Load the mouse 3 MSI data from Vicari et al., 2024.
    """

    default_path_msi = os.path.join(MODULE_PATH, f'./datasets/mouse3_brain_msi.csv')
    if os.path.exists(default_path_msi):
        return pd.read_csv(default_path_msi)
    else:
        import subprocess
        print(f"Downloading from https://zenodo.org/records/{zenodo_accession}/files/mouse3_brain_msi.csv?download=1")
        ret = subprocess.run(
            [
                "curl",
                "-L",
                "-o",
                default_path_msi,
                f"https://zenodo.org/records/{zenodo_accession}/files/mouse3_brain_msi.csv?download=1",
            ],
            check=True,
        )
        if ret.returncode == 0:
            try:
                return pd.read_csv(default_path_msi)
            except Exception as e:
                raise RuntimeError("Failed to download the dataset.")
        else:
            raise RuntimeError("Failed to download the dataset.")