import os
from pathlib import Path
import scanpy as sc
import pandas as pd
import warnings
import subprocess
from typing import Literal, Union
from pyimzml.ImzMLParser import ImzMLParser
from ..util._classes import AnnDataST, AnnDataSM, AnnDataJointSMST
# Directory containing this module; the dataset listing and the cached
# dataset files are resolved relative to it.
MODULE_PATH = Path(__file__).parent
# Installs a global "ignore" filter, so every warning is suppressed from the
# moment this module is imported.
warnings.filterwarnings("ignore")
# Zenodo record ID interpolated into the download URLs built by the loaders.
zenodo_accession = '14986870'
# Tab-separated listing shipped next to this module; the loaders read its
# "file_name" column to validate the requested sample names.
zenodo_file_path = pd.read_csv(os.path.join(MODULE_PATH, 'zenodo_url.txt'), sep='\t')
def list_datasets() -> pd.DataFrame:
    """
    List all available datasets in the package.

    :returns: pd.DataFrame
        A copy of the dataset listing read from ``zenodo_url.txt``. A copy is
        returned because the loaders validate sample names against the
        module-level table; handing out the table itself would let a caller's
        in-place edits silently break that validation.
    """
    return zenodo_file_path.copy()
def load_imzML_and_ibd(sample_name: str) -> ImzMLParser:
    """
    Load the imzML file for the given sample name.

    The ``.imzML``/``.ibd`` pair is looked up under ``datasets/`` next to this
    module. If either file is missing, both are downloaded from the Zenodo
    record with ``curl`` before the parser is opened on the ``.imzML`` path.

    :param sample_name: str
        The name of the sample. Use `list_datasets` to get the list of all available datasets.
    :returns: ImzMLParser
        A parser opened on the local ``.imzML`` file.
    :raises ValueError:
        If ``sample_name`` has no ``.ibd`` entry in the dataset listing.
    :raises subprocess.CalledProcessError:
        If ``curl`` exits with a non-zero status (network or HTTP error).
    :raises RuntimeError:
        If the freshly downloaded files cannot be parsed.
    """
    valid_sample_names = {
        file_name.split(".ibd")[0]
        for file_name in zenodo_file_path["file_name"]
        if file_name.endswith("ibd")
    }
    if sample_name not in valid_sample_names:
        raise ValueError(
            f"Invalid sample name. Valid sample names are {valid_sample_names}"
        )

    default_path_imzml = os.path.join(MODULE_PATH, f"./datasets/{sample_name}.imzML")
    default_path_ibd = os.path.join(MODULE_PATH, f"./datasets/{sample_name}.ibd")
    if os.path.exists(default_path_imzml) and os.path.exists(default_path_ibd):
        return ImzMLParser(default_path_imzml)

    # curl -o does not create missing directories, so make sure the cache
    # directory exists before the first download.
    os.makedirs(os.path.dirname(default_path_imzml), exist_ok=True)

    downloads = (("imzML", default_path_imzml), ("ibd", default_path_ibd))
    for extension, destination in downloads:
        url = f"https://zenodo.org/records/{zenodo_accession}/files/{sample_name}.{extension}?download=1"
        print(f"Downloading from {url}")
        # Download to a side file and move it into place only on success, so an
        # interrupted or failed transfer is never mistaken for a cached dataset.
        partial_path = f"{destination}.part"
        try:
            # --fail makes curl exit non-zero on HTTP errors instead of saving
            # the server's error page as if it were the dataset.
            subprocess.run(
                ["curl", "-L", "--fail", "-o", partial_path, url],
                check=True,
            )
            os.replace(partial_path, destination)
        finally:
            if os.path.exists(partial_path):
                os.remove(partial_path)

    try:
        return ImzMLParser(default_path_imzml)
    except Exception as e:
        raise RuntimeError("Failed to download the dataset.") from e
def load_adata(
    sample_name: str,
    modality: Literal["ST", "SM", "joint"],
) -> Union[AnnDataST, AnnDataSM, AnnDataJointSMST]:
    """
    Load the AnnData object for the given sample name and modality.

    The ``.h5ad`` file is looked up under ``datasets/`` next to this module and
    downloaded from the Zenodo record with ``curl`` when it is missing. The
    result is wrapped in the class matching ``modality`` whether the file came
    from the local cache or from a fresh download.

    :param sample_name: str
        The name of the sample. Use `list_datasets` to get the list of all available datasets.
    :param modality: Literal["ST", "SM", "joint"]
        The modality of the dataset. Choose from "ST", "SM", or "joint".
    :returns: Union[AnnDataST, AnnDataSM, AnnDataJointSMST]
        The dataset wrapped via the ``from_anndata`` constructor of the class
        selected by ``modality``.
    :raises ValueError:
        If ``sample_name`` is not derivable from an ``.h5ad`` entry in the
        dataset listing, or if ``modality`` is not one of the three choices.
    :raises subprocess.CalledProcessError:
        If ``curl`` exits with a non-zero status (network or HTTP error).
    :raises RuntimeError:
        If the freshly downloaded file cannot be read or wrapped.
    """
    # Listing entries look like "<prefix>_<modality>_<sample...>.h5ad"; the
    # sample name is everything after the second underscore.
    valid_sample_names = {
        "_".join(file_name.split(".h5ad")[0].split("_")[2:])
        for file_name in zenodo_file_path["file_name"]
        if file_name.endswith("h5ad")
    }
    if sample_name not in valid_sample_names:
        raise ValueError(
            f"Invalid sample name. Valid sample names are {valid_sample_names}"
        )

    # Reject unknown modalities before any I/O; otherwise a bogus URL would be
    # requested and the function would fall through and return None.
    wrappers = {"ST": AnnDataST, "SM": AnnDataSM, "joint": AnnDataJointSMST}
    if modality not in wrappers:
        raise ValueError(
            f"Invalid modality {modality!r}. Valid modalities are {sorted(wrappers)}"
        )
    wrapper = wrappers[modality]

    default_path_h5ad = os.path.join(
        MODULE_PATH, f"./datasets/adata_{modality}_{sample_name}_raw.h5ad"
    )
    print(default_path_h5ad)
    if os.path.exists(default_path_h5ad):
        # Wrap the cached file too, so the return type does not depend on
        # whether this call had to download.
        return wrapper.from_anndata(sc.read(default_path_h5ad))

    # curl -o does not create missing directories, so make sure the cache
    # directory exists before the first download.
    os.makedirs(os.path.dirname(default_path_h5ad), exist_ok=True)

    url = f"https://zenodo.org/records/{zenodo_accession}/files/adata_{modality}_{sample_name}.h5ad?download=1"
    print(f"Downloading from {url}")
    # Download to a side file and move it into place only on success, so an
    # interrupted or failed transfer is never mistaken for a cached dataset.
    partial_path = f"{default_path_h5ad}.part"
    try:
        # --fail makes curl exit non-zero on HTTP errors instead of saving the
        # server's error page as if it were the dataset.
        subprocess.run(
            ["curl", "-L", "--fail", "-o", partial_path, url],
            check=True,
        )
        os.replace(partial_path, default_path_h5ad)
    finally:
        if os.path.exists(partial_path):
            os.remove(partial_path)

    try:
        return wrapper.from_anndata(sc.read(default_path_h5ad))
    except Exception as e:
        raise RuntimeError("Failed to download the dataset.") from e
def load_Vicari_2024_msi() -> pd.DataFrame:
    """
    Load the mouse 3 MSI data from Vicari et al., 2024.

    The CSV is looked up under ``datasets/`` next to this module and downloaded
    from the Zenodo record with ``curl`` when it is missing.

    :returns: pd.DataFrame
        The contents of ``mouse3_brain_msi.csv`` as read by ``pd.read_csv``.
    :raises subprocess.CalledProcessError:
        If ``curl`` exits with a non-zero status (network or HTTP error).
    :raises RuntimeError:
        If the freshly downloaded file cannot be parsed as CSV.
    """
    default_path_msi = os.path.join(MODULE_PATH, './datasets/mouse3_brain_msi.csv')
    if os.path.exists(default_path_msi):
        return pd.read_csv(default_path_msi)

    # curl -o does not create missing directories, so make sure the cache
    # directory exists before the first download.
    os.makedirs(os.path.dirname(default_path_msi), exist_ok=True)

    url = f"https://zenodo.org/records/{zenodo_accession}/files/mouse3_brain_msi.csv?download=1"
    print(f"Downloading from {url}")
    # Download to a side file and move it into place only on success, so an
    # interrupted or failed transfer is never mistaken for a cached dataset.
    partial_path = f"{default_path_msi}.part"
    try:
        # --fail makes curl exit non-zero on HTTP errors; without it an HTML
        # error page would be saved and could even parse as a bogus table.
        subprocess.run(
            ["curl", "-L", "--fail", "-o", partial_path, url],
            check=True,
        )
        os.replace(partial_path, default_path_msi)
    finally:
        if os.path.exists(partial_path):
            os.remove(partial_path)

    try:
        return pd.read_csv(default_path_msi)
    except Exception as e:
        raise RuntimeError("Failed to download the dataset.") from e