Plotting

%load_ext autoreload
%autoreload 2

import logging

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import anndata as ad

from alphapepttools import pp
from alphapepttools.pl import colors
from alphapepttools.pl.colors import show_rgba_color_list
from alphapepttools.pl.figure import create_figure, label_axes, save_figure
from alphapepttools.pl.plots import Plots

# Configure the root logger so that records of level INFO and above are emitted
logging.basicConfig(level=logging.INFO)

Basic colors, palettes and color maps for alphapepttools

# Names of the individual colors of the basic palette
base_colors = [
    "red", "green", "blue", "orange", "yellow",
    "lightred", "lightgreen", "lightblue", "lightorange",
    "grey", "white", "black",
]  # fmt: skip

# Look up every name in colors.BaseColors and display the results as swatches
show_rgba_color_list(list(map(colors.BaseColors.get, base_colors)))
../_images/4a83ad78f171fdbdcd43fd75b6a7effa652f06c215d4a87037252571fe5b9bd7.png
# Display each named color palette as its own row of swatches
palettes = ["binary", "qualitative"]
for palette_name in palettes:
    palette_colors = colors.BasePalettes.get(palette_name)
    show_rgba_color_list(palette_colors)
../_images/4b4a9089d4dad97c147c367ab7a3e710351a0428ee9445498b146aa56e8bc5d7.png ../_images/88b98e808d2a2938ede9db9ceea3b85f0e56d0e7805cdef30a82aa6727907bd9.png
# Sample each named colormap on a fine grid over [0, 1) and display the samples
maps = ["sequential", "diverging"]
for cmap_name in maps:
    colormap = colors.BaseColormaps.get(cmap_name)
    sampled_colors = colormap(np.arange(0, 1, 0.001))
    show_rgba_color_list(list(sampled_colors))
../_images/c4a7d9f84bc5dedd7835d0511f006bf5af20b98e6e9105125cc16925870e5120.png ../_images/d32add72bf48a258e6dd7b0b147550beae1e9347b624aae1b9819e5ceb33c8b1.png
# 100 uniform random values in [0, 1) followed by two extreme outliers
test_data = [*np.random.rand(100), -100, 100]

# Colormap fitted to the numerical values without capping
uncapped_mapper = colors.MappedColormaps("sequential")
mapped_colors = uncapped_mapper.fit_transform(test_data)
show_rgba_color_list(mapped_colors)

# Colormap fitted to the numerical values with capping
# (the (5, 95) argument presumably gives lower/upper percentiles -- confirm)
capped_mapper = colors.MappedColormaps("sequential", (5, 95))
mapped_colors = capped_mapper.fit_transform(test_data)
show_rgba_color_list(mapped_colors)
../_images/95a76d2ffb39a053af84aba56d6d6866477feb0fe002de4cb8054cb7cae19d5b.png ../_images/dc5b5e237f0d7033b0c6d2f0fe3c087ada308096bef247d57f72769b9f782976.png

Demonstrate the figure.py submodule: style & label handling

# Figure with a 2x3 grid of subplots; the axes are served by an AxisManager
fig, axm = create_figure(
    nrows=2,
    ncols=3,
    figsize=(10, 6),
    # figure_padding=3
)

# Example dataset: one curve per subplot, all evaluated on the same x grid
x = np.linspace(0, 10, 100)
y_funcs = [
    np.sin,
    np.cos,
    np.tan,
    lambda x: x**2,
    lambda x: np.exp(x / 5),
    lambda x: np.log(x + 1),
]

# One qualitative palette color per function
palette = colors.BasePalettes.get("qualitative", len(y_funcs))

# Walk through the axes with next() and draw one function on each.
# A StopIteration raised inside the loop is ignored, so plotting simply ends early
# (presumably axm.next() raises it once the grid is used up -- confirm).
try:
    for idx, func in enumerate(y_funcs):
        ax = axm.next()
        ax.plot(x, func(x), color=palette[idx])
        label_axes(
            ax,
            title=f"Function {idx + 1}",
            xlabel="X values",
            ylabel="Y values",
        )
except StopIteration:
    pass

plt.show()

# Write the figure to ./example_outputs/example_figure.png
save_figure(fig=fig, filename="example_figure.png", output_dir="./example_outputs", dpi=300, transparent=False)
../_images/6ba5c96f793620b9871bffc656ab21033666e1fda4c88c70e1f3bafe380b00b2.png

Basic histogram

# Toy data: three groups of 200 normally distributed points each.
# Every group is centred on its level and shifted by one extra random offset.
n_levels, n_per_level = 3, 200
example_df = pd.DataFrame(
    {
        "values": np.concatenate(
            [np.random.normal(level, size=n_per_level) + np.random.normal(level) for level in range(n_levels)]
        ),
        "values2": np.concatenate(
            [np.random.normal(level, size=n_per_level) + np.random.normal(level) for level in range(n_levels)]
        ),
        # group label of each row: 0 (x200), 1 (x200), 2 (x200)
        "levels": np.repeat(range(n_levels), n_per_level).tolist(),
        # running row number 0..599
        "levels2": np.arange(n_levels * n_per_level),
    }
)
# Histogram drawn in a single color
fig, axm = create_figure(figsize=(5, 3))

# Fetch an axis from the AxisManager and plot the "values" column on it
ax = axm.next()
Plots.histogram(data=example_df, value_column="values", bins=20, color="lightgreen", ax=ax)
../_images/f382756044272a560a0d8c81c8b124851ebe7a8363b77413494beb24a06fa4ad.png
# Histogram colored by the "levels" column
fig, axm = create_figure(figsize=(5, 3))

# Fetch an axis from the AxisManager
ax = axm.next()

# Request as many qualitative colors as there are distinct levels
level_count = example_df["levels"].nunique()
palette = colors.BasePalettes.get("qualitative", level_count)

Plots.histogram(
    data=example_df,
    value_column="values",
    bins=20,
    color="blue",
    color_map_column="levels",
    palette=palette,
    legend="auto",
    legend_kwargs={"title": "Levels", "loc": "upper left"},
    hist_kwargs={"alpha": 0.7, "histtype": "stepfilled", "edgecolor": "k"},
    ax=ax,
)
label_axes(ax, title="Histogram with multiple colors based on levels")
plt.show()

# Write the figure to ./example_outputs/example_histogram.png
save_figure(fig=fig, filename="example_histogram.png", output_dir="./example_outputs", dpi=300, transparent=False)
../_images/9b077bb16b33f4fb82761270cc06d57ca270bf548a1b4303008ae7364bedb099.png

Rank plot of protein median intensities across all samples

def load_diann_pg_matrix(data_path: str) -> ad.AnnData:
    """Read a pickled DIA-NN protein-group matrix and wrap it in an AnnData object.

    Parameters
    ----------
    data_path
        Location of the pickle file; passed straight to ``pandas.read_pickle``.

    Returns
    -------
    ad.AnnData
        Whatever ``_to_anndata`` builds from the unpickled object.
    """
    from alphapepttools.pp.data import _to_anndata

    # NOTE: unpickling can execute arbitrary code, so only load trusted files.
    # Interim loader, to be replaced by the AlphaBase PSM reader.
    return _to_anndata(pd.read_pickle(data_path))
# Locations of the example proteomics data set and its annotation tables
data_path = "./example_data/HeLa_QC_data.pkl"
sample_metadata_path = "./example_data/HeLa_QC_sample_metadata.csv"
feature_metadata_path = "./example_data/HeLa_QC_feature_metadata.csv"

# Load the protein groups into an AnnData object with index & columns as obs & var
adata = load_diann_pg_matrix(data_path)

# Attach the sample metadata (axis=0)
sample_metadata = pd.read_csv(sample_metadata_path, index_col=0)
adata = pp.add_metadata(adata, sample_metadata, axis=0)

# Attach the feature metadata (axis=1)
feature_metadata = pd.read_csv(feature_metadata_path, index_col=0)
adata = pp.add_metadata(adata, feature_metadata, axis=1)

Add a layer called ‘intensity’ that stores a copy of the raw data.

# Keep a copy of the current data matrix in a layer named "intensity"
adata.layers["intensity"] = adata.X.copy()

# Three stacked panels, one per way of feeding data to rank_median_plot
fig, axm = create_figure(3, 1, figsize=(5, 9))
data_df = adata.to_df()
panels = [
    # (data source, extra keyword arguments, panel title)
    (data_df, {}, "Rank median plot from a dataframe"),  # plain dataframe
    (adata, {}, "Rank median plot"),  # AnnData with default settings
    (adata, {"layer": "intensity"}, "Rank median plot from intensity layer"),  # non-default layer
]
for source, extra_kwargs, panel_title in panels:
    ax = axm.next()
    Plots.rank_median_plot(source, ax=ax, **extra_kwargs)
    label_axes(ax, title=panel_title)
../_images/32531a27e16cd6d78718b6cf51ef2c9506937db8592eb0450caaefa1e290ebec.png
# Add a column marking mitochondrial proteins (gene name starts with "MT-")
# so the rank plot can be color-coded by it
mt_mask = adata.var["Genes"].str.startswith("MT-")
adata.var["Mito_prot"] = "not_mito"
adata.var.loc[mt_mask, "Mito_prot"] = "mito"

fig, axs = create_figure(figsize=(5, 3))
ax = axs.next()
Plots.rank_median_plot(
    adata,
    color_map_column="Mito_prot",
    legend="auto",
    scatter_kwargs={"s": 10},
    ax=ax,
)
../_images/4fdefae3d2e5c39e5a7e1d7bf61ae927aabe401c6cede023a229155e1012af7b.png
# Median intensity of every protein (NaNs ignored) and the median of those medians
medians = np.nanmedian(adata.X, axis=0)
median_median = np.nanmedian(medians)
adata.var["Median_intensity"] = medians

# Label each protein "red" if its median lies above the median of medians, otherwise "blue"
med_mask = adata.var["Median_intensity"] > median_median
adata.var["Above_median"] = "blue"
adata.var.loc[med_mask, "Above_median"] = "red"

fig, axs = create_figure(figsize=(5, 3))
ax = axs.next()
Plots.rank_median_plot(
    adata,
    color_map_column="Above_median",
    scatter_kwargs={"s": 10},
    ax=ax,
)

label_axes(ax, title="Rank median plot with color based on median intensity")
../_images/6512acb7512c4b1c25f95f5a0edfc132db11dd29ee595c9f6fd6c5b6f8e435c3.png