1. Code to Recreate all Figures
[1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
from matplotlib.ticker import FormatStrFormatter
import matplotlib as mpl
from matplotlib import font_manager
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
mpl.rcParams['font.size'] = 12
mpl.rcParams['font.family'] = 'sans-serif'
mpl.rcParams['font.sans-serif'] = 'Helvetica Neue'
params = {'legend.fontsize': 6,
'axes.labelsize': 6,
'axes.titlesize':6,
'xtick.labelsize':6,
'ytick.labelsize':6}
import matplotlib.pylab as pylab
pylab.rcParams.update(params)
from matplotlib.colors import LinearSegmentedColormap
c1='#B3262A'
c2='#2f559a'
c3 = "#FFFFFF"
cmap_classification_score = LinearSegmentedColormap.from_list("Prob_Unstim", [c2, c1], N=500)
cmap_unstim = LinearSegmentedColormap.from_list("Prob_Unstim", [c3, c1], N=500)
cmap_stim = LinearSegmentedColormap.from_list("Prob_Unstim", [c3, c2], N=500)
mm = 1/2.54 * 1/10 # mm in inches
2. Figure 1
2.1. Figure 1C
[2]:
data = pd.read_csv("../data/Raw_Data_LMD_cell_counting.csv")
data.n_excised = [str(x) for x in data.n_excised]
data.groupby("n_excised").mean().reset_index()
[2]:
n_excised | Replicate | n_counted | |
---|---|---|---|
0 | 100 | 3.0 | 79.4 |
1 | 500 | 1.5 | 387.5 |
[3]:
fig, axs = plt.subplots(1, 1, figsize = (28 * mm, 43 * mm))
sns.despine()
sns.stripplot(data, x = "n_excised", y = "n_counted", color = "black", s = 3, ax = axs)
sns.scatterplot(y="n_counted", x="n_excised", data= data.groupby("n_excised").mean().reset_index(), marker='_', s=150, color='k', ax = axs, linewidth = 0.25)
axs.set_ylim(0, 500)
axs.set_ylabel("regions recovered")
axs.set_xlabel("regionds excised")
axs.tick_params(axis='both', which='major', labelsize=6, size = 2, width = 0.25)
axs.spines["bottom"].set_linewidth(0.25)
axs.spines["left"].set_linewidth(0.25)
axs.set_xlim(-0.8, 1.5)
fig.tight_layout()
fig.savefig("../plots/LMD_region_counts.pdf", bbox_inches='tight')

2.2. Figure 1D
[4]:
data = pd.read_csv("../data/Raw_Data_LMD_FACS_comparision_data.csv")
#convert to percent of selected cells
data["Mean"] = (data["Mean"] / data["count per replicate"] * 100)
data["Std"] = (data["Std"]/ data["count per replicate"] * 100)
#get values for plotting
plot_final = data.sort_values(by = "enrichment", ascending=False)
labels = plot_final.groupby("enrichment")["Mean"].mean().sort_values(ascending= False).index
mean = plot_final.groupby("enrichment")["Mean"].mean().sort_values(ascending= False).values
std = plot_final.groupby("enrichment")["Mean"].sem().sort_values(ascending= False).values
#actually generate plot
fig, axs = plt.subplots(1, 1, figsize = (28 * mm, 43 * mm))
sns.despine()
from matplotlib.colors import to_rgba
colors = [c2, c1]
colors_20per_alpha = [to_rgba(color, alpha = 0.2) for color in colors]
axs.scatter(plot_final.enrichment, plot_final.Mean, color = [colors[0]]*3 + [colors[1]]*4, s = 1)
axs.bar(labels, mean, align='center', color =colors_20per_alpha, width = 0.5, edgecolor = colors, linewidth = 0.25) #ecolor= [c2, c1]
#add error bars
for pos, y, err, color in zip([0, 1], mean, std, colors):
axs.errorbar(pos, y, err, capsize = 1.5, linewidth = 0.25, color = color, capthick = 0.25)
axs.set_ylabel("% sgRNAs recovered", fontsize = 6, labelpad=0.2)
axs.set_ylim(0, 100)
axs.set_xlim(-0.8, 1.5)
axs.set_xticks(ticks = [0, 1], labels = ["LMD", "FACS"], rotation = -45)
axs.tick_params(axis='both', which='major', labelsize=6, size = 2, width = 0.25)
axs.spines["bottom"].set_linewidth(0.25)
axs.spines["left"].set_linewidth(0.25)
fig.tight_layout()
fig.savefig("../plots/FACS_LMD_comparision.pdf", bbox_inches='tight')

3. Figure 2
3.1. Figure 2A
Only the image region shown in this figure is deposited on Github. The complete imaging dataset with additional regions is deposited on Zenodo.
[5]:
from tifffile import imread
def colorize(im, color, clip_percentile=0.0):
"""
Helper function to create an RGB image from a single-channel image using a
specific color.
"""
# Check that we do just have a 2D image
if im.ndim > 2 and im.shape[2] != 1:
raise ValueError('This function expects a single-channel image!')
# Rescale the image according to how we want to display it
im_scaled = im.astype(np.float32) - np.percentile(im, clip_percentile)
im_scaled = im_scaled / np.percentile(im_scaled, 100 - clip_percentile)
im_scaled = np.clip(im_scaled, 0, 1)
# Need to make sure we have a channels dimension for the multiplication to work
im_scaled = np.atleast_3d(im_scaled)
# Reshape the color (here, we assume channels last)
color = np.asarray(color).reshape((1, 1, -1))
return im_scaled * color
def generate_composite(images, colors = [(0, 0, 1),(0, 1, 0), (1, 0, 0), (1, 0, 1)], plot = False):
colorized = []
for image, color in zip(images, colors):
image = colorize(image, color, 0.0)
colorized.append(image)
if plot:
for i in colorized:
plt.figure()
plt.imshow(i)
image = colorized[0]
for i in range(len(colorized)-1):
image += colorized[i+1]
return(np.clip(image, 0, 1))
def _percentile_norm(im, lower_value, upper_value):
IPR = upper_value - lower_value
out_im = im - lower_value
out_im = out_im / IPR
out_im = np.clip(out_im, 0, 1)
return out_im
#generate overview for unstim
image_paths = ["../data/example_imaged_100X_timecousre_LC3-mCherry/WT_Torin_Well2_tile10_ch2.tiff",
"../data/example_imaged_100X_timecousre_LC3-mCherry/WT_Torin_Well2_tile10_ch1.tiff",
"../data/example_imaged_100X_timecousre_LC3-mCherry/WT_Torin_Well2_tile10_ch3.tiff",
]
channels = []
for im_path in image_paths:
_im = imread(im_path)
_im = _im
channels.append(_im)
channels = np.array(channels)
_images_stim = []
for i in range(channels.shape[1]):
_im = channels[:, i, :550, 180:730]
_images_stim.append(_im)
_images_stim = np.array(_images_stim)
#define timepoints we want to look at
timepoints = [0, 1, 2, 8, 14]
_images_stim = _images_stim[timepoints, : , :, :]
fig, axs = plt.subplots(4, 5, figsize = (32, 24))
#normalize all images to the range of the brightest image (to preserve relativ comparision)
min_value = [np.quantile(_img, 0.01) for _img in _images_stim[-1]]
max_value = [np.quantile(_img, 0.99) for _img in _images_stim[-1]]
#normalization needs to be
for i, img in enumerate(_images_stim):
normed = [_percentile_norm(_img, min_value[j], max_value[j])for j, _img in enumerate(img)]
colorized = generate_composite(normed, ((1, 0, 0),(0,1, 0), (0, 0, 1)))
if i == 0:
axs[0, i].imshow(colorized)
axs[0, i].xaxis.set_visible(False)
axs[0, i].tick_params(left=False, labelleft=False)
axs[0, i].set_title(f"{timepoints[i]}h", fontsize = 30)
axs[1, i].imshow(normed[0], cmap = "gray")
axs[1, i].xaxis.set_visible(False)
axs[1, i].tick_params(left=False, labelleft=False)
axs[2, i].imshow(normed[1], cmap = "gray")
axs[2, i].xaxis.set_visible(False)
axs[2, i].tick_params(left=False, labelleft=False)
axs[3, i].imshow(normed[2], cmap = "gray")
axs[3, i].xaxis.set_visible(False)
axs[3, i].tick_params(left=False, labelleft=False)
else:
axs[0, i].imshow(colorized)
axs[0, i].axis("off")
axs[0, i].set_title(f"{timepoints[i]}h", fontsize = 30)
axs[1, i].imshow(normed[0], cmap = "gray")
axs[1, i].axis("off")
axs[2, i].imshow(normed[1], cmap = "gray")
axs[2, i].axis("off")
axs[3, i].imshow(normed[2], cmap = "gray")
axs[3, i].axis("off")
axs[0, 0].set_ylabel("mCherry-LC3\nmembrane\nnucelus", fontsize = 30, rotation = 0, labelpad=110)
axs[1, 0].set_ylabel("mCherry-LC3", fontsize = 30, rotation = 0, labelpad=110)
axs[2, 0].set_ylabel("membrane", fontsize = 30, rotation = 0, labelpad=110)
axs[3, 0].set_ylabel("nucelus", fontsize = 30, rotation = 0, labelpad=110)
fig.tight_layout()
fig.savefig("../plots/Torin_stimulation_WT_cells_timecourse_100X.pdf", bbox_inches='tight')

3.2. Figure 2D
[6]:
data = pd.read_csv("../data/Screen_A1_Raw_Data_Autophagy_Enrichment.csv", index_col = 0)
data = data.set_index("labels")
[7]:
from matplotlib.colors import to_rgba
ATG5_color = "#B3262A"
autophagy_color = "#B3262A"
non_targeting_color = "#2F559A"
ATG5_values = data.loc["ATG5"].values
non_targeting_controls = data.loc["Non-Targeting Control"].values
autophagy_values = data.loc["Autophagy"].values
median_ATG5 = np.median(ATG5_values)
median_non_targeting_controls = np.median(non_targeting_controls)
median_autophagy_values = np.median(autophagy_values)
df = pd.concat([pd.DataFrame({"values":ATG5_values.flatten(), "labels": ["ATG5"]*len(ATG5_values)}),
pd.DataFrame({"values":autophagy_values.flatten(), "labels" : ["Autophagy"]*len(autophagy_values)}),
pd.DataFrame({"values":non_targeting_controls.flatten(), "labels" : ["Non-Targeting Control"]*len(non_targeting_controls)})])
fig, axs = plt.subplots(1, 1, figsize = (94 * mm, 47 * mm))
sns.despine(offset = {"left": 10, "bottom":10})
axs.set_xlabel("enrichment of gRNAs")
sns.swarmplot(df, x = "values", y = "labels", ax = axs, dodge = False, hue = "labels", s = 2,
palette = {"ATG5":to_rgba(ATG5_color, alpha = 0.5), "Autophagy":to_rgba(autophagy_color, alpha = 0.5), "Non-Targeting Control":to_rgba(non_targeting_color, alpha = 0.5)},
edgecolors = [ATG5_color, autophagy_color, non_targeting_color], linewidth = 0.25
)
# sns.swarmplot(df, x = "values", y = "labels", ax = axs, dodge = False, hue = "labels", s = 3, linewidth = 0.25, fill = None)
axs.set_ylabel("sgRNA target", fontsize = 6)
axs.set_xlabel("fold enrichment of sgRNA in isolated cells over library", fontsize = 6)
axs.vlines(x = median_ATG5, ymin = -0.4, ymax = 0.4, color = ATG5_color, linewidth = 0.5 )
axs.vlines(x = median_autophagy_values, ymin = 0.6, ymax = 1.4, color = autophagy_color, linewidth = 0.5 )
axs.vlines(x = median_non_targeting_controls, ymin = 1.6, ymax = 2.4, color = non_targeting_color, linewidth = 0.5 )
#adjust limits so that ticks end at edge of scale
axs.set_xlim(4, 1000)
axs.set_xscale("log")
axs.tick_params(axis='both', which='major', labelsize=6, size = 2, width = 0.25)
axs.spines["bottom"].set_linewidth(0.25)
axs.spines["left"].set_linewidth(0.25)
axs.legend(bbox_to_anchor=(1.5, 1), frameon=False).remove()
fig.tight_layout()
fig.savefig("../plots/Screen_A1_swarmplot_autophagy_genes.pdf", transparent=False, format='pdf', bbox_inches='tight')
/home/sophia/mambaforge-pypy3/envs/SPARCS/lib/python3.9/site-packages/seaborn/categorical.py:3544: UserWarning: 27.8% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
warnings.warn(msg, UserWarning)

4. Figure 3
4.1. Figure 3A
Only the cropped image regions shown in the figure are deposited in the GitHub Repository. The whole slide images for the shown training slides are deposited on zenodo.
[8]:
from tifffile import imread
def colorize(im, color, clip_percentile=0.0):
"""
Helper function to create an RGB image from a single-channel image using a
specific color.
"""
# Check that we do just have a 2D image
if im.ndim > 2 and im.shape[2] != 1:
raise ValueError('This function expects a single-channel image!')
# Rescale the image according to how we want to display it
im_scaled = im.astype(np.float32) - np.percentile(im, clip_percentile)
im_scaled = im_scaled / np.percentile(im_scaled, 100 - clip_percentile)
im_scaled = np.clip(im_scaled, 0, 1)
# Need to make sure we have a channels dimension for the multiplication to work
im_scaled = np.atleast_3d(im_scaled)
# Reshape the color (here, we assume channels last)
color = np.asarray(color).reshape((1, 1, -1))
return im_scaled * color
def generate_composite(images, colors = [(0, 0, 1),(0, 1, 0), (1, 0, 0), (1, 0, 1)], plot = False):
"""
Helper function to generate a colorized overalyed image from multiple individual images
"""
colorized = []
for image, color in zip(images, colors):
image = colorize(image, color, 0.0)
colorized.append(image)
if plot:
for i in colorized:
plt.figure()
plt.imshow(i)
image = colorized[0]
for i in range(len(colorized)-1):
image += colorized[i+1]
return(np.clip(image, 0, 1))
fig, axs = plt.subplots(3, 4, figsize = (37.5, 28))
images = [f"../data/example_images_training_data/WT_Torin",
f"../data/example_images_training_data/Library_Unstim",
f"../data/example_images_training_data/ATG5_Torin"
]
channels = ["mCherryLC3", "WGA488", "Hoechst" ]
for i, image_path in enumerate(images):
image = []
for channel in channels:
image.append(imread(f"{image_path}_{channel}.tif"))
image = np.array(image)
colorized = generate_composite(image, ((1, 0, 0), (0,1, 0), (0, 0, 1) ))
axs[i, 3].imshow(colorized)
axs[i, 3].axis("off")
axs[i, 0].imshow(image[0], cmap = "gray")
axs[i, 0].xaxis.set_visible(False)
axs[i, 0].tick_params(left=False, labelleft=False)
axs[i, 1].imshow(image[1], cmap = "gray")
axs[i, 1].axis("off")
axs[i, 2].imshow(image[2], cmap = "gray")
axs[i, 2].axis("off")
axs[0, 0].set_title(f"mCherry-LC3", fontsize = 30)
axs[0, 1].set_title(f"membrane", fontsize = 30)
axs[0, 2].set_title(f"Hoechst", fontsize = 30)
axs[0, 3].set_title(f"mCherry-LC3\nmembrane\nHoechst", fontsize = 30)
axs[0, 0].set_ylabel("wt \n + Torin-1", rotation = 0, fontsize = 30, labelpad=70)
axs[1, 0].set_ylabel("library", rotation = 0, fontsize = 30, labelpad=70)
axs[2, 0].set_ylabel("ATG5-/- \n + Torin-1", rotation = 0, fontsize = 30, labelpad=70)
fig.tight_layout()
fig.savefig("../plots/Classifier2_Example_Training_Data.pdf")

4.2. Figure 3C
[9]:
layers = ["conv2d5", "conv2d6", "conv2d7","conv2d8","conv2d9", "linear1", "linear2", "linear3"]
for layer in layers:
input = f"../data/classifier_2.1_Test_Data/UMAP_data/Raw_data_UMAP_{layer}.csv"
df_train_umap = pd.read_csv(input)
outdir = f"../plots/UMAP_classifier2.1_layers/"
### Plot 1 Labels
fig, axs = plt.subplots(1, 1, figsize = (10, 10))
color_dict = dict({'T_02_stim_Cr203_C6':"#B3262A",
"T_02_stim_wt":'#2f559a',
"T_02_unstim_wt":"#5AADC5",
})
ax = sns.scatterplot(df_train_umap.sample(frac=1, random_state = 19), x = "UMAP_1", y = "UMAP_2", s = 4, hue = "class_label", alpha = 1, palette = color_dict, ax = axs, edgecolor = None, rasterized=True)
axs.tick_params(size = 3.5*1.773)
legend = ax.get_legend()
ax.get_legend().remove()
ax1_divider = make_axes_locatable(ax)
cax1 = ax1_divider.append_axes("right", size="2%", pad="2%")
cax1.axis("off")
cax1.legend(*ax.get_legend_handles_labels(), bbox_to_anchor = (1, 1))
ax.set_aspect('equal', adjustable='box')
# fig.tight_layout()
fig.savefig(f"{outdir}/UMAP_test_data_labels_{layer}.pdf", dpi = 500)
### PLOT 2: prob unstim
fig, axs = plt.subplots(1, 1, figsize = (10, 10))
ax = sns.scatterplot(data = df_train_umap.sample(frac=1, random_state = 19), x = "UMAP_1", y = "UMAP_2", s = 4, hue = "prob_unstim", alpha = 1, ax = axs, palette=cmap_classification_score, edgecolor = None, rasterized = True)
axs.tick_params(size = 3.5*1.773)
norm = plt.Normalize(0, 1)
sm = plt.cm.ScalarMappable(cmap=cmap_classification_score, norm=norm)
sm.set_array([])
# Remove the legend and add a colorbar
ax.get_legend().remove()
ax1_divider = make_axes_locatable(ax)
cax1 = ax1_divider.append_axes("right", size="2%", pad="2%")
cb1 = fig.colorbar(sm, cax=cax1)
cax1.tick_params(size = 3.5*1.773)
ax.set_aspect('equal', adjustable='box')
fig.savefig(f"{outdir}/UMAP_test_data_prob_unstim_{layer}.pdf", dpi = 500)
















[10]:
layers = ["conv2d8"]
for layer in layers:
input = f"../data/classifier_2.1_Test_Data/UMAP_data/Raw_data_UMAP_{layer}.csv"
df_train_umap = pd.read_csv(input)
outdir = f"../plots/UMAP_classifier2.1_layers/"
### Plot 1 Labels
fig, axs = plt.subplots(1, 1, figsize = (10, 10))
color_dict = dict({'T_02_stim_Cr203_C6':"#B3262A",
"T_02_stim_wt":'#2f559a',
"T_02_unstim_wt":"#5AADC5",
})
ax = sns.scatterplot(df_train_umap.sample(frac=1, random_state = 19), x = "UMAP_1", y = "UMAP_2", s = 10, hue = "class_label", alpha = 0.5, palette = color_dict, ax = axs, edgecolor = None, rasterized=True)
axs.tick_params(size = 3.5*1.773)
legend = ax.get_legend()
ax.get_legend().remove()
ax1_divider = make_axes_locatable(ax)
cax1 = ax1_divider.append_axes("right", size="2%", pad="2%")
cax1.axis("off")
cax1.legend(*ax.get_legend_handles_labels(), bbox_to_anchor = (1, 1))
ax.set_aspect('equal', adjustable='box')
# fig.tight_layout()
fig.savefig(f"{outdir}/UMAP_test_data_labels_{layer}.pdf", dpi = 500)
### PLOT 2: prob unstim
fig, axs = plt.subplots(1, 1, figsize = (10, 10))
ax = sns.scatterplot(data = df_train_umap.sample(frac=1, random_state = 19), x = "UMAP_1", y = "UMAP_2", s = 4, hue = "prob_unstim", alpha = 1, ax = axs, palette=cmap_classification_score, edgecolor = None, rasterized = True)
axs.tick_params(size = 3.5*1.773)
norm = plt.Normalize(0, 1)
sm = plt.cm.ScalarMappable(cmap=cmap_classification_score, norm=norm)
sm.set_array([])
# Remove the legend and add a colorbar
ax.get_legend().remove()
ax1_divider = make_axes_locatable(ax)
cax1 = ax1_divider.append_axes("right", size="2%", pad="2%")
cb1 = fig.colorbar(sm, cax=cax1)
cax1.tick_params(size = 3.5*1.773)
ax.set_aspect('equal', adjustable='box')
# fig.savefig(f"{outdir}/UMAP_test_data_prob_unstim_{layer}.pdf", dpi = 500)


4.3. Figure 3D
[11]:
#load previously saved data
subsampled_classification_results = pd.read_csv("../data/T02_Raw_Data_classifier_metrics_classifier_2.1_2.2.csv")
versions = ["classifier_2.1"]
from sklearn.metrics import precision_recall_curve, roc_curve, auc
labels = ["stim", "ATG5_KO_Cr203_C6"]
results_all = {}
auc_values = {}
for version in versions:
data = subsampled_classification_results.loc[[x in labels for x in subsampled_classification_results.label]].get([version, "label"])
true_label = [0 if x == "stim" else 1 for x in data.label]
scores = data[version].tolist()
fpr, tpr, thresholds = roc_curve(true_label, scores)
precision, recall, thresholds = precision_recall_curve(true_label, scores)
fdr = 1 - precision
auc_roc = auc(fpr, tpr)
auc_precision_recall = auc(recall, precision)
auc_values[version] = pd.DataFrame({"ROC":[auc_roc], "precision_recall":[auc_precision_recall]})
fig, axs = plt.subplots(1, 1, figsize = (3, 2))
axs.plot(fpr, tpr, color = "black")
axs.set_xlabel("false positive rate")
axs.set_ylabel("true positive rate")
axs.tick_params(axis='both', which='major', labelsize=6, size = 3.5*1.773)
auc_results = "{0:.4g}".format(auc_values[version].ROC[0])
axs.text(x = 0.5, y= 0.45, s= f"AUC = {auc_results}", fontsize = 6, horizontalalignment= "center")
fig.savefig(f"../plots/{version}/T02_{version}_tpr_fpr_curve.pdf")

4.4. Figure 3E
[12]:
#load previously saved data
subsampled_classification_results = pd.read_csv("../data/T02_Raw_Data_classifier_metrics_classifier_2.1_2.2.csv")
slides = ["unstim", "stim", "ATG5_KO_Cr203_C6"]
labels = ["wt", "wt + Torin-1", "ATG5-/- + Torin-1"]
versions = ["classifier_2.1"]
for version in versions:
fig, axs = plt.subplots(3, 1, figsize = (6, 5), sharey = True)
sns.despine(offset = 10)
for i, slide in enumerate(slides):
data = subsampled_classification_results.loc[[x == slide for x in subsampled_classification_results.label]].get([version])
hist = axs[i].hist(data, bins = 100, cumulative = False, histtype='step', bottom = 0, weights = np.ones(len(data)) / len(data)*100)
axs[i].set_ylim(0.01, 100)
axs[i].set_yscale("log")
axs[i].set_xlim(0, 1)
axs[i].yaxis.set_major_formatter(FormatStrFormatter('%.3g'))
axs[i].set_ylabel("cell count [%]")
axs[i].tick_params(size = 3.5*1.773)
axs[i].text(1.05, 1, s = labels[i])
axs[i].set_xlabel("classification score")
fig.tight_layout()
fig.savefig(f"../plots/{version}/T02_{version}_Histogram_classifier_performance_lines.pdf", bbox_inches='tight')

4.5. Figure 3F
[13]:
#load previously saved data
subsampled_classification_results = pd.read_csv("../data/T02_Raw_Data_classifier_metrics_classifier_2.1_2.2.csv")
sample_label_lookup = {"unstim":"wt", "stim":"wt + Torin-1", "ATG5_KO_Cr203_C6":"ATG5-/- + Torin-1"}
versions = ["classifier_2.1"]
n_cells = subsampled_classification_results.label.value_counts()[0]
for version in versions:
_df = subsampled_classification_results.copy()
_df["threshold_label"] = _df[version] >= 0.5
heatmap_data = pd.DataFrame(_df.groupby("label").sum().threshold_label/n_cells*100)
heatmap_data.columns = ["unstimulated"]
heatmap_data["stimulated"] = 100 - heatmap_data.unstimulated
fig, axs = plt.subplots(1, 2, figsize = (2.3,2), sharex=False, sharey=True, gridspec_kw = {'wspace':0, 'hspace':0})
cbar_ax1 = fig.add_axes([0.4, 0, 0.54, 0.03])
cbar_ax2 = fig.add_axes([0.4, 0.03, 0.54, 0.03])
sns.heatmap(heatmap_data.stimulated.to_frame().sort_index(ascending= False), annot = True, ax = axs[0],
cmap= cmap_stim, cbar_kws={'label': '% cells in class', "orientation": "horizontal"}, fmt = ".3g", vmax=100, vmin = 0, cbar_ax=cbar_ax1,
linewidths=0.5, linecolor='black',
annot_kws={"size": 6})
sns.heatmap(heatmap_data.unstimulated.to_frame().sort_index(ascending= False), annot = True, ax = axs[1],
cmap= cmap_unstim, cbar_kws={ "orientation": "horizontal"}, fmt = ".3g", vmax=100, vmin = 0, cbar_ax=cbar_ax2,
linewidths=0.5, linecolor='black',
annot_kws={"size": 6})
#remove axis from second colorbar
cbar_ax2.axis("off")
#reassign labels into readable format
labels = [sample_label_lookup[item.get_text()] for item in axs[0].get_yticklabels()]
axs[0].set_yticklabels(labels)
axs[0].set_xticklabels(["on"])
axs[1].set_xticklabels(["off"])
#move x-axis ticks to top and remove ticks
axs[0].xaxis.set_ticks_position('top')
axs[1].xaxis.set_ticks_position('top')
axs[0].tick_params(left=False, top=False, bottom = False, rotation = 0)
axs[1].tick_params(left=False, top=False, bottom = False, rotation = 0)
#remove ylabel
axs[0].set_ylabel(None)
axs[1].set_ylabel(None)
fig.tight_layout()
fig.savefig(f"../plots/{version}/T02_{version}_heatmap.pdf", bbox_inches='tight')
/tmp/ipykernel_2144530/59980073.py:50: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.
fig.tight_layout()

4.6. Figure 3G
[14]:
#load previously saved data
subsampled_classification_results = pd.read_csv("../data/T02_Raw_Data_classifier_metrics_classifier_2.1_2.2.csv")
thresholds = [0.99999, 0.9999, 0.999, 0.9975, 0.995, 0.99, 0.985, 0.98, 0.975, 0.97, 0.965, 0.96]
labels = ["stim", "ATG5_KO_Cr203_C6"]
color1 = "#B3262A"
color2 = "#2f559a"
versions = ["classifier_2.1"]
for version in versions:
results = pd.read_csv(f"../data/{version}_Test_Data/{version}_precision_recall_data.csv", index_col = 0)
fig, axs = plt.subplots(1, 1, figsize = (4, 3))
sns.despine(offset = 10)
axs.plot(results.index.tolist(), results.precision.tolist(), clip_on = False, linewidth = 0.5, color = color1)
axs.scatter(x = results.index.tolist(), y = results.precision.tolist(), clip_on = False, alpha = 0.5, s = 7, color = color1)
axs.set_ylabel("Precision [%]", color = color1, fontsize = 6)
axs.set_xlabel("classification threshold", fontsize = 6)
axs.set_xlim(0.96, 1)
if version == 'classifier_2.2':
axs.set_ylim(96, 100)
elif version == 'classifier_2.1':
axs.set_ylim(98, 100)
axs.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
# twin object for two different y-axis on the sample plot
axs2=axs.twinx()
sns.despine(offset = 10, right = False)
# make a plot with different y-axis using second axis object
axs2.plot(results.index.tolist(), results.recall.tolist(), color = color2, clip_on = False, linewidth = 0.5)
axs2.scatter(x = results.index.tolist(), y = results.recall.tolist(), color = color2, clip_on = False, alpha = 0.5, s = 7, linewidth = 1)
axs2.set_ylabel("Recall [%]", color = color2, fontsize = 6)
if version == 'classifier_2.2':
axs2.set_ylim(91, 100)
elif version == 'classifier_2.1':
axs2.set_ylim(75, 100)
axs.tick_params(axis='both', which='major', labelsize=6)
axs2.tick_params(axis='both', which='major', labelsize=6)
#setting up X-axis label color to yellow
axs.yaxis.label.set_color(color1)
axs.tick_params(axis='y', colors=color1)
axs.spines['left'].set_color(color1)
axs2.yaxis.label.set_color(color2)
axs2.tick_params(axis='y', colors=color2)
axs2.spines['left'].set_color(color1)
axs2.spines['right'].set_color(color2)
fig.tight_layout()
fig.savefig(f"../plots/{version}/T02_{version}_Precision_recall_overlayed.pdf", bbox_inches='tight')

5. Figure 4
5.1. Figure 4B
[15]:
df_umap = pd.read_csv("../data/Screen_Analysis/screen_batches/Screen_B2_raw_data_UMAP.csv", index_col = 0)
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LinearSegmentedColormap
fig, axs = plt.subplots(1, 1, figsize = (10, 10))
c1='#B3262A'
c2='#2f559a'
c3 = "#FFFFFF"
cmap1 = LinearSegmentedColormap.from_list("Prob_Unstim", [c2, c1], N=500)
cmap = cmap1
sns.scatterplot(df_umap.sort_values(by= "class_label").sample(frac=1, random_state = 16), x = "UMAP_1", y = "UMAP_2", s = 1, hue = "prob_unstim", alpha = 1, palette = cmap, ax = axs, edgecolor = None, rasterized=True)
sns.move_legend(axs, "upper left", bbox_to_anchor=(1, 1))
axs.set_aspect('equal', adjustable='box')
axs.tick_params(size = 3.5*1.773)
divider = make_axes_locatable(axs)
cax = divider.append_axes('right', size='5%', pad=0.05)
norm = plt.Normalize(0, 1.0)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
fig.colorbar(sm, cax=cax, orientation='vertical', label = "Prob(Unstim)")
# Remove the legend and add a colorbar
axs.get_legend().remove()
fig.tight_layout()
fig.savefig("../plots/Screen_Analysis/Screen_B2_UMAP_classification_score_colored.pdf", dpi = 500)

5.2. Figure 4C
[16]:
all_results = pd.read_parquet("../data/Screen_Analysis/screen_batches/ScreenB2_raw_data_cell_classification_values_with_bin_annotation.parquet")
bins = {"bin1": (0.99999, 1.0),
"bin2": (0.999, 0.99999),
"bin3": (0.9975, 0.999),
"bin4": (0.995, 0.9975),
"bin5": (0.99, 0.995),
"bin6": (0.98, 0.99),
}
colors = ['#2f559a',"#B3262A","#5AADC5","#E46425","#F5DB12","#231F20","#D1D3D4"]
fig, axs = plt.subplots(1, 2, figsize = (14, 4))
sns.despine(offset = 0)
axs[0].hist(all_results.Prob_Unstim, bins = 200, bottom = 0, histtype = "stepfilled", color = "black");
axs[0].set_yscale("log")
axs[0].set_ylabel("cell count")
axs[0].set_xlabel("classification score")
axs[0].set_ylim(50, 10000000)
axs[0].set_xlim(0, 1)
hist = axs[1].hist(all_results[all_results.Prob_Unstim >= 0.975].Prob_Unstim, bins = 100, bottom = 0, histtype = "step", linewidth = 1, color = "black");
axs[1].set_yscale("log")
axs[1].set_ylabel("cell count")
axs[1].set_xlabel("classification score")
axs[1].set_xlim(0.975, 1)
axs[1].set_ylim(50, 10000)
for i, _bin in enumerate(bins.keys()):
lower, upper = bins[_bin]
rectangle = plt.Rectangle((lower, 0), upper-lower, hist[0].max(), fc=colors[i], alpha = 0.4, edgecolor = colors[i])
plt.gca().add_patch(rectangle)
fig.savefig("../plots/Screen_Analysis/ScreenB2_binning_strategy.pdf")

5.3. Figure 4D
[17]:
df_umap = pd.read_csv("../data/Screen_Analysis/screen_batches/Screen_B2_raw_data_UMAP.csv", index_col = 0)
fig, axs = plt.subplots(1, 1, figsize = (10, 10))
color_dict = dict({'bin1':'#2f559a',
"bin2":"#B3262A",
"bin3":"#5AADC5",
"bin4":"#E46425",
"bin5":"#F5DB12",
"bin6":"#231F20",
"autophagy-on":"#D1D3D4"
})
sns.scatterplot(df_umap.sort_values(by= "class_label").sample(frac=1, random_state = 16), x = "UMAP_1", y = "UMAP_2", s = 1, hue = "class_label", alpha = 1, palette = color_dict, ax = axs, edgecolor = None, rasterized=True)
sns.move_legend(axs, "upper left", bbox_to_anchor=(1, 1))
axs.set_aspect('equal', adjustable='box')
axs.tick_params(size = 3.5*1.773)
fig.tight_layout()
fig.savefig("../plots/Screen_Analysis/Screen_B2_UMAP_bins_colored.pdf", dpi = 500)

5.4. Figure 4F
p-values were calculated in prism with the file /data/Screen_Analysis/zscore_pvalue_analysis.pzfx and added in illustrator
individual genes were annotated manually in illustrator on the basis of the interactive plots
[18]:
all_results = pd.read_csv("../data/Screen_Analysis/Raw_Data_Sequencing_Analysis_gRNA_count_table.csv", index_col = 0)
import seaborn as sns
import scipy.stats as stats
_plot = all_results.get(["gene", "autophagy_gene"] + [x for x in all_results.columns if x.startswith("enrichment") or x.startswith("Enrichment")])
_plot = _plot.replace(0, np.NaN)
_plot = pd.concat([_plot.get(["gene", "autophagy_gene"]), stats.zscore(_plot[[x for x in _plot.columns if x.startswith("enrichment") or x.startswith("Enrichment")]], nan_policy = "omit")], axis = 1)
_plot = _plot.melt(id_vars= ["gene", "autophagy_gene"])
_plot.loc[_plot.autophagy_gene, "gene"] = "01_autophagy_gene"
_plot["gene"] = ["02_other" if x not in ["01_autophagy_gene", "Non-Targeting Control"] else x for x in _plot.gene]
_plot["label"] = [str(x) + str(y) for x,y in zip(_plot.variable, _plot.gene)]
_plot = _plot.sort_values(["variable", "gene"])
_plot = _plot.dropna()
import matplotlib.colors
colors = ['#2f559a', '#2f559a', '#2f559a', "#B3262A", "#B3262A", "#B3262A", "#5AADC5", "#5AADC5", "#5AADC5", "#E46425", "#E46425", "#E46425", "#F5DB12", "#F5DB12", "#F5DB12", "#231F20", "#231F20", "#231F20"]
labels_data = ['enrichment_bin101_autophagy_gene',
'enrichment_bin102_other',
'enrichment_bin1Non-Targeting Control',
'enrichment_bin201_autophagy_gene',
'enrichment_bin202_other',
'enrichment_bin2Non-Targeting Control',
'enrichment_bin301_autophagy_gene',
'enrichment_bin302_other',
'enrichment_bin3Non-Targeting Control',
'enrichment_bin401_autophagy_gene',
'enrichment_bin402_other',
'enrichment_bin4Non-Targeting Control',
'enrichment_bin501_autophagy_gene',
'enrichment_bin502_other',
'enrichment_bin5Non-Targeting Control',
'enrichment_bin601_autophagy_gene',
'enrichment_bin602_other',
'enrichment_bin6Non-Targeting Control']
labels_plot = ['autophagy',
'other',
'NTC',
'autophagy',
'other',
'NTC',
'autophagy',
'other',
'NTC',
'autophagy',
'other',
'NTC',
'autophagy',
'other',
'NTC',
'autophagy',
'other',
'NTC']
color_dict = dict(zip(labels_data, colors))
label_dict = dict(zip(labels_data, labels_plot))
# ticks = [0, 1, 2, 3.5, 4.5, 5.5, 7, 8, 9, 10.5, 11.5,12.5, 14, 15, 16, 17.5, 18.5, 19.5]
# position_dict = dict(zip(label_dict.keys(), ticks))
# colors_dict = dict(zip(ticks, color_dict.values()))
# label_dict = dict(zip(ticks, label_dict.values()))
import scipy.stats as stats
mm = 1/2.54 * 1/10 # mm in inches
fig, ax = plt.subplots(1, 1, figsize = (64.65*mm, 58.81*mm))
sns.despine(offset = 3)
flierprops = dict(marker='o')
ax = sns.boxplot(y = "label", x = "value", data = _plot, palette = color_dict, ax = ax, fliersize = 0.2, linewidth=0.25, orient="h", flierprops = flierprops )
#change color of fliers to max boxplot colors
colors = list(color_dict.values())
ids = [6, 13, 20, 27, 34, 41, 48, 55, 62, 69, 76, 83, 90, 97, 104, 111, 118, 125]
for _id, col in zip(ids, colors):
test = ax.get_children()[_id]
test.set_markeredgecolor(col)
test.set_markerfacecolor(col)
# setup x and y-axis correctly
ax.set_xlabel("sgRNA enrichment relative to \ninput library [z-score per bin]")
ax.set_ylabel("sgRNA category")
ax.tick_params(axis='both', which='major', labelsize=6)
ax.tick_params(size = 2, width = 0.25)
ax.spines["bottom"].set_linewidth(0.25)
ax.spines["left"].set_linewidth(0.25)
#correct labels to annotate for plot readability
labels = [label_dict[item.get_text()] for item in ax.get_yticklabels()]
ax.set_yticklabels(labels)
ax.set_xlim(-1.5, 22)
ax.set_xticks([0, 12, 24])
fig.tight_layout()
fig.savefig("../plots/Screen_Analysis/Zscores_of_enrichment_values.pdf", transparent=True)

[19]:
# export wide dataframe for calculation of p-values in prism
_plot = all_results.get(["gene", "autophagy_gene"] + [x for x in all_results.columns if x.startswith("enrichment") or x.startswith("Enrichment")])
_plot = _plot.replace(0, np.NaN)
_plot = pd.concat([_plot.get(["gene", "autophagy_gene"]), stats.zscore(_plot[[x for x in _plot.columns if x.startswith("enrichment") or x.startswith("Enrichment")]], nan_policy = "omit")], axis = 1)
_plot["gene_id"] = _plot.index.tolist()
_plot = _plot.melt(id_vars= ["gene", "autophagy_gene", "gene_id"])
_plot.loc[_plot.autophagy_gene, "gene"] = "autophagy"
_plot.loc[_plot.gene == "Non-Targeting Control", "gene"] = "NTC"
_plot["gene"] = ["other" if x not in ["autophagy", "NTC"] else x for x in _plot.gene]
_plot["label"] = [str(x).replace("enrichment_", "")+ "_" + str(y) for x,y in zip(_plot.variable, _plot.gene)]
_plot = _plot.sort_values(["variable", "gene"])
_plot = _plot.dropna()
pivoted_plot = pd.pivot(_plot, index= "gene_id", columns = "label", values= "value")
pivoted_plot.to_csv("../data/Screen_Analysis/Raw_Data_zcores_of_enrichment_pvalue_calculation.csv")
[20]:
# labelling of individual genes was done manually in illustrator with interactive html plots as a guide
import plotly.express as px
import plotly.io as pio
pio.renderers.default = 'iframe'
_plot = all_results.get(["gene", "autophagy_gene"] + [x for x in all_results.columns if x.startswith("enrichment") or x.startswith("Enrichment")])
_plot = _plot.replace(0, np.NaN)
_plot = pd.concat([_plot.get(["gene", "autophagy_gene"]), stats.zscore(_plot[[x for x in _plot.columns if x.startswith("enrichment") or x.startswith("Enrichment")]], nan_policy = "omit")], axis = 1)
_plot["gene_id"] = _plot.index.tolist()
_plot = _plot.melt(id_vars= ["gene", "autophagy_gene", "gene_id"])
_plot["gene_type"] = _plot.gene
_plot.loc[_plot.autophagy_gene, "gene_type"] = "autophagy"
_plot.loc[_plot.gene == "Non-Targeting Control", "gene_type"] = "NTC"
_plot["gene_type"] = ["other" if x not in ["autophagy", "NTC"] else x for x in _plot.gene_type]
_plot["label"] = [str(x).replace("enrichment_", "")+ "_" + str(y) for x,y in zip(_plot.variable, _plot.gene_type)]
_plot = _plot.sort_values(["variable", "gene_type"])
_plot = _plot.dropna()
fig = px.box(data_frame = _plot.sort_values(by = "label"), x = 'value', y = "label", hover_name = "gene")
order_axis = ['bin1_autophagy', 'bin1_other', 'bin1_NTC',
'bin2_autophagy', 'bin2_other', 'bin2_NTC',
'bin3_autophagy', 'bin3_other', 'bin3_NTC',
'bin4_autophagy', 'bin4_other', 'bin4_NTC',
'bin5_autophagy', 'bin5_other', 'bin5_NTC',
'bin6_autophagy', 'bin6_other', 'bin6_NTC']
order_axis.reverse()
fig.update_yaxes(categoryorder='array', categoryarray= order_axis)
fig.show()
#save to html
fig.write_html("../plots/Screen_Analysis/interactive_Zscores_of_enrichment_values.html")
5.5. Figure 4G
[21]:
all_results = pd.read_csv("../data/Screen_Analysis/Raw_Data_Sequencing_Analysis_gRNA_count_table.csv", index_col = 0)
autophagy_genes_list = pd.read_csv("../data/Screen_Analysis/Raw_Data_Autophagy_Genes_list.txt", header = None)[0].tolist()
[22]:
from matplotlib.colors import to_rgba
n_plots = len(["bin1", "bin2", "bin3", "bin4", "bin5", "bin6"])
colors = ['#2f559a', "#B3262A", "#5AADC5","#E46425", "#F5DB12", "#231F20"]
colors_50per_alpha = [to_rgba(color, alpha = 0.5) for color in colors]
white = "#FFFFFF"
fig, axs = plt.subplots(n_plots,1, figsize = (118.5 * mm, 220 * mm), sharey = False,)
pivot_files = {}
data_plots = None
sns.despine(offset = 0)
for i, _bin in enumerate(["bin1", "bin2", "bin3", "bin4", "bin5", "bin6"]):
data = all_results.get(["gene", "gRNA_names", f"enrichment_{_bin}", "depleted_gene", f"ngRNAs_{_bin}"])
data = data[data[f"ngRNAs_{_bin}"] >=2]
data["gRNA_names"] = ["gRNA." + x.split(".")[1] for x in data["gRNA_names"]]
_df = data.get(["gene"] + [x for x in data.columns if x.startswith("enrichment")])
_df = _df.groupby("gene").mean()
if data_plots is None:
data_plots = _df
else:
data_plots = data_plots.merge(_df, left_index = True, right_index = True, how = "outer")
_dat = data.groupby("gene")[f"enrichment_{_bin}"].mean().to_frame().sort_values(by =f"enrichment_{_bin}", ascending = False).head(50)
labels = _dat.index.tolist()
x = list(range(len(labels)))
y = _dat[f"enrichment_{_bin}"]
color = [colors_50per_alpha[i] if x in autophagy_genes_list else white for x in labels]
pivot_data = data.pivot_table(index = "gene", values = f"enrichment_{_bin}", columns = 'gRNA_names')
pivot_data = pivot_data.loc[labels].dropna(axis = 1, how = "all")
pivot_files[_bin] = pivot_data
axs[i].bar(x, y, align='center', color = color, width=1.0, edgecolor = colors[i], clip_on = True, linewidth=0.25)
axs[i].set_xticks(ticks = x, labels = labels, rotation=90, ha='center');
axs[i].tick_params(axis='both', which='major', labelsize=6, pad = 0, bottom = False)
axs[i].tick_params(size = 2, width = 0.25)
axs[i].spines["bottom"].set_linewidth(0.25)
axs[i].spines["left"].set_linewidth(0.25)
axs[i].set_ylabel("enrichment")
axs[i].set_ylim(0, np.ceil(pivot_data.max().max()/100)*100)
axs[i].set_xlim(-0.6,len(labels))
for column in pivot_data.columns:
y = pivot_data[column]
axs[i].scatter(x, y, color = colors[i], s = 2, clip_on = True)
axs[i].set_xticks(ticks = x, labels = labels, rotation=90, ha='center');
#make those ticks that correspond to autophagy genes bold
for label in axs[i].get_xticklabels() :
if label.get_text() in autophagy_genes_list:
font_prop = font_manager.FontProperties(weight = "bold", size = 6)
label.set_fontproperties(font_prop)
fig.tight_layout()
fig.savefig("../plots/Screen_Analysis/enrichment_bins.pdf", transparent = False)

5.6. Figure 4H
[23]:
all_results = pd.read_csv("../data/Screen_Analysis/Raw_Data_Sequencing_Analysis_gRNA_count_table.csv", index_col = 0)
autophagy_genes_list = pd.read_csv("../data/Screen_Analysis/Raw_Data_Autophagy_Genes_list.txt", header = None)[0].tolist()
[24]:
#get genes to plot for heatmaa
genes_df = all_results.get(["gene", "ngRNAs_normCountsfiltered_bin1", "ngRNAs_normCountsfiltered_bin2", "ngRNAs_normCountsfiltered_bin3", "ngRNAs_normCountsfiltered_bin4", "ngRNAs_normCountsfiltered_bin5", "ngRNAs_normCountsfiltered_bin6"]).set_index("gene")
genes_df = genes_df.loc[genes_df.index != "Non-Targeting Control", :]
genes_df = genes_df[~(genes_df.index.duplicated())]
genes_df = genes_df.sum(axis = 1).sort_values(ascending=False)
genes_df_filtered = genes_df[genes_df >= 9].to_frame()
genes_df_filtered.columns = ["gRNA_count"]
genes_to_plot = genes_df_filtered.index.tolist()
[25]:
# heatmap visualization
results = pd.DataFrame(columns = ["gene", "bin1", "bin2", "bin3", "bin4", "bin5", "bin6"])
for i, gene in enumerate(genes_to_plot):
_df = all_results[all_results.gene == gene]
_df = (_df.get([x for x in _df.columns if x.startswith("ngRNAs_nor")])).iloc[0]
results.loc[i, ["bin1", "bin2", "bin3", "bin4", "bin5", "bin6"]] = _df.values
results.loc[i, "gene"] = gene
results = results.set_index("gene")
indexes = results.index.tolist()
indexes.reverse()
results = results.loc[indexes, :]
fig, axs = plt.subplots(1, 2, figsize=(3, 9))
sns.despine(left = True, bottom = True,ax = axs[0])
sns.despine(left = True, bottom = True, top = False, ax = axs[1])
axs[0].pcolormesh(results.values.astype("int"), cmap = "Blues", snap = True,)
axs[0].set(xlabel = "bin")
axs[0].set_yticks(np.array(range(len(results.index.tolist()))) + 0.5);
axs[0].set_yticklabels(results.index.tolist(), rotation = 0);
axs[0].set_xticks(np.array([0, 1,2, 3,4, 5]) + 0.5);
axs[0].set_xticklabels([1, 2, 3, 4, 5, 6]);
axs[0].tick_params(axis='both', which='major', labelsize=6, pad = 2.5, left = False)
axs[0].xaxis.set_ticks_position('top')
for label in axs[0].get_yticklabels() :
if label.get_text() in autophagy_genes_list:
font_prop = font_manager.FontProperties(weight = "bold", size = 6)
label.set_fontproperties(font_prop)
width = 0.5
axs[1].barh(results.index.tolist(), results.sum(axis = 1), color = "#D1D3D4", height = 0.7)
axs[1].set_ylim(-0.5, len(results.index.tolist())-0.5)
axs[1].set_xlim(0, 24)
axs[1].set_xticks([0, 6, 12, 18, 24])
axs[1].set_xlabel("total sgRNAs")
axs[1].get_yaxis().set_visible(False)
axs[1].tick_params(axis='both', which='major', labelsize=6, pad = 2.5, left = False)
axs[1].xaxis.set_ticks_position('top')
fig.tight_layout()
fig.savefig("../plots/Screen_Analysis/heatmap_ngRNAS.pdf")

6. Figure 5
6.1. Figure 5A
enrichment values were calulcated in python and the enrichment value mapped to respective HEX color. The figure was then generated manually in illustrator.
[26]:
all_results = pd.read_csv("../data/Screen_Analysis/Raw_Data_Sequencing_Analysis_gRNA_count_table.csv", index_col = 0)
autophagy_genes_list = pd.read_csv("../data/Screen_Analysis/Raw_Data_Autophagy_Genes_list.txt", header = None)[0].tolist()
data_plots = None
for i, _bin in enumerate(["bin1", "bin2", "bin3", "bin4", "bin5", "bin6"]):
data = all_results.get(["gene", "gRNA_names", f"enrichment_{_bin}", "depleted_gene", f"ngRNAs_{_bin}"])
data = data[data[f"ngRNAs_{_bin}"] >=2]
data["gRNA_names"] = ["gRNA." + x.split(".")[1] for x in data["gRNA_names"]]
_df = data.get(["gene"] + [x for x in data.columns if x.startswith("enrichment")])
_df = _df.groupby("gene").mean()
if data_plots is None:
data_plots = _df
else:
data_plots = data_plots.merge(_df, left_index = True, right_index = True, how = "outer")
_values = data_plots.max(axis = 1)
_to_export_kegg_plot = pd.DataFrame(_values)
_to_export_kegg_plot.columns = ["max_enrichment_bins"]
_to_export_kegg_plot["max_enrichment_bins"] = np.log2(_to_export_kegg_plot["max_enrichment_bins"])
_to_export_kegg_plot = _to_export_kegg_plot[_to_export_kegg_plot.max_enrichment_bins >= 1]
def map_to_hex(values, cmap = "Blues"):
import matplotlib.cm as cm
import matplotlib.colors as mpl_colors
minima = np.floor(min(values))
maxima = np.ceil(max(values))
print(minima, maxima)
norm = mpl_colors.Normalize(vmin=minima, vmax=maxima, clip=False)
mapper = cm.ScalarMappable(norm=norm, cmap=cm.Blues)
colors = [mapper.to_rgba(x) for x in values]
colors = [mpl_colors.to_hex(x) for x in colors]
fig, ax = plt.subplots(figsize=(6, 1))
fig.subplots_adjust(bottom=0.5)
cb1 = mpl.colorbar.ColorbarBase(ax, cmap=cm.Blues,
norm=norm,
orientation='horizontal')
cb1.set_label("fold enrichment")
fig.savefig("colorbar_KEGG_enrichment.pdf")
fig.show()
return(colors)
_to_export_kegg_plot["color"] = map_to_hex(_to_export_kegg_plot.max_enrichment_bins)
_to_export_kegg_plot = _to_export_kegg_plot.get(["color", "max_enrichment_bins"])
_to_export_kegg_plot.to_csv("../data/Screen_Analysis/Raw_Data_enrichment_values_genes_autopahgy_pathway.tsv", sep = "\t")
1.0 9.0

6.2. Figure 5B
annotated gene labels were shifted in illustrator for better visibility
[27]:
all_results = pd.read_csv("../data/Screen_Analysis/Raw_Data_Sequencing_Analysis_gRNA_count_table.csv", index_col = 0)
autophagy_genes_list = pd.read_csv("../data/Screen_Analysis/Raw_Data_Autophagy_Genes_list.txt", header = None)[0].tolist()
[28]:
#define size of circles to plot
def scale_circle_size(input):
lookup = {2:20,
3:60,
4:100}
for key in lookup.keys():
input = input.replace(key, lookup[key])
return(input)
fig, axs = plt.subplots(1, 2, figsize = (12, 6))
for i, _bin in enumerate(["bin1", "bin2"]):
df = all_results[all_results["ngRNAs_"+_bin] >= 2] #only plot genes were we found at least 2 gRNAs
df = df.set_index("gene")
df.loc["Non-Targeting Control",f"ngRNAs_{_bin}" ] = 2 #manually set NTC gRNA numbre to 2 otherwise we would get very large dots for this
df = df.replace(0, np.NaN)
#dynamically adjust x and y-axis limits according to data
ymin = df.get(f"enrichment_{_bin}").min()
ymax = df.get(f"enrichment_{_bin}").max()
xmin = df.get(f"normCounts_{_bin}").min()
xmax = df.get(f"normCounts_{_bin}").max()
axs[i].set_ylim(1, np.ceil(ymax/1000)*1000)
axs[i].set_xlim(np.floor(xmin/0.000001)*0.000001, np.ceil(xmax/0.01)*0.01)
#get autophagy genes to plot
autophagy_genes_list_plot = [x for x in autophagy_genes_list if x != "EI24"] #filter to remove EI24 since we will visualize in different color
autophagy_genes_list_plot = [x for x in autophagy_genes_list_plot if x != "ATG5"] #filter to remove ATG5 since we will visualize in different color
_autophagy_genes = [x for x in autophagy_genes_list_plot if x in df.index.tolist()]
#subset data into non-autophagy related genes and autophagy-related genes
df2 = df.loc[_autophagy_genes, :]
df1 = df.loc[[x not in _autophagy_genes+["EI24"] for x in df.index.tolist()], :]
axs[i].scatter(df1.get('normCounts_' + _bin), df1.get('enrichment_' + _bin), s = scale_circle_size(df1.get(f"ngRNAs_{_bin}")), color = "gainsboro", edgecolor = "white")
axs[i].scatter(df2.get('normCounts_' + _bin), df2.get('enrichment_' + _bin), s = scale_circle_size(df2.get(f"ngRNAs_{_bin}")), color = "#5AADC5", label = "autophagy-related", edgecolor = "white")
if "EI24" in df.index.tolist():
df3 = df.loc["EI24"]
axs[i].scatter(df3.get('normCounts_' + _bin), df3.get('enrichment_' + _bin), s = scale_circle_size(df3.get(f"ngRNAs_{_bin}")), color = "#B3262A", label = "EI24", edgecolor = "white")
if "ATG5" in df.index.tolist():
df4 = df.loc["ATG5"]
axs[i].scatter(df4.get('normCounts_' + _bin), df4.get('enrichment_' + _bin), s = scale_circle_size(df4.get(f"ngRNAs_{_bin}")), color = "#F58235", label = "ATG5", edgecolor = "white")
#add annotation
df_annotate_write_out = df[np.array([df[f"enrichment_{_bin}"] >= 40, df[f"normCounts_{_bin}"] >= 0.0005]).T.any(axis = 1).tolist()].get([f"normCounts_{_bin}", f"enrichment_{_bin}"])
df_annotate_write_out = df_annotate_write_out.loc[[x for x in df_annotate_write_out.index.tolist() if x not in autophagy_genes_list]]
df_annotate_write_out.to_csv(f"gene_annotation_enrichment_normCount_scatter_plot_{_bin}.tsv")
df_annotate = df[np.array([df[f"enrichment_{_bin}"] >= 50, df[f"normCounts_{_bin}"] >= 0.001]).T.any(axis = 1).tolist()]
labels = df_annotate.index.tolist()
labels = [x for x in labels if x not in autophagy_genes_list]
df_annotate = df_annotate.loc[labels, :]
x = df_annotate[f"normCounts_{_bin}"].tolist()
y = df_annotate[f"enrichment_{_bin}"].tolist()
for ix, txt in enumerate(labels):
axs[i].annotate(txt, (x[ix], y[ix]) )
axs[i].set_xlabel("normalized counts")
axs[i].set_ylabel("enrichment")
axs[i].set_xscale("log")
axs[i].set_yscale("log")
axs[i].set_title(_bin)
axs[i].set_aspect(1.0/axs[i].get_data_ratio(), adjustable='box')
if i == 1:
axs[i].legend(bbox_to_anchor=(1.5, 1))
fig.tight_layout()
fig.savefig("../plots/Screen_Analysis/scatterplot_gRNAs_enrichment_normcounts.pdf")

6.3. Figure 5D
[29]:
#load saved data
data = pd.read_csv("../data/Raw_Data_EI24_timecourse_experiment.csv", index_col = 0)
#groupby condition and calculate the mean for each replicate over all cells
plot = data.drop(columns = ["well", "location", "ID", "timepoint",'region']).groupby(["replicate", "Genotype", "Condition"]).mean().reset_index()
plot.columns = ["replicate", "Genotype", "label", "Mean", "time"]
# calculate the mean and SEM over the 3 biological replicates
plot2 = plot.groupby(["Genotype", "time"])["Mean"].mean().to_frame()
plot2["Sem"] = plot.groupby(["Genotype", "time"])["Mean"].sem()
plot2 = plot2.reset_index()
[30]:
genotype_color = {"WT_1":"#2F559A",
"WT_2":"#5AADC5",
'EI24KO_1':"#B3262A",
'EI24KO_2':"#E46425",
}
fig, axs = plt.subplots(1, 1, figsize = (56*mm, 58*mm))
sns.despine()
for genotype in plot.Genotype.value_counts().index:
_data = plot2[plot2.Genotype == genotype]
_data = _data.sort_values("time")
axs.scatter(_data.time, _data.Mean, color = genotype_color[genotype], clip_on = False, s = 1)
axs.plot(_data.time, _data.Mean, color = genotype_color[genotype], label = genotype, linewidth = 0.25)
lower = _data.Mean - _data.Sem
upper = _data.Mean + _data.Sem
axs.fill_between(_data.time, lower , upper, color = genotype_color[genotype], edgecolors = None, alpha = 0.1)
axs.set_ylim(0, 1)
axs.set_xlim(0, 14)
axs.set_ylabel("classification score", fontsize = 6)
axs.set_xlabel("time [h]", fontsize = 6)
axs.tick_params(axis='both', which='major', labelsize=6, size = 2, width = 0.25)
axs.spines["bottom"].set_linewidth(0.25)
axs.spines["left"].set_linewidth(0.25)
axs.set_xticks([0, 2, 4, 6, 8, 10, 12, 14])
fig.tight_layout()
fig.savefig("../plots/EI24_timecourse.pdf")

6.4. Figure 5E
scale bars and arrows indicating specks were added manually in illustrator
[31]:
from tifffile import imread
def colorize(im, color, clip_percentile=0.0):
"""
Helper function to create an RGB image from a single-channel image using a
specific color.
"""
# Check that we do just have a 2D image
if im.ndim > 2 and im.shape[2] != 1:
raise ValueError('This function expects a single-channel image!')
# Rescale the image according to how we want to display it
im_scaled = im.astype(np.float32) - np.percentile(im, clip_percentile)
im_scaled = im_scaled / np.percentile(im_scaled, 100 - clip_percentile)
im_scaled = np.clip(im_scaled, 0, 1)
# Need to make sure we have a channels dimension for the multiplication to work
im_scaled = np.atleast_3d(im_scaled)
# Reshape the color (here, we assume channels last)
color = np.asarray(color).reshape((1, 1, -1))
return im_scaled * color
fig, axs = plt.subplots(2, 2, figsize = (10, 8.8))
axs[0, 0].imshow(colorize(imread("../data/example_images_100X_EI24_imaging/230224_U2OS_EI24_wt_Torin_0h.tif"), color = (1, 0, 0)))
axs[0, 0].xaxis.set_visible(False)
axs[0, 0].tick_params(left=False, labelleft=False)
axs[0, 0].set_title("-", fontsize = 30)
axs[0, 0].set_ylabel("Wt", rotation = 0, fontsize = 30, labelpad = 50)
axs[0, 1].imshow(colorize(imread("../data/example_images_100X_EI24_imaging/230224_U2OS_EI24_wt_Torin_14h.tif"), color = (1, 0, 0)))
axs[0, 1].axis("off")
axs[0, 1].set_title("14h", fontsize = 30)
axs[1, 0].imshow(colorize(imread("../data/example_images_100X_EI24_imaging/230224_U2OS_EI24_KO_Torin_0h.tif"), color = (1, 0, 0)))
axs[1, 0].xaxis.set_visible(False)
axs[1, 0].tick_params(left=False, labelleft=False)
axs[1, 0].set_ylabel("EI24 -/-", rotation = 0, fontsize = 30, labelpad = 50)
axs[1, 1].imshow(colorize(imread("../data/example_images_100X_EI24_imaging/230224_U2OS_EI24_KO_Torin_14h.tif"), color = (1, 0, 0)))
axs[1, 1].axis("off")
fig.tight_layout()
fig.savefig("../plots/EI24_100X_images.pdf")

7. Figure S1
7.1. Figure S1D
[32]:
# read data on fraction of contaminating reads from background population
data_excised = pd.read_csv("../data/celltrace_enrichment/fraction_reference_reads.csv", index_col = 0, sep = "\t")
data_excised["percentage_labelled_cells"] = 100 - data_excised.fraction_ref_reads * 100
# read data on fraction of celltrace positive slides contained on each slide
data_slides = pd.read_csv("../data/celltrace_enrichment/fraction_slides.csv", index_col = 0, sep = ",")
[33]:
#define plotting function to generate donut plots
def make_pie(sizes, text, colors, labels, startangle = 0):
import matplotlib.pyplot as plt
import numpy as np
col = colors
fig, ax = plt.subplots(figsize = (2, 2))
ax.axis('equal')
width = 0.45
kwargs = dict(colors=col, startangle=startangle, autopct='%1.1f%%')
outside = ax.pie(sizes, radius=1, pctdistance=1-width/2,
textprops={'fontsize': 6, "color":"white"},
wedgeprops={'linewidth': 0.5}, **kwargs)[0]
plt.setp(outside, width=width, edgecolor='white')
kwargs = dict(size=11, fontweight='bold', va='center')
ax.text(0, 0, text, ha='center', **kwargs)
fig.tight_layout()
return(fig)
[34]:
#### generate plots for celltype distribution in input slides
means = data_slides.groupby("celltype").mean()
#hela plot
hela = means.loc["Hela", "percentage_labelled_cells"]
values = [hela , 100 - hela]
label = ["labelled", "unlabelled"]
fig_hela = make_pie(values, "library",[c1,c2],label)
fig_hela.savefig("../plots/Donut_Hela_input_distribution.pdf",transparent=True, format='pdf', bbox_inches='tight')
#vero plot
vero = means.loc["Vero", "percentage_labelled_cells"]
values = [vero , 100 - vero]
label = ["labelled", "unlabelled"]
fig_vero = make_pie(values, "library",[c1,c2],label)
fig_vero.savefig("../plots/Donut_Vero_input_distribution.pdf",transparent=True, format='pdf', bbox_inches='tight')


[35]:
#### generate plots for excised cells
means_excised = data_excised.groupby("celltype").percentage_labelled_cells.mean()
hela, vero = means_excised
#hela plot
values = [hela , 100 - hela]
label = ["labelled", "unlabelled"]
fig_hela_excised = make_pie(values, "excised",[c1,c2],label, startangle = 180)
fig_hela_excised.savefig("../plots/Donut_Hela_excised.pdf",transparent=True, format='pdf', bbox_inches='tight')
#vero plot
values = [vero , 100 - vero]
label = ["labelled", "unlabelled"]
fig_vero_excised = make_pie(values, "excised",[c1,c2],label, startangle = 180)
fig_vero_excised.savefig("../plots/Donut_Vero_excised.pdf",transparent=True, format='pdf', bbox_inches='tight')


8. Figure S2
8.1. Figure S2B
[36]:
subsampled_classification_results = pd.read_csv("../data/Raw_Data_classifier_metrics_classifier_1.csv")
versions = ["classifier_1"]
thresholds = np.linspace(0.01, 0.99, 100)
labels = ["stim", "ATG5_KO_Cr203_C6"]
color1 = "black"
for version in versions:
results = pd.DataFrame(index = thresholds)
data = subsampled_classification_results.loc[[x in labels for x in subsampled_classification_results.label]].get([version, "label"])
data["label"] = ["negative" if x == "stim" else "positive" for x in data.label]
for _threshold in thresholds:
data[f"label_{_threshold}"] = ["positive" if x > _threshold else "negative" for x in data[version]]
data = data.copy() #to prevent dataframe fragmentation
#precision = TP / (TP + FP)
for _threshold in thresholds:
true_label = data.label.tolist()
label = data[f"label_{_threshold}"].tolist()
TP = 0
FP = 0
TN = 0
FN = 0
for tl, l in zip(true_label, label):
if tl == "positive":
if l == "positive":
TP += 1
elif l == "negative":
FN += 1
else:
print("Error")
elif tl == "negative":
if l == "positive":
FP += 1
elif l == "negative":
TN += 1
else:
print("error")
precision = TP / (TP + FP) * 100
recall = TP / (TP + FN) * 100
accuracy = (TP + TN) / (TP + FP + TN + FN) * 100
FPR = FP / (FP + TN) * 100
FDR = FP / (FP + TP) * 100
results.loc[_threshold, "TP"] = TP
results.loc[_threshold, "FP"] = FP
results.loc[_threshold, "TN"] = TN
results.loc[_threshold, "FN"] = FN
results.loc[_threshold, "precision"] = precision
results.loc[_threshold, "recall"] = recall
results.loc[_threshold, "accuracy"] = accuracy
results.loc[_threshold, "FPR"] = FPR
results.loc[_threshold, "FDR"] = FDR
results = results.copy() #to prevent dataframe fragmentation
results.to_csv(f"../data/{version}_Test_Data/{version}_FDR_data.csv")
fig, axs = plt.subplots(1, 1, figsize = (1.5, 3))
sns.despine(offset = 10)
axs.plot(results.index.tolist(), results.FDR.tolist(), clip_on = False, linewidth = 0.5, color = color1)
axs.scatter(x = results.index.tolist(), y = results.FDR.tolist(), clip_on = False, alpha = 0.5, s = 7, color = color1)
axs.set_ylabel("FDR [%]", color = color1, fontsize = 6)
axs.set_xlabel("classification \nscore cutoff", fontsize = 6)
axs.set_xlim(0, 1)
if version == 'classifier_1':
axs.set_ylim(0, 7)
axs.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
fig.tight_layout()
fig.savefig(f"../plots/{version}/{version}_FDR.pdf", bbox_inches='tight')

8.2. Figure S2C
[37]:
results = pd.read_csv("../data/Raw_Data_replicate_comparision_targeted_library.csv", index_col = 0)
results
[37]:
sequence | normalized_counts_A | normalized_counts_B | |
---|---|---|---|
gene | |||
ATG10 | GCGACTGCTACAGGGACCAT | 7.406274e-02 | 6.779847e-02 |
ATG4B | GGGGCTCACGGACATCAACG | 5.210339e-02 | 4.546776e-02 |
ATG7 | TCCGTGACCGTACCATGCAG | 5.200847e-02 | 4.494161e-02 |
ATG10 | ACAGACATGTCTTCCCATGG | 4.836661e-02 | 4.685815e-02 |
ATG3 | TAGTCCACCACTGTCCAACA | 3.530042e-02 | 2.710795e-02 |
... | ... | ... | ... |
NLRP3 | AGAGATTGATCTCAATCTTG | 1.628812e-04 | 0.000000e+00 |
RBM12B | CAGAGTCTTGAACATCACAA | 1.518706e-06 | 1.625904e-04 |
ADM2 | GGCTGCGGGACAGCGAGCCA | 3.796765e-07 | 1.011829e-04 |
TMEM39A | AGAGGAAAGTGCCTCGACTG | 0.000000e+00 | 7.477067e-04 |
MAGEA5 | CCTCATGTCACATTAAGGCA | 0.000000e+00 | 3.489065e-07 |
221 rows × 3 columns
[38]:
fig, axs = plt.subplots(1, 1, figsize = (6, 6))
#dynamically adjust x and y-axis limits of plot according to data
min = np.min([results["normalized_counts_A"].min(), results["normalized_counts_B"].min()])
max = np.max([results["normalized_counts_A"].max(), results["normalized_counts_B"].max()])
axs.set_ylim(min, np.ceil(max/0.01)*0.01)
axs.set_xlim(min, np.ceil(max/0.01)*0.01)
#get autophagy genes to plot
autophagy_genes_list_plot = [x for x in autophagy_genes_list if x != "EI24"] #filter to remove EI24 since we will visualize in different color
autophagy_genes_list_plot = [x for x in autophagy_genes_list_plot if x != "ATG5"] #filter to remove ATG5 since we will visualize in different color
_autophagy_genes = [x for x in autophagy_genes_list_plot if x in results.index.tolist()]
#subset data into non-autophagy related genes and autophagy-related genes
df2 = results.loc[_autophagy_genes, :]
df1 = results.loc[[x not in _autophagy_genes + ["EI24"] for x in results.index.tolist()], :]
axs.scatter(df1.get("normalized_counts_A"), df1.get("normalized_counts_B"), color = "gainsboro", edgecolor = "white")
axs.scatter(df2.get("normalized_counts_A"), df2.get("normalized_counts_B"), color = "#5AADC5", label = "autophagy-related", edgecolor = "white")
if "EI24" in results.index.tolist():
df3 = results.loc["EI24"]
axs.scatter(df3.get("normalized_counts_A"), df3.get("normalized_counts_B"), color = "#B3262A", label = "EI24", edgecolor = "white")
if "ATG5" in results.index.tolist():
df4 = results.loc["ATG5"]
axs.scatter(df4.get("normalized_counts_A"), df4.get("normalized_counts_B"), color = "#F58235", label = "ATG5", edgecolor = "white")
# Calculate the R-squared value
from scipy.stats import pearsonr
r_squared, p_value = pearsonr(results.normalized_counts_A, results.normalized_counts_B)
print(r_squared)
axs.set_xlabel("normalized counts replicate 1")
axs.set_ylabel("normalized counts replicate 2")
axs.set_aspect(1.0/axs.get_data_ratio(), adjustable='box')
axs.legend(bbox_to_anchor=(1.3, 1))
axs.plot([0,np.ceil(max/0.01)*0.01 ], [0,np.ceil(max/0.01)*0.01], color = "black", linestyle = "--", linewidth = 0.5)
axs.text( x = 0.05, y = 0.03, s = f"R$^2$ = {r_squared:.2f}", fontsize = 6)
fig.tight_layout()
fig.savefig(f"../plots/Replicate_correlation_targeted_library.pdf", bbox_inches = "tight")
0.9871717913360416

8.3. Figure S2D
[39]:
#load previously saved data
subsampled_classification_results = pd.read_csv("../data/Raw_Data_classifier_metrics_classifier_1.csv")
versions = ["classifier_1"]
from sklearn.metrics import precision_recall_curve, roc_curve, auc
labels = ["stim", "ATG5_KO_Cr203_C6"]
results_all = {}
auc_values = {}
for version in versions:
data = subsampled_classification_results.loc[[x in labels for x in subsampled_classification_results.label]].get([version, "label"])
true_label = [0 if x == "stim" else 1 for x in data.label]
scores = data[version].tolist()
fpr, tpr, thresholds = roc_curve(true_label, scores)
precision, recall, thresholds = precision_recall_curve(true_label, scores)
fdr = 1 - precision
auc_roc = auc(fpr, tpr)
auc_precision_recall = auc(recall, precision)
auc_values[version] = pd.DataFrame({"ROC":[auc_roc], "precision_recall":[auc_precision_recall]})
fig, axs = plt.subplots(1, 1, figsize = (3, 2))
axs.plot(fpr, tpr, color = "black")
axs.set_xlabel("false positive rate")
axs.set_ylabel("true positive rate")
axs.tick_params(axis='both', which='major', labelsize=6, size = 3.5*1.773)
auc_results = "{0:.4g}".format(auc_values[version].ROC[0])
axs.text(x = 0.5, y= 0.45, s= f"AUC = {auc_results}", fontsize = 6, horizontalalignment= "center")
fig.savefig(f"../plots/{version}/{version}_tpr_fpr_curve.pdf")

[40]:
#load previously saved data
subsampled_classification_results = pd.read_csv("../data/Raw_Data_classifier_metrics_classifier_1.csv")
versions = ["classifier_1"]
from sklearn.metrics import precision_recall_curve, roc_curve, auc
labels = ["stim", "ATG5_KO_Cr203_C6"]
results_all = {}
auc_values = {}
for version in versions:
data = subsampled_classification_results.loc[[x in labels for x in subsampled_classification_results.label]].get([version, "label"])
true_label = [0 if x == "stim" else 1 for x in data.label]
scores = data[version].tolist()
fpr, tpr, thresholds = roc_curve(true_label, scores)
precision, recall, thresholds = precision_recall_curve(true_label, scores)
fdr = 1 - precision
auc_roc = auc(fpr, tpr)
auc_precision_recall = auc(recall, precision)
auc_values[version] = pd.DataFrame({"ROC":[auc_roc], "precision_recall":[auc_precision_recall]})
fig, axs = plt.subplots(1, 1, figsize = (3, 2))
axs.plot(recall, precision, color = "black")
axs.set_xlabel("recall")
axs.set_ylabel("precision")
axs.tick_params(axis='both', which='major', labelsize=6, size = 3.5*1.773)
fig.savefig(f"../plots/{version}/{version}_precision_recall_curve.pdf")

8.4. Figure S2E
[41]:
#load previously saved data
subsampled_classification_results = pd.read_csv("../data/Raw_Data_classifier_metrics_classifier_1.csv")
versions = ["classifier_1"]
slides = ["unstim", "stim", "ATG5_KO_Cr203_C6"]
labels = ["wt", "wt + Torin-1", "ATG5-/- + Torin-1"]
from matplotlib.ticker import FormatStrFormatter
for version in versions:
fig, axs = plt.subplots(3, 1, figsize = (6, 5), sharey = True)
sns.despine(offset = 10)
for i, slide in enumerate(slides):
data = subsampled_classification_results.loc[[x == slide for x in subsampled_classification_results.label]].get([version])
hist = axs[i].hist(data, bins = 100, cumulative = False, histtype='step', bottom = 0, weights = np.ones(len(data)) / len(data)*100)
axs[i].set_ylim(0.01, 100)
axs[i].set_yscale("log")
axs[i].set_xlim(0, 1)
axs[i].yaxis.set_major_formatter(FormatStrFormatter('%.3g'))
axs[i].set_ylabel("cell count [%]")
axs[i].tick_params(size = 3.5*1.773)
axs[i].text(1.05, 1, s = labels[i])
axs[i].set_xlabel("classification score")
fig.tight_layout()
fig.savefig(f"../plots/{version}/{version}_Histogram_classifier_performance_lines.pdf", bbox_inches='tight')

8.5. Figure S2F
[42]:
#load previously saved data
subsampled_classification_results = pd.read_csv("../data/Raw_Data_classifier_metrics_classifier_1.csv")
versions = ["classifier_1"]
sample_label_lookup = {"unstim":"wt", "stim":"wt + Torin-1", "ATG5_KO_Cr203_C6":"ATG5-/- + Torin-1"}
n_cells = subsampled_classification_results.label.value_counts()[0]
for version in versions:
_df = subsampled_classification_results.copy()
_df["threshold_label"] = _df[version] >= 0.5
heatmap_data = pd.DataFrame(_df.groupby("label").sum().threshold_label/n_cells*100)
heatmap_data.columns = ["unstimulated"]
heatmap_data["stimulated"] = 100 - heatmap_data.unstimulated
fig, axs = plt.subplots(1, 2, figsize = (2.3,2), sharex=False, sharey=True, gridspec_kw = {'wspace':0, 'hspace':0})
cbar_ax1 = fig.add_axes([0.4, 0, 0.54, 0.03])
cbar_ax2 = fig.add_axes([0.4, 0.03, 0.54, 0.03])
sns.heatmap(heatmap_data.stimulated.to_frame().sort_index(ascending= False), annot = True, ax = axs[0],
cmap= cmap_stim, cbar_kws={'label': '% cells in class', "orientation": "horizontal"}, fmt = ".3g", vmax=100, vmin = 0, cbar_ax=cbar_ax1,
linewidths=0.5, linecolor='black',
annot_kws={"size": 6})
sns.heatmap(heatmap_data.unstimulated.to_frame().sort_index(ascending= False), annot = True, ax = axs[1],
cmap= cmap_unstim, cbar_kws={ "orientation": "horizontal"}, fmt = ".3g", vmax=100, vmin = 0, cbar_ax=cbar_ax2,
linewidths=0.5, linecolor='black',
annot_kws={"size": 6})
#remove axis from second colorbar
cbar_ax2.axis("off")
#reassign labels into readable format
labels = [sample_label_lookup[item.get_text()] for item in axs[0].get_yticklabels()]
axs[0].set_yticklabels(labels)
axs[0].set_xticklabels(["on"])
axs[1].set_xticklabels(["off"])
#move x-axis ticks to top and remove ticks
axs[0].xaxis.set_ticks_position('top')
axs[1].xaxis.set_ticks_position('top')
axs[0].tick_params(left=False, top=False, bottom = False, rotation = 0)
axs[1].tick_params(left=False, top=False, bottom = False, rotation = 0)
#remove ylabel
axs[0].set_ylabel(None)
axs[1].set_ylabel(None)
fig.tight_layout()
fig.savefig(f"../plots/{version}/{version}_heatmap.pdf", bbox_inches='tight')
/tmp/ipykernel_2144530/2736471724.py:49: UserWarning:
This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.

8.6. Figure S2G
[43]:
layers = ["conv2d5", "conv2d6", "conv2d7","conv2d8","conv2d9", "linear1", "linear2", ]
for layer in layers:
input = f"../data/classifier_1_Test_Data/UMAP_data/Raw_data_UMAP_{layer}.csv"
df_train_umap = pd.read_csv(input)
outdir = f"../plots/UMAP_classifier1_layers/"
### Plot 1 Labels
fig, axs = plt.subplots(1, 1, figsize = (10, 10))
color_dict = dict({'ClassifierA_Cr203_C6':"#B3262A",
"ClassifierA_test_stim":'#2f559a',
"ClassifierA_test_unstim":"#5AADC5",
})
ax = sns.scatterplot(df_train_umap.sample(frac=1, random_state = 19), x = "UMAP_1", y = "UMAP_2", s = 4, hue = "class_label", alpha = 1, palette = color_dict, ax = axs, edgecolor = None, rasterized=True)
axs.tick_params(size = 3.5*1.773)
legend = ax.get_legend()
ax.get_legend().remove()
ax1_divider = make_axes_locatable(ax)
cax1 = ax1_divider.append_axes("right", size="2%", pad="2%")
cax1.axis("off")
cax1.legend(*ax.get_legend_handles_labels(), bbox_to_anchor = (1, 1))
ax.set_aspect('equal', adjustable='box')
# fig.tight_layout()
fig.savefig(f"{outdir}/UMAP_test_data_labels_{layer}.pdf", dpi = 500)
### PLOT 2: prob unstim
fig, axs = plt.subplots(1, 1, figsize = (10, 10))
ax = sns.scatterplot(data = df_train_umap.sample(frac=1, random_state = 19), x = "UMAP_1", y = "UMAP_2", s = 4, hue = "prob_unstim", alpha = 1, ax = axs, palette=cmap_classification_score, edgecolor = None, rasterized = True)
axs.tick_params(size = 3.5*1.773)
norm = plt.Normalize(0, 1)
sm = plt.cm.ScalarMappable(cmap=cmap_classification_score, norm=norm)
sm.set_array([])
# Remove the legend and add a colorbar
ax.get_legend().remove()
ax1_divider = make_axes_locatable(ax)
cax1 = ax1_divider.append_axes("right", size="2%", pad="2%")
cb1 = fig.colorbar(sm, cax=cax1)
cax1.tick_params(size = 3.5*1.773)
ax.set_aspect('equal', adjustable='box')
fig.savefig(f"{outdir}/UMAP_test_data_prob_unstim_{layer}.pdf", dpi = 500)














8.7. Figure S2H
[44]:
#load previously saved data
subsampled_classification_results = pd.read_csv("../data/T02_Raw_Data_classifier_metrics_classifier_2.1_2.2.csv")
versions = ["classifier_2.1"]
from sklearn.metrics import precision_recall_curve, roc_curve, auc
labels = ["stim", "ATG5_KO_Cr203_C6"]
results_all = {}
auc_values = {}
for version in versions:
data = subsampled_classification_results.loc[[x in labels for x in subsampled_classification_results.label]].get([version, "label"])
true_label = [0 if x == "stim" else 1 for x in data.label]
scores = data[version].tolist()
fpr, tpr, thresholds = roc_curve(true_label, scores)
precision, recall, thresholds = precision_recall_curve(true_label, scores)
fdr = 1 - precision
auc_roc = auc(fpr, tpr)
auc_precision_recall = auc(recall, precision)
auc_values[version] = pd.DataFrame({"ROC":[auc_roc], "precision_recall":[auc_precision_recall]})
fig, axs = plt.subplots(1, 1, figsize = (3, 2))
axs.plot(recall, precision, color = "black")
axs.set_xlabel("recall")
axs.set_ylabel("precision")
axs.tick_params(axis='both', which='major', labelsize=6, size = 3.5*1.773)
fig.savefig(f"../plots/{version}/T02_{version}_precision_recall_curve.pdf")

8.8. Figure S2I
[45]:
from tifffile import imread
from sparcscore.processing.preprocessing import percentile_normalization
def colorize(im, color, clip_percentile=0.0):
"""
Helper function to create an RGB image from a single-channel image using a
specific color.
"""
# Check that we do just have a 2D image
if im.ndim > 2 and im.shape[2] != 1:
raise ValueError('This function expects a single-channel image!')
# Rescale the image according to how we want to display it
im_scaled = im.astype(np.float32) - np.percentile(im, clip_percentile)
im_scaled = im_scaled / np.percentile(im_scaled, 100 - clip_percentile)
im_scaled = np.clip(im_scaled, 0, 1)
# Need to make sure we have a channels dimension for the multiplication to work
im_scaled = np.atleast_3d(im_scaled)
# Reshape the color (here, we assume channels last)
color = np.asarray(color).reshape((1, 1, -1))
return im_scaled * color
def generate_composite(images, colors = [(0, 0, 1),(0, 1, 0), (1, 0, 0), (1, 0, 1)], plot = False):
"""
Helper function to generate a colorized overalyed image from multiple individual images
"""
colorized = []
for image, color in zip(images, colors):
image = colorize(image, color, 0.0)
colorized.append(image)
if plot:
for i in colorized:
plt.figure()
plt.imshow(i)
image = colorized[0]
for i in range(len(colorized)-1):
image += colorized[i+1]
return(np.clip(image, 0, 1))
channels_stim = imread(f"../data/golgi_classifiers/Hela_Noco.tif")
channels_unstim = imread(f"../data/golgi_classifiers/Hela_unstim.tif")
#perform percentile normalization
channels_stim = np.array([percentile_normalization(x, 0.0001, 0.9999) for x in channels_stim])
channels_unstim = np.array([percentile_normalization(x, 0.0001, 0.9999) for x in channels_unstim])
#create composite image
unstim_composite = generate_composite(channels_unstim, ( (0, 0, 1), (0,1, 0), (1, 0, 0) ))
stim_composite = generate_composite(channels_stim, ((0, 0, 1), (0,1, 0), (1, 0, 0) ))
#plot figure
fig, axs = plt.subplots(2, 4, figsize = (4*10, 2*10+0.5))
for i in range(3):
axs[0, i].imshow(channels_unstim[i], cmap = "gray")
axs[1, i].imshow(channels_stim[i], cmap = "gray")
if i == 0:
axs[0, i].xaxis.set_visible(False)
axs[0, i].tick_params(left=False, labelleft=False)
axs[1, i].xaxis.set_visible(False)
axs[1, i].tick_params(left=False, labelleft=False)
else:
axs[0, i].axis("off")
axs[1, i].axis("off")
axs[0, 3].imshow(unstim_composite)
axs[0, 3].axis("off")
axs[1, 3].imshow(stim_composite)
axs[1, 3].axis("off")
axis_labels = ["nuclei", "membrane", "GM130", "GM130\nnuclei\nmembrane"]
for i, label in enumerate(axis_labels):
axs[0, i].set_title(label, fontsize = 30)
axis_labels = ["-", "Noco"]
for i, label in enumerate(axis_labels):
axs[i, 0].set_ylabel(label, fontsize = 30, rotation = 0, labelpad=110)
fig.tight_layout()
fig.savefig(f"../plots/Golgi_classifiers/Noco_classifier_input_image_example.pdf")

8.9. Figure S2J
[46]:
from tifffile import imread
from sparcscore.processing.preprocessing import percentile_normalization
def colorize(im, color, clip_percentile=0.0):
"""
Helper function to create an RGB image from a single-channel image using a
specific color.
"""
# Check that we do just have a 2D image
if im.ndim > 2 and im.shape[2] != 1:
raise ValueError('This function expects a single-channel image!')
# Rescale the image according to how we want to display it
im_scaled = im.astype(np.float32) - np.percentile(im, clip_percentile)
im_scaled = im_scaled / np.percentile(im_scaled, 100 - clip_percentile)
im_scaled = np.clip(im_scaled, 0, 1)
# Need to make sure we have a channels dimension for the multiplication to work
im_scaled = np.atleast_3d(im_scaled)
# Reshape the color (here, we assume channels last)
color = np.asarray(color).reshape((1, 1, -1))
return im_scaled * color
def generate_composite(images, colors = [(0, 0, 1),(0, 1, 0), (1, 0, 0), (1, 0, 1)], plot = False):
"""
Helper function to generate a colorized overalyed image from multiple individual images
"""
colorized = []
for image, color in zip(images, colors):
image = colorize(image, color, 0.0)
colorized.append(image)
if plot:
for i in colorized:
plt.figure()
plt.imshow(i)
image = colorized[0]
for i in range(len(colorized)-1):
image += colorized[i+1]
return(np.clip(image, 0, 1))
channels_stim = imread(f"../data/golgi_classifiers/Hela_BFA.tif")
channels_unstim = imread(f"../data/golgi_classifiers/Hela_unstim.tif")
#perform percentile normalization
channels_stim = np.array([percentile_normalization(x, 0.0001, 0.9999) for x in channels_stim])
channels_unstim = np.array([percentile_normalization(x, 0.0001, 0.9999) for x in channels_unstim])
#create composite image
unstim_composite = generate_composite(channels_unstim, ( (0, 0, 1), (0,1, 0), (1, 0, 0) ))
stim_composite = generate_composite(channels_stim, ((0, 0, 1), (0,1, 0), (1, 0, 0) ))
#plot figure
fig, axs = plt.subplots(2, 4, figsize = (4*10, 2*10+0.5))
for i in range(3):
axs[0, i].imshow(channels_unstim[i], cmap = "gray")
axs[1, i].imshow(channels_stim[i], cmap = "gray")
if i == 0:
axs[0, i].xaxis.set_visible(False)
axs[0, i].tick_params(left=False, labelleft=False)
axs[1, i].xaxis.set_visible(False)
axs[1, i].tick_params(left=False, labelleft=False)
else:
axs[0, i].axis("off")
axs[1, i].axis("off")
axs[0, 3].imshow(unstim_composite)
axs[0, 3].axis("off")
axs[1, 3].imshow(stim_composite)
axs[1, 3].axis("off")
axis_labels = ["nuclei", "membrane", "GM130", "GM130\nnuclei\nmembrane"]
for i, label in enumerate(axis_labels):
axs[0, i].set_title(label, fontsize = 30)
axis_labels = ["-", "BFA"]
for i, label in enumerate(axis_labels):
axs[i, 0].set_ylabel(label, fontsize = 30, rotation = 0, labelpad=110)
fig.tight_layout()
fig.savefig(f"../plots/Golgi_classifiers/BFA_classifier_input_image_example.pdf")

8.10. Figure S2K
[47]:
from sklearn.metrics import precision_recall_curve, roc_curve, auc
def generate_ROC_curve(true_labels, scores):
fpr, tpr, thresholds = roc_curve(true_labels, scores)
auc_roc = auc(fpr, tpr)
fig, axs = plt.subplots(1, 1, figsize = (3, 2))
axs.plot(fpr, tpr, color = "black")
axs.set_xlabel("false positive rate")
axs.set_ylabel("true positive rate")
axs.tick_params(axis='both', which='major', labelsize=6, size = 3.5*1.773)
auc_results = "{0:.4g}".format(auc_roc)
axs.text(x = 0.5, y= 0.45, s= f"AUC = {auc_results}", fontsize = 6, horizontalalignment= "center")
return(fig)
def generate_precision_recall_curve(true_labels, scores):
precision, recall, thresholds = precision_recall_curve(true_labels, scores)
auc_precision_recall = auc(recall, precision)
fig, axs = plt.subplots(1, 1, figsize = (3, 2))
axs.plot(recall, precision, color = "black")
axs.set_xlabel("recall")
axs.set_ylabel("precision")
axs.tick_params(axis='both', which='major', labelsize=6, size = 3.5*1.773)
return(fig)
data = pd.read_csv(f"../data/golgi_classifiers/classification_metrics_Noco/classification_results_test_dataset.csv", index_col = 0)
true_labels = data.labels
scores = data.probs
fig_roc = generate_ROC_curve(true_labels, scores)
fig_roc.savefig(f"../plots/Golgi_classifiers/Noco_ROC_curve.pdf")
fig_precision_recall = generate_precision_recall_curve(true_labels, scores)
fig_precision_recall.savefig(f"../plots/Golgi_classifiers/Noco_precision_recall_curve.pdf")


8.11. Figure S2L
[48]:
from sklearn.metrics import precision_recall_curve, roc_curve, auc
def generate_ROC_curve(true_labels, scores):
fpr, tpr, thresholds = roc_curve(true_labels, scores)
auc_roc = auc(fpr, tpr)
fig, axs = plt.subplots(1, 1, figsize = (3, 2))
axs.plot(fpr, tpr, color = "black")
axs.set_xlabel("false positive rate")
axs.set_ylabel("true positive rate")
axs.tick_params(axis='both', which='major', labelsize=6, size = 3.5*1.773)
auc_results = "{0:.4g}".format(auc_roc)
axs.text(x = 0.5, y= 0.45, s= f"AUC = {auc_results}", fontsize = 6, horizontalalignment= "center")
return(fig)
def generate_precision_recall_curve(true_labels, scores):
precision, recall, thresholds = precision_recall_curve(true_labels, scores)
auc_precision_recall = auc(recall, precision)
fig, axs = plt.subplots(1, 1, figsize = (3, 2))
axs.plot(recall, precision, color = "black")
axs.set_xlabel("recall")
axs.set_ylabel("precision")
axs.tick_params(axis='both', which='major', labelsize=6, size = 3.5*1.773)
return(fig)
data = pd.read_csv(f"../data/golgi_classifiers/classification_metrics_BFA/classification_results_test_dataset.csv", index_col = 0)
true_labels = data.labels
scores = data.probs
fig_roc = generate_ROC_curve(true_labels, scores)
fig_roc.savefig(f"../plots/Golgi_classifiers/BFA_ROC_curve.pdf")
fig_precision_recall = generate_precision_recall_curve(true_labels, scores)
fig_precision_recall.savefig(f"../plots/Golgi_classifiers/BFA_precision_recall_curve.pdf")


9. Figure S3
9.1. Figure S3A
Tables were formatted in illustrator
[49]:
# left table
pd.read_csv("../data/Screen_Analysis/screen_batches/ScreenB1_aggregated_results_number_of_cells_per_bin.csv")
[49]:
Unnamed: 0 | cells imaged | cells dissected | percentage_of_all_cells | |
---|---|---|---|---|
0 | bin1 | 937 | 795.0 | 0.016915 |
1 | bin2 | 10794 | 9113.0 | 0.194861 |
2 | bin3 | 5021 | 4387.0 | 0.090643 |
3 | bin4 | 4540 | 3968.0 | 0.081959 |
4 | bin5 | 4839 | 4270.0 | 0.087357 |
5 | bin6 | 5121 | 4525.0 | 0.092448 |
6 | cells >= 0.98 | 31252 | 27058.0 | 0.564183 |
7 | all_imaged_cells | 5539333 | NaN | 100.000000 |
[50]:
# right table
pd.read_csv("../data/Screen_Analysis/screen_batches/ScreenB2_aggregated_results_number_of_cells_per_bin.csv")
[50]:
Unnamed: 0 | cells imaged | cells dissected | percentage_of_all_cells | |
---|---|---|---|---|
0 | bin1 | 19516 | 14009.0 | 0.056883 |
1 | bin2 | 217766 | 179898.0 | 0.634718 |
2 | bin3 | 74672 | 52283.0 | 0.217645 |
3 | bin4 | 56478 | 41061.0 | 0.164615 |
4 | bin5 | 55477 | 41695.0 | 0.161698 |
5 | bin6 | 56102 | 43189.0 | 0.163519 |
6 | cells >= 0.98 | 480011 | 372135.0 | 1.399078 |
7 | all_imaged_cells | 34309090 | NaN | 100.000000 |
9.2. Figure S3B
slide names were removed and the slide batches annotated manually in illustrator
[51]:
_df1 = pd.read_parquet("../data/Screen_Analysis/screen_batches/ScreenB1_raw_data_cell_classification_values_with_bin_annotation.parquet")
_df2 = pd.read_parquet("../data/Screen_Analysis/screen_batches/ScreenB2_raw_data_cell_classification_values_with_bin_annotation.parquet")
all_results = pd.concat([_df1, _df2])
[52]:
all_results = all_results.get(["slide", "Prob_Unstim"])
data_cell_counts = all_results.groupby("slide").count()
data_cell_counts.columns = ["number_of_cells_per_slide"]
[53]:
fig, axs = plt.subplots(1,1, figsize = (5, 3))
sns.despine(offset = {"left":20, "bottom":10 })
axs.bar(x = data_cell_counts.index, height = data_cell_counts["number_of_cells_per_slide"], clip_on = False, color = "#D1D3D4")
axs.set_xticks(range(0, len(data_cell_counts.index.tolist())), labels = range(0, len(data_cell_counts.index)), rotation = 90);
axs.set_ylabel("cells per slide")
axs.set_xlabel("slidename")
axs.set_xlim(0, len(data_cell_counts.index)-1)
axs.set_ylim(0, 350000)
axs.tick_params(axis='both', which='major', labelsize=6)
axs.tick_params(axis='both', which='minor', labelsize=6)
fig.tight_layout()
fig.savefig("../plots/Screen_Analysis_number_of_cells_per_slide.pdf")

9.3. Figure S3C
median value was annotated manually in illustrator
[54]:
plot = pd.read_csv("../data/Screen_Analysis/Raw_Data_reference_lysate.csv")
plot["normalized_counts_reference_cell_number"] = plot["normalized_counts_reference"]*40620231#multiply with the total number of cells imaged over 2.3 and 2.2 screen
plot = plot[plot.gene != "Non-Targeting Control"]
fig, ax = plt.subplots(1, 1, figsize = (1.5, 1.2))
sns.despine(offset = 3)
hist = np.histogram(plot.groupby("gene").sum()["normalized_counts_reference_cell_number"].values, bins = 100);
median = np.median(plot.groupby("gene").sum()["normalized_counts_reference_cell_number"])
print(median)
std = plot.groupby("gene").sum()["normalized_counts_reference_cell_number"].sem(axis = 0)
ax.fill_between(hist[1][:-1], hist[0], color='gainsboro', label = "all gRNAs")
y_max = np.ceil(np.max(hist[0])/200) * 200
x_max = np.ceil(np.max(hist[1])/2000) * 2000
ax.set_ylim(0, 1400)
ax.set_xlim(0, x_max)
ax.vlines(median, ymin = 0, ymax = hist[0].max(), color = "red", label = "median", linewidth = 0.25)
ax.set_xlabel("number of cells / gene")
ax.set_ylabel("frequency")
ax.legend(frameon=False)
ax.tick_params(width = 0.25)
ax.spines["bottom"].set_linewidth(0.25)
ax.spines["left"].set_linewidth(0.25)
ax.spines["right"].set_linewidth(0.25)
ax.tick_params(size = 2)
ax.set_yticks([0, 200, 400, 600, 800, 1000, 1200, 1400], [0, 200, 400, 600, 800, 1000, 1200, 1400])
fig.savefig("../plots/Screen_Analysis/histogram_n_cells_per_gene_complete_screen.pdf", transparent=True, format='pdf', bbox_inches='tight')
1818.6068774524556

9.4. Figure S3D
[55]:
reference_lysate = pd.read_csv("../data/Screen_Analysis/Raw_Data_reference_lysate.csv")
hist_all = np.histogram(reference_lysate["normalized_counts_reference"], bins=100, density = False)
hist_non_targeting = np.histogram(reference_lysate[reference_lysate.gene == "Non-Targeting Control"]["normalized_counts_reference"], bins=100, density = False)
fig, ax = plt.subplots(figsize = (1.5, 1.2))
color = "#2f559a"
#plot distribution of all gRNAs
ax.fill_between(hist_all[1][1:], hist_all[0], color='gainsboro', label = "all sgRNAs")
#adjust y-axis and x-axis limits of first y-axis
y_max = np.ceil(np.max(hist_all[0])/10000) * 10000
x_max = np.ceil(np.max(hist_all[1])/0.00005) * 0.00005
ax.set_ylim(0, y_max)
ax.set_xlim(0, x_max)
ax.set_xticks(list(np.linspace(0, x_max, 3)))
ax.set_ylabel("frequency")
ax.set_xlabel("fraction of reads")
ax.tick_params(width = 0.25)
ax.spines["bottom"].set_linewidth(0.25)
ax.spines["left"].set_linewidth(0.25)
ax.spines["right"].set_linewidth(0.25)
ax.tick_params(size = 2)
#duplicate x-axis and add second y-axis to plot non-targeting gRNAs to
ax2 = ax.twinx()
ax2.tick_params(width = 0.25)
ax2.spines["bottom"].set_linewidth(0.25)
ax2.spines["left"].set_linewidth(0.25)
sns.lineplot(x = hist_non_targeting[1][:-1], y = hist_non_targeting[0], label = "non-targeting \nsgRNAs", linewidth=0.25, ax = ax2, color = color)
#adjust y-axis limits of second y-axis and label correctly
y_max = np.ceil(np.max(hist_non_targeting[0])/100) * 100
ax2.set_ylim(0, y_max)
ax2.set_ylabel("frequency non-targeting")
#change color of secondary y-axis
ax2.yaxis.label.set_color(color)
ax2.tick_params(axis='y', colors=color, width = 0.25)
ax2.spines['right'].set_color(color)
ax2.spines["right"].set_linewidth(0.25)
ax2.tick_params(size = 2)
#create legend
lines, labels = ax.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax2.legend(lines + lines2, labels + labels2, frameon=False, fontsize = 6)
sns.despine(offset = 3, right= False)
fig.savefig("../plots/Screen_Analysis/histogram_gRNA_distribution_reference_lysate.pdf", transparent=True, format='pdf', bbox_inches='tight')

10. Figure S4
10.1. Figure S4B
[56]:
df_umap = pd.read_csv("../data/Screen_Analysis/screen_batches/Screen_B1_raw_data_UMAP.csv", index_col = 0)
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LinearSegmentedColormap
fig, axs = plt.subplots(1, 1, figsize = (5, 5))
c1='#B3262A'
c2='#2f559a'
c3 = "#FFFFFF"
cmap1 = LinearSegmentedColormap.from_list("Prob_Unstim", [c2, c1], N=500)
cmap = cmap1
sns.scatterplot(df_umap.sort_values(by= "class_label").sample(frac=1, random_state = 16), x = "UMAP_1", y = "UMAP_2", s = 1, hue = "prob_unstim", alpha = 1, palette = cmap, ax = axs, edgecolor = None, rasterized=True)
sns.move_legend(axs, "upper left", bbox_to_anchor=(1, 1))
axs.set_aspect('equal', adjustable='box')
axs.tick_params(size = 3.5*1.773)
divider = make_axes_locatable(axs)
cax = divider.append_axes('right', size='5%', pad=0.05)
norm = plt.Normalize(0, 1.0)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
fig.colorbar(sm, cax=cax, orientation='vertical', label = "Prob(Unstim)")
# Remove the legend and add a colorbar
axs.get_legend().remove()
fig.tight_layout()
fig.savefig("../plots/Screen_Analysis/Screen_B1_UMAP_classification_score_colored.pdf", dpi = 500)

10.2. Figure S4C
[57]:
all_results = pd.read_parquet("../data/Screen_Analysis/screen_batches/ScreenB1_raw_data_cell_classification_values_with_bin_annotation.parquet")
bins = {"bin1": (0.99999, 1.0),
"bin2": (0.999, 0.99999),
"bin3": (0.9975, 0.999),
"bin4": (0.995, 0.9975),
"bin5": (0.99, 0.995),
"bin6": (0.98, 0.99),
}
colors = ['#2f559a',"#B3262A","#5AADC5","#E46425","#F5DB12","#231F20","#D1D3D4"]
fig, axs = plt.subplots(1, 2, figsize = (14, 4))
sns.despine(offset = 0)
axs[0].hist(all_results.Prob_Unstim, bins = 200, bottom = 0, histtype = "stepfilled", color = "black");
axs[0].set_yscale("log")
axs[0].set_ylabel("cell count")
axs[0].set_xlabel("classification score")
axs[0].set_ylim(50, 10000000)
axs[0].set_xlim(0, 1)
hist = axs[1].hist(all_results[all_results.Prob_Unstim >= 0.975].Prob_Unstim, bins = 100, bottom = 0, histtype = "step", linewidth = 1, color = "black");
axs[1].set_yscale("log")
axs[1].set_ylabel("cell count")
axs[1].set_xlabel("classification score")
axs[1].set_xlim(0.975, 1)
axs[1].set_ylim(50, 10000)
for i, _bin in enumerate(bins.keys()):
lower, upper = bins[_bin]
rectangle = plt.Rectangle((lower, 0), upper-lower, hist[0].max(), fc=colors[i], alpha = 0.4, edgecolor = colors[i])
plt.gca().add_patch(rectangle)
fig.savefig("../plots/Screen_Analysis/ScreenB1_binning_strategy.pdf")

10.3. Figure S4D
[58]:
df_umap = pd.read_csv("../data/Screen_Analysis/screen_batches/Screen_B1_raw_data_UMAP.csv", index_col = 0)
fig, axs = plt.subplots(1, 1, figsize = (5, 5))
color_dict = dict({'bin1':'#2f559a',
"bin2":"#B3262A",
"bin3":"#5AADC5",
"bin4":"#E46425",
"bin5":"#F5DB12",
"bin6":"#231F20",
"autophagy-on":"#D1D3D4"
})
sns.scatterplot(df_umap.sort_values(by= "class_label").sample(frac=1, random_state = 16), x = "UMAP_1", y = "UMAP_2", s = 1, hue = "class_label", alpha = 1, palette = color_dict, ax = axs, edgecolor = None, rasterized=True)
sns.move_legend(axs, "upper left", bbox_to_anchor=(1, 1))
axs.set_aspect('equal', adjustable='box')
axs.tick_params(size = 3.5*1.773)
fig.tight_layout()
fig.savefig("../plots/Screen_Analysis/Screen_B1_UMAP_bins_colored.pdf", dpi = 500)

11. Figure S5
11.1. Figure S5A
[59]:
#load previously saved data
subsampled_classification_results = pd.read_csv("../data/T02_Raw_Data_classifier_metrics_classifier_2.1_2.2.csv")
versions = ["classifier_2.2"]
from sklearn.metrics import precision_recall_curve, roc_curve, auc
labels = ["stim", "ATG5_KO_Cr203_C6"]
results_all = {}
auc_values = {}
for version in versions:
data = subsampled_classification_results.loc[[x in labels for x in subsampled_classification_results.label]].get([version, "label"])
true_label = [0 if x == "stim" else 1 for x in data.label]
scores = data[version].tolist()
fpr, tpr, thresholds = roc_curve(true_label, scores)
precision, recall, thresholds = precision_recall_curve(true_label, scores)
fdr = 1 - precision
auc_roc = auc(fpr, tpr)
auc_precision_recall = auc(recall, precision)
auc_values[version] = pd.DataFrame({"ROC":[auc_roc], "precision_recall":[auc_precision_recall]})
fig, axs = plt.subplots(1, 1, figsize = (3, 2))
axs.plot(fpr, tpr, color = "black")
axs.set_xlabel("false positive rate")
axs.set_ylabel("true positive rate")
axs.tick_params(axis='both', which='major', labelsize=6, size = 3.5*1.773)
auc_results = "{0:.4g}".format(auc_values[version].ROC[0])
axs.text(x = 0.5, y= 0.45, s= f"AUC = {auc_results}", fontsize = 6, horizontalalignment= "center")
fig.savefig(f"../plots/{version}/T02_{version}_tpr_fpr_curve.pdf")

11.2. Figure S5B
[60]:
#load previously saved data
subsampled_classification_results = pd.read_csv("../data/T02_Raw_Data_classifier_metrics_classifier_2.1_2.2.csv")
versions = ["classifier_2.2"]
from sklearn.metrics import precision_recall_curve, roc_curve, auc
labels = ["stim", "ATG5_KO_Cr203_C6"]
results_all = {}
auc_values = {}
for version in versions:
data = subsampled_classification_results.loc[[x in labels for x in subsampled_classification_results.label]].get([version, "label"])
true_label = [0 if x == "stim" else 1 for x in data.label]
scores = data[version].tolist()
fpr, tpr, thresholds = roc_curve(true_label, scores)
precision, recall, thresholds = precision_recall_curve(true_label, scores)
fdr = 1 - precision
auc_roc = auc(fpr, tpr)
auc_precision_recall = auc(recall, precision)
auc_values[version] = pd.DataFrame({"ROC":[auc_roc], "precision_recall":[auc_precision_recall]})
fig, axs = plt.subplots(1, 1, figsize = (3, 2))
axs.plot(recall, precision, color = "black")
axs.set_xlabel("recall")
axs.set_ylabel("precision")
axs.tick_params(axis='both', which='major', labelsize=6, size = 3.5*1.773)
fig.savefig(f"../plots/{version}/T02_{version}_precision_recall_curve.pdf")

11.3. Figure S5C
[61]:
#load previously saved data
subsampled_classification_results = pd.read_csv("../data/T02_Raw_Data_classifier_metrics_classifier_2.1_2.2.csv")
slides = ["unstim", "stim", "ATG5_KO_Cr203_C6"]
labels = ["wt", "wt + Torin-1", "ATG5-/- + Torin-1"]
versions = ["classifier_2.2"]
from matplotlib.ticker import FormatStrFormatter
for version in versions:
fig, axs = plt.subplots(3, 1, figsize = (6, 5), sharey = True)
sns.despine(offset = 10)
for i, slide in enumerate(slides):
data = subsampled_classification_results.loc[[x == slide for x in subsampled_classification_results.label]].get([version])
hist = axs[i].hist(data, bins = 100, cumulative = False, histtype='step', bottom = 0, weights = np.ones(len(data)) / len(data)*100)
axs[i].set_ylim(0.01, 100)
axs[i].set_yscale("log")
axs[i].set_xlim(0, 1)
axs[i].yaxis.set_major_formatter(FormatStrFormatter('%.3g'))
axs[i].set_ylabel("cell count [%]")
axs[i].tick_params(size = 3.5*1.773)
axs[i].text(1.05, 1, s = labels[i])
axs[i].set_xlabel("classification score")
fig.tight_layout()
fig.savefig(f"../plots/{version}/T02_{version}_Histogram_classifier_performance_lines.pdf", bbox_inches='tight')

11.4. Figure S5D
[62]:
#load previously saved data
subsampled_classification_results = pd.read_csv("../data/T02_Raw_Data_classifier_metrics_classifier_2.1_2.2.csv")
sample_label_lookup = {"unstim":"wt", "stim":"wt + Torin-1", "ATG5_KO_Cr203_C6":"ATG5-/- + Torin-1"}
versions = ["classifier_2.2"]
n_cells = subsampled_classification_results.label.value_counts()[0]
for version in versions:
_df = subsampled_classification_results.copy()
_df["threshold_label"] = _df[version] >= 0.5
heatmap_data = pd.DataFrame(_df.groupby("label").sum().threshold_label/n_cells*100)
heatmap_data.columns = ["unstimulated"]
heatmap_data["stimulated"] = 100 - heatmap_data.unstimulated
fig, axs = plt.subplots(1, 2, figsize = (2.3,2), sharex=False, sharey=True, gridspec_kw = {'wspace':0, 'hspace':0})
cbar_ax1 = fig.add_axes([0.4, 0, 0.54, 0.03])
cbar_ax2 = fig.add_axes([0.4, 0.03, 0.54, 0.03])
sns.heatmap(heatmap_data.stimulated.to_frame().sort_index(ascending= False), annot = True, ax = axs[0],
cmap= cmap_stim, cbar_kws={'label': '% cells in class', "orientation": "horizontal"}, fmt = ".3g", vmax=100, vmin = 0, cbar_ax=cbar_ax1,
linewidths=0.5, linecolor='black',
annot_kws={"size": 6})
sns.heatmap(heatmap_data.unstimulated.to_frame().sort_index(ascending= False), annot = True, ax = axs[1],
cmap= cmap_unstim, cbar_kws={ "orientation": "horizontal"}, fmt = ".3g", vmax=100, vmin = 0, cbar_ax=cbar_ax2,
linewidths=0.5, linecolor='black',
annot_kws={"size": 6})
#remove axis from second colorbar
cbar_ax2.axis("off")
#reassign labels into readable format
labels = [sample_label_lookup[item.get_text()] for item in axs[0].get_yticklabels()]
axs[0].set_yticklabels(labels)
axs[0].set_xticklabels(["on"])
axs[1].set_xticklabels(["off"])
#move x-axis ticks to top and remove ticks
axs[0].xaxis.set_ticks_position('top')
axs[1].xaxis.set_ticks_position('top')
axs[0].tick_params(left=False, top=False, bottom = False, rotation = 0)
axs[1].tick_params(left=False, top=False, bottom = False, rotation = 0)
#remove ylabel
axs[0].set_ylabel(None)
axs[1].set_ylabel(None)
fig.tight_layout()
fig.savefig(f"../plots/{version}/T02_{version}_heatmap.pdf", bbox_inches='tight')
/tmp/ipykernel_2144530/591137911.py:50: UserWarning:
This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.

11.5. Figure S5E
[63]:
import scipy.stats
#read data
all_results = pd.read_csv("../data/Screen_Analysis/Raw_Data_Sequencing_Analysis_gRNA_count_table.csv", index_col = 0)
autophagy_genes_list = pd.read_csv("../data/Screen_Analysis/Raw_Data_Autophagy_Genes_list.txt", header = None)[0].tolist()
#plot results for bin1 and bin2
bins = ["bin1", "bin2"]
fig, axs = plt.subplots(1, 2, figsize = (12, 6))
for i, _bin in enumerate(bins):
data = all_results.get(["gene", "gRNA_names", f"normCounts_{_bin}", "normalized_counts_reference", "depleted_gene", f"ngRNAs_{_bin}"])
data = data[data[f"ngRNAs_{_bin}"] >=2]
data["gRNA_names"] = ["gRNA." + x.split(".")[1] for x in data["gRNA_names"]]
#reduce list to only include sgRNAs that were sequenced
data = data[data[f"normCounts_{_bin}"] > 0]
tests_treatment = data.groupby("gene")[f'normCounts_{_bin}'].agg(list)
tests_control = data.groupby("gene")[f'normalized_counts_reference'].agg(list)
if (tests_treatment.index != tests_control.index).sum() > 0:
sys.exit("indexes not the same!")
#calculate p-values using one-tailed paired t-test for each of the genes found with at least 2gRNAs in each bin
results = pd.DataFrame(columns = ["p-value", "mean_treatment", "mean_control", "log2FC", "autophagy_status"])
for gene, values_control, values_treatment in zip(tests_treatment.index, tests_control, tests_treatment):
#calculate p-value on log10 values per gene
_, p_value = scipy.stats.ttest_rel(np.log10(values_treatment), np.log10(values_control), alternative='greater')
#save results to dataframe
results.loc[gene, "p-value"] = p_value
results.loc[gene, "mean_treatment"] = np.mean(values_treatment)
results.loc[gene, "mean_control"] = np.mean(values_control)
results.loc[gene, "log2FC"] = np.log2(np.mean(values_treatment) / np.mean(values_control))
results.loc[gene, "autophagy_status"] = gene in autophagy_genes_list
results.loc[gene, "-log10(p-value)"] = -1 * np.log10(p_value)
print(f"number of tests performed in {_bin}: {len(tests_treatment.index)}")
#get non-targeting control cutoff
log2FC_control = results.loc["Non-Targeting Control", "log2FC"]
#filter results to remove genes which showed a weaker enrichment than the non-targeting control
results_filtered = results[results.log2FC > log2FC_control]
#dynamically adjust x and y-axis limits of plot according to data
ymin = results_filtered.get(f"-log10(p-value)").min()
ymax = results_filtered.get(f"-log10(p-value)").max()
xmin = results_filtered.get(f"log2FC").min()
xmax = results_filtered.get(f"log2FC").max()
axs[i].set_ylim(0.5, np.ceil(ymax/0.5)*0.5)
axs[i].set_xlim(np.floor(xmin/1)*1, np.ceil(xmax/1)*1)
#get autophagy genes to plot
autophagy_genes_list_plot = [x for x in autophagy_genes_list if x != "EI24"] #filter to remove EI24 since we will visualize in different color
autophagy_genes_list_plot = [x for x in autophagy_genes_list_plot if x != "ATG5"] #filter to remove ATG5 since we will visualize in different color
_autophagy_genes = [x for x in autophagy_genes_list_plot if x in results_filtered.index.tolist()]
#subset data into non-autophagy related genes and autophagy-related genes
df2 = results_filtered.loc[_autophagy_genes, :]
df1 = results_filtered.loc[[x not in _autophagy_genes + ["EI24"] for x in results_filtered.index.tolist()], :]
axs[i].scatter(df1.get("log2FC"), df1.get("-log10(p-value)"), color = "gainsboro", edgecolor = "white", label = "other")
axs[i].scatter(df2.get("log2FC"), df2.get("-log10(p-value)"), color = "#5AADC5", label = "autophagy-related", edgecolor = "white")
if "EI24" in results_filtered.index.tolist():
df3 = results_filtered.loc["EI24"]
axs[i].scatter(df3.get("log2FC"), df3.get("-log10(p-value)"), color = "#F58235", label = "EI24", edgecolor = "white")
if "ATG5" in results_filtered.index.tolist():
df4 = results_filtered.loc["ATG5"]
axs[i].scatter(df4.get("log2FC"), df4.get("-log10(p-value)"), color = "#B3262A", label = "ATG5", edgecolor = "white")
#add annotation
df_annotate_write_out = results_filtered[np.array([results_filtered[f"log2FC"] >= 5, results_filtered[f"-log10(p-value)"] >= 3]).T.any(axis = 1).tolist()].get([f"log2FC", f"p-value"])
df_annotate_write_out = df_annotate_write_out.loc[[x for x in df_annotate_write_out.index.tolist() if x not in autophagy_genes_list]]
df_annotate_write_out.to_csv(f"../data/Screen_Analysis/gene_annotation_p_values_log2FC_scatterplot_{_bin}.tsv")
df_annotate = results_filtered[np.array([results_filtered[f"log2FC"] >= 5, results_filtered[f"-log10(p-value)"] >= 3]).T.any(axis = 1).tolist()]
labels = df_annotate.index.tolist()
labels = [x for x in labels if x not in autophagy_genes_list]
df_annotate = df_annotate.loc[labels, :]
x = df_annotate[f"log2FC"].tolist()
y = df_annotate[f"-log10(p-value)"].tolist()
for ix, txt in enumerate(labels):
axs[i].annotate(txt, (x[ix], y[ix]) )
axs[i].set_xlabel(r"log$_{2}$FC")
axs[i].set_ylabel(r"-log$_{10}$p-value")
axs[i].set_title(_bin)
axs[i].set_aspect(1.0/axs[i].get_data_ratio(), adjustable='box')
if i == 1:
axs[i].legend(bbox_to_anchor=(1.3, 1))
fig.tight_layout()
fig.savefig("../plots/scatterplot_p_values_vs_log2FC.pdf")
number of tests performed in bin1: 488
number of tests performed in bin2: 914

[ ]: