Skip to content

Get started

The single cell objects can be explored in R using the Monocle3 package. They contain UMAP coordinates for immediate plotting of your favorite genes, timepoints, or genetic perturbations. For help with more in-depth custom analyses, please visit the Monocle3 tutorial (linked above).

The R code used to produce figures in our paper is available via our GitHub repository.

However, we've included some instructions here to start exploring:

Loading and subsetting the data

library(monocle3)
library(ggplot2)

# Load the zebrafish perturbation data
perturb_cds <- readRDS("zscape_perturb_cds.RDS")

# Subset the noto and tbxta crispants and controls
noto_tbxta_cds <- perturb_cds[,colData(perturb_cds)$gene_target %in% c("noto", "tbxta", "ctrl-inj")]

# Don't forget to select timepoints too! 
noto_tbxta_cds <- perturb_cds[,colData(perturb_cds)$timepoint %in% c("18", "24", "36")]

# Plot the expression of epyc across conditions 
monocle3::plot_cells(noto_tbxta_cds, genes = c("epyc")) +
    # facet the plot by gene target 
    facet_wrap(~gene_target) +
    # fix the color scale
    scale_color_viridis_c()

Cell type-specific differential expression analysis for a perturbation

library(monocle3)
library(ggplot2)

# Load the zebrafish perturbation data
perturb_cds <- readRDS("zscape_perturb_cds.RDS")

# List the different genetic perturbations 
unique(colData(perturb_cds)$gene_target)

# Subset smoothened crispants and notochord cells (for example)
smo_nc_cds <- perturb_cds[,colData(perturb_cds)$gene_target %in% c("smo", "ctrl-inj") & 
            colData(perturb_cds)$cell_type_broad %in% c("notochord")]

# Now select the timepoints 
# note that the controls will include timepoints that are not included for smo
smo_nc_cds <- smo_nc_cds[,colData(smo_nc_cds)$timepoint %in% c("24", "36")]

# Use monocle3 to perform DE testing between controls and smo notochord cells
deg_cds <- smo_nc_cds[Matrix::rowSums(SingleCellExperiment::counts(smo_nc_cds) > 0) > 10,] # filter for expressed genes
fits <- fit_models(deg_cds, model_formula_str = "~gene_target", cores = 4)
mod_coef <- coefficient_table(fits)

# Parse the results table
smo.celltype.degs <- mod_coef %>% 
  mutate(up_in = case_when(
    estimate < 0 ~ "ctrl",
    estimate > 0 ~ "smo"))

smo.celltype.degs <- smo.celltype.degs %>% 
    filter(term != "(Intercept)") %>%
    select(up_in, id, gene_short_name, q_value, estimate, -model_summary, -model)

# filter for q-value < 0.05
sig.smo.celltype.degs <- smo.celltype.degs %>%
            filter(q_value < 0.05 ) %>%  
            arrange(up_in, q_value)

# check the deg number for each direction
sig.smo.celltype.degs %>% 
    group_by(up_in) %>%
    tally()

# save the differentially expressed genes as a csv
fwrite(sig.smo.celltype.degs, "smo_nc_24-36h_q05_degs.csv", 
        sep = ",", na = "NA")

Background art by Elena Bansh.