2023年3月

Installation

I recommend using Miniconda to manage packages and creating an entirely new environment to install scanpy.

conda install -c conda-forge scanpy python-igraph leidenalg

Basic concepts before you start

AnnData class

The "AnnData" class is the interface to Scanpy operations: we must first create an AnnData object and then pass it as an argument to the Scanpy functions.
2023-03-28T15:06:16.png
At the most basic level, an AnnData object adata stores a data matrix adata.X, annotation of observations adata.obs and variables adata.var as pd.DataFrame and unstructured annotation adata.uns as dict. Names of observations and variables can be accessed via adata.obs_names and adata.var_names, respectively. AnnData objects can be sliced like dataframes, for example, adata_subset = adata[:, list_of_gene_names]

import scanpy as sc
import os
# Read the raw count table for sample GSM5226574 into an AnnData object.
adata = sc.read_csv('../data/GSM5226574_C51ctr_raw_counts.csv')
# Swap observations and variables.
# NOTE(review): presumably the CSV is stored genes x cells and this puts cells
# in .obs and genes in .var -- confirm against the file.
adata = adata.transpose()
# Display the object summary (notebook / REPL echo).
adata

Let's see the properties of AnnData.

type(adata) # class of the container object itself
type(adata.X) # class of the stored data matrix
adata.var # variable annotations: a DataFrame describing the genes
adata.obs # observation annotations: a DataFrame describing the cells

adata.var_names # returns an Index object holding the gene names
adata.obs_names # returns an Index object holding the cell barcodes

2023-03-28T15:28:07.png

安装包

首先加载所需要的包(如果尚未安装,请先安装)

library(Seurat)
library(stringr)
library(R.utils)
library(tidyverse)
library(ggsci)

获得绘图数据

Seurat包中的FetchData函数可以从Seurat对象中提取指定的变量(例如基因表达量或metadata中的列),并且返回一个数据框。

# Pull the expression of the selected marker genes plus the "celltype" column
# out of `sce`. unique() matters here: the same marker gene is sometimes listed
# for more than one cell group, and each gene should be requested only once.
plot_data <- FetchData(
  object = sce,
  vars = c(unique(marker_selected_1$gene), "celltype"), # columns to extract
  slot = "data"
)
# Use a generic name for the grouping column.
plot_data <- dplyr::rename(plot_data, group = as.name("celltype"))
# Long format: one row per (cell, gene) pair, gene name in `Feat`, value in `Expr`.
plot_data <- tidyr::pivot_longer(
  plot_data,
  cols = -group,
  names_to = "Feat",
  values_to = "Expr"
)

我们将绘图封装成一个函数

#' Stacked violin plot of marker-gene expression with a cluster colour bar
#'
#' Builds two ggplot panels and places them side by side: a narrow column of
#' tiles that colours every marker gene by the `cluster` recorded for it in
#' `MarkerSelected`, and a grid of horizontal violins (one facet column per
#' group, one row per gene) showing the expression values returned by
#' `FetchData()`.
#'
#' @param object Object handed to `FetchData()` (a Seurat object in this
#'   tutorial).
#' @param groupBy Single string naming the column fetched alongside the genes
#'   and used to split the violins into facet columns.
#' @param MarkerSelected Data frame with at least the columns `gene` and
#'   `cluster`. The order of `gene` fixes the top-to-bottom order of the rows.
#'   Genes listed more than once are fetched and levelled only once.
#' @return The two panels combined with patchwork (tile column on the left).
ViolinPlot <- function(object, groupBy, MarkerSelected) {
  # A gene can be listed for several clusters. Duplicated names must be
  # collapsed: factor() rejects duplicated levels, and each gene only needs to
  # be fetched once.
  marker_genes <- unique(MarkerSelected$gene)

  # Axis labeller: blank every break except the second-to-last one, so each
  # facet shows a single expression tick label. Written to also cope with
  # fewer than two breaks (all labels blank in that case).
  label_penultimate_break <- function(breaks) {
    n_breaks <- length(breaks)
    labels <- rep("", times = n_breaks)
    if (n_breaks >= 2) {
      labels[n_breaks - 1] <- as.character(breaks[n_breaks - 1])
    }
    labels
  }

  # (1) Plot data 1: expression values in long format, one row per
  #     (cell, gene) pair with the grouping column renamed to `group`.
  plot_data <- FetchData(
    object = object,
    vars = c(marker_genes, groupBy),
    slot = "data"
  ) %>%
    dplyr::rename(group = as.name(groupBy)) %>%
    tidyr::pivot_longer(cols = -group, names_to = "Feat", values_to = "Expr")

  # (2) Plot data 2: gene-to-cluster table for the colour bar.
  ident_plot <- MarkerSelected %>%
    dplyr::select(cluster, gene)

  # (3) Plotting.
  # Violin panel: fct_rev() puts the first marker gene at the top of the axis.
  figure_1 <- ggplot(
    data = plot_data,
    mapping = aes(
      x = Expr,
      y = fct_rev(factor(x = Feat, levels = marker_genes)),
      fill = group,
      label = group
    )
  ) +
    geom_violin(scale = "width", adjust = 1, trim = TRUE) +
    scale_x_continuous(expand = c(0, 0), labels = label_penultimate_break) +
    facet_grid(cols = vars(group), scales = "free") +
    cowplot::theme_cowplot(font_family = "Arial") +
    scale_fill_manual(values = paletteer::paletteer_d("ggsci::category20c_d3")) +
    xlab("Expression Level") +
    ylab("") +
    theme(
      legend.position = "none",
      panel.spacing = unit(x = 0, units = "lines"),
      # Remove the x and y axis lines (axis ticks are not affected).
      axis.line = element_blank(),
      panel.background = element_rect(fill = NA, color = "black"),
      # Remove the background of the facet strip headers.
      strip.background = element_blank(),
      strip.text = element_text(
        color = "black", size = 10, family = "Arial", face = "bold"
      ),
      axis.text.x = element_text(color = "black", family = "Arial", size = 11),
      axis.text.y = element_blank(),
      axis.title.x = element_text(color = "black", family = "Arial", size = 15),
      axis.ticks.x = element_line(color = "black", lineend = "round"),
      axis.ticks.y = element_blank(),
      axis.ticks.length = unit(x = 0.1, units = "cm")
    )

  # Colour-bar panel: one tile per marker gene, filled by its cluster; this
  # panel also carries the gene names as y-axis labels.
  figure_2 <- ggplot(
    data = ident_plot,
    mapping = aes(
      x = 1,
      y = fct_rev(factor(x = gene, levels = marker_genes)),
      fill = cluster
    )
  ) +
    geom_tile() +
    theme_bw(base_size = 12) +
    # Mind the number of colours in this palette: with more clusters than
    # colours the plot cannot be drawn.
    scale_fill_manual(values = paletteer::paletteer_d("ggsci::category20c_d3")) +
    scale_x_continuous(expand = c(0, 0)) +
    scale_y_discrete(expand = c(0, 0)) +
    guides(fill = guide_legend(
      direction = "vertical",
      label.position = "right",
      title.theme = element_blank(),
      keyheight = 0.5,
      nrow = 2
    )) +
    xlab("Feature") +
    theme(
      legend.text = element_text(family = "Arial", color = "black", size = 11),
      legend.position = "bottom",
      legend.justification = "left",
      legend.margin = margin(0, 0, 0, 0),
      legend.box.margin = margin(-10, 5, 0, 0),
      panel.spacing = unit(0, "lines"),
      panel.background = element_blank(),
      panel.border = element_blank(),
      plot.background = element_blank(),
      plot.margin = unit(x = c(0, 0, 0, 0), units = "cm"),
      axis.title.y = element_blank(),
      axis.text.y = element_text(
        angle = 0, hjust = 1, vjust = 0.5, color = "black", family = "Arial"
      ),
      axis.title.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.text.x = element_blank()
    )

  # Narrow colour bar on the left (3% of the width), violins on the right.
  figure_2 + figure_1 + patchwork::plot_layout(nrow = 1, widths = c(0.03, 0.97))
}