Introduction to Normaliztion and Differential Expression Analysis Functions in PRECISION.seq • precision.seq

rm(list = ls())
library(precision.seq)
library(DESeq2)

This package provides functions for nine depth normalization methods and for two differential expression analysis methods.

Functions for Depth Normalization

Among the nine normalization methods, six are re-scaling-based and three are regression-based. The input for each normalization function includes (1) sequencing count data in the format of a data frame or a data matrix (with columns for samples and rows for miRNAs/markers) and (2) the sample groups in the form of a character vector. For re-scaling-based methods, the output is a matrix of the normalized data and a vector of the estimated sample-specific scaling factors. For regression-based methods, the output is a matrix of the un-normalized data and a vector of the estimated adjustment factors.

test.TC <- norm.TC(data.test, data.group)
test.UQ <- norm.UQ(data.test, data.group)
test.med <- norm.med(data.test, data.group)
test.TMM <- norm.TMM(data.test, data.group)
test.DESeq <- norm.DESeq(data.test, data.group)
test.PoissonSeq <- norm.PoissonSeq(data.test)
test.QN <- norm.QN(data.test)
test.SVA <- norm.SVA(data.test, data.group)
test.RUVg <- norm.RUVg(data.test, data.group)
test.RUVs <- norm.RUVs(data.test, data.group)
test.RUVr <- norm.RUVr(data.test, data.group)

head(test.TMM$scaling.factor)
#> [1] 1.1988963 0.4295173 1.2377190 1.3487078 1.1959610 1.8503994
head(test.TMM$dat.normed[,1:5])
#>                    MXF2516      MXF742      MXF772       MXF832       MXF836
#> hsa-let-7a-2*      0.00000     0.00000     0.00000     1.482901     1.672295
#> hsa-let-7a(3)  70709.20148 14821.28856 48696.83516 53595.004767 39640.925166
#> hsa-let-7a*(2)  2829.26889    34.92292   126.84623   269.146516   102.846164
#> hsa-let-7b      7376.78480 10437.29762 20702.59825 20564.869850 14440.270362
#> hsa-let-7b*       24.18891    32.59473    58.97946    83.042451    71.072552
#> hsa-let-7c     23107.92019  1008.10838 13457.01204  5153.822120 11593.187526

head(test.SVA$adjust.factor)
#>             [,1]
#> [1,]  0.28535304
#> [2,]  0.03121055
#> [3,] -0.07462108
#> [4,] -0.06139403
#> [5,] -0.07546172
#> [6,] -0.12748833
head(test.SVA$dat.normed[,1:5])
#>                     MXF2516       MXF742        MXF772       MXF832
#> hsa-let-7a-2*  3.126439e+00    0.3419549    -0.8175775     1.327343
#> hsa-let-7a(3)  1.143937e+05 9605.7728904 52527.0508087 65911.068255
#> hsa-let-7a*(2) 3.041342e+03  -23.3533362   248.6987096   438.444546
#> hsa-let-7b     2.310206e+04 6042.4781503 21895.4551525 24668.362809
#> hsa-let-7b*    6.650161e+01   18.1017473    63.1931625   103.931485
#> hsa-let-7c     3.792122e+04 1550.5108226 13984.1515878  4752.753259
#>                      MXF836
#> hsa-let-7a-2*      1.173212
#> hsa-let-7a(3)  39575.789137
#> hsa-let-7a*(2)   215.731738
#> hsa-let-7b     13499.451384
#> hsa-let-7b*       75.082684
#> hsa-let-7c     11163.051990

Functions for Differential Expression Analysis

Two methods for assessing differential expression are edgeR and voom-limma. The input can be either the normalized dataset (from re-scaling normalization) or the un-normalized dataset plus adjustment factors (from regression-based normalization). The output includes the per-marker p-values, and the group mean differences, and the names of the significant markers (using a p-value cutoff specified by the user).

edgeR.benchmark <- DE.edgeR(data.benchmark, data.group)
voom.benchmark <- DE.voom(data.benchmark, data.group)

edgeR.test.TMM <- DE.edgeR(test.TMM$dat.normed, data.group)
voom.test.TMM <- DE.voom(test.TMM$dat.normed, data.group)

edgeR.test.RUVr <- DE.edgeR(data.test, data.group, 
                            normalized = FALSE,
                            adjust = test.RUVr$adjust.factor)
voom.test.RUVr <- DE.voom(data.test, data.group, 
                          normalized = FALSE,
                          adjust = test.RUVr$adjust.factor)

head(voom.benchmark$id.list)
#> [1] "hsa-miR-99a*"   "hsa-miR-26a-2*" "hsa-miR-146b"   "hsa-miR-7(3)"  
#> [5] "hsa-miR-138(2)" "hsa-miR-138-1*"
head(voom.benchmark$p.val)
#>   hsa-miR-99a* hsa-miR-26a-2*   hsa-miR-146b   hsa-miR-7(3) hsa-miR-138(2) 
#>   2.020888e-05   4.215158e-05   7.168172e-05   7.320699e-05   8.713003e-05 
#> hsa-miR-138-1* 
#>   1.607036e-04

head(voom.test.RUVr$id.list)
#> [1] "hsa-miR-125b(2)" "hsa-miR-99b"     "hsa-miR-99a*"    "hsa-miR-99a"    
#> [5] "hsa-miR-125a"    "hsa-miR-146b"
head(voom.test.RUVr$p.val)
#> hsa-miR-125b(2)     hsa-miR-99b    hsa-miR-99a*     hsa-miR-99a    hsa-miR-125a 
#>    1.138177e-07    1.177162e-07    3.501843e-07    6.603158e-07    2.243507e-06 
#>    hsa-miR-146b 
#>    3.071453e-06

Functions for Connecting Normalization and Differential Expression Analysis

A function is included in the package for connecting the step of depth normalization and the step of differential expression analysis.

res.norm <- pip.norm(data.test, data.group, "norm.TMM")
res.DE <- pip.norm.DE(data.test, data.group, "norm.TMM")

Example usage for SVA Normalization

Some normalization methods, such as the SVA method, compute “adjustment” instead of normalized counts. For illustration, we repeat the PRECISION.seq analysis using SVA.

# Compute normalization 
sva.norm <- norm.SVA(data.test, data.group)
# sva.norm contains the 
res.sva <- precision.seq(sva.norm$dat.normed, 
                         method.name="sva", 
                         adjust.factors=sva.norm$adjust.factor, 
                         DE.method="DE.voom", 
                         Pval=0.01)