# vignettes/cor_dist.Rmd
# cor_dist.Rmd
library(massstat)
library(massdataset)
library(magrittr)
library(dplyr)
# Load the example dataset by name and print it so its summary is shown.
data(list = "liver_aging_pos")
liver_aging_pos
#> --------------------
#> massdataset version: 0.01
#> --------------------
#> 1.expression_data:[ 21607 x 24 data.frame]
#> 2.sample_info:[ 24 x 4 data.frame]
#> 3.variable_info:[ 21607 x 3 data.frame]
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 0 variables x 0 MS2 spectra]
#> --------------------
#> Processing information (extract_process_info())
#> Creation ----------
#> Package Function.used Time
#> 1 massdataset create_mass_dataset() 2021-12-23 00:24:02
# Pull the sample_id values of every sample whose group is "QC".
qc_id <- liver_aging_pos %>%
  activate_mass_dataset(what = "sample_info") %>%
  dplyr::filter(group == "QC") %>%
  dplyr::pull(sample_id)

# Compute RSD using only the QC samples selected above.
# NOTE(review): presumably this adds an `rsd` column to variable_info (the
# next step filters on it) -- confirm against the massdataset docs.
object <- liver_aging_pos %>%
  mutate_rsd(according_to_samples = qc_id)
# Keep only the features with rt > 100, mz > 150 and rsd < 30.
# Each condition is applied as its own filter step on variable_info.
object <- object %>%
  activate_mass_dataset(what = "variable_info") %>%
  dplyr::filter(rt > 100) %>%
  dplyr::filter(mz > 150) %>%
  dplyr::filter(rsd < 30)
# Keep only the week-24 samples (group == "24W"), then show the dimensions
# of the filtered object.
object <- object %>%
  activate_mass_dataset(what = "sample_info") %>%
  dplyr::filter(group == "24W")
dim(object)
#> [1] 751 10
# Add 1 to every value, take the base-10 logarithm, then scale the data
# with method = "auto".
object <- log(object + 1, 10) %>%
  scale_data(method = "auto")
# Correlation across variables in "wider" form; the result exposes
# `correlation`, `p_value` and `n` components, inspected below.
cor_data <- cor_mass_dataset(
  object,
  margin = "variable",
  data_type = "wider"
)
head(cor_data$correlation[, seq_len(5)])
#> M150T707 M151T618 M151T609 M152T412 M153T518
#> M150T707 1.00000000 0.2727273 -0.2000000 0.2848485 0.1151515
#> M151T618 0.27272727 1.0000000 -0.2363636 0.6969697 0.1151515
#> M151T609 -0.20000000 -0.2363636 1.0000000 -0.4909091 0.3939394
#> M152T412 0.28484848 0.6969697 -0.4909091 1.0000000 -0.1030303
#> M153T518 0.11515152 0.1151515 0.3939394 -0.1030303 1.0000000
#> M153T577 0.04242424 -0.1757576 0.1636364 -0.3090909 -0.3818182
# First rows and first five columns of the p_value component.
head(cor_data$p_value[, seq_len(5)])
#> M150T707 M151T618 M151T609 M152T412 M153T518
#> M150T707 NA 0.44583834 0.5795840 0.42503815 0.7514197
#> M151T618 0.4458383 NA 0.5108853 0.02509668 0.7514197
#> M151T609 0.5795840 0.51088532 NA 0.14965567 0.2599978
#> M152T412 0.4250382 0.02509668 0.1496557 NA 0.7769985
#> M153T518 0.7514197 0.75141965 0.2599978 0.77699846 NA
#> M153T577 0.9073638 0.62718834 0.6514773 0.38484123 0.2762553
# First rows and first five columns of the n component.
head(cor_data$n[, seq_len(5)])
#> M150T707 M151T618 M151T609 M152T412 M153T518
#> M150T707 10 10 10 10 10
#> M151T618 10 10 10 10 10
#> M151T609 10 10 10 10 10
#> M152T412 10 10 10 10 10
#> M153T518 10 10 10 10 10
#> M153T577 10 10 10 10 10
# Same correlation, requested in "longer" form and previewed with head().
cor_data <- cor_mass_dataset(
  object,
  margin = "variable",
  data_type = "longer"
)
head(cor_data)
#> from to correlation p_value number p_adjust
#> 1 M151T618 M150T707 0.2727273 0.44583834 10 0.9625970
#> 2 M151T609 M150T707 -0.2000000 0.57958400 10 0.9723318
#> 3 M151T609 M151T618 -0.2363636 0.51088532 10 0.9683021
#> 4 M152T412 M150T707 0.2848485 0.42503815 10 0.9614259
#> 5 M152T412 M151T618 0.6969697 0.02509668 10 0.6847366
#> 6 M152T412 M151T609 -0.4909091 0.14965567 10 0.8874314
library(massstat)
library(massdataset)
library(tidyverse)
# Load the five example tables used to build a mass_dataset object.
data(
  list = c(
    "expression_data",
    "sample_info",
    "sample_info_note",
    "variable_info",
    "variable_info_note"
  )
)
# Assemble a mass_dataset object from the example tables and print it.
# Uses `<-` for assignment, consistent with the rest of this script.
object <- create_mass_dataset(
  expression_data = expression_data,
  sample_info = sample_info,
  variable_info = variable_info,
  sample_info_note = sample_info_note,
  variable_info_note = variable_info_note
)
object
#> --------------------
#> massdataset version: 0.99.8
#> --------------------
#> 1.expression_data:[ 1000 x 8 data.frame]
#> 2.sample_info:[ 8 x 4 data.frame]
#> 3.variable_info:[ 1000 x 3 data.frame]
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 0 variables x 0 MS2 spectra]
#> --------------------
#> Processing information (extract_process_info())
#> create_mass_dataset ----------
#> Package Function.used Time
#> 1 massdataset create_mass_dataset() 2022-02-22 01:33:11
# Take the base-2 logarithm of the dataset and pass the result to scale().
# Uses `<-` for assignment, consistent with the rest of this script.
# NOTE(review): no offset is added before log() here, unlike the earlier
# log10 step -- confirm the example data contains no zeros.
x <- object %>%
  log(2) %>%
  scale()
# Distances with margin = "variable"; preview the first five columns of the
# result after converting it to a matrix.
variable_distance <- dist_mass_dataset(x = x, margin = "variable")
head(as.matrix(variable_distance)[, seq_len(5)])
#> M136T55_2_POS M79T35_POS M307T548_POS M183T224_POS M349T47_POS
#> M136T55_2_POS 0.000000 2.595449 4.743621 4.708308 1.940902
#> M79T35_POS 2.595449 0.000000 4.219588 3.106812 1.620770
#> M307T548_POS 4.743621 4.219588 0.000000 2.568996 3.986112
#> M183T224_POS 4.708308 3.106812 2.568996 0.000000 3.082528
#> M349T47_POS 1.940902 1.620770 3.986112 3.082528 0.000000
#> M182T828_POS 3.010099 2.743352 3.139591 2.175595 2.895630
# Distances with margin = "sample"; preview the first five columns of the
# result after converting it to a matrix.
sample_distance <- dist_mass_dataset(x = x, margin = "sample")
head(as.matrix(sample_distance)[, seq_len(5)])
#> Blank_3 Blank_4 QC_1 QC_2 PS4P1
#> Blank_3 0.00000 46.24363 46.22024 54.26607 47.31000
#> Blank_4 46.24363 0.00000 53.89434 60.88273 55.83349
#> QC_1 46.22024 53.89434 0.00000 40.44652 39.65971
#> QC_2 54.26607 60.88273 40.44652 0.00000 46.26019
#> PS4P1 47.31000 55.83349 39.65971 46.26019 0.00000
#> PS4P2 47.06702 55.78646 38.33452 49.76545 39.14705
# Record the R version and package versions used to produce the output.
utils::sessionInfo()
#> R version 4.1.2 (2021-11-01)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Big Sur 10.16
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
#>
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.8 purrr_0.3.4
#> [5] readr_2.1.2 tidyr_1.2.0 tibble_3.1.6 tidyverse_1.3.1
#> [9] ggfortify_0.4.14 ggplot2_3.3.5 magrittr_2.0.2 masstools_0.99.3
#> [13] massdataset_0.99.8 massstat_0.99.6
#>
#> loaded via a namespace (and not attached):
#> [1] readxl_1.3.1 snow_0.4-4 backports_1.4.1
#> [4] circlize_0.4.14 Hmisc_4.6-0 systemfonts_1.0.3
#> [7] plyr_1.8.6 igraph_1.2.11 lazyeval_0.2.2
#> [10] splines_4.1.2 BiocParallel_1.28.3 crosstalk_1.2.0
#> [13] leaflet_2.1.0 digest_0.6.29 yulab.utils_0.0.4
#> [16] foreach_1.5.2 htmltools_0.5.2 fansi_1.0.2
#> [19] checkmate_2.0.0 memoise_2.0.1 cluster_2.1.2
#> [22] doParallel_1.0.17 openxlsx_4.2.5 tzdb_0.2.0
#> [25] limma_3.50.0 ComplexHeatmap_2.10.0 modelr_0.1.8
#> [28] extrafont_0.17 matrixStats_0.61.0 rARPACK_0.11-0
#> [31] extrafontdb_1.0 pkgdown_2.0.2 jpeg_0.1-9
#> [34] colorspace_2.0-2 rvest_1.0.2 ggrepel_0.9.1
#> [37] haven_2.4.3 textshaping_0.3.6 xfun_0.29
#> [40] crayon_1.5.0 jsonlite_1.7.3 impute_1.68.0
#> [43] survival_3.2-13 iterators_1.0.14 glue_1.6.1
#> [46] gtable_0.3.0 zlibbioc_1.40.0 GetoptLong_1.0.5
#> [49] Rttf2pt1_1.3.9 shape_1.4.6 BiocGenerics_0.40.0
#> [52] scales_1.1.1 vsn_3.62.0 DBI_1.1.2
#> [55] Rcpp_1.0.8 mzR_2.28.0 viridisLite_0.4.0
#> [58] htmlTable_2.4.0 clue_0.3-60 gridGraphics_0.5-1
#> [61] foreign_0.8-82 preprocessCore_1.56.0 clisymbols_1.2.0
#> [64] Formula_1.2-4 stats4_4.1.2 MsCoreUtils_1.6.0
#> [67] htmlwidgets_1.5.4 httr_1.4.2 RColorBrewer_1.1-2
#> [70] ellipsis_0.3.2 pkgconfig_2.0.3 XML_3.99-0.8
#> [73] dbplyr_2.1.1 nnet_7.3-17 sass_0.4.0
#> [76] utf8_1.2.2 ggplotify_0.1.0 reshape2_1.4.4
#> [79] tidyselect_1.1.1 rlang_1.0.1 munsell_0.5.0
#> [82] cellranger_1.1.0 tools_4.1.2 cachem_1.0.6
#> [85] cli_3.2.0 generics_0.1.2 broom_0.7.12
#> [88] evaluate_0.15 fastmap_1.1.0 mzID_1.32.0
#> [91] yaml_2.3.4 ragg_1.2.1 knitr_1.37
#> [94] fs_1.5.2 tidygraph_1.2.0 zip_2.2.0
#> [97] ncdf4_1.19 pbapply_1.5-0 xml2_1.3.3
#> [100] compiler_4.1.2 rstudioapi_0.13 plotly_4.10.0
#> [103] png_0.1-7 affyio_1.64.0 reprex_2.0.1
#> [106] bslib_0.3.1 stringi_1.7.6 RSpectra_0.16-0
#> [109] desc_1.4.0 MSnbase_2.20.4 lattice_0.20-45
#> [112] ProtGenerics_1.26.0 Matrix_1.4-0 ggsci_2.9
#> [115] vctrs_0.3.8 pillar_1.7.0 lifecycle_1.0.1
#> [118] BiocManager_1.30.16 jquerylib_0.1.4 MALDIquant_1.21
#> [121] GlobalOptions_0.1.2 data.table_1.14.2 corpcor_1.6.10
#> [124] patchwork_1.1.1 R6_2.5.1 latticeExtra_0.6-29
#> [127] pcaMethods_1.86.0 affy_1.72.0 gridExtra_2.3
#> [130] IRanges_2.28.0 codetools_0.2-18 fastDummies_1.6.3
#> [133] MASS_7.3-55 assertthat_0.2.1 rprojroot_2.0.2
#> [136] rjson_0.2.21 withr_2.4.3 S4Vectors_0.32.3
#> [139] hms_1.1.1 parallel_4.1.2 mixOmics_6.18.1
#> [142] grid_4.1.2 rpart_4.1.16 rmarkdown_2.11
#> [145] lubridate_1.8.0 Biobase_2.54.0 base64enc_0.1-3
#> [148] ellipse_0.4.2