回归分析

getwd()
setwd("/data/cotton/zhenpingliu/DNA_methlation_rawData/TM-1")
##### CHG repeat
# repeate 1
data1 <- read.table("CHG_context_TM1_qs_rep1_count.txt_binom.txt_fdr.bed_end_sorted")
# repeate2
data2 <- read.table("CHG_context_TM1_qs_rep2_count.txt_binom.txt_fdr.bed_end_sorted")

##### CpG repeate
data1 <- read.table("CpG_context_TM1_qs_rep1_count.txt_binom.txt_fdr.bed_end_sorted")
data2 <- read.table("CpG_context_TM1_qs_rep2_count.txt_binom.txt_fdr.bed_end_sorted")

#### CHH repeate

data1 <- read.table("CHH_context_TM1_qs_rep1_count.txt_binom.txt_fdr.bed_end_sorted")
data2 <- read.table("CHH_context_TM1_qs_rep2_count.txt_binom.txt_fdr.bed_end_sorted")
## merge data
data1$V3 <- data2$V2
regression <- cor.test(data1$V2, data2$V2, method = c("pearson"))
regression
## regression R2
R2 <- sprintf("italic(R^2) == %.2f", regression$estimate)
## regression p.value
Pvalue <- sprintf("italic(P.value) == %s", "2.2e-16")
## regression label
regressionLabel <- data.frame(r2 = R2, pvalue = Pvalue)
regressionLabel
library(ggplot2)
## plot scatterplot
p <- ggplot(data1, aes(x = V2, y = V3)) +
  geom_point()
# linear regression analysis
p <- p + geom_smooth(method = lm, colour = "red", size = 2)
p <- p + geom_text(
  data = regressionLabel,
  mapping = aes(x = 0.11, y = 1, label = r2, size = 2.5),
  parse = TRUE,
  inherit.aes = FALSE,
  show.legend = NA
)
# add regression annotion
p <- p + geom_text(
  data = regressionLabel,
  mapping = aes(x = 0.17, y = 0.9, label = pvalue, size = 2.5),
  parse = TRUE,
  inherit.aes = FALSE,
  show.legend = NA,
)
p <- p + theme(
  panel.grid = element_blank(),
  panel.background = element_blank(),
  axis.line = element_line(size = 0.5, color = "black"),
  axis.text.x = element_text(size = "10"),
  axis.text.y = element_text(size = "10"),
  axis.title.y = element_text(size = "18", color = "red"),
  axis.title.x = element_text(size = "18", color = "red"),
  legend.position = "none",
)
p + xlab("CpG Repeat 1") + ylab("CpG Repeate 2")

Last updated