R: Create Boxplots for TCGA Datasets
Create Boxplots for TCGA Datasets


Function creates boxplots (geom_boxplot) for TCGA Datasets.


boxplotTCGA(data, x, y, fill = x, coord.flip = TRUE, facet.names = NULL,
  ylab = y, xlab = x, legend.title = xlab, legend = "top", ...)



A data.frame from TCGA study containing variables to be plotted.


A character name of the variable containing groups.


A character name of the continuous variable to be plotted.


A character name of the fill variable. By default, the same as x.


Whether to flip coordinates.


A character vector of length at most 2 containing the names of variables used to produce facets. See examples.


The name of y label. Remember about coord.flip.


The name of x label. Remember about coord.flip.


A character with legend's title.


A character specifying the legend position. Allowed values are one of c("top", "bottom", "left", "right", "none"). The default is "top". To remove the legend, use legend = "none".


Further arguments passed to geom_boxplot.


# Perform plot.
# dplyr supplies %>%, rename() and filter(); RTCGA.rnaseq is expected to
# provide the *.rnaseq datasets used below.
library(RTCGA.rnaseq)
library(dplyr)

# Extract the MET expression column for four cohorts and keep only rows whose
# barcode has "01" at positions 14-15 (cancer samples).
ACC_BLCA_BRCA_OV.rnaseq <-
  expressionsTCGA(ACC.rnaseq, BLCA.rnaseq, BRCA.rnaseq, OV.rnaseq,
    extract.cols = "MET|4233") %>%
  rename(cohort = dataset, MET = `MET|4233`) %>%
  filter(substr(bcr_patient_barcode, 14, 15) == "01")

# Raw and log-transformed expression per cohort.
boxplotTCGA(ACC_BLCA_BRCA_OV.rnaseq, "cohort", "MET")
boxplotTCGA(ACC_BLCA_BRCA_OV.rnaseq, "cohort", "log1p(MET)")

# Cohorts ordered by the median / maximum of log1p(MET).
boxplotTCGA(ACC_BLCA_BRCA_OV.rnaseq,
  "reorder(cohort,log1p(MET), median)", "log1p(MET)")
boxplotTCGA(ACC_BLCA_BRCA_OV.rnaseq,
  "reorder(cohort,log1p(MET), max)", "log1p(MET)")

# Custom axis labels, legend title and legend position.
boxplotTCGA(ACC_BLCA_BRCA_OV.rnaseq,
  "reorder(cohort,log1p(MET), median)", "log1p(MET)",
  xlab = "Cohort Type", ylab = "Logarithm of MET")
boxplotTCGA(ACC_BLCA_BRCA_OV.rnaseq,
  "reorder(cohort,log1p(MET), median)", "log1p(MET)",
  xlab = "Cohort Type", ylab = "Logarithm of MET", legend.title = "Cohorts")
boxplotTCGA(ACC_BLCA_BRCA_OV.rnaseq,
  "reorder(cohort,log1p(MET), median)", "log1p(MET)",
  xlab = "Cohort Type", ylab = "Logarithm of MET", legend.title = "Cohorts",
  legend = "bottom")

## facet example
library(RTCGA.mutations)
library(dplyr)

# TP53 mutation records from cancer tissue (barcode positions 14-15 == "01"),
# with the barcode truncated to its first 12 characters.
ACC_BLCA_BRCA_OV.mutations <-
  mutationsTCGA(BRCA.mutations, OV.mutations, ACC.mutations, BLCA.mutations) %>%
  filter(Hugo_Symbol == "TP53") %>%
  filter(substr(bcr_patient_barcode, 14, 15) == "01") %>% # cancer tissue
  mutate(bcr_patient_barcode = substr(bcr_patient_barcode, 1, 12))

# All mutation records for the same four cohorts, unfiltered.
ACC_BLCA_BRCA_OV.mutations_all <-
  mutationsTCGA(BRCA.mutations, OV.mutations, ACC.mutations, BLCA.mutations)

# Label each expression row by TP53 status.
ACC_BLCA_BRCA_OV.rnaseq_TP53mutations <-
  ACC_BLCA_BRCA_OV.rnaseq %>%
  mutate(bcr_patient_barcode = substr(bcr_patient_barcode, 1, 15)) %>%
  # keep only patients for which any mutation information exists,
  # so patients without mutation data are not labelled as wild type
  filter(bcr_patient_barcode %in%
    substr(ACC_BLCA_BRCA_OV.mutations_all$bcr_patient_barcode, 1, 15)) %>%
  # barcodes in ACC_BLCA_BRCA_OV.mutations are 12 characters long, so
  # truncate to the same length before joining
  mutate(bcr_patient_barcode = substr(bcr_patient_barcode, 1, 12)) %>%
  left_join(ACC_BLCA_BRCA_OV.mutations,
    by = "bcr_patient_barcode") %>% # joined only with tumor patients
  # rows with no TP53 match have NA in the joined mutation columns
  # NOTE(review): Variant_Classification is assumed to be a column of the
  # mutations data - confirm against RTCGA.mutations
  mutate(TP53 = ifelse(!is.na(Variant_Classification), "Mut", "WILD")) %>%
  select(cohort, MET, TP53)

# One facet per TP53 status.
boxplotTCGA(ACC_BLCA_BRCA_OV.rnaseq_TP53mutations,
  "reorder(cohort,log1p(MET), median)", "log1p(MET)",
  xlab = "Cohort Type", ylab = "Logarithm of MET",
  legend.title = "Cohorts", legend = "bottom",
  facet.names = c("TP53"))

# Same plot, with boxes filled by TP53 status instead of faceting.
boxplotTCGA(ACC_BLCA_BRCA_OV.rnaseq_TP53mutations,
  "reorder(cohort,log1p(MET), median)", "log1p(MET)",
  xlab = "Cohort Type", ylab = "Logarithm of MET",
  legend.title = "Cohorts", legend = "bottom",
  fill = c("TP53"))


> expressionsTCGA(ACC.rnaseq, BLCA.rnaseq, BRCA.rnaseq, OV.rnaseq,
+ 	extract.cols = "MET|4233") %>%
+ 	rename(cohort = dataset,
+ 	MET = `MET|4233`) %>%  
+ 	#cancer samples
+ 	filter(substr(bcr_patient_barcode, 14, 15) == "01") -> ACC_BLCA_BRCA_OV.rnaseq
> boxplotTCGA(ACC_BLCA_BRCA_OV.rnaseq, "cohort", "MET")
> boxplotTCGA(ACC_BLCA_BRCA_OV.rnaseq, "cohort", "log1p(MET)")
> boxplotTCGA(ACC_BLCA_BRCA_OV.rnaseq, "reorder(cohort,log1p(MET), median)", "log1p(MET)")
> boxplotTCGA(ACC_BLCA_BRCA_OV.rnaseq, "reorder(cohort,log1p(MET), max)", "log1p(MET)")
> boxplotTCGA(ACC_BLCA_BRCA_OV.rnaseq, "reorder(cohort,log1p(MET), median)", "log1p(MET)",
+ xlab = "Cohort Type", ylab = "Logarithm of MET")
> boxplotTCGA(ACC_BLCA_BRCA_OV.rnaseq, "reorder(cohort,log1p(MET), median)", "log1p(MET)", 
+ xlab = "Cohort Type", ylab = "Logarithm of MET", legend.title = "Cohorts")
> boxplotTCGA(ACC_BLCA_BRCA_OV.rnaseq, "reorder(cohort,log1p(MET), median)", "log1p(MET)", 
+ xlab = "Cohort Type", ylab = "Logarithm of MET", legend.title = "Cohorts", legend = "bottom")
> ## facet example
> library(RTCGA.mutations)
> library(dplyr)
> mutationsTCGA(BRCA.mutations, OV.mutations, ACC.mutations, BLCA.mutations) %>% 
+ 	filter(Hugo_Symbol == 'TP53') %>%
+ 	filter(substr(bcr_patient_barcode, 14, 15) == "01") %>% # cancer tissue
+ 	mutate(bcr_patient_barcode = substr(bcr_patient_barcode, 1, 12)) -> ACC_BLCA_BRCA_OV.mutations
> mutationsTCGA(BRCA.mutations, OV.mutations, ACC.mutations, BLCA.mutations) -> ACC_BLCA_BRCA_OV.mutations_all
> ACC_BLCA_BRCA_OV.rnaseq %>%
+ 	mutate(bcr_patient_barcode = substr(bcr_patient_barcode, 1, 15)) %>%
+ 	filter(bcr_patient_barcode %in% 
+ 	substr(ACC_BLCA_BRCA_OV.mutations_all$bcr_patient_barcode, 1, 15)) %>%
+ 	# took patients for which we had any mutation information
+ 	# so avoided patients without any information about mutations
+ 	mutate(bcr_patient_barcode = substr(bcr_patient_barcode, 1, 12)) %>%
+ 	# strin_length(ACC_BLCA_BRCA_OV.mutations$bcr_patient_barcode) == 12
+ 	left_join(ACC_BLCA_BRCA_OV.mutations,
+ 	by = "bcr_patient_barcode") %>% #joined only with tumor patients
+ 	mutate(TP53 = ifelse(!is.na(Variant_Classification), "Mut", "WILD")) %>%
+ 	select(cohort, MET, TP53) -> ACC_BLCA_BRCA_OV.rnaseq_TP53mutations
> boxplotTCGA(ACC_BLCA_BRCA_OV.rnaseq_TP53mutations,
+  "reorder(cohort,log1p(MET), median)", "log1p(MET)", 
+ xlab = "Cohort Type", ylab = "Logarithm of MET",
+  legend.title = "Cohorts", legend = "bottom",
+ facet.names = c("TP53"))
> boxplotTCGA(ACC_BLCA_BRCA_OV.rnaseq_TP53mutations,
+  "reorder(cohort,log1p(MET), median)", "log1p(MET)", 
+ xlab = "Cohort Type", ylab = "Logarithm of MET",
+  legend.title = "Cohorts", legend = "bottom",
+ fill = c("TP53"))
