Design a site like this with WordPress.com
Get started

R codes for charts

This page contains several scripts for creating publication-quality charts in R. They are designed to run on Linux (to start R as administrator, type sudo R) and were tested with R 3.6.3. An example spreadsheet, R_charts.xlsx, is provided to work on. The ggplot2, readxl and dplyr packages must be installed and then loaded prior to use.


Preliminary modifications

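The following code loads the example spreadsheet, saves it as a CSV file, and adds the derived columns (age category and stage label) that the charts below rely on.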

###load libraries###
library(ggplot2)
library(readxl)
library(dplyr)

###load file###
# Read the example workbook, write it out as CSV and read that CSV back, so
# the rest of the script works on whatever read.csv() returns.
df <- read_excel("R_charts.xlsx")
write.csv(df, "R_charts.csv", row.names = FALSE)
df <- read.csv("R_charts.csv")
#df[] <- lapply(df, as.character)

###create new variables###
# Force the second column to numeric.
# NOTE(review): presumably column 2 is Age - confirm against the workbook.
df[[2]] <- as.numeric(df[[2]])

# Ten-year age bands "(0,10]" ... "(80,90]"; ages outside (0, 90] become NA.
df$Agecat <- cut(df$Age, seq(0, 90, by = 10))

# Translate the stage code into its label through a lookup vector; any code
# other than 0-3 (or a missing code) yields NA.
stage_labels <- c("0" = "Zero", "1" = "First", "2" = "Second", "3" = "Third")
df$stage_type <- unname(stage_labels[as.character(df$Stage)])

# Display orders used by the charts.
ylevel_order <- c("Zero", "First", "Second", "Third")
# Especially useful if the order may not be preserved, e.g. (100,110] is not last.
age_level_order <- c(
  "(0,10]", "(10,20]", "(20,30]", "(30,40]", "(40,50]",
  "(50,60]", "(60,70]", "(70,80]", "(80,90]"
)

# Save the data again, now including the derived columns.
write.csv(df, "R_charts.csv", row.names = FALSE)

Violin plots

# Violin plot of Age per stage, written to age_violin.jpeg.
# Each violin is overlaid with a box plot and a cross marking the mean, and
# carries an "N=" label placed above the highest age observed in that stage.
violin_data <- df %>%
  mutate(stage_type = factor(stage_type, levels = ylevel_order))

# Patients per stage, in axis order. ggplot2 drops empty levels from a
# discrete axis, so empty stages are removed to keep positions 1..k aligned.
stage_n <- table(violin_data$stage_type)
stage_n <- stage_n[stage_n > 0]

# Highest age per stage, in the same order as the labels. (aggregate() would
# sort the stages alphabetically and put each label at another stage's height.)
stage_max_age <- tapply(
  violin_data$Age, violin_data$stage_type, max, na.rm = TRUE
)
stage_max_age <- as.vector(stage_max_age[names(stage_n)])

# Colours are keyed by stage name so the mapping does not depend on the row
# order of df; this is the same stage palette as the stage-by-age chart.
stage_colours <- c(
  Zero = "#15b01a", First = "#c9ff27", Second = "#fdaa48", Third = "#e50000"
)

age_violin <- ggplot(
  violin_data,
  aes(x = stage_type, y = Age, fill = stage_type)
) +
  geom_violin(trim = FALSE) +
  scale_fill_manual(values = stage_colours) +
  labs(x = "Stage", y = "Age") +
  ggtitle("Age distribution") +
  geom_boxplot(width = 0.12, lwd = 1.3) +
  stat_summary(
    fun = mean, geom = "point", shape = 4, size = 15, color = "black"
  ) +
  theme_classic() +
  annotate(
    "text",
    x = seq_along(stage_n),
    y = stage_max_age,
    label = paste0("N=", stage_n),
    colour = "black", size = 21, vjust = -1.5
  ) +
  theme(
    plot.title = element_text(size = 70, face = "bold", hjust = 0.45),
    axis.text = element_text(size = 50),
    axis.title = element_text(size = 50),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.position = "none"
  )

jpeg(file = "age_violin.jpeg", width = 1600, height = 1350, quality = 100)
# Explicit print() so the chart is also drawn when the script is source()d.
print(age_violin)
dev.off()

Distribution charts

# 100% stacked bar chart: share of each sex within every stage, written to
# stage_sex_all_100.jpeg. Bars are labelled with the rounded percentage and
# each stage carries an "N=" label near the bottom of the panel.

# Patients per stage, in axis order. ggplot2 drops empty levels from a
# discrete axis, so empty stages are removed to keep positions 1..k aligned.
stage_n <- table(factor(df$stage_type, levels = ylevel_order))
stage_n <- stage_n[stage_n > 0]

# Share of each sex within a stage.
sex_by_stage <- df %>%
  count(Sex, stage_type) %>%
  group_by(stage_type) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup() %>%
  mutate(stage_type = factor(stage_type, levels = ylevel_order))

stage_sex_plot <- ggplot(
  sex_by_stage,
  aes(x = stage_type, y = pct, fill = Sex)
) +
  scale_fill_manual(values = c("#DADAEB", "#9E9AC8")) +
  labs(x = "Stage", y = "Percentage", fill = "Sex") +
  geom_col(width = 0.7) +
  geom_text(
    aes(label = paste0(round(pct * 100), "%")),
    size = 15,
    position = position_stack(vjust = 0.5)
  ) +
  coord_cartesian(ylim = c(0, 1.05), expand = FALSE) +
  ggtitle("Sex distro based on stage") +
  annotate(
    "text",
    x = seq_along(stage_n),
    y = 0,
    label = paste0("N=", stage_n),
    colour = "black", size = 21, vjust = -1.5
  ) +
  theme(
    plot.title = element_text(size = 70, face = "bold", hjust = 0.5),
    axis.text = element_text(size = 50),
    axis.title = element_text(size = 50),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.title = element_text(size = 50),
    legend.text = element_text(size = 50),
    legend.key.size = unit(1, "cm")
  )

jpeg(file = "stage_sex_all_100.jpeg", width = 1600, height = 1350, quality = 100)
# Explicit print() so the chart is also drawn when the script is source()d.
print(stage_sex_plot)
dev.off()
# 100% stacked bar chart: share of each age group within every stage, written
# to stage_age_all_100.jpeg. Bars are labelled with the rounded percentage and
# each stage carries an "N=" label near the top of the panel.

# Patients per stage, in axis order. ggplot2 drops empty levels from a
# discrete axis, so empty stages are removed to keep positions 1..k aligned.
stage_n <- table(factor(df$stage_type, levels = ylevel_order))
stage_n <- stage_n[stage_n > 0]

# Share of each age group within a stage.
age_by_stage <- df %>%
  count(Agecat, stage_type) %>%
  group_by(stage_type) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup() %>%
  mutate(
    stage_type = factor(stage_type, levels = ylevel_order),
    Agecat = factor(Agecat, levels = age_level_order)
  )

stage_age_plot <- ggplot(
  age_by_stage,
  aes(x = stage_type, y = pct, fill = Agecat)
) +
  # Disabled custom palette; the default fill colours are used instead.
  # scale_fill_manual(values = c("#DADAEB", "#9E9AC8", "#6A51A3", "#DADAEB")) +
  labs(x = "Stage", y = "Percentage", fill = "Age group") +
  geom_col(width = 0.7) +
  geom_text(
    aes(label = paste0(round(pct * 100), "%")),
    size = 12.5,
    position = position_stack(vjust = 0.5)
  ) +
  coord_cartesian(ylim = c(0, 1.05), expand = FALSE) +
  ggtitle("Age distro based on stage") +
  annotate(
    "text",
    x = seq_along(stage_n),
    y = 0.95,
    label = paste0("N=", stage_n),
    colour = "black", size = 19, vjust = -1.5
  ) +
  theme(
    plot.title = element_text(size = 70, face = "bold", hjust = 0.5),
    axis.text = element_text(size = 50),
    axis.title = element_text(size = 50),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.title = element_text(size = 50),
    legend.text = element_text(size = 50),
    legend.key.size = unit(1, "cm")
  )

jpeg(file = "stage_age_all_100.jpeg", width = 1600, height = 1350, quality = 100)
# Explicit print() so the chart is also drawn when the script is source()d.
print(stage_age_plot)
dev.off()
# 100% stacked bar chart: share of each stage within every age group, written
# to stage_age_all_100_inverse.jpeg. Bars are labelled with the rounded
# percentage and each age group carries an "N=" label near the top.

# Patients per age group, in axis order. ggplot2 drops empty levels from a
# discrete axis, so empty age groups are removed; otherwise every label after
# the first empty group would sit over the wrong bar.
age_n <- table(factor(df$Agecat, levels = age_level_order))
age_n <- age_n[age_n > 0]

# Share of each stage within an age group.
stage_by_age <- df %>%
  count(Agecat, stage_type) %>%
  group_by(Agecat) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup() %>%
  mutate(
    Agecat = factor(Agecat, levels = age_level_order),
    stage_type = factor(stage_type, levels = ylevel_order)
  )

# Colours are keyed by stage name so they stay fixed even if a stage is absent.
stage_colours <- c(
  Zero = "#15b01a", First = "#c9ff27", Second = "#fdaa48", Third = "#e50000"
)

age_stage_plot <- ggplot(
  stage_by_age,
  aes(x = Agecat, y = pct, fill = stage_type)
) +
  labs(x = "Age group", y = "Percentage", fill = "Stage") +
  geom_col(width = 0.7) +
  geom_text(
    aes(label = paste0(round(pct * 100), "%")),
    size = 13.5,
    position = position_stack(vjust = 0.5)
  ) +
  coord_cartesian(ylim = c(0, 1.05), expand = FALSE) +
  ggtitle("Stage distro based on age") +
  scale_fill_manual(values = stage_colours) +
  annotate(
    "text",
    x = seq_along(age_n),
    y = 0.968,
    label = paste0("N=", age_n),
    colour = "black", size = 13.5, vjust = -1.5
  ) +
  theme(
    plot.title = element_text(size = 50, face = "bold", hjust = 0.5),
    axis.text = element_text(size = 38),
    axis.title = element_text(size = 50),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.title = element_text(size = 50),
    legend.text = element_text(size = 50),
    legend.key.size = unit(1, "cm")
  )

jpeg(file = "stage_age_all_100_inverse.jpeg", width = 2000, height = 1350, quality = 100)
# Explicit print() so the chart is also drawn when the script is source()d.
print(age_stage_plot)
dev.off()

Histograms

# Histogram of days of treatment for rows with Sex == "F", written to
# days_treatment_F.jpeg.
treatment_hist_f <- df %>%
  filter(Sex == "F") %>%
  ggplot(aes(days_of_treatment)) +
  # NOTE(review): limits set on the x scale may also discard the outermost
  # bars of a histogram - verify against the rendered chart.
  scale_x_continuous(
    expand = c(0, 0), limits = c(0, 50), breaks = seq(0, 49, by = 5)
  ) +
  scale_y_continuous(expand = c(0, 0), breaks = seq(0, 19, by = 5)) +
  # The y range is set on the coordinate system rather than on the scale:
  # scale limits would silently remove every bar taller than 20, whereas
  # coord_cartesian() only crops the view.
  coord_cartesian(ylim = c(0, 20), expand = FALSE) +
  # bins = 30 is the ggplot2 default, stated explicitly.
  geom_histogram(bins = 30) +
  labs(x = "Days of treatment", y = "Number of patients") +
  ggtitle("Days of treatment (Women)") +
  theme_classic() +
  theme(
    plot.title = element_text(size = 60, face = "bold", hjust = 0.5),
    axis.text = element_text(size = 60),
    axis.title = element_text(size = 60),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )

jpeg(file = "days_treatment_F.jpeg", width = 1600, height = 1350, quality = 100)
# Explicit print() so the chart is also drawn when the script is source()d.
print(treatment_hist_f)
dev.off()

# Histogram of days of treatment for rows with Sex == "M", written to
# days_treatment_M.jpeg.
treatment_hist_m <- df %>%
  filter(Sex == "M") %>%
  ggplot(aes(days_of_treatment)) +
  # NOTE(review): limits set on the x scale may also discard the outermost
  # bars of a histogram - verify against the rendered chart.
  scale_x_continuous(
    expand = c(0, 0), limits = c(0, 50), breaks = seq(0, 49, by = 5)
  ) +
  scale_y_continuous(expand = c(0, 0), breaks = seq(0, 19, by = 5)) +
  # The y range is set on the coordinate system rather than on the scale:
  # scale limits would silently remove every bar taller than 20, whereas
  # coord_cartesian() only crops the view.
  coord_cartesian(ylim = c(0, 20), expand = FALSE) +
  # bins = 30 is the ggplot2 default, stated explicitly.
  geom_histogram(bins = 30) +
  labs(x = "Days of treatment", y = "Number of patients") +
  ggtitle("Days of treatment (Men)") +
  theme_classic() +
  theme(
    plot.title = element_text(size = 60, face = "bold", hjust = 0.5),
    axis.text = element_text(size = 60),
    axis.title = element_text(size = 60),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )

jpeg(file = "days_treatment_M.jpeg", width = 1600, height = 1350, quality = 100)
# Explicit print() so the chart is also drawn when the script is source()d.
print(treatment_hist_m)
dev.off()
# Histogram of days of treatment for all rows of df, written to
# days_treatment_all.jpeg.
treatment_hist_all <- ggplot(data = df, aes(days_of_treatment)) +
  # NOTE(review): limits set on the x scale may also discard the outermost
  # bars of a histogram - verify against the rendered chart.
  scale_x_continuous(
    expand = c(0, 0), limits = c(0, 50), breaks = seq(0, 49, by = 5)
  ) +
  scale_y_continuous(expand = c(0, 0), breaks = seq(0, 29, by = 5)) +
  # The y range is set on the coordinate system rather than on the scale:
  # scale limits would silently remove every bar taller than 30, whereas
  # coord_cartesian() only crops the view.
  coord_cartesian(ylim = c(0, 30), expand = FALSE) +
  # bins = 30 is the ggplot2 default, stated explicitly.
  geom_histogram(bins = 30) +
  labs(x = "Days of treatment", y = "Number of patients") +
  ggtitle("Days of treatment (M+F)") +
  theme_classic() +
  theme(
    plot.title = element_text(size = 60, face = "bold", hjust = 0.5),
    axis.text = element_text(size = 60),
    axis.title = element_text(size = 60),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )

jpeg(file = "days_treatment_all.jpeg", width = 1600, height = 1350, quality = 100)
# Explicit print() so the chart is also drawn when the script is source()d.
print(treatment_hist_all)
dev.off()

# Overlaid, semi-transparent histograms of days of treatment, one per sex,
# written to days_treatment_M_and_F.jpeg. Rows with a missing Sex are left out.
treatment_hist_by_sex <- df %>%
  filter(!is.na(Sex)) %>%
  ggplot(aes(x = days_of_treatment, color = Sex, fill = Sex)) +
  # position = "identity" draws the groups over each other instead of stacking.
  geom_histogram(position = "identity", alpha = 0.5, bins = 25) +
  # Disabled: dashed line at each group's mean (needs a `mu` data frame).
  #geom_vline(data=mu, aes(xintercept=grp.mean, color=Sex),
  #           linetype="dashed")+
  scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9")) +
  scale_fill_manual(values = c("#999999", "#E69F00", "#56B4E9")) +
  # The original call ended in a stray empty argument ("c(0, 0), )"), removed.
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(x = "Days of treatment", y = "Number of patients") +
  ggtitle("Days of treatment (M/F)") +
  theme_classic() +
  theme(
    plot.title = element_text(size = 60, face = "bold", hjust = 0.5),
    axis.text = element_text(size = 60),
    axis.title = element_text(size = 60),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.title = element_text(size = 60),
    legend.text = element_text(size = 60),
    legend.key.size = unit(1, "cm")
  )

jpeg(file = "days_treatment_M_and_F.jpeg", width = 1600, height = 1350, quality = 100)
# Explicit print() so the chart is also drawn when the script is source()d.
print(treatment_hist_by_sex)
dev.off()

Website Built with WordPress.com.

Up ↑

%d bloggers like this: