This page contains several scripts for creating publication-quality charts in R. They are designed to run on Linux (to start R as administrator, type sudo R) and were tested with R 3.6.3. An example Excel file (R_charts.xlsx) is provided to work on. The ggplot2, readxl and dplyr libraries must be installed and loaded prior to use.
Preliminary modifications
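The following code loads the example spreadsheet, saves it as a CSV file, and adds the derived columns (age category and stage name) that the charts below rely on.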
###load libraries###
library(ggplot2)
library(readxl)
library(dplyr)
###load file###
# Read the example workbook, write it out as CSV and read the CSV back, so the
# rest of the script works on a base data.frame backed by "R_charts.csv".
df <- read_excel("R_charts.xlsx")
write.csv(df, "R_charts.csv", row.names = FALSE)
df <- read.csv("R_charts.csv")
#df[] <- lapply(df, as.character)
###create new variables###
# Force the second column to numeric (presumably Age -- confirm against the
# spreadsheet layout). On R < 4.0 read.csv() turns text columns into factors,
# and as.numeric() on a factor returns the internal level codes rather than
# the values, so factors are converted through character first.
second_col <- df[[2]]
if (is.factor(second_col)) {
  second_col <- as.character(second_col)
}
df[[2]] <- as.numeric(second_col)

# Ten-year age bands; ages outside (0, 90] become NA.
df$Agecat <- cut(df$Age, breaks = seq(0, 90, by = 10))

# Display order of the stage names; also used as the lookup table below.
ylevel_order <- c("Zero", "First", "Second", "Third")

# Translate stage codes 0-3 into names; any other code (or NA) becomes NA.
stage_codes <- c("0", "1", "2", "3")
df$stage_type <- ylevel_order[match(as.character(df$Stage), stage_codes)]

age_level_order <- c("(0,10]", "(10,20]", "(20,30]", "(30,40]", "(40,50]", "(50,60]", "(60,70]", "(70,80]", "(80,90]") #especially useful if the order may not be preserved e.g. (100,110] is not last
write.csv(df, "R_charts.csv", row.names = FALSE)
Violin plots
# Violin + box plot of Age per stage, with the group mean marked by a cross and
# the group size ("N=...") printed above each violin.

# Everything that is positioned per stage is derived from the same factor so
# that x positions, label heights and label texts all follow ylevel_order.
violin_stage <- factor(df$stage_type, levels = ylevel_order)
violin_counts <- table(violin_stage)
# Highest observed age per stage, in ylevel_order (NA for a stage with no ages).
violin_max_age <- unname(vapply(
  split(df$Age, violin_stage),
  function(ages) if (all(is.na(ages))) NA_real_ else max(ages, na.rm = TRUE),
  numeric(1)
))

# Colours are bound to stage names, so they do not depend on the row order of
# the data.
violin_colours <- c(
  Zero = "#15b01a", First = "#c9ff27", Second = "#fdaa48", Third = "#e50000"
)

age_violin <- ggplot(
  df,
  aes(
    x = factor(stage_type, levels = ylevel_order),
    y = Age,
    fill = factor(stage_type, levels = ylevel_order)
  )
) +
  geom_violin(trim = FALSE) +
  # Keep empty stages on the axis so the integer label positions stay aligned.
  scale_x_discrete(drop = FALSE) +
  scale_fill_manual(values = violin_colours) +
  labs(x = "Stage", y = "Age") +
  ggtitle("Age distribution") +
  geom_boxplot(width = 0.12, lwd = 1.3) +
  stat_summary(fun = mean, geom = "point", shape = 4, size = 15, color = "black") +
  theme_classic() +
  annotate(
    "text",
    x = seq_along(violin_counts),
    y = violin_max_age,
    label = paste0("N=", violin_counts),
    colour = "black", size = 21, vjust = -1.5
  ) +
  theme(
    plot.title = element_text(size = 70, face = "bold", hjust = 0.45),
    axis.text = element_text(size = 50),
    axis.title = element_text(size = 50),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.position = "none"
  )

jpeg(file = "age_violin.jpeg", width = 1600, height = 1350, quality = 100)
# Explicit print(): ggplot objects are not auto-printed when the script is
# run through source(), which would leave an empty image.
print(age_violin)
dev.off()

Distribution charts
# 100% stacked bar chart: share of each Sex within every stage, with the stage
# size ("N=...") printed at the foot of each bar.

# Number of rows per stage, in ylevel_order (labels for the annotation).
sex_chart_counts <- table(factor(df$stage_type, levels = ylevel_order))

# Share of each Sex within its stage.
sex_by_stage <- df %>%
  count(Sex, stage_type) %>%
  group_by(stage_type) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup()

stage_sex_plot <- ggplot(
  sex_by_stage,
  aes(x = factor(stage_type, levels = ylevel_order), y = pct, fill = Sex)
) +
  # Keep empty stages on the axis so the integer label positions stay aligned.
  scale_x_discrete(drop = FALSE) +
  scale_fill_manual(values = c("#DADAEB", "#9E9AC8")) +
  labs(x = "Stage", y = "Percentage", fill = "Sex") +
  geom_col(width = 0.7) +
  geom_text(
    aes(label = paste0(round(pct * 100), "%")),
    size = 15,
    position = position_stack(vjust = 0.5)
  ) +
  coord_cartesian(ylim = c(0, 1.05), expand = FALSE) +
  ggtitle("Sex distro based on stage") +
  # y = 0 with a negative vjust places the label just above the x axis.
  annotate(
    "text",
    x = seq_along(sex_chart_counts),
    y = 0,
    label = paste0("N=", sex_chart_counts),
    colour = "black", size = 21, vjust = -1.5
  ) +
  theme(
    plot.title = element_text(size = 70, face = "bold", hjust = 0.5),
    axis.text = element_text(size = 50),
    axis.title = element_text(size = 50),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.title = element_text(size = 50),
    legend.text = element_text(size = 50),
    legend.key.size = unit(1, "cm")
  )

jpeg(file = "stage_sex_all_100.jpeg", width = 1600, height = 1350, quality = 100)
# Explicit print(): ggplot objects are not auto-printed when the script is
# run through source(), which would leave an empty image.
print(stage_sex_plot)
dev.off()

# 100% stacked bar chart: share of each age group within every stage, with the
# stage size ("N=...") printed above each bar.

# Number of rows per stage, in ylevel_order (labels for the annotation).
age_chart_counts <- table(factor(df$stage_type, levels = ylevel_order))

# Share of each age group within its stage.
age_by_stage <- df %>%
  count(Agecat, stage_type) %>%
  group_by(stage_type) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup()

stage_age_plot <- ggplot(
  age_by_stage,
  aes(
    x = factor(stage_type, levels = ylevel_order),
    y = pct,
    fill = factor(Agecat, levels = age_level_order)
  )
) +
  # Keep empty stages on the axis so the integer label positions stay aligned.
  scale_x_discrete(drop = FALSE) +
  #scale_fill_manual(values = c("#DADAEB", "#9E9AC8", "#6A51A3", "#DADAEB")) +
  labs(x = "Stage", y = "Percentage", fill = "Age group") +
  geom_col(width = 0.7) +
  geom_text(
    aes(label = paste0(round(pct * 100), "%")),
    size = 12.5,
    position = position_stack(vjust = 0.5)
  ) +
  coord_cartesian(ylim = c(0, 1.05), expand = FALSE) +
  ggtitle("Age distro based on stage") +
  annotate(
    "text",
    x = seq_along(age_chart_counts),
    y = 0.95,
    label = paste0("N=", age_chart_counts),
    colour = "black", size = 19, vjust = -1.5
  ) +
  theme(
    plot.title = element_text(size = 70, face = "bold", hjust = 0.5),
    axis.text = element_text(size = 50),
    axis.title = element_text(size = 50),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.title = element_text(size = 50),
    legend.text = element_text(size = 50),
    legend.key.size = unit(1, "cm")
  )

jpeg(file = "stage_age_all_100.jpeg", width = 1600, height = 1350, quality = 100)
# Explicit print(): ggplot objects are not auto-printed when the script is
# run through source(), which would leave an empty image.
print(stage_age_plot)
dev.off()

# 100% stacked bar chart: share of each stage within every age group, with the
# age-group size ("N=...") printed above each bar.

# Number of rows per age group, in age_level_order (labels for the annotation).
age_group_counts <- table(factor(df$Agecat, levels = age_level_order))

# Share of each stage within its age group.
stage_by_age <- df %>%
  count(Agecat, stage_type) %>%
  group_by(Agecat) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup()

# Colours are bound to stage names so a stage that is absent from the data
# cannot shift the remaining colours.
stage_colours <- c(
  Zero = "#15b01a", First = "#c9ff27", Second = "#fdaa48", Third = "#e50000"
)

stage_by_age_plot <- ggplot(
  stage_by_age,
  aes(
    x = factor(Agecat, levels = age_level_order),
    y = pct,
    fill = factor(stage_type, levels = ylevel_order)
  )
) +
  # Keep empty age groups on the axis so the integer label positions stay
  # aligned with their bars.
  scale_x_discrete(drop = FALSE) +
  labs(x = "Age group", y = "Percentage", fill = "Stage") +
  geom_col(width = 0.7) +
  geom_text(
    aes(label = paste0(round(pct * 100), "%")),
    size = 13.5,
    position = position_stack(vjust = 0.5)
  ) +
  coord_cartesian(ylim = c(0, 1.05), expand = FALSE) +
  ggtitle("Stage distro based on age") +
  scale_fill_manual(values = stage_colours) +
  annotate(
    "text",
    x = seq_along(age_group_counts),
    y = 0.968,
    label = paste0("N=", age_group_counts),
    colour = "black", size = 13.5, vjust = -1.5
  ) +
  theme(
    plot.title = element_text(size = 50, face = "bold", hjust = 0.5),
    axis.text = element_text(size = 38),
    axis.title = element_text(size = 50),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.title = element_text(size = 50),
    legend.text = element_text(size = 50),
    legend.key.size = unit(1, "cm")
  )

jpeg(file = "stage_age_all_100_inverse.jpeg", width = 2000, height = 1350, quality = 100)
# Explicit print(): ggplot objects are not auto-printed when the script is
# run through source(), which would leave an empty image.
print(stage_by_age_plot)
dev.off()

Histograms
# Draw a histogram of `days_of_treatment` and save it as a 1600x1350 JPEG.
#
# data   data frame with a numeric `days_of_treatment` column
# file   output file name
# title  plot title
# y_max  upper limit of the count axis; ticks are drawn every 5 below it
#
# Returns `file` invisibly.
save_days_histogram <- function(data, file, title, y_max) {
  histogram <- ggplot(data, aes(days_of_treatment)) +
    # NOTE: limits set on the scales (rather than via coord_cartesian) censor
    # out-of-range data, so values outside 0-50 days and bars taller than
    # y_max are dropped with a warning instead of being clipped.
    scale_x_continuous(
      expand = c(0, 0), limits = c(0, 50), breaks = seq(0, 49, by = 5)
    ) +
    scale_y_continuous(
      expand = c(0, 0), limits = c(0, y_max), breaks = seq(0, y_max - 1, by = 5)
    ) +
    # bins = 30 is the ggplot2 default, spelled out to avoid the stat_bin()
    # "Pick better value" message.
    geom_histogram(bins = 30) +
    labs(x = "Days of treatment", y = "Number of patients") +
    ggtitle(title) +
    theme_classic() +
    theme(
      plot.title = element_text(size = 60, face = "bold", hjust = 0.5),
      axis.text = element_text(size = 60),
      axis.title = element_text(size = 60),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.background = element_blank(),
      axis.line = element_line(colour = "black")
    )

  jpeg(file = file, width = 1600, height = 1350, quality = 100)
  # Close the device even if rendering fails, so no half-open file is left.
  on.exit(dev.off(), add = TRUE)
  # Explicit print(): ggplot objects are never auto-printed inside a function.
  print(histogram)
  invisible(file)
}

# Women only.
save_days_histogram(
  df %>% filter(Sex == "F"),
  file = "days_treatment_F.jpeg",
  title = "Days of treatment (Women)",
  y_max = 20
)

# Men only.
save_days_histogram(
  df %>% filter(Sex == "M"),
  file = "days_treatment_M.jpeg",
  title = "Days of treatment (Men)",
  y_max = 20
)

# All patients; the count axis is taller because both sexes are pooled.
save_days_histogram(
  df,
  file = "days_treatment_all.jpeg",
  title = "Days of treatment (M+F)",
  y_max = 30
)
# Overlaid, semi-transparent histograms of days of treatment, one per Sex.
# Rows with a missing Sex are left out.
days_by_sex_plot <- df %>%
  filter(!is.na(Sex)) %>%
  ggplot(aes(x = days_of_treatment, color = Sex, fill = Sex)) +
  # position = "identity" overlays the groups instead of stacking them.
  geom_histogram(position = "identity", alpha = 0.5, bins = 25) +
  #geom_vline(data=mu, aes(xintercept=grp.mean, color=Sex),
  # linetype="dashed")+
  scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9")) +
  scale_fill_manual(values = c("#999999", "#E69F00", "#56B4E9")) +
  labs(x = "Days of treatment", y = "Number of patients") +
  ggtitle("Days of treatment (M/F)") +
  theme_classic() +
  theme(
    plot.title = element_text(size = 60, face = "bold", hjust = 0.5),
    axis.text = element_text(size = 60),
    axis.title = element_text(size = 60),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.title = element_text(size = 60),
    legend.text = element_text(size = 60),
    legend.key.size = unit(1, "cm")
  ) +
  # Remove the default padding so the bars start at the axes.
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0))

jpeg(file = "days_treatment_M_and_F.jpeg", width = 1600, height = 1350, quality = 100)
# Explicit print(): ggplot objects are not auto-printed when the script is
# run through source(), which would leave an empty image.
print(days_by_sex_plot)
dev.off()

