Učitavanje paketa

require(ggplot2)
require(dplyr)
require(tidyr)

Učitavanje podataka

# Load the course results. stringsAsFactors = FALSE keeps text columns as
# character on every R version, so the as.numeric() calls below never convert
# factor level codes instead of the actual scores (the pre-R-4.0 default).
podaci <- read.csv("MAT2_2021.csv", stringsAsFactors = FALSE)

# Force the three midterm score columns to numeric; entries that cannot be
# parsed as numbers become NA (with a coercion warning).
podaci$KOL.1 <- as.numeric(podaci$KOL.1)
podaci$KOL.2 <- as.numeric(podaci$KOL.2)
podaci$KOL.3 <- as.numeric(podaci$KOL.3)

# Grade as a factor with an explicit level order ("BP" first, then 1 to 5).
podaci$Ocjena <- factor(podaci$OCJENA, levels = c("BP", "1", "2", "3", "4", "5"))

# Long format of the midterm scores: one row per (midterm, score) pair with
# columns `kolokvij` and `bodovi`. pivot_longer() replaces the superseded
# gather(); the rows come out interleaved per student rather than stacked per
# column, which does not affect the plots built from this table.
podaciKOL <- podaci %>%
  select(KOL.1:KOL.3) %>%
  pivot_longer(
    cols = everything(),
    names_to = "kolokvij",
    values_to = "bodovi"
  )

Raspodjela bodova na 1. kolokviju

# Count histogram of the first midterm scores: one-point bins, x axis ticks
# every 2 points, y axis ticks every 2 students, no minor horizontal grid.
ggplot(data = podaci, mapping = aes(x = KOL.1)) +
  geom_histogram(
    binwidth = 1,
    colour = "#ec8ae5",
    fill = "#48d09b",
    alpha = 0.7
  ) +
  scale_x_continuous(
    name = "bodovi na 1. kolokviju",
    breaks = seq(from = 0, to = 21, by = 2),
    limits = c(-1, 21)
  ) +
  scale_y_continuous(
    name = "broj studenata",
    breaks = seq(from = 0, to = 40, by = 2),
    limits = c(0, 40)
  ) +
  theme(panel.grid.minor.y = element_blank())

# Density-scaled histogram of the first midterm scores, overlaid with a kernel
# density estimate, a rug of the individual scores and a dashed line at the
# mean score.
ggplot(podaci, aes(x = KOL.1)) +
  # after_stat(density) replaces the deprecated `..density..` notation.
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 1, color = "#ec8ae5", fill = "#48d09b", alpha = 0.7
  ) +
  # The mean is passed as a constant rather than through aes(): a mapped
  # scalar is recycled to one line per data row, and the stacked lines cancel
  # out the requested transparency. `linewidth` replaces the deprecated `size`
  # for lines (ggplot2 >= 3.4.0).
  geom_vline(
    xintercept = mean(podaci$KOL.1, na.rm = TRUE),
    color = "blue", linetype = "dashed", linewidth = 0.5, alpha = 0.3
  ) +
  geom_density(alpha = 0.2, fill = "yellow") +
  geom_rug() +
  scale_x_continuous(
    name = "bodovi na 1. kolokviju",
    breaks = seq(0, 21, 2),
    limits = c(-1, 21)
  ) +
  scale_y_continuous(limits = c(0, 0.15))

Raspodjela bodova na 2. kolokviju

# Count histogram of the second midterm scores: one-point bins, x axis ticks
# every 2 points, y axis ticks every 2 students, no minor horizontal grid.
ggplot(data = podaci, mapping = aes(x = KOL.2)) +
  geom_histogram(
    binwidth = 1,
    colour = "#ec8ae5",
    fill = "#48d09b",
    alpha = 0.7
  ) +
  scale_x_continuous(
    name = "bodovi na 2. kolokviju",
    breaks = seq(from = 0, to = 21, by = 2),
    limits = c(-1, 21)
  ) +
  scale_y_continuous(
    name = "broj studenata",
    breaks = seq(from = 0, to = 40, by = 2),
    limits = c(0, 40)
  ) +
  theme(panel.grid.minor.y = element_blank())

# Density-scaled histogram of the second midterm scores, overlaid with a
# kernel density estimate, a rug of the individual scores and a dashed line at
# the mean score.
ggplot(podaci, aes(x = KOL.2)) +
  # after_stat(density) replaces the deprecated `..density..` notation.
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 1, color = "#ec8ae5", fill = "#48d09b", alpha = 0.7
  ) +
  # The mean is passed as a constant rather than through aes(): a mapped
  # scalar is recycled to one line per data row, and the stacked lines cancel
  # out the requested transparency. `linewidth` replaces the deprecated `size`
  # for lines (ggplot2 >= 3.4.0).
  geom_vline(
    xintercept = mean(podaci$KOL.2, na.rm = TRUE),
    color = "blue", linetype = "dashed", linewidth = 0.5, alpha = 0.3
  ) +
  geom_density(alpha = 0.2, fill = "yellow") +
  geom_rug() +
  scale_x_continuous(
    name = "bodovi na 2. kolokviju",
    breaks = seq(0, 21, 2),
    limits = c(-1, 21)
  ) +
  scale_y_continuous(limits = c(0, 0.15))

Raspodjela bodova na 3. kolokviju

# Count histogram of the third midterm scores: one-point bins, x axis ticks
# every 2 points, y axis ticks every 2 students, no minor horizontal grid.
ggplot(data = podaci, mapping = aes(x = KOL.3)) +
  geom_histogram(
    binwidth = 1,
    colour = "#ec8ae5",
    fill = "#48d09b",
    alpha = 0.7
  ) +
  scale_x_continuous(
    name = "bodovi na 3. kolokviju",
    breaks = seq(from = 0, to = 21, by = 2),
    limits = c(-1, 21)
  ) +
  scale_y_continuous(
    name = "broj studenata",
    breaks = seq(from = 0, to = 40, by = 2),
    limits = c(0, 40)
  ) +
  theme(panel.grid.minor.y = element_blank())

# Density-scaled histogram of the third midterm scores, overlaid with a kernel
# density estimate, a rug of the individual scores and a dashed line at the
# mean score.
ggplot(podaci, aes(x = KOL.3)) +
  # after_stat(density) replaces the deprecated `..density..` notation.
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 1, color = "#ec8ae5", fill = "#48d09b", alpha = 0.7
  ) +
  # The mean is passed as a constant rather than through aes(): a mapped
  # scalar is recycled to one line per data row, and the stacked lines cancel
  # out the requested transparency. `linewidth` replaces the deprecated `size`
  # for lines (ggplot2 >= 3.4.0).
  geom_vline(
    xintercept = mean(podaci$KOL.3, na.rm = TRUE),
    color = "blue", linetype = "dashed", linewidth = 0.5, alpha = 0.3
  ) +
  geom_density(alpha = 0.2, fill = "yellow") +
  geom_rug() +
  scale_x_continuous(
    name = "bodovi na 3. kolokviju",
    breaks = seq(0, 21, 2),
    limits = c(-1, 21)
  ) +
  scale_y_continuous(limits = c(0, 0.15))

Raspodjela bodova na sva tri kolokvija

# Overlaid kernel density estimates of the scores, one curve per midterm,
# distinguished by both fill and outline colour.
ggplot(
  data = podaciKOL,
  mapping = aes(x = bodovi, colour = kolokvij, fill = kolokvij)
) +
  geom_density(alpha = 0.2) +
  scale_x_continuous(
    name = "bodovi na kolokviju",
    breaks = seq(from = 0, to = 21, by = 2),
    limits = c(-1, 21)
  ) +
  scale_y_continuous(limits = c(0, 0.15))

Violina i boxplot svakog kolokvija

Kvartili i aritmetička sredina
# Per-column summary (extremes, quartiles, mean, NA count) of the midterms.
podaci %>%
  select(KOL.1:KOL.3) %>%
  summary()
##      KOL.1            KOL.2           KOL.3       
##  Min.   : 2.420   Min.   : 0.80   Min.   : 0.000  
##  1st Qu.: 6.970   1st Qu.: 7.59   1st Qu.: 4.530  
##  Median : 9.540   Median : 9.97   Median : 7.330  
##  Mean   : 9.374   Mean   :10.08   Mean   : 7.494  
##  3rd Qu.:11.580   3rd Qu.:12.64   3rd Qu.: 9.950  
##  Max.   :19.600   Max.   :18.50   Max.   :18.790  
##  NA's   :21       NA's   :31      NA's   :65
Standardna devijacija
# Standard deviation of each midterm column, ignoring missing scores.
# vapply() iterates over the columns directly (no data-frame-to-matrix
# coercion as with apply()) and guarantees one numeric value per column; the
# result is still a numeric vector named by column. TRUE is spelled out
# because T is an ordinary, reassignable variable.
vapply(select(podaci, KOL.1:KOL.3), sd, numeric(1), na.rm = TRUE)
##    KOL.1    KOL.2    KOL.3 
## 3.306555 3.750633 3.792708
# Side-by-side comparison of the three midterms: centred dot plot of the
# individual scores, an untrimmed violin and a narrow boxplot per midterm.
# The legend is hidden because the x axis already names each midterm.
ggplot(podaciKOL, aes(x = kolokvij, y = bodovi, fill = kolokvij)) +
  geom_dotplot(
    binaxis = "y", stackdir = "center", dotsize = 0.2,
    fill = "black", alpha = 0.2, binwidth = 0.9
  ) +
  # FALSE is spelled out: F is an ordinary variable that can be reassigned.
  geom_violin(trim = FALSE, alpha = 0.3) +
  geom_boxplot(width = 0.05) +
  xlab("") +
  theme(
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    legend.position = "none"
  )

Raspodjela ukupnih bodova

# Count histogram of the total score: one-point bins over 0-100, x axis ticks
# every 5 points, integer y axis ticks from 0 to 16, no minor grid lines.
ggplot(data = podaci, mapping = aes(x = UKUPNO)) +
  geom_histogram(
    binwidth = 1,
    colour = "#ec8ae5",
    fill = "#48d09b",
    alpha = 0.7
  ) +
  scale_x_continuous(
    name = "ukupni bodovi",
    breaks = seq(from = 0, to = 100, by = 5),
    limits = c(0, 100)
  ) +
  scale_y_continuous(
    name = "broj studenata",
    breaks = seq(from = 0, to = 16)
  ) +
  theme(panel.grid.minor = element_blank())

# Density-scaled histogram of the total score with unequal bins at
# 0/20/50/61/75/91/100, overlaid with a kernel density estimate, a rug of the
# individual totals and a dashed line at the mean total.
ggplot(podaci, aes(x = UKUPNO)) +
  # after_stat(density) replaces the deprecated `..density..` notation.
  geom_histogram(
    aes(y = after_stat(density)),
    color = "#ec8ae5", fill = "#48d09b", alpha = 0.5,
    breaks = c(0, 20, 50, 61, 75, 91, 100)
  ) +
  # na.rm = TRUE matches the per-midterm plots: without it a single missing
  # total makes the mean NA and the line disappears. The mean is passed as a
  # constant rather than through aes(), because a mapped scalar is recycled to
  # one line per data row and the stacked lines cancel out the requested
  # transparency. `linewidth` replaces the deprecated `size` for lines
  # (ggplot2 >= 3.4.0).
  geom_vline(
    xintercept = mean(podaci$UKUPNO, na.rm = TRUE),
    color = "blue", linetype = "dashed", linewidth = 0.5, alpha = 0.3
  ) +
  geom_density(alpha = 0.2, fill = "yellow") +
  geom_rug() +
  scale_x_continuous(
    name = "ukupni bodovi",
    breaks = seq(0, 100, 5),
    limits = c(0, 100)
  ) +
  theme(panel.grid.minor.x = element_blank())

Ocjene

# Bar chart of the number of students per grade. Each bar is annotated with
# its count (nudged left) and its share of all students in parentheses
# (nudged right).
# NOTE(review): the x axis uses factor(OCJENA), whose levels are in default
# sort order; the pre-built podaci$Ocjena column puts "BP" first instead.
# Confirm which ordering is intended.
ggplot(podaci, aes(x = factor(OCJENA))) +
  geom_bar(width = 0.7, fill = "steelblue") +
  # after_stat() replaces the deprecated `..count..` / `..prop..` notation.
  geom_text(
    aes(label = after_stat(count)),
    stat = "count", vjust = -0.5, nudge_x = -0.2, size = 3
  ) +
  # group = 1 makes prop the share of all students, not of each bar's group.
  geom_text(
    aes(label = sprintf("(%s)", scales::percent(after_stat(prop))), group = 1),
    stat = "count", vjust = -0.5, nudge_x = 0.15, size = 3
  ) +
  scale_x_discrete(name = "Ocjena") +
  scale_y_continuous(
    name = "broj studenata",
    breaks = seq(0, 150, 10),
    limits = c(0, 150)
  )