---
title: "EP_APO - Klasteriranje po ishodima (bez stršila)"
output:
  flexdashboard::flex_dashboard:
    social: menu
    orientation: columns
    vertical_layout: fill
    source_code: embed
---
```{css, echo=FALSE}
/* Let the sidebar scroll when its content overflows. */
.sidebar {
  overflow: auto;
}

/* Cap the DataTables scroll body at 95% of its container height. */
.dataTables_scrollBody {
  height: 95% !important;
  max-height: 95% !important;
}

/* Allow chart panels to scroll instead of clipping their content. */
.chart-stage-flex {
  overflow: auto !important;
}
```
```{r setup, include=FALSE}
library(readxl)
library(tidyverse)
library(cluster)
library(knitr)
library(kableExtra)
library(tidyverse)
library(corrplot)
library(dendextend)
library(factoextra)
# ---- Data import ----------------------------------------------------------
# Read the first sheet, keep columns STUDENT through I8 and drop every row
# that has a missing value in any of them.
data1 <- read_excel("EP_APO_Ocjene.xlsx", sheet = 1) %>%
  select(STUDENT:I8) %>%
  na.omit()

# Long format (one row per student/outcome pair), used by the box plot.
data1_long <- data1 %>%
  pivot_longer(cols = I1:I8, names_to = "Ishod", values_to = "Vrijednost")

# ---- Outlier removal ------------------------------------------------------
# TRUE for values inside the Tukey fences [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR].
within_tukey_fences <- function(x) {
  q <- quantile(x, c(0.25, 0.75))
  between(x, q[[1]] - 1.5 * IQR(x), q[[2]] + 1.5 * IQR(x))
}

# Keep only students whose score lies inside the fences for every outcome.
# The fences are computed on the full (ungrouped) data1 columns.
data1_noOutliers <- data1 %>%
  filter(if_all(I1:I8, within_tukey_fences))

# ---- Hierarchical clustering ---------------------------------------------
data2 <- data1_noOutliers %>% select(I1:I8)
data3 <- data.frame(scale(data2)) # column-wise standardised outcomes
dist_student <- dist(data3)
hc_ward <- hclust(dist_student, method = "ward.D2")
ward_dend <- as.dendrogram(hc_ward) # %>% set("labels", 1:99)
ward_dend_color3 <- color_branches(ward_dend, k = 3)
ward_dend_color4 <- color_branches(ward_dend, k = 4)

# Cluster membership for the three- and four-cluster cuts of the tree.
klasteri3 <- cutree(hc_ward, k = 3)
klasteri4 <- cutree(hc_ward, k = 4)

data3_cluster3 <- data3 %>% mutate(klaster = klasteri3)
data3_cluster4 <- data3 %>% mutate(klaster = klasteri4)
data1_cluster3 <- data1_noOutliers %>% mutate(klaster = klasteri3)
data1_cluster4 <- data1_noOutliers %>% mutate(klaster = klasteri4)

# ---- Mean score per outcome (overall and per cluster) ---------------------
# Mean of every outcome column; respects any grouping already set on `df`.
mean_outcomes <- function(df) {
  summarise(df, across(I1:I8, ~ mean(.x, na.rm = TRUE)))
}

# Long table of mean scores per cluster and outcome, followed by the overall
# means stored as pseudo-cluster 0. `klaster` is returned as a factor whose
# levels are mapped explicitly (0 -> "svi", 1..k -> "1".."k") instead of being
# relabelled by position.
cluster_means <- function(clustered, overall) {
  k <- max(clustered$klaster)
  per_cluster <- clustered %>%
    group_by(klaster) %>%
    mean_outcomes() %>%
    pivot_longer(cols = I1:I8, names_to = "Ishod", values_to = "Prosjek")
  all_students <- overall %>%
    pivot_longer(cols = I1:I8, names_to = "Ishod", values_to = "Prosjek") %>%
    mutate(klaster = 0L)
  bind_rows(per_cluster, all_students) %>%
    mutate(klaster = factor(klaster, levels = 0:k, labels = c("svi", seq_len(k))))
}

AS <- mean_outcomes(data1_noOutliers)
rez3 <- cluster_means(data1_cluster3, AS)
rez4 <- cluster_means(data1_cluster4, AS)

# Maximum (planned) points per outcome I1..I8.
bodovi <- c(5, 18, 12, 13, 6, 19, 14, 13)

# ---- Silhouettes ----------------------------------------------------------
# The calls are kept verbatim (including the recomputed distance matrix):
# the silhouette object stores its call, and output that echoes the call
# would otherwise change.
sil3 <- silhouette(data3_cluster3$klaster, dist(data3))
sil4 <- silhouette(data3_cluster4$klaster, dist(data3))

# ---- Long-format standardised scores for the density plots ----------------
viz3 <- data3_cluster3 %>%
  pivot_longer(cols = I1:I8, names_to = "Ishod", values_to = "Bodovi")
viz4 <- data3_cluster4 %>%
  pivot_longer(cols = I1:I8, names_to = "Ishod", values_to = "Bodovi")

# ---- Student tables -------------------------------------------------------
# Total points per student, computed in one vectorised pass.
tablica <- data1_noOutliers %>%
  mutate(ukupno = rowSums(across(I1:I8), na.rm = TRUE))

# Students ordered by cluster and, within a cluster, by descending total.
# The grouping is dropped afterwards so it does not leak into later code.
tablica3 <- tablica %>%
  mutate(klaster = klasteri3) %>%
  group_by(klaster) %>%
  arrange(desc(ukupno), .by_group = TRUE) %>%
  ungroup()
tablica4 <- tablica %>%
  mutate(klaster = klasteri4) %>%
  group_by(klaster) %>%
  arrange(desc(ukupno), .by_group = TRUE) %>%
  ungroup()

# ---- Reference lines for the faceted bar charts ---------------------------
# One row per outcome: the maximum points (horizontal line) plus the x/y
# position of its label. xcor1 is used by the three-cluster plot and xcor2 by
# the four-cluster plot.
hl <- data.frame(
  Ishod = paste0("I", seq_along(bodovi)),
  Bodovi = bodovi,
  xcor1 = rep(2.5, 8),
  xcor2 = rep(3, 8),
  ycor = c(5.5, 20, 13.5, 14.5, 6.6, 21, 15.5, 14.5)
)
```
Stršila
=======================================================================
Column {data-width=500}
-------------------------------------
### Box plot
```{r fig.width=9}
# Box plot of the raw scores per outcome; outlying points are drawn in red.
data1_long %>%
  ggplot(aes(x = Ishod, y = Vrijednost)) +
  geom_boxplot(outlier.colour = "red")
```
Column {data-width=500}
-------------------------------------
### Summary (sa stršilima)
```{r}
# Summary of the data before outlier removal.
summary(data1)
```
### Summary (bez stršila)
```{r}
# Summary of the data after outlier removal.
summary(data1_noOutliers)
```
Dendrogram {data-navmenu="Tri klastera"}
=======================================================================
Column {data-width=800}
-------------------------------------
### Tri klastera
```{r fig.width=18,fig.height=10}
# Ward dendrogram with branches coloured for the three-cluster cut.
plot(ward_dend_color3)
```
Column {data-width=100}
-------------------------------------
### Broj studenata u klasterima
```{r}
# Number of students in each of the three clusters.
count(data3_cluster3, klaster)
```
Silhouette {data-navmenu="Tri klastera"}
=======================================================================
Column {data-width=600}
-------------------------------------
### Silhouette
```{r}
# Silhouette plot for the three-cluster solution (console summary suppressed).
fviz_silhouette(sil.obj = sil3, print.summary = FALSE)
```
Column {data-width=300}
-------------------------------------
### info
```{r}
# Silhouette summary for the three-cluster solution.
summary(sil3)
```
Vizualizacija klastera {data-navmenu="Tri klastera"}
=======================================================================
Column {data-width=250}
-------------------------------------
### Funkcije gustoća za pojedini ishod
```{r}
# Density of the standardised scores, one panel per outcome.
viz3 %>%
  ggplot(aes(x = Bodovi, colour = Ishod, fill = Ishod)) +
  geom_density(alpha = 0.2) +
  facet_wrap(vars(Ishod)) +
  ylim(0, 1.5) +
  theme(legend.position = "none")
```
Column {data-width=500}
-------------------------------------
### Funkcije gustoća za pojedini ishod unutar klastera
```{r}
# Density of the standardised scores: clusters in rows, outcomes in columns.
viz3 %>%
  ggplot(aes(x = Bodovi, colour = Ishod, fill = Ishod)) +
  geom_density(alpha = 0.2) +
  facet_grid(rows = vars(klaster), cols = vars(Ishod)) +
  ylim(0, 1.5) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )
```
Dendrogram {data-navmenu="Četiri klastera"}
=======================================================================
Column {data-width=800}
-------------------------------------
### Četiri klastera
```{r fig.width=18,fig.height=10}
# Ward dendrogram with branches coloured for the four-cluster cut.
plot(ward_dend_color4)
```
Column {data-width=100}
-------------------------------------
### Broj studenata u klasterima
```{r}
# Number of students in each of the four clusters.
count(data3_cluster4, klaster)
```
Silhouette {data-navmenu="Četiri klastera"}
=======================================================================
Column {data-width=600}
-------------------------------------
### Silhouette
```{r}
# Silhouette plot for the four-cluster solution (console summary suppressed).
fviz_silhouette(sil.obj = sil4, print.summary = FALSE)
```
Column {data-width=300}
-------------------------------------
### info
```{r}
# Silhouette summary for the four-cluster solution.
summary(sil4)
```
Vizualizacija klastera {data-navmenu="Četiri klastera"}
=======================================================================
Column {data-width=250}
-------------------------------------
### Funkcije gustoća za pojedini ishod
```{r}
# Density of the standardised scores, one panel per outcome.
viz4 %>%
  ggplot(aes(x = Bodovi, colour = Ishod, fill = Ishod)) +
  geom_density(alpha = 0.2) +
  facet_wrap(vars(Ishod)) +
  ylim(0, 1.5) +
  theme(legend.position = "none")
```
Column {data-width=500}
-------------------------------------
### Funkcije gustoća za pojedini ishod unutar klastera
```{r}
# Density of the standardised scores: clusters in rows, outcomes in columns.
viz4 %>%
  ggplot(aes(x = Bodovi, colour = Ishod, fill = Ishod)) +
  geom_density(alpha = 0.2) +
  facet_grid(rows = vars(klaster), cols = vars(Ishod)) +
  ylim(0, 1.5) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )
```
Funkcije gustoća {data-navmenu="Usporedbe klastera"}
=======================================================================
Column {data-width=500}
-------------------------------------
### Tri klastera
```{r fig.height=4}
# Three-cluster densities (clusters in rows, outcomes in columns), shown next
# to the four-cluster version for comparison.
viz3 %>%
  ggplot(aes(x = Bodovi, colour = Ishod, fill = Ishod)) +
  geom_density(alpha = 0.2) +
  facet_grid(rows = vars(klaster), cols = vars(Ishod)) +
  ylim(0, 1.5) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )
```
Column {data-width=500}
-------------------------------------
### Četiri klastera
```{r}
# Four-cluster densities (clusters in rows, outcomes in columns), shown next
# to the three-cluster version for comparison.
viz4 %>%
  ggplot(aes(x = Bodovi, colour = Ishod, fill = Ishod)) +
  geom_density(alpha = 0.2) +
  facet_grid(rows = vars(klaster), cols = vars(Ishod)) +
  ylim(0, 1.5) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )
```
Tablice studenata {data-navmenu="Usporedbe klastera"}
=======================================================================
Column {data-width=500}
-------------------------------------
### Tri klastera
```{r}
# Student table for the three-cluster solution, styled with the "hover" option.
kable_classic(
  kbl(tablica3, caption = "Tablica s tri klastera"),
  lightable_options = "hover",
  full_width = FALSE,
  html_font = "Cambria"
)
```
Column {data-width=500}
-------------------------------------
### Četiri klastera
```{r}
# Student table for the four-cluster solution, styled with the "hover" option.
kable_classic(
  kbl(tablica4, caption = "Tablica s četiri klastera"),
  lightable_options = "hover",
  full_width = FALSE,
  html_font = "Cambria"
)
```
Planirani i ostvareni bodovi (slika1) {data-navmenu="Usporedbe klastera"}
=======================================================================
Column {.tabset .tabset-fade}
-----------------------------------------------------------------------
### Tri klastera
```{r fig.width=15}
# Mean achieved points per outcome, dodged by cluster ("svi" = all students),
# with the maximum points of each outcome marked by a black reference line.
#
# The reference lines and their labels are fixed annotations, so they are
# added with annotate(): a geom_*() layer given constant positions would
# inherit the plot data and draw each line and label once per row of `rez3`.
ggplot(rez3, aes(x = Ishod, y = Prosjek, fill = klaster, color = klaster)) +
  geom_col(width = 0.4, position = position_dodge(width = 0.9)) +
  # One horizontal segment per outcome, spanning its group of bars.
  annotate(
    "segment",
    x = seq_along(bodovi) - 0.4, xend = seq_along(bodovi) + 0.4,
    y = bodovi, yend = bodovi,
    color = "black"
  ) +
  # Maximum points printed just above each reference line.
  annotate(
    "text",
    x = seq_along(bodovi), y = bodovi + 0.7,
    label = bodovi,
    color = "black"
  ) +
  # Achieved mean above every bar.
  geom_text(
    aes(label = round(Prosjek, 2)),
    position = position_dodge(width = 0.9),
    vjust = -0.5
  )
```
### Četiri klastera
```{r fig.width=15}
# Mean achieved points per outcome, dodged by cluster ("svi" = all students),
# with the maximum points of each outcome marked by a black reference line.
#
# The reference lines and their labels are fixed annotations, so they are
# added with annotate(): a geom_*() layer given constant positions would
# inherit the plot data and draw each line and label once per row of `rez4`.
ggplot(rez4, aes(x = Ishod, y = Prosjek, fill = klaster, color = klaster)) +
  geom_col(width = 0.4, position = position_dodge(width = 0.9)) +
  # One horizontal segment per outcome, spanning its group of bars.
  annotate(
    "segment",
    x = seq_along(bodovi) - 0.4, xend = seq_along(bodovi) + 0.4,
    y = bodovi, yend = bodovi,
    color = "black"
  ) +
  # Maximum points printed just above each reference line.
  annotate(
    "text",
    x = seq_along(bodovi), y = bodovi + 0.7,
    label = bodovi,
    color = "black"
  ) +
  # Achieved mean above every bar (smaller text: five bars per outcome).
  geom_text(
    aes(label = round(Prosjek, 2)),
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 3
  )
```
Planirani i ostvareni bodovi (slika2) {data-navmenu="Usporedbe klastera"}
=======================================================================
Column {.tabset .tabset-fade}
-----------------------------------------------------------------------
### Tri klastera
```{r fig.width=10}
# One panel per outcome: mean points per cluster as bars, the outcome's
# maximum as a horizontal line with its value printed at (xcor1, ycor).
ggplot() +
  geom_col(
    data = rez3,
    mapping = aes(x = klaster, y = Prosjek, fill = klaster, color = klaster),
    width = 0.65,
    position = position_dodge(width = 0.9)
  ) +
  facet_wrap(vars(Ishod), scales = "free") +
  geom_hline(data = hl, mapping = aes(yintercept = Bodovi)) +
  geom_text(
    data = hl,
    mapping = aes(x = xcor1, y = ycor, label = Bodovi),
    size = 3
  ) +
  geom_text(
    data = rez3,
    mapping = aes(x = klaster, y = Prosjek, label = round(Prosjek, 2)),
    vjust = -0.1,
    size = 3
  )
```
### Četiri klastera
```{r fig.width=10}
# One panel per outcome: mean points per cluster as bars, the outcome's
# maximum as a horizontal line with its value printed at (xcor2, ycor).
ggplot() +
  geom_col(
    data = rez4,
    mapping = aes(x = klaster, y = Prosjek, fill = klaster, color = klaster),
    width = 0.75,
    position = position_dodge(width = 0.9)
  ) +
  facet_wrap(vars(Ishod), scales = "free") +
  geom_hline(data = hl, mapping = aes(yintercept = Bodovi)) +
  geom_text(
    data = hl,
    mapping = aes(x = xcor2, y = ycor, label = Bodovi),
    size = 3
  ) +
  geom_text(
    data = rez4,
    mapping = aes(x = klaster, y = Prosjek, label = round(Prosjek, 2)),
    vjust = -0.1,
    size = 3
  )
```
Planirani i ostvareni bodovi (slika3) {data-navmenu="Usporedbe klastera"}
=======================================================================
Column {data-width=500}
-------------------------------------
### Tri klastera
```{r fig.width=9, fig.height=8}
# One panel per outcome with the maximum points shown in the panel title;
# the y axis of every panel is extended to cover 0..21.
rez3 %>%
  ggplot(aes(x = klaster, y = Prosjek, fill = klaster, color = klaster)) +
  geom_col(width = 0.65, position = position_dodge(width = 0.9)) +
  geom_text(aes(label = round(Prosjek, 2)), vjust = -0.3) +
  expand_limits(y = c(0, 21)) +
  facet_wrap(
    vars(Ishod),
    scales = "free",
    labeller = as_labeller(c(
      I1 = "I1 (max: 5)",
      I2 = "I2 (max: 18)",
      I3 = "I3 (max: 12)",
      I4 = "I4 (max: 13)",
      I5 = "I5 (max: 6)",
      I6 = "I6 (max: 19)",
      I7 = "I7 (max: 14)",
      I8 = "I8 (max: 13)"
    ))
  )
```
Column {data-width=500}
-------------------------------------
### Četiri klastera
```{r fig.width=9,fig.height=8}
# One panel per outcome with the maximum points shown in the panel title;
# the y axis of every panel is extended to cover 0..21.
rez4 %>%
  ggplot(aes(x = klaster, y = Prosjek, fill = klaster, color = klaster)) +
  geom_col(width = 0.65, position = position_dodge(width = 0.9)) +
  geom_text(aes(label = round(Prosjek, 2)), vjust = -0.3) +
  expand_limits(y = c(0, 21)) +
  facet_wrap(
    vars(Ishod),
    scales = "free",
    labeller = as_labeller(c(
      I1 = "I1 (max: 5)",
      I2 = "I2 (max: 18)",
      I3 = "I3 (max: 12)",
      I4 = "I4 (max: 13)",
      I5 = "I5 (max: 6)",
      I6 = "I6 (max: 19)",
      I7 = "I7 (max: 14)",
      I8 = "I8 (max: 13)"
    ))
  )
```