Optimalni broj klastera
# Total within-cluster sum of squares for each candidate number of clusters.
fviz_nbclust(x = data3, FUNcluster = kmeans, method = "wss")

# Average silhouette width for each candidate number of clusters.
fviz_nbclust(x = data3, FUNcluster = kmeans, method = "silhouette")

# NbClust index battery for k = 2..10 using k-means on Euclidean distances;
# the result is stored in `rez`.
rez <- NbClust(
  data = data3,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 10,
  method = "kmeans"
)

## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##

## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 10 proposed 2 as the best number of clusters
## * 6 proposed 3 as the best number of clusters
## * 1 proposed 4 as the best number of clusters
## * 2 proposed 5 as the best number of clusters
## * 2 proposed 7 as the best number of clusters
## * 1 proposed 8 as the best number of clusters
## * 1 proposed 10 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 2
##
##
## *******************************************************************
# Plot the NbClust result: how many indices proposed each number of clusters.
fviz_nbclust(x = rez) + theme_minimal()
## Among all indices:
## ===================
## * 2 proposed 0 as the best number of clusters
## * 1 proposed 1 as the best number of clusters
## * 10 proposed 2 as the best number of clusters
## * 6 proposed 3 as the best number of clusters
## * 1 proposed 4 as the best number of clusters
## * 2 proposed 5 as the best number of clusters
## * 2 proposed 7 as the best number of clusters
## * 1 proposed 8 as the best number of clusters
## * 1 proposed 10 as the best number of clusters
##
## Conclusion
## =========================
## * According to the majority rule, the best number of clusters is 2 .

kmeans
# Fit k-means for k = 2..5, each with 100 random starts and at most 80
# iterations. The fits are kept in this order so the random-number stream is
# consumed exactly as before.
k2 <- kmeans(x = data3, centers = 2, iter.max = 80, nstart = 100)
k3 <- kmeans(x = data3, centers = 3, iter.max = 80, nstart = 100)
k4 <- kmeans(x = data3, centers = 4, iter.max = 80, nstart = 100)
k5 <- kmeans(x = data3, centers = 5, iter.max = 80, nstart = 100)

# One point-only cluster plot per fit, titled with its k.
p2 <- fviz_cluster(object = k2, data = data3, geom = "point") +
  ggtitle("k = 2")
p3 <- fviz_cluster(object = k3, data = data3, geom = "point") +
  ggtitle("k = 3")
p4 <- fviz_cluster(object = k4, data = data3, geom = "point") +
  ggtitle("k = 4")
p5 <- fviz_cluster(object = k5, data = data3, geom = "point") +
  ggtitle("k = 5")

# Show the four plots in a 2 x 2 grid.
grid.arrange(grobs = list(p2, p3, p4, p5), nrow = 2)

Silhouette
# Silhouette information for each k-means partition, based on the default
# (Euclidean) distances between the rows of data3.
k2.sil <- silhouette(x = k2$cluster, dist = dist(data3))
k3.sil <- silhouette(x = k3$cluster, dist = dist(data3))
k4.sil <- silhouette(x = k4$cluster, dist = dist(data3))
k5.sil <- silhouette(x = k5$cluster, dist = dist(data3))

# Silhouette plots in a 2 x 2 grid; the list is evaluated in order k = 2..5,
# so the per-cluster summaries are printed in that order.
grid.arrange(
  grobs = list(
    fviz_silhouette(k2.sil),
    fviz_silhouette(k3.sil),
    fviz_silhouette(k4.sil),
    fviz_silhouette(k5.sil)
  ),
  nrow = 2
)

## cluster size ave.sil.width
## 1 1 130 0.27
## 2 2 147 0.17
## cluster size ave.sil.width
## 1 1 146 0.15
## 2 2 76 0.25
## 3 3 55 0.18
## cluster size ave.sil.width
## 1 1 42 0.23
## 2 2 86 0.13
## 3 3 70 0.25
## 4 4 79 0.16
## cluster size ave.sil.width
## 1 1 44 0.11
## 2 2 44 0.18
## 3 3 65 0.24
## 4 4 58 0.16
## 5 5 66 0.18
Detaljniji rezultati
# Full summary of the 2-cluster fit: sizes, centres, assignments, and SS.
print(x = k2)
## K-means clustering with 2 clusters of sizes 130, 147
##
## Cluster means:
## KOL1 KOL2 KOL3 KVIZ ZADACI ESEJ
## 1 0.5207267 0.6138797 0.5052787 0.6740048 0.6232491 0.2887709
## 2 -0.4605066 -0.5428868 -0.4468451 -0.5960587 -0.5511727 -0.2553756
##
## Clustering vector:
## 1 3 5 6 7 8 9 10 11 13 14 15 16 17 18 19 20 21 22 23
## 1 2 1 1 2 2 1 1 1 1 2 2 1 2 2 1 1 1 2 1
## 24 25 26 27 28 29 31 33 34 35 36 37 38 39 40 41 42 43 44 47
## 2 2 2 2 2 2 2 1 1 2 1 2 1 1 2 1 1 2 1 2
## 48 49 50 52 53 54 55 56 57 58 59 61 62 63 64 65 66 67 68 69
## 2 2 1 2 2 1 1 1 2 2 2 1 1 1 2 2 1 2 1 1
## 70 71 72 73 74 75 76 78 80 83 85 86 87 88 89 90 91 92 93 96
## 2 1 2 1 2 1 1 2 1 2 2 2 1 1 2 1 1 2 2 1
## 97 98 99 100 101 102 103 104 107 110 111 112 113 114 115 116 117 118 119 120
## 1 2 1 1 2 1 2 2 1 1 2 2 2 2 2 2 2 1 2 1
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 2 2 1 2 2 1 2 2 2 1 2 2 1 1 2 2 2 2 1 1
## 141 143 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## 2 2 1 1 1 2 1 2 1 1 1 1 2 2 2 2 2 2 1 2
## 163 164 165 166 169 171 172 173 174 175 177 179 180 181 182 183 184 185 186 187
## 1 2 2 2 1 1 2 2 1 2 1 2 2 2 1 2 2 1 2 1
## 188 189 192 194 195 196 197 198 199 200 201 202 203 205 206 207 208 209 210 211
## 2 1 2 1 2 2 2 1 1 1 2 2 2 2 1 2 1 2 1 2
## 212 215 216 217 218 220 222 223 224 225 226 227 228 229 230 231 232 233 234 235
## 1 2 1 2 2 1 1 1 1 1 2 2 1 2 1 2 1 1 2 2
## 236 238 239 241 242 245 246 247 249 250 251 252 253 254 255 256 257 258 260 261
## 2 1 1 2 2 2 1 2 2 1 1 1 1 1 2 1 1 1 2 2
## 262 263 264 265 268 269 270 271 272 273 275 276 277 279 280 281 282 283 285 286
## 2 2 1 2 1 1 1 2 1 1 1 2 1 2 2 1 1 2 1 2
## 288 289 290 291 292 294 295 296 298 299 300 301 303 304 306 307 309 310 311 312
## 1 1 2 1 2 2 1 2 2 2 2 2 2 2 1 2 1 1 2 1
## 313 314 316 317 318 319 320 321 322 323 324 325 326 327 328 330 331
## 2 1 2 1 1 1 2 1 2 2 2 2 1 1 1 2 1
##
## Within cluster sum of squares by cluster:
## [1] 499.6581 708.1958
## (between_SS / total_SS = 27.1 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Full summary of the 3-cluster fit: sizes, centres, assignments, and SS.
print(x = k3)
## K-means clustering with 3 clusters of sizes 146, 76, 55
##
## Cluster means:
## KOL1 KOL2 KOL3 KVIZ ZADACI ESEJ
## 1 -0.1834344 -0.1524381 0.1182354 -0.08661922 0.05074048 -0.1578454
## 2 0.8232821 0.9432706 0.7222411 0.89448849 0.78145373 0.6122700
## 3 -0.6506912 -0.8987746 -1.3118672 -1.00608581 -1.21451987 -0.4270380
##
## Clustering vector:
## 1 3 5 6 7 8 9 10 11 13 14 15 16 17 18 19 20 21 22 23
## 1 1 1 1 1 1 2 1 2 2 3 3 2 3 1 2 1 2 1 1
## 24 25 26 27 28 29 31 33 34 35 36 37 38 39 40 41 42 43 44 47
## 1 1 1 1 3 1 1 1 2 1 1 1 2 2 1 2 2 3 2 3
## 48 49 50 52 53 54 55 56 57 58 59 61 62 63 64 65 66 67 68 69
## 1 3 1 3 1 1 2 2 1 3 3 2 2 2 1 3 2 1 1 2
## 70 71 72 73 74 75 76 78 80 83 85 86 87 88 89 90 91 92 93 96
## 1 2 3 2 1 1 1 3 1 3 1 1 2 2 3 1 1 1 1 1
## 97 98 99 100 101 102 103 104 107 110 111 112 113 114 115 116 117 118 119 120
## 2 1 1 2 3 1 1 1 2 2 1 1 1 1 1 3 1 2 1 1
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 1 1 2 3 1 2 3 1 1 2 3 3 1 2 3 1 1 1 1 1
## 141 143 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## 1 1 1 1 1 3 2 1 1 2 1 2 1 1 1 3 3 3 1 1
## 163 164 165 166 169 171 172 173 174 175 177 179 180 181 182 183 184 185 186 187
## 1 1 1 1 2 1 3 1 1 1 2 3 3 1 2 1 3 2 1 2
## 188 189 192 194 195 196 197 198 199 200 201 202 203 205 206 207 208 209 210 211
## 1 2 1 2 3 3 1 2 2 2 1 1 1 1 1 3 1 1 2 3
## 212 215 216 217 218 220 222 223 224 225 226 227 228 229 230 231 232 233 234 235
## 2 1 2 1 1 2 1 2 1 1 3 1 2 3 1 1 1 2 1 3
## 236 238 239 241 242 245 246 247 249 250 251 252 253 254 255 256 257 258 260 261
## 1 2 2 1 3 1 2 3 3 1 2 2 2 2 3 2 1 1 3 3
## 262 263 264 265 268 269 270 271 272 273 275 276 277 279 280 281 282 283 285 286
## 1 3 2 1 2 2 1 3 1 1 1 3 1 1 1 2 2 3 2 1
## 288 289 290 291 292 294 295 296 298 299 300 301 303 304 306 307 309 310 311 312
## 2 2 1 2 1 1 2 1 1 1 3 3 1 3 2 3 2 1 3 2
## 313 314 316 317 318 319 320 321 322 323 324 325 326 327 328 330 331
## 1 1 1 2 1 2 3 1 1 1 1 3 1 2 1 3 1
##
## Within cluster sum of squares by cluster:
## [1] 557.8535 237.0727 241.9307
## (between_SS / total_SS = 37.4 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Full summary of the 4-cluster fit: sizes, centres, assignments, and SS.
print(x = k4)
## K-means clustering with 4 clusters of sizes 42, 86, 70, 79
##
## Cluster means:
## KOL1 KOL2 KOL3 KVIZ ZADACI ESEJ
## 1 -0.7361994 -1.1217419 -1.4437151 -1.14897007 -1.1916411 -0.6603276
## 2 -0.4929941 -0.1602973 -0.1250046 -0.11980977 -0.3313052 0.6219011
## 3 0.7431580 1.0490240 0.7674573 0.92182333 0.8252306 0.6520890
## 4 0.2695798 -0.1586450 0.2236002 -0.07553481 0.2629751 -0.9037465
##
## Clustering vector:
## 1 3 5 6 7 8 9 10 11 13 14 15 16 17 18 19 20 21 22 23
## 4 2 4 4 2 4 3 4 4 3 1 1 3 1 4 3 4 4 4 4
## 24 25 26 27 28 29 31 33 34 35 36 37 38 39 40 41 42 43 44 47
## 2 4 4 2 1 4 4 2 3 2 4 2 4 3 2 3 3 1 3 1
## 48 49 50 52 53 54 55 56 57 58 59 61 62 63 64 65 66 67 68 69
## 2 1 4 2 4 4 3 3 4 2 2 3 3 2 2 1 3 2 2 3
## 70 71 72 73 74 75 76 78 80 83 85 86 87 88 89 90 91 92 93 96
## 4 3 1 3 2 4 2 1 4 1 2 2 3 3 1 4 2 2 4 4
## 97 98 99 100 101 102 103 104 107 110 111 112 113 114 115 116 117 118 119 120
## 3 4 2 3 1 2 2 2 3 3 4 2 2 4 2 2 4 3 4 3
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 4 4 4 1 4 3 1 2 4 3 1 4 2 3 1 4 4 2 4 3
## 141 143 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## 2 2 4 4 2 1 3 2 4 3 2 3 2 4 4 2 2 1 2 4
## 163 164 165 166 169 171 172 173 174 175 177 179 180 181 182 183 184 185 186 187
## 4 4 4 2 3 2 1 2 2 4 3 2 1 2 3 4 1 3 2 3
## 188 189 192 194 195 196 197 198 199 200 201 202 203 205 206 207 208 209 210 211
## 4 3 2 2 1 1 2 3 3 3 2 2 2 4 4 1 4 2 3 1
## 212 215 216 217 218 220 222 223 224 225 226 227 228 229 230 231 232 233 234 235
## 3 4 3 2 2 3 4 3 4 2 1 4 3 1 4 2 4 3 2 1
## 236 238 239 241 242 245 246 247 249 250 251 252 253 254 255 256 257 258 260 261
## 2 3 3 2 2 4 3 1 1 2 3 3 3 4 1 3 2 2 1 4
## 262 263 264 265 268 269 270 271 272 273 275 276 277 279 280 281 282 283 285 286
## 4 2 3 4 3 3 4 1 2 4 2 1 2 2 2 2 3 1 3 2
## 288 289 290 291 292 294 295 296 298 299 300 301 303 304 306 307 309 310 311 312
## 3 3 2 4 4 2 3 2 2 4 1 2 4 1 3 1 3 2 1 3
## 313 314 316 317 318 319 320 321 322 323 324 325 326 327 328 330 331
## 4 4 4 3 4 3 1 2 2 2 2 1 3 3 4 2 4
##
## Within cluster sum of squares by cluster:
## [1] 157.1299 294.3591 208.9729 254.6376
## (between_SS / total_SS = 44.7 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Full summary of the 5-cluster fit: sizes, centres, assignments, and SS.
print(x = k5)
## K-means clustering with 5 clusters of sizes 44, 44, 65, 58, 66
##
## Cluster means:
## KOL1 KOL2 KOL3 KVIZ ZADACI ESEJ
## 1 0.1630738 0.09479498 0.20768526 -0.74359193 -1.0050365 0.2472387
## 2 -0.8430350 -1.11515824 -1.50074475 -0.99282274 -1.1178169 -0.5489552
## 3 0.8538466 1.10690675 0.78457181 0.93380093 0.7997475 0.6677286
## 4 0.3411448 -0.16614517 0.11452921 0.02854013 0.3286158 -1.1263673
## 5 -0.6873960 -0.26388690 -0.01129158 0.21287664 0.3388219 0.5333708
##
## Clustering vector:
## 1 3 5 6 7 8 9 10 11 13 14 15 16 17 18 19 20 21 22 23
## 5 1 4 4 1 4 3 1 4 3 2 2 3 2 4 5 4 4 4 4
## 24 25 26 27 28 29 31 33 34 35 36 37 38 39 40 41 42 43 44 47
## 5 4 4 5 2 4 4 5 3 1 5 1 4 3 1 3 3 2 3 2
## 48 49 50 52 53 54 55 56 57 58 59 61 62 63 64 65 66 67 68 69
## 5 2 5 2 4 4 3 3 4 1 1 3 3 5 1 2 3 5 1 3
## 70 71 72 73 74 75 76 78 80 83 85 86 87 88 89 90 91 92 93 96
## 4 3 2 3 5 5 5 2 4 2 1 1 3 3 2 5 5 5 5 4
## 97 98 99 100 101 102 103 104 107 110 111 112 113 114 115 116 117 118 119 120
## 3 4 1 3 2 5 5 5 3 3 1 1 5 1 5 2 4 3 5 5
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 4 4 4 2 4 3 2 1 4 3 2 1 5 3 2 4 4 5 5 5
## 141 143 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## 5 5 5 4 5 1 3 1 4 3 5 3 5 5 1 2 1 2 5 4
## 163 164 165 166 169 171 172 173 174 175 177 179 180 181 182 183 184 185 186 187
## 4 4 4 5 3 5 2 5 5 4 3 1 2 5 3 5 2 3 1 5
## 188 189 192 194 195 196 197 198 199 200 201 202 203 205 206 207 208 209 210 211
## 1 3 1 5 2 2 1 3 3 3 1 5 5 4 1 2 4 1 3 2
## 212 215 216 217 218 220 222 223 224 225 226 227 228 229 230 231 232 233 234 235
## 3 4 3 5 5 3 4 3 4 5 2 4 3 2 4 1 4 3 5 2
## 236 238 239 241 242 245 246 247 249 250 251 252 253 254 255 256 257 258 260 261
## 5 3 3 5 2 1 3 1 2 5 3 3 3 4 2 3 5 1 2 4
## 262 263 264 265 268 269 270 271 272 273 275 276 277 279 280 281 282 283 285 286
## 4 5 3 4 3 3 4 2 5 4 5 2 5 1 5 5 3 2 3 1
## 288 289 290 291 292 294 295 296 298 299 300 301 303 304 306 307 309 310 311 312
## 3 3 5 4 4 5 3 5 5 4 1 1 1 2 3 2 3 1 2 3
## 313 314 316 317 318 319 320 321 322 323 324 325 326 327 328 330 331
## 4 4 4 3 1 3 2 1 1 5 1 2 5 3 4 2 4
##
## Within cluster sum of squares by cluster:
## [1] 142.9573 167.4976 181.7930 171.1235 172.5916
## (between_SS / total_SS = 49.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
Korelacije (Pearson)
# Correlation matrix plot for all columns of data3, with coefficients printed
# to two decimals and label transparency enabled.
ggcorr(
  data = data3,
  label = TRUE,
  label_alpha = TRUE,
  label_round = 2
)

Korelacije unutar klastera (slučaj s 5 klastera)
# Attach the 5-cluster k-means labels to data3.
data4 <- mutate(data3, klaster = k5$cluster)

# One correlation plot per cluster; the label column is dropped before the
# correlations are computed.
kor1 <- ggcorr(
  data = select(filter(data4, klaster == 1), -klaster),
  label = TRUE, label_size = 3, label_round = 2, label_alpha = TRUE,
  size = 3, hjust = 0.65
) + ggtitle("klaster: 1")
kor2 <- ggcorr(
  data = select(filter(data4, klaster == 2), -klaster),
  label = TRUE, label_size = 3, label_round = 2, label_alpha = TRUE,
  size = 3, hjust = 0.65
) + ggtitle("klaster: 2")
kor3 <- ggcorr(
  data = select(filter(data4, klaster == 3), -klaster),
  label = TRUE, label_size = 3, label_round = 2, label_alpha = TRUE,
  size = 3, hjust = 0.65
) + ggtitle("klaster: 3")
kor4 <- ggcorr(
  data = select(filter(data4, klaster == 4), -klaster),
  label = TRUE, label_size = 3, label_round = 2, label_alpha = TRUE,
  size = 3, hjust = 0.65
) + ggtitle("klaster: 4")
kor5 <- ggcorr(
  data = select(filter(data4, klaster == 5), -klaster),
  label = TRUE, label_size = 3, label_round = 2, label_alpha = TRUE,
  size = 3, hjust = 0.65
) + ggtitle("klaster: 5")

# Same plot for all observations together, for comparison.
kor <- ggcorr(
  data = data3,
  label = TRUE, label_size = 3, label_round = 2, label_alpha = TRUE,
  size = 3, hjust = 0.65
) + ggtitle("svi zajedno")

# Six panels laid out in three rows.
grid.arrange(grobs = list(kor1, kor2, kor3, kor4, kor5, kor), nrow = 3)

Funkcije gustoće po aktivnostima i klasterima (slučaj s 5 klastera)
# Attach the 5-cluster k-means labels to data2.
# NOTE(review): this assumes data2 has the same rows, in the same order, as the
# data that k5 was fitted on -- confirm.
d4 <- data2 %>% mutate(klaster = k5$cluster)

# Reshape to long format: columns KOL1..ESEJ become name/value pairs in
# Aktivnost/Bodovi, while klaster is carried along. pivot_longer() replaces the
# superseded gather(); it emits rows grouped by observation rather than by
# activity, which does not matter for order-independent uses such as plotting.
d5 <- d4 %>%
  pivot_longer(
    cols = KOL1:ESEJ,
    names_to = "Aktivnost",
    values_to = "Bodovi"
  )
originalni podaci
# Score densities, one panel per activity.
ggplot(
  data = d5,
  mapping = aes(x = Bodovi, colour = Aktivnost, fill = Aktivnost)
) +
  geom_density(alpha = 0.2) +
  facet_wrap(~ Aktivnost)

# Same densities split by cluster: activities in rows, clusters in columns.
ggplot(
  data = d5,
  mapping = aes(x = Bodovi, colour = Aktivnost, fill = Aktivnost)
) +
  geom_density(alpha = 0.2) +
  facet_grid(Aktivnost ~ klaster)

# Transposed layout: clusters in rows, activities in columns.
ggplot(
  data = d5,
  mapping = aes(x = Bodovi, colour = Aktivnost, fill = Aktivnost)
) +
  geom_density(alpha = 0.2) +
  facet_grid(klaster ~ Aktivnost)

normalizirani podaci
# Long format of data4: columns KOL1..ESEJ become name/value pairs in
# Aktivnost/Bodovi, while klaster is carried along. pivot_longer() replaces the
# superseded gather(); the resulting row order differs, but the density plots
# below do not depend on row order.
data5 <- data4 %>%
  pivot_longer(
    cols = KOL1:ESEJ,
    names_to = "Aktivnost",
    values_to = "Bodovi"
  )

# Score densities, one panel per activity.
ggplot(data5, aes(x = Bodovi, fill = Aktivnost, color = Aktivnost)) +
  geom_density(alpha = .2) +
  facet_wrap(~Aktivnost)

# Same densities split by cluster: activities in rows, clusters in columns.
ggplot(data5, aes(x = Bodovi, fill = Aktivnost, color = Aktivnost)) +
  geom_density(alpha = .2) +
  facet_grid(Aktivnost ~ klaster)

# Transposed layout: clusters in rows, activities in columns.
ggplot(data5, aes(x = Bodovi, fill = Aktivnost, color = Aktivnost)) +
  geom_density(alpha = .2) +
  facet_grid(klaster ~ Aktivnost)

Kmeans (amap)
Manhattan metrika
# K-means under the Manhattan metric (Kmeans, capital K) for k = 2..5, each
# with 100 random starts and at most 80 iterations.
K2 <- Kmeans(data3, 2, method = "manhattan", nstart = 100, iter.max = 80)
K3 <- Kmeans(data3, 3, method = "manhattan", nstart = 100, iter.max = 80)
K4 <- Kmeans(data3, 4, method = "manhattan", nstart = 100, iter.max = 80)
K5 <- Kmeans(data3, 5, method = "manhattan", nstart = 100, iter.max = 80)

# Score the partitions with the same Manhattan metric that produced them.
# Earlier versions used dist(data3), i.e. Euclidean distances, so silhouette
# widths recorded from those runs will not match the values produced here.
# The distance matrix is computed once and shared by all four calls.
manhattan_dist <- dist(data3, method = "manhattan")
K2.sil <- silhouette(K2$cluster, manhattan_dist)
K3.sil <- silhouette(K3$cluster, manhattan_dist)
K4.sil <- silhouette(K4$cluster, manhattan_dist)
K5.sil <- silhouette(K5$cluster, manhattan_dist)

# Silhouette plots in a 2 x 2 grid, in order k = 2..5.
grid.arrange(
  fviz_silhouette(K2.sil),
  fviz_silhouette(K3.sil),
  fviz_silhouette(K4.sil),
  fviz_silhouette(K5.sil),
  nrow = 2
)

## cluster size ave.sil.width
## 1 1 136 0.18
## 2 2 141 0.25
## cluster size ave.sil.width
## 1 1 108 0.19
## 2 2 80 0.17
## 3 3 89 0.09
## cluster size ave.sil.width
## 1 1 92 0.12
## 2 2 62 0.19
## 3 3 59 0.09
## 4 4 64 0.05
## cluster size ave.sil.width
## 1 1 79 0.09
## 2 2 70 0.20
## 3 3 1 0.00
## 4 4 87 0.13
## 5 5 40 0.22
Euklidska metrika
# K-means under the Euclidean metric (Kmeans, capital K) for k = 2..5, each
# with 100 random starts and at most 80 iterations.
K2e <- Kmeans(x = data3, centers = 2, iter.max = 80, nstart = 100,
              method = "euclidean")
K3e <- Kmeans(x = data3, centers = 3, iter.max = 80, nstart = 100,
              method = "euclidean")
K4e <- Kmeans(x = data3, centers = 4, iter.max = 80, nstart = 100,
              method = "euclidean")
K5e <- Kmeans(x = data3, centers = 5, iter.max = 80, nstart = 100,
              method = "euclidean")

# Silhouette information based on the default (Euclidean) distances between
# the rows of data3.
K2e.sil <- silhouette(x = K2e$cluster, dist = dist(data3))
K3e.sil <- silhouette(x = K3e$cluster, dist = dist(data3))
K4e.sil <- silhouette(x = K4e$cluster, dist = dist(data3))
K5e.sil <- silhouette(x = K5e$cluster, dist = dist(data3))

# Silhouette plots in a 2 x 2 grid; the list is evaluated in order k = 2..5,
# so the per-cluster summaries are printed in that order.
grid.arrange(
  grobs = list(
    fviz_silhouette(K2e.sil),
    fviz_silhouette(K3e.sil),
    fviz_silhouette(K4e.sil),
    fviz_silhouette(K5e.sil)
  ),
  nrow = 2
)

## cluster size ave.sil.width
## 1 1 147 0.17
## 2 2 130 0.27
## cluster size ave.sil.width
## 1 1 106 0.17
## 2 2 89 0.11
## 3 3 82 0.20
## cluster size ave.sil.width
## 1 1 49 0.10
## 2 2 72 0.22
## 3 3 62 0.13
## 4 4 94 0.16
## cluster size ave.sil.width
## 1 1 45 0.15
## 2 2 44 0.18
## 3 3 70 0.12
## 4 4 59 0.10
## 5 5 59 0.25