Compares cluster results from several samples. This is necessary because the
cluster_paths() algorithm assigns arbitrary cluster names, which can differ
between the clustered samples.
The comparison is done by clustering the median paths of the samples' clusters.
Arguments
- x
named list containing data.frames with columns
segment, time, temperature and cluster.
- method
the cluster algorithm applied. Default is
"hdbscan".
- minPts
minimum number of samples in a cluster (default is
2). Only applied when method = "hdbscan" or method = "dbscan" is used.
- ...
optional arguments passed to
cluster_paths().
Value
list.
- 'paths'
sf object of the median cooling paths of each sample's cluster
- 'diss'
Dissimilarity matrix of the median paths
- 'hopkins'
Hopkins statistic of the clustering
- 'dist'
Used dissimilarity measure
- 'cluster'
data.frame with the old and new cluster assignments of the median paths (segments).
- 'mds'
data.frame with the multidimensional scaling coordinates of the dissimilarity of the median paths
- 'cluster_method'
the clustering method used
Details
The default cluster algorithm to assign comparable path families is "hdbscan",
which tries to find clusters containing at least 2 sample clusters. This
algorithm may also find "outliers" (NA values in cluster$cluster_new), i.e.
clusters that are not present in other samples.
See also
path_statistics() for calculating the median paths,
path_diss() for calculating the dissimilarity between the paths, and
cluster_paths() for clustering cooling paths.
Examples
# example data
data(tT_paths)
tT_paths_subset <- subset(tT_paths$paths, Comp_GOF >= 0.4)
# calculate dissimilarity and find 3 clusters:
tT_paths_subset_cl <- path_diss(tT_paths_subset) |> cluster_paths(3)
tT_paths_subset_all <- merge(tT_paths_subset, tT_paths_subset_cl, by = "segment")
# create two random subsets of the same sample
set.seed(20250411)
## select 100 random path segments
random_segments1 <- sample(unique(tT_paths$paths$segment), size = 100)
tT_paths_rnd1 <- subset(tT_paths$paths, segment %in% random_segments1)
tT_paths_rnd1_cl <- path_diss(tT_paths_rnd1) |>
cluster_paths(3) # calculate dissimilarity and find 3 clusters
tT_paths_rnd1 <- merge(tT_paths_rnd1, tT_paths_rnd1_cl, by = "segment")
## select 100 random path segments
random_segments2 <- sample(unique(tT_paths$paths$segment), size = 100)
tT_paths_rnd2 <- subset(tT_paths$paths, segment %in% random_segments2)
tT_paths_rnd2_cl <- path_diss(tT_paths_rnd2) |>
cluster_paths(4) # calculate dissimilarity and find 4 clusters
tT_paths_rnd2 <- merge(tT_paths_rnd2, tT_paths_rnd2_cl, by = "segment")
# combine all samples in a named list:
sample_list <- list("all" = tT_paths_subset_all, "s1" = tT_paths_rnd1, "s2" = tT_paths_rnd2)
# check how the individual clusters compare to each other:
cluster_samples(sample_list)
#> $paths
#> Simple feature collection with 10 features and 2 fields
#> Geometry type: LINESTRING
#> Dimension: XY
#> Bounding box: xmin: 0 ymin: 23.24454 xmax: 40 ymax: 65.43562
#> CRS: NA
#> # A tibble: 10 × 3
#> geometry sample cluster_old
#> * <LINESTRING> <chr> <chr>
#> 1 (0.2154473 24.71499, 1.1591 26.55475, 1.944927 26.93293, … all 1
#> 2 (0.004267409 23.96469, 1.063527 24.45831, 1.951099 24.770… all 2
#> 3 (0.2304599 27.90557, 1.162849 33.02217, 2.026578 29.54804… all 3
#> 4 (0.2107394 24.82631, 1.139026 25.46042, 1.95558 26.16914,… s1 1
#> 5 (0 23.24454, 0.9786171 24.58917, 1.931717 23.94287, 2.881… s1 2
#> 6 (0.1485138 27.78524, 1.18385 31.69537, 1.981107 31.4806, … s1 3
#> 7 (0.2105611 24.82631, 1.208798 26.05979, 2.006259 25.92004… s2 1
#> 8 (0.1854174 29.43847, 1.185072 36.67478, 1.999527 40.91297… s2 2
#> 9 (0 23.38913, 0.9953817 24.08435, 1.97012 24.231, 2.771527… s2 3
#> 10 (0 23.56792, 0.9552172 23.64332, 1.903507 23.90072, 2.861… s2 4
#>
#> $diss
#> 1 2 3 4 5 6 7
#> 2 11.857107
#> 3 12.193871 16.691665
#> 4 15.102393 4.468874 15.424051
#> 5 32.854017 21.759057 29.055547 17.751625
#> 6 10.710525 15.714853 4.420437 14.945145 32.557487
#> 7 13.220186 6.416828 12.973788 3.648220 19.633832 12.923656
#> 8 14.057358 19.955667 5.968636 19.346207 31.613409 6.125712 16.405143
#> 9 35.584225 24.489264 31.473238 20.481832 2.730208 35.287695 22.364039
#> 10 11.473066 7.799724 19.497439 9.574394 23.593637 19.362611 10.598462
#> 8 9
#> 2
#> 3
#> 4
#> 5
#> 6
#> 7
#> 8
#> 9 34.343616
#> 10 22.056694 26.323751
#>
#> $hopkins
#> statistic p-value
#> 0.5545917 0.6467965
#>
#> $dist
#> [1] "Hausdorff"
#>
#> $mds
#> MDS1 MDS2 sample cluster_new cluster_old
#> 1 -11.956042312 6.0997621 all 3 1
#> 2 0.165996586 6.3121044 all 2 2
#> 3 -9.105482180 -6.7360248 all 3 3
#> 4 2.744017829 2.4918257 s1 2 1
#> 5 19.651768903 -3.0584461 s1 1 2
#> 6 -12.756873749 -3.5504257 s1 3 3
#> 7 0.475673221 0.8690642 s2 2 1
#> 8 -11.572455541 -8.7502362 s2 3 2
#> 9 22.360112228 -3.7158714 s2 1 3
#> 10 -0.006714985 10.0382478 s2 2 4
#>
#> $cluster
#> sample cluster_new cluster_old
#> 1 all 3 1
#> 2 all 2 2
#> 3 all 3 3
#> 4 s1 2 1
#> 5 s1 1 2
#> 6 s1 3 3
#> 7 s2 2 1
#> 8 s2 3 2
#> 9 s2 1 3
#> 10 s2 2 4
#>
#> $cluster_method
#> [1] "hdbscan"
#>
#> attr(,"class")
#> [1] "list" "tTdiss"