forked from movestore/Point-Cluster-Detection
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathRFunction.R
More file actions
116 lines (95 loc) · 6.08 KB
/
Copy pathRFunction.R
File metadata and controls
116 lines (95 loc) · 6.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
library('move')
library('geodist')
library('lubridate')
library('lutz')
library('sf')
rFunction = function(rad=NULL, dur=NULL, dur_unit="days", data, ...) {
Sys.setenv(tz="UTC")
if (is.null(rad))
{
logger.info("Your cluster radius is not supplied. We use default 200 m.")
rad <- 200
}
if (is.null(dur))
{
logger.info(paste0("Your minimum cluster duration is not supplied. We use 14 (",dur_unit,") as default."))
dur <- 14
}
# take out "remove" locations from data if there are any
remo <- FALSE
if (any(namesIndiv(data)=="remove"))
{
data.split <- move::split(data)
ix <- which(names(data.split)=="remove")
remove <- data.split[[ix]] #move object
data <- moveStack(data.split[-ix])
remo <- TRUE
logger.info(paste("Your data set contains", length(remove), "locations with the ID 'remove'. Clusters close (< rad) to those locations will be removed from your results."))
}
#tried to include recurse package here to pre-filter only revisited locations, but the runtime of getRecursions() was too long
# cluster for all locations (not by ID)
coos <- coordinates(data)
dista <- geodist_vec(x1=coos[,1],y1=coos[,2],measure="vincenty") #unit=m, "geodesic" is probably better, but takes even longer
#clu <- hclust(as.dist(dista),method="ward.D2") #measure in dendrogram is not distance
clu <- hclust(as.dist(dista),method="average")
#plot(as.dendrogram(clu), ylim = c(0,1000))
#abline(h=400,col=2)
memb <- cutree(clu,h=2*rad) #group membership for each location
data@data <- cbind(data@data,"clusterID"=memb)
cluID_all <- unique(memb)
cluID <- apply(matrix(cluID_all), 1, function(x) ifelse(as.numeric(difftime(max(timestamps(data)[data@data$clusterID==x]),min(timestamps(data)[data@data$clusterID==x]),unit=dur_unit))>=dur, x, NA))
cluID <- cluID[!is.na(cluID)]
if (length(cluID)>0)
{
midlon <- apply(matrix(cluID), 1, function(x) mean(coordinates(data[data@data$clusterID==x])[,1]))
midlat <- apply(matrix(cluID), 1, function(x) mean(coordinates(data[data@data$clusterID==x])[,2]))
#take out clusters in rad radius around "remove"
if (remo==TRUE)
{
remo_dist <- geodist_vec(x1=coordinates(remove)[,1],y1=coordinates(remove)[,2],x2=midlon,y2=midlat,measure="vincenty")
if (any(remo_dist<rad))
{
out <- which(remo_dist<rad,arr.ind=TRUE)[,2]
cluID <- cluID[-out]
midlon <- midlon[-out]
midlat <- midlat[-out]
logger.info(paste0(length(out)," clusters were removed from your results, because they were close (< rad) to the provided locations with ID 'remove'."))
}
}
result <- data[data@data$clusterID %in% cluID] #these are all locations that are in a (non-remove) cluster with difftime>dur
result@data <- result@data[,!sapply(result@data, function(x) all(is.na(x)))]
result@data$cluster.mid.long <- apply(matrix(result@data$clusterID), 1, function(x) midlon[which(cluID==x)])
result@data$cluster.mid.lat <- apply(matrix(result@data$clusterID), 1, function(x) midlat[which(cluID==x)])
tz_info_result <- tz_lookup_coords(coordinates(result)[,2], coordinates(result)[,1], method = "accurate")
result@data$timestamp.local <- apply(data.frame(timestamps(result),tz_info_result), 1, function(x) as.character(lubridate::with_tz(x[1], x[2])))
result.df <- data.frame(as.data.frame(result),coordinates(result))
clu.ix <- which(names(result.df) %in% c("clusterID","cluster.mid.long","cluster.mid.lat"))
result.df <- data.frame(result.df[,clu.ix],result.df[,-clu.ix])
#write.csv(result.df,file=paste0(Sys.getenv(x = "APP_ARTIFACTS_DIR", "/tmp/"),"Points_With_Clusters.csv"),row.names=FALSE)
write.csv(result.df,file="Points_With_Clusters.csv",row.names=FALSE)
#cluster table
n.locs <- apply(matrix(cluID), 1, function(x) length(which(result@data$clusterID==x)))
n.ids <- apply(matrix(cluID), 1, function(x) length(unique(result.df$trackId[result.df$clusterID==x])))
id.names <- apply(matrix(cluID), 1, function(x) paste(unique(result.df$trackId[result.df$clusterID==x]),collapse=", "))
id.locs <- id.durs <- character(length(cluID))
for (i in seq(along=cluID))
{
idsi <- as.character(unique(result.df$trackId[result.df$clusterID==cluID[i]]))
id.locs[i] <- paste(apply(matrix(idsi), 1, function(x) length(result.df$trackId[result.df$trackId==x & result.df$clusterID==cluID[i]])),collapse=", ")
id.durs[i] <- paste(apply(matrix(idsi), 1, function(x) round(as.numeric(difftime(max(result.df$timestamp[result.df$trackId==x & result.df$clusterID==cluID[i]]),min(result.df$timestamp[result.df$trackId==x & result.df$clusterID==cluID[i]]),units="days")),2)),collapse=", ")
}
timestamp.start <- apply(matrix(cluID), 1, function(x) paste(as.character(min(timestamps(result[result@data$clusterID==x]))),"UTC"))
timestamp.end <- apply(matrix(cluID), 1, function(x) paste(as.character(max(timestamps(result[result@data$clusterID==x]))),"UTC"))
duration <- as.numeric(difftime(as.POSIXct(timestamp.end), as.POSIXct(timestamp.start),units=dur_unit))
tz_info_clu<- tz_lookup_coords(midlat, midlon, method = "accurate")
timestamp.start.local <- apply(data.frame(timestamp.start,tz_info_clu), 1, function(x) as.character(lubridate::with_tz(x[1], x[2])))
timestamp.end.local <- apply(data.frame(timestamp.end,tz_info_clu), 1, function(x) as.character(lubridate::with_tz(x[1], x[2])))
clu_tab <- data.frame("cluster.ID"=cluID,"mid.long"=midlon,"mid.lat"=midlat,timestamp.start,timestamp.end,timestamp.start.local,timestamp.end.local,duration,n.locs,n.ids,id.names,id.locs,id.durs)
names(clu_tab)[names(clu_tab)=="duration"] <- paste0("duration (",dur_unit,")")
names(clu_tab)[names(clu_tab)=="id.durs"] <- paste0("id.durs (",dur_unit,")")
#write.csv(clu_tab,file=paste0(Sys.getenv(x = "APP_ARTIFACTS_DIR", "/tmp/"),"Cluster_Table.csv"),row.names=FALSE)
write.csv(clu_tab,file="Cluster_Table.csv",row.names=FALSE)
} else result <- NULL
# the use of package recurse or adehabitatLT does only work on tracks, but not for clusters by all animals...
return(result)
}