-
Notifications
You must be signed in to change notification settings - Fork 8
/
geocode-ggmaps.R
128 lines (120 loc) · 4.25 KB
/
geocode-ggmaps.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# LIBRARIES
library(dplyr)
library(tidyr)
library(forcats)
library(purrr)
library(stringr)
library(readxl)
library(writexl)
library(stringdist)
library(ggmap)
library(sp)
library(glue)
library(RSQLite)
library(memoise)
# GEOCODE -------------------------------------------------------------------------------------------------------------
# Address component names, in the column order used for the parsed geocoding output.
ADDRESS_COMPONENTS <- c(
  "street_number",
  "street_name",
  "locality",
  "sublocality",
  "suburb",
  "town",
  "province",
  "country",
  "postal_code"
)
#
# Flatten the `results` element of a geocoding reply into one data frame.
#
# Args:
#   results: list of result entries. Each entry is read for `formatted_address`,
#            `geometry$location$lat`, `geometry$location$lng`, `geometry$location_type` and
#            `address_components` (a list of entries with `long_name`, `short_name`, `types`).
#
# Returns:
#   A data frame with one row per result and the columns geo_address, geo_lat, geo_lon, geo_type,
#   geo_street_number, geo_street_name, geo_suburb, geo_town, geo_province, geo_country and
#   geo_postal_code. Fields which are absent from a result are NA.
#
parse_geocode_results <- function(results) {
  lapply(results, function(r) {
    # An absent field extracted with `$` is NULL (not an error) and tibble() silently drops NULL
    # columns, so use pluck() with a typed default to always get all four columns.
    location <- tibble(
      geo_address = pluck(r, "formatted_address", .default = NA_character_),
      geo_lat = pluck(r, "geometry", "location", "lat", .default = NA_real_),
      geo_lon = pluck(r, "geometry", "location", "lng", .default = NA_real_),
      geo_type = pluck(r, "geometry", "location_type", .default = NA_character_)
    )
    #
    parts <- r$address_components %>%
      lapply(as_tibble) %>%
      bind_rows()
    # A result without address components would otherwise have no `types` column to filter on.
    if (nrow(parts) == 0) {
      parts <- tibble(long_name = character(), short_name = character(), types = character())
    }
    #
    components <- parts %>%
      filter(!types %in% c("political", "sublocality_level_1")) %>%
      select(-short_name) %>%
      mutate(types = as.character(types)) %>%
      mutate(
        types = case_when(
          types == "route" ~ "street_name",
          types == "administrative_area_level_1" ~ "province",
          types == "administrative_area_level_2" ~ "town",
          TRUE ~ types
        )
      ) %>%
      # Unknown types would become NA keys (stray "<NA>" column) and repeated types make spread()
      # fail, so keep only the first entry for each known type.
      filter(types %in% ADDRESS_COMPONENTS) %>%
      distinct(types, .keep_all = TRUE) %>%
      complete(types = ADDRESS_COMPONENTS) %>%
      mutate(
        # Assert order.
        types = factor(types, levels = ADDRESS_COMPONENTS),
        # Rename levels.
        types = fct_relabel(types, ~ paste0("geo_", .))
      ) %>%
      spread(types, long_name)
    # Suburb is "<sublocality>, <locality>" built from whichever parts are present. Missing parts
    # are dropped before pasting (rather than pasting and then deleting the text "NA").
    suburb_parts <- unique(c(components$geo_sublocality, components$geo_locality))
    suburb_parts <- suburb_parts[!is.na(suburb_parts)]
    suburb <- if (length(suburb_parts) > 0) paste(suburb_parts, collapse = ", ") else NA_character_
    # Drop a trailing ", Durban" when the town is already Durban. isTRUE() treats a missing town as
    # "no match" so that the suburb is retained.
    town_is_durban <- str_count(suburb, "\\bDurban\\b") >= 1 &
      str_count(components$geo_town, "\\bDurban\\b") == 1
    if (isTRUE(town_is_durban)) {
      suburb <- str_replace(suburb, "\\b,[:space:]Durban$\\b", "")
    }
    #
    components <- components %>%
      mutate(
        geo_suburb = suburb,
        geo_town = str_replace(geo_town, pattern = "\\b[:space:]Metro$\\b", replacement = ""),
        geo_postal_code = as.integer(geo_postal_code)
      ) %>%
      select(-geo_locality, -geo_sublocality)
    #
    cbind(location, components)
  }) %>% bind_rows()
}
# Although geocode() can handle a vector of addresses, here we query only a single address at a time.
#
# Geocode a single address, retrying when the request fails.
#
# Args:
#   address:   address string passed to geocode().
#   max_retry: maximum number of attempts before giving up.
#
# Returns:
#   The output of parse_geocode_results() for the reply, or NA when every attempt failed or the
#   reply status is "ZERO_RESULTS".
#
tolerant_geocode <- function(address, max_retry = 5) {
  message("-> ", address)
  retries <- 0
  #
  while (TRUE) {
    # Errors and warnings are returned as condition objects. These have no "status" element, so
    # they fall through to the retry logic below.
    reply <- tryCatch(
      suppressMessages(
        geocode(
          address,
          output = "all",
          source = "google",
          inject = "region=za",
          override_limit = TRUE
        )
      ),
      error = function(e) e,
      warning = function(w) w
    )
    #
    if ("status" %in% names(reply)) break
    # Report why the attempt failed rather than discarding the condition.
    if (inherits(reply, "condition")) {
      message("   geocode() failed: ", conditionMessage(reply))
    }
    #
    retries <- retries + 1
    # Give up before sleeping: there is nothing to wait for after the final attempt.
    if (retries >= max_retry) return(NA)
    cat("Waiting...")
    Sys.sleep(5)
    cat(glue(" retry {retries}."), "\n")
  }
  # Check for no results.
  #
  if (reply$status == "ZERO_RESULTS") {
    return(NA)
  }
  parse_geocode_results(reply$results)
  # # Deal with duplicate matches (take match address which is closest to original address).
  # mutate(
  #   distance = stringdist(tolower(address), tolower(geo_address))
  # ) %>% arrange(distance) %>% select(-distance) %>% head(1)
}
# Create formatted address to use for geocoding:
# Address to geocode: "<Institution_Name>[, <StreetAddress>], South Africa" (NA when the
# institution name is missing).
gis_na <- gis_na %>%
  mutate(
    fmt_address = Institution_Name,
    fmt_address = ifelse(!is.na(StreetAddress), paste(fmt_address, StreetAddress, sep = ", "), fmt_address),
    fmt_address = ifelse(!is.na(fmt_address), paste0(fmt_address, ", South Africa"), fmt_address)
  )
# Geocode each address. possibly() turns any error for an individual address into NA so that one
# bad record does not abort the whole run.
geocode_gis_na <- gis_na %>%
  select(fmt_address, everything()) %>%
  mutate(geocode = map(fmt_address, possibly(tolerant_geocode, otherwise = NA)))
# Drop the excluded record and expand the geocoding results into columns.
# NOTE(review): record 500448588 is geocoded above and only then discarded; the reason for the
# exclusion is not visible here. Consider filtering before geocoding - confirm.
geocode_gis_na <- geocode_gis_na %>%
  filter(NatEmis != 500448588) %>%
  select(NatEmis, fmt_address, geocode) %>%
  # Name the list column explicitly: calling unnest() without it is deprecated.
  unnest(geocode)