Bulk geocoding capabilities are provided via the geocode_addresses()
function in arcgisgeocode. Rather than geocoding a single address and returning match candidates, the bulk geocoding capabilities take many addresses and geocode them all at once, returning a single location per address.
Using the bulk geocoding capabilities can result in incurring a cost. See more about geocoding pricing.
In this example, you will geocode restaurant addresses in Boston, MA collected by the Boston Area Research Initiative (BARI). The data is originally from their data portal.
Step 1. Authenticate
To use the bulk geocoding capabilities of the ArcGIS World Geocoder, you must first authenticate using arcgisutils. In this example, we are using user-based authentication via auth_user(). You may choose a different authentication function if it works better for you.
library(arcgisutils) library(arcgisgeocode) set_arc_token(auth_user())
Step 2. Prepare the data
Similar to using find_address_candidates(), the geocoding results return an ID that can be used to join back onto the original dataset. First, you will read in the dataset from a file path (here, a URL) using readr::read_csv() and then create a unique identifier with dplyr::mutate() and dplyr::row_number().
# Boston Yelp addresses # Source: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/DMWCBT fp <- "https://analysis-1.maps.arcgis.com/sharing/rest/content/items/0423768816b343b69d9a425b82351912/data" library(dplyr) restaurants <- readr::read_csv(fp) |> mutate(id = row_number()) restaurants
#> # A tibble: 2,664 × 28 #> restaurant_name restaurant_ID restaurant_address restaurant_tag rating price review_number unique_reviewer #> <chr> <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl> #> 1 100% Delicias 2 635 Hyde Park Ave… Latin America… 2 $$ 37 34 #> 2 100% Delicias Express 3 660A Centre St,Ja… Dominican,Emp… 4 <NA> 26 25 #> 3 107 4 107 Salem St,Bost… Restaurants, NA <NA> 0 0 #> 4 140 Supper Club 6 138 St James Ave,… Diners, 5 <NA> 1 1 #> 5 163 Vietnamese Sandwiche… 7 66 Harrison Ave,B… Vietnamese,Co… 3.5 $ 335 335 #> 6 180 Cafe 8 23 Edinboro St,Bo… Cafes, 4 <NA> 8 8 #> 7 180 Restaurant and Lounge 9 174 Lincoln St,Bo… Restaurants, NA <NA> 0 0 #> 8 224 Boston Street Restau… 11 224 Boston St,Dor… American (New… 4 $$ 248 248 #> 9 24 Hour Pizza Delivery 12 686 Morton St,Bos… Pizza, 1 $$$$ 31 31 #> 10 2Twenty2 13 222 Friend St,Bos… Asian Fusion,… 3 <NA> 63 63 #> # ℹ 2,654 more rows #> # ℹ 20 more variables: reviews_Jan_19 <dbl>, reviews_Feb_19 <dbl>, reviews_Mar_19 <dbl>, reviews_Apr_19 <dbl>, #> # reviews_May_19 <dbl>, reviews_Jun_19 <dbl>, reviews_Jul_19 <dbl>, reviews_Aug_19 <dbl>, reviews_Jan_20 <dbl>, #> # reviews_Feb_20 <dbl>, reviews_Mar_20 <dbl>, reviews_Apr_20 <dbl>, reviews_May_20 <dbl>, reviews_Jun_20 <dbl>, #> # reviews_Jul_20 <dbl>, reviews_Aug_20 <dbl>, restaurant_neighborhood <chr>, GIS_ID <dbl>, CT_ID_10 <dbl>, id <int>
Step 3. Geocode addresses
The restaurant addresses are contained in the restaurant_address column. Pass this column into the single_line argument of geocode_addresses() and store the results in geocoded.
geocoded <- geocode_addresses( single_line = restaurants[["restaurant_address"]] ) # preview the first 10 columns glimpse(geocoded[, 1:10])
#> Rows: 2,664 #> Columns: 11 #> $ result_id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27… #> $ loc_name <chr> "World", "World", "World", "World", "World", "World", "World", "World", "World", "World", "World"… #> $ status <chr> "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M… #> $ score <dbl> 100.00, 100.00, 100.00, 100.00, 100.00, 100.00, 100.00, 100.00, 99.41, 100.00, 100.00, 100.00, 10… #> $ match_addr <chr> "635 Hyde Park Avenue, Roslindale, Massachusetts, 02131", "660A Centre Street, Jamaica Plain, Mas… #> $ long_label <chr> "635 Hyde Park Avenue, Roslindale, MA, 02131, USA", "660A Centre Street, Jamaica Plain, MA, 02130… #> $ short_label <chr> "635 Hyde Park Avenue", "660A Centre Street", "107 Salem Street", "138 Saint James Avenue", "66 H… #> $ addr_type <chr> "PointAddress", "PointAddress", "PointAddress", "PointAddress", "PointAddress", "PointAddress", "… #> $ type_field <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ place_name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ geometry <POINT [°]> POINT (-71.11936 42.27857), POINT (-71.11386 42.31285), POINT (-71.05538 42.3642), POINT (-…
You can use dplyr::reframe() to geocode these addresses in a dplyr-friendly way.
Step 4. Join the results
In the previous step you geocoded the addresses and returned a data frame containing the location information. More likely than not, it would be helpful to have the locations joined onto the original dataset. You can do this by using dplyr::left_join() and joining the id column you created to the result_id column from the geocoding results.
joined_addresses <- left_join( restaurants, geocoded, by = c("id" = "result_id") ) dplyr::glimpse(joined_addresses)
#> Rows: 2,664 #> Columns: 87 #> $ restaurant_name <chr> "100% Delicias", "100% Delicias Express", "107", "140 Supper Club", "163 Vietnamese S… #> $ restaurant_ID <dbl> 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 16, 17, 18, 21, 22, 23, 25, 26, 27, 29, 30, 34, 35, … #> $ restaurant_address <chr> "635 Hyde Park Ave,Roslindale, MA 02131,", "660A Centre St,Jamaica Plain, MA 02130,",… #> $ restaurant_tag <chr> "Latin American,Dominican,", "Dominican,Empanadas,", "Restaurants,", "Diners,", "Viet… #> $ rating <dbl> 2.0, 4.0, NA, 5.0, 3.5, 4.0, NA, 4.0, 1.0, 3.0, 4.0, 3.0, 4.0, 4.5, 3.5, NA, 4.0, 3.0… #> $ price <chr> "$$", NA, NA, NA, "$", NA, NA, "$$", "$$$$", NA, NA, "$$$", "$$", NA, "$", NA, "$$", … #> $ review_number <dbl> 37, 26, 0, 1, 335, 8, 0, 248, 31, 63, 10, 232, 77, 25, 72, 0, 484, 259, 83, 0, 77, 66… #> $ unique_reviewer <dbl> 34, 25, 0, 1, 335, 8, 0, 248, 31, 63, 10, 232, 77, 25, 72, 0, 481, 258, 83, 0, 77, 66… #> $ reviews_Jan_19 <dbl> 0, 1, 0, 0, 0, 0, 0, 1, 0, 8, 0, 1, 7, 0, 1, 0, 2, 0, 1, 0, 1, 0, 0, 1, 2, 0, 3, 8, 0… #> $ reviews_Feb_19 <dbl> 1, 2, 0, 0, 0, 0, 0, 4, 0, 3, 0, 0, 2, 0, 0, 0, 4, 0, 2, 0, 1, 0, 0, 0, 0, 0, 2, 14, … #> $ reviews_Mar_19 <dbl> 1, 3, 0, 0, 0, 1, 0, 5, 1, 2, 0, 0, 3, 0, 2, 0, 1, 1, 1, 0, 1, 1, 5, 0, 0, 0, 3, 8, 0… #> $ reviews_Apr_19 <dbl> 0, 3, 0, 0, 1, 0, 0, 3, 0, 4, 0, 3, 5, 0, 0, 0, 3, 0, 2, 0, 0, 1, 8, 0, 1, 1, 3, 13, … #> $ reviews_May_19 <dbl> 2, 1, 0, 0, 1, 0, 0, 1, 0, 2, 0, 0, 6, 0, 0, 0, 2, 2, 1, 0, 0, 0, 3, 0, 1, 1, 3, 13, … #> $ reviews_Jun_19 <dbl> 0, 0, 0, 0, 1, 0, 0, 1, 0, 4, 0, 1, 3, 0, 0, 0, 1, 2, 3, 0, 1, 0, 8, 0, 1, 1, 6, 9, 0… #> $ reviews_Jul_19 <dbl> 0, 1, 0, 0, 3, 1, 0, 4, 1, 0, 4, 0, 3, 0, 2, 0, 4, 3, 1, 0, 1, 1, 5, 0, 1, 0, 4, 12, … #> $ reviews_Aug_19 <dbl> 0, 7, 0, 0, 0, 0, 0, 3, 0, 7, 3, 0, 0, 0, 0, 0, 5, 2, 2, 0, 1, 0, 4, 0, 1, 1, 4, 9, 0… #> $ reviews_Jan_20 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 5, 1, 0, 0, 4, 0, 1, 0, 1, 1, 4, 0, 2, 0, 3, 6, 0… #> $ reviews_Feb_20 <dbl> 0, 1, 0, 0, 1, 0, 0, 2, 0, 2, 1, 3, 
8, 6, 0, 0, 3, 1, 1, 0, 0, 1, 3, 0, 4, 0, 1, 7, 0… #> $ reviews_Mar_20 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 6, 0, 0, 1, 2, 3, 0, 0, 2, 1, 0, 0, 0, 2, 2, 0… #> $ reviews_Apr_20 <dbl> 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0… #> $ reviews_May_20 <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0… #> $ reviews_Jun_20 <dbl> 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 6, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0… #> $ reviews_Jul_20 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 3, 0, 0, 6, 0, 0, 0, 0, 1, 0, 0, 1, 3, 2, 0… #> $ reviews_Aug_20 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 4, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0… #> $ restaurant_neighborhood <chr> "Roslindale", "Jamaica Plain", "Boston", "Boston", "Boston", "Boston", "Boston", "Dor… #> $ GIS_ID <dbl> 1806741000, 1901410000, 302366000, 401087000, 305264000, 304435000, 305341000, 702993… #> $ CT_ID_10 <dbl> 25025140400, 25025120400, 25025030400, 25025010600, 25025070200, 25025070101, 2502507… #> $ id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24… #> $ loc_name <chr> "World", "World", "World", "World", "World", "World", "World", "World", "World", "Wor… #> $ status <chr> "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", … #> $ score <dbl> 100.00, 100.00, 100.00, 100.00, 100.00, 100.00, 100.00, 100.00, 99.41, 100.00, 100.00… #> $ match_addr <chr> "635 Hyde Park Avenue, Roslindale, Massachusetts, 02131", "660A Centre Street, Jamaic… #> $ long_label <chr> "635 Hyde Park Avenue, Roslindale, MA, 02131, USA", "660A Centre Street, Jamaica Plai… #> $ short_label <chr> "635 Hyde Park Avenue", "660A Centre Street", "107 Salem Street", "138 Saint James Av… #> $ addr_type <chr> "PointAddress", "PointAddress", "PointAddress", "PointAddress", "PointAddress", "Poin… #> $ type_field <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, N… #> $ place_name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ place_addr <chr> "635 Hyde Park Avenue, Roslindale, Massachusetts, 02131", "660A Centre Street, Jamaic… #> $ phone <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ url <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ rank <dbl> 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 2… #> $ add_bldg <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ add_num <chr> "635", "660A", "107", "138", "66", "23", "174", "224", "686", "222", "465", "49", "22… #> $ add_num_from <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ add_num_to <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ add_range <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ side <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ st_pre_dir <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ st_pre_type <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ st_name <chr> "Hyde Park", "Centre", "Salem", "Saint James", "Harrison", "Edinboro", "Lincoln", "Bo… #> $ st_type <chr> "Avenue", "Street", "Street", "Avenue", "Avenue", "Street", "Street", "Street", "Stre… #> $ st_dir <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ bldg_type <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ bldg_name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ level_type <chr> NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ level_name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ unit_type <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ unit_name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ sub_addr <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ st_addr <chr> "635 Hyde Park Avenue", "660A Centre Street", "107 Salem Street", "138 Saint James Av… #> $ block <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ sector <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ nbrhd <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ district <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ city <chr> "Roslindale", "Jamaica Plain", "Boston", "Boston", "Boston", "Boston", "Boston", "Dor… #> $ metro_area <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ subregion <chr> "Suffolk County", "Suffolk County", "Suffolk County", "Suffolk County", "Suffolk Coun… #> $ region <chr> "Massachusetts", "Massachusetts", "Massachusetts", "Massachusetts", "Massachusetts", … #> $ region_abbr <chr> "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "… #> $ territory <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ zone <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… #> $ postal <chr> "02131", "02130", "02113", "02116", "02111", "02111", "02111", "02125", "02126", "021… #> $ postal_ext <chr> "4723", NA, "2227", "5071", "1907", "2131", "2404", "1786", NA, "1801", "5597", "1319… #> $ country <chr> 
"USA", "USA", "USA", "USA", "USA", "USA", "USA", "USA", "USA", "USA", "USA", "USA", "… #> $ cntry_name <chr> "United States", "United States", "United States", "United States", "United States", … #> $ lang_code <chr> "ENG", "ENG", "ENG", "ENG", "ENG", "ENG", "ENG", "ENG", "ENG", "ENG", "ENG", "ENG", "… #> $ distance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… #> $ x <dbl> -71.11927, -71.11409, -71.05552, -71.07630, -71.06120, -71.05975, -71.05861, -71.0610… #> $ y <dbl> 42.27855, 42.31286, 42.36421, 42.34947, 42.35137, 42.35184, 42.35067, 42.32237, 42.28… #> $ display_x <dbl> -71.11936, -71.11386, -71.05538, -71.07624, -71.06105, -71.05988, -71.05843, -71.0608… #> $ display_y <dbl> 42.27857, 42.31285, 42.36420, 42.34923, 42.35131, 42.35182, 42.35060, 42.32229, 42.28… #> $ xmin <dbl> -71.12036, -71.11486, -71.05638, -71.07724, -71.06205, -71.06088, -71.05943, -71.0618… #> $ xmax <dbl> -71.11836, -71.11286, -71.05438, -71.07524, -71.06005, -71.05888, -71.05743, -71.0598… #> $ ymin <dbl> 42.27757, 42.31185, 42.36320, 42.34823, 42.35031, 42.35082, 42.34960, 42.32129, 42.28… #> $ ymax <dbl> 42.27957, 42.31385, 42.36520, 42.35023, 42.35231, 42.35282, 42.35160, 42.32329, 42.28… #> $ ex_info <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "29",… #> $ geometry <POINT [°]> POINT (-71.11936 42.27857), POINT (-71.11386 42.31285), POINT (-71.05538 42.364…