## Simple feature collection with 6 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -5.143751 ymin: 41.33375 xmax: 9.560416 ymax: 50.16764
## Geodetic CRS: WGS 84
## # A tibble: 6 × 12
## GID_1 GID_0 COUNTRY NAME_1 VARNAME_1 NL_NAME_1 TYPE_1 ENGTYPE_1 CC_1 HASC_1
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 FRA.1_1 FRA France Auver… NA NA Région Region NA FR.AR
## 2 FRA.2_1 FRA France Bourg… NA NA Région Region NA FR.BF
## 3 FRA.3_1 FRA France Breta… NA NA Région Region NA FR.BT
## 4 FRA.4_1 FRA France Centr… NA NA Région Region NA FR.CN
## 5 FRA.5_1 FRA France Corse Corsica NA Région Region NA FR.CE
## 6 FRA.6_1 FRA France Grand… NA NA Région Region NA FR.AO
## # ℹ 2 more variables: ISO_1 <chr>, geom <MULTIPOLYGON [°]>
# Map France boundary ----
maps::map(regions = "France", fill = TRUE, col = "black")

# Add retrieved coordinates ----
points(x = content$"lon", y = content$"lat", pch = 19, cex = 1, col = "red")

# Add retrieved name ----
text(
  x = content$"lon", y = content$"lat", labels = content$"name",
  pos = 2, col = "white", family = "serif"
)
# Retrieve coordinates ----
get_coords_from_location(city = "Montpellier", country = "France")
## name lon lat
## 1 Montpellier 3.876734 43.61124
Automation
# List of cities ----
cities <- c("Montpellier", "Paris", "Strasbourg", "Grenoble", "Bourges")

# Retrieve coordinates ----
# Query each city once, then bind all results in a single call instead of
# growing the data frame inside a loop.
coords <- lapply(cities, function(city) {
  get_coords_from_location(city = city, country = "France")
})

# Seed with an empty data frame so an empty 'cities' still yields a data frame
coords <- do.call(rbind, c(list(data.frame()), coords))

coords
## name lon lat
## 1 Montpellier 3.876734 43.61124
## 2 Paris 2.320041 48.85889
## 3 Strasbourg 7.750713 48.58461
## 4 Grenoble 5.735782 45.18756
## 5 Bourges 2.399125 47.08117
Exercise (40 min)
Accessing data
Part 1: Download New Zealand boundaries from https://gadm.org/ (GeoJSON Level 0).
Use the function download.file().
Part 2: Download GBIF occurrences for two bat species endemic to the islands of New Zealand:
Mystacina tuberculata (New Zealand lesser short-tailed bat)
Chalinolobus tuberculatus (New Zealand long-tailed bat)
Use the function rgbif::occ_search(). Do not forget to export the data.
Part 3: Download the PanTHERIA database, a species-level database of life history, ecology, and geography of extant and recently extinct mammals available here.
Use the function download.file() and the function readr::read_delim() to import the database.
Bonus: Plot a New Zealand map with GBIF occurrences.
# Convert 'pop2021' to numeric ----
# Remove spaces (presumably thousands separators) so the values can be parsed
top10$"pop2021" <- gsub(" ", "", top10$"pop2021")
top10$"pop2021" <- as.numeric(top10$"pop2021")

top10
## # A tibble: 10 × 4
## rang2024 commune departement pop2021
## <int> <chr> <chr> <dbl>
## 1 1 Paris Paris 2133111
## 2 2 Marseille Bouches-du-Rhône 873076
## 3 3 Lyon Métropole de Lyon 522250
## 4 4 Toulouse Haute-Garonne 504078
## 5 5 Nice Alpes-Maritimes 348085
## 6 6 Nantes Loire-Atlantique 323204
## 7 7 Montpellier Hérault 302454
## 8 8 Strasbourg Bas-Rhin 291313
## 9 9 Bordeaux Gironde 261804
## 10 10 Lille Nord 236710
Scrape other elements
Detect HTML element by tag
# Extract content of h1 element ----
rvest::html_element(content, css = "h1") |>
  rvest::html_text2()
## [1] "Liste des communes de France les plus peuplées"
Scrape other elements
Detect HTML element by tag
# Extract content of h1 element ----
rvest::html_element(content, css = "h1") |>
  rvest::html_text2()
## [1] "Liste des communes de France les plus peuplées"
Detect HTML elements by tag
# Extract content of the first h2 element ----
rvest::html_element(content, css = "h2") |>
  rvest::html_text2()
## [1] "Sommaire"
# Extract content of all h2 elements ----
rvest::html_elements(content, css = "h2") |>
  rvest::html_text2()
## [1] "Sommaire"
## [2] "Cadre des données"
## [3] "Vue d'ensemble"
## [4] "Communes de plus de 30 000 habitants"
## [5] "Communes ayant compté plus de 30 000 habitants avant 2024"
## [6] "Notes et références"
## [7] "Voir aussi"
Scrape other elements
Detect HTML element by tag
# Extract content of h1 element ----
rvest::html_element(content, css = "h1") |>
  rvest::html_text2()
## [1] "Liste des communes de France les plus peuplées"
Detect HTML elements by tag
# Extract content of the first h2 element ----
rvest::html_element(content, css = "h2") |>
  rvest::html_text2()
## [1] "Sommaire"
# Extract content of all h2 elements ----
rvest::html_elements(content, css = "h2") |>
  rvest::html_text2()
## [1] "Sommaire"
## [2] "Cadre des données"
## [3] "Vue d'ensemble"
## [4] "Communes de plus de 30 000 habitants"
## [5] "Communes ayant compté plus de 30 000 habitants avant 2024"
## [6] "Notes et références"
## [7] "Voir aussi"
Detect HTML element by ID
# Extract content of the h2 element detected by its id ----
rvest::html_element(content, css = "#Cadre_des_données") |>
  rvest::html_text2()
## [1] "Cadre des données"
Scrape other elements
Detect HTML element by tag
# Extract content of h1 element ----
rvest::html_element(content, css = "h1") |>
  rvest::html_text2()
## [1] "Liste des communes de France les plus peuplées"
Detect HTML elements by tag
# Extract content of the first h2 element ----
rvest::html_element(content, css = "h2") |>
  rvest::html_text2()
## [1] "Sommaire"
# Extract content of all h2 elements ----
rvest::html_elements(content, css = "h2") |>
  rvest::html_text2()
## [1] "Sommaire"
## [2] "Cadre des données"
## [3] "Vue d'ensemble"
## [4] "Communes de plus de 30 000 habitants"
## [5] "Communes ayant compté plus de 30 000 habitants avant 2024"
## [6] "Notes et références"
## [7] "Voir aussi"
Detect HTML element by ID
# Extract content of the h2 element detected by its id ----
rvest::html_element(content, css = "#Cadre_des_données") |>
  rvest::html_text2()
## [1] "Cadre des données"
Extract attribute
# Extract URL of the first image ----
image_url <- rvest::html_element(content, css = "img") |>
  rvest::html_attr(name = "src")

image_url
## [1] "/static/images/icons/wikipedia.png"
Scrape other elements
Detect HTML element by tag
# Extract content of h1 element ----
rvest::html_element(content, css = "h1") |>
  rvest::html_text2()
## [1] "Liste des communes de France les plus peuplées"
Detect HTML elements by tag
# Extract content of the first h2 element ----
rvest::html_element(content, css = "h2") |>
  rvest::html_text2()
## [1] "Sommaire"
# Extract content of all h2 elements ----
rvest::html_elements(content, css = "h2") |>
  rvest::html_text2()
## [1] "Sommaire"
## [2] "Cadre des données"
## [3] "Vue d'ensemble"
## [4] "Communes de plus de 30 000 habitants"
## [5] "Communes ayant compté plus de 30 000 habitants avant 2024"
## [6] "Notes et références"
## [7] "Voir aussi"
Detect HTML element by ID
# Extract content of the h2 element detected by its id ----
rvest::html_element(content, css = "#Cadre_des_données") |>
  rvest::html_text2()
## [1] "Cadre des données"
Extract attribute
# Extract URL of the first image ----
image_url <- rvest::html_element(content, css = "img") |>
  rvest::html_attr(name = "src")

image_url
## [1] "/static/images/icons/wikipedia.png"
# Build image full URL ----
# The extracted 'src' is a relative path, so prefix it with the site base URL
image_url <- paste0(base_url, image_url)

image_url
# Extract content of h1 element ----
rvest::html_element(content, css = "h1") |>
  rvest::html_text2()
## [1] "Liste des communes de France les plus peuplées"
Detect HTML elements by tag
# Extract content of the first h2 element ----
rvest::html_element(content, css = "h2") |>
  rvest::html_text2()
## [1] "Sommaire"
# Extract content of all h2 elements ----
rvest::html_elements(content, css = "h2") |>
  rvest::html_text2()
## [1] "Sommaire"
## [2] "Cadre des données"
## [3] "Vue d'ensemble"
## [4] "Communes de plus de 30 000 habitants"
## [5] "Communes ayant compté plus de 30 000 habitants avant 2024"
## [6] "Notes et références"
## [7] "Voir aussi"
Detect HTML element by ID
# Extract content of the h2 element detected by its id ----
rvest::html_element(content, css = "#Cadre_des_données") |>
  rvest::html_text2()
## [1] "Cadre des données"
Extract attribute
# Extract URL of the first image ----
image_url <- rvest::html_element(content, css = "img") |>
  rvest::html_attr(name = "src")

image_url
## [1] "/static/images/icons/wikipedia.png"
# Build image full URL ----
# The extracted 'src' is a relative path, so prefix it with the site base URL
image_url <- paste0(base_url, image_url)

image_url