plant_genera.Rmd
Read a file with taxon names. The function read.csv
will read them as a data frame or table.
# Load the genus list; the file has no header row, so the single column is
# auto-named V1 (see the str() output below)
taxa <- read.csv(
  "../data-raw/plant_genera.txt",
  header = FALSE
)
str(taxa)
#> 'data.frame': 20 obs. of 1 variable:
#> $ V1: chr "Abrotanella" "Acaena" "Acanthopsis" "Acer" ...
Make a character vector of taxon names
# Replace the one-column data frame by its only column, a character vector
taxa <- taxa[["V1"]]
str(taxa)
#> chr [1:20] "Abrotanella" "Acaena" "Acanthopsis" "Acer" "Achillea" "Achlys" ...
We will use the tnrs_match_names
function from the rotl
R package.
# Match the genus names against the taxonomy and list the columns returned
taxa_tnrs <- rotl::tnrs_match_names(taxa)
ls(taxa_tnrs)
#> [1] "approximate_match" "flags" "is_synonym"
#> [4] "number_matches" "ott_id" "search_string"
#> [7] "unique_name"
It generates a data frame with the results of the TNRS match to the OpenTree Taxonomy (OTT).
We want the OTT id numbers.
# Pull the ott_id column out of the TNRS match table
taxa_ott_ids <- taxa_tnrs$ott_id
To extract a synthetic subtree containing all descendants of any taxon, we will use the function tol_subtree
, from the rotl
R package, again.
# Try a single request first: the subtree of the first OTT id, with tips
# labelled by name
subtree_taxon1 <- rotl::tol_subtree(
  taxa_ott_ids[1],
  label_format = "name"
)
subtree_taxon1
#>
#> Phylogenetic tree with 23 tips and 1 internal nodes.
#>
#> Tip labels:
#> Abrotanella_rosulata, Abrotanella_inconspicua, Abrotanella_linearifolia, Abrotanella_purpurea, Abrotanella_fertilis, Abrotanella_forsteroides, ...
#> Node labels:
#> Abrotanella
#>
#> Unrooted; no branch lengths.
The function accepts only one OTT id at a time. To get all subtrees at once, we can use a for loop or a handy lapply
function.
# First attempt with lapply(): a single failing request aborts the whole call,
# so neither assignment below completes (see the error output)
subtree_taxa_all_name <- lapply(
  taxa_ott_ids,
  rotl::tol_subtree,
  label_format = "name"
)
#> Error: HTTP failure: 400
#> list(contesting_trees = list(`ot_311@tree1` = list(attachment_points = list(list(children_from_taxon = list("node34943"), parent = "node34939"), list(children_from_taxon = list("node34941"), parent = "node34940")))), mrca = "mrcaott189412ott213652")[/v3/tree_of_life/subtree] Error: node_id was not found (broken taxon).
subtree_taxa_all_id <- lapply(
  taxa_ott_ids,
  rotl::tol_subtree,
  label_format = "id"
)
#> Error: HTTP failure: 400
#> list(contesting_trees = list(`ot_311@tree1` = list(attachment_points = list(list(children_from_taxon = list("node34943"), parent = "node34939"), list(children_from_taxon = list("node34941"), parent = "node34940")))), mrca = "mrcaott189412ott213652")[/v3/tree_of_life/subtree] Error: node_id was not found (broken taxon).
It is easier to catch errors with a for loop:
# Request the synthetic subtree of every OTT id, with tips labelled by OTT id.
# try() keeps the loop running when a request fails: the failed element is
# stored as a "try-error" object, so the result list stays aligned,
# position by position, with taxa_ott_ids.
# The list is preallocated and filled by index instead of being grown with
# c() on every iteration.
subtree_taxa_all_id <- vector(mode = "list", length = length(taxa_ott_ids))
for (i in seq_along(taxa_ott_ids)) {
  subtree_taxa_all_id[[i]] <- try(
    rotl::tol_subtree(taxa_ott_ids[[i]], label_format = "id")
  )
}
#> Error : HTTP failure: 400
#> list(contesting_trees = list(`ot_311@tree1` = list(attachment_points = list(list(children_from_taxon = list("node34943"), parent = "node34939"), list(children_from_taxon = list("node34941"), parent = "node34940")))), mrca = "mrcaott189412ott213652")[/v3/tree_of_life/subtree] Error: node_id was not found (broken taxon).
#>
#> Error : HTTP failure: 400
#> list(contesting_trees = list(`ot_311@tree1` = list(attachment_points = list(list(children_from_taxon = list("ott6123725"), parent = "node27222"), list(children_from_taxon = list("node27228"), parent = "node27227"), list(children_from_taxon = list("node27252"), parent = "node27251"), list(children_from_taxon = list("node27258"), parent = "node27257"), list(children_from_taxon = list("node27262"), parent = "node27261"), list(children_from_taxon = list("node27313"), parent = "node27312"), list(children_from_taxon = list(
#> "node27318"), parent = "node27316"), list(children_from_taxon = list("node27322"), parent = "node27321"), list(children_from_taxon = list("node27326"), parent = "node27325"), list(children_from_taxon = list("node27339"), parent = "node27335"), list(children_from_taxon = list("node27338"), parent = "node27336")))), mrca = "mrcaott23895ott49697")[/v3/tree_of_life/subtree] Error: node_id was not found (broken taxon).
#>
#>
Progress [-----------------------------------] 0/2 ( 0) ?s
Progress [==================================] 2/2 (100) 0s
#> Warning in collapse_singles(tr, show_progress): Dropping singleton nodes with
#> labels: ott4416, ott395142
#>
Progress [-----------------------------------] 0/1 ( 0) ?s
Progress [==================================] 1/1 (100) 0s
#> Warning in collapse_singles(tr, show_progress): Dropping singleton nodes with
#> labels: ott480490
#> Error : HTTP failure: 400
#> list(contesting_trees = list(`ot_311@tree1` = list(attachment_points = list(list(children_from_taxon = list("node20107"), parent = "node20106"), list(children_from_taxon = list("node20152"), parent = "node20151")))), mrca = "mrcaott2441ott44343")[/v3/tree_of_life/subtree] Error: node_id was not found (broken taxon).
#>
#>
Progress [-----------------------------------] 0/2 ( 0) ?s
Progress [==================================] 2/2 (100) 0s
#> Warning in collapse_singles(tr, show_progress): Dropping singleton nodes with
#> labels: ott662962, ott804933
#>
Progress [-----------------------------------] 0/1 ( 0) ?s
Progress [==================================] 1/1 (100) 0s
#> Warning in collapse_singles(tr, show_progress): Dropping singleton nodes with
#> labels: ott301496
#> Error : HTTP failure: 400
#> list(contesting_trees = list(`ot_1647@tree1` = list(attachment_points = list(list(children_from_taxon = list("node10"), parent = "node9"), list(children_from_taxon = list("node14"), parent = "node13")))), mrca = "mrcaott93631ott93641")[/v3/tree_of_life/subtree] Error: node_id was not found (broken taxon).
#>
#>
Progress [-----------------------------------] 0/3 ( 0) ?s
Progress [==================================] 3/3 (100) 0s
#> Warning in collapse_singles(tr, show_progress): Dropping singleton nodes with
#> labels: ott441629, ott487617, ott869834
#>
Progress [----------------------------------] 0/23 ( 0) ?s
Progress [================================] 23/23 (100) 0s
#> Warning in collapse_singles(tr, show_progress): Dropping singleton nodes with
#> labels: ott197343, ott197337, ott229165, ott737256, ott258933, ott1057538,
#> ott54669, ott650740, ott737242, ott476268, ott748378, ott116325, ott679999,
#> ott357132, ott781600, ott983159, ott144323, ott748370, ott488393, ott505054,
#> ott70786, ott737251, ott3994178
#> Error : HTTP failure: 400
#> list(contesting_trees = list(`ot_311@tree1` = list(attachment_points = list(list(children_from_taxon = list("node27549"), parent = "node27548"), list(children_from_taxon = list("node27553"), parent = "node27552"), list(children_from_taxon = list("node27555"), parent = "node27554"), list(children_from_taxon = list("node27558"), parent = "node27557")))), mrca = "mrcaott105992ott139602")[/v3/tree_of_life/subtree] Error: node_id was not found (broken taxon).
#>
#>
Progress [-----------------------------------] 0/3 ( 0) ?s
Progress [==================================] 3/3 (100) 0s
#> Warning in collapse_singles(tr, show_progress): Dropping singleton nodes with
#> labels: ott117369, ott639085, ott639096
#> Error : HTTP failure: 400
#> list(contesting_trees = list(`pg_2608@tree6288` = list(attachment_points = list(list(children_from_taxon = list("node1091405"), parent = "node1091399"), list(children_from_taxon = list("node1091401"), parent = "node1091400")))), mrca = "mrcaott41288ott41290")[/v3/tree_of_life/subtree] Error: node_id was not found (broken taxon).
# Number of stored results (retrieved subtrees and errors together)
length(subtree_taxa_all_id)
#> [1] 20
Let’s do the same with scientific names:
# Request the synthetic subtree of every OTT id, with tips labelled by
# scientific name.
# try() keeps the loop running when a request fails: the failed element is
# stored as a "try-error" object, so the result list stays aligned,
# position by position, with taxa_ott_ids.
# The list is preallocated and filled by index instead of being grown with
# c() on every iteration.
subtree_taxa_all_name <- vector(mode = "list", length = length(taxa_ott_ids))
for (i in seq_along(taxa_ott_ids)) {
  subtree_taxa_all_name[[i]] <- try(
    rotl::tol_subtree(taxa_ott_ids[[i]], label_format = "name")
  )
}
#> Error : HTTP failure: 400
#> list(contesting_trees = list(`ot_311@tree1` = list(attachment_points = list(list(children_from_taxon = list("node34943"), parent = "node34939"), list(children_from_taxon = list("node34941"), parent = "node34940")))), mrca = "mrcaott189412ott213652")[/v3/tree_of_life/subtree] Error: node_id was not found (broken taxon).
#>
#> Error : HTTP failure: 400
#> list(contesting_trees = list(`ot_311@tree1` = list(attachment_points = list(list(children_from_taxon = list("ott6123725"), parent = "node27222"), list(children_from_taxon = list("node27228"), parent = "node27227"), list(children_from_taxon = list("node27252"), parent = "node27251"), list(children_from_taxon = list("node27258"), parent = "node27257"), list(children_from_taxon = list("node27262"), parent = "node27261"), list(children_from_taxon = list("node27313"), parent = "node27312"), list(children_from_taxon = list(
#> "node27318"), parent = "node27316"), list(children_from_taxon = list("node27322"), parent = "node27321"), list(children_from_taxon = list("node27326"), parent = "node27325"), list(children_from_taxon = list("node27339"), parent = "node27335"), list(children_from_taxon = list("node27338"), parent = "node27336")))), mrca = "mrcaott23895ott49697")[/v3/tree_of_life/subtree] Error: node_id was not found (broken taxon).
#>
#>
Progress [-----------------------------------] 0/2 ( 0) ?s
Progress [==================================] 2/2 (100) 0s
#> Warning in collapse_singles(tr, show_progress): Dropping singleton nodes with
#> labels: Achillea formosa, Achillea pindicola
#>
Progress [-----------------------------------] 0/1 ( 0) ?s
Progress [==================================] 1/1 (100) 0s
#> Warning in collapse_singles(tr, show_progress): Dropping singleton nodes with
#> labels: Achnatherum pekinense
#> Error : HTTP failure: 400
#> list(contesting_trees = list(`ot_311@tree1` = list(attachment_points = list(list(children_from_taxon = list("node20107"), parent = "node20106"), list(children_from_taxon = list("node20152"), parent = "node20151")))), mrca = "mrcaott2441ott44343")[/v3/tree_of_life/subtree] Error: node_id was not found (broken taxon).
#>
#>
Progress [-----------------------------------] 0/2 ( 0) ?s
Progress [==================================] 2/2 (100) 0s
#> Warning in collapse_singles(tr, show_progress): Dropping singleton nodes with
#> labels: Aerides odorata, Aerides multiflora
#>
Progress [-----------------------------------] 0/1 ( 0) ?s
Progress [==================================] 1/1 (100) 0s
#> Warning in collapse_singles(tr, show_progress): Dropping singleton nodes with
#> labels: Afzelia bella
#> Error : HTTP failure: 400
#> list(contesting_trees = list(`ot_1647@tree1` = list(attachment_points = list(list(children_from_taxon = list("node10"), parent = "node9"), list(children_from_taxon = list("node14"), parent = "node13")))), mrca = "mrcaott93631ott93641")[/v3/tree_of_life/subtree] Error: node_id was not found (broken taxon).
#>
#>
Progress [-----------------------------------] 0/3 ( 0) ?s
Progress [==================================] 3/3 (100) 0s
#> Warning in collapse_singles(tr, show_progress): Dropping singleton nodes with
#> labels: Alchemilla procumbens, Alchemilla fischeri, Alchemilla filicaulis
#>
Progress [----------------------------------] 0/23 ( 0) ?s
Progress [================================] 23/23 (100) 0s
#> Warning in collapse_singles(tr, show_progress): Dropping singleton nodes with
#> labels: Allium obtusum, Allium howellii, Allium peninsulare, Allium bolanderi,
#> Allium rotundum, Allium schoenoprasum subsp. schoenoprasum, Allium vodopjanovae,
#> Allium victorialis, Allium sanbornii, Allium cyaneum, Allium ampeloprasum,
#> Allium ericetorum, Allium platyspathum, Allium materculae, Allium cepa,
#> Allium cupuliferum, Allium bourgeaui, Allium sativum, Allium taquetii, Allium
#> scorodoprasum, Allium carinatum, Allium neapolitanum, Allium cassium
#> Error : HTTP failure: 400
#> list(contesting_trees = list(`ot_311@tree1` = list(attachment_points = list(list(children_from_taxon = list("node27549"), parent = "node27548"), list(children_from_taxon = list("node27553"), parent = "node27552"), list(children_from_taxon = list("node27555"), parent = "node27554"), list(children_from_taxon = list("node27558"), parent = "node27557")))), mrca = "mrcaott105992ott139602")[/v3/tree_of_life/subtree] Error: node_id was not found (broken taxon).
#>
#>
Progress [-----------------------------------] 0/3 ( 0) ?s
Progress [==================================] 3/3 (100) 0s
#> Warning in collapse_singles(tr, show_progress): Dropping singleton nodes with
#> labels: Alnus acuminata, Alnus jorullensis, Alnus hirsuta
#> Error : HTTP failure: 400
#> list(contesting_trees = list(`pg_2608@tree6288` = list(attachment_points = list(list(children_from_taxon = list("node1091405"), parent = "node1091399"), list(children_from_taxon = list("node1091401"), parent = "node1091400")))), mrca = "mrcaott41288ott41290")[/v3/tree_of_life/subtree] Error: node_id was not found (broken taxon).
# Number of stored results (retrieved subtrees and errors together)
length(subtree_taxa_all_name)
#> [1] 20
Check which ones errored and which ones produced a subtree:
# Show the (first) class of every stored result: "phylo" for a retrieved
# subtree, "try-error" for a failed request
vapply(subtree_taxa_all_name, function(x) class(x)[1], character(1))
#> [1] "phylo" "try-error" "phylo" "try-error" "phylo" "phylo"
#> [7] "phylo" "phylo" "try-error" "phylo" "phylo" "phylo"
#> [13] "phylo" "try-error" "phylo" "phylo" "try-error" "phylo"
#> [19] "phylo" "try-error"
# Logical masks over the result list. inherits() with vapply() always yields
# one TRUE/FALSE per element, even if an object carries several classes.
is_error <- vapply(
  subtree_taxa_all_name, inherits, logical(1), what = "try-error"
)
is_phylo <- vapply(
  subtree_taxa_all_name, inherits, logical(1), what = "phylo"
)
Out of the 20 taxon OTT ids tried, 6 were unsuccessful and the remaining 14 retrieved a subtree successfully.
We will make a new list object containing subtrees only, excluding the errors:
# Keep only the successfully retrieved name-labelled subtrees and name each
# one after the genus it was requested for
subtree_taxa_phylo_name <- subtree_taxa_all_name[is_phylo]
length(subtree_taxa_phylo_name)
#> [1] 14
names(subtree_taxa_phylo_name) <- taxa[is_phylo]
# The id-labelled subtrees came from a separate round of requests, so their
# successes are checked on their own results rather than assumed to match
# the name-labelled ones
is_phylo_id <- vapply(
  subtree_taxa_all_id, inherits, logical(1), what = "phylo"
)
subtree_taxa_phylo_id <- subtree_taxa_all_id[is_phylo_id]
length(subtree_taxa_phylo_id)
#> [1] 14
names(subtree_taxa_phylo_id) <- taxa[is_phylo_id]
Save the trees:
# Store the name-labelled subtrees under the object name "subtrees" and write
# that object to disk
subtrees <- subtree_taxa_phylo_name
save(subtrees, file = "../data-raw/subtrees.RData")
Now, we can make a vector containing all taxon names that are tip labels in the synthetic subtrees:
# Extract the tip labels of every subtree. `[[` returns the character vector
# itself, and lapply() always returns a list that keeps the genus names of its
# input, so no simplification step or re-naming is needed.
subtaxa_name <- lapply(subtree_taxa_phylo_name, `[[`, "tip.label")
length(subtaxa_name)
#> [1] 14
subtaxa_id <- lapply(subtree_taxa_phylo_id, `[[`, "tip.label")
length(subtaxa_id)
#> [1] 14
subtaxa_id[1]
#> $Abrotanella
#> [1] "ott148523" "ott365007" "ott365009" "ott365025" "ott425771"
#> [6] "ott425773" "ott425777" "ott425779" "ott425781" "ott425786"
#> [11] "ott786604" "ott900584" "ott900587" "ott932307" "ott1005999"
#> [16] "ott1006002" "ott1006005" "ott1006011" "ott1006014" "ott7607732"
#> [21] "ott7607733" "ott7607734" "ott7607735"
We will use the function occ_search
from the rgbif
R package. Let’s try it first with the scientific names in subtaxa_name
:
# Trial run with the first tip label of the first subtree; tip labels use
# underscores, so these are turned into spaces before querying
name <- gsub("_", " ", subtaxa_name[[1]][1])
name_gbif <- rgbif::occ_search(
  scientificName = name,
  fields = c("name", "decimalLatitude", "decimalLongitude"),
  limit = 100
)
ls(name_gbif)
#> [1] "data" "facets" "hierarchy" "media" "meta"
ls(name_gbif$data)
#> [1] "decimalLatitude" "decimalLongitude"
name_gbif$data
#> # A tibble: 8 x 2
#> decimalLongitude decimalLatitude
#> <dbl> <dbl>
#> 1 169. -52.6
#> 2 169. -52.5
#> 3 169. -52.5
#> 4 169. -52.5
#> 5 169. -52.6
#> 6 169. -52.6
#> 7 169 -52.6
#> 8 169. -52.5
Next, we will do it in a loop:
# For every genus, query GBIF for each tip label of its subtree, then write
# two files to ../data-raw/: <genus>.RData with the per-label results and
# <genus>.csv with all retrieved records merged into one table.

# occurrence fields requested for every name (the same for all queries)
gbif_fields <- c(
  "order", "family", "genus", "acceptedScientificName",
  "decimalLatitude", "decimalLongitude", "elevation",
  "familyKey", "genusKey", "taxonKey"
)

# for each genus i
for (i in seq_along(subtaxa_name)) {
  print(i)
  genus <- names(subtaxa_name)[i]
  tip_labels <- subtaxa_name[[i]]
  # one slot per tip label; each slot ends up holding either a data frame of
  # records or the unmodified result of the failed/empty query
  subtaxa_gbif <- vector(mode = "list", length = length(tip_labels))
  # for each tip label j of genus i
  for (j in seq_along(tip_labels)) {
    print(tip_labels[[j]])
    # replace underscores by spaces in the name
    name <- gsub("_", " ", tip_labels[[j]])
    # get gbif records for "name", with a try
    name_gbif <- try(
      rgbif::occ_search(
        scientificName = name,
        fields = gbif_fields,
        limit = 5
      )
    )
    # `&&` short-circuits: when try() returned a "try-error" (a character
    # vector), `$data` is never evaluated, so a failed query cannot abort
    # the loop. Sometimes there are records but none are retrieved, hence
    # the extra row-count check.
    has_records <- inherits(name_gbif, "gbif") &&
      !is.null(name_gbif$data) &&
      nrow(name_gbif$data) > 0
    if (has_records) {
      # data frame with a leading "name" column followed by the gbif data
      name_gbif <- cbind(name, name_gbif$data)
    }
    subtaxa_gbif[[j]] <- name_gbif
  }
  # use the tip labels to name the list of results
  names(subtaxa_gbif) <- tip_labels
  # save the results as an R object whose name is the genus
  assign(genus, subtaxa_gbif)
  save(list = genus, file = paste0("../data-raw/", genus, ".RData"))
  # which results are data frames (i.e., had records)
  is_data <- vapply(subtaxa_gbif, is.data.frame, logical(1))
  # merge all records into a single table
  tab <- dplyr::bind_rows(subtaxa_gbif[is_data])
  write.csv(tab, file = paste0("../data-raw/", genus, ".csv"))
}