# library(ropentree)

Creating a CSV file of a pre-mapped nameset

If your taxon names are in a Newick tree file, read the tree into R with the read.tree function from the ape package. This will create a phylo object:

# Parse the Newick tree file into a "phylo" object.
phy <- ape::read.tree(
  file = "../data-raw/mgharvey-tyranni-f73aa7f/species_trees/final_timetrees/T400F_complete.tre"
)
# Names of the components stored in the phylo object.
ls(phy)
#> [1] "edge"        "edge.length" "Nnode"       "tip.label"
# Compact summary of each component's type and size.
str(phy)
#> List of 4
#>  $ edge       : int [1:3878, 1:2] 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 ...
#>  $ edge.length: num [1:3878] 2.576 5.129 1.554 1.733 0.961 ...
#>  $ Nnode      : int 1939
#>  $ tip.label  : chr [1:1940] "Syna_azara_L8177" "Syna_cours_L58452" "Syna_azara_L31671" "Syna_azara_LSU1232" ...
#>  - attr(*, "class")= chr "phylo"
#>  - attr(*, "order")= chr "cladewise"

Now, create a vector of the original tip taxon labels:

# Pull the tip labels out of the phylo object; these are the names exactly as
# they appear in the tree file.
original_labels <- phy[["tip.label"]]

# Preview the first ten labels.
head(original_labels, n = 10)
#>  [1] "Syna_azara_L8177"      "Syna_cours_L58452"     "Syna_azara_L31671"    
#>  [4] "Syna_azara_LSU1232"    "Syna_azara_WGAV730"    "Syna_front_L52049"    
#>  [7] "Syna_azara_LSUMZ49623" "Syna_albes_MPEGMT50"   "Syna_albes_K3064"     
#> [10] "Syna_alblar_L44453"

In this particular case, tip taxon labels are not suitable for mapping because the scientific names are cut off (e.g. Gallus gallus is “Gallus_L36208”). So, we will need to link the tip taxon labels with the complete scientific name, as provided on a supplementary data table from the original publication.

To do this, first read the supplementary data table file linking tip taxon labels with the complete scientific taxon names using the read.csv function:

# Load the supplementary table that pairs tip name codes (tipnamecodes) with
# full scientific names (matrix.species and related columns).
data_table <- read.csv(
  file = "../data-raw/mgharvey-tyranni-f73aa7f/Species_name_map_uids.csv"
)

# Preview the first ten rows.
head(data_table, n = 10)
#>    X.1  X sample_uid            tipnamecodes        matrix.species
#> 1    1  1       3314    Acanth_chlris_O3RIFL  Acanthisitta chloris
#> 2    2  2       3314    Acanth_chlris_O3RIFL  Acanthisitta chloris
#> 3    3  3       1597       Acroba_fon_L26329 Acrobatornis fonsecai
#> 4    4  4        884    Acropt_ortnyx_L30027  Acropternis orthonyx
#> 5    5  5        884    Acropt_ortnyx_L30027  Acropternis orthonyx
#> 6    6  6     200000 Acropt_ortnyx_PhAMC1246  Acropternis orthonyx
#> 7    7  7        884    Acropt_ortnyx_L30027  Acropternis orthonyx
#> 8    8  8     200000 Acropt_ortnyx_PhAMC1246  Acropternis orthonyx
#> 9    9  9       2583    Agrior_albcau_L72330   Agriornis albicauda
#> 10  10 10       2583    Agrior_albcau_L72330   Agriornis albicauda
#>    matrix.subspecies aos.howardmoore.species  aos.clements.species
#> 1                  ?    Acanthisitta chloris  Acanthisitta chloris
#> 2                  ?    Acanthisitta chloris  Acanthisitta chloris
#> 3                  -   Acrobatornis fonsecai Acrobatornis fonsecai
#> 4         infuscatus    Acropternis orthonyx  Acropternis orthonyx
#> 5         infuscatus    Acropternis orthonyx  Acropternis orthonyx
#> 6           orthonyx    Acropternis orthonyx  Acropternis orthonyx
#> 7         infuscatus    Acropternis orthonyx  Acropternis orthonyx
#> 8           orthonyx    Acropternis orthonyx  Acropternis orthonyx
#> 9          albicauda     Agriornis albicauda   Agriornis albicauda
#> 10         albicauda     Agriornis albicauda   Agriornis albicauda
#>             iucn.species howardmoore.family             jetz.tip    notes
#> 1   Acanthisitta chloris    Acanthisittidae Acanthisitta_chloris outgroup
#> 2   Acanthisitta chloris    Acanthisittidae Acanthisitta_chloris outgroup
#> 3  Acrobatornis fonsecai        Furnariidae                 <NA>         
#> 4   Acropternis orthonyx     Rhinocryptidae Acropternis_orthonyx         
#> 5   Acropternis orthonyx     Rhinocryptidae Acropternis_orthonyx         
#> 6   Acropternis orthonyx     Rhinocryptidae Acropternis_orthonyx         
#> 7   Acropternis orthonyx     Rhinocryptidae Acropternis_orthonyx         
#> 8   Acropternis orthonyx     Rhinocryptidae Acropternis_orthonyx         
#> 9    Agriornis albicauda         Tyrannidae  Agriornis_albicauda         
#> 10   Agriornis albicauda         Tyrannidae  Agriornis_albicauda
# List the column names of the data table (ls() prints them in sorted order,
# not in the column order shown by head() above).
ls(data_table)
#>  [1] "aos.clements.species"    "aos.howardmoore.species"
#>  [3] "howardmoore.family"      "iucn.species"           
#>  [5] "jetz.tip"                "matrix.species"         
#>  [7] "matrix.subspecies"       "notes"                  
#>  [9] "sample_uid"              "tipnamecodes"           
#> [11] "X"                       "X.1"
# Confirm that read.csv returned a data frame.
class(data_table)
#> [1] "data.frame"

Now, we can create a vector of scientific species names that are suitable for TNRS matching. To save on computing time, let’s use the unique function to eliminate duplicated scientific names and leave just one copy per name:

# Number of entries (repeats included) in the scientific-name column.
length(data_table[["matrix.species"]])
#> [1] 4639
# Keep a single copy of each distinct name so every name is sent to TNRS
# only once.
adjusted_labels <- unique(data_table[["matrix.species"]])
head(adjusted_labels, n = 10)
#>  [1] "Acanthisitta chloris"  "Acrobatornis fonsecai" "Acropternis orthonyx" 
#>  [4] "Agriornis albicauda"   "Agriornis lividus"     "Agriornis micropterus"
#>  [7] "Agriornis montanus"    "Agriornis murinus"     "Akletos goeldii"      
#> [10] "Akletos melanoceps"
# Number of distinct names left after de-duplication.
length(adjusted_labels)
#> [1] 1330

Let’s check for NAs and remove them:

# Report whether any of the unique names is missing.
any(is.na(adjusted_labels))
#> [1] TRUE
# Positions of the missing names (kept for inspection).
index_na <- which(is.na(adjusted_labels))
# Drop the missing names with a logical mask. Negative indexing with
# `-index_na` would return an empty vector whenever there are no NAs, because
# `x[-integer(0)]` selects nothing.
adjusted_labels <- adjusted_labels[!is.na(adjusted_labels)]

Let’s also remove some unwanted backslash characters using the function gsub:

# Strip every literal backslash from the names (the regular expression "\\\\"
# matches one backslash character).
adjusted_labels <- gsub(
  pattern = "\\\\",
  replacement = "",
  x = adjusted_labels
)

For the sake of reproducibility, write down the names as a “.txt” file that can be read back into R using read.csv:


# Save the cleaned names to disk so this step can be reproduced later.
write(x = adjusted_labels, file = "../data-raw/harvey2020scientific_names.txt")

# Reload the names from that file; it has no header row, and the first column
# of the parsed table holds the names.
adjusted_labels <- read.csv(
  file = "../data-raw/harvey2020scientific_names.txt",
  header = FALSE
)[[1]]

Now, perform a TNRS match on the vector of unique scientific names with the tnrs_match_names function from the rotl package:

# Query TNRS one name at a time. Each name is appended to a log file before
# its request is sent, so the file records which names have been submitted.
matched_labels <- sapply(adjusted_labels, function(taxon_name) {
  write(taxon_name, file = "../data-raw/test.txt", append = TRUE)
  rotl::tnrs_match_names(names = taxon_name)
})
#> Warning: Akletos goeldii are not matched
#> Warning: Akletos melanoceps are not matched
#> Warning: Ammonastes pelzelni are not matched
#> Warning: Ampelornis griseiceps are not matched
#> Warning: Aprositornis disjuncta are not matched
#> Warning: Ceratopipra chloromeros are not matched
#> Warning: Ceratopipra rubrocapilla are not matched
#> Warning: Cercomacroides laeta are not matched
#> Warning: Cercomacroides parkeri are not matched
#> Warning: Deconychura sp. nov. are not matched
#> Warning: Erythropitta dohertyi are not matched
#> Warning: Euchrepomis humeralis"" are not matched
#> Warning: Euchrepomis sharpei are not matched
#> Warning: Hafferia zeledoni are not matched
#> Warning: Heliobletus aff. contaminatus are not matched
#> Warning: Hylopezus aff. nattereri are not matched
#> Warning: Merulaxis aff. ater are not matched
#> Warning: Myrmelastes caurensis are not matched
#> Warning: Myrmelastes hyperythra are not matched
#> Warning: Myrmoderus loricata are not matched
#> Warning: Myrmoderus ruficauda are not matched
#> Warning: Myrmoderus squamosa are not matched
#> Warning: Certhiaxis sp. nov. are not matched
#> Warning: Herpsilochmus aff. motacilloides Puno"" are not matched
#> Warning: Myiornis sp. nov. are not matched
#> Warning: Phacellodomus sp. nov. are not matched
#> Warning: Oneillornis lunulatus are not matched
#> Warning: Oneillornis salvini are not matched
#> Warning: Poliocrania exsul are not matched
#> Warning: Sciaphylax castanea are not matched
#> Warning: Sciaphylax hemimelaena are not matched
#> Warning: Sipia berlepschi are not matched
#> Warning: Sipia laemosticta are not matched
#> Warning: Suiriri affinis (islerorum) are not matched
#> Warning: Heteroxolmis (Xolmis) dominicanus are not matched
# Inspect the structure of the TNRS results collected by sapply above.
str(matched_labels)
#> List of 9303
#>  $ : chr "acanthisitta chloris"
#>  $ : chr "Acanthisitta chloris"
#>  $ : logi FALSE
#>  $ : int 1085740
#>  $ : logi FALSE
#>  $ : chr ""
#>  $ : int 1
#>  $ : chr "acrobatornis fonsecai"
#>  $ : chr "Acrobatornis fonsecai"
#>  $ : logi FALSE
#>  $ : int 286119
#>  $ : logi FALSE
#>  $ : chr ""
#>  $ : int 1
#>  $ : chr "acropternis orthonyx"
#>  $ : chr "Acropternis orthonyx"
#>  $ : logi FALSE
#>  $ : int 286117
#>  $ : logi FALSE
#>  $ : chr ""
#>  $ : int 1
#>  $ : chr "agriornis albicauda"
#>  $ : chr "Agriornis albicauda"
#>  $ : logi FALSE
#>  $ : int 3599206
#>  $ : logi FALSE
#>  $ : chr ""
#>  $ : int 1
#>  $ : chr "agriornis lividus"
#>  $ : chr "Agriornis lividus"
#>  $ : logi FALSE
#>  $ : int 3599207
#>  $ : logi FALSE
#>  $ : chr ""
#>  $ : int 1
#>  $ : chr "agriornis micropterus"
#>  $ : chr "Agriornis micropterus"
#>  $ : logi FALSE
#>  $ : int 305334
#>  $ : logi FALSE
#>  $ : chr ""
#>  $ : int 1
#>  $ : chr "agriornis montanus"
#>  $ : chr "Agriornis montanus"
#>  $ : logi FALSE
#>  $ : int 1036002
#>  $ : logi FALSE
#>  $ : chr ""
#>  $ : int 1
#>  $ : chr "agriornis murinus"
#>  $ : chr "Agriornis murinus"
#>  $ : logi FALSE
#>  $ : int 727889
#>  $ : logi FALSE
#>  $ : chr ""
#>  $ : int 1
#>  $ : chr "akletos goeldii"
#>  $ : chr NA
#>  $ : logi NA
#>  $ : int NA
#>  $ : logi NA
#>  $ : logi NA
#>  $ : int NA
#>  $ : chr "akletos melanoceps"
#>  $ : chr NA
#>  $ : logi NA
#>  $ : int NA
#>  $ : logi NA
#>  $ : logi NA
#>  $ : int NA
#>  $ : chr "alectrurus risora"
#>  $ : chr "Alectrurus risora"
#>  $ : logi FALSE
#>  $ : int 238477
#>  $ : logi FALSE
#>  $ : chr ""
#>  $ : int 1
#>  $ : chr "alectrurus tricolor"
#>  $ : chr "Alectrurus tricolor"
#>  $ : logi FALSE
#>  $ : int 258072
#>  $ : logi FALSE
#>  $ : chr ""
#>  $ : int 1
#>  $ : chr "ammonastes pelzelni"
#>  $ : chr NA
#>  $ : logi NA
#>  $ : int NA
#>  $ : logi NA
#>  $ : logi NA
#>  $ : int NA
#>  $ : chr "ampelioides tschudii"
#>  $ : chr "Ampelioides tschudii"
#>  $ : logi FALSE
#>  $ : int 860316
#>  $ : logi FALSE
#>  $ : chr ""
#>  $ : int 1
#>  $ : chr "ampelion rubrocristatus"
#>   [list output truncated]
#>  - attr(*, "dim")= int [1:2] 7 1329
#>  - attr(*, "dimnames")=List of 2
#>   ..$ : chr [1:7] "search_string" "unique_name" "approximate_match" "ott_id" ...
#>   ..$ : chr [1:1329] "Acanthisitta chloris" "Acrobatornis fonsecai" "Acropternis orthonyx" "Agriornis albicauda" ...

Now, we need to extract the OTT ids that resulted from the TNRS match. First, match the scientific names in the data table against the names returned by the TNRS match:

# For every row of the data table, find the position of its scientific name
# among the names attached to the TNRS results (NA when the name is absent).
index <- match(
  x = data_table$matrix.species,
  table = names(matched_labels["ott_id", ])
)
head(index, n = 10)
#>  [1] 1 1 2 3 3 3 3 3 4 4

This will allow you to create a vector of OTT ids that correspond to the original tip taxon labels:

# Look up the OTT id for every row of the data table. Positions where `index`
# is NA (names absent from the TNRS results) are kept as NA: subsetting the
# list with NA and then calling unlist() would silently drop those entries and
# shift every later id out of alignment with the data table rows.
ott_by_name <- matched_labels["ott_id", ]
ott <- vapply(
  index,
  function(i) {
    if (is.na(i)) NA_integer_ else as.integer(ott_by_name[[i]])[1L]
  },
  integer(1)
)
# Label each id with the scientific name it was looked up for.
names(ott) <- names(ott_by_name)[index]
head(ott, 10)
#>  Acanthisitta chloris  Acanthisitta chloris Acrobatornis fonsecai 
#>               1085740               1085740                286119 
#>  Acropternis orthonyx  Acropternis orthonyx  Acropternis orthonyx 
#>                286117                286117                286117 
#>  Acropternis orthonyx  Acropternis orthonyx   Agriornis albicauda 
#>                286117                286117               3599206 
#>   Agriornis albicauda 
#>               3599206

Finally, create a data frame containing the necessary information for OpenTree’s importing nameset tool, and write it as a CSV file:

# Build one row per tree tip. The tip labels and the data table rows differ in
# number (1940 vs 4639 in the output above), so they cannot be placed side by
# side directly: each tip label is first looked up in the data table to find
# its scientific name.
# NOTE(review): assumes the `tipnamecodes` column holds the tree's tip labels
# -- confirm against the data table.
tip_rows <- match(original_labels, data_table$tipnamecodes)
tip_species <- data_table$matrix.species[tip_rows]

# OTT id for each tip's scientific name; NA when the name has no TNRS result.
ott_by_name <- matched_labels["ott_id", ]
tip_ott <- vapply(
  match(tip_species, names(ott_by_name)),
  function(i) {
    if (is.na(i)) NA_integer_ else as.integer(ott_by_name[[i]])[1L]
  },
  integer(1)
)

results <- data.frame(
  original = original_labels,
  adjusted = tip_species,
  ott_id = tip_ott
)

# The closing quote of the file path was missing, which made this line a
# syntax error.
write.csv(results, file = "../data-raw/harvey2020nameset.csv")

Creating a json file