mirror of
https://github.com/johrpan/geposan.git
synced 2025-10-26 10:47:25 +01:00
data: Add more chromosomes
This commit is contained in:
parent
f940d7d9b0
commit
8ced026b79
6 changed files with 303 additions and 13 deletions
34
scripts/chromosome_names.R
Normal file
34
scripts/chromosome_names.R
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
library(data.table)
|
||||
library(httr)
|
||||
|
||||
# Base URL of the Ensembl REST API; endpoint paths are appended to it.
ensembl_api_url <- "https://rest.ensembl.org"
|
||||
|
||||
#' Send a GET request to the Ensembl REST API and return the parsed body.
#'
#' @param api_path Endpoint path that is appended verbatim to
#'   `ensembl_api_url` (for example `"/info/species"`).
#' @return The response body as parsed by `httr::content()`.
ensembl_request <- function(api_path) {
  request_url <- paste0(ensembl_api_url, api_path)
  response <- GET(request_url, content_type_json())

  # Convert HTTP error statuses into R errors before reading the body.
  stop_for_status(response)
  content(response)
}
|
||||
|
||||
#' Get IDs of all available vertebrates.
#'
#' Queries the `/info/species` endpoint and extracts the `name` field of
#' every entry in the returned `species` list.
#'
#' @return A character vector with one species name per entry; an empty
#'   character vector if the API lists no species.
get_species_ids <- function() {
  species <- ensembl_request("/info/species")$species

  # vapply() guarantees a character vector (even for an empty result) and
  # fails loudly if an entry has no single string `name`, whereas sapply()
  # would silently fall back to returning a list.
  vapply(species, function(entry) entry$name, character(1))
}
|
||||
|
||||
#' Get all chromosome names for a species.
#'
#' @param species_id Species identifier; it is appended to the
#'   `/info/assembly/` endpoint path.
#' @return The `karyotype` field of the assembly information, flattened with
#'   `unlist()` (`NULL` if the field is absent or empty).
get_species_chromosomes <- function(species_id) {
  assembly <- ensembl_request(paste0("/info/assembly/", species_id))

  # Return the value directly: ending on an assignment would make the result
  # invisible and leave an unused local behind.
  unlist(assembly$karyotype)
}
|
||||
|
||||
#' Get a vector of all available unique chromosome names.
#'
#' There are multiple names for mitochondrial DNA which have to be removed
#' manually, unfortunately.
#'
#' @return A vector of the distinct chromosome names across all species
#'   returned by `get_species_ids()` (`NULL` if none were found).
get_all_chromosomes <- function() {
  # lapply() always returns a list. sapply() would simplify to a matrix when
  # every species has the same number of chromosomes; unlist() leaves a matrix
  # untouched and unique() would then de-duplicate rows instead of names.
  chromosomes <- lapply(get_species_ids(), get_species_chromosomes)
  unique(unlist(chromosomes, use.names = FALSE))
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue