diff --git a/DESCRIPTION b/DESCRIPTION
index 12c2548..00a2214 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -27,6 +27,7 @@ Imports:
     tensorflow
 Suggests:
     biomaRt,
+    httr,
     plotly,
     rlog,
     stringr,
diff --git a/data/distances.rda b/data/distances.rda
index 0a47bfa..52a94f2 100644
Binary files a/data/distances.rda and b/data/distances.rda differ
diff --git a/data/genes.rda b/data/genes.rda
index 731c316..a0b8d02 100644
Binary files a/data/genes.rda and b/data/genes.rda differ
diff --git a/data/species.rda b/data/species.rda
index 57875bb..5b96c12 100644
Binary files a/data/species.rda and b/data/species.rda differ
diff --git a/scripts/chromosome_names.R b/scripts/chromosome_names.R
new file mode 100644
index 0000000..cf1a59a
--- /dev/null
+++ b/scripts/chromosome_names.R
@@ -0,0 +1,34 @@
+library(data.table)
+library(httr)
+
+ensembl_api_url <- "https://rest.ensembl.org"
+
+#' GET `api_path` from the Ensembl REST API and return the parsed response.
+#' @param api_path Path appended to `ensembl_api_url`, e.g. "/info/species".
+ensembl_request <- function(api_path) {
+    response <- GET(paste0(ensembl_api_url, api_path), content_type_json())
+    stop_for_status(response)
+    content(response)
+}
+
+#' Get IDs of all available vertebrates as a character vector.
+get_species_ids <- function() {
+    species <- ensembl_request("/info/species")$species
+    vapply(species, function(entry) entry$name, character(1))
+}
+
+#' Get all chromosome names for a species.
+get_species_chromosomes <- function(species_id) {
+    assembly <- ensembl_request(paste0("/info/assembly/", species_id))
+    # The `karyotype` field is flattened into a plain vector of names.
+    unlist(assembly$karyotype)
+}
+
+#' Get a vector of all available unique chromosome names.
+#'
+#' There are multiple names for mitochondrial DNA which have to be removed
+#' manually, unfortunately.
+get_all_chromosomes <- function() {
+    # lapply() always yields a list; sapply() may simplify to a matrix.
+    unique(unlist(lapply(get_species_ids(), get_species_chromosomes)))
+}
diff --git a/scripts/ensembl.R b/scripts/ensembl.R
index aa5189e..e640a0b 100644
--- a/scripts/ensembl.R
+++ b/scripts/ensembl.R
@@ -17,24 +17,279 @@ species <- ensembl_datasets[, .(
     name = stringr::str_match(description, "(.*) genes \\(.*\\)")[, 2]
 )]
 
+# Names of assemblies that the Ensembl REST API advertises as chromosomes.
+# Mitochondrial DNA has been manually removed. Unfortunately, species IDs from
+# the Ensembl REST API don't map to dataset names in the BioMart interface.
+# Because of that, we can't programmatically filter chromosome names.
+#
+# See get_all_chromosomes() in scripts/chromosome_names.R.
+valid_chromosome_names <- c(
+    "1",
+    "2",
+    "3",
+    "4",
+    "5",
+    "6",
+    "7",
+    "8",
+    "9",
+    "10",
+    "11",
+    "12",
+    "13",
+    "14",
+    "15",
+    "16",
+    "17",
+    "18",
+    "19",
+    "20",
+    "21",
+    "22",
+    "23",
+    "24",
+    "25",
+    "26",
+    "27",
+    "28",
+    "29",
+    "Z",
+    "1A",
+    "4A",
+    "30",
+    "31",
+    "32",
+    "33",
+    "34",
+    "35",
+    "36",
+    "37",
+    "38",
+    "39",
+    "40",
+    "X",
+    "25LG1",
+    "25LG2",
+    "LGE22",
+    "Y",
+    "41",
+    "42",
+    "43",
+    "44",
+    "45",
+    "46",
+    "47",
+    "48",
+    "49",
+    "50",
+    "LG34",
+    "LG35",
+    "2A",
+    "2B",
+    "LG1",
+    "LG2",
+    "LG3",
+    "LG4",
+    "LG5",
+    "LG6",
+    "LG7",
+    "LG8",
+    "LG9",
+    "LG10",
+    "LG11",
+    "LG12",
+    "LG13",
+    "LG14",
+    "LG15",
+    "LG16",
+    "LG17",
+    "LG18",
+    "LG19",
+    "LG20",
+    "LG21",
+    "LG22",
+    "LG23",
+    "W",
+    "LG24",
+    "LG25",
+    "LG26",
+    "LG27",
+    "LG28",
+    "LG29",
+    "LG30",
+    "LG01",
+    "LG02",
+    "LG03",
+    "LG04",
+    "LG05",
+    "LG06",
+    "LG07",
+    "LG08",
+    "LG09",
+    "A1",
+    "A2",
+    "A3",
+    "B1",
+    "B2",
+    "B3",
+    "B4",
+    "C1",
+    "C2",
+    "D1",
+    "D2",
+    "D3",
+    "D4",
+    "E1",
+    "E2",
+    "E3",
+    "F1",
+    "F2",
+    "LGE64",
+    "LG7_11",
+    "a",
+    "b",
+    "c",
+    "d",
+    "f",
+    "g",
+    "h",
+    "LG28B",
+    "LG30F",
+    "LG36F",
+    "LG37M",
+    "LG42F",
+    "LG44F",
+    "LG45M",
+    "LG48F",
+    "LG49B",
+    "ssa01",
+    "ssa02",
+    "ssa03",
+    "ssa04",
+    "ssa05",
+    "ssa06",
+    "ssa07",
+    "ssa08",
+    "ssa09",
+    "ssa10",
+    "ssa11",
+    "ssa12",
+    "ssa13",
+    "ssa14",
+    "ssa15",
+    "ssa16",
+    "ssa17",
+    "ssa18",
+    "ssa19",
+    "ssa20",
+    "ssa21",
+    "ssa22",
+    "ssa23",
+    "ssa24",
+    "ssa25",
+    "ssa26",
+    "ssa27",
+    "ssa28",
+    "ssa29",
+    "2a",
+    "2b",
+    "7a",
+    "7b",
+    "I",
+    "II",
+    "III",
+    "IV",
+    "V",
+    "VI",
+    "VII",
+    "VIII",
+    "IX",
+    "XI",
+    "XII",
+    "XIII",
+    "XIV",
+    "XV",
+    "XVI",
+    "LGE22C19W28_E50C23",
+    "1a",
+    "22a",
+    "sgr01",
+    "sgr02",
+    "sgr03",
+    "sgr04",
+    "sgr05",
+    "sgr06",
+    "sgr07",
+    "sgr08",
+    "sgr09",
+    "sgr10",
+    "sgr11",
+    "sgr12",
+    "sgr13",
+    "sgr14",
+    "sgr15",
+    "sgr16",
+    "sgr17",
+    "sgr18",
+    "sgr19",
+    "XVII",
+    "XVIII",
+    "XIX",
+    "XX",
+    "XXI",
+    "XXII",
+    "XXIII",
+    "XXIV",
+    "groupI",
+    "groupII",
+    "groupIII",
+    "groupIV",
+    "groupV",
+    "groupVI",
+    "groupVII",
+    "groupVIII",
+    "groupIX",
+    "groupX",
+    "groupXI",
+    "groupXII",
+    "groupXIII",
+    "groupXIV",
+    "groupXV",
+    "groupXVI",
+    "groupXVII",
+    "groupXVIII",
+    "groupXIX",
+    "groupXX",
+    "groupXXI",
+    "2L",
+    "2R",
+    "3L",
+    "3R",
+    "MIC_1",
+    "MIC_10",
+    "MIC_11",
+    "MIC_2",
+    "MIC_3",
+    "MIC_4",
+    "MIC_5",
+    "MIC_6",
+    "MIC_7",
+    "MIC_8",
+    "MIC_9",
+    "X1",
+    "X2",
+    "X3",
+    "X4",
+    "X5"
+)
+
 #' Get all chromosome names for an Ensembl dataset.
 #'
-#' The following chromosome naming schemes will be recognized and have been
-#' sourced from Ensembl by manually screening chromosome-level assemblies.
-#'
-#'  - a decimal number (most species' autosomes)
-#'  - X, Y, W or Z (gonosomes)
-#'  - LG followed by a decimal number (some fishes)
-#'  - ssa/sgr followed by a number (Atlantic salmon/Turquoise killifish)
-#'
-#' The function tries to filter out those chromosome names from the available
+#' The function keeps only the known chromosome names among the available
 #' assemblies in the dataset.
 get_chromosome_names <- function(dataset) {
     chromosome_names <- biomaRt::listFilterOptions(dataset, "chromosome_name")
-    chromosome_names[stringr::str_which(
-        chromosome_names,
-        "^(LG|sgr|ssa)?[0-9]+|[XYWZ]$"
-    )]
+    chromosome_names[chromosome_names %chin% valid_chromosome_names]
 }
 
 # Retrieve information on human genes. This will only include genes on