Update data to Ensembl 110

This commit is contained in:
Elias Projahn 2023-09-27 13:11:08 +02:00
parent b4fb39f8de
commit e19f154ab5
8 changed files with 132 additions and 132 deletions

View file

@ -18,7 +18,7 @@ Encoding: UTF-8
LazyData: true LazyData: true
LazyDataCompression: xz LazyDataCompression: xz
Roxygen: list(markdown = TRUE) Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.0 RoxygenNote: 7.2.3
Depends: Depends:
R (>= 4.1) R (>= 4.1)
Imports: Imports:

View file

@ -17,7 +17,7 @@
#' @format A [data.table] with the following columns: #' @format A [data.table] with the following columns:
#' \describe{ #' \describe{
#' \item{id}{Ensembl gene ID} #' \item{id}{Ensembl gene ID}
#' \item{name}{The gene's HGNC name} #' \item{name}{The gene's HGNC name (if available)}
#' \item{chromosome}{The human chromosome the gene is located on} #' \item{chromosome}{The human chromosome the gene is located on}
#' } #' }
"genes" "genes"

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -8,7 +8,7 @@
A \link{data.table} with the following columns: A \link{data.table} with the following columns:
\describe{ \describe{
\item{id}{Ensembl gene ID} \item{id}{Ensembl gene ID}
\item{name}{The gene's HGNC name} \item{name}{The gene's HGNC name (if available)}
\item{chromosome}{The human chromosome the gene is located on} \item{chromosome}{The human chromosome the gene is located on}
} }
} }

View file

@ -19,7 +19,7 @@ get_species_ids <- function() {
#' Get all chromosome names for a species. #' Get all chromosome names for a species.
get_species_chromosomes <- function(species_id) { get_species_chromosomes <- function(species_id) {
chromosomes <- unlist(ensembl_request( unlist(ensembl_request(
paste0("/info/assembly/", species_id) paste0("/info/assembly/", species_id)
)$karyotype) )$karyotype)
} }

View file

@ -3,7 +3,7 @@ library(data.table)
rlog::log_info("Connecting to Ensembl API") rlog::log_info("Connecting to Ensembl API")
# Object to access the Ensembl API. # Object to access the Ensembl API.
ensembl <- biomaRt::useEnsembl("ensembl", version = 106) ensembl <- biomaRt::useEnsembl("ensembl", version = 110)
# Retrieve species information. # Retrieve species information.
@ -42,69 +42,48 @@ valid_chromosome_names <- c(
"17", "17",
"18", "18",
"19", "19",
"20",
"X", "X",
"groupI",
"groupII",
"groupIII",
"groupIV",
"groupV",
"groupVI",
"groupVII",
"groupVIII",
"groupIX",
"groupX",
"groupXI",
"groupXII",
"groupXIII",
"groupXIV",
"groupXV",
"groupXVI",
"groupXVII",
"groupXVIII",
"groupXIX",
"groupXX",
"groupXXI",
"20",
"Y", "Y",
"21", "21",
"4A",
"1A",
"22", "22",
"23", "23",
"24", "24",
"25LG1", "25",
"25LG2",
"26", "26",
"27", "27",
"28", "28",
"LGE22",
"Z",
"25",
"29", "29",
"A1", "30",
"A2", "31",
"A3", "32",
"B1", "33",
"B2", "34",
"B3", "35",
"B4", "36",
"C1", "37",
"C2", "38",
"D1",
"D2",
"D3",
"D4",
"E1",
"E2",
"E3",
"F1",
"F2",
"2A",
"2B",
"LG01",
"LG02",
"LG03",
"LG04",
"LG05",
"LG06",
"LG07",
"LG08",
"LG09",
"LG10",
"LG11",
"LG12",
"LG13",
"LG14",
"LG15",
"LG16",
"LG17",
"LG18",
"LG19",
"LG20",
"LG21",
"LG22",
"LG23",
"LG24",
"LG25",
"I", "I",
"II", "II",
"III", "III",
@ -128,53 +107,21 @@ valid_chromosome_names <- c(
"XXII", "XXII",
"XXIII", "XXIII",
"XXIV", "XXIV",
"7a",
"7b",
"Z",
"W", "W",
"30", "a",
"31", "b",
"32", "c",
"33", "d",
"34", "f",
"35", "g",
"36", "h",
"37",
"38",
"39", "39",
"40", "40",
"2L",
"2R",
"3L",
"3R",
"LG1",
"LG2",
"LG3",
"LG4",
"LG5",
"LG6",
"LG7",
"LG8",
"LG9",
"LG26",
"LG27",
"LG28",
"LG29",
"LG30",
"1a", "1a",
"7b",
"22a", "22a",
"LGE22C19W28_E50C23",
"LGE64",
"7a",
"MIC_1",
"MIC_10",
"MIC_11",
"MIC_2",
"MIC_3",
"MIC_4",
"MIC_5",
"MIC_6",
"MIC_7",
"MIC_8",
"MIC_9",
"sgr01", "sgr01",
"sgr02", "sgr02",
"sgr03", "sgr03",
@ -194,18 +141,95 @@ valid_chromosome_names <- c(
"sgr17", "sgr17",
"sgr18", "sgr18",
"sgr19", "sgr19",
"LGE64",
"2A",
"2B",
"X1", "X1",
"X2", "X2",
"X3", "X3",
"X4", "X4",
"X5", "X5",
"a", "LG1",
"b", "LG2",
"c", "LG3",
"d", "LG4",
"f", "LG5",
"g", "LG6",
"h", "LG7",
"LG8",
"LG9",
"LG10",
"LG11",
"LG12",
"LG13",
"LG14",
"LG15",
"LG16",
"LG17",
"LG18",
"LG19",
"LG20",
"LG22",
"LG23",
"4A",
"1A",
"25LG1",
"25LG2",
"LGE22",
"LG21",
"A1",
"A2",
"A3",
"B1",
"B2",
"B3",
"B4",
"C1",
"C2",
"D1",
"D2",
"D3",
"D4",
"E1",
"E2",
"E3",
"F1",
"F2",
"LG34",
"LG35",
"LG24",
"LG25",
"LG26",
"LG27",
"LG28",
"LG29",
"LG30",
"MIC_1",
"MIC_10",
"MIC_11",
"MIC_2",
"MIC_3",
"MIC_4",
"MIC_5",
"MIC_6",
"MIC_7",
"MIC_8",
"MIC_9",
"2L",
"2R",
"3L",
"3R",
"LGE22C19W28_E50C23",
"LG01",
"LG02",
"LG03",
"LG04",
"LG05",
"LG06",
"LG07",
"LG08",
"LG09",
"LG7_11",
"41", "41",
"42", "42",
"43", "43",
@ -224,31 +248,7 @@ valid_chromosome_names <- c(
"LG44F", "LG44F",
"LG45M", "LG45M",
"LG48F", "LG48F",
"LG49B", "LG49B"
"LG34",
"LG35",
"LG7_11",
"groupI",
"groupII",
"groupIII",
"groupIV",
"groupV",
"groupVI",
"groupVII",
"groupVIII",
"groupIX",
"groupX",
"groupXI",
"groupXII",
"groupXIII",
"groupXIV",
"groupXV",
"groupXVI",
"groupXVII",
"groupXVIII",
"groupXIX",
"groupXX",
"groupXXI"
) )
#' Get all chromosome names for an Ensembl dataset. #' Get all chromosome names for an Ensembl dataset.