diff --git a/DESCRIPTION b/DESCRIPTION
index 6793809..5073b9f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -29,3 +29,8 @@ Imports:
     rclipboard,
     shiny,
     shinyWidgets
+Suggests:
+    biomaRt,
+    edgeR,
+    purrr,
+    stringr
diff --git a/scripts/input.R b/scripts/input.R
index d1ff95d..40f6de6 100644
--- a/scripts/input.R
+++ b/scripts/input.R
@@ -2,22 +2,74 @@
 # further analysis. Note that this requires very good computational resources
 # and especially a lot of RAM because of the size of the data.
 
+library(biomaRt)
+library(edgeR)
 library(data.table)
 library(here)
 
 i_am("scripts/input.R")
 
 # Source: https://storage.googleapis.com/gtex_analysis_v8/rna_seq_data/
-# GTEx_Analysis_2017-06-05_v8_RNASeQCv1.1.9_gene_tpm.gct.gz
-# The file has been edited removing the lines above the column headers.
-data_wide_samples <- fread(here("scripts", "input", "gtex.tsv.gz"))
+# GTEx_Analysis_2017-06-05_v8_RNASeQCv1.1.9_gene_reads.gct.gz
+read_counts <- fread(here("scripts", "input", "gtex_read_counts.tsv.gz"))
 
 setnames(
-  data_wide_samples,
+  read_counts,
   c("Name", "Description"),
   c("gene", "hgnc_symbol")
 )
 
+# Remove Ensembl gene version numbers.
+read_counts[, gene := stringr::str_split(gene, "\\.") |> purrr::map_chr(1)]
+
+# Get gene lengths from Ensembl.
+
+ensembl <- useEnsembl("ensembl", version = 107)
+dataset <- useDataset("hsapiens_gene_ensembl", mart = ensembl)
+gene_lengths <- data.table(getBM(
+  attributes = c(
+    "ensembl_gene_id",
+    "start_position",
+    "end_position"
+  ),
+  mart = dataset
+))
+
+setnames(gene_lengths, "ensembl_gene_id", "gene")
+
+gene_lengths[, length := end_position - start_position]
+
+read_counts <- merge(
+  read_counts,
+  gene_lengths,
+  by = "gene",
+  all.x = TRUE
+)
+
+read_counts <- read_counts[!is.na(length)]
+hgnc_symbols <- read_counts$hgnc_symbol
+gene_lengths <- read_counts$length
+
+read_counts <- as.matrix(
+  read_counts[, !c(
+    "hgnc_symbol",
+    "start_position",
+    "end_position",
+    "length"
+  )],
+  rownames = "gene"
+)
+
+# Normalize read counts for gene lengths
+read_counts <- read_counts / (gene_lengths / 1000)
+
+getpm <- DGEList(counts = read_counts) |>
+  calcNormFactors() |>
+  cpm()
+
+data_wide_samples <- data.table(getpm, keep.rownames = "gene")
+data_wide_samples[, hgnc_symbol := hgnc_symbols]
+
 data_long <- melt(
   data_wide_samples,
   id.vars = c("gene", "hgnc_symbol"),
diff --git a/scripts/ranking.R b/scripts/ranking.R
index 5312786..bf8620d 100644
--- a/scripts/ranking.R
+++ b/scripts/ranking.R
@@ -8,9 +8,6 @@ i_am("scripts/input.R")
 
 data <- fread(here("scripts", "output", "results.csv"))
 
-# Keep only the actual Ensembl ID for each gene.
-data[, gene := stringr::str_split(gene, "\\.") |> purrr::map_chr(1)]
-
 data[, score := 0.5 * above_95 +
   0.25 * mean_expression_normalized +
   -0.25 * sd_expression_normalized]