From bae0723533e6c55ecb701c68720406543e38c3cd Mon Sep 17 00:00:00 2001 From: Elias Projahn Date: Thu, 28 Apr 2022 11:01:07 +0200 Subject: [PATCH] Add script to approximate centromere locations --- scripts/centromeres.R | 59 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 scripts/centromeres.R diff --git a/scripts/centromeres.R b/scripts/centromeres.R new file mode 100644 index 0000000..cd620d8 --- /dev/null +++ b/scripts/centromeres.R @@ -0,0 +1,59 @@ +library(data.table) + +# This depends on the current development version of this package to be +# installed in order to retrieve the distance data. +distances <- geposan::distances + +setorder(distances, "start_position") + +chromosomes <- distances[, + .(chromosome_length = max(end_position)), + by = c("species", "chromosome_name") +] + +handle_chromosome <- function(species_, chromosome_name_) { + chromosome_distances <- distances[ + species == species_ & chromosome_name == chromosome_name_ + ] + + chromosome_distances[, + center_position := mean(c(start_position, end_position)), + by = gene + ] + + setorder(chromosome_distances, center_position) + + chromosome_distances[1, gap_size := 0] + + for (index in 2:nrow(chromosome_distances)) { + previous_end_position <- chromosome_distances[index - 1, end_position] + chromosome_distances[ + index, + gap_size := start_position - previous_end_position + ] + } + + chromosome_gaps <- chromosome_distances[order(gap_size, decreasing = TRUE)] + + # Retrieve the two largest gap sizes. + max_gap_size <- chromosome_gaps[1, gap_size] + next_gap_size <- chromosome_gaps[2, gap_size] + + list( + centromere_start = chromosome_distances[ + which.max(gap_size) - 1, + end_position + ], + centromere_end = chromosome_distances[ + which.max(gap_size), + start_position + ], + centromere_dominance = max_gap_size / next_gap_size + ) +} + +chromosomes[, + c("centromere_start", "centromere_end", "centromere_dominance") := + handle_chromosome(species, chromosome_name), + by = c("species", "chromosome_name") +]