mirror of
https://github.com/johrpan/geposan.git
synced 2025-10-26 18:57:25 +01:00
Add script to approximate centromere locations
This commit is contained in:
parent
5bfd20b6b6
commit
bae0723533
1 changed files with 59 additions and 0 deletions
59
scripts/centromeres.R
Normal file
59
scripts/centromeres.R
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
library(data.table)
|
||||
|
||||
# This depends on the current development version of this package to be
|
||||
# installed in order to retrieve the distance data.
|
||||
distances <- geposan::distances
|
||||
|
||||
setorder(distances, "start_position")
|
||||
|
||||
chromosomes <- distances[,
|
||||
.(chromosome_length = max(end_position)),
|
||||
by = c("species", "chromosome_name")
|
||||
]
|
||||
|
||||
handle_chromosome <- function(species_, chromosome_name_) {
|
||||
chromosome_distances <- distances[
|
||||
species == species_ & chromosome_name == chromosome_name_
|
||||
]
|
||||
|
||||
chromosome_distances[,
|
||||
center_position := mean(c(start_position, end_position)),
|
||||
by = gene
|
||||
]
|
||||
|
||||
setorder(chromosome_distances, center_position)
|
||||
|
||||
chromosome_distances[1, gap_size := 0]
|
||||
|
||||
for (index in 2:nrow(chromosome_distances)) {
|
||||
previous_end_position <- chromosome_distances[index - 1, end_position]
|
||||
chromosome_distances[
|
||||
index,
|
||||
gap_size := start_position - previous_end_position
|
||||
]
|
||||
}
|
||||
|
||||
chromosome_gaps <- chromosome_distances[order(gap_size, decreasing = TRUE)]
|
||||
|
||||
# Retrieve the two largest gap sizes.
|
||||
max_gap_size <- chromosome_gaps[1, gap_size]
|
||||
next_gap_size <- chromosome_gaps[2, gap_size]
|
||||
|
||||
list(
|
||||
centromere_start = chromosome_distances[
|
||||
which.max(gap_size) - 1,
|
||||
end_position
|
||||
],
|
||||
centromere_end = chromosome_distances[
|
||||
which.max(gap_size),
|
||||
start_position
|
||||
],
|
||||
centromere_dominance = max_gap_size / next_gap_size
|
||||
)
|
||||
}
|
||||
|
||||
chromosomes[,
|
||||
c("centromere_start", "centromere_end", "centromere_dominance") :=
|
||||
handle_chromosome(species, chromosome_name),
|
||||
by = c("species", "chromosome_name")
|
||||
]
|
||||
Loading…
Add table
Add a link
Reference in a new issue