ideogram
Version:
Chromosome visualization for the web
158 lines (133 loc) • 5.37 kB
Plain Text
def update_bands(centromere, bands, new_bands, chr, i, j):
    """Close the gaps between the centromere and its two flanking bands.

    Helper function for Ensembl-Genomaize centromere merging.

    Args:
        centromere: Pair ``(start, stop)`` of centromere coordinates; each
            value must be convertible with ``int()``.
        bands: Input bands of the chromosome, not yet prefixed with an arm.
            The start coordinate is written at indexes 1 and 3.
        new_bands: Dict mapping chromosome name to the arm-prefixed bands
            kept so far.  The stop coordinate is written at indexes 3 and 5.
        chr: Chromosome name, used as the key into ``new_bands``.
        i: Index in ``bands`` of the first band kept on the q arm.
        j: Number of bands before ``i`` that were omitted, so that
            ``i - j - 1`` indexes the last kept p-arm band in
            ``new_bands[chr]``.

    Returns:
        ``[bands, new_bands]`` -- the same objects, modified in place.
    """
    cen_start, cen_stop = centromere

    # Extend nearest p-arm band's stop coordinate to the
    # p_cen's start coordinate (minus 1).
    # A negative index means no p-arm band has been kept; indexing with it
    # would raise on an empty list or silently wrap around to the wrong band.
    p_index = i - j - 1
    if p_index >= 0:
        cen_start_pre = str(int(cen_start) - 1)
        new_bands[chr][p_index][3] = cen_start_pre
        new_bands[chr][p_index][5] = cen_start_pre

    # Extend nearest q-arm band's start coordinate to the
    # q_cen's stop coordinate (plus 1).
    # `i` already indexes `bands` directly, so the nearest kept q-arm band is
    # bands[i]; adding the omitted-band count `j` would skip past it (or run
    # off the end of the list).
    cen_stop_post = str(int(cen_stop) + 1)
    bands[i][1] = cen_stop_post
    bands[i][3] = cen_stop_post

    return [bands, new_bands]
def get_pcen_and_qcen(centromere, chr):
    """Split a centromere into its p-arm and q-arm halves.

    Helper function for Ensembl-Genomaize centromere merging.

    Args:
        centromere: Pair ``(start, stop)`` of centromere coordinates; each
            value must be convertible with ``int()``.
        chr: Chromosome name.  Accepted for signature compatibility; it is
            not used by this function.

    Returns:
        ``[pcen, qcen]``, each a 7-item band of the form
        ``[arm, name, start, stop, start, stop, 'acen']``.  The outer
        coordinates are passed through exactly as given in ``centromere``;
        the coordinates derived from the midpoint are strings.
    """
    start, stop = centromere

    # Midpoint of the centromere: the p-arm half ends one position before
    # it, and the q-arm half begins on it.
    half_width = round((int(stop) - int(start)) / 2)
    midpoint = int(start) + half_width
    p_stop = str(midpoint - 1)
    q_start = str(midpoint)

    p_half = ['p', 'pcen', start, p_stop, start, p_stop, 'acen']
    q_half = ['q', 'qcen', q_start, stop, q_start, stop, 'acen']
    return [p_half, q_half]
def get_centromere_parts(centromere, chr, new_bands, bands, band, i, j,
                         pcen_index):
    """Classify one band relative to the centromere.

    Helper function for Ensembl-Genomaize centromere merging.

    Args:
        centromere: Pair ``(start, stop)`` of centromere coordinates.
        chr: Chromosome name (key into ``new_bands``).
        new_bands: Dict mapping chromosome name to the arm-prefixed bands
            kept so far.
        bands: All input bands of this chromosome.
        band: The band being classified; its start and stop coordinates are
            read from indexes 1 and 2.
        i: Index of ``band`` within ``bands``.
        j: Number of bands omitted so far on this chromosome.
        pcen_index: Index in ``new_bands[chr]`` at which the centromere
            bands belong, or None if it has not been determined yet.

    Returns:
        One of two shapes, which callers tell apart by length:

        * ``[j, arm]`` when the band overlaps the centromere and must be
          omitted; ``j`` has been incremented.
        * ``[arm, pcen, qcen, new_bands, bands, j, pcen_index]`` when the
          band is kept.  ``pcen`` and ``qcen`` are None for p-arm bands and
          are the two centromere halves for every kept q-arm band, so a
          caller that unpacks them on each iteration never replaces
          already-found halves with None.

    Note:
        The omit path for chromosome '1' logs through the module-level
        ``logger``, which ``merge_centromeres`` and ``parse_centromeres``
        assign before doing any work.
    """
    band_start, band_stop = band[1:3]
    cen_start, cen_stop = centromere

    if int(band_stop) < int(cen_start):
        # Band ends before the centromere begins: plain p-arm band.
        return ['p', None, None, new_bands, bands, j, pcen_index]

    arm = 'q'

    if int(band_start) < int(cen_stop):
        # Omit any q-arm bands that start before q-arm pericentromeric band
        if chr == '1':
            logger.info('Omit band:')
            logger.info(band)
        j += 1
        return [j, arm]

    if pcen_index is None:
        # First kept q-arm band: the centromere goes right before it, and
        # the flanking bands are stretched to meet the centromere.
        pcen_index = i - j
        bands, new_bands = update_bands(centromere, bands, new_bands, chr,
                                        i, j)

    pcen, qcen = get_pcen_and_qcen(centromere, chr)
    return [arm, pcen, qcen, new_bands, bands, j, pcen_index]
def merge_centromeres(bands_by_chr, centromeres, logger_obj):
    """Adds p and q arms to cytobands; thus adds centromere to each chromosome.

    This is a special case for Zea mays (maize, i.e. corn).
    Ensembl Genomes provides band data with no cytogenetic arm assignment.
    Genomaize provides centromere positions for each chromosome.
    This function merges those two datasets to provide input directly
    useable to Ideogram.js.

    Args:
        bands_by_chr: Dict mapping chromosome name to a list of bands, each
            band a list without an arm column.  The input is not modified;
            the result is built from copies of the band lists.
        centromeres: Mapping indexed by chromosome name that yields a
            ``(start, stop)`` pair for each chromosome in ``bands_by_chr``.
        logger_obj: Logger to use; it is also stored in the module-level
            ``logger`` name, which the helper functions read.

    Returns:
        Dict mapping chromosome name to a list of bands, each prefixed with
        its arm ('p' or 'q').  Bands overlapping the centromere are dropped,
        and the two centromere halves are inserted at the p/q boundary when
        at least one q-arm band was kept.

    Raises:
        KeyError: If ``centromeres`` is a dict lacking one of the
            chromosomes in ``bands_by_chr``.
    """
    global logger
    logger = logger_obj
    logger.info('Entering merge_centromeres')

    new_bands = {}
    for chr, chr_bands in bands_by_chr.items():
        # Work on copies so the caller's band lists are neither prefixed
        # with an arm nor have their coordinates rewritten in place.
        bands = [list(band) for band in chr_bands]
        new_bands[chr] = []
        centromere = centromeres[chr]

        pcen = None
        qcen = None
        pcen_index = None
        j = 0  # Number of bands omitted so far on this chromosome

        for i, band in enumerate(bands):
            parts = get_centromere_parts(
                centromere, chr, new_bands, bands, band, i, j, pcen_index
            )
            if len(parts) <= 2:
                # Short form: the band overlaps the centromere; skip it.
                (j, arm) = parts
                continue

            (arm, found_pcen, found_qcen, new_bands, bands, j,
             pcen_index) = parts
            if found_pcen is not None:
                # Keep the centromere halves once they are known; later
                # bands may report None and must not erase them.
                pcen, qcen = found_pcen, found_qcen

            band.insert(0, arm)
            new_bands[chr].append(band)

        if pcen_index is not None:
            # Inserting qcen first, then pcen at the same index, leaves
            # them in the order pcen, qcen.
            new_bands[chr].insert(pcen_index, qcen)
            new_bands[chr].insert(pcen_index, pcen)

    return new_bands
def parse_centromeres(bands_by_chr, logger_obj):
    """Adds p and q arms to cytobands, by parsing embedded centromere bands.

    This is a special case for assigning cytogenetic arms to certain organisms
    from Ensembl Genomes, including: Aspergillus fumigatus, Aspergillus
    nidulans, Aspergillus niger, Aspergillus oryzae (various fungi);
    Oryza sativa (rice); and Hordeum vulgare (barley).

    Bands are assigned an arm based on their position relative to the embedded
    centromere.

    Args:
        bands_by_chr: Dict mapping chromosome name to a list of bands; the
            last item of each band is its stain.  The band lists are not
            modified.
        logger_obj: Logger to use; it is also stored in the module-level
            ``logger`` name.

    Returns:
        ``bands_by_chr`` itself, unchanged, when no band anywhere has the
        stain 'acen'.  Otherwise a new dict mapping chromosome name to new
        band lists, each prefixed with its arm: 'p', 'q', or '' for
        chromosomes that have no 'acen' band of their own.
    """
    global logger
    logger = logger_obj
    logger.info('Entering parse_centromeres')

    # If centromeres aren't embedded in the input banding data,
    # then simply return the input without modification.
    has_centromere = any(
        band[-1] == 'acen'
        for bands in bands_by_chr.values()
        for band in bands
    )
    if not has_centromere:
        return bands_by_chr

    new_bands = {}
    for chr, bands in bands_by_chr.items():
        # On each side of the centromere -- the p-arm side and the q-arm
        # side -- there is a band with a "stain" value of "acen".  The loop
        # keeps the index of the LAST acen band, i.e. the one on the q-arm
        # side.  Every band to the left of it (including the p-arm side
        # acen band) is on the p arm; it and every band to its right are
        # on the q arm.
        last_acen_index = None
        for i, band in enumerate(bands):
            if band[-1] == 'acen':
                last_acen_index = i

        arm_bands = []
        for i, band in enumerate(bands):
            if last_acen_index is None:
                arm = ''
            elif i < last_acen_index:
                arm = 'p'
            else:
                arm = 'q'
            # Build a new list rather than inserting into the caller's band.
            arm_bands.append([arm] + list(band))
        new_bands[chr] = arm_bands

    return new_bands