UNPKG

ideogram

Version:

Chromosome visualization for the web

85 lines (65 loc) 2.36 kB
''' Converts GVF data from dbVar to JSON-formatted annotations'''

import json
import random
import re

# Human chromosomes, in the order they are emitted in the output JSON.
chrs = [
    "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
    "11", "12", "13", "14", "15", "16", "17", "18", "19", "20",
    "21", "22", "X", "Y"
]

# Chromosome lengths in base pairs, per assembly.  Kept for reference; the
# conversion below does not read them.
lengths_GRCh37 = {
    "1": 249250621, "2": 243199373, "3": 198022430, "4": 191154276,
    "5": 180915260, "6": 171115067, "7": 159138663, "8": 146364022,
    "9": 141213431, "10": 135534747, "11": 135006516, "12": 133851895,
    "13": 115169878, "14": 107349540, "15": 102531392, "16": 90354753,
    "17": 81195210, "18": 78077248, "19": 59128983, "20": 63025520,
    "21": 48129895, "22": 51304566, "X": 155270560, "Y": 59373566
}

lengths_GRCh38 = {
    "1": 248956422, "2": 242193529, "3": 198295559, "4": 190214555,
    "5": 181538259, "6": 170805979, "7": 159345973, "8": 145138636,
    "9": 138394717, "10": 133797422, "11": 135086622, "12": 133275309,
    "13": 114364328, "14": 107043718, "15": 101991189, "16": 90338345,
    "17": 83257441, "18": 80373285, "19": 58617616, "20": 64444167,
    "21": 46709983, "22": 50818468, "X": 156040895, "Y": 57227415
}

file_name = "../data/annotations/estd214_1000_Genomes_Consortium_Phase_3.GRCh38.remap.var.germline.gvf"
output_file_name = "../data/annotations/dbvar_estd214.var.json"

# Accessions of the form NC_0000NN.V (e.g. NC_000001.11).  Anchoring on the
# full prefix keeps other sequences (e.g. NC_012920.1, NT_/NW_ scaffolds)
# from being mistaken for a chromosome just because their number happens to
# end in 01-24.
_REFSEQ_CHR = re.compile(r"NC_0000(\d{2})\.\d+$")


def refseq_to_chr(accession):
    '''Return the chromosome name for a RefSeq accession, or None.

    E.g. "NC_000001.11" -> "1", "NC_000023.11" -> "X", "NC_000024.10" -> "Y".
    Anything that is not such an accession, or whose number is not one of
    the names in `chrs`, yields None so the caller can skip the row.
    '''
    match = _REFSEQ_CHR.match(accession)
    if match is None:
        return None
    number = int(match.group(1))
    if number == 23:
        return "X"
    if number == 24:
        return "Y"
    name = str(number)
    return name if name in chrs else None


def parse_gvf(lines):
    '''Group GVF feature rows by chromosome.

    `lines` is any iterable of GVF text lines.  Lines starting with "#",
    blank lines, and rows whose first column is not recognized by
    `refseq_to_chr` are skipped.

    Returns a list with one {"chr": name, "annots": [...]} entry per name in
    `chrs`, in that order.  Each annotation is [name, start, length, 1],
    where `name` is the value of the Name attribute in column 9 ("" if
    absent), `start` is column 4, `length` is column 5 minus column 4, and
    the trailing 1 is a placeholder for future use.

    Raises ValueError for an accepted row with fewer than 9 columns or with
    non-integer start/end columns.
    '''
    by_chr = dict((name, []) for name in chrs)

    for line_number, line in enumerate(lines, 1):
        if line.startswith("#"):
            continue
        row = line.strip()
        if not row:
            continue

        columns = row.split("\t")
        chr_name = refseq_to_chr(columns[0])
        if chr_name is None:
            # E.g. chrMT, alternate loci scaffolds
            continue

        if len(columns) < 9:
            raise ValueError(
                "line {0}: expected 9 tab-separated columns, got {1}".format(
                    line_number, len(columns)
                )
            )

        name = ""
        for attr in columns[8].split(";"):
            key, _, value = attr.partition("=")
            if key == "Name":
                name = value

        start = int(columns[3])
        length = int(columns[4]) - start

        by_chr[chr_name].append([
            name,
            start,
            length,
            1  # placeholder for future use
        ])

    return [{"chr": name, "annots": by_chr[name]} for name in chrs]


def main():
    '''Read the GVF at `file_name` and write JSON to `output_file_name`.'''
    with open(file_name, "r") as gvf:
        # The first line of the file is always skipped, whatever it contains.
        next(gvf, None)
        annots = parse_gvf(gvf)

    with open(output_file_name, "w") as out:
        out.write('{"annots":' + json.dumps(annots) + '}')


if __name__ == "__main__":
    main()