UNPKG

@gmod/jbrowse

Version:

JBrowse - client-side genome browser

185 lines (148 loc) 11.4 kB
require(['JBrowse/Store/SeqFeature/GFF3/GFF3Parser'], function( GFF3Parser ) {

/**
 * Low-level spec for GFF3Parser.parse().
 *
 * Every test feeds a GFF3 *string* to parse() and inspects the returned
 * object. As exercised below, result["parsedData"] is an array of
 * top-level features; each feature carries
 *   - "ID":       the feature's ID attribute,
 *   - "data":     one entry per GFF3 line of that feature, each with
 *                 "rawdata" (the nine GFF3 columns) and "attributes"
 *                 (decoded column-nine key -> array of values),
 *   - "children": features whose Parent attribute names this feature,
 *                 nested recursively.
 *
 * GFF3 columns are tab-delimited, so every fixture spells the delimiter
 * as an explicit "\t" escape; that keeps editors, formatters and
 * copy/paste from silently turning the tabs into spaces. The single
 * space inside the "##gff-version 3" pragma is intentional.
 */
describe("GFF3Parser (low level tests)", function() {

    // ---- GFF3 records shared by several fixtures (no trailing newline) ----
    var GENE_RECORD =
        "Group1.33\tmaker\tgene\t245454\t247006\t.\t+\t.\t" +
        "ID=this_parent_id_12345;Name=maker-Group1%2E33-pred_gff_GNOMON-gene-4.137;";

    var MRNA_RECORD =
        "Group1.33\tmaker\tmRNA\t245454\t247006\t.\t+\t.\t" +
        "ID=1:gnomon_566853_mRNA;Parent=this_parent_id_12345;Name=gnomon_566853_mRNA;" +
        "_AED=0.45;_eAED=0.45;_QI=138|1|1|1|1|1|4|191|259;";

    var EXON_5976_RECORD =
        "Group1.33\tmaker\texon\t245454\t245533\t.\t+\t.\t" +
        "ID=1:gnomon_566853_mRNA:exon:5976;Parent=1:gnomon_566853_mRNA;";

    // Expected value of each of the nine columns of GENE_RECORD, paired with
    // the wording used in the corresponding test name.
    var GENE_COLUMNS = [
        [ "first",   "Group1.33" ],
        [ "second",  "maker" ],
        [ "third",   "gene" ],
        [ "fourth",  "245454" ],
        [ "five",    "247006" ],
        [ "sixth",   "." ],
        [ "seventh", "+" ],
        [ "eighth",  "." ],
        [ "ninth",   'ID=this_parent_id_12345;Name=maker-Group1%2E33-pred_gff_GNOMON-gene-4.137;' ]
    ];

    // ---- parser, fixtures and parsed output, rebuilt before every test ----
    var gff3Parser;
    var gff3String, gff3String2, gff3String3, gff3String4, gff3String5,
        gff3String6, gff3String7, gff3String8, gff3String9;
    var jsonOutput, jsonOutput2, jsonOutput3, jsonOutput4, jsonOutput5,
        jsonOutput6, jsonOutput7, jsonOutput8, jsonOutput9;

    beforeEach(function() {
        gff3Parser = new GFF3Parser();

        // gene -> mRNA -> exons / UTRs / CDS; parents appear before children.
        // Every record, including the last, is newline-terminated.
        gff3String = [
            GENE_RECORD,
            MRNA_RECORD,
            EXON_5976_RECORD,
            "Group1.33\tmaker\texon\t245702\t245879\t.\t+\t.\tID=1:gnomon_566853_mRNA:exon:5977;Parent=1:gnomon_566853_mRNA;",
            "Group1.33\tmaker\texon\t246046\t246278\t.\t+\t.\tID=1:gnomon_566853_mRNA:exon:5978;Parent=1:gnomon_566853_mRNA;",
            "Group1.33\tmaker\texon\t246389\t247006\t.\t+\t.\tID=1:gnomon_566853_mRNA:exon:5979;Parent=1:gnomon_566853_mRNA;",
            "Group1.33\tmaker\tfive_prime_UTR\t245454\t245533\t.\t+\t.\tID=1:gnomon_566853_mRNA:five_prime_utr;Parent=1:gnomon_566853_mRNA;",
            "Group1.33\tmaker\tfive_prime_UTR\t245702\t245759\t.\t+\t.\tID=1:gnomon_566853_mRNA:five_prime_utr;Parent=1:gnomon_566853_mRNA;",
            "Group1.33\tmaker\tCDS\t245760\t245879\t.\t+\t0\tID=1:gnomon_566853_mRNA:cds;Parent=1:gnomon_566853_mRNA;",
            "Group1.33\tmaker\tCDS\t246046\t246278\t.\t+\t0\tID=1:gnomon_566853_mRNA:cds;Parent=1:gnomon_566853_mRNA;",
            "Group1.33\tmaker\tCDS\t246389\t246815\t.\t+\t1\tID=1:gnomon_566853_mRNA:cds;Parent=1:gnomon_566853_mRNA;",
            "Group1.33\tmaker\tthree_prime_UTR\t246816\t247006\t.\t+\t.\tID=1:gnomon_566853_mRNA:three_prime_utr;Parent=1:gnomon_566853_mRNA;"
        ].join("\n") + "\n";
        jsonOutput = gff3Parser.parse( gff3String );

        // child (mRNA) listed before its parent (gene)
        gff3String2 = [
            "Group1.33\tmaker\tmRNA\t245454\t247006\t.\t+\t.\t" +
                "ID=1:gnomon_566853_mRNA;Parent=maker-Group1%2E33-pred_gff_GNOMON-gene-4.137;Name=gnomon_566853_mRNA;" +
                "_AED=0.45;_eAED=0.45;_QI=138|1|1|1|1|1|4|191|259;",
            "Group1.33\tmaker\tgene\t245454\t247006\t.\t+\t.\t" +
                "ID=maker-Group1%2E33-pred_gff_GNOMON-gene-4.137;Name=maker-Group1%2E33-pred_gff_GNOMON-gene-4.137;"
        ].join("\n");
        jsonOutput2 = gff3Parser.parse( gff3String2 );

        // attribute value consisting solely of percent-encoded metacharacters
        gff3String3 =
            "Group1.33\tmaker\tmRNA\t245454\t247006\t.\t+\t.\t" +
            "ID=1:gnomon_566853_mRNA;metacharacterzoo=%2C%3D%3B%7C%28%29%5B%7B%7D%5E%24%2A%2B%3F%2E%25%26";
        jsonOutput3 = gff3Parser.parse( gff3String3 );

        // a feature placed after the ##FASTA pragma
        gff3String4 = "##gff-version 3\n##FASTA\n" + GENE_RECORD;
        jsonOutput4 = gff3Parser.parse( gff3String4 );

        // legacy FASTA pragma as written by Artemis: a bare ">" line
        // instead of "##FASTA"
        gff3String9 = "##gff-version 3\n>\n" + GENE_RECORD;
        jsonOutput9 = gff3Parser.parse( gff3String9 );

        // a feature line that is commented out with a leading "#"
        gff3String5 = "##gff-version 3\n#" + GENE_RECORD;
        jsonOutput5 = gff3Parser.parse( gff3String5 );

        // Deep nesting: gene -> mRNA -> exon -> three_prime_UTR -> TF_binding_site.
        // Not biologically meaningful; it only exercises nesting down to
        // great-great-grandchildren, the deepest level tested here.
        gff3String6 = [
            GENE_RECORD,
            MRNA_RECORD,
            EXON_5976_RECORD,
            "Group1.33\tmaker\tthree_prime_UTR\t246816\t247006\t.\t+\t.\tID=1:gnomon_566853_mRNA:three_prime_utr;Parent=1:gnomon_566853_mRNA:exon:5976;",
            "Group1.33\tmaker\tTF_binding_site\t246816\t246820\t.\t+\t.\tID=1:gnomon_566853_TFBS;Parent=1:gnomon_566853_mRNA:three_prime_utr;"
        ].join("\n");
        jsonOutput6 = gff3Parser.parse( gff3String6 );

        // one child (exon_1) shared by two parents
        gff3String7 = [
            "Group1.33\tmaker\tmRNA\t245454\t247006\t.\t+\t.\tID=mrna_1;",
            "Group1.33\tmaker\tmRNA\t245454\t247006\t.\t+\t.\tID=mrna_2;",
            "Group1.33\tmaker\texon\t245454\t245533\t.\t+\t.\tID=exon_1;Parent=mrna_1,mrna_2;"
        ].join("\n");
        jsonOutput7 = gff3Parser.parse( gff3String7 );

        // one feature spread over several lines that share an ID
        // ("discontinuous feature")
        gff3String8 = [
            "ctg123\texample\tmatch\t26122\t26126\t.\t+\t.\tID=match001;",
            "ctg123\texample\tmatch\t26497\t26869\t.\t+\t.\tID=match001;",
            "ctg123\texample\tmatch\t27201\t27325\t.\t+\t.\tID=match001;",
            "ctg123\texample\tmatch\t27372\t27433\t.\t+\t.\tID=match001;",
            "ctg123\texample\tmatch\t27565\t27565\t.\t+\t.\tID=match001;"
        ].join("\n");
        jsonOutput8 = gff3Parser.parse( gff3String8 );
    });

    // ---- shape of the returned object ----

    it("should respond to parse", function() {
        expect(gff3Parser.parse).toBeDefined();
    });

    it("should return something non-null", function() {
        expect(jsonOutput).not.toBeNull();
    });

    // NOTE(review): not.toBeNull() is also satisfied by undefined, so the
    // three checks below do not prove that the key exists. Confirm the
    // parser's actual key names before tightening them to toBeDefined().
    it("should return non-null 'parsedData' attribute", function() {
        expect(jsonOutput["parsedData"]).not.toBeNull();
    });

    it("should return non-null 'parsedErrors' attribute", function() {
        expect(jsonOutput["parsedErrors"]).not.toBeNull();
    });

    it("should return non-null 'parsedWarnings' attribute", function() {
        expect(jsonOutput["parsedWarnings"]).not.toBeNull();
    });

    it("should return a parent with the right ID in parsed JSON", function() {
        expect(jsonOutput["parsedData"][0]["ID"]).toEqual("this_parent_id_12345");
    });

    // ---- the nine raw columns of the first record ----

    it("should data array of 9 element in parsed JSON", function() {
        var columns = jsonOutput["parsedData"][0]["data"][0]["rawdata"];
        expect(columns).toBeDefined();
        expect(jsonOutput["parsedData"][0]["data"][0]["rawdata"].length).toEqual(9);
    });

    // One test per column, registered in column order.
    GENE_COLUMNS.forEach(function( column, columnIndex ) {
        var ordinal = column[0];
        var expectedValue = column[1];
        it("should correctly parse " + ordinal + " field of GFF3", function() {
            expect(jsonOutput["parsedData"][0]["data"][0]["rawdata"][columnIndex]).toEqual(expectedValue);
        });
    });

    // ---- decoded attributes (column nine) ----

    it("should correctly parse attributes in ninth field without hex codes", function() {
        var attributes = jsonOutput3["parsedData"][0]["data"][0]["attributes"];
        expect(attributes["ID"][0]).toEqual("1:gnomon_566853_mRNA");
    });

    it("should correctly parse attributes in ninth field with hex codes", function() {
        var attributes = jsonOutput3["parsedData"][0]["data"][0]["attributes"];
        expect(attributes["metacharacterzoo"][0]).toEqual(',=;|()[{}^$*+?.%&');
    });

    // ---- parent / child nesting ----

    it("should return children in parsed JSON", function() {
        expect(jsonOutput["parsedData"][0]["children"][0]).toBeDefined();
    });

    it("should put child into 'children' array of parent (when parent is seen before child)", function() {
        var firstChild = jsonOutput["parsedData"][0]["children"][0];
        expect(firstChild).toBeDefined();
        expect(jsonOutput["parsedData"][0]["children"][0]["ID"]).toEqual("1:gnomon_566853_mRNA");
    });

    it("should put child into 'children' array of parent (when child is seen before parent)", function() {
        expect(jsonOutput2["parsedData"][0]["children"][0]["ID"]).toEqual("1:gnomon_566853_mRNA");
    });

    it("should put grandchildren into 'children' array of 'children' array of grandparent", function() {
        var mrna = jsonOutput["parsedData"][0]["children"][0];
        expect(mrna["children"][0]["ID"]).toEqual("1:gnomon_566853_mRNA:exon:5976");
    });

    it("should put great-grandchildren into 'children' array of 'children' array of 'children' array of great-grandparent", function() {
        var exon = jsonOutput6["parsedData"][0]["children"][0]["children"][0];
        expect(exon["children"][0]["ID"]).toEqual("1:gnomon_566853_mRNA:three_prime_utr");
    });

    it("should put great-great-grandchildren into 'children' array of 'children' array of 'children' array of 'children' array of great-great-grandparent", function() {
        var utr = jsonOutput6["parsedData"][0]["children"][0]["children"][0]["children"][0];
        expect(utr["children"][0]["ID"]).toEqual("1:gnomon_566853_TFBS");
    });

    // ---- pragmas and comment lines ----

    it("should stop parsing at ##FASTA pragma", function() {
        expect(jsonOutput4["parsedData"]).toEqual([]);
    });

    it("should stop parsing at legacy Fasta pragma, i.e. just a newline and gt sign: \n>", function() {
        expect(jsonOutput9["parsedData"]).toEqual([]);
    });

    it("should ignore # lines", function() {
        expect(jsonOutput5["parsedData"]).toEqual([]);
    });

    // ---- multi-line and multi-parent features ----

    it("should properly parse features split on multiple lines ('discontinuous features')", function() {
        // gff3String8 holds five lines that all carry ID=match001; each one
        // must surface as its own entry in the single feature's "data" array.
        for ( var part = 0; part < 5; part++ ) {
            expect(jsonOutput8["parsedData"][0]["data"][part]["rawdata"]).toBeDefined();
        }
    });

    it("should properly parse features with multiple parents", function() {
        // gff3String7: mrna_1 and mrna_2 are both named in exon_1's Parent
        // attribute ("Parent=mrna_1,mrna_2").

        // first parent should have exactly one child, and the right one
        expect(jsonOutput7["parsedData"][0]["children"].length).toEqual(1);
        expect(jsonOutput7["parsedData"][0]["children"][0]["ID"]).toEqual("exon_1");

        // second parent should have exactly one child, and the right one
        expect(jsonOutput7["parsedData"][1]["children"].length).toEqual(1);
        expect(jsonOutput7["parsedData"][1]["children"][0]["ID"]).toEqual("exon_1");
    });
});
});