UNPKG

@gracexwho/model-card-generator

Version:

Tool for generating model cards for Jupyter Notebook.

304 lines (240 loc) 9.26 kB
"use strict"; exports.__esModule = true; var py = require("../dist/es5"); var dagre = require("dagre"); var Graph = require("graphlib").Graph; var graphing = require("../Graph.js").Graph; var dagreD3 = require("dagre-d3"); var d3 = require("d3"); var fs = require('fs'); var args = process.argv.slice(2); var name = args[0]; var countLines = 0; console.log(name); //const path = './'; //const name = 'fold.py' var contents = fs.readFileSync(name); let jsondata = JSON.parse(contents); //console.log(jsondata); console.log("-----------------------------"); function readCode(jsondata) { var notebookCode = "\n"; const rewriter = new py.MagicsRewriter(); for (let cell of jsondata['cells']) { if (cell['cell_type'] == 'code') { var sourceCode = ""; for (let line of cell['source']) { countLines += 1; if (line[0] == '%') { line = rewriter.rewriteLineMagic(line); //console.log("magic: " + line); HAHA ALL IT DID WAS COMMENT IT OUT FOR ME // for some reason rewriteCellMagic doesn't work right now } sourceCode += line; } //let sourceCode = cell['source'].filter(str => str[0] != '%').join(""); //console.log(sourceCode); notebookCode += sourceCode + '\n'; } } //console.log(notebookCode); printDefUse(notebookCode); } readCode(jsondata); /** TO DO * Find notebooks whose specs are actually included in the specs file ughhh * FIND SCOPE OF THE MAX EXTENT OF AN IMPORT STATEMENT -> when the subsequent def is no longer used beyond a certain point * * Curate notebooks so there's no "no spec for module ____" error * GRAPHVIZ to visualize the dataflow graphs (import statements) * * * "import" and "from" types are both import statements, idk why they separate them but ok * * * * Can print node types module -> see def_use relevant to those nodes only? = DONE * We're interested in the IMPORT -> DEF def-use pairs = DONE * How to get it to print only the definition line of code??? = DONE * Figure out how to slice import statements = DONE * @type {string} */ var text = fs.readFileSync(name).toString(); function printDefUse(code){ let tree = py.parse(code); //console.log(py.walk(tree).map(function (node) { return node.type; })); let cfg = new py.ControlFlowGraph(tree); //console.log(cfg.blocks); //console.log("-----"); //console.log(cfg.blocks[0]); const analyzer = new py.DataflowAnalyzer(); const flows = analyzer.analyze(cfg).dataflows; var importScope = {}; var lineToCode = {}; for (let flow of flows.items) { let fromNode = py.printNode(flow.fromNode).split("\n"); let toNode = py.printNode(flow.toNode).split("\n"); lineToCode[flow.fromNode.location.first_line] = fromNode[0]; lineToCode[flow.fromNode.location.last_line] = fromNode[fromNode.length-1]; lineToCode[flow.toNode.location.last_line] = toNode[toNode.length-1]; lineToCode[flow.toNode.location.first_line] = toNode[0]; if (flow.fromNode.type == "from" || flow.fromNode.type == "import") { importScope[flow.fromNode.location.first_line] = -1; // add to list of dictionaries } //g.setEdge(flow.fromNode.location.first_line.toString(), flow.toNode.location.first_line.toString()); } var n = countLines; console.log("NOTEBOOK NAME: ", name); console.log("NUMBER OF LINES OF CODE ", n); // need graph size to be size of lineToCode, not number of edges var numgraph = new graphing(n+1); for (let flow of flows.items) { numgraph.addEdge(flow.fromNode.location.first_line, flow.toNode.location.first_line); } findImportScope(importScope, lineToCode, numgraph); // store the location of "import" in a graph and do depth-first search = note last location // build a graph } function findImportScope(importScope, lineToCode, numgraph) { //console.log("edges", g.edges()); //console.log("nodes", g.nodes()); //console.log("import statements: ", importScope); //console.log("line to code: ", lineToCode); //console.log(lineToCode[11]); //console.log(numgraph.edge.length); //console.log(numgraph.edge); var imports = Object.keys(importScope); var scopes = {}; for (let lineNum of imports) { var result = numgraph.findLongestPathSrc(numgraph.edge.length, parseInt(lineNum)) scopes[lineNum] = result[0]; var order = result[1]; console.log(lineToCode[lineNum]); console.log("START: ", lineNum.toString(), " END: ", scopes[lineNum]); //console.log(result[1]); //generateEdges(order); //labelNodeColor(order[0], order, lineToCode); } //console.log(scopes); } // TO DO: Convert "order" into the _ -> _ format, with everything following an import statement being the colour of that import statement // generate colour map txt file for each notebook // then go to pycharm and run cindy_graph.py function generateEdges(order) { var i; for (i=0; i<order.length-1;i++) { var writeData = order[i].toString() + '->' + order[i+1].toString() + '\n' fs.appendFileSync('C:/Users/grace/PycharmProjects/model-card-generator/graphviz/' + name + '.txt', writeData); } } function labelNodeColor(node, order, lineToCode) { var nodeCode = lineToCode[node]; var filePathColor = 'C:/Users/grace/PycharmProjects/model-card-generator/graphviz/' + name + '.color.txt'; //var filePathColor = './' + name + '.color.txt'; var filePathCode = 'C:/Users/grace/PycharmProjects/model-card-generator/graphviz/' + name + '.code.txt'; //var filePathCode = './' + name + '.code.txt'; var label = ''; var color = ''; if (nodeCode.includes('datasets')) { // everything in order should be label = "Data Cleaning"; } else if(nodeCode.includes('.impute')) { label = "Data Cleaning"; } else if(nodeCode.includes('.feature_extraction')) { label = "Data Cleaning"; } else if(nodeCode.includes('.utils')) { label = "Data Cleaning"; } else if(nodeCode.includes('.preprocessing')) { label = "Preprocessing"; }else if(nodeCode.includes('.clustering')) { label = "Preprocessing"; }else if(nodeCode.includes('.feature_selection')) { label = "Preprocessing"; }else if(nodeCode.includes('.pipeline')) { label = "Preprocessing"; }else if(nodeCode.includes('.linear_model')) { label = "Model Training"; }else if(nodeCode.includes('.multi_class')) { label = "Model Training"; }else if(nodeCode.includes('.naive_bayes')) { label = "Model Training"; }else if(nodeCode.includes('.neighbors')) { label = "Model Training"; }else if(nodeCode.includes('.neural_network')) { label = "Model Training"; }else if(nodeCode.includes('.svm')) { label = "Model Training"; }else if(nodeCode.includes('.semi_supervised')) { label = "Model Training"; }else if(nodeCode.includes('.tree')) { label = "Model Training"; }else if(nodeCode.includes('model_selection')) { label = "Model Evaluation"; }else if(nodeCode.includes('metrics')) { label = "Model Evaluation"; }else { label = "Null"; } if (label === "Data Cleaning") { color = "green"; }else if (label === "Preprocessing") { color = "blue"; }else if (label === "Model Training") { color = "orange"; }else if (label === "Model Evaluation") { color = "red"; }else { color = "white"; } for (let vertex of order) { //console.log("vertex", vertex, color); var writeData = vertex.toString() + '->' + color + '\n' var writeCodeData = vertex.toString() + '->' + lineToCode[vertex] + '\n'; fs.appendFileSync(filePathColor, writeData); fs.appendFileSync(filePathCode, writeCodeData); } } /** Data cleaning datasets impute feature_extraction utils Preprocessing preprocessing clustering feature_selection pipeline Model Training linear_model multi_class naive_bayes neighbors neural_network svm semi_supervised tree Model Evaluation model_selection metrics **/ //console.log(text + "\n") //printDefUse(text) /** var Graph = require("graphlib").Graph; // Create a new directed graph var g = new Graph(); // Add node "a" to the graph with no label g.setNode("a"); g.hasNode("a"); // => true // g.setEdge("c", "d", { k: 456 }); // Add node "b" to the graph with a String label g.setNode("b", "b's value"); // Get the label for node b g.node("b"); // => "b's value" // Add node "c" to the graph with an Object label g.setNode("c", { k: 123 }); **/