pxt-core

Microsoft MakeCode provides Blocks / JavaScript / Python tools and editors
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.splitName = exports.Cache = exports.uploadRefs = void 0; const child_process = require("child_process"); var U = pxt.Util; /* Purpose: UploadRefs uploads git objects (commits, trees, blobs) to cloud storage for retrieval when serving our web apps or docs. The cloud also has logic to request (from github) git objects and store them in the cloud however the server can run out of memory (which should be fixable) when we're uploading lots of objects so this CLI command is useful when uploading large amounts of git objects. TODOs (updated 8/14/2019 by Daryl & Michal) - Upload tree & file objects first: currently this code uploads all commits first and then the associated "tree" and blob objects. The issue with this is that the cloud checks for the exists of a commit object and assumes if it exists that all the associated tree objects have already been uploaded. So if "uploadRefs" gets interrupted, the git cache could be in an inconsitent state where commits are uploaded but not all of the necessary data is present. To fix the broken state, we simply need to let uploadRefs run to completion, but it'd be better to not allow this inconsitency in the first place by uploading commits last. - Handle network interruptions: when running "pxt uploadrefs", we occassionally get "INTERNAL ERROR: Error: socket hang up" errors which can leave things in a bad state (see above.) We should have retry logic built in. - Add commandline switches for: - Traverse parent commits. By default uploadRefs will not follow the parents of a commit, but there may be times where this is useful (it could save the server extra work). - Start from a specific commit. If uploadRefs gets interrupted it would save a lot of time if we could pass a certain commit to resume from. */ async function uploadRefs(id, repoUrl) { pxt.log(`uploading refs starting from ${id} in ${repoUrl} to ${pxt.Cloud.apiRoot}`); let gitCatFile; let gitCatFileBuf = new U.PromiseBuffer(); let apiLockAsync = new U.PromiseQueue(); let gitCache = new Cache(); let lastUsage = 0; let repoPath = ''; startGitCatFile(); let visited = {}; let toCheck = []; await processCommit(id); await uploadMissingObjects(undefined, true); await refreshRefs(id, repoUrl); killGitCatFile(); process.exit(0); async function processCommit(id, uploadParents = false) { if (visited[id]) return; visited[id] = true; await uploadMissingObjects(id); //console.log('commit: ' + id); let obj = await getGitObjectAsync(id); if (obj.type != "commit") throw new Error("bad type"); if (uploadParents && obj.commit.parents) { // Iterate through every parent and parse the commit. 
for (let parent of obj.commit.parents) { await processCommit(parent); } } // Process every tree await processTreeEntry('000', obj.commit.tree); } async function processTree(entries) { for (let entry of entries) { //console.log(entry.name, entry.sha); await processTreeEntry(entry.mode, entry.sha); } } async function processTreeEntry(mode, id) { if (visited[id]) return; visited[id] = true; await uploadMissingObjects(id); if (mode.indexOf('1') != 0) { let obj = await getGitObjectAsync(id); if (obj.type == 'tree') { //console.log('tree:' + obj.id); await processTree(obj.tree); } else { throw new Error("bad entry type: " + obj.type); } } } function maybeGcGitCatFile() { if (!gitCatFile) return; let d = Date.now() - lastUsage; if (d < 3000) return; //console.log("[gc] git cat-file") gitCatFile.stdin.end(); gitCatFile = null; gitCatFileBuf.drain(); } function startGitCatFile() { if (!lastUsage) { setInterval(maybeGcGitCatFile, 1000); } lastUsage = Date.now(); if (!gitCatFile) { //console.log("[run] git cat-file --batch") gitCatFile = child_process.spawn("git", ["cat-file", "--batch"], { cwd: repoPath, env: process.env, stdio: "pipe", shell: false }); gitCatFile.stderr.setEncoding("utf8"); gitCatFile.stderr.on('data', (msg) => { console.error("[git cat-file error] " + msg); }); gitCatFile.stdout.on('data', (buf) => gitCatFileBuf.push(buf)); } } function killGitCatFile() { gitCatFile.kill(); } async function uploadMissingObjects(id, force) { if (id) toCheck.push(id); if (toCheck.length > 50 || force) { let hashes = toCheck; toCheck = []; // Check with cloud console.log(`checking hashes with cloud`); let response = await pxt.Cloud.privateRequestAsync({ url: 'upload/status', data: { hashes: hashes } }); let missingHashes = response.json.missing; for (let missing of missingHashes) { let obj = await getGitObjectAsync(missing); // Upload data to cloud console.log(`uploading raw ${missing} with type ${obj.type} to cloud`); await pxt.Cloud.privateRequestAsync({ url: `upload/raw`, data: { type: obj.type, content: obj.data.toString('base64'), encoding: 'base64', hash: missing } }); } } } async function refreshRefs(id, repoUrl) { console.log("Updating refs"); const data = { HEAD: id, repoUrl: repoUrl }; await pxt.Cloud.privateRequestAsync({ url: `upload/rawrefs`, data: data }); } function getGitObjectAsync(id) { if (!id || /[\r\n]/.test(id)) throw new Error("bad id: " + id); let cached = gitCache.get(id); if (cached) return Promise.resolve(cached); return apiLockAsync.enqueue("cat-file", () => { // check again, maybe the object has been cached while we were waiting cached = gitCache.get(id); if (cached) return Promise.resolve(cached); //console.log("cat: " + id) startGitCatFile(); gitCatFile.stdin.write(id + "\n"); let sizeLeft = 0; let bufs = []; let res = { id: id, type: "", memsize: 64, data: null }; let typeBuf = null; let loop = () => gitCatFileBuf.shiftAsync() .then(buf => { startGitCatFile(); // make sure the usage counter is updated if (!res.type) { //console.log(`cat-file ${id} -> ${buf.length} bytes; ${buf[0]} ${buf[1]}`) if (typeBuf) { buf = Buffer.concat([typeBuf, buf]); typeBuf = null; } else { while (buf[0] == 10) buf = buf.slice(1); } let end = buf.indexOf(10); //console.log(`len-${buf.length} pos=${end}`) if (end < 0) { if (buf.length == 0) { // skip it } else { typeBuf = buf; } //console.info(`retrying read; sz=${buf.length}`) return loop(); } let line = buf; if (end >= 0) { line = buf.slice(0, end); buf = buf.slice(end + 1); } else { throw new Error("bad cat-file respose: " + 
buf.toString("utf8").slice(0, 100)); } let lineS = line.toString("utf8"); if (/ missing/.test(lineS)) { throw new Error("file missing"); } let m = /^([0-9a-f]{40}) (\S+) (\d+)/.exec(lineS); if (!m) throw new Error("invalid cat-file response: " + lineS + " <nl> " + buf.toString("utf8")); res.id = m[1]; res.type = m[2]; sizeLeft = parseInt(m[3]); res.memsize += sizeLeft; // approximate } if (buf.length > sizeLeft) { buf = buf.slice(0, sizeLeft); } bufs.push(buf); sizeLeft -= buf.length; if (sizeLeft <= 0) { res.data = Buffer.concat(bufs); return res; } else { return loop(); } }); return loop().then(obj => { //console.log(`[cat-file] ${id} -> ${obj.id} ${obj.type} ${obj.data.length}`) if (obj.type == "tree") { obj.tree = parseTree(obj.data); } else if (obj.type == "commit") { obj.commit = parseCommit(obj.data); } // check if this is an object in a specific revision, not say on 'master' // and if it's small enough to warant caching if (/^[0-9a-f]{40}/.test(id)) { gitCache.set(id, obj, obj.memsize); } return obj; }); }); } } exports.uploadRefs = uploadRefs; const maxCacheSize = 32 * 1024 * 1024; const maxCacheEltSize = 256 * 1024; class Cache { constructor() { this.cache = {}; this.size = 0; } get(key) { if (!key) return null; if (this.cache.hasOwnProperty(key)) return this.cache[key]; return null; } set(key, v, sz) { if (!key) return; delete this.cache[key]; if (!v || sz > maxCacheEltSize) return; if (this.size + sz > maxCacheSize) { this.flush(); } this.size += sz; this.cache[key] = v; } flush() { this.size = 0; this.cache = {}; } } exports.Cache = Cache; function splitName(fullname) { let m = /(.*)\/([^\/]+)/.exec(fullname); let parent = null; let name = ""; if (!m) { if (fullname == "/") { } else if (fullname.indexOf("/") == -1) { parent = "/"; name = fullname; } else { throw new Error("bad name"); } } else { parent = m[1] || "/"; name = m[2]; } return { parent, name }; } exports.splitName = splitName; function parseTree(buf) { let entries = []; let ptr = 0; while (ptr < buf.length) { let start = ptr; while (48 <= buf[ptr] && buf[ptr] <= 55) ptr++; if (buf[ptr] != 32) throw new Error("bad tree format"); let mode = buf.slice(start, ptr).toString("utf8"); ptr++; start = ptr; while (buf[ptr]) ptr++; if (buf[ptr] != 0) throw new Error("bad tree format 2"); let name = buf.slice(start, ptr).toString("utf8"); ptr++; let sha = buf.slice(ptr, ptr + 20).toString("hex"); ptr += 20; if (ptr > buf.length) throw new Error("bad tree format 3"); entries.push({ mode, name, sha }); } return entries; } function parseCommit(buf) { let cmt = buf.toString("utf8"); let mtree = /^tree (\S+)/m.exec(cmt); let mpar = /^parent (.+)/m.exec(cmt); let mauthor = /^author (.+) (\d+) ([+\-]\d{4})$/m.exec(cmt); let midx = cmt.indexOf("\n\n"); return { tree: mtree[1], parents: mpar ? mpar[1].split(/\s+/) : undefined, author: mauthor[1], date: parseInt(mauthor[2]), msg: cmt.slice(midx + 2) }; }
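/*
Illustrative usage (a sketch, not part of the original module): how a CLI entry
point might invoke uploadRefs. The module path and argument handling below are
assumptions; in pxt the command is wired up by the CLI as
"pxt uploadrefs <commit-sha> <repo-url>", and pxt.Cloud must already be
configured with an API root and access token before this runs. Note that
uploadRefs calls process.exit(0) itself on success.

    const { uploadRefs } = require("./uploadrefs"); // hypothetical module path
    const [sha, repoUrl] = process.argv.slice(2);
    uploadRefs(sha, repoUrl).catch(err => {
        console.error(err);
        process.exit(1);
    });
*/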