UNPKG

china-area-data-lg

Version:

中国省份城市区数据

142 lines (128 loc) 3.36 kB
const request = require('request-promise')
const iconv = require('iconv-lite')
const cheerio = require('cheerio')
const md5 = require('md5')
const path = require('path')
const fs = require('fs')
const special = require('./special')

// Common URL prefix of the 2019 pages on stats.gov.cn; every scraped href is appended to it.
const BASE_URL = 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2019/'

// Cells whose text contains a digit hold a numeric code rather than a name.
const HAS_DIGIT = /\d/

// Province-level entries merged into rs['86'] by hand after the province scrape.
const specialProvinces = {
  "710000": "台湾省",
  "810000": "香港特别行政区",
  "820000": "澳门特别行政区"
}

/**
 * Resolve after `time` milliseconds, logging the delay first.
 * Not called anywhere in the visible part of this script.
 *
 * @param {number} time - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(time) {
  return new Promise(resolve => {
    console.log(`sleep: ${time}`)
    setTimeout(resolve, time)
  })
}

/**
 * Return the HTML of `url`, using `./temp/<md5(url)>.html` as an on-disk cache.
 *
 * On a cache miss the page is downloaded as raw bytes, decoded as gb2312,
 * stored in the cache and returned.
 *
 * @param {string} url - Absolute URL of the page.
 * @returns {Promise<string>} Decoded page content.
 */
async function getPage(url) {
  const file = `./temp/${md5(url)}.html`
  if (fs.existsSync(file)) {
    return fs.readFileSync(file, 'utf-8')
  }

  // encoding: null makes `request` hand back a Buffer so iconv can decode it.
  const response = await request(url, {
    resolveWithFullResponse: true,
    encoding: null,
  })
  const content = iconv.decode(response.body, 'gb2312')

  // The cache directory may not exist on a fresh checkout.
  fs.mkdirSync(path.dirname(file), { recursive: true })
  fs.writeFileSync(file, content)
  return content
}

/** Store `name` under `result[parentId][id]`, creating the parent bucket on demand. */
function addEntry(result, parentId, id, name) {
  if (!result[parentId]) {
    result[parentId] = {}
  }
  result[parentId][id] = name
}

/**
 * Read the province links from the index page into `result['86']`.
 *
 * @param {string} html - Index page markup.
 * @param {Object} result - Accumulator keyed by parent id.
 * @returns {string[]} Absolute URLs of the province pages.
 */
function parseIndexPage(html, result) {
  const $ = cheerio.load(html)
  const provinceUrls = []
  $('.provincetr td a').each((index, el) => {
    const href = $(el).attr('href')
    // The href's part before the first '.' is the province prefix; pad with '0000' to form the id.
    result['86'][`${href.split('.')[0]}0000`] = $(el).text()
    provinceUrls.push(`${BASE_URL}${href}`)
  })
  return provinceUrls
}

/**
 * Read the city rows of a province page into `result` and `cityIds`.
 *
 * @param {string} html - Province page markup.
 * @param {Object} result - Accumulator keyed by parent id.
 * @param {string[]} cityIds - Receives every city id found.
 * @returns {string[]} Unique absolute URLs linked from the city rows.
 */
function parseProvincePage(html, result, cityIds) {
  const $ = cheerio.load(html)
  // Several links in a row can share one href; a Set avoids processing a page twice.
  const cityUrls = new Set()
  $('.citytr td a').each((index, el) => {
    const href = $(el).attr('href')
    const text = $(el).text()
    if (!HAS_DIGIT.test(text)) {
      // Drop the first three characters of the href stem, then pad with '00'.
      const id = `${href.split('.')[0].slice(3)}00`
      cityIds.push(id)
      addEntry(result, `${id.slice(0, 2)}0000`, id, text)
    }
    cityUrls.add(`${BASE_URL}${href}`)
  })
  return [...cityUrls]
}

/**
 * Read the county/town cells of a city page into `result`.
 *
 * @param {string} html - City page markup.
 * @param {Object} result - Accumulator keyed by parent id.
 */
function parseCityPage(html, result) {
  const $ = cheerio.load(html)
  const cells = $('.countytr td, .towntr td')
  cells.each((index, el) => {
    const text = $(el).text()
    if (HAS_DIGIT.test(text)) {
      return
    }

    const link = $(el).find('a')
    // With a link, the id is the file stem after the '/' in the href. Without
    // one, it is the previous cell's text with trailing zeros removed.
    // NOTE(review): presumably the previous cell is a zero-padded code; a code
    // whose significant part itself ends in 0 would be over-trimmed — confirm.
    const id = link.length
      ? $(link[0]).attr('href').split('.')[0].split('/')[1]
      : $(cells[index - 1]).text().replace(/0+$/g, '')
    if (!id) {
      return
    }
    addEntry(result, `${id.slice(0, 4)}00`, id, text)
  })
}

/**
 * Scrape provinces, cities and counties, then write `../data.json`,
 * `./data.json` and `./only_2_level_city_id.json`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const cityIds = []
  const rs = { '86': {} }

  const level0 = await getPage(`${BASE_URL}index.html`)
  const provinceUrls = parseIndexPage(level0, rs)
  Object.assign(rs['86'], specialProvinces)

  // Cities
  for (const provinceUrl of provinceUrls) {
    const level1 = await getPage(provinceUrl)
    const cityUrls = parseProvincePage(level1, rs, cityIds)

    // Counties / towns
    for (const cityUrl of cityUrls) {
      const level2 = await getPage(cityUrl)
      parseCityPage(level2, rs)
    }
  }

  // Every parse step above is synchronous once its page is awaited, so the
  // data is complete here and can be written without any delay.
  const output = JSON.stringify(Object.assign({}, rs, special), null, 2)
  fs.writeFileSync('../data.json', output)
  fs.writeFileSync('./data.json', output)

  // City ids that ended up with no child entries in the scraped data.
  const _2levelCityIds = cityIds.filter(c => !Object.keys(rs[c] || {}).length)
  fs.writeFileSync('./only_2_level_city_id.json', JSON.stringify(_2levelCityIds, null, 2))
  console.log('done')
}

main().catch(err => {
  // Surface download/parse failures instead of leaving an unhandled rejection.
  console.error(err)
  process.exitCode = 1
})