@mtatko/tiktok-scraper
Version:
TikTok Scraper & Downloader. Scrape information from User, Trending and HashTag pages and download video posts
1,113 lines (1,112 loc) • 47.2 kB
JavaScript
;
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (Object.hasOwnProperty.call(mod, k)) result[k] = mod[k];
result["default"] = mod;
return result;
};
Object.defineProperty(exports, "__esModule", { value: true });
const request_promise_1 = __importDefault(require("request-promise"));
const os_1 = require("os");
const fs_1 = require("fs");
const json2csv_1 = require("json2csv");
const ora_1 = __importDefault(require("ora"));
const bluebird_1 = require("bluebird");
const events_1 = require("events");
const socks_proxy_agent_1 = require("socks-proxy-agent");
const async_1 = require("async");
const url_1 = require("url");
const constant_1 = __importDefault(require("../constant"));
const helpers_1 = require("../helpers");
const _ = __importStar(require("lodash"));
const core_1 = require("../core");
class TikTokScraper extends events_1.EventEmitter {
constructor({ download, filepath, filetype, proxy, strictSSL = true, asyncDownload, cli = false, event = false, progress = false, input, number, since, type, by_user_id = false, store_history = false, historyPath = '', noWaterMark = false, useTestEndpoints = false, fileName = '', timeout = 0, bulk = false, zip = false, test = false, hdVideo = false, webHookUrl = '', method = 'POST', headers, verifyFp = '', sessionList = [], }) {
super();
this.storeValue = '';
this.userIdStore = '';
this.verifyFp = verifyFp;
this.mainHost = useTestEndpoints ? 'https://t.tiktok.com/' : 'https://m.tiktok.com/';
this.headers = headers;
this.download = download;
this.filepath = process.env.SCRAPING_FROM_DOCKER ? '/usr/app/files' : filepath || '';
this.fileName = fileName;
this.json2csvParser = new json2csv_1.Parser({ flatten: true });
this.filetype = filetype;
this.input = input;
this.test = test;
this.proxy = proxy;
this.strictSSL = strictSSL;
this.number = number;
this.since = since;
this.csrf = '';
this.zip = zip;
this.cookieJar = request_promise_1.default.jar();
this.hdVideo = hdVideo;
this.sessionList = sessionList;
this.asyncDownload = asyncDownload || 5;
this.asyncScraping = () => {
switch (this.scrapeType) {
case 'user':
case 'trend':
return 1;
default:
return 1;
}
};
this.collector = [];
this.event = event;
this.scrapeType = type;
this.cli = cli;
this.spinner = ora_1.default({ text: 'TikTok Scraper Started', stream: process.stdout });
this.byUserId = by_user_id;
this.storeHistory = cli && download && store_history;
this.historyPath = process.env.SCRAPING_FROM_DOCKER ? '/usr/app/files' : historyPath || os_1.tmpdir();
this.idStore = '';
this.noWaterMark = noWaterMark;
this.maxCursor = 0;
this.noDuplicates = [];
this.timeout = timeout;
this.bulk = bulk;
this.validHeaders = false;
this.Downloader = new core_1.Downloader({
progress,
cookieJar: this.cookieJar,
proxy,
noWaterMark,
headers,
filepath: process.env.SCRAPING_FROM_DOCKER ? '/usr/app/files' : filepath || '',
bulk,
});
this.webHookUrl = webHookUrl;
this.method = method;
this.httpRequests = {
good: 0,
bad: 0,
};
this.store = [];
}
get fileDestination() {
if (this.fileName) {
if (!this.zip && this.download) {
return `${this.folderDestination}/${this.fileName}`;
}
return this.filepath ? `${this.filepath}/${this.fileName}` : this.fileName;
}
switch (this.scrapeType) {
case 'user':
case 'hashtag':
if (!this.zip && this.download) {
return `${this.folderDestination}/${this.input}_${Date.now()}`;
}
return this.filepath ? `${this.filepath}/${this.input}_${Date.now()}` : `${this.input}_${Date.now()}`;
default:
if (!this.zip && this.download) {
return `${this.folderDestination}/${this.scrapeType}_${Date.now()}`;
}
return this.filepath ? `${this.filepath}/${this.scrapeType}_${Date.now()}` : `${this.scrapeType}_${Date.now()}`;
}
}
get folderDestination() {
switch (this.scrapeType) {
case 'user':
return this.filepath ? `${this.filepath}/${this.input}` : this.input;
case 'hashtag':
return this.filepath ? `${this.filepath}/#${this.input}` : `#${this.input}`;
case 'music':
return this.filepath ? `${this.filepath}/music_${this.input}` : `music_${this.input}`;
case 'trend':
return this.filepath ? `${this.filepath}/trend` : `trend`;
case 'video':
return this.filepath ? `${this.filepath}/video` : `video`;
default:
throw new TypeError(`${this.scrapeType} is not supported`);
}
}
get getApiEndpoint() {
switch (this.scrapeType) {
case 'user':
return `${this.mainHost}share/item/list`;
case 'trend':
return `${this.mainHost}api/recommend/item_list/`;
case 'hashtag':
return `${this.mainHost}api/challenge/item_list/`;
case 'music':
return `${this.mainHost}api/music/item_list/`;
default:
throw new TypeError(`${this.scrapeType} is not supported`);
}
}
get getProxy() {
const proxy = Array.isArray(this.proxy) && this.proxy.length ? this.proxy[Math.floor(Math.random() * this.proxy.length)] : this.proxy;
if (proxy) {
if (proxy.indexOf('socks4://') > -1 || proxy.indexOf('socks5://') > -1) {
return {
socks: true,
proxy: new socks_proxy_agent_1.SocksProxyAgent(proxy),
};
}
return {
socks: false,
proxy,
};
}
return {
socks: false,
proxy: '',
};
}
request({ uri, method, qs, body, form, headers, json, gzip, followAllRedirects, simple = true }, bodyOnly = true, simpleOptionsFlag = false, unsignedUrl = '', signature = '') {
return new Promise(async (resolve, reject) => {
const proxy = this.getProxy;
const options = Object.assign(Object.assign(Object.assign(Object.assign(Object.assign(Object.assign(Object.assign(Object.assign(Object.assign(Object.assign(Object.assign({ jar: this.cookieJar, uri,
method }, (qs ? { qs } : {})), (body ? { body } : {})), (form ? { form } : {})), { headers: Object.assign(Object.assign(Object.assign({}, this.headers), headers), (this.csrf ? { 'x-secsdk-csrf-token': this.csrf } : {})) }), (json ? { json: true } : {})), (gzip ? { gzip: true } : {})), { resolveWithFullResponse: true, followAllRedirects: followAllRedirects || false, simple }), (proxy.proxy && proxy.socks ? { agent: proxy.proxy } : {})), (proxy.proxy && !proxy.socks ? { proxy: `http://${proxy.proxy}/` } : {})), (this.strictSSL === false ? { rejectUnauthorized: false } : {})), { timeout: 10000 });
const simpleOptions = {
jar: this.cookieJar,
uri: `${unsignedUrl}&_signature=${signature}`,
headers: {
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36',
},
json: true
};
const session = this.sessionList[Math.floor(Math.random() * this.sessionList.length)];
if (session) {
this.cookieJar.setCookie(session, 'https://tiktok.com');
}
const cookies = this.cookieJar.getCookieString('https://tiktok.com');
if (cookies.indexOf('tt_webid_v2') === -1) {
this.cookieJar.setCookie(`tt_webid_v2=69${helpers_1.makeid(17)}; Domain=tiktok.com; Path=/; Secure; hostOnly=false`, 'https://tiktok.com');
}
try {
let response;
if (simpleOptionsFlag) {
response = await request_promise_1.default(simpleOptions);
}
else {
response = await request_promise_1.default(options);
}
if (options.method === 'HEAD') {
const csrf = response.headers['x-ware-csrf-token'];
this.csrf = csrf.split(',')[1];
}
setTimeout(() => {
resolve(bodyOnly ? response.body : response);
}, this.timeout);
}
catch (error) {
reject(error);
}
});
}
returnInitError(error) {
if (this.cli && !this.bulk) {
this.spinner.stop();
}
if (this.event) {
this.emit('error', error);
}
else {
throw error;
}
}
async scrape() {
if (this.cli && !this.bulk) {
this.spinner.start();
}
if (this.download && !this.zip) {
try {
await bluebird_1.fromCallback(cb => fs_1.mkdir(this.folderDestination, { recursive: true }, cb));
}
catch (error) {
return this.returnInitError(error.message);
}
}
if (!this.scrapeType || constant_1.default.scrape.indexOf(this.scrapeType) === -1) {
return this.returnInitError(`Missing scraping type. Scrape types: ${constant_1.default.scrape} `);
}
if (this.scrapeType !== 'trend' && !this.input) {
return this.returnInitError('Missing input');
}
console.log('version v2.8');
await this.mainLoop();
if (this.event) {
return this.emit('done', 'completed');
}
if (this.storeHistory) {
await this.getDownloadedVideosFromHistory();
}
if (this.noWaterMark) {
await this.withoutWatermark();
}
const [json, csv, zip] = await this.saveCollectorData();
if (this.storeHistory) {
this.collector.forEach(item => {
if (this.store.indexOf(item.id) === -1 && item.downloaded) {
this.store.push(item.id);
}
});
await this.storeDownloadProgress();
}
if (this.webHookUrl) {
await this.sendDataToWebHookUrl();
}
return Object.assign(Object.assign(Object.assign(Object.assign(Object.assign({ headers: Object.assign(Object.assign({}, this.headers), { cookie: this.cookieJar.getCookieString('https://tiktok.com') }), collector: this.collector }, (this.download ? { zip } : {})), (this.filetype === 'all' ? { json, csv } : {})), (this.filetype === 'json' ? { json } : {})), (this.filetype === 'csv' ? { csv } : {})), (this.webHookUrl ? { webhook: this.httpRequests } : {}));
}
withoutWatermark() {
return new Promise((resolve, reject) => {
async_1.forEachLimit(this.collector, 5, async (item) => {
try {
item.videoApiUrlNoWaterMark = await this.extractVideoId(item);
item.videoUrlNoWaterMark = await this.getUrlWithoutTheWatermark(item.videoApiUrlNoWaterMark);
}
catch (_a) {
throw new Error(`Can't extract unique video id`);
}
}, err => {
if (err) {
return reject(err);
}
resolve(null);
});
});
}
async extractVideoId(item) {
if (item.createTime > 1595808000) {
return '';
}
try {
const result = await request_promise_1.default({
uri: item.videoUrl,
headers: this.headers,
});
const position = Buffer.from(result).indexOf('vid:');
if (position !== -1) {
const id = Buffer.from(result)
.slice(position + 4, position + 36)
.toString();
return `https://api2-16-h2.musical.ly/aweme/v1/play/?video_id=${id}&vr_type=0&is_play_url=1&source=PackSourceEnum_PUBLISH&media_type=4${this.hdVideo ? `&ratio=default&improve_bitrate=1` : ''}`;
}
}
catch (_a) {
}
return '';
}
async getUrlWithoutTheWatermark(uri) {
if (!uri) {
return '';
}
const options = {
uri,
method: 'GET',
headers: {
'user-agent': 'com.zhiliaoapp.musically/2021600040 (Linux; U; Android 5.0; en_US; SM-N900T; Build/LRX21V; Cronet/TTNetVersion:6c7b701a 2020-04-23 QuicVersion:0144d358 2020-03-24)',
'sec-fetch-mode': 'navigate',
},
followAllRedirects: true,
simple: false,
};
try {
const response = await this.request(options, false);
return response.request.uri.href;
}
catch (err) {
throw new Error(`Can't extract video url without the watermark`);
}
}
mainLoop() {
return new Promise((resolve, reject) => {
const taskArray = Array.from({ length: 1000 }, (v, k) => k + 1);
async_1.forEachLimit(taskArray, this.asyncScraping(), (item, cb) => {
switch (this.scrapeType) {
case 'user':
this.getUserId()
.then(query => this.submitScrapingRequest(Object.assign(Object.assign({}, query), { cursor: this.maxCursor }), true))
.then(kill => cb(kill || null))
.catch(error => cb(error));
break;
case 'hashtag':
this.getHashTagId()
.then(query => this.submitScrapingRequest(Object.assign(Object.assign({}, query), { cursor: item === 1 ? 0 : (item - 1) * query.count }), true))
.then(kill => cb(kill || null))
.catch(error => cb(error));
break;
case 'trend':
this.getTrendingFeedQuery()
.then(query => this.submitScrapingRequest(Object.assign({}, query), true))
.then(kill => cb(kill || null))
.catch(error => cb(error));
break;
case 'music':
this.getMusicFeedQuery()
.then(query => this.submitScrapingRequest(Object.assign(Object.assign({}, query), { cursor: item === 1 ? 0 : (item - 1) * query.count }), true))
.then(kill => cb(kill || null))
.catch(error => cb(error));
break;
default:
break;
}
}, err => {
if (err && err !== true) {
return reject(err);
}
resolve(null);
});
});
}
async submitScrapingRequest(query, updatedApiResponse = false) {
try {
if (!this.validHeaders) {
if (this.scrapeType === 'trend') {
await this.getValidHeaders(`https://www.tiktok.com/foryou`, false, 'GET');
}
this.validHeaders = true;
}
const result = await this.scrapeData(query);
if (result && result.statusCode !== 0) {
throw new Error(`Can't scrape more posts`);
}
const { hasMore, maxCursor, cursor } = result;
if ((!result.itemListData) && (updatedApiResponse && !result.itemList) || (!updatedApiResponse && !result.items)) {
throw new Error('No more posts');
}
const { done } = await this.collectPosts(result.itemListData ? result.itemListData : result.itemList);
this.collector = _.reject(this.collector, _.isEmpty);
if (!hasMore) {
console.error(`Only ${this.collector.length} results could be found.`);
return true;
}
if (done) {
return true;
}
this.maxCursor = parseInt(maxCursor === undefined ? cursor : maxCursor, 10);
return false;
}
catch (error) {
console.error(error);
throw error.message ? new Error(error.message) : error;
}
}
async saveCollectorData() {
if (this.download) {
if (this.cli) {
this.spinner.stop();
}
if (this.collector.length && !this.test) {
await this.Downloader.downloadPosts({
zip: this.zip,
folder: this.folderDestination,
collector: this.collector,
fileName: this.fileDestination,
asyncDownload: this.asyncDownload,
});
}
}
let json = '';
let csv = '';
let zip = '';
if (this.collector.length) {
json = `${this.fileDestination}.json`;
csv = `${this.fileDestination}.csv`;
zip = this.zip ? `${this.fileDestination}.zip` : this.folderDestination;
await this.saveMetadata({ json, csv });
}
if (this.cli) {
this.spinner.stop();
}
return [json, csv, zip];
}
async saveMetadata({ json, csv }) {
if (this.collector.length) {
switch (this.filetype) {
case 'json':
await bluebird_1.fromCallback(cb => fs_1.writeFile(json, JSON.stringify(this.collector), cb));
break;
case 'csv':
await bluebird_1.fromCallback(cb => fs_1.writeFile(csv, this.json2csvParser.parse(this.collector), cb));
break;
case 'all':
await Promise.all([
await bluebird_1.fromCallback(cb => fs_1.writeFile(json, JSON.stringify(this.collector), cb)),
await bluebird_1.fromCallback(cb => fs_1.writeFile(csv, this.json2csvParser.parse(this.collector), cb)),
]);
break;
default:
break;
}
}
}
async getDownloadedVideosFromHistory() {
try {
const readFromStore = (await bluebird_1.fromCallback(cb => fs_1.readFile(`${this.historyPath}/${this.storeValue}.json`, { encoding: 'utf-8' }, cb)));
this.store = JSON.parse(readFromStore);
}
catch (_a) {
}
this.collector = this.collector.map(item => {
if (this.store.indexOf(item.id) !== -1) {
item.repeated = true;
}
return item;
});
this.collector = this.collector.filter(item => !item.repeated);
}
async storeDownloadProgress() {
const historyType = this.scrapeType === 'trend' ? 'trend' : `${this.scrapeType}_${this.input}`;
const totalNewDownloadedVideos = this.collector.filter(item => item.downloaded).length;
if (this.storeValue && totalNewDownloadedVideos) {
let history = {};
try {
const readFromStore = (await bluebird_1.fromCallback(cb => fs_1.readFile(`${this.historyPath}/tiktok_history.json`, { encoding: 'utf-8' }, cb)));
history = JSON.parse(readFromStore);
}
catch (error) {
history[historyType] = {
type: this.scrapeType,
input: this.input,
downloaded_posts: 0,
last_change: new Date(),
file_location: `${this.historyPath}/${this.storeValue}.json`,
};
}
if (!history[historyType]) {
history[historyType] = {
type: this.scrapeType,
input: this.input,
downloaded_posts: 0,
last_change: new Date(),
file_location: `${this.historyPath}/${this.storeValue}.json`,
};
}
history[historyType] = {
type: this.scrapeType,
input: this.input,
downloaded_posts: history[historyType].downloaded_posts + totalNewDownloadedVideos,
last_change: new Date(),
file_location: `${this.historyPath}/${this.storeValue}.json`,
};
try {
await bluebird_1.fromCallback(cb => fs_1.writeFile(`${this.historyPath}/${this.storeValue}.json`, JSON.stringify(this.store), cb));
}
catch (_a) {
}
try {
await bluebird_1.fromCallback(cb => fs_1.writeFile(`${this.historyPath}/tiktok_history.json`, JSON.stringify(history), cb));
}
catch (_b) {
}
}
}
mapItem(post) {
let item = {};
if (this.scrapeType == 'user') {
if (this.noDuplicates.indexOf(post.itemInfos.id) === -1) {
this.noDuplicates.push(post.itemInfos.id);
item = Object.assign(Object.assign({ id: post.itemInfos.id, secretID: post.itemInfos.id, text: post.itemInfos.text, createTime: post.itemInfos.createTime, authorMeta: {
id: post.authorInfos.userId,
secUid: post.authorInfos.secUid,
name: post.authorInfos.uniqueId,
nickName: post.authorInfos.nickName,
verified: post.authorInfos.verified,
signature: post.authorInfos.signature,
avatar: post.authorInfos.avatarLarger,
following: post.authorStats.followingCount,
fans: post.authorStats.followerCount,
heart: post.authorStats.heartCount,
video: post.authorStats.videoCount,
digg: post.authorStats.diggCount,
} }, (post.music
? {
musicMeta: {
musicId: post.music.id,
musicName: post.music.title,
musicAuthor: post.music.authorName,
musicOriginal: post.music.original,
musicAlbum: post.music.album,
playUrl: post.music.playUrl,
coverThumb: post.music.coverThumb,
coverMedium: post.music.coverMedium,
coverLarge: post.music.coverLarge,
duration: post.music.duration,
},
}
: {})), { covers: {
default: post.itemInfos.covers,
origin: post.itemInfos.coversOrigin,
dynamic: post.itemInfos.coversDynamic,
}, imageUrl: post.itemInfos.covers[0], webVideoUrl: `https://www.tiktok.com/@${post.authorInfos.uniqueId}/video/${post.itemInfos.id}`, videoUrl: post.itemInfos.video.urls, videoUrlNoWaterMark: '', videoApiUrlNoWaterMark: '', videoMeta: {
height: post.itemInfos.video.videoMeta.height,
width: post.itemInfos.video.videoMeta.width,
duration: post.itemInfos.video.videoMeta.duration,
}, diggCount: post.itemInfos.diggCount, shareCount: post.itemInfos.shareCount, playCount: post.itemInfos.playCount, commentCount: post.itemInfos.commentCount, downloaded: false, mentions: post.itemInfos.text.match(/(@\w+)/g) || [], hashtags: post.itemInfos.challengeInfoList
? post.itemInfos.challengeInfoList.map(({ id, title, desc, coverLarger }) => ({
id,
name: title,
title: desc,
cover: coverLarger,
}))
: [], effectStickers: post.itemInfos.stickerTextList
? post.itemInfos.stickerTextList.map(({ ID, name }) => ({
id: ID,
name,
}))
: [] });
}
}
if (this.scrapeType == 'trend') {
if (this.noDuplicates.indexOf(post.id) === -1) {
this.noDuplicates.push(post.id);
item = Object.assign(Object.assign({ id: post.id, secretID: post.video.id, text: post.desc, createTime: post.createTime, authorMeta: {
id: post.author.id,
secUid: post.author.secUid,
name: post.author.uniqueId,
nickName: post.author.nickName,
verified: post.author.verified,
signature: post.author.signature,
avatar: post.author.avatarLarger,
following: post.authorStats.followingCount,
fans: post.authorStats.followerCount,
heart: post.authorStats.heartCount,
video: post.authorStats.videoCount,
digg: post.authorStats.diggCount,
} }, (post.music
? {
musicMeta: {
musicId: post.music.id,
musicName: post.music.title,
musicAuthor: post.music.authorName,
musicOriginal: post.music.original,
musicAlbum: post.music.album,
playUrl: post.music.playUrl,
coverThumb: post.music.coverThumb,
coverMedium: post.music.coverMedium,
coverLarge: post.music.coverLarge,
duration: post.music.duration,
},
}
: {})), { covers: {
default: post.video.cover,
origin: post.video.originCover,
dynamic: post.video.dynamicCover,
}, imageUrl: post.itemInfos.covers[0], webVideoUrl: `https://www.tiktok.com/@${post.author.uniqueId}/video/${post.id}`, videoUrl: post.video.downloadAddr, videoUrlNoWaterMark: '', videoApiUrlNoWaterMark: '', videoMeta: {
height: post.video.height,
width: post.video.width,
duration: post.video.duration,
}, diggCount: post.stats.diggCount, shareCount: post.stats.shareCount, playCount: post.stats.playCount, commentCount: post.stats.commentCount, downloaded: false, mentions: post.desc.match(/(@\w+)/g) || [], hashtags: post.challenges
? post.challenges.map(({ id, title, desc, coverLarger }) => ({
id,
name: title,
title: desc,
cover: coverLarger,
}))
: [], effectStickers: post.effectStickers
? post.effectStickers.map(({ ID, name }) => ({
id: ID,
name,
}))
: [] });
}
}
return item;
}
collectPosts(posts) {
const result = {
done: false,
};
for (let i = 0; i < posts.length; i += 1) {
if (result.done) {
break;
}
let post = posts[i];
if (this.since && post.createTime < this.since) {
result.done = constant_1.default.chronologicalTypes.indexOf(this.scrapeType) !== -1;
if (result.done) {
break;
}
else {
continue;
}
}
const item = this.mapItem(post);
if (this.event) {
this.emit('data', item);
this.collector.push({});
}
else {
this.collector.push(item);
}
if (this.number) {
if (this.collector.length >= this.number) {
result.done = true;
break;
}
}
}
result.done = true;
return result;
}
async getValidHeaders(url = '', signUrl = true, method = 'HEAD') {
const options = Object.assign(Object.assign({ uri: url, method }, (signUrl
? {
qs: {
_signature: helpers_1.sign(url, this.headers['user-agent']),
},
}
: {})), { headers: {
'x-secsdk-csrf-request': 1,
'x-secsdk-csrf-version': '1.2.5',
} });
try {
await this.request(options);
}
catch (error) {
throw new Error(error.message);
}
}
async scrapeData(qs) {
this.storeValue = this.scrapeType === 'trend' ? 'trend' : qs.id || qs.challengeID || qs.musicID;
const unsignedURL = `${this.getApiEndpoint}?${new url_1.URLSearchParams(qs).toString()}`;
const _signature = this.signGivenUrl(unsignedURL);
const options = {
uri: this.getApiEndpoint,
method: 'GET',
qs: Object.assign(Object.assign({}, qs), { _signature }),
json: true
};
try {
const response = await this.request(options, true, this.scrapeType == 'user' ? true : false, unsignedURL, await _signature);
return response;
}
catch (error) {
throw new Error(error.message);
}
}
async getTrendingFeedQuery() {
return {
aid: 1988,
app_name: 'tiktok_web',
device_platform: 'web_pc',
lang: '',
count: 30,
from_page: 'fyp',
itemID: 1,
};
}
async getMusicFeedQuery() {
const musicIdRegex = /.com\/music\/[\w+-]+-(\d{15,22})/.exec(this.input);
if (musicIdRegex) {
this.input = musicIdRegex[1];
}
return {
musicID: this.input,
lang: '',
aid: 1988,
count: 30,
cursor: 0,
verifyFp: '',
};
}
async getHashTagId() {
if (this.idStore) {
return {
challengeID: this.idStore,
count: 30,
cursor: 0,
aid: 1988,
verifyFp: this.verifyFp,
};
}
const id = encodeURIComponent(this.input);
const query = {
uri: `${this.mainHost}node/share/tag/${id}?uniqueId=${id}`,
qs: {
user_agent: this.headers['user-agent'],
},
method: 'GET',
json: true,
};
try {
const response = await this.request(query);
if (response.statusCode !== 0) {
throw new Error(`Can not find the hashtag: ${this.input}`);
}
this.idStore = response.challengeInfo.challenge.id;
return {
challengeID: this.idStore,
count: 30,
cursor: 0,
aid: 1988,
verifyFp: this.verifyFp,
};
}
catch (error) {
throw new Error(error.message);
}
}
async getUserId() {
if (this.byUserId || this.idStore) {
return {
secUid: '',
id: this.input,
type: 1,
count: 30,
minCursor: 0,
maxCursor: 0,
shareUid: ''
};
}
try {
const response = await this.getUserProfileInfo();
this.idStore = response.user.secUid;
this.userIdStore = response.user.id;
return {
secUid: '',
id: this.userIdStore,
type: 1,
count: 30,
minCursor: 0,
maxCursor: 0,
shareUid: ''
};
}
catch (error) {
throw new Error(error.message);
}
}
async getUserProfileInfo() {
console.log('running version -- v2.8');
if (!this.input) {
throw new Error(`Username is missing`);
}
let userAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36';
let url = `https://www.tiktok.com/node/share/user/@${this.input}?aid=1988`;
const options = {
url: url,
method: 'GET',
"rejectUnauthorized": false,
'headers': {
'User-Agent': userAgent,
'connection': 'keep-alive',
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
"accept-language": "en-US,en;q=0.9,ar;q=0.8,de;q=0.7",
"cache-control": "max-age=0",
"sec-ch-ua": "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"101\", \"Google Chrome\";v=\"101\"",
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "\"Windows\"",
"sec-fetch-dest": "document",
"sec-fetch-mode": "navigate",
"sec-fetch-site": "none",
"sec-fetch-user": "?1",
"upgrade-insecure-requests": "1"
}
};
!_.isNil(this.proxy) ? _.extend(options, { proxy: this.proxy }) : '';
console.log('using proxy ...', this.proxy);
const response = await request_promise_1.default(url, options);
let parsedResponse = JSON.parse(response);
let emptyResponse = _.isEmpty(_.get(parsedResponse, 'userInfo'));
let statusCode = _.get(parsedResponse, 'statusCode');
if (!emptyResponse) {
const userMetadata = parsedResponse;
return userMetadata.userInfo;
}
if (emptyResponse) {
options['uri'] = `http://tiktok.com/@${this.input}`;
options['method'] = 'head';
options['resolveWithFullResponse'] = true;
try {
let headResponse = await request_promise_1.default(options);
statusCode = headResponse.statusCode;
}
catch (e) {
statusCode = Object(e)['statusCode'];
}
switch (statusCode) {
case 10202:
case 404: throw new Error(`${statusCode} User does not exist`);
case 200:
default: throw new Error(`${statusCode} transient error`);
}
}
throw new Error(`Can't extract user metadata from the html page. Make sure that user does exist and try to use proxy`);
}
async getHashtagInfo() {
if (!this.input) {
throw new Error(`Hashtag is missing`);
}
const query = {
uri: `${this.mainHost}node/share/tag/${this.input}?uniqueId=${this.input}`,
qs: {
appId: 1233,
},
method: 'GET',
json: true,
};
try {
const response = await this.request(query);
if (!response) {
throw new Error(`Can't find hashtag: ${this.input}`);
}
if (response.statusCode !== 0) {
throw new Error(`Can't find hashtag: ${this.input}`);
}
return response.challengeInfo;
}
catch (error) {
throw new Error(error.message);
}
}
async getMusicInfo() {
if (!this.input) {
throw new Error(`Music is missing`);
}
const musicTitle = /music\/([\w-]+)-\d+/.exec(this.input);
const musicId = /music\/[\w-]+-(\d+)/.exec(this.input);
const query = {
uri: `https://www.tiktok.com/node/share/music/${musicTitle ? musicTitle[1] : ''}-${musicId ? musicId[1] : ''}`,
qs: {
screen_width: 1792,
screen_height: 1120,
lang: 'en',
priority_region: '',
referer: '',
root_referer: '',
app_language: 'en',
is_page_visible: true,
history_len: 6,
focus_state: true,
is_fullscreen: false,
aid: 1988,
app_name: 'tiktok_web',
timezone_name: '',
device_platform: 'web',
musicId: musicId ? musicId[1] : '',
musicName: musicTitle ? musicTitle[1] : '',
},
method: 'GET',
json: true,
};
const unsignedURL = `${query.uri}?${new url_1.URLSearchParams(query.qs).toString()}`;
const _signature = helpers_1.sign(unsignedURL, this.headers['user-agent']);
query.qs._signature = _signature;
try {
const response = await this.request(query);
if (response.statusCode !== 0) {
throw new Error(`Can't find music data: ${this.input}`);
}
return response.musicInfo;
}
catch (error) {
throw new Error(error.message);
}
}
async signUrl() {
if (!this.input) {
throw new Error(`Url is missing`);
}
return helpers_1.sign(this.input, this.headers['user-agent']);
}
async signGivenUrl(url) {
return helpers_1.sign(url, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36');
}
async getVideoMetadataFromHtml() {
const options = {
uri: this.input,
method: 'GET',
json: true,
};
try {
const response = await this.request(options);
if (!response) {
throw new Error(`Can't extract video meta data`);
}
const rawVideoMetadata = response
.split(/<script id="__NEXT_DATA__" type="application\/json" nonce="[\w-]+" crossorigin="anonymous">/)[1]
.split(`</script>`)[0];
const videoProps = JSON.parse(rawVideoMetadata);
const videoData = videoProps.props.pageProps.itemInfo.itemStruct;
return videoData;
}
catch (error) {
throw new Error(`Can't extract video metadata: ${this.input}`);
}
}
async getVideoLink(url, regex, regexlvl2, targetRegex) {
if (targetRegex.exec(url)) {
return url;
}
let isShortLinkLvl = regex.exec(url) || regexlvl2.exec(url);
if (isShortLinkLvl) {
const userAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36';
let headers = {
'User-Agent': userAgent,
};
var response = null;
await request_promise_1.default({
url: url,
headers: headers,
method: 'HEAD',
followAllRedirects: false,
followRedirect: false,
}).then(res => {
response = res.request.uri.href;
}).catch(e => {
response = e.response.location;
});
response = response.split('?')[0];
return response;
}
}
async getVideoMetadata(url = '') {
let count = 0;
let shortLinkLvl1 = /vm.tiktok.com\/([\w.-]+)/;
let shortLinkLvl2 = /m.tiktok.com\/([\w.-]+)\/(\d+)/;
let targetLinkregex = /tiktok.com\/(@[\w.-]+)\/video\/(\d+)/;
while (!targetLinkregex.exec(url)) {
url = await this.getVideoLink(url || this.input, shortLinkLvl1, shortLinkLvl2, targetLinkregex);
count += 1;
console.log('url is', url);
if (count > 3) {
break;
}
}
const videoData = targetLinkregex.exec(url);
if (videoData) {
const videoUsername = videoData[1];
const videoId = videoData[2];
const options = {
method: 'GET',
uri: `https://www.tiktok.com/node/share/video/${videoUsername}/${videoId}`,
json: true,
};
try {
const response = await this.request(options);
if (response.statusCode === 0) {
response.itemInfo.itemStruct['longUrl'] = url;
return response.itemInfo.itemStruct;
}
}
catch (err) {
if (err.statusCode === 404) {
throw new Error('Video does not exist');
}
}
}
throw new Error(`Can't extract video metadata: ${this.input}`);
}
async getVideoMeta(html = true) {
if (!this.input) {
throw new Error(`Url is missing`);
}
let videoData = {};
if (false) {
console.log('getting getVideoMetadataFromHtml');
videoData = await this.getVideoMetadataFromHtml();
}
else {
console.log('getting getVideoMetadataFromAPI');
videoData = await this.getVideoMetadata();
}
const videoItem = {
id: videoData.id,
secretID: videoData.video.id,
text: videoData.desc,
createTime: videoData.createTime,
authorMeta: {
id: videoData.author.id,
secUid: videoData.author.secUid,
name: videoData.author.uniqueId,
nickName: videoData.author.nickName,
following: videoData.authorStats.followingCount,
fans: videoData.authorStats.followerCount,
heart: videoData.authorStats.heartCount,
video: videoData.authorStats.videoCount,
digg: videoData.authorStats.diggCount,
verified: videoData.author.verified,
private: videoData.author.secret,
signature: videoData.author.signature,
avatar: videoData.author.avatarLarger,
},
musicMeta: {
musicId: videoData.music.id,
musicName: videoData.music.title,
musicAuthor: videoData.music.authorName,
musicOriginal: videoData.music.original,
coverThumb: videoData.music.coverThumb,
coverMedium: videoData.music.coverMedium,
coverLarge: videoData.music.coverLarge,
duration: videoData.music.duration,
},
imageUrl: videoData.video.cover,
longUrl: videoData.longUrl,
videoUrl: videoData.video.playAddr,
videoUrlNoWaterMark: '',
videoApiUrlNoWaterMark: '',
videoMeta: {
width: videoData.video.width,
height: videoData.video.height,
ratio: videoData.video.ratio,
duration: videoData.video.duration,
duetEnabled: videoData.duetEnabled,
stitchEnabled: videoData.stitchEnabled,
duetInfo: videoData.duetInfo,
},
covers: {
default: videoData.video.cover,
origin: videoData.video.originCover,
},
diggCount: videoData.stats.diggCount,
shareCount: videoData.stats.shareCount,
playCount: videoData.stats.playCount,
commentCount: videoData.stats.commentCount,
downloaded: false,
mentions: videoData.desc.match(/(@\w+)/g) || [],
hashtags: videoData.challenges
? videoData.challenges.map(({ id, title, desc, profileLarger }) => ({
id,
name: title,
title: desc,
cover: profileLarger,
}))
: [],
effectStickers: videoData.effectStickers
? videoData.effectStickers.map(({ ID, name }) => ({
id: ID,
name,
}))
: [],
};
try {
if (this.noWaterMark) {
videoItem.videoApiUrlNoWaterMark = await this.extractVideoId(videoItem);
videoItem.videoUrlNoWaterMark = await this.getUrlWithoutTheWatermark(videoItem.videoApiUrlNoWaterMark);
}
}
catch (_a) {
}
this.collector.push(videoItem);
return videoItem;
}
sendDataToWebHookUrl() {
return new Promise(resolve => {
async_1.forEachLimit(this.collector, 3, (item, cb) => {
request_promise_1.default(Object.assign(Object.assign(Object.assign({ uri: this.webHookUrl, method: this.method, headers: {
'user-agent': 'TikTok-Scraper',
} }, (this.method === 'POST' ? { body: item } : {})), (this.method === 'GET' ? { qs: { json: encodeURIComponent(JSON.stringify(item)) } } : {})), { json: true }))
.then(() => {
this.httpRequests.good += 1;
})
.catch(() => {
this.httpRequests.bad += 1;
})
.finally(() => cb(null));
}, () => {
resolve(null);
});
});
}
}
exports.TikTokScraper = TikTokScraper;