// @xbibzlibrary/tiktokscrap
// Version: (not specified)
// Powerful TikTok Scraper and Downloader Library
// 321 lines (284 loc) • 10.6 kB
// text/typescript
import * as cheerio from 'cheerio';
import { TikTokVideo, TikTokPhoto, TikTokUser, TikTokHashtag, TikTokComment } from '../types';
import { ParseError } from '../errors';
import Logger from './logger';
/**
 * Converts TikTok page HTML, or raw objects already pulled out of a page, into
 * the library's typed models (videos, photos, users, hashtags, comments).
 *
 * The `parse*Data` methods take HTML, locate the JSON state embedded in one of
 * the page's `<script>` tags and map the first entry of the relevant module.
 * The `parse*Object` methods map a single raw object and can be used directly
 * when the caller already holds the raw payload.
 */
export class Parser {
  /** Start of an inline `__NEXT_DATA__ = {` assignment (the `{` is not consumed). */
  private static readonly NEXT_DATA_MARKER = /__NEXT_DATA__\s*=\s*(?=\{)/;

  /** Start of an inline `window['SIGI_STATE'] = {` assignment (the `{` is not consumed). */
  private static readonly SIGI_STATE_MARKER = /window\['SIGI_STATE'\]\s*=\s*(?=\{)/;

  private logger = Logger;

  /**
   * Parses the first entry of `ItemModule` in the page state as a video.
   *
   * @param html Raw HTML of a video page.
   * @returns The parsed video.
   * @throws ParseError when the state, the module or its first entry is missing,
   *   or when mapping fails for any other reason.
   */
  public parseVideoData(html: string): TikTokVideo {
    return this.parsePage(html, 'video', 'ItemModule', (items) =>
      this.parseVideoObject(this.firstEntry(items, 'video'))
    );
  }

  /**
   * Parses the first entry of `ItemModule` in the page state as a photo post.
   *
   * @param html Raw HTML of a photo page.
   * @returns The parsed photo post.
   * @throws ParseError when the state, the module or its first entry is missing,
   *   or when mapping fails for any other reason.
   */
  public parsePhotoData(html: string): TikTokPhoto {
    return this.parsePage(html, 'photo', 'ItemModule', (items) =>
      this.parsePhotoObject(this.firstEntry(items, 'photo'))
    );
  }

  /**
   * Parses the first entry of `UserModule.users` in the page state.
   *
   * @param html Raw HTML of a profile page.
   * @returns The parsed user.
   * @throws ParseError when the state, `UserModule`, `UserModule.users` or its
   *   first entry is missing, or when mapping fails for any other reason.
   */
  public parseUserData(html: string): TikTokUser {
    return this.parsePage(html, 'user', 'UserModule', (userModule) =>
      this.parseUserObject(this.firstEntry(userModule.users, 'user'))
    );
  }

  /**
   * Parses the first entry of `ChallengeModule` in the page state.
   *
   * @param html Raw HTML of a hashtag page.
   * @returns The parsed hashtag (its `videos` list is always empty here).
   * @throws ParseError when the state, the module or its first entry is missing,
   *   or when mapping fails for any other reason.
   */
  public parseHashtagData(html: string): TikTokHashtag {
    return this.parsePage(html, 'hashtag', 'ChallengeModule', (challenges) =>
      this.parseHashtagObject(this.firstEntry(challenges, 'hashtag'))
    );
  }

  /**
   * Parses every value of `CommentModule` in the page state.
   *
   * @param html Raw HTML of a page carrying comment state.
   * @returns One parsed comment per `CommentModule` value, in object order.
   * @throws ParseError when the state or the module is missing, or when mapping
   *   fails for any other reason.
   */
  public parseCommentsData(html: string): TikTokComment[] {
    return this.parsePage(html, 'comments', 'CommentModule', (commentModule) =>
      Object.values(commentModule).map((comment) => this.parseCommentObject(comment))
    );
  }

  /**
   * Shared driver for the `parse*Data` methods: loads the HTML, extracts the
   * embedded state, picks `moduleKey` from it and hands that to `build`.
   * Every failure is logged and rethrown as a `ParseError` whose message is
   * prefixed with "Failed to parse <label> data: ".
   */
  private parsePage<T>(
    html: string,
    label: string,
    moduleKey: string,
    build: (moduleData: any) => T
  ): T {
    try {
      const $ = cheerio.load(html);
      const scriptData = this.extractScriptData($);
      const moduleData = scriptData ? scriptData[moduleKey] : undefined;
      if (!moduleData) {
        throw new ParseError(`Could not find ${label} data in the page`);
      }
      return build(moduleData);
    } catch (error) {
      const message = this.errorMessage(error);
      this.logger.error(`Error parsing ${label} data: ${message}`);
      throw new ParseError(`Failed to parse ${label} data: ${message}`);
    }
  }

  /**
   * Returns the first value of a keyed container.
   *
   * @throws ParseError when the container is not an object or its first value is falsy.
   */
  private firstEntry(container: any, label: string): any {
    const entry =
      container !== null && typeof container === 'object'
        ? Object.values(container)[0]
        : undefined;
    if (!entry) {
      throw new ParseError(`Invalid ${label} data structure`);
    }
    return entry;
  }

  /** Readable text for anything that can be thrown, not only `Error` instances. */
  private errorMessage(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Scans the page's `<script>` tags in document order and returns the first
   * state object found, or `null` when none is found. Never throws: unexpected
   * failures are logged and reported as `null`.
   */
  private extractScriptData($: cheerio.CheerioAPI): any {
    try {
      for (const script of $('script').toArray()) {
        const element = $(script);
        const state = this.parseScriptState(element.html(), element.attr('id'));
        if (state != null) {
          return state;
        }
      }
      return null;
    } catch (error) {
      this.logger.error(`Error extracting script data: ${this.errorMessage(error)}`);
      return null;
    }
  }

  /**
   * Tries to read a state object out of one script's text.
   *
   * Recognized forms, tried in this order:
   *  1. the whole script body is JSON and the tag id is `__NEXT_DATA__`
   *     (result: `props.pageProps`) or `SIGI_STATE` (result: the object itself);
   *  2. an inline `__NEXT_DATA__ = {...}` assignment (result: `props.pageProps`);
   *  3. an inline `window['SIGI_STATE'] = {...}` assignment.
   *
   * A form that is absent or fails to parse is skipped so the next one, and
   * the caller's next script, still get a chance.
   *
   * @returns The state object, or `null` when this script holds none.
   */
  private parseScriptState(content: string | null | undefined, scriptId?: string): any {
    if (!content) {
      return null;
    }

    if (scriptId === '__NEXT_DATA__' || scriptId === 'SIGI_STATE') {
      const body = content.trim();
      // Only attempt a whole-body parse when it plausibly is a JSON object,
      // to avoid logging a parse error for ordinary JavaScript.
      if (body.startsWith('{')) {
        const parsed = this.tryParseJson(body);
        const state = scriptId === '__NEXT_DATA__' ? parsed?.props?.pageProps : parsed;
        if (state != null) {
          return state;
        }
      }
    }

    const nextData = this.parseAssignedObject(content, Parser.NEXT_DATA_MARKER);
    const pageProps = nextData?.props?.pageProps;
    if (pageProps != null) {
      return pageProps;
    }

    const sigiState = this.parseAssignedObject(content, Parser.SIGI_STATE_MARKER);
    return sigiState != null ? sigiState : null;
  }

  /**
   * Finds `marker` in `content` and parses the object literal that follows it.
   *
   * @returns The parsed value, or `undefined` when the marker is absent, the
   *   braces never balance, or the text is not valid JSON.
   */
  private parseAssignedObject(content: string, marker: RegExp): any {
    const match = marker.exec(content);
    if (!match) {
      return undefined;
    }
    const json = this.sliceJsonObject(content, match.index + match[0].length);
    return json === null ? undefined : this.tryParseJson(json);
  }

  /**
   * Returns the balanced `{...}` substring starting at `start`, or `null` if
   * the braces never balance. Braces inside double-quoted strings (including
   * after escaped quotes) are ignored, and the scan stops at the matching
   * close brace, so further statements on the same line and payloads spanning
   * several lines are both handled.
   */
  private sliceJsonObject(text: string, start: number): string | null {
    let depth = 0;
    let inString = false;
    let escaped = false;

    for (let i = start; i < text.length; i++) {
      const ch = text[i];
      if (inString) {
        if (escaped) {
          escaped = false;
        } else if (ch === '\\') {
          escaped = true;
        } else if (ch === '"') {
          inString = false;
        }
        continue;
      }
      if (ch === '"') {
        inString = true;
      } else if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
        if (depth === 0) {
          return text.slice(start, i + 1);
        }
      }
    }
    return null;
  }

  /** `JSON.parse` that logs and returns `undefined` instead of throwing. */
  private tryParseJson(text: string): any {
    try {
      return JSON.parse(text);
    } catch (error) {
      this.logger.error(`Error extracting script data: ${this.errorMessage(error)}`);
      return undefined;
    }
  }

  /**
   * Integer conversion for counters and timestamps. Numbers are truncated
   * directly (so large values are not mangled by a string round-trip);
   * everything else is parsed as base-10 text. Missing or non-numeric input
   * yields `NaN`.
   */
  private toInt(value: unknown): number {
    return typeof value === 'number' ? Math.trunc(value) : parseInt(String(value), 10);
  }

  /**
   * Maps a raw video item. `data` itself must be an object; the nested
   * `author`, `music`, `stats` and `video` sections may be absent, in which
   * case their fields come back `undefined` (or `NaN` for integers).
   *
   * @param data Raw video item.
   * @returns The parsed video.
   */
  public parseVideoObject(data: any): TikTokVideo {
    const video = data.video ?? {};
    return {
      ...this.parseItemHead(data),
      videoMeta: {
        width: video.width,
        height: video.height,
        duration: this.toInt(video.duration),
        cover: video.cover,
        dynamicCover: video.dynamicCover,
        originCover: video.originCover
      },
      downloadAddr: video.downloadAddr,
      ...this.parseItemTail(data, 'video')
    };
  }

  /**
   * Maps a raw photo item. `data` itself must be an object; the nested
   * `author`, `music` and `stats` sections may be absent, and `images` is
   * used only when it is an array.
   *
   * @param data Raw photo item.
   * @returns The parsed photo post.
   */
  public parsePhotoObject(data: any): TikTokPhoto {
    return {
      ...this.parseItemHead(data),
      covers: Array.isArray(data.images)
        ? data.images.map((image: any) => ({
            url: image.urlList?.[0],
            width: image.width,
            height: image.height
          }))
        : [],
      ...this.parseItemTail(data, 'photo')
    };
  }

  /** Fields that open both video and photo results: id through stats. */
  private parseItemHead(data: any) {
    const music = data.music ?? {};
    const stats = data.stats ?? {};
    return {
      id: data.id,
      text: data.desc,
      createTime: this.toInt(data.createTime),
      author: this.parseUserObject(data.author ?? {}),
      music: {
        id: music.id,
        title: music.title,
        author: music.authorName,
        album: music.album,
        playUrl: music.playUrl,
        coverLarge: music.coverLarge,
        coverMedium: music.coverMedium,
        coverThumb: music.coverThumb,
        duration: this.toInt(music.duration)
      },
      stats: {
        digg: this.toInt(stats.diggCount),
        share: this.toInt(stats.shareCount),
        comment: this.toInt(stats.commentCount),
        play: this.toInt(stats.playCount)
      }
    };
  }

  /**
   * Fields that close both video and photo results: the canonical URL through
   * `itemCommentStatus`. `kind` is the path segment used in the URL.
   */
  private parseItemTail(data: any, kind: 'video' | 'photo') {
    return {
      webVideoUrl: `https://www.tiktok.com/@${data.author?.uniqueId}/${kind}/${data.id}`,
      hashtags: data.challenge
        ? Object.values(data.challenge).map((challenge: any) => ({
            id: challenge.id,
            name: challenge.title,
            title: challenge.title,
            cover: challenge.cover
          }))
        : [],
      // Only `textExtra` entries of type 1 are reported as mentions.
      mentions: Array.isArray(data.textExtra)
        ? data.textExtra
            .filter((item: any) => item.type === 1)
            .map((item: any) => ({
              id: item.userId,
              uniqueId: item.userUniqueId,
              nickname: item.userName
            }))
        : [],
      effects: Array.isArray(data.effectStickers)
        ? data.effectStickers.map((effect: any) => ({
            id: effect.id,
            name: effect.name,
            icon: effect.icon
          }))
        : [],
      isAd: data.isAd,
      commentsDisabled: data.commentSetting === 2,
      duetEnabled: data.duetSetting === 1,
      stitchEnabled: data.stitchSetting === 1,
      secret: data.privateItem,
      forFriend: data.forFriend,
      // NOTE(review): this is true whenever the item has at least one like; it
      // is not read from a per-viewer flag. Confirm that is the intended meaning.
      digged: data.stats?.diggCount > 0,
      itemCommentStatus: data.itemCommentStatus
    };
  }

  /**
   * Maps a raw user object. Counter fields are converted to integers and are
   * `NaN` when absent.
   *
   * @param data Raw user object.
   * @returns The parsed user.
   */
  public parseUserObject(data: any): TikTokUser {
    return {
      id: data.id,
      uniqueId: data.uniqueId,
      nickname: data.nickname,
      avatarUrl: data.avatarLarger,
      signature: data.signature,
      verified: data.verified,
      following: this.toInt(data.followingCount),
      fans: this.toInt(data.followerCount),
      heart: this.toInt(data.heartCount),
      video: this.toInt(data.videoCount),
      digg: this.toInt(data.diggCount),
      privateAccount: data.privateAccount,
      isSecret: data.isSecret,
      secUid: data.secUid
    };
  }

  /**
   * Maps a raw hashtag (challenge) object. `videos` is always returned empty;
   * this method does not populate it.
   *
   * @param data Raw hashtag object.
   * @returns The parsed hashtag.
   */
  public parseHashtagObject(data: any): TikTokHashtag {
    return {
      id: data.challengeId,
      name: data.challengeName,
      title: data.title,
      description: data.desc,
      cover: data.cover,
      icon: data.icon,
      viewCount: this.toInt(data.viewCount),
      isCommerce: data.isCommerce,
      isAd: data.isAd,
      challengeType: data.challengeType,
      videos: []
    };
  }

  /**
   * Maps a raw comment object, recursing into `replies` when it is an array.
   * A missing `user` yields a user whose fields are `undefined`/`NaN`.
   *
   * @param data Raw comment object.
   * @returns The parsed comment with its parsed replies.
   */
  public parseCommentObject(data: any): TikTokComment {
    return {
      id: data.cid,
      text: data.text,
      createTime: this.toInt(data.createTime),
      user: this.parseUserObject(data.user ?? {}),
      digg: this.toInt(data.diggCount),
      reply: this.toInt(data.replyCommentTotal),
      isPinned: data.isPinned,
      isAuthorDigged: data.isAuthorDigged,
      replies: Array.isArray(data.replies)
        ? data.replies.map((reply: any) => this.parseCommentObject(reply))
        : []
    };
  }
}

export default Parser;