UNPKG

@xbibzlibrary/tiktokscrap

Version:

Powerful TikTok Scraper and Downloader Library

321 lines (284 loc) 10.6 kB
import * as cheerio from 'cheerio';
import { TikTokVideo, TikTokPhoto, TikTokUser, TikTokHashtag, TikTokComment } from '../types';
import { ParseError } from '../errors';
import Logger from './logger';

/**
 * Turns TikTok page HTML (and the raw item/user/hashtag/comment records
 * embedded in it) into the library's typed result objects.
 *
 * The `parse*Data` methods take a full HTML document, locate the state
 * object embedded in one of its `<script>` tags and convert the first
 * record of the relevant module. The `parse*Object` methods convert one
 * already-extracted raw record and can be called directly.
 */
export class Parser {
  private logger = Logger;

  /** Matches an inline `__NEXT_DATA__ = {...};` assignment and captures the JSON. */
  private static readonly NEXT_DATA_ASSIGNMENT = /__NEXT_DATA__\s*=\s*({.+});/;

  /** Matches an inline `window['SIGI_STATE'] = {...};` assignment and captures the JSON. */
  private static readonly SIGI_STATE_ASSIGNMENT = /window\['SIGI_STATE'\]\s*=\s*({.+});/;

  /**
   * Parses a video page.
   *
   * @param html - Full HTML of the page.
   * @returns The first entry of the embedded `ItemModule`, converted with {@link parseVideoObject}.
   * @throws ParseError when no state or no `ItemModule` is found, the first entry is
   * empty, or conversion fails (the underlying message is appended).
   */
  public parseVideoData(html: string): TikTokVideo {
    return this.parsePage(html, 'video', state => {
      const items = Parser.requireSection(state, 'ItemModule', 'video');
      return this.parseVideoObject(Parser.firstEntry(items, 'video'));
    });
  }

  /**
   * Parses a photo page.
   *
   * @param html - Full HTML of the page.
   * @returns The first entry of the embedded `ItemModule`, converted with {@link parsePhotoObject}.
   * @throws ParseError when no state or no `ItemModule` is found, the first entry is
   * empty, or conversion fails.
   */
  public parsePhotoData(html: string): TikTokPhoto {
    return this.parsePage(html, 'photo', state => {
      const items = Parser.requireSection(state, 'ItemModule', 'photo');
      return this.parsePhotoObject(Parser.firstEntry(items, 'photo'));
    });
  }

  /**
   * Parses a user page.
   *
   * @param html - Full HTML of the page.
   * @returns The first entry of the embedded `UserModule.users`, converted with {@link parseUserObject}.
   * @throws ParseError when no state or no `UserModule` is found, `users` is missing
   * or empty, or conversion fails.
   */
  public parseUserData(html: string): TikTokUser {
    return this.parsePage(html, 'user', state => {
      const userModule = Parser.requireSection(state, 'UserModule', 'user');
      // A missing `users` map is reported as an invalid structure instead of
      // leaking the TypeError raised by Object.keys(undefined).
      return this.parseUserObject(Parser.firstEntry(userModule.users, 'user'));
    });
  }

  /**
   * Parses a hashtag page.
   *
   * @param html - Full HTML of the page.
   * @returns The first entry of the embedded `ChallengeModule`, converted with {@link parseHashtagObject}.
   * @throws ParseError when no state or no `ChallengeModule` is found, the first entry
   * is empty, or conversion fails.
   */
  public parseHashtagData(html: string): TikTokHashtag {
    return this.parsePage(html, 'hashtag', state => {
      const challenges = Parser.requireSection(state, 'ChallengeModule', 'hashtag');
      return this.parseHashtagObject(Parser.firstEntry(challenges, 'hashtag'));
    });
  }

  /**
   * Parses the comments embedded in a page.
   *
   * @param html - Full HTML of the page.
   * @returns Every value of the embedded `CommentModule`, converted with {@link parseCommentObject}.
   * @throws ParseError when no state or no `CommentModule` is found, or conversion fails.
   */
  public parseCommentsData(html: string): TikTokComment[] {
    return this.parsePage(html, 'comments', state => {
      const commentModule = Parser.requireSection(state, 'CommentModule', 'comments');
      return Object.values(commentModule).map(entry => this.parseCommentObject(entry));
    });
  }

  /**
   * Shared driver for the `parse*Data` methods: loads the HTML, extracts the
   * embedded state and hands it to `build`. Any failure is logged and
   * rethrown as a `ParseError` whose message names `label`.
   */
  private parsePage<T>(html: string, label: string, build: (state: any) => T): T {
    try {
      return build(this.extractScriptData(cheerio.load(html)));
    } catch (error) {
      const reason = Parser.describeError(error);
      this.logger.error(`Error parsing ${label} data: ${reason}`);
      throw new ParseError(`Failed to parse ${label} data: ${reason}`);
    }
  }

  /** Returns `state[section]`, or throws the "Could not find ..." ParseError when it is missing. */
  private static requireSection(state: any, section: string, label: string): any {
    const found = state ? state[section] : undefined;
    if (!found) {
      throw new ParseError(`Could not find ${label} data in the page`);
    }
    return found;
  }

  /** Returns the first value of a keyed record map, or throws the "Invalid ..." ParseError. */
  private static firstEntry(records: any, label: string): any {
    const entry = records ? Object.values(records)[0] : undefined;
    if (!entry) {
      throw new ParseError(`Invalid ${label} data structure`);
    }
    return entry;
  }

  /** Best-effort message for a caught value that may not be an `Error`. */
  private static describeError(error: unknown): string {
    const message = error != null ? (error as { message?: unknown }).message : undefined;
    return typeof message === 'string' ? message : String(error);
  }

  /**
   * Integer conversion used for every counter and timestamp.
   *
   * Strings are parsed as base 10 (so a `0x` prefix is not read as hex);
   * finite numbers are truncated directly instead of being round-tripped
   * through a string. Missing or unparseable input yields `NaN`.
   */
  private static toInt(value: unknown): number {
    if (typeof value === 'number') {
      return Number.isFinite(value) ? Math.trunc(value) : NaN;
    }
    return Number.parseInt(String(value), 10);
  }

  /**
   * Scans every `<script>` tag for an embedded state object.
   *
   * Each script is decoded independently, so one malformed payload is logged
   * and skipped rather than ending the search.
   *
   * @returns The decoded state, or `null` when no script yields one. Never throws.
   */
  private extractScriptData($: cheerio.CheerioAPI): any {
    try {
      for (const script of $('script').toArray()) {
        const node = $(script);
        const state = this.readEmbeddedState(node.attr('id'), node.html());
        if (state != null) {
          return state;
        }
      }
    } catch (error) {
      this.logger.error(`Error extracting script data: ${Parser.describeError(error)}`);
    }
    return null;
  }

  /**
   * Decodes the state carried by a single script tag, if any.
   *
   * Recognised forms, in order:
   *  1. inline `__NEXT_DATA__ = {...};` (returns `props.pageProps`)
   *  2. inline `window['SIGI_STATE'] = {...};`
   *  3. a tag with `id="__NEXT_DATA__"` whose whole body is JSON (returns `props.pageProps`)
   *  4. a tag with `id="SIGI_STATE"` whose whole body is JSON
   *     NOTE(review): form 4 assumes SIGI_STATE is shipped the same way as
   *     form 3; confirm against live pages.
   *
   * @returns The decoded state, or `null` when the tag carries none or its payload is invalid.
   */
  private readEmbeddedState(id: string | undefined, content: string | null): any {
    if (!content) {
      return null;
    }
    try {
      if (content.includes('__NEXT_DATA__')) {
        const match = content.match(Parser.NEXT_DATA_ASSIGNMENT);
        if (match && match[1]) {
          return JSON.parse(match[1]).props.pageProps;
        }
      }
      if (content.includes('window[\'SIGI_STATE\']')) {
        const match = content.match(Parser.SIGI_STATE_ASSIGNMENT);
        if (match && match[1]) {
          return JSON.parse(match[1]);
        }
      }
      if (id === '__NEXT_DATA__') {
        return JSON.parse(content).props.pageProps;
      }
      if (id === 'SIGI_STATE') {
        return JSON.parse(content);
      }
    } catch (error) {
      this.logger.error(`Error extracting script data: ${Parser.describeError(error)}`);
    }
    return null;
  }

  /** Maps a raw `music` record; shared by video and photo items. */
  private static parseMusic(music: any) {
    return {
      id: music.id,
      title: music.title,
      author: music.authorName,
      album: music.album,
      playUrl: music.playUrl,
      coverLarge: music.coverLarge,
      coverMedium: music.coverMedium,
      coverThumb: music.coverThumb,
      duration: Parser.toInt(music.duration)
    };
  }

  /** Maps a raw `stats` record to integer counters; shared by video and photo items. */
  private static parseStats(stats: any) {
    return {
      digg: Parser.toInt(stats.diggCount),
      share: Parser.toInt(stats.shareCount),
      comment: Parser.toInt(stats.commentCount),
      play: Parser.toInt(stats.playCount)
    };
  }

  /** Maps the values of a raw `challenge` collection; empty list when absent. */
  private static parseHashtags(challenge: any) {
    if (!challenge) {
      return [];
    }
    return Object.values(challenge).map((entry: any) => ({
      id: entry.id,
      name: entry.title,
      title: entry.title,
      cover: entry.cover
    }));
  }

  /** Keeps the `textExtra` entries whose `type` is 1 and maps them to mentions; empty list when absent. */
  private static parseMentions(textExtra: any) {
    if (!textExtra) {
      return [];
    }
    return textExtra
      .filter((item: any) => item.type === 1)
      .map((item: any) => ({
        id: item.userId,
        uniqueId: item.userUniqueId,
        nickname: item.userName
      }));
  }

  /** Maps raw `effectStickers`; empty list when absent. */
  private static parseEffects(effectStickers: any) {
    if (!effectStickers) {
      return [];
    }
    return effectStickers.map((effect: any) => ({
      id: effect.id,
      name: effect.name,
      icon: effect.icon
    }));
  }

  /**
   * Converts one raw video item.
   *
   * @param data - Raw item record; `author`, `music`, `stats` and `video` are read
   * unconditionally, so a record lacking any of them throws a `TypeError`.
   * @returns The typed video, including a `webVideoUrl` built from the author's
   * `uniqueId` and the item id.
   */
  public parseVideoObject(data: any): TikTokVideo {
    const video = data.video;
    return {
      id: data.id,
      text: data.desc,
      createTime: Parser.toInt(data.createTime),
      author: this.parseUserObject(data.author),
      music: Parser.parseMusic(data.music),
      stats: Parser.parseStats(data.stats),
      videoMeta: {
        width: video.width,
        height: video.height,
        duration: Parser.toInt(video.duration),
        cover: video.cover,
        dynamicCover: video.dynamicCover,
        originCover: video.originCover
      },
      downloadAddr: video.downloadAddr,
      webVideoUrl: `https://www.tiktok.com/@${data.author.uniqueId}/video/${data.id}`,
      hashtags: Parser.parseHashtags(data.challenge),
      mentions: Parser.parseMentions(data.textExtra),
      effects: Parser.parseEffects(data.effectStickers),
      isAd: data.isAd,
      commentsDisabled: data.commentSetting === 2,
      duetEnabled: data.duetSetting === 1,
      stitchEnabled: data.stitchSetting === 1,
      secret: data.privateItem,
      forFriend: data.forFriend,
      // True whenever the like counter is positive; this is not a per-viewer flag.
      digged: data.stats.diggCount > 0,
      itemCommentStatus: data.itemCommentStatus
    };
  }

  /**
   * Converts one raw photo item.
   *
   * @param data - Raw item record; `author`, `music` and `stats` are read
   * unconditionally, so a record lacking any of them throws a `TypeError`.
   * @returns The typed photo post; `covers` holds the first `urlList` entry of each
   * image, and `webVideoUrl` uses the `/photo/` path.
   */
  public parsePhotoObject(data: any): TikTokPhoto {
    return {
      id: data.id,
      text: data.desc,
      createTime: Parser.toInt(data.createTime),
      author: this.parseUserObject(data.author),
      music: Parser.parseMusic(data.music),
      stats: Parser.parseStats(data.stats),
      covers: data.images
        ? data.images.map((image: any) => ({
            url: image.urlList[0],
            width: image.width,
            height: image.height
          }))
        : [],
      webVideoUrl: `https://www.tiktok.com/@${data.author.uniqueId}/photo/${data.id}`,
      hashtags: Parser.parseHashtags(data.challenge),
      mentions: Parser.parseMentions(data.textExtra),
      effects: Parser.parseEffects(data.effectStickers),
      isAd: data.isAd,
      commentsDisabled: data.commentSetting === 2,
      duetEnabled: data.duetSetting === 1,
      stitchEnabled: data.stitchSetting === 1,
      secret: data.privateItem,
      forFriend: data.forFriend,
      // True whenever the like counter is positive; this is not a per-viewer flag.
      digged: data.stats.diggCount > 0,
      itemCommentStatus: data.itemCommentStatus
    };
  }

  /**
   * Converts one raw user record.
   *
   * @param data - Raw user record. Counter fields missing from it come back as `NaN`.
   * @returns The typed user.
   */
  public parseUserObject(data: any): TikTokUser {
    return {
      id: data.id,
      uniqueId: data.uniqueId,
      nickname: data.nickname,
      avatarUrl: data.avatarLarger,
      signature: data.signature,
      verified: data.verified,
      following: Parser.toInt(data.followingCount),
      fans: Parser.toInt(data.followerCount),
      heart: Parser.toInt(data.heartCount),
      video: Parser.toInt(data.videoCount),
      digg: Parser.toInt(data.diggCount),
      privateAccount: data.privateAccount,
      isSecret: data.isSecret,
      secUid: data.secUid
    };
  }

  /**
   * Converts one raw hashtag (challenge) record.
   *
   * @param data - Raw challenge record.
   * @returns The typed hashtag; `videos` is always returned empty here.
   */
  public parseHashtagObject(data: any): TikTokHashtag {
    return {
      id: data.challengeId,
      name: data.challengeName,
      title: data.title,
      description: data.desc,
      cover: data.cover,
      icon: data.icon,
      viewCount: Parser.toInt(data.viewCount),
      isCommerce: data.isCommerce,
      isAd: data.isAd,
      challengeType: data.challengeType,
      videos: []
    };
  }

  /**
   * Converts one raw comment record, recursing into its `replies`.
   *
   * @param data - Raw comment record; `user` is read unconditionally.
   * @returns The typed comment; `replies` is an empty list when the record has none.
   */
  public parseCommentObject(data: any): TikTokComment {
    return {
      id: data.cid,
      text: data.text,
      createTime: Parser.toInt(data.createTime),
      user: this.parseUserObject(data.user),
      digg: Parser.toInt(data.diggCount),
      reply: Parser.toInt(data.replyCommentTotal),
      isPinned: data.isPinned,
      isAuthorDigged: data.isAuthorDigged,
      replies: data.replies
        ? data.replies.map((reply: any) => this.parseCommentObject(reply))
        : []
    };
  }
}

export default Parser;