UNPKG

medium-blogs-fetcher

Version:

A package to fetch Medium user profiles and blogs

203 lines (168 loc) 6.45 kB
import axios from "axios"; import * as cheerio from "cheerio"; import Parser from "rss-parser"; const parser = new Parser(); /** * Fetch Medium user profile details including avatar, bio, and profile URL. * @param {string} username - Medium username * @param {object} options - (Optional) Custom headers & proxy * @returns {Promise<object>} - User profile details */ export async function getMediumUserProfile(username, options = {}) { try { const profileUrl = `https://medium.com/@${username}`; const { data } = await axios.get(profileUrl, { headers: { "User-Agent": "Mozilla/5.0", ...options.headers }, proxy: options.proxy || false, }); const $ = cheerio.load(data); const avatar = $('meta[property="og:image"]').attr("content") || "https://cdn-images-1.medium.com/fit/c/64/64/default-avatar.png"; const bio = $('meta[property="og:description"]').attr("content") || "No bio available."; // Extract followers count from the correct element let followersText = $('span.pw-follower-count a').first().text().trim(); // Ensure the followers text is properly extracted if (!followersText) { console.warn(`⚠️ No followers found for ${username}`); followersText = "0"; } // Convert K (thousand) and M (million) properly let followers = 0; const match = followersText.match(/^([\d.]+)([KM]?)\s*Followers?$/i); // Match number + optional K/M if (match) { let number = parseFloat(match[1]); // Extract numeric part let suffix = match[2].toUpperCase(); // Extract K or M (if present) if (suffix === "M") { followers = Math.round(number * 1_000_000); } else if (suffix === "K") { followers = Math.round(number * 1_000); } else { followers = Math.round(number); } } return { username, avatar, bio, followers, profileUrl }; } catch (error) { return { error: "User not found or request failed", details: error.message, }; } } /** * Fetch the Medium user's avatar by scraping their profile page. * @param {string} username - Medium username * @param {object} options - (Optional) Custom headers & proxy * @returns {Promise<string>} - Avatar URL or default */ export async function getMediumAvatar(username, options = {}) { try { const profileUrl = `https://medium.com/@${username}`; const { data } = await axios.get(profileUrl, { headers: { "User-Agent": "Mozilla/5.0", ...options.headers }, proxy: options.proxy || false, }); const $ = cheerio.load(data); return ( $('meta[property="og:image"]').attr("content") || "https://cdn-images-1.medium.com/fit/c/64/64/1*2Y7paYtPz5-Nj0zTLOzSwg.png" ); } catch (error) { console.error(`Error fetching avatar: ${error.message}`); return "https://cdn-images-1.medium.com/fit/c/64/64/1*2Y7paYtPz5-Nj0zTLOzSwg.png"; } } /** * Fetch Medium blogs for a given username. * @param {string} username - Medium username * @param {object} options - (Optional) Custom headers & proxy * @returns {Promise<object[]>} - List of blog posts */ export async function getMediumBlogs(username, options = {}) { try { const feedUrl = `https://medium.com/feed/@${username}`; const response = await axios.get(feedUrl, { headers: { "User-Agent": "Mozilla/5.0", ...options.headers }, proxy: options.proxy || false, }); const feed = await parser.parseString(response.data); // console.log(feed.items) return feed.items.map((post) => ({ title: post.title, link: post.link, categories: post.categories || ["Uncategorized"], description: post.contentSnippet, content: post.content, published: post.pubDate, })); } catch (error) { return { error: "Failed to fetch Medium blogs", details: error.message }; } } /** * Fetch top Medium users for a specific blog tag, sorted by followers. * @param {string} tag - Medium tag (e.g., "technology", "programming"). * @param {number} limit - Number of top users to return. * @param {object} options - (Optional) Custom headers & proxy. * @returns {Promise<object[]>} - Sorted list of top users with profiles and follower count. */ export async function getTopUsersByTag(tag, limit, options = {}) { try { const tagUrl = `https://medium.com/tag/${tag}`; const { data } = await axios.get(tagUrl, { headers: { "User-Agent": "Mozilla/5.0", ...options.headers }, proxy: options.proxy || false, }); const $ = cheerio.load(data); const users = new Set(); // Scrape author usernames from articles on the tag page $('a[href^="/@"]').each((_, element) => { let username = $(element).attr("href"); if (username) { // Extract only the username (removes everything after `?`) username = username.split("?")[0].replace("/@", ""); // Ensure username does not contain slashes (which indicate article URLs) if (!username.includes("/")) { users.add(username); } } }); console.log(`✅ Found ${users.size} unique users for tag: ${tag}`); // Fetch user profiles const userProfiles = await Promise.all( [...users].map(async (username) => { const profile = await getMediumUserProfile(username, options); if (profile.error) { console.warn(`⚠️ Skipping user ${username} due to fetch error`); return null; // Skip failed users } return { username: profile.username, avatar: profile.avatar, bio: profile.bio, followers: profile.followers, // Ensure followers are included profileUrl: profile.profileUrl, }; }) ); // Sort users by followers in descending order const sortedUsers = userProfiles .filter((user) => user !== null) // Remove failed fetch attempts .sort((a, b) => b.followers - a.followers) // Sort by followers count .slice(0, limit); // Limit results console.log(`✅ Top ${limit} users sorted by followers:`); // Logging results sortedUsers.forEach((user, index) => console.log(`#${index + 1}: ${user.username} - ${user.followers} Followers`) ); return sortedUsers; } catch (error) { return [ { error: "Failed to fetch top users for the tag", details: error.message, }, ]; } }