fetch-fic
Version:
Package up delicious, delicious fanfic from various sources into epub ebooks ready for reading in your ereader of choice.
153 lines (146 loc) • 6.47 kB
JavaScript
const url = require('url')
const Site = use('site')
const cache = use('cache')
const moment = require('moment')
/**
 * Throw a 503 error when the loaded page is the site's maintenance page.
 *
 * The cached copy of the page is cleared first (via `cache.clearUrl`) so the
 * maintenance notice is not served from cache on a later attempt.
 *
 * @param {Function} $ - cheerio-style selector function for the loaded page
 * @param {Function} getLink - returns the URL the page was fetched from; only
 *   called when the maintenance marker is present
 * @param {string} site - human readable site name attached to the error
 * @throws {Error} with `link`, `code` (503) and `site` properties
 */
async function throwIfMaintenance ($, getLink, site) {
  if (!$('.error-503-maintenance').length) return
  // Collapse the notice's surrounding whitespace line by line.
  const message = $('#main').text().trim().split(/\n/).map(l => l.trim()).join('\n')
  const err = new Error(message)
  err.link = getLink()
  err.code = 503
  err.site = site
  await cache.clearUrl(err.link)
  throw err
}

/**
 * Read a numeric statistic out of the stats list.
 *
 * Thousands separators are stripped before conversion so that a value such
 * as "12,345" becomes 12345 rather than NaN. A missing element yields 0.
 *
 * @param {object} $stats - cheerio selection of the stats list
 * @param {string} selector - selector of the `dd` holding the number
 * @returns {number}
 */
function statNumber ($stats, selector) {
  return Number($stats.find(selector).text().trim().replace(/,/g, ''))
}

/**
 * Site handler for works hosted on archiveofourown.org.
 */
class ArchiveOfOurOwn extends Site {
  /**
   * Report whether a URL points at a work on archiveofourown.org.
   *
   * @param {string} siteUrlStr - URL to test
   * @returns {boolean} true when the host is archiveofourown.org (optionally
   *   prefixed with `www.`) and the path starts with `/works/<digits>`
   */
  static matches (siteUrlStr) {
    const siteUrl = url.parse(siteUrlStr)
    const hostname = siteUrl.hostname
    // Anchored on both ends with the dot escaped, so look-alike hosts
    // (e.g. "evilwww.archiveofourownXorg") are rejected.
    if (!/^(www[.])?archiveofourown[.]org$/.test(hostname)) return false
    const path = siteUrl.pathname || siteUrl.path || ''
    if (!/^[/]works[/]\d+/.test(path)) return false
    return true
  }

  /**
   * @param {string} siteUrlStr - URL of a work; anything after the numeric
   *   work id is dropped from `this.link`
   * @throws {Error} when the URL path does not start with `/works/<digits>`
   */
  constructor (siteUrlStr) {
    super(siteUrlStr)
    this.link = this.link.replace(/[/]works[/](\d+).*?$/, '/works/$1')
    this.publisher = 'archiveofourown.org'
    this.publisherName = 'Archive of Our Own'
    const siteUrl = url.parse(siteUrlStr)
    const path = siteUrl.pathname || siteUrl.path || ''
    const ficMatch = path.match(/^[/]works[/](\d+)/)
    if (!ficMatch) {
      throw new Error('Not an Archive of Our Own work URL: ' + siteUrlStr)
    }
    this.workId = ficMatch[1]
  }

  /**
   * Normalize a link via the parent class, then strip any `#fragment`.
   *
   * @param {string} href
   * @param {string} base
   * @returns {string}
   */
  normalizeLink (href, base) {
    return super.normalizeLink(href, base).replace(/#.*$/, '')
  }

  /**
   * @returns {string} URL of this work's `/navigate` page, which is used as
   *   the chapter index
   */
  chapterIndex () {
    return 'https://archiveofourown.org/works/' + this.workId + '/navigate'
  }

  /**
   * Collect the text of every `li` under `$dd` as `prefix:text` tags.
   *
   * @param {Function} $ - cheerio-style selector function
   * @param {string} prefix - tag namespace placed before the colon
   * @param {object} $dd - selection to search for `li` elements
   * @returns {string[]}
   */
  tagGroup ($, prefix, $dd) {
    const tags = []
    $dd.find('li').each((ii, vv) => {
      tags.push(prefix + ':' + $(vv).text().trim())
    })
    return tags
  }

  /**
   * Fill `fic` with metadata scraped from the chapter index and from the
   * first chapter's page (tags, stats, title and summary).
   *
   * @param {Function} fetch - async fetcher; resolves to `[meta, html]`
   * @param {object} fic - fic object to populate; must provide `addChapter`
   *   and expose the added chapters as `chapters`
   * @throws {Error} code 503 when the site shows its maintenance page
   * @throws {Error} when the chapter index lists no chapters
   */
  async getFicMetadata (fetch, fic) {
    fic.link = this.link
    fic.publisher = this.publisherName
    fic.chapterHeadings = true
    const [, html] = await fetch(this.chapterIndex())
    const cheerio = require('cheerio')
    const $ = cheerio.load(html)
    await throwIfMaintenance($, () => this.chapterIndex(), this.publisherName)

    const base = $('base').attr('href') || this.chapterIndex()
    const heading = $('h2.heading')
    // Provisional title; replaced below by the first chapter page's h2.title.
    fic.title = heading.find('a[rel!="author"]').text()
    const $author = heading.find('a[rel="author"]')
    fic.authorUrl = this.normalizeLink($author.attr('href'), base)
    fic.author = $author.text()

    const chapterList = $('ol.index').find('li')
    chapterList.each((ii, vv) => {
      const $vv = $(vv)
      const name = $vv.find('a').text().replace(/^\d+[.] /, '')
      const link = this.normalizeLink($vv.find('a').attr('href'), base)
      const created = moment.utc($vv.find('span.datetime').text(), '(YYYY-MM-DD)')
      fic.addChapter({name, link, created})
    })

    const firstChapter = fic.chapters[0]
    if (!firstChapter) {
      const err = new Error('No chapters found in chapter index: ' + this.chapterIndex())
      err.link = this.chapterIndex()
      err.site = this.publisherName
      throw err
    }
    const chapter = await firstChapter.getContent(fetch)
    await throwIfMaintenance(chapter.$, () => chapter.fetchWith(), this.publisherName)

    const $meta = chapter.$('dl.meta')
    const ratings = this.tagGroup(chapter.$, 'rating', $meta.find('dd.rating'))
    const warnings = this.tagGroup(chapter.$, 'warning', $meta.find('dd.warnings'))
      .filter(warn => !/No Archive Warnings Apply/.test(warn))
    const category = this.tagGroup(chapter.$, 'category', $meta.find('dd.category'))
    const fandom = this.tagGroup(chapter.$, 'fandom', $meta.find('dd.fandom'))
    // Relationships are gathered with an empty prefix, then classified:
    // names containing a slash become "ship:", all others "friendship:".
    const relationship = this.tagGroup(chapter.$, '', $meta.find('dd.relationship'))
      .filter(r => r !== ':Friendship - Relationship')
      .map(r => r.replace(/^:/, /[/]/.test(r) ? 'ship:' : 'friendship:'))
    const characters = this.tagGroup(chapter.$, 'character', $meta.find('dd.character'))
    const freeform = this.tagGroup(chapter.$, 'freeform', $meta.find('dd.freeform'))
    const language = 'language:' + $meta.find('dd.language').text().trim()
    fic.tags = [].concat(ratings, warnings, category, fandom, relationship, characters, freeform, language)

    const $stats = $meta.find('dl.stats')
    // The chapters stat is "written/planned"; equal halves mean the work is done.
    const chapterCounts = $stats.find('dd.chapters').text().trim().split('/')
    const written = chapterCounts[0]
    const planned = chapterCounts[1]
    if (written === planned) {
      if (written === '1') {
        fic.tags.push('status:one-shot')
      } else {
        fic.tags.push('status:complete')
      }
    }
    fic.created = moment.utc($stats.find('dd.published').text().trim())
    const modified = $stats.find('dd.status').text().trim()
    // Left as an empty string when the page has no status date.
    fic.modified = modified && moment.utc(modified)
    fic.words = statNumber($stats, 'dd.words')
    fic.comments = statNumber($stats, 'dd.comments')
    fic.kudos = statNumber($stats, 'dd.kudos')
    fic.bookmarks = statNumber($stats, 'dd.bookmarks')
    fic.hits = statNumber($stats, 'dd.hits')
    fic.title = chapter.$('h2.title').text().trim()
    fic.description = (chapter.$('.summary').find('.userstuff').html() || '').replace(/<p>/g, '\n<p>').replace(/^\s+|\s+$/g, '')
  }

  /**
   * Fetch one chapter and assemble its content: leading notes (unless they
   * only point at the end notes), the article body, then the end notes.
   *
   * When the page shows the `p.caution` interstitial, the chapter is
   * re-requested once with `view_adult=true` added to the URL.
   *
   * @param {Function} fetch - async fetcher; resolves to `[meta, html]`
   * @param {object} chapterInfo - chapter descriptor providing `fetchWith()`
   * @returns {Promise<object>} the populated chapter content object
   * @throws {Error} code 503 when the site shows its maintenance page
   * @throws {Error} when the caution page is still shown after the
   *   `view_adult=true` request
   */
  async getChapter (fetch, chapterInfo) {
    const [meta, html] = await fetch(chapterInfo.fetchWith())
    const ChapterContent = use('chapter-content')
    const chapter = new ChapterContent(chapterInfo, {html, site: this})
    await throwIfMaintenance(chapter.$, () => chapter.fetchWith(), this.publisherName)

    if (chapter.$('p.caution').length) {
      const current = chapterInfo.fetchWith()
      // Retry at most once: if the flag is already in the URL, retrying again
      // would recurse forever while the URL keeps growing.
      if (/[?&]view_adult=true(?:[&#]|$)/.test(current)) {
        const err = new Error('Content warning page still shown after requesting: ' + current)
        err.link = current
        err.site = this.publisherName
        throw err
      }
      const separator = current.includes('?') ? '&' : '?'
      chapterInfo.fetchFrom = current + separator + 'view_adult=true'
      return this.getChapter(fetch, chapterInfo)
    }

    chapter.base = chapter.$('base').attr('href') || meta.finalUrl
    // After a redirect, remember the requested URL and adopt the final one.
    if (meta.finalUrl !== chapter.link) {
      chapter.fetchFrom = chapter.link
      chapter.link = meta.finalUrl
    }

    const $content = chapter.$('div[role="article"]')
    $content.find('h3.landmark').remove()
    const notes = chapter.$('#notes').find('.userstuff').html()
    const endNotes = chapter.$('div.end').find('.userstuff').html()
    let content = ''
    if (notes && !/\(See the end of the chapter for.*notes.*.\)/.test(notes)) {
      content += `<aside style="border: solid black 1px; padding: 1em">${notes}</aside>`
    }
    // html() is null when the article element is missing; avoid emitting "null".
    content += $content.html() || ''
    if (endNotes) content += `<aside epub:type="endnote" style="border: solid black 1px; padding: 1em">${endNotes}</aside>`
    chapter.content = content
    return chapter
  }
}
// Export the ArchiveOfOurOwn class as this module's sole export.
module.exports = ArchiveOfOurOwn