From a7646533422e623c7bfe6deccfd55d90ee0abf5e Mon Sep 17 00:00:00 2001 From: Ruthenic Date: Wed, 2 Nov 2022 20:23:49 -0400 Subject: [PATCH] so much shit + lazy-loaded chapters (ie accessed on first prop access) --- .vscode/settings.json | 6 +- src/classes/AO3.ts | 54 +++++++++--- src/classes/Chapter.ts | 118 ++++++++++++++++++++++++- src/classes/Work.ts | 181 ++++++++++++++++++++++++++++++-------- src/global.d.ts | 1 - src/types.d.ts | 1 + src/utils/asyncForeach.ts | 5 ++ test.py | 13 +++ test.ts | 47 ++++++++-- 9 files changed, 358 insertions(+), 68 deletions(-) delete mode 100644 src/global.d.ts create mode 100644 src/types.d.ts create mode 100644 src/utils/asyncForeach.ts create mode 100644 test.py diff --git a/.vscode/settings.json b/.vscode/settings.json index 8675ad5..aa1c94e 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,4 +1,4 @@ { - "deno.enable": true, - "deno.unstable": true -} \ No newline at end of file + "deno.enable": true, + "deno.unstable": true +} diff --git a/src/classes/AO3.ts b/src/classes/AO3.ts index b68e907..21f5a16 100644 --- a/src/classes/AO3.ts +++ b/src/classes/AO3.ts @@ -1,18 +1,46 @@ -import { IAxiodResponse } from "https://deno.land/x/axiod@0.26.2/interfaces.ts"; -import axiod from "https://deno.land/x/axiod@0.26.2/mod.ts"; import Work from "./Work.ts"; +import { ID } from "../types.d.ts"; +import { + DOMParser, +} from "https://denopkg.dev/gh/Ruthenic/deno-dom@master/deno-dom-wasm.ts"; export default class AO3 { - session: typeof axiod + session: { + get: (path: string) => Promise; + }; + DOMParser = new DOMParser(); - constructor(opts?: Record) { - this.session = axiod.create({ - baseURL: opts?.url ?? "https://archiveofourown.org/" - }) - } + /** + * a representation of AO3 in class form + */ + constructor(opts?: { + url?: string; + }) { + /*this.session = axiod.create({ + baseURL: opts?.url ?? "https://archiveofourown.org/", + });*/ + this.session = { + get: async (path: string) => { + const res = await fetch( + opts?.url ?? "https://archiveofourown.org/" + path, + ); + if (res.status > 300) { + console.log(res); + throw new Error("Failed request, probably rate-limited"); + } + return res; + }, + }; + } - async getWork(id: ID) { - const res = await this.session.get(`/works/${id}?view_adult=true&view_full_work=true`) - return new Work(id, res.data, this.session) - } -} \ No newline at end of file + /** + * gets a Work from an ID + * @returns {Promise} a Work class for the work + */ + async getWork(id: ID): Promise { + const res = await this.session.get( + `/works/${id}?view_adult=true&view_full_work=true`, + ); + return new Work(id, await res.text(), this.session, new DOMParser()); + } +} diff --git a/src/classes/Chapter.ts b/src/classes/Chapter.ts index 7b9a198..f38c381 100644 --- a/src/classes/Chapter.ts +++ b/src/classes/Chapter.ts @@ -1,4 +1,116 @@ +import axiod from "https://deno.land/x/axiod@0.26.2/mod.ts"; +import type { + DOMParser, +} from "https://denopkg.dev/gh/Ruthenic/deno-dom@master/src/dom/dom-parser.ts"; +import type { + Element, +} from "https://denopkg.dev/gh/Ruthenic/deno-dom@master/src/dom/element.ts"; +import type { + HTMLDocument, +} from "https://denopkg.dev/gh/Ruthenic/deno-dom@master/src/dom/document.ts"; +import { ID } from "../types.d.ts"; + export default class Chapter { - constructor(workId: ID, id: ID) { - } -} \ No newline at end of file + #session: { + get: (path: string) => Promise; + }; + isInited = false; + #document!: HTMLDocument; + #DOMParser: DOMParser; + #id: ID; + #workID: ID; + #name!: string; + #text!: string; + #summary!: string; + #startNote!: string; + #endNote!: string; + id!: Promise; + workID!: Promise; + name!: Promise; + text!: Promise; + summary!: Promise; + startNote!: Promise; + endNote!: Promise; + + constructor(workId: ID, id: ID, session: { + get: (path: string) => Promise; + }, DOMParser: DOMParser) { + this.#session = session; + this.#workID = workId; + this.#id = id; + this.#DOMParser = DOMParser; + + return new Proxy(this, { + get: async (target, prop) => { + if (!this.isInited) { + await target.init(); + target.isInited = true; + } + switch (prop) { + case "id": + return target.#id; + case "workID": + return target.#workID; + case "name": + return target.#name; + case "text": + return target.#text; + case "summary": + return target.#summary; + case "startNote": + return target.#startNote; + case "endNote": + return target.#endNote; + } + }, + }); + } + + async init() { + console.log("initing chapter"); + const res = await this.#session.get( + `/works/${this.#workID}/chapters/${this.#id}?view_adult=true`, + ); + this.#document = this.#DOMParser.parseFromString( + await res.text(), + "text/html", + ) as HTMLDocument; + this.populateMetadata(); + this.populateSummary(); + this.populateNotes(); + this.populateText(); + } + + populateMetadata() { + this.#name = this.#document.querySelector("h3.title")?.innerText.replace( + /Chapter \d+: /, + "", + ).trim() as string; + } + + populateSummary() { + this.#summary = this.#document.querySelector("#summary > .userstuff") + ?.innerText.trim() as string; + } + + populateNotes() { + const notesList = Array.from( + this.#document.querySelectorAll(".notes > .userstuff"), + ).map((n) => + (n as Element).innerHTML + ); + this.#startNote = notesList[0]?.trim()?.replace(/<\/{0,1}p>/g, "\n")?.trim(); + this.#endNote = notesList[1]?.trim()?.replace(/<\/{0,1}p>/g, "\n")?.trim(); + } + + populateText() { + /*this.text = this.#document.querySelector("div.userstuff[role='article']")?.innerText.trim().replace(/Chapter Text\s+/, "") as string*/ + //"div.userstuff[role='article'] > p" + Array.from( + this.#document.querySelectorAll("div.userstuff[role='article'] > p"), + ).forEach( + (t) => this.#text += (t as Element).innerText + "\n", + ); + this.#text = this.#text.trim(); + } +} diff --git a/src/classes/Work.ts b/src/classes/Work.ts index cf31272..d9762d7 100644 --- a/src/classes/Work.ts +++ b/src/classes/Work.ts @@ -1,41 +1,144 @@ -import axiod from "https://deno.land/x/axiod@0.26.2/mod.ts" -import Chapter from "./Chapter.ts" -import { DOMParser, Element, HTMLDocument } from "https://deno.land/x/deno_dom@v0.1.35-alpha/deno-dom-wasm.ts"; +import Chapter from "./Chapter.ts"; +import type { + DOMParser, +} from "https://denopkg.dev/gh/Ruthenic/deno-dom@master/src/dom/dom-parser.ts"; +import type { + Element, +} from "https://denopkg.dev/gh/Ruthenic/deno-dom@master/src/dom/element.ts"; +import type { + HTMLDocument, +} from "https://denopkg.dev/gh/Ruthenic/deno-dom@master/src/dom/document.ts"; +import { ID } from "../types.d.ts"; +import asyncForEach from "../utils/asyncForeach.ts"; export default class Work { - session: typeof axiod - id: ID - document: HTMLDocument - chapters: Chapter[] = [] - tags: string[] = [] - published?: Date - updated?: Date - - constructor(id: ID, body: string, session: typeof axiod) { - this.session = session - this.id = id - this.document = new DOMParser().parseFromString(body, "text/html") as HTMLDocument - this.populateTags() - this.populateDates() - } - - populateTags() { - Array.from((this.document.querySelector("dd.fandom > ul.commas") as Element).children).map( - t => this.tags.push(t.children[0].innerText) - ) - Array.from((this.document.querySelector("dd.relationship > ul.commas") as Element).children).map( - t => this.tags.push(t.children[0].innerText) - ) - Array.from((this.document.querySelector("dd.character > ul.commas") as Element).children).map( - t => this.tags.push(t.children[0].innerText) - ) - Array.from((this.document.querySelector("dd.freeform > ul.commas") as Element).children).map( - t => this.tags.push(t.children[0].innerText) - ) - } - - populateDates() { - this.published = new Date(this.document.querySelector("dd.published")?.innerText as string) - this.updated = new Date(this.document.querySelector("dd.status")?.innerText as string) - } -} \ No newline at end of file + #session: { + get: (path: string) => Promise; + }; + /** + * ID of the work + */ + id: ID; + #document: HTMLDocument; + #DOMParser: DOMParser; + /** + * a list of Chapters in the work + */ + chapters: Chapter[] = []; + /** + * a list of tags in the work + */ + tags: string[] = []; + /** + * the approximate date the work was published + */ + published?: Date; + /** + * the approximate date the work was last updated + */ + updated?: Date; + + /** + * represents a work on AO3 + * @param id the ID of the work + * @param body the HTML body of the work (in text) + * @param session an axiod session (used for fetching additional details) + */ + constructor( + id: ID, + body: string, + session: { + get: (path: string) => Promise; + }, + DOMParser: DOMParser, + ) { + this.#session = session; + this.id = id; + this.#document = DOMParser.parseFromString( + body, + "text/html", + ) as HTMLDocument; + this.#DOMParser = DOMParser; + } + + //jank incarnate + async init() { + this.populateTags(); + this.populateDates(); + await this.populateChapters(); + } + + populateTags() { + Array.from( + (this.#document.querySelector("dd.fandom > ul.commas") as Element) + .children, + ).map( + (t) => this.tags.push(t.children[0].innerText), + ); + Array.from( + (this.#document.querySelector("dd.relationship > ul.commas") as Element) + .children, + ).map( + (t) => this.tags.push(t.children[0].innerText), + ); + Array.from( + (this.#document.querySelector("dd.character > ul.commas") as Element) + .children, + ).map( + (t) => this.tags.push(t.children[0].innerText), + ); + Array.from( + (this.#document.querySelector("dd.freeform > ul.commas") as Element) + .children, + ).map( + (t) => this.tags.push(t.children[0].innerText), + ); + } + + populateDates() { + this.published = new Date( + this.#document.querySelector("dd.published")?.innerText as string, + ); + this.updated = new Date( + this.#document.querySelector("dd.status")?.innerText as string, + ); + } + + //CW: horrifying jank + async populateChapters() { + const firstChapterUrl = + (this.#document.querySelector("li.chapter > a") as Element).getAttribute( + "href", + ) as string + "?view_adult=true"; + const res = await this.#session.get(firstChapterUrl); + const document = this.#DOMParser.parseFromString( + await res.text(), + "text/html", + ) as HTMLDocument; + + const promises: Promise[] = []; + + Array.from((document.getElementById("selected_id") as Element).children) + .sort( + (a, b) => { + return Number(a.getAttribute("value")) - + Number(b.getAttribute("value")); + }, + ).forEach((c) => { + promises.push((async (c: Element) => { + const newChapter = new Chapter( + this.id, + c.getAttribute("value") as string, + this.#session, + this.#DOMParser, + ); + await 1; //shut up + this.chapters.push( + newChapter, + ); + })(c)); + }); + + await Promise.all(promises); + } +} diff --git a/src/global.d.ts b/src/global.d.ts deleted file mode 100644 index e3d664d..0000000 --- a/src/global.d.ts +++ /dev/null @@ -1 +0,0 @@ -type ID = BigInt | number | string \ No newline at end of file diff --git a/src/types.d.ts b/src/types.d.ts new file mode 100644 index 0000000..d94ef79 --- /dev/null +++ b/src/types.d.ts @@ -0,0 +1 @@ +export type ID = BigInt | number | string; diff --git a/src/utils/asyncForeach.ts b/src/utils/asyncForeach.ts new file mode 100644 index 0000000..2a133b2 --- /dev/null +++ b/src/utils/asyncForeach.ts @@ -0,0 +1,5 @@ +export default async function asyncForEach(array: any[], callback: Function) { + for (let index = 0; index < array.length; index++) { + await callback(array[index], index, array); + } +} diff --git a/test.py b/test.py new file mode 100644 index 0000000..c26ae23 --- /dev/null +++ b/test.py @@ -0,0 +1,13 @@ +import AO3 + +work = AO3.Work(37522864) + +print(f"TAGS: {work.tags}") +print(f"PUBLISHED: {work.date_published}") +print(f"UPDATED: {work.date_updated}") + +e = len(work.chapters) +e = work.id +e = work.chapters[0].text +e = work.chapters[0].title +e = work.chapters[0].text \ No newline at end of file diff --git a/test.ts b/test.ts index 7ba0dbf..47fa73c 100644 --- a/test.ts +++ b/test.ts @@ -3,12 +3,41 @@ import { assert } from "https://deno.land/std@0.161.0/testing/asserts.ts"; const ao3 = new AO3(); -const work = await ao3.getWork(37522864) - -assert(work.tags.length > 0) -assert(work.tags.includes("Attempted Murder")) -console.log(`TAGS: ${work.tags}`) -assert(work.published instanceof Date) -console.log(`PUBLISHED: ${work.published}`) -assert(work.updated instanceof Date) -console.log(`UPDATED: ${work.updated}`) \ No newline at end of file +let work = await ao3.getWork("37522864"); + +await work.init(); + +assert(work.tags.length > 0); +assert(work.tags.includes("Hazbin Hotel (Web Series)")); //fandom tags +assert(work.tags.includes("Angel Dust/Husk (Hazbin Hotel)")); //ship tags +assert(work.tags.includes("FizzaRolli (Helluva Boss)")); //character tags +assert(work.tags.includes("Attempted Murder")); //freeform tags +console.log(`TAGS: ${work.tags}`); +assert(work.published instanceof Date); +console.log(`PUBLISHED: ${work.published}`); +assert(work.updated instanceof Date); +console.log(`UPDATED: ${work.updated}`); + +assert(work.chapters.length > 0); +assert(await work.chapters[0].id == "93652975"); +assert(await work.chapters[0].workID == "37522864"); +assert(await work.chapters[0].name == "Part I"); +assert((await work.chapters[0].text).length > 0); + +work = await ao3.getWork("39612636"); + +await work.init(); + +//NOTE: this is not to actually make sure we're at feature parity; this is just to track regressions +assert( + (await work.chapters[22].startNote) === + `TW: minor suicidal themes\n\nI'll admit, this chapter is a little... Choppy. I had an idea and I had to make do with the time I had. \n\nSmall note: I changed Diamond’s description. Before she had black tattoos and white scars over her skin. Now it’s just all scarring.`, +); +assert( + (await work.chapters[22].endNote) === + "Sorry, no Ozzie + Fizz reunion yet. I promise the next chapter will be better tho", +); +assert( + (await work.chapters[22].summary) === + "Fizz tries to make the best out of his comeback", +);