feature: First push to git

This commit is contained in:
Keith Solomon
2026-05-16 14:02:49 -05:00
commit 265f69d95a
46 changed files with 11551 additions and 0 deletions
+42
View File
@@ -0,0 +1,42 @@
import * as cheerio from 'cheerio';
import { ExtractedLink, ParserInput, ParserPlugin } from './types.js';
/**
 * Finds a short label for the section an element sits in.
 *
 * Lookup order:
 * 1. The closest preceding sibling of `element` that is a heading
 *    (`h1`-`h6`) or bold text (`strong`, `b`).
 * 2. Otherwise, the closest preceding sibling of the element's parent that
 *    is a heading, paragraph or table row (`h1`-`h6`, `p`, `tr`).
 *
 * The label has runs of whitespace collapsed to a single space and is
 * trimmed, matching how link titles and context are normalized elsewhere in
 * this file, so headings that wrap across source lines do not carry
 * newlines or indentation.
 *
 * @param $ - Cheerio instance bound to the document that contains `element`.
 * @param element - The node to start from; it is passed straight to `$()`.
 * @returns The normalized label, or `undefined` when neither lookup yields
 *   non-empty text.
 */
function nearestSection($: cheerio.CheerioAPI, element: any): string | undefined {
  const collapse = (text: string): string => text.replace(/\s+/g, ' ').trim();
  const node = $(element);

  // prevAll() yields the nearest sibling first, so first() is the closest match.
  const siblingLabel = collapse(node.prevAll('h1,h2,h3,h4,h5,h6,strong,b').first().text());
  if (siblingLabel) {
    return siblingLabel;
  }

  // Nothing usable next to the element itself: look one level up.
  const parentLabel = collapse(
    node.parent().prevAll('h1,h2,h3,h4,h5,h6,p,tr').first().text()
  );
  return parentLabel || undefined;
}
/**
 * Parser plugin that extracts every `<a href>` from an HTML document without
 * any site-specific knowledge. Its `matches` predicate accepts every input.
 */
export const genericParser: ParserPlugin = {
  name: 'generic',
  // Unconditionally true: this plugin never rejects an input.
  matches: () => true,
  /**
   * Loads `input.html` and returns one entry per anchor that has a usable
   * `href`.
   *
   * For each anchor:
   * - `title` / `sourceText`: the anchor text, or its `aria-label` when the
   *   text is empty; whitespace-normalized in both cases.
   * - `url`: the `href` value with surrounding whitespace removed. It is not
   *   resolved against any base URL here.
   * - `context`: whitespace-normalized text of the closest enclosing
   *   `p`, `li`, `td` or `div`.
   * - `description`: the context, unless it is empty or identical to the
   *   title, in which case an empty string.
   * - `section`: the result of `nearestSection` for the anchor.
   *
   * Anchors whose `href` is empty or whitespace-only are dropped.
   *
   * @param input - Parser input; only its `html` field is read here.
   * @returns The extracted links in document order.
   */
  parse(input: ParserInput): ExtractedLink[] {
    const collapse = (text: string): string => text.replace(/\s+/g, ' ').trim();
    const $ = cheerio.load(input.html);

    return $('a[href]')
      .toArray()
      .map((element) => {
        const anchor = $(element);
        const title = collapse(anchor.text()) || collapse(anchor.attr('aria-label') ?? '');
        // href may legally be padded with spaces; trim so that a blank value
        // is caught by the filter below and real URLs are emitted clean.
        const url = (anchor.attr('href') ?? '').trim();
        const context = collapse(anchor.closest('p,li,td,div').text());

        return {
          url,
          title,
          description: context && context !== title ? context : '',
          sourceText: title,
          section: nearestSection($, element),
          context
        };
      })
      .filter((link) => Boolean(link.url));
  }
};