feat: add sqlite catalog web app

This commit is contained in:
Keith Solomon
2026-05-17 14:05:25 -05:00
parent 140c16891f
commit fe0678fac2
22 changed files with 1452 additions and 12 deletions
+19
View File
@@ -2,10 +2,12 @@ import { Command, Option } from 'commander';
import { writeFile } from 'node:fs/promises';
import { loadConfig } from '../config/config.js';
import { createGmailClient } from '../gmail/client.js';
import { DatabaseWriter } from '../output/databaseWriter.js';
import { ExcelWriter } from '../output/excel.js';
import { createGoogleSheetsWriter } from '../output/googleSheets.js';
import { OutputWriter } from '../output/sheets.js';
import { runCatalog } from '../run/runCatalog.js';
import { createWebApp } from '../web/app.js';
import { validateDateFilters } from './flags.js';
const sampleConfig = `gmail:
@@ -79,6 +81,20 @@ export function createProgram(): Command {
console.log(JSON.stringify(summary, null, 2));
});
program
.command('serve')
.description('Start a local web app for browsing the SQLite catalog')
.option('--config <path>', 'Config path', './config.yaml')
.option('--host <host>', 'Host to bind', '127.0.0.1')
.option('--port <port>', 'Port to bind', (value) => Number(value), 3000)
.action(async (options) => {
const config = await loadConfig(options.config);
const app = createWebApp(config.database.path);
app.listen(options.port, options.host, () => {
console.log(`Newsletter Link Catalog listening at http://${options.host}:${options.port}`);
});
});
return program;
}
@@ -86,6 +102,9 @@ async function createWriters(
config: Awaited<ReturnType<typeof loadConfig>>
): Promise<OutputWriter[]> {
const writers: OutputWriter[] = [];
if (config.database.enabled) {
writers.push(new DatabaseWriter(config.database.path));
}
if (config.output.excel.enabled) {
writers.push(new ExcelWriter(config.output.excel.path));
}
+6
View File
@@ -81,6 +81,12 @@ const configSchema = z
linkConcurrency: z.number().int().positive().default(3)
})
.default({}),
database: z
.object({
enabled: z.boolean().default(true),
path: z.string().default('./data/newsletter-catalog.sqlite')
})
.default({}),
stateFile: z.string().default('~/.nlc/state.json'),
plugins: z.record(z.string(), z.any()).default({})
})
+63
View File
@@ -0,0 +1,63 @@
/**
 * SQL script that creates the catalog tables.
 *
 * The script enables foreign-key enforcement and then declares seven tables:
 * `newsletters`, `issues`, `links`, `link_occurrences`, `sponsors`,
 * `dead_links` and `runs`. Every statement uses `CREATE TABLE IF NOT EXISTS`,
 * so executing the script against a database that already has these tables
 * leaves the existing tables in place.
 *
 * Uniqueness rules declared here: newsletter names, link URLs, the
 * `(newsletter_id, issue_date, title)` triple on issues, and the
 * `(issue_id, link_id)` pair on link occurrences. `sponsors` and `dead_links`
 * declare no uniqueness constraint.
 */
export const catalogSchema = `
PRAGMA foreign_keys = ON;
CREATE TABLE IF NOT EXISTS newsletters (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE
);
CREATE TABLE IF NOT EXISTS issues (
id INTEGER PRIMARY KEY AUTOINCREMENT,
newsletter_id INTEGER NOT NULL REFERENCES newsletters(id) ON DELETE CASCADE,
issue_date TEXT NOT NULL,
title TEXT NOT NULL DEFAULT '',
message_id TEXT,
UNIQUE(newsletter_id, issue_date, title)
);
CREATE TABLE IF NOT EXISTS links (
id INTEGER PRIMARY KEY AUTOINCREMENT,
url TEXT NOT NULL UNIQUE
);
CREATE TABLE IF NOT EXISTS link_occurrences (
id INTEGER PRIMARY KEY AUTOINCREMENT,
issue_id INTEGER NOT NULL REFERENCES issues(id) ON DELETE CASCADE,
link_id INTEGER NOT NULL REFERENCES links(id) ON DELETE CASCADE,
category TEXT NOT NULL DEFAULT '',
title TEXT NOT NULL DEFAULT '',
description TEXT NOT NULL DEFAULT '',
page_title_meta TEXT NOT NULL DEFAULT '',
also_in TEXT NOT NULL DEFAULT '',
UNIQUE(issue_id, link_id)
);
CREATE TABLE IF NOT EXISTS sponsors (
id INTEGER PRIMARY KEY AUTOINCREMENT,
issue_id INTEGER REFERENCES issues(id) ON DELETE SET NULL,
link_id INTEGER REFERENCES links(id) ON DELETE SET NULL,
newsletter TEXT NOT NULL DEFAULT '',
sponsor TEXT NOT NULL DEFAULT '',
description TEXT NOT NULL DEFAULT ''
);
CREATE TABLE IF NOT EXISTS dead_links (
id INTEGER PRIMARY KEY AUTOINCREMENT,
link_id INTEGER REFERENCES links(id) ON DELETE SET NULL,
url TEXT NOT NULL,
status TEXT NOT NULL DEFAULT '',
source TEXT NOT NULL DEFAULT '',
date TEXT NOT NULL DEFAULT ''
);
CREATE TABLE IF NOT EXISTS runs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
started_at TEXT NOT NULL,
mode TEXT NOT NULL DEFAULT '',
newsletters_processed INTEGER NOT NULL DEFAULT 0,
links_extracted INTEGER NOT NULL DEFAULT 0,
sponsors INTEGER NOT NULL DEFAULT 0,
dead_links INTEGER NOT NULL DEFAULT 0,
errors INTEGER NOT NULL DEFAULT 0
);
`;
+202
View File
@@ -0,0 +1,202 @@
import { mkdirSync } from 'node:fs';
import { createRequire } from 'node:module';
import { dirname } from 'node:path';
import { CatalogPayload } from '../output/sheets.js';
import { catalogSchema } from './schema.js';
// Build a CommonJS-style `require` scoped to this ES module and use it to load
// the built-in `node:sqlite` module; the cast gives the result its static types.
// NOTE(review): presumably used instead of a static `import` to avoid
// loader/bundler handling of `node:sqlite` — confirm the motivation.
const require = createRequire(import.meta.url);
const sqlite = require('node:sqlite') as typeof import('node:sqlite');
/**
 * A catalog payload plus the summary figures recorded for one run.
 *
 * The extra fields are written as a single row of the `runs` table by
 * `CatalogDatabase.saveCatalogRun`.
 */
export interface CatalogRunPayload extends CatalogPayload {
// Stored in `runs.mode`.
mode: string;
// Stored in `runs.newsletters_processed`.
newslettersProcessed: number;
// Stored in `runs.links_extracted`.
linksExtracted: number;
// Stored in `runs.sponsors`.
sponsorCount: number;
// Stored in `runs.dead_links`.
deadLinkCount: number;
// Stored in `runs.errors`.
errors: number;
}
/**
 * Synchronous access layer for the SQLite newsletter catalog.
 *
 * Wraps a `node:sqlite` `DatabaseSync` handle. The constructor opens (and, for
 * file paths, creates the parent directory of) the database; callers are
 * responsible for calling {@link CatalogDatabase.close} when done.
 */
export class CatalogDatabase {
  private readonly db: import('node:sqlite').DatabaseSync;

  /** Prepared statements keyed by SQL text, so per-row inserts are prepared once. */
  private readonly statements = new Map<string, import('node:sqlite').StatementSync>();

  /**
   * Opens the database at `path` and enables foreign-key enforcement.
   *
   * @param path - File path of the database, or `':memory:'` for an in-memory
   *   database (in which case no directory is created).
   */
  public constructor(private readonly path: string) {
    if (path !== ':memory:') {
      mkdirSync(dirname(path), { recursive: true });
    }
    this.db = new sqlite.DatabaseSync(path);
    this.db.exec('PRAGMA foreign_keys = ON');
  }

  /** Executes the catalog schema script; safe to call repeatedly. */
  public migrate(): void {
    this.db.exec(catalogSchema);
  }

  /** Drops cached statements and closes the underlying database handle. */
  public close(): void {
    this.statements.clear();
    this.db.close();
  }

  /** Returns the names of all non-internal tables, sorted by name. */
  public tableNames(): string[] {
    const rows = this.statement(
      "SELECT name FROM sqlite_master WHERE type = 'table' AND name NOT LIKE 'sqlite_%' ORDER BY name"
    ).all() as Array<{ name: string }>;
    return rows.map((row) => row.name);
  }

  /**
   * Counts the rows of `table`.
   *
   * @param table - Table name made only of lowercase letters and underscores.
   * @returns Number of rows in the table.
   * @throws Error if `table` contains any other character. Identifiers cannot
   *   be bound as SQL parameters, so the name is validated rather than
   *   rewritten: stripping characters would silently query a different table.
   */
  public count(table: string): number {
    if (!/^[a-z_]+$/.test(table)) {
      throw new Error(`Invalid catalog table name: ${JSON.stringify(table)}`);
    }
    const row = this.statement(`SELECT COUNT(*) AS count FROM ${table}`).get() as {
      count: number;
    };
    return row.count;
  }

  /**
   * Persists one run: the run summary row, every link occurrence, sponsor and
   * dead link, all inside a single transaction.
   *
   * @param payload - Catalog rows plus run summary figures.
   * @throws Rethrows whatever error interrupted the save, after rolling back.
   */
  public saveCatalogRun(payload: CatalogRunPayload): void {
    this.migrate();
    this.db.exec('BEGIN');
    try {
      this.insertRun(payload);
      for (const row of payload.rows) {
        this.insertOccurrence(row);
      }
      for (const sponsor of payload.sponsors) {
        this.insertSponsor(sponsor);
      }
      for (const deadLink of payload.deadLinks) {
        this.insertDeadLink(deadLink);
      }
      this.db.exec('COMMIT');
    } catch (error) {
      try {
        this.db.exec('ROLLBACK');
      } catch {
        // A failing ROLLBACK must not hide the error that caused it.
      }
      throw error;
    }
  }

  /** Returns row counts for the tables shown on the dashboard. */
  public dashboardCounts(): Record<string, number> {
    return {
      newsletters: this.count('newsletters'),
      issues: this.count('issues'),
      links: this.count('links'),
      sponsors: this.count('sponsors'),
      deadLinks: this.count('dead_links'),
      runs: this.count('runs')
    };
  }

  /** Returns every link occurrence joined with its newsletter, issue and URL, newest issue first. */
  public contentLinks(): any[] {
    return this.statement(
      `SELECT n.name AS newsletter, i.issue_date AS issueDate, o.category, o.title, l.url, o.description
FROM link_occurrences o
JOIN issues i ON i.id = o.issue_id
JOIN newsletters n ON n.id = i.newsletter_id
JOIN links l ON l.id = o.link_id
ORDER BY i.issue_date DESC, n.name, o.title`
    ).all();
  }

  /** Returns sponsor rows ordered by newsletter, then sponsor. */
  public sponsoredLinks(): any[] {
    return this.statement(
      'SELECT newsletter, sponsor, description FROM sponsors ORDER BY newsletter, sponsor'
    ).all();
  }

  /** Returns dead-link rows, most recent date first. */
  public deadLinks(): any[] {
    return this.statement(
      'SELECT url, status, source, date FROM dead_links ORDER BY date DESC, url'
    ).all();
  }

  /** Returns all recorded runs, most recent first. */
  public runs(): any[] {
    return this.statement('SELECT * FROM runs ORDER BY started_at DESC, id DESC').all();
  }

  /** Returns the cached prepared statement for `sql`, preparing it on first use. */
  private statement(sql: string): import('node:sqlite').StatementSync {
    let prepared = this.statements.get(sql);
    if (prepared === undefined) {
      prepared = this.db.prepare(sql);
      this.statements.set(sql, prepared);
    }
    return prepared;
  }

  /** Inserts the summary row for a run, stamped with the current time in ISO format. */
  private insertRun(payload: CatalogRunPayload): void {
    this.statement(
      `INSERT INTO runs
(started_at, mode, newsletters_processed, links_extracted, sponsors, dead_links, errors)
VALUES (?, ?, ?, ?, ?, ?, ?)`
    ).run(
      new Date().toISOString(),
      payload.mode,
      payload.newslettersProcessed,
      payload.linksExtracted,
      payload.sponsorCount,
      payload.deadLinkCount,
      payload.errors
    );
  }

  /**
   * Inserts or updates the occurrence of a link in an issue, creating the
   * newsletter, issue and link rows as needed. Issues are keyed with an empty
   * title.
   */
  private insertOccurrence(row: Record<string, unknown>): void {
    const newsletterId = this.upsertNewsletter(String(row['Source Newsletter'] ?? 'Newsletter'));
    const issueId = this.upsertIssue(newsletterId, String(row['Issue Date'] ?? ''), '');
    const linkId = this.upsertLink(String(row['Link URL'] ?? ''));
    this.statement(
      `INSERT INTO link_occurrences
(issue_id, link_id, category, title, description, page_title_meta, also_in)
VALUES (?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(issue_id, link_id) DO UPDATE SET
category = excluded.category,
title = excluded.title,
description = excluded.description,
page_title_meta = excluded.page_title_meta,
also_in = excluded.also_in`
    ).run(
      issueId,
      linkId,
      String(row.Category ?? ''),
      String(row.Title ?? ''),
      String(row.Description ?? ''),
      String(row['Page Title + Meta'] ?? ''),
      String(row['Also In'] ?? '')
    );
  }

  /** Inserts a sponsor row; `issue_id` is always stored as NULL. */
  private insertSponsor(row: Record<string, unknown>): void {
    const newsletter = String(row.Newsletter ?? '');
    const linkId = this.optionalLinkId(String(row.Link ?? ''));
    this.statement(
      `INSERT INTO sponsors (issue_id, link_id, newsletter, sponsor, description)
VALUES (?, ?, ?, ?, ?)`
    ).run(null, linkId, newsletter, String(row.Sponsor ?? ''), String(row.Description ?? ''));
  }

  /** Inserts a dead-link row, linking it to the `links` table when a URL is present. */
  private insertDeadLink(row: Record<string, unknown>): void {
    const url = String(row.URL ?? '');
    const linkId = this.optionalLinkId(url);
    this.statement(
      'INSERT INTO dead_links (link_id, url, status, source, date) VALUES (?, ?, ?, ?, ?)'
    ).run(linkId, url, String(row.Status ?? ''), String(row.Source ?? ''), String(row.Date ?? ''));
  }

  /**
   * Resolves a link id for tables whose `link_id` column is nullable.
   * A missing URL yields NULL instead of registering an empty-URL row in
   * `links` that every URL-less record would otherwise share.
   */
  private optionalLinkId(url: string): number | null {
    return url === '' ? null : this.upsertLink(url);
  }

  /** Returns the id of the newsletter named `name`, inserting it if absent. */
  private upsertNewsletter(name: string): number {
    this.statement('INSERT OR IGNORE INTO newsletters (name) VALUES (?)').run(name);
    return (
      this.statement('SELECT id FROM newsletters WHERE name = ?').get(name) as { id: number }
    ).id;
  }

  /** Returns the id of the issue identified by newsletter, date and title, inserting it if absent. */
  private upsertIssue(newsletterId: number, issueDate: string, title: string): number {
    this.statement(
      'INSERT OR IGNORE INTO issues (newsletter_id, issue_date, title) VALUES (?, ?, ?)'
    ).run(newsletterId, issueDate, title);
    return (
      this.statement(
        'SELECT id FROM issues WHERE newsletter_id = ? AND issue_date = ? AND title = ?'
      ).get(newsletterId, issueDate, title) as { id: number }
    ).id;
  }

  /** Returns the id of the link with the given URL, inserting it if absent. */
  private upsertLink(url: string): number {
    this.statement('INSERT OR IGNORE INTO links (url) VALUES (?)').run(url);
    return (this.statement('SELECT id FROM links WHERE url = ?').get(url) as { id: number }).id;
  }
}
+23
View File
@@ -0,0 +1,23 @@
import { CatalogDatabase } from '../database/store.js';
import { CatalogPayload, OutputWriter } from './sheets.js';
/**
 * Output writer that stores a catalog run in the SQLite catalog database.
 */
export class DatabaseWriter implements OutputWriter {
  public constructor(private readonly path: string) {}

  /**
   * Opens the database, saves the payload together with the run summary, and
   * always closes the database afterwards.
   *
   * @param payload - Catalog rows, sponsors and dead links to persist.
   * @param summary - Optional run figures; missing link, sponsor and dead-link
   *   counts fall back to the payload's array lengths, the mode to `'run'`,
   *   and the remaining counts to 0.
   */
  public async write(payload: CatalogPayload, summary: Record<string, unknown> = {}): Promise<void> {
    const catalog = new CatalogDatabase(this.path);
    try {
      const { mode, newslettersProcessed, linksExtracted, sponsors, deadLinks, errors } = summary;
      catalog.saveCatalogRun({
        mode: String(mode ?? 'run'),
        newslettersProcessed: Number(newslettersProcessed ?? 0),
        linksExtracted: Number(linksExtracted ?? payload.rows.length),
        sponsorCount: Number(sponsors ?? payload.sponsors.length),
        deadLinkCount: Number(deadLinks ?? payload.deadLinks.length),
        errors: Number(errors ?? 0),
        // Spread last so the payload's own fields take precedence.
        ...payload
      });
    } finally {
      catalog.close();
    }
  }
}
+1 -1
View File
@@ -19,5 +19,5 @@ export interface CatalogPayload {
}
/**
 * Contract for a destination that catalog results can be written to.
 *
 * `write` receives the catalog payload and may additionally receive a summary
 * record describing the run; it returns a promise whose resolved value is not
 * constrained by this interface.
 */
export interface OutputWriter {
write(payload: CatalogPayload): Promise<unknown>;
write(payload: CatalogPayload, summary?: Record<string, unknown>): Promise<unknown>;
}
+8
View File
@@ -32,6 +32,10 @@ function sponsorMarkerText(value: string): string | undefined {
}
function blockTokens($: cheerio.CheerioAPI, node: any): Token[] {
if (!node) {
return [];
}
if (node.type === 'text') {
const text = compactText(node.data ?? '');
return text ? [{ type: 'text', text }] : [];
@@ -49,6 +53,10 @@ function blockTokens($: cheerio.CheerioAPI, node: any): Token[] {
function localContext($: cheerio.CheerioAPI, element: any, title: string): string {
const block = $(element).closest('p,li,td,div').first();
if (block.length === 0) {
return title;
}
const tokens = blockTokens($, block.get(0));
const anchorIndex = tokens.findIndex(
(token) => token.type === 'anchor' && token.element === element
+20 -7
View File
@@ -47,6 +47,16 @@ function escapeRegExp(value: string): string {
return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
/**
 * Names the mode of a run from its options.
 *
 * @param options - Run options; only `full` and `dryRun` are consulted.
 * @returns `'full'` when `full` is set (even if `dryRun` is also set),
 *   otherwise `'dry-run'` when `dryRun` is set, otherwise `'incremental'`.
 */
function runMode(options: RunOptions): string {
  return options.full ? 'full' : options.dryRun ? 'dry-run' : 'incremental';
}
export async function runCatalog(options: RunOptions): Promise<RunSummary> {
const config = normalizeConfig(options.config);
const state = new StateStore(config.stateFile);
@@ -113,17 +123,20 @@ export async function runCatalog(options: RunOptions): Promise<RunSummary> {
}
}
if (!options.dryRun) {
for (const writer of options.writers) {
await writer.write({ rows, sponsors, deadLinks: [] });
}
}
return {
const summary = {
mode: runMode(options),
newslettersProcessed: messages.length,
linksExtracted: rows.length,
sponsors: sponsors.length,
deadLinks: 0,
errors
};
if (!options.dryRun) {
for (const writer of options.writers) {
await writer.write({ rows, sponsors, deadLinks: [] }, summary);
}
}
return summary;
}
+101
View File
@@ -0,0 +1,101 @@
import express from 'express';
import { CatalogDatabase } from '../database/store.js';
import { dashboard, page, table } from './views.js';
/**
 * Builds the Express application that serves the catalog pages.
 *
 * Routes: `/` (dashboard counts) and one table page each for `/links`,
 * `/sponsors`, `/dead-links` and `/runs`. Each request opens the database at
 * `databasePath` through `withDatabase`; failures are forwarded to the final
 * error middleware, which logs the error and answers with a generic 500 page.
 *
 * @param databasePath - Path of the SQLite catalog database.
 * @returns The configured Express application (not yet listening).
 */
export function createWebApp(databasePath: string) {
  // One entry per table page, registered in this order.
  const listings: Array<{
    route: string;
    heading: string;
    load: (db: CatalogDatabase) => Record<string, unknown>[];
    columns: Array<[string, string]>;
  }> = [
    {
      route: '/links',
      heading: 'Links',
      load: (db) => db.contentLinks(),
      columns: [
        ['newsletter', 'Newsletter'],
        ['issueDate', 'Issue Date'],
        ['category', 'Category'],
        ['title', 'Title'],
        ['url', 'URL'],
        ['description', 'Description']
      ]
    },
    {
      route: '/sponsors',
      heading: 'Sponsored Links',
      load: (db) => db.sponsoredLinks(),
      columns: [
        ['newsletter', 'Newsletter'],
        ['sponsor', 'Sponsor'],
        ['description', 'Description']
      ]
    },
    {
      route: '/dead-links',
      heading: 'Dead Links',
      load: (db) => db.deadLinks(),
      columns: [
        ['url', 'URL'],
        ['status', 'Status'],
        ['source', 'Source'],
        ['date', 'Date']
      ]
    },
    {
      route: '/runs',
      heading: 'Runs',
      load: (db) => db.runs(),
      columns: [
        ['started_at', 'Started'],
        ['mode', 'Mode'],
        ['newsletters_processed', 'Newsletters'],
        ['links_extracted', 'Links'],
        ['sponsors', 'Sponsors'],
        ['dead_links', 'Dead Links'],
        ['errors', 'Errors']
      ]
    }
  ];

  const app = express();

  app.get('/', (_req, res, next) => {
    withDatabase(databasePath, (db) => res.send(dashboard(db.dashboardCounts()))).catch(next);
  });

  for (const { route, heading, load, columns } of listings) {
    app.get(route, (_req, res, next) => {
      withDatabase(databasePath, (db) =>
        res.send(page(heading, `<h1>${heading}</h1>${table(load(db), columns)}`))
      ).catch(next);
    });
  }

  // Four-argument signature marks this as Express error-handling middleware.
  app.use(
    (error: Error, _req: express.Request, res: express.Response, _next: express.NextFunction) => {
      console.error(error);
      res.status(500).send(page('Error', '<h1>Error</h1><p>Something went wrong.</p>'));
    }
  );

  return app;
}
/**
 * Runs `callback` against a freshly opened, migrated catalog database and
 * closes the database afterwards, whether or not the callback throws.
 *
 * The function is `async` so that any synchronous failure (opening,
 * migrating, or the callback itself) surfaces as a rejected promise.
 *
 * @param databasePath - Path of the SQLite catalog database.
 * @param callback - Synchronous work to perform with the open database.
 */
async function withDatabase(
  databasePath: string,
  callback: (database: CatalogDatabase) => void
): Promise<void> {
  const catalog = new CatalogDatabase(databasePath);
  try {
    catalog.migrate();
    callback(catalog);
  } finally {
    catalog.close();
  }
}
+63
View File
@@ -0,0 +1,63 @@
/**
 * Converts a value to text that is safe to embed in HTML.
 *
 * `null` and `undefined` become the empty string; anything else is passed
 * through `String()`. The characters `&`, `<`, `>`, `"` and `'` are replaced
 * by their HTML entities.
 *
 * @param value - Any value to render.
 * @returns The escaped text.
 */
function escapeHtml(value: unknown): string {
  const entities: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;'
  };
  const text = String(value ?? '');
  // Single pass: each special character is looked up and substituted once.
  return text.replace(/[&<>"']/g, (char) => entities[char] ?? char);
}
/**
 * Wraps a body fragment in the full HTML document used by every page:
 * document head with inline styles, the navigation bar, then the body.
 *
 * @param title - Page title; HTML-escaped before being placed in `<title>`.
 * @param body - HTML fragment inserted as-is (NOT escaped), so callers must
 *   escape any untrusted content themselves.
 * @returns The complete HTML document as a string.
 */
export function page(title: string, body: string): string {
return `<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>${escapeHtml(title)}</title>
<style>
body { font-family: Arial, sans-serif; margin: 2rem; color: #202124; }
nav a { margin-right: 1rem; }
table { border-collapse: collapse; width: 100%; margin-top: 1rem; }
th, td { border: 1px solid #ddd; padding: 0.45rem; text-align: left; vertical-align: top; }
th { background: #f5f5f5; }
.cards { display: flex; flex-wrap: wrap; gap: 1rem; }
.card { border: 1px solid #ddd; padding: 1rem; min-width: 10rem; }
.muted { color: #666; }
</style>
</head>
<body>
<nav>
<a href="/">Dashboard</a>
<a href="/links">Links</a>
<a href="/sponsors">Sponsored Links</a>
<a href="/dead-links">Dead Links</a>
<a href="/runs">Runs</a>
</nav>
${body}
</body>
</html>`;
}
/**
 * Renders rows as an HTML table.
 *
 * @param rows - Records to display, one table row each.
 * @param columns - `[key, label]` pairs: `key` selects the record property,
 *   `label` is the column heading. Labels and cell values are HTML-escaped.
 * @returns The table markup, or a "No rows yet." paragraph when `rows` is empty.
 */
export function table(rows: Record<string, unknown>[], columns: Array<[string, string]>): string {
  if (rows.length === 0) {
    return '<p class="muted">No rows yet.</p>';
  }

  const headerCells = columns.map(([, label]) => `<th>${escapeHtml(label)}</th>`).join('');
  const bodyRows = rows
    .map((record) => {
      const cells = columns.map(([key]) => `<td>${escapeHtml(record[key])}</td>`).join('');
      return `<tr>${cells}</tr>`;
    })
    .join('');

  return `<table><thead><tr>${headerCells}</tr></thead><tbody>${bodyRows}</tbody></table>`;
}
/**
 * Renders the dashboard page: a heading followed by one card per count.
 *
 * @param counts - Map from label to number; each entry becomes a card showing
 *   the number above the label, both HTML-escaped.
 * @returns The complete HTML document for the dashboard.
 */
export function dashboard(counts: Record<string, number>): string {
  const cards: string[] = [];
  for (const [label, total] of Object.entries(counts)) {
    cards.push(
      `<div class="card"><strong>${escapeHtml(total)}</strong><br>${escapeHtml(label)}</div>`
    );
  }

  // Joined with newlines to produce the same markup layout as a multi-line template.
  const body = [
    '<h1>Newsletter Link Catalog</h1>',
    '<section class="cards">',
    cards.join(''),
    '</section>'
  ].join('\n');

  return page('Newsletter Link Catalog', body);
}