✨ feature: Enhance output options with Google Sheets integration and improve Excel writer functionality
This commit is contained in:
+28
-3
@@ -3,6 +3,8 @@ import { writeFile } from 'node:fs/promises';
|
|||||||
import { loadConfig } from '../config/config.js';
|
import { loadConfig } from '../config/config.js';
|
||||||
import { createGmailClient } from '../gmail/client.js';
|
import { createGmailClient } from '../gmail/client.js';
|
||||||
import { ExcelWriter } from '../output/excel.js';
|
import { ExcelWriter } from '../output/excel.js';
|
||||||
|
import { createGoogleSheetsWriter } from '../output/googleSheets.js';
|
||||||
|
import { OutputWriter } from '../output/sheets.js';
|
||||||
import { runCatalog } from '../run/runCatalog.js';
|
import { runCatalog } from '../run/runCatalog.js';
|
||||||
import { validateDateFilters } from './flags.js';
|
import { validateDateFilters } from './flags.js';
|
||||||
|
|
||||||
@@ -54,9 +56,7 @@ export function createProgram(): Command {
|
|||||||
.action(async (options) => {
|
.action(async (options) => {
|
||||||
validateDateFilters(options);
|
validateDateFilters(options);
|
||||||
const config = await loadConfig(options.config);
|
const config = await loadConfig(options.config);
|
||||||
const writers = config.output.excel.enabled
|
const writers = await createWriters(config);
|
||||||
? [new ExcelWriter(config.output.excel.path)]
|
|
||||||
: [];
|
|
||||||
const messages =
|
const messages =
|
||||||
process.env.NLC_FIXTURE === '1'
|
process.env.NLC_FIXTURE === '1'
|
||||||
? fixtureMessages()
|
? fixtureMessages()
|
||||||
@@ -82,6 +82,31 @@ export function createProgram(): Command {
|
|||||||
return program;
|
return program;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the output writers selected by the loaded configuration.
 *
 * An Excel writer is produced when `output.excel.enabled` is set and a Google
 * Sheets writer when `output.sheetsApi.enabled` is set. The Sheets settings are
 * checked before the Sheets writer is created.
 *
 * @param config - Resolved configuration as returned by `loadConfig`.
 * @returns The enabled writers, Excel first and Google Sheets second; empty
 *   when neither output is enabled.
 * @throws Error when Sheets output is enabled but the credentials path, token
 *   path, or spreadsheet id is missing.
 */
async function createWriters(
  config: Awaited<ReturnType<typeof loadConfig>>
): Promise<OutputWriter[]> {
  const { excel, sheetsApi } = config.output;
  const excelWriters: OutputWriter[] = excel.enabled ? [new ExcelWriter(excel.path)] : [];

  if (!sheetsApi.enabled) {
    return excelWriters;
  }

  const { credentials, token, spreadsheetId } = sheetsApi;
  if (!(credentials && token)) {
    throw new Error('Google Sheets output requires sheets_api credentials and token paths');
  }
  if (!spreadsheetId) {
    throw new Error('Google Sheets output requires output.sheets_api.spreadsheet_id');
  }

  const sheetsWriter = await createGoogleSheetsWriter({ credentials, token, spreadsheetId });
  return [...excelWriters, sheetsWriter];
}
|
||||||
|
|
||||||
async function fetchGmailMessages(
|
async function fetchGmailMessages(
|
||||||
config: Awaited<ReturnType<typeof loadConfig>>,
|
config: Awaited<ReturnType<typeof loadConfig>>,
|
||||||
options: { dryRun?: number | boolean; from?: string; to?: string; last?: string }
|
options: { dryRun?: number | boolean; from?: string; to?: string; last?: string }
|
||||||
|
|||||||
+9
-1
@@ -10,6 +10,14 @@ import { NewsletterMessage } from '../parsing/types.js';
|
|||||||
const gmailScopes = ['https://www.googleapis.com/auth/gmail.readonly'];
|
const gmailScopes = ['https://www.googleapis.com/auth/gmail.readonly'];
|
||||||
|
|
||||||
/**
 * Authorizes a Google OAuth client using only the scopes listed in `gmailScopes`.
 *
 * @param credentialsPath - Path to the OAuth client credentials JSON file.
 * @param tokenPath - Path to the stored OAuth token JSON file.
 * @returns Whatever `authorizeGoogleOAuth` resolves to for the Gmail scopes.
 */
export async function authorizeGmail(credentialsPath: string, tokenPath: string) {
  const gmailClient = await authorizeGoogleOAuth(credentialsPath, tokenPath, gmailScopes);
  return gmailClient;
}
|
||||||
|
|
||||||
|
export async function authorizeGoogleOAuth(
|
||||||
|
credentialsPath: string,
|
||||||
|
tokenPath: string,
|
||||||
|
scopes: string[]
|
||||||
|
) {
|
||||||
const credentials = JSON.parse(await readFile(expandHome(credentialsPath), 'utf8'));
|
const credentials = JSON.parse(await readFile(expandHome(credentialsPath), 'utf8'));
|
||||||
const clientConfig = credentials.installed ?? credentials.web;
|
const clientConfig = credentials.installed ?? credentials.web;
|
||||||
const oauth = new google.auth.OAuth2(
|
const oauth = new google.auth.OAuth2(
|
||||||
@@ -22,7 +30,7 @@ export async function authorizeGmail(credentialsPath: string, tokenPath: string)
|
|||||||
oauth.setCredentials(JSON.parse(await readFile(expandHome(tokenPath), 'utf8')));
|
oauth.setCredentials(JSON.parse(await readFile(expandHome(tokenPath), 'utf8')));
|
||||||
return oauth;
|
return oauth;
|
||||||
} catch {
|
} catch {
|
||||||
const url = oauth.generateAuthUrl({ access_type: 'offline', scope: gmailScopes });
|
const url = oauth.generateAuthUrl({ access_type: 'offline', scope: scopes });
|
||||||
const code = await waitForBrowserCode(url);
|
const code = await waitForBrowserCode(url);
|
||||||
const { tokens } = await oauth.getToken(code);
|
const { tokens } = await oauth.getToken(code);
|
||||||
oauth.setCredentials(tokens);
|
oauth.setCredentials(tokens);
|
||||||
|
|||||||
+19
-1
@@ -3,6 +3,16 @@ import { dirname } from 'node:path';
|
|||||||
import XLSX from 'xlsx';
|
import XLSX from 'xlsx';
|
||||||
import { CatalogPayload, OutputWriter, sanitizeSheetName } from './sheets.js';
|
import { CatalogPayload, OutputWriter, sanitizeSheetName } from './sheets.js';
|
||||||
|
|
||||||
|
const contentColumns = [
|
||||||
|
'Issue Date',
|
||||||
|
'Category',
|
||||||
|
'Link URL',
|
||||||
|
'Title',
|
||||||
|
'Description',
|
||||||
|
'Page Title + Meta',
|
||||||
|
'Also In'
|
||||||
|
];
|
||||||
|
|
||||||
export class ExcelWriter implements OutputWriter {
|
export class ExcelWriter implements OutputWriter {
|
||||||
public constructor(private readonly path: string) {}
|
public constructor(private readonly path: string) {}
|
||||||
|
|
||||||
@@ -14,7 +24,11 @@ export class ExcelWriter implements OutputWriter {
|
|||||||
grouped.set(sheet, [...(grouped.get(sheet) ?? []), row]);
|
grouped.set(sheet, [...(grouped.get(sheet) ?? []), row]);
|
||||||
}
|
}
|
||||||
for (const [sheet, rows] of grouped) {
|
for (const [sheet, rows] of grouped) {
|
||||||
XLSX.utils.book_append_sheet(workbook, XLSX.utils.json_to_sheet(rows), sheet);
|
XLSX.utils.book_append_sheet(
|
||||||
|
workbook,
|
||||||
|
XLSX.utils.json_to_sheet(rows.map(toContentOutputRow), { header: contentColumns }),
|
||||||
|
sheet
|
||||||
|
);
|
||||||
}
|
}
|
||||||
XLSX.utils.book_append_sheet(
|
XLSX.utils.book_append_sheet(
|
||||||
workbook,
|
workbook,
|
||||||
@@ -30,3 +44,7 @@ export class ExcelWriter implements OutputWriter {
|
|||||||
XLSX.writeFile(workbook, this.path);
|
XLSX.writeFile(workbook, this.path);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Projects a catalog row onto the fixed content columns.
 *
 * Keys that are not listed in `contentColumns` are dropped, and columns the row
 * does not provide (or provides as `null`/`undefined`) become empty strings.
 *
 * @param row - Source row keyed by column title.
 * @returns A new object holding exactly the `contentColumns` keys, in order.
 */
function toContentOutputRow(row: Record<string, unknown>): Record<string, unknown> {
  const projected: Record<string, unknown> = {};
  for (const column of contentColumns) {
    projected[column] = row[column] ?? '';
  }
  return projected;
}
|
||||||
|
|||||||
@@ -1,15 +1,97 @@
|
|||||||
import { google } from 'googleapis';
|
import { google } from 'googleapis';
|
||||||
import { CatalogPayload, OutputWriter } from './sheets.js';
|
import { authorizeGoogleOAuth } from '../gmail/client.js';
|
||||||
|
import { CatalogPayload, escapeCell, OutputWriter, sanitizeSheetName } from './sheets.js';
|
||||||
|
|
||||||
|
const sheetsScopes = ['https://www.googleapis.com/auth/spreadsheets'];
|
||||||
|
type SheetsClient = ReturnType<typeof google.sheets> | any;
|
||||||
|
|
||||||
|
const contentColumns = [
|
||||||
|
'Issue Date',
|
||||||
|
'Category',
|
||||||
|
'Link URL',
|
||||||
|
'Title',
|
||||||
|
'Description',
|
||||||
|
'Page Title + Meta',
|
||||||
|
'Also In'
|
||||||
|
];
|
||||||
|
const sponsorColumns = ['Newsletter', 'Sponsor', 'Link', 'Description'];
|
||||||
|
const deadColumns = ['URL', 'Status', 'Source', 'Date'];
|
||||||
|
|
||||||
/**
 * Output writer that appends catalog rows to a Google Sheets spreadsheet.
 *
 * Content rows are grouped into one sheet per source newsletter; sponsors go to
 * the "Sponsored Links" sheet and dead links to the "Dead Links" sheet. Sheets
 * missing from the spreadsheet are created first.
 *
 * A header row is written only to sheets created during the current `write`
 * call, so repeated runs do not insert a new header line between data rows.
 * Sheets that already exist are assumed to carry their header already.
 */
export class GoogleSheetsWriter implements OutputWriter {
  /**
   * @param spreadsheetId - Id of the target spreadsheet.
   * @param auth - Auth value handed to `google.sheets` when no client is injected.
   * @param sheetsClient - Optional pre-built client; used instead of
   *   `google.sheets(...)` (the tests pass a fake here).
   */
  public constructor(
    private readonly spreadsheetId: string,
    private readonly auth: Parameters<typeof google.sheets>[0]['auth'],
    private readonly sheetsClient?: SheetsClient
  ) {}

  /**
   * Creates any missing sheets, then appends content, sponsor, and dead-link rows.
   *
   * @param payload - Catalog rows, sponsors, and dead links to append.
   */
  public async write(payload: CatalogPayload): Promise<void> {
    const sheets = this.sheetsClient ?? google.sheets({ version: 'v4', auth: this.auth });
    const existing = await this.getExistingSheetNames(sheets);
    const grouped = this.groupContentRows(payload.rows);

    // A Set keeps titles unique: a newsletter whose sanitized name equals one of
    // the fixed sheet names must not yield two addSheet requests for one title.
    const desired = new Set<string>([...grouped.keys(), 'Sponsored Links', 'Dead Links']);
    const missing = [...desired].filter((sheet) => !existing.has(sheet));

    if (missing.length > 0) {
      await sheets.spreadsheets.batchUpdate({
        spreadsheetId: this.spreadsheetId,
        requestBody: {
          requests: missing.map((title) => ({ addSheet: { properties: { title } } }))
        }
      });
    }

    // Sheets still waiting for their header row; an entry is removed once written.
    const needsHeader = new Set<string>(missing);

    for (const [sheet, rows] of grouped) {
      await this.appendRows(sheets, sheet, contentColumns, rows, needsHeader.delete(sheet));
    }
    await this.appendRows(
      sheets,
      'Sponsored Links',
      sponsorColumns,
      payload.sponsors,
      needsHeader.delete('Sponsored Links')
    );
    await this.appendRows(
      sheets,
      'Dead Links',
      deadColumns,
      payload.deadLinks,
      needsHeader.delete('Dead Links')
    );
  }

  /** Fetches the spreadsheet and returns the titles of the sheets it contains. */
  private async getExistingSheetNames(sheets: SheetsClient): Promise<Set<string>> {
    const spreadsheet = await sheets.spreadsheets.get({ spreadsheetId: this.spreadsheetId });
    return new Set(
      (spreadsheet.data.sheets ?? [])
        .map((sheet: any) => sheet.properties?.title)
        .filter((title: unknown): title is string => typeof title === 'string')
    );
  }

  /**
   * Groups content rows by target sheet name.
   *
   * The name comes from the row's "Source Newsletter" value ("Newsletter" when
   * absent) passed through `sanitizeSheetName`; the `100` argument is presumably
   * the maximum title length - confirm against `sanitizeSheetName`.
   */
  private groupContentRows(rows: Record<string, unknown>[]): Map<string, Record<string, unknown>[]> {
    const grouped = new Map<string, Record<string, unknown>[]>();
    for (const row of rows) {
      const sheet = sanitizeSheetName(String(row['Source Newsletter'] ?? 'Newsletter'), 100);
      const bucket = grouped.get(sheet);
      if (bucket) {
        // Push in place rather than copying the bucket for every row.
        bucket.push(row);
      } else {
        grouped.set(sheet, [row]);
      }
    }
    return grouped;
  }

  /**
   * Appends rows to one sheet, optionally preceded by the header row.
   *
   * @param sheets - Sheets client to call.
   * @param sheet - Sheet title; single quotes are doubled for the A1 range.
   * @param columns - Column titles, in output order.
   * @param rows - Rows keyed by column title; missing values become ''.
   * @param includeHeader - When true, `columns` is written as the first row,
   *   even if `rows` is empty, so a freshly created sheet always has a header.
   */
  private async appendRows(
    sheets: SheetsClient,
    sheet: string,
    columns: string[],
    rows: Record<string, unknown>[],
    includeHeader: boolean
  ): Promise<void> {
    const dataRows = rows.map((row) => columns.map((column) => escapeCell(row[column] ?? '')));
    const values = includeHeader ? [columns, ...dataRows] : dataRows;
    if (values.length === 0) {
      return;
    }

    await sheets.spreadsheets.values.append({
      spreadsheetId: this.spreadsheetId,
      range: `'${sheet.replaceAll("'", "''")}'!A1`,
      valueInputOption: 'RAW',
      insertDataOption: 'INSERT_ROWS',
      requestBody: { values }
    });
  }
}
|
||||||
|
|
||||||
|
/**
 * Authorizes against Google with the spreadsheet scope and returns a writer
 * bound to the given spreadsheet.
 *
 * @param options - `credentials` and `token` are file paths forwarded to
 *   `authorizeGoogleOAuth`; `spreadsheetId` identifies the target spreadsheet.
 * @returns A `GoogleSheetsWriter` that uses the authorized client.
 */
export async function createGoogleSheetsWriter(options: {
  credentials: string;
  token: string;
  spreadsheetId: string;
}): Promise<GoogleSheetsWriter> {
  const { credentials, token, spreadsheetId } = options;
  const oauthClient = await authorizeGoogleOAuth(credentials, token, sheetsScopes);
  return new GoogleSheetsWriter(spreadsheetId, oauthClient);
}
|
||||||
|
|||||||
+65
-2
@@ -15,6 +15,69 @@ function nearestSection($: cheerio.CheerioAPI, element: any): string | undefined
|
|||||||
return parentPrevious || undefined;
|
return parentPrevious || undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type TextToken = { type: 'text'; text: string };
|
||||||
|
type AnchorToken = { type: 'anchor'; element: any; text: string };
|
||||||
|
type Token = TextToken | AnchorToken;
|
||||||
|
|
||||||
|
/**
 * Collapses every run of whitespace to a single space and strips whitespace
 * from both ends.
 *
 * @param value - Raw text, e.g. the text content of an HTML node.
 * @returns The words of `value` joined by single spaces; '' when there are none.
 */
function compactText(value: string): string {
  return value
    .split(/\s+/)
    .filter((word) => word.length > 0)
    .join(' ');
}
|
||||||
|
|
||||||
|
/**
 * Returns the compacted text that precedes the first sponsor marker word
 * (sponsor, sponsored, advertisement, partner; case-insensitive, whole word).
 *
 * @param value - Text that may contain a sponsor marker.
 * @returns The compacted prefix before the marker, or the whole compacted text
 *   when no marker is present.
 */
function textBeforeSponsorMarker(value: string): string {
  const markerAt = value.search(/\b(?:sponsor|sponsored|advertisement|partner)\b/i);
  const leading = markerAt === -1 ? value : value.slice(0, markerAt);
  return compactText(leading);
}
|
||||||
|
|
||||||
|
/**
 * Finds the first sponsor marker word in the text.
 *
 * @param value - Text to scan.
 * @returns The matched word (sponsor, sponsored, advertisement, or partner) in
 *   upper case, or `undefined` when none occurs as a whole word.
 */
function sponsorMarkerText(value: string): string | undefined {
  const found = /\b(?:sponsor|sponsored|advertisement|partner)\b/i.exec(value);
  return found === null ? undefined : found[0].toUpperCase();
}
|
||||||
|
|
||||||
|
/**
 * Flattens a DOM subtree into an ordered list of text and anchor tokens.
 *
 * Text nodes become text tokens (skipped when empty after compaction). An `<a>`
 * element with a truthy `href` becomes a single anchor token and is not
 * descended into. Every other node contributes the tokens of its children in
 * document order.
 *
 * @param $ - Cheerio API bound to the document that owns `node`.
 * @param node - DOM node to flatten.
 * @returns Tokens in document order.
 */
function blockTokens($: cheerio.CheerioAPI, node: any): Token[] {
  const isTextNode = node.type === 'text';
  if (isTextNode) {
    const text = compactText(node.data ?? '');
    if (!text) {
      return [];
    }
    return [{ type: 'text', text }];
  }

  const isLinkedAnchor = node.type === 'tag' && node.name === 'a' && Boolean($(node).attr('href'));
  if (isLinkedAnchor) {
    const anchorText = compactText($(node).text());
    return [{ type: 'anchor', element: node, text: anchorText }];
  }

  const collected: Token[] = [];
  for (const child of $(node).contents().toArray()) {
    collected.push(...blockTokens($, child));
  }
  return collected;
}
|
||||||
|
|
||||||
|
/**
 * Builds the description for one link from the text immediately around it,
 * rather than from the whole container the link shares with other links.
 *
 * The nearest enclosing `p`, `li`, `td`, or `div` is flattened into tokens. The
 * result is, in order: an upper-cased sponsor marker if the closest text token
 * before the anchor contains one, the link title, and every text token that
 * follows the anchor up to the next anchor (each cut at its first sponsor marker).
 *
 * @param $ - Cheerio API bound to the document that owns `element`.
 * @param element - The anchor DOM node being described.
 * @param title - The link title already extracted for the anchor.
 * @returns The compacted context string; `title` alone when the anchor has no
 *   enclosing block or cannot be located among the block's tokens.
 */
function localContext($: cheerio.CheerioAPI, element: any, title: string): string {
  const block = $(element).closest('p,li,td,div').first();
  const blockNode = block.get(0);
  if (!blockNode) {
    // No enclosing p/li/td/div (e.g. an anchor directly under <body> or inside
    // a heading): there is nothing to tokenize, so fall back to the title
    // instead of handing `undefined` to blockTokens.
    return title;
  }

  const tokens = blockTokens($, blockNode);
  const anchorIndex = tokens.findIndex(
    (token) => token.type === 'anchor' && token.element === element
  );
  if (anchorIndex === -1) {
    return title;
  }

  const parts: string[] = [];
  // Only the closest text token before the anchor can label it as sponsored.
  const previousText = tokens
    .slice(0, anchorIndex)
    .reverse()
    .find((token): token is TextToken => token.type === 'text')?.text;
  const marker = previousText ? sponsorMarkerText(previousText) : undefined;
  if (marker) {
    parts.push(marker);
  }
  parts.push(title);

  // Trailing text belongs to this link until the next anchor starts.
  for (const token of tokens.slice(anchorIndex + 1)) {
    if (token.type === 'anchor') {
      break;
    }
    parts.push(textBeforeSponsorMarker(token.text));
  }

  return compactText(parts.join(' '));
}
|
||||||
|
|
||||||
export const genericParser: ParserPlugin = {
|
export const genericParser: ParserPlugin = {
|
||||||
name: 'generic',
|
name: 'generic',
|
||||||
matches: () => true,
|
matches: () => true,
|
||||||
@@ -24,9 +87,9 @@ export const genericParser: ParserPlugin = {
|
|||||||
.toArray()
|
.toArray()
|
||||||
.map((element) => {
|
.map((element) => {
|
||||||
const anchor = $(element);
|
const anchor = $(element);
|
||||||
const title = anchor.text().replace(/\s+/g, ' ').trim() || anchor.attr('aria-label') || '';
|
const title = compactText(anchor.text()) || anchor.attr('aria-label') || '';
|
||||||
const url = anchor.attr('href') ?? '';
|
const url = anchor.attr('href') ?? '';
|
||||||
const context = anchor.closest('p,li,td,div').text().replace(/\s+/g, ' ').trim();
|
const context = localContext($, element, title);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
url,
|
url,
|
||||||
|
|||||||
+13
-1
@@ -35,6 +35,18 @@ function issueDate(date: string): string {
|
|||||||
return new Date(date).toISOString().slice(0, 10);
|
return new Date(date).toISOString().slice(0, 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Cleans a sponsored link's description for the sponsor output.
 *
 * Removes the first sponsor marker word, then a leading copy of the link title
 * together with the separators (dash, colon, en/em dash, whitespace) that
 * follow it, and finally collapses whitespace.
 *
 * @param linkTitle - Title of the sponsored link; matched literally, ignoring case.
 * @param description - Raw description text for the link.
 * @returns The cleaned description.
 */
function sponsorDescription(linkTitle: string, description: string): string {
  const markerPattern = /\b(?:sponsor|sponsored|advertisement|partner)\b/i;
  const leadingTitlePattern = new RegExp(
    `^\\s*${escapeRegExp(linkTitle)}\\s*(?:[-:–—]|\\s)+`,
    'i'
  );

  const withoutMarker = description.replace(markerPattern, '');
  const withoutTitle = withoutMarker.replace(leadingTitlePattern, '');
  return withoutTitle.replace(/\s+/g, ' ').trim();
}
|
||||||
|
|
||||||
|
/**
 * Escapes regular-expression metacharacters so the text can be embedded in a
 * `RegExp` source and matched literally.
 *
 * @param value - Arbitrary text.
 * @returns `value` with each of `. * + ? ^ $ { } ( ) | [ ] \` preceded by a backslash.
 */
function escapeRegExp(value: string): string {
  return value.replace(/[.*+?^${}()|[\]\\]/g, (special) => `\\${special}`);
}
|
||||||
|
|
||||||
export async function runCatalog(options: RunOptions): Promise<RunSummary> {
|
export async function runCatalog(options: RunOptions): Promise<RunSummary> {
|
||||||
const config = normalizeConfig(options.config);
|
const config = normalizeConfig(options.config);
|
||||||
const state = new StateStore(config.stateFile);
|
const state = new StateStore(config.stateFile);
|
||||||
@@ -76,7 +88,7 @@ export async function runCatalog(options: RunOptions): Promise<RunSummary> {
|
|||||||
Newsletter: newsletterName(message.from),
|
Newsletter: newsletterName(message.from),
|
||||||
Sponsor: link.title,
|
Sponsor: link.title,
|
||||||
Link: link.normalizedUrl,
|
Link: link.normalizedUrl,
|
||||||
Description: link.description ?? ''
|
Description: sponsorDescription(link.title, link.description ?? '')
|
||||||
});
|
});
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -35,5 +35,9 @@ describe('ExcelWriter', () => {
|
|||||||
const workbook = XLSX.readFile(path);
|
const workbook = XLSX.readFile(path);
|
||||||
expect(workbook.SheetNames[0]).toBe('A Very Long Newsletter Name Tha');
|
expect(workbook.SheetNames[0]).toBe('A Very Long Newsletter Name Tha');
|
||||||
expect(workbook.SheetNames[0].length).toBe(31);
|
expect(workbook.SheetNames[0].length).toBe(31);
|
||||||
|
const rows = XLSX.utils.sheet_to_json<Record<string, unknown>>(
|
||||||
|
workbook.Sheets[workbook.SheetNames[0]]
|
||||||
|
);
|
||||||
|
expect(rows[0]).not.toHaveProperty('Source Newsletter');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -0,0 +1,79 @@
|
|||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
import { GoogleSheetsWriter } from '../src/output/googleSheets.js';
|
||||||
|
|
||||||
|
describe('GoogleSheetsWriter', () => {
|
||||||
|
it('creates missing sheets and appends content, sponsor, and dead-link rows', async () => {
|
||||||
|
const calls: unknown[] = [];
|
||||||
|
const sheets = {
|
||||||
|
spreadsheets: {
|
||||||
|
get: async () => ({
|
||||||
|
data: { sheets: [{ properties: { title: 'Sponsored Links' } }] }
|
||||||
|
}),
|
||||||
|
batchUpdate: async (request: unknown) => {
|
||||||
|
calls.push(request);
|
||||||
|
},
|
||||||
|
values: {
|
||||||
|
append: async (request: unknown) => {
|
||||||
|
calls.push(request);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
await new GoogleSheetsWriter('sheet-1', undefined, sheets).write({
|
||||||
|
rows: [
|
||||||
|
{
|
||||||
|
'Source Newsletter': 'A Very Long Newsletter Name That Is Fine In Google Sheets',
|
||||||
|
Title: '=Formula',
|
||||||
|
'Link URL': 'https://example.com'
|
||||||
|
}
|
||||||
|
],
|
||||||
|
sponsors: [{ Newsletter: 'Weekly', Sponsor: 'Acme', Link: 'https://sponsor.example' }],
|
||||||
|
deadLinks: [{ URL: 'https://dead.example', Status: '404' }]
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(calls[0]).toMatchObject({
|
||||||
|
spreadsheetId: 'sheet-1',
|
||||||
|
requestBody: {
|
||||||
|
requests: [
|
||||||
|
{
|
||||||
|
addSheet: {
|
||||||
|
properties: { title: 'A Very Long Newsletter Name That Is Fine In Google Sheets' }
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{ addSheet: { properties: { title: 'Dead Links' } } }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
});
|
||||||
|
expect(calls).toContainEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
spreadsheetId: 'sheet-1',
|
||||||
|
range: "'A Very Long Newsletter Name That Is Fine In Google Sheets'!A1",
|
||||||
|
requestBody: {
|
||||||
|
values: [
|
||||||
|
[
|
||||||
|
'Issue Date',
|
||||||
|
'Category',
|
||||||
|
'Link URL',
|
||||||
|
'Title',
|
||||||
|
'Description',
|
||||||
|
'Page Title + Meta',
|
||||||
|
'Also In'
|
||||||
|
],
|
||||||
|
['', '', 'https://example.com', "'=Formula", '', '', '']
|
||||||
|
]
|
||||||
|
}
|
||||||
|
})
|
||||||
|
);
|
||||||
|
expect(calls).toContainEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
range: "'Sponsored Links'!A1"
|
||||||
|
})
|
||||||
|
);
|
||||||
|
expect(calls).toContainEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
range: "'Dead Links'!A1"
|
||||||
|
})
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
import { describe, expect, it } from 'vitest';
|
import { describe, expect, it } from 'vitest';
|
||||||
|
import { genericParser } from '../src/parsing/generic.js';
|
||||||
import { selectParser } from '../src/parsing/plugins.js';
|
import { selectParser } from '../src/parsing/plugins.js';
|
||||||
|
|
||||||
describe('parser plugin selection', () => {
|
describe('parser plugin selection', () => {
|
||||||
@@ -11,3 +12,27 @@ describe('parser plugin selection', () => {
|
|||||||
).toBe('generic');
|
).toBe('generic');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('generic parser', () => {
|
||||||
|
it('keeps descriptions local to each link when many links share a container', () => {
|
||||||
|
const links = genericParser.parse({
|
||||||
|
html: `
|
||||||
|
<div>
|
||||||
|
<h2>CSS & HTML Tools</h2>
|
||||||
|
<a href="https://cascade.example">Cascade</a> - CSS property icons.
|
||||||
|
<a href="https://frames.example">Fancy Frames</a> - Decorative border generator.
|
||||||
|
SPONSORED
|
||||||
|
<a href="https://flexboxle.example">flexboxle</a> - A daily puzzle game to master CSS Flexbox.
|
||||||
|
<a href="https://types.example">Typescale AI</a> - A typescale generator.
|
||||||
|
</div>
|
||||||
|
`
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(links.map((link) => link.description)).toEqual([
|
||||||
|
'Cascade - CSS property icons.',
|
||||||
|
'Fancy Frames - Decorative border generator.',
|
||||||
|
'SPONSORED flexboxle - A daily puzzle game to master CSS Flexbox.',
|
||||||
|
'Typescale AI - A typescale generator.'
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|||||||
@@ -41,4 +41,49 @@ describe('run orchestration', () => {
|
|||||||
expect(result.linksExtracted).toBe(1);
|
expect(result.linksExtracted).toBe(1);
|
||||||
expect(writes).toHaveLength(0);
|
expect(writes).toHaveLength(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('only sends locally marked sponsored links to the sponsored output', async () => {
|
||||||
|
const stateFile = join(dir, 'state.json');
|
||||||
|
const writes: any[] = [];
|
||||||
|
|
||||||
|
await runCatalog({
|
||||||
|
config: {
|
||||||
|
gmail: { folder: 'Newsletters' },
|
||||||
|
output: { name: 'Catalog', excel: { enabled: true, path: join(dir, 'out.xlsx') } },
|
||||||
|
stateFile
|
||||||
|
},
|
||||||
|
messages: [
|
||||||
|
{
|
||||||
|
id: 'msg-1',
|
||||||
|
messageId: '<msg-1>',
|
||||||
|
from: 'Web Tools Weekly <w@example.com>',
|
||||||
|
date: '2026-05-16T00:00:00.000Z',
|
||||||
|
html: `
|
||||||
|
<div>
|
||||||
|
<a href="https://cascade.example">Cascade</a> - CSS property icons.
|
||||||
|
<a href="https://frames.example">Fancy Frames</a> - Decorative borders.
|
||||||
|
SPONSORED
|
||||||
|
<a href="https://flexboxle.example">flexboxle</a> - A daily puzzle game.
|
||||||
|
<a href="https://types.example">Typescale AI</a> - A typescale generator.
|
||||||
|
</div>
|
||||||
|
`
|
||||||
|
}
|
||||||
|
],
|
||||||
|
writers: [{ write: async (payload) => writes.push(payload) }]
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(writes[0].sponsors).toEqual([
|
||||||
|
{
|
||||||
|
Newsletter: 'Web Tools Weekly',
|
||||||
|
Sponsor: 'flexboxle',
|
||||||
|
Link: 'https://flexboxle.example/',
|
||||||
|
Description: 'A daily puzzle game.'
|
||||||
|
}
|
||||||
|
]);
|
||||||
|
expect(writes[0].rows.map((row: any) => row.Title)).toEqual([
|
||||||
|
'Cascade',
|
||||||
|
'Fancy Frames',
|
||||||
|
'Typescale AI'
|
||||||
|
]);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user