feature: Implement Gmail message fetching and Excel sheet name truncation

This commit is contained in:
Keith Solomon
2026-05-17 10:59:44 -05:00
parent cb568597dc
commit 379526114c
9 changed files with 289 additions and 163 deletions
+1
View File
@@ -1,3 +1,4 @@
node_modules
dist
config.yaml
output
-149
View File
@@ -13,7 +13,6 @@
"cheerio": "^1.0.0",
"commander": "^12.1.0",
"googleapis": "^140.0.1",
"open": "^10.1.0",
"ora": "^8.1.1",
"xlsx": "^0.18.5",
"yaml": "^2.5.1",
@@ -2214,21 +2213,6 @@
"integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==",
"license": "BSD-3-Clause"
},
"node_modules/bundle-name": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/bundle-name/-/bundle-name-4.1.0.tgz",
"integrity": "sha512-tjwM5exMg6BGRI+kNmTntNsvdZS1X8BFYS6tnJ2hdH0kVxM6/eVZ2xy+FqStSWvYmtfFMDLIxurorHwDKfDz5Q==",
"license": "MIT",
"dependencies": {
"run-applescript": "^7.0.0"
},
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/bundle-require": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/bundle-require/-/bundle-require-5.1.0.tgz",
@@ -2744,34 +2728,6 @@
"dev": true,
"license": "MIT"
},
"node_modules/default-browser": {
"version": "5.5.0",
"resolved": "https://registry.npmjs.org/default-browser/-/default-browser-5.5.0.tgz",
"integrity": "sha512-H9LMLr5zwIbSxrmvikGuI/5KGhZ8E2zH3stkMgM5LpOWDutGM2JZaj460Udnf1a+946zc7YBgrqEWwbk7zHvGw==",
"license": "MIT",
"dependencies": {
"bundle-name": "^4.1.0",
"default-browser-id": "^5.0.0"
},
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/default-browser-id": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/default-browser-id/-/default-browser-id-5.0.1.tgz",
"integrity": "sha512-x1VCxdX4t+8wVfd1so/9w+vQ4vx7lKd2Qp5tDRutErwmR85OgmfX7RlLRMWafRMY7hbEiXIbudNrjOAPa/hL8Q==",
"license": "MIT",
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/define-data-property": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz",
@@ -2790,18 +2746,6 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/define-lazy-prop": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/define-lazy-prop/-/define-lazy-prop-3.0.0.tgz",
"integrity": "sha512-N+MeXYoqr3pOgn8xfyRPREN7gHakLYjhsHhWGT3fWAiL4IkAt0iDw14QiiEm2bE30c5XX5q0FtAA3CK5f9/BUg==",
"license": "MIT",
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/define-properties": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.2.1.tgz",
@@ -4601,21 +4545,6 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/is-docker": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/is-docker/-/is-docker-3.0.0.tgz",
"integrity": "sha512-eljcgEDlEns/7AXFosB5K/2nCM4P7FQPkGc/DWLy5rmFEWvZayGrik1d9/QIY5nJ4f9YsVvBkA6kJpHn9rISdQ==",
"license": "MIT",
"bin": {
"is-docker": "cli.js"
},
"engines": {
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/is-extglob": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz",
@@ -4685,24 +4614,6 @@
"node": ">=0.10.0"
}
},
"node_modules/is-inside-container": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/is-inside-container/-/is-inside-container-1.0.0.tgz",
"integrity": "sha512-KIYLCCJghfHZxqjYBE7rEy0OBuTd5xCHS7tHVgvCLkx7StIoaxwNW3hCALgEUjFfeRk+MG/Qxmp/vtETEF3tRA==",
"license": "MIT",
"dependencies": {
"is-docker": "^3.0.0"
},
"bin": {
"is-inside-container": "cli.js"
},
"engines": {
"node": ">=14.16"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/is-interactive": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/is-interactive/-/is-interactive-2.0.0.tgz",
@@ -4937,21 +4848,6 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/is-wsl": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/is-wsl/-/is-wsl-3.1.1.tgz",
"integrity": "sha512-e6rvdUCiQCAuumZslxRJWR/Doq4VpPR82kqclvcS0efgt430SlGIk05vdCN58+VrzgtIcfNODjozVielycD4Sw==",
"license": "MIT",
"dependencies": {
"is-inside-container": "^1.0.0"
},
"engines": {
"node": ">=16"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/isarray": {
"version": "2.0.5",
"resolved": "https://registry.npmjs.org/isarray/-/isarray-2.0.5.tgz",
@@ -5627,24 +5523,6 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/open": {
"version": "10.2.0",
"resolved": "https://registry.npmjs.org/open/-/open-10.2.0.tgz",
"integrity": "sha512-YgBpdJHPyQ2UE5x+hlSXcnejzAvD0b22U2OuAP+8OnlJT+PjWPxtgmGqKKc+RgTM63U9gN0YzrYc71R2WT/hTA==",
"license": "MIT",
"dependencies": {
"default-browser": "^5.2.1",
"define-lazy-prop": "^3.0.0",
"is-inside-container": "^1.0.0",
"wsl-utils": "^0.1.0"
},
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/optionator": {
"version": "0.9.4",
"resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz",
@@ -6448,18 +6326,6 @@
"fsevents": "~2.3.2"
}
},
"node_modules/run-applescript": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/run-applescript/-/run-applescript-7.1.0.tgz",
"integrity": "sha512-DPe5pVFaAsinSaV6QjQ6gdiedWDcRCbUuiQfQa2wmWV7+xC9bGulGI8+TdRmoFkAPaBXk8CrAbnlY2ISniJ47Q==",
"license": "MIT",
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/run-parallel": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz",
@@ -8963,21 +8829,6 @@
"dev": true,
"license": "ISC"
},
"node_modules/wsl-utils": {
"version": "0.1.0",
"resolved": "https://registry.npmjs.org/wsl-utils/-/wsl-utils-0.1.0.tgz",
"integrity": "sha512-h3Fbisa2nKGPxCpm89Hk33lBLsnaGBvctQopaBSOW/uIs6FTe1ATyAnKFJrzVs9vpGdsTe73WF3V4lIsk4Gacw==",
"license": "MIT",
"dependencies": {
"is-wsl": "^3.1.0"
},
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/xlsx": {
"version": "0.18.5",
"resolved": "https://registry.npmjs.org/xlsx/-/xlsx-0.18.5.tgz",
-1
View File
@@ -29,7 +29,6 @@
"cheerio": "^1.0.0",
"commander": "^12.1.0",
"googleapis": "^140.0.1",
"open": "^10.1.0",
"ora": "^8.1.1",
"xlsx": "^0.18.5",
"yaml": "^2.5.1",
+40 -1
View File
@@ -1,6 +1,7 @@
import { Command, Option } from 'commander';
import { writeFile } from 'node:fs/promises';
import { loadConfig } from '../config/config.js';
import { createGmailClient } from '../gmail/client.js';
import { ExcelWriter } from '../output/excel.js';
import { runCatalog } from '../run/runCatalog.js';
import { validateDateFilters } from './flags.js';
@@ -56,7 +57,15 @@ export function createProgram(): Command {
const writers = config.output.excel.enabled
? [new ExcelWriter(config.output.excel.path)]
: [];
const messages = process.env.NLC_FIXTURE === '1' ? fixtureMessages() : [];
const messages =
process.env.NLC_FIXTURE === '1'
? fixtureMessages()
: await fetchGmailMessages(config, {
dryRun: options.dryRun,
from: options.from,
to: options.to,
last: options.last
});
const summary = await runCatalog({
config,
messages,
@@ -73,6 +82,36 @@ export function createProgram(): Command {
return program;
}
/**
 * Loads newsletter messages from the Gmail label named in the configuration.
 *
 * @param config - Loaded application config; `gmail.credentials`, `gmail.token`
 *   and `gmail.folder` are read from it.
 * @param options - CLI filters. A numeric `dryRun` is forwarded as the message
 *   limit; `from`, `to` and `last` are turned into a Gmail search query.
 * @returns Whatever `GmailClient.fetchMessages` resolves with.
 */
async function fetchGmailMessages(
  config: Awaited<ReturnType<typeof loadConfig>>,
  options: { dryRun?: number | boolean; from?: string; to?: string; last?: string }
) {
  const gmailClient = await createGmailClient(config.gmail.credentials, config.gmail.token);

  // A boolean `dryRun` carries no count, so only a numeric value limits the fetch.
  const limit = typeof options.dryRun === 'number' ? options.dryRun : undefined;
  const searchQuery = buildGmailQuery(options);

  return gmailClient.fetchMessages(config.gmail.folder, {
    maxResults: limit,
    query: searchQuery
  });
}
/**
 * Builds the Gmail search expression for the date-related CLI filters.
 *
 * `last` wins over `from`/`to`: when it is set the result is `newer_than:<last>`
 * and the other two fields are ignored. Otherwise `from` becomes an `after:` term
 * and `to` a `before:` term, with dashes in the dates rewritten to slashes.
 *
 * NOTE(review): Gmail's `before:` operator appears to exclude the given day —
 * confirm that matches what `--to` is meant to do.
 *
 * @returns The query string, or `undefined` when no filter was supplied.
 */
function buildGmailQuery(options: {
  from?: string;
  to?: string;
  last?: string;
}): string | undefined {
  const { from, to, last } = options;
  if (last) {
    return `newer_than:${last}`;
  }

  const toSlashDate = (value: string): string => value.split('-').join('/');

  const terms: string[] = [];
  if (from) {
    terms.push(`after:${toSlashDate(from)}`);
  }
  if (to) {
    terms.push(`before:${toSlashDate(to)}`);
  }

  if (terms.length === 0) {
    return undefined;
  }
  return terms.join(' ');
}
function fixtureMessages() {
return [
{
+127 -9
View File
@@ -1,8 +1,9 @@
import { createServer } from 'node:http';
import { spawn } from 'node:child_process';
import { readFile, writeFile, mkdir } from 'node:fs/promises';
import { dirname } from 'node:path';
import open from 'open';
import { google, gmail_v1 } from 'googleapis';
import { platform } from 'node:os';
import { google } from 'googleapis';
import { expandHome } from '../config/config.js';
import { NewsletterMessage } from '../parsing/types.js';
@@ -31,6 +32,14 @@ export async function authorizeGmail(credentialsPath: string, tokenPath: string)
}
}
/**
 * Authorizes with Gmail and wraps the resulting v1 API handle in a `GmailClient`.
 *
 * @param credentialsPath - Passed through to `authorizeGmail`.
 * @param tokenPath - Passed through to `authorizeGmail`.
 * @returns A client bound to the authorized Gmail API instance.
 */
export async function createGmailClient(
  credentialsPath: string,
  tokenPath: string
): Promise<GmailClient> {
  const authorized = await authorizeGmail(credentialsPath, tokenPath);
  const gmailApi = google.gmail({ version: 'v1', auth: authorized });
  return new GmailClient(gmailApi);
}
async function waitForBrowserCode(url: string): Promise<string> {
return new Promise((resolveCode, reject) => {
const server = createServer((req, res) => {
@@ -43,17 +52,126 @@ async function waitForBrowserCode(url: string): Promise<string> {
}
});
server.listen(53682, () => {
open(url).catch(reject);
console.log(`Open this URL to authorize Gmail access:\n${url}\n`);
openBrowser(url).catch(reject);
});
});
}
/**
 * Launches the platform's URL opener for `url` as a detached child process.
 *
 * The returned promise settles once the launch outcome is known: it resolves
 * when the child has been spawned and rejects if the launcher could not be
 * started (for example when the executable is not installed). Without an
 * `'error'` listener such a failure would surface as an unhandled event and
 * terminate the process instead of reaching the caller's `.catch`.
 *
 * @param url - Address to hand to the opener command.
 * @throws Rejects with the spawn error when the opener cannot be started.
 */
async function openBrowser(url: string): Promise<void> {
  const command = buildBrowserCommand(url, platform());

  await new Promise<void>((resolve, reject) => {
    const child = spawn(command.file, command.args, {
      detached: true,
      stdio: 'ignore',
      windowsHide: true
    });

    // Stays registered after a successful spawn so a late error is still handled.
    child.once('error', reject);
    child.once('spawn', () => {
      // Let the CLI exit without waiting for the browser process.
      child.unref();
      resolve();
    });
  });
}
/**
 * Picks the executable and argument list used to open `url` on a given platform.
 *
 * - `win32`: `powershell.exe` running `Start-Process`, with the URL as a separate argument.
 * - `darwin`: `open`.
 * - anything else: `xdg-open`.
 *
 * NOTE(review): PowerShell's documentation suggests text following a `-Command`
 * string is treated as part of the command — confirm `$args[0]` really receives
 * the URL on Windows.
 *
 * @param url - Address to open; always passed as its own argument.
 * @param os - Platform identifier, e.g. the value of `os.platform()`.
 * @returns The `file` to execute and its `args`.
 */
export function buildBrowserCommand(url: string, os: NodeJS.Platform) {
  switch (os) {
    case 'win32': {
      const script = 'Start-Process -FilePath $args[0]';
      return {
        file: 'powershell.exe',
        args: ['-NoProfile', '-Command', script, url]
      };
    }
    case 'darwin':
      return { file: 'open', args: [url] };
    default:
      return { file: 'xdg-open', args: [url] };
  }
}
/**
 * Reads newsletter messages from a single Gmail label.
 *
 * The wrapped API object only needs `users.labels.list`, `users.messages.list`
 * and `users.messages.get`.
 */
export class GmailClient {
  // Kept as `any` so callers can pass either the googleapis client or a stub.
  public constructor(private readonly gmail: any) {}

  /**
   * Loads every message under `labelName` that has an HTML body.
   *
   * @param labelName - Label to read; matched case-insensitively.
   * @param options - `maxResults` caps the total number of listed messages
   *   across all result pages; `query` is passed as the Gmail search string.
   * @returns Parsed messages in listing order; messages without HTML are omitted.
   * @throws Error when no label with that name exists.
   */
  public async fetchMessages(
    labelName: string,
    options: { maxResults?: number; query?: string } = {}
  ): Promise<NewsletterMessage[]> {
    const labelId = await this.findLabelId(labelName);
    const listed = await this.listMessageRefs(labelId, options);

    const loaded: NewsletterMessage[] = [];
    for (const message of listed) {
      if (!message.id) {
        continue;
      }
      const response = await this.gmail.users.messages.get({
        userId: 'me',
        id: message.id,
        format: 'full'
      });
      const parsed = parseGmailMessage(response.data);
      if (parsed.html) {
        loaded.push(parsed);
      }
    }
    return loaded;
  }

  /**
   * Collects message references for a label, following `nextPageToken` until
   * the listing is exhausted or `maxResults` entries have been gathered.
   * A single `list` call returns only one page, so stopping after the first
   * response would silently drop the rest of the label.
   */
  private async listMessageRefs(
    labelId: string,
    options: { maxResults?: number; query?: string }
  ): Promise<Array<{ id?: string | null }>> {
    const limit = options.maxResults;
    const refs: Array<{ id?: string | null }> = [];
    let pageToken: string | undefined;

    do {
      const page = await this.gmail.users.messages.list({
        userId: 'me',
        labelIds: [labelId],
        // Ask only for what is still missing so the total never exceeds the cap.
        maxResults: limit === undefined ? undefined : limit - refs.length,
        q: options.query,
        pageToken
      });
      refs.push(...(page.data.messages ?? []));
      pageToken = page.data.nextPageToken || undefined;
    } while (pageToken && (limit === undefined || refs.length < limit));

    return refs;
  }

  /** Resolves a label name to its Gmail label id, ignoring case. */
  private async findLabelId(labelName: string): Promise<string> {
    const response = await this.gmail.users.labels.list({ userId: 'me' });
    const labels = response.data.labels ?? [];
    const label = labels.find(
      (entry: { id?: string; name?: string }) =>
        entry.name?.toLowerCase() === labelName.toLowerCase()
    );
    if (!label?.id) {
      throw new Error(`Gmail label "${labelName}" was not found`);
    }
    return label.id;
  }
}
/**
 * Converts a Gmail API message resource into a `NewsletterMessage`.
 *
 * Header names are lower-cased for lookup; entries without a name are skipped.
 * The message date is taken from the first usable source: the `Date` header,
 * then the message's `internalDate` (epoch milliseconds), then the current
 * time. An unparseable `Date` header therefore no longer makes
 * `toISOString()` throw and abort the whole fetch.
 *
 * @param message - Message resource as returned by `users.messages.get`.
 * @returns The normalized message; `html` is empty when no HTML part exists.
 */
function parseGmailMessage(message: any): NewsletterMessage {
  const headers: Record<string, string | undefined> = Object.fromEntries(
    (message.payload?.headers ?? [])
      .filter((header: { name?: string | null }) => typeof header?.name === 'string')
      .map((header: { name: string; value?: string | null }) => [
        header.name.toLowerCase(),
        header.value ?? undefined
      ])
  );

  const dateCandidates = [
    headers.date ? new Date(headers.date) : undefined,
    message.internalDate ? new Date(Number(message.internalDate)) : undefined
  ];
  const sentAt =
    dateCandidates.find(
      (candidate) => candidate !== undefined && !Number.isNaN(candidate.getTime())
    ) ?? new Date();

  return {
    id: message.id ?? headers['message-id'] ?? '',
    messageId: headers['message-id'] ?? message.id ?? '',
    from: headers.from ?? '',
    date: sentAt.toISOString(),
    subject: headers.subject ?? '',
    html: findHtmlPart(message.payload) ?? '',
    headers: {
      date: headers.date,
      from: headers.from,
      listId: headers['list-id'],
      messageId: headers['message-id'],
      subject: headers.subject ?? ''
    }
  };
}
/**
 * Searches a MIME part tree depth-first for HTML content.
 *
 * A part qualifies when its `mimeType` is `text/html` and it carries body data;
 * that data is decoded and returned immediately. Otherwise the children in
 * `parts` are searched in order and the first non-empty result wins.
 *
 * @param part - Root of the (sub)tree to search; may be missing.
 * @returns The decoded HTML, or `undefined` when nothing was found.
 */
function findHtmlPart(part: any): string | undefined {
  if (part) {
    const encodedHtml = part.mimeType === 'text/html' ? part.body?.data : undefined;
    if (encodedHtml) {
      return decodeBase64Url(encodedHtml);
    }

    const children = part.parts ?? [];
    for (let index = 0; index < children.length; index += 1) {
      const nested = findHtmlPart(children[index]);
      if (nested) {
        return nested;
      }
    }
  }
  return undefined;
}
/**
 * Decodes a base64url string to UTF-8 text.
 *
 * The URL-safe symbols `-` and `_` are mapped back to `+` and `/` before the
 * value is handed to the base64 decoder.
 */
function decodeBase64Url(value: string): string {
  const standardAlphabet = value.replace(/[-_]/g, (symbol) => (symbol === '-' ? '+' : '/'));
  const bytes = Buffer.from(standardAlphabet, 'base64');
  return bytes.toString('utf8');
}
+1 -1
View File
@@ -10,7 +10,7 @@ export class ExcelWriter implements OutputWriter {
const workbook = XLSX.utils.book_new();
const grouped = new Map<string, Record<string, unknown>[]>();
for (const row of payload.rows) {
const sheet = sanitizeSheetName(String(row['Source Newsletter'] ?? 'Newsletter'));
const sheet = sanitizeSheetName(String(row['Source Newsletter'] ?? 'Newsletter'), 31);
grouped.set(sheet, [...(grouped.get(sheet) ?? []), row]);
}
for (const [sheet, rows] of grouped) {
+2 -2
View File
@@ -1,8 +1,8 @@
/** Characters that are replaced with a space when building a sheet name. */
const invalidSheetCharacters = /[:/\\?*[\]]/g;

/**
 * Turns arbitrary text into a usable sheet name.
 *
 * Invalid characters become spaces, whitespace runs collapse to one space and
 * the result is trimmed; an empty result falls back to `Newsletter`. The name
 * is then cut to `maxLength` UTF-16 code units. If the cut lands in the middle
 * of a surrogate pair the dangling half is dropped so the name never ends in a
 * lone surrogate.
 *
 * @param input - Raw name, e.g. a newsletter title.
 * @param maxLength - Maximum length of the result; defaults to 100.
 * @returns The sanitized, truncated name (never empty).
 * @throws RangeError when `maxLength` is not a number of at least 1, which
 *   would otherwise yield an empty or tail-trimmed name.
 */
export function sanitizeSheetName(input: string, maxLength = 100): string {
  if (!(maxLength >= 1)) {
    throw new RangeError(`maxLength must be at least 1, received ${maxLength}`);
  }

  const cleaned = input.replace(invalidSheetCharacters, ' ').replace(/\s+/g, ' ').trim();
  const truncated = (cleaned || 'Newsletter').slice(0, maxLength);

  // A high surrogate (U+D800–U+DBFF) at the end has lost its low half to the cut.
  const lastUnit = truncated.charCodeAt(truncated.length - 1);
  const endsWithHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  const safe = endsWithHighSurrogate ? truncated.slice(0, -1) : truncated;

  return safe || 'Newsletter'.slice(0, maxLength);
}
export function escapeCell(value: unknown): unknown {
+39
View File
@@ -0,0 +1,39 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import XLSX from 'xlsx';
import { ExcelWriter } from '../src/output/excel.js';
// Scratch directory for the workbook written by each test; recreated per test.
let workDir = '';

beforeEach(async () => {
  const prefix = join(tmpdir(), 'nlc-excel-');
  workDir = await mkdtemp(prefix);
});

afterEach(async () => {
  await rm(workDir, { recursive: true, force: true });
});

describe('ExcelWriter', () => {
  it('truncates newsletter sheet names to the Excel 31-character limit', async () => {
    const workbookPath = join(workDir, 'catalog.xlsx');
    const longName = 'A Very Long Newsletter Name That Exceeds The Excel Limit';
    const row = {
      'Source Newsletter': longName,
      Title: 'Post',
      'Link URL': 'https://example.com'
    };

    const writer = new ExcelWriter(workbookPath);
    await writer.write({ rows: [row], sponsors: [], deadLinks: [] });

    // Read the file back so the assertion covers what was actually persisted.
    const { SheetNames: sheetNames } = XLSX.readFile(workbookPath);
    expect(sheetNames[0]).toBe('A Very Long Newsletter Name Tha');
    expect(sheetNames[0].length).toBe(31);
  });
});
+79
View File
@@ -0,0 +1,79 @@
import { describe, expect, it } from 'vitest';
import { buildBrowserCommand, GmailClient } from '../src/gmail/client.js';
describe('GmailClient', () => {
  it('uses PowerShell to open Windows OAuth URLs without splitting query parameters', () => {
    const oauthUrl =
      'https://accounts.google.com/o/oauth2/v2/auth?access_type=offline&response_type=code';
    const expectedCommand = {
      file: 'powershell.exe',
      args: ['-NoProfile', '-Command', 'Start-Process -FilePath $args[0]', oauthUrl]
    };

    expect(buildBrowserCommand(oauthUrl, 'win32')).toEqual(expectedCommand);
  });

  it('loads HTML messages from the configured Gmail label', async () => {
    const bodyHtml = '<h2>Python</h2><a href="https://example.com">Post</a>';
    const rawDate = 'Sat, 16 May 2026 10:00:00 -0500';
    const sender = 'Weekly <weekly@example.com>';
    const rfcMessageId = '<msg-1@example.com>';

    // Records which message endpoints the client touches, in order.
    const apiCalls: string[] = [];

    // Minimal Gmail message resource with a single base64url-encoded HTML part.
    const buildStoredMessage = () => ({
      id: 'msg-1',
      payload: {
        headers: [
          { name: 'Message-ID', value: rfcMessageId },
          { name: 'From', value: sender },
          { name: 'Date', value: rawDate }
        ],
        parts: [
          {
            mimeType: 'text/html',
            body: { data: Buffer.from(bodyHtml).toString('base64url') }
          }
        ]
      }
    });

    const gmailStub = {
      users: {
        labels: {
          list: async () => ({
            data: { labels: [{ id: 'Label_1', name: 'Newsletters' }] }
          })
        },
        messages: {
          list: async (_params: unknown) => {
            apiCalls.push('list');
            return { data: { messages: [{ id: 'msg-1' }] } };
          },
          get: async () => {
            apiCalls.push('get');
            return { data: buildStoredMessage() };
          }
        }
      }
    };

    const client = new GmailClient(gmailStub);
    const loaded = await client.fetchMessages('Newsletters', { maxResults: 5 });

    expect(apiCalls).toEqual(['list', 'get']);
    expect(loaded).toEqual([
      {
        id: 'msg-1',
        messageId: rfcMessageId,
        from: sender,
        date: '2026-05-16T15:00:00.000Z',
        subject: '',
        html: bodyHtml,
        headers: {
          date: rawDate,
          from: sender,
          listId: undefined,
          messageId: rfcMessageId,
          subject: ''
        }
      }
    ]);
  });
});