feature: Implement Gmail message fetching and Excel sheet name truncation

This commit is contained in:
Keith Solomon
2026-05-17 10:59:44 -05:00
parent cb568597dc
commit 379526114c
9 changed files with 289 additions and 163 deletions
+40 -1
View File
@@ -1,6 +1,7 @@
import { Command, Option } from 'commander';
import { writeFile } from 'node:fs/promises';
import { loadConfig } from '../config/config.js';
import { createGmailClient } from '../gmail/client.js';
import { ExcelWriter } from '../output/excel.js';
import { runCatalog } from '../run/runCatalog.js';
import { validateDateFilters } from './flags.js';
@@ -56,7 +57,15 @@ export function createProgram(): Command {
const writers = config.output.excel.enabled
? [new ExcelWriter(config.output.excel.path)]
: [];
const messages = process.env.NLC_FIXTURE === '1' ? fixtureMessages() : [];
const messages =
process.env.NLC_FIXTURE === '1'
? fixtureMessages()
: await fetchGmailMessages(config, {
dryRun: options.dryRun,
from: options.from,
to: options.to,
last: options.last
});
const summary = await runCatalog({
config,
messages,
@@ -73,6 +82,36 @@ export function createProgram(): Command {
return program;
}
/**
 * Loads messages from Gmail for a run that is not using fixture data.
 *
 * A client is created from the credential and token values in `config.gmail`,
 * then asked for the messages under `config.gmail.folder`.
 *
 * @param config - Loaded configuration; only `gmail.credentials`,
 *   `gmail.token` and `gmail.folder` are read here.
 * @param options - CLI filters. A numeric `dryRun` is forwarded as the
 *   `maxResults` cap; any other value leaves the cap undefined. The whole
 *   object is handed to `buildGmailQuery` to derive the search query.
 * @returns Whatever `client.fetchMessages` resolves to.
 */
async function fetchGmailMessages(
  config: Awaited<ReturnType<typeof loadConfig>>,
  options: { dryRun?: number | boolean; from?: string; to?: string; last?: string }
) {
  const { credentials, token, folder } = config.gmail;
  const limit = typeof options.dryRun === 'number' ? options.dryRun : undefined;

  const gmailClient = await createGmailClient(credentials, token);
  const searchQuery = buildGmailQuery(options);

  return gmailClient.fetchMessages(folder, { maxResults: limit, query: searchQuery });
}
/**
 * Turns the CLI date filters into a Gmail search query string.
 *
 * A non-empty `last` takes precedence and yields `newer_than:<last>`; `from`
 * and `to` are then ignored. Otherwise `from` becomes an `after:` term and
 * `to` a `before:` term, with every `-` in the value replaced by `/`, and the
 * terms are joined with a single space.
 *
 * @param options - Optional `from`, `to` and `last` filter values.
 * @returns The query string, or `undefined` when no filter is set.
 */
function buildGmailQuery(options: {
  from?: string;
  to?: string;
  last?: string;
}): string | undefined {
  const { from, to, last } = options;
  if (last) {
    return `newer_than:${last}`;
  }

  // Swap dashes for slashes in the date value.
  const withSlashes = (value: string): string => value.split('-').join('/');

  const terms: string[] = [
    ...(from ? [`after:${withSlashes(from)}`] : []),
    ...(to ? [`before:${withSlashes(to)}`] : [])
  ];
  return terms.length === 0 ? undefined : terms.join(' ');
}
function fixtureMessages() {
return [
{
+127 -9
View File
@@ -1,8 +1,9 @@
import { createServer } from 'node:http';
import { spawn } from 'node:child_process';
import { readFile, writeFile, mkdir } from 'node:fs/promises';
import { dirname } from 'node:path';
import open from 'open';
import { google, gmail_v1 } from 'googleapis';
import { platform } from 'node:os';
import { google } from 'googleapis';
import { expandHome } from '../config/config.js';
import { NewsletterMessage } from '../parsing/types.js';
@@ -31,6 +32,14 @@ export async function authorizeGmail(credentialsPath: string, tokenPath: string)
}
}
/**
 * Authorizes against Gmail and wraps the resulting API handle in a
 * `GmailClient`.
 *
 * @param credentialsPath - Passed through to `authorizeGmail`.
 * @param tokenPath - Passed through to `authorizeGmail`.
 * @returns A `GmailClient` built on `google.gmail` (version `v1`) using the
 *   auth object returned by `authorizeGmail`.
 */
export async function createGmailClient(
  credentialsPath: string,
  tokenPath: string
): Promise<GmailClient> {
  const authorized = await authorizeGmail(credentialsPath, tokenPath);
  const api = google.gmail({ version: 'v1', auth: authorized });
  return new GmailClient(api);
}
async function waitForBrowserCode(url: string): Promise<string> {
return new Promise((resolveCode, reject) => {
const server = createServer((req, res) => {
@@ -43,17 +52,126 @@ async function waitForBrowserCode(url: string): Promise<string> {
}
});
server.listen(53682, () => {
open(url).catch(reject);
console.log(`Open this URL to authorize Gmail access:\n${url}\n`);
openBrowser(url).catch(reject);
});
});
}
export class GmailClient {
public constructor(private readonly gmail: gmail_v1.Gmail) {}
// Opens `url` by spawning the command that buildBrowserCommand returns for the current platform.
// NOTE(review): no 'error' listener is attached to the spawned child, so a failed launch
// (e.g. the opener binary is missing) presumably does not reject this promise — confirm and handle if needed.
async function openBrowser(url: string): Promise<void> {
const command = buildBrowserCommand(url, platform());
public async fetchMessages(_label: string): Promise<NewsletterMessage[]> {
// Live Gmail traversal is isolated here. The run path accepts injected messages for tests and smoke.
await this.gmail.users.labels.list({ userId: 'me' });
return [];
// The child is spawned detached with stdio ignored and is unref'd right after,
// so this function does not wait for the opener process to finish.
const child = spawn(command.file, command.args, {
detached: true,
stdio: 'ignore',
windowsHide: true
});
child.unref();
}
/**
 * Chooses the executable and argument list used to open `url` on a platform.
 *
 * - `win32`: `powershell.exe` running `Start-Process` with the URL as a trailing argument.
 * - `darwin`: `open <url>`.
 * - anything else: `xdg-open <url>`.
 *
 * NOTE(review): confirm that powershell.exe exposes the trailing URL as
 * `$args[0]` when the script text is supplied through `-Command`.
 *
 * @param url - The address to open.
 * @param os - Platform identifier, e.g. the value of `os.platform()`.
 * @returns The `file` to execute and its `args`.
 */
export function buildBrowserCommand(url: string, os: NodeJS.Platform) {
  switch (os) {
    case 'win32':
      return {
        file: 'powershell.exe',
        args: ['-NoProfile', '-Command', 'Start-Process -FilePath $args[0]', url]
      };
    case 'darwin':
      return { file: 'open', args: [url] };
    default:
      return { file: 'xdg-open', args: [url] };
  }
}
/**
 * Reads newsletter messages from a Gmail mailbox through an injected Gmail API
 * handle (the object returned by `google.gmail(...)`; left untyped here).
 */
export class GmailClient {
  public constructor(private readonly gmail: any) {}

  /**
   * Fetches and parses the messages carrying the given label.
   *
   * Listing follows `nextPageToken` until the API reports no further page or
   * `options.maxResults` message ids have been collected, so mailboxes larger
   * than one result page are not silently truncated.
   *
   * @param labelName - Label display name, matched case-insensitively.
   * @param options - `maxResults` caps the number of listed messages; `query`
   *   is passed to the API as the `q` search string.
   * @returns The parsed messages that have a non-empty `html` body, in listing order.
   * @throws Error when no label with that name exists.
   */
  public async fetchMessages(
    labelName: string,
    options: { maxResults?: number; query?: string } = {}
  ): Promise<NewsletterMessage[]> {
    const labelId = await this.findLabelId(labelName);
    const ids = await this.listMessageIds(labelId, options);

    const loaded: NewsletterMessage[] = [];
    // Messages are fetched one at a time, in listing order.
    for (const id of ids) {
      const response = await this.gmail.users.messages.get({
        userId: 'me',
        id,
        format: 'full'
      });
      const parsed = parseGmailMessage(response.data);
      // Messages without an HTML body are dropped.
      if (parsed.html) {
        loaded.push(parsed);
      }
    }
    return loaded;
  }

  /** Collects message ids for a label across all result pages, honouring the optional cap. */
  private async listMessageIds(
    labelId: string,
    options: { maxResults?: number; query?: string }
  ): Promise<string[]> {
    const limit = options.maxResults;
    const ids: string[] = [];
    let pageToken: string | undefined;

    do {
      const listed = await this.gmail.users.messages.list({
        userId: 'me',
        labelIds: [labelId],
        // Only ask for what is still missing so the cap holds across pages.
        maxResults: limit === undefined ? undefined : limit - ids.length,
        q: options.query,
        pageToken
      });
      for (const entry of listed.data.messages ?? []) {
        if (entry.id) {
          ids.push(entry.id);
        }
      }
      // Treat null/empty tokens as "no further page".
      pageToken = listed.data.nextPageToken || undefined;
    } while (pageToken !== undefined && (limit === undefined || ids.length < limit));

    return ids;
  }

  /** Resolves a label display name (case-insensitive) to its Gmail label id. */
  private async findLabelId(labelName: string): Promise<string> {
    const response = await this.gmail.users.labels.list({ userId: 'me' });
    const labels = response.data.labels ?? [];
    const wanted = labelName.toLowerCase();
    const label = labels.find(
      (entry: { id?: string; name?: string }) => entry.name?.toLowerCase() === wanted
    );
    if (!label?.id) {
      throw new Error(`Gmail label "${labelName}" was not found`);
    }
    return label.id;
  }
}
/**
 * Converts a raw Gmail API message (as returned with `format: 'full'`) into a
 * `NewsletterMessage`.
 *
 * Header names are matched case-insensitively. Header entries that lack a
 * string name or value are ignored instead of throwing. The HTML body comes
 * from `findHtmlPart` and defaults to an empty string.
 *
 * @param message - Raw message object; `id`, `internalDate` and `payload` are read.
 * @returns The normalized message. `date` is always a valid ISO-8601 string.
 */
function parseGmailMessage(message: any): NewsletterMessage {
  const headers = new Map<string, string>();
  for (const header of message.payload?.headers ?? []) {
    if (typeof header?.name === 'string' && typeof header.value === 'string') {
      headers.set(header.name.toLowerCase(), header.value);
    }
  }

  const messageId = headers.get('message-id');
  const from = headers.get('from');
  const rawDate = headers.get('date');
  const subject = headers.get('subject') ?? '';

  return {
    id: message.id ?? messageId ?? '',
    messageId: messageId ?? message.id ?? '',
    from: from ?? '',
    date: resolveMessageDate(rawDate, message.internalDate).toISOString(),
    subject,
    html: findHtmlPart(message.payload) ?? '',
    headers: {
      date: rawDate,
      from,
      listId: headers.get('list-id'),
      messageId,
      subject
    }
  };
}

/**
 * Picks the best available timestamp for a message: the Date header when it
 * parses, otherwise Gmail's `internalDate` (epoch milliseconds), otherwise now.
 * Never returns an invalid Date, so `toISOString()` on the result cannot throw.
 */
function resolveMessageDate(headerDate: string | undefined, internalDate: unknown): Date {
  if (headerDate !== undefined) {
    const fromHeader = new Date(headerDate);
    if (!Number.isNaN(fromHeader.getTime())) {
      return fromHeader;
    }
  }
  // Guard null/empty first: Number(null) and Number('') are both 0, i.e. 1970.
  if (internalDate != null && internalDate !== '') {
    const fromInternal = new Date(Number(internalDate));
    if (!Number.isNaN(fromInternal.getTime())) {
      return fromInternal;
    }
  }
  return new Date();
}
/**
 * Depth-first search of a MIME part tree for HTML content.
 *
 * A part whose `mimeType` is `text/html` and that has `body.data` is decoded
 * with `decodeBase64Url` and returned. Otherwise the entries of `parts` are
 * searched in order and the first non-empty result wins.
 *
 * @param part - A message payload or nested part; falsy values yield `undefined`.
 * @returns The decoded HTML, or `undefined` when none is found.
 */
function findHtmlPart(part: any): string | undefined {
  if (part) {
    const encoded = part.mimeType === 'text/html' ? part.body?.data : undefined;
    if (encoded) {
      return decodeBase64Url(encoded);
    }

    for (const nested of part.parts ?? []) {
      const found = findHtmlPart(nested);
      if (found) {
        return found;
      }
    }
  }
  return undefined;
}
/**
 * Decodes a base64url string to UTF-8 text.
 *
 * `-` and `_` are mapped to `+` and `/` before the value is handed to
 * `Buffer.from` with the `base64` encoding.
 *
 * @param value - base64url-encoded input.
 * @returns The decoded bytes interpreted as UTF-8.
 */
function decodeBase64Url(value: string): string {
  const standardAlphabet = value.replace(/[-_]/g, (ch) => (ch === '-' ? '+' : '/'));
  const bytes = Buffer.from(standardAlphabet, 'base64');
  return bytes.toString('utf8');
}
+1 -1
View File
@@ -10,7 +10,7 @@ export class ExcelWriter implements OutputWriter {
const workbook = XLSX.utils.book_new();
const grouped = new Map<string, Record<string, unknown>[]>();
for (const row of payload.rows) {
const sheet = sanitizeSheetName(String(row['Source Newsletter'] ?? 'Newsletter'));
const sheet = sanitizeSheetName(String(row['Source Newsletter'] ?? 'Newsletter'), 31);
grouped.set(sheet, [...(grouped.get(sheet) ?? []), row]);
}
for (const [sheet, rows] of grouped) {
+2 -2
View File
@@ -1,8 +1,8 @@
const invalidSheetCharacters = /[:/\\?*[\]]/g;
export function sanitizeSheetName(input: string): string {
/**
 * Produces a worksheet-safe name from arbitrary text.
 *
 * Characters matched by `invalidSheetCharacters` become spaces, whitespace
 * runs collapse to a single space, and the result is trimmed. An empty result
 * falls back to 'Newsletter'.
 *
 * @param input - Raw name.
 * @param maxLength - Maximum length of the returned name; defaults to 100.
 * @returns The cleaned name, cut to at most `maxLength` characters.
 */
export function sanitizeSheetName(input: string, maxLength = 100): string {
const cleaned = input.replace(invalidSheetCharacters, ' ').replace(/\s+/g, ' ').trim();
return (cleaned || 'Newsletter').slice(0, 100);
// NOTE(review): the cut happens after trim(), so a cut that lands just after a space
// can leave a trailing space in the result — confirm that is acceptable for sheet names.
return (cleaned || 'Newsletter').slice(0, maxLength);
}
export function escapeCell(value: unknown): unknown {