fix(dom): rewrite scraper to use Torn's CSS module structure (li class prefix, propertyValue span)

This commit is contained in:
dev
2026-06-05 08:38:39 -05:00
parent 76e3ba2488
commit 6dd5d2e3f2
+54 -79
View File
@@ -3,102 +3,77 @@
* { attr: 'strength'|'speed'|..., current: number, gym: string } * { attr: 'strength'|'speed'|..., current: number, gym: string }
* or `null` if the page doesn't look like a Torn gym page. * or `null` if the page doesn't look like a Torn gym page.
* *
* The selectors below are best-effort matches for torn.com/gym.php * Torn's gym page is a React app using CSS modules with hash suffixes
* and will need adjustment if Torn changes the markup. * (e.g. class="strength___iXqEf", class="propertyValue___IYxjf"). This
* scraper targets Torn's actual structure rather than guessing at selectors.
*/ */
/**
 * Attribute names the scraper recognises. Each name is used as the prefix of
 * a hashed CSS-module class (e.g. "strength___iXqEf") and is also searched
 * for in lower-cased page text. Frozen so no caller can alter the shared list.
 */
const KNOWN_ATTRS = Object.freeze([
  'strength', 'defense', 'speed', 'dexterity', 'endurance', 'intelligence',
]);

/**
 * Gym names compared against the start of each gym button's aria-label.
 * Frozen for the same reason as KNOWN_ATTRS.
 */
const KNOWN_GYMS = Object.freeze([
  'Total Bastion', 'Frontline Fitness', 'Premier Fitness', 'Average Joes',
  "Woody's Workout Club", "Baldr's Gym", 'Sportscience Laboratory',
  'Chrome Gym', "Mr. Miyagi's", 'Power House', 'Gym 300', 'Gym 400', 'Gym 500', 'Gym 600',
  'Elite Gym', "David's Gym",
]);
/**
 * NOTE(review): this span reads as a side-by-side diff view flattened into
 * single lines, so it is not runnable as-is. Where a line holds two
 * statements, the left one appears to be the removed implementation and the
 * right one the added implementation — confirm against the real file.
 *
 * Added version (right-hand statements): takes the <li> returned by
 * findActiveAttributeLi(), reads `attr` with extractAttrFromLi(li) and
 * `current` with extractValueFromLi(li), and returns null as soon as any of
 * the three is missing.
 *
 * Removed version (left-hand statements): regex-matches one of six attribute
 * names in the lower-cased text of heading-like elements, then reads the
 * number from the element found by findValueNear(attrEl), falling back to
 * findValueElement().
 *
 * Both versions end the same way: `gym` is findGymName() or 'Unknown gym'.
 *
 * @returns {{attr: string, current: number, gym: string}|null}
 */
export function currentAttribute() { export function currentAttribute() {
// The attribute name is shown in the gym page header. const li = findActiveAttributeLi();
// Torn displays it as a capitalized word (e.g. "Strength") near the if (!li) return null;
// top of the gym form. const attr = extractAttrFromLi(li);
const KNOWN = ['strength', 'defense', 'speed', 'dexterity', 'endurance', 'intelligence'];
const ATTR_RE = new RegExp('\\b(' + KNOWN.join('|') + ')\\b');
const headers = document.querySelectorAll('h1, h2, h3, h4, .title, .gym-title, [class*="gym"]');
let attr = null;
let attrEl = null;
for (const el of headers) {
const t = (el.textContent || '').trim().toLowerCase();
const m = t.match(ATTR_RE);
if (m) { attr = m[1]; attrEl = el; break; }
}
if (!attr) return null; if (!attr) return null;
const current = extractValueFromLi(li);
// Current value: look for the prominent number on the page that is
// formatted like a Torn attribute (e.g. "14,328,501"). Search near
// the attribute element so we don't pick up unrelated global numbers.
let valEl = findValueNear(attrEl);
if (!valEl) valEl = findValueElement(); // fallback: whole-page scan
if (!valEl) return null;
const current = parseNumber(valEl.textContent);
if (current == null) return null; if (current == null) return null;
// Gym name: any element on the page containing the word "Gym" or
// "Bastion" / "Frontline" / etc. Torn's gym names vary.
const gym = findGymName() || 'Unknown gym'; const gym = findGymName() || 'Unknown gym';
return { attr, current, gym }; return { attr, current, gym };
} }
// NOTE(review): flattened side-by-side diff — two functions share these
// lines, so the span is not runnable as-is.
//
// findActiveAttributeLi() (right-hand statements, apparently the added code)
// picks the attribute <li> under ul[class*="properties"] in three passes:
//   1. an <li> whose class contains "success" and that extractAttrFromLi()
//      recognises;
//   2. for each KNOWN_ATTRS name contained in the lower-cased text of the
//      first [class*="gained"] element, the <li> whose class attribute
//      starts with "<name>___";
//   3. the first <li> that extractAttrFromLi() recognises.
// It returns null when no pass finds a match.
// NOTE(review): pass 2 uses class^=, which tests the start of the whole
// class attribute. Pass 1 expects a "success" class on the same <li>, so the
// attribute class may not always come first — confirm against live markup;
// extractAttrFromLi() checks every class token instead.
//
// findValueNear(el) (left-hand statements, apparently the removed code)
// looks under el and up to two of its ancestors, considers only childless
// descendants whose trimmed text is digits and commas with an optional
// decimal part, and returns the one with the largest parseNumber() value
// that is at least 1 (null when there is none).
function findValueNear(el) { function findActiveAttributeLi() {
// Look at the element itself, then up to a few ancestors, then their descendants. // Priority 1: the <li> with the "success" class (just trained).
// Prefer the largest formatted number within ~2 parent levels. const lis = document.querySelectorAll('ul[class*="properties"] > li[class*="success"]');
const scope = []; for (const li of lis) {
let cur = el; if (extractAttrFromLi(li)) return li;
for (let depth = 0; depth < 3 && cur; depth++) {
scope.push(cur);
cur = cur.parentElement;
} }
let best = null; // Priority 2: the <li> corresponding to the .gained message's attribute.
let bestN = -Infinity; const gained = document.querySelector('[class*="gained"]');
for (const root of scope) { if (gained) {
const candidates = root.querySelectorAll('*'); const text = (gained.textContent || '').toLowerCase();
for (const c of candidates) { for (const attr of KNOWN_ATTRS) {
if (c.children.length > 0) continue; if (text.includes(attr)) {
const t = (c.textContent || '').trim(); const li = document.querySelector('ul[class*="properties"] > li[class^="' + attr + '___"]');
if (!/^[\d,]+(\.\d+)?$/.test(t)) continue; if (li) return li;
const n = parseNumber(t);
if (n == null || n < 1) continue;
if (n > bestN) { best = c; bestN = n; }
} }
} }
return best; }
// Priority 3: the first <li> in the properties list.
const all = document.querySelectorAll('ul[class*="properties"] > li');
for (const li of all) {
if (extractAttrFromLi(li)) return li;
}
return null;
} }
// NOTE(review): flattened side-by-side diff — two functions share these
// lines, so the span is not runnable as-is.
//
// extractAttrFromLi(li) (right-hand statements, apparently the added code)
// splits li.className on whitespace and returns the first KNOWN_ATTRS name
// for which some class token starts with "<name>___"; null when none does.
//
// findValueElement() (left-hand statements, apparently the removed code)
// scans every element in the document, keeps childless ones whose trimmed
// text is digits and commas with an optional decimal part, and returns the
// element with the largest parseNumber() value that is at least 1.
function findValueElement() { function extractAttrFromLi(li) {
// Fallback only used when no element is found near the attribute. const cls = li.className || '';
// Walk all elements; pick the largest formatted number on the page. for (const attr of KNOWN_ATTRS) {
const candidates = document.querySelectorAll('*'); if (cls.split(/\s+/).some((c) => c.startsWith(attr + '___'))) return attr;
let best = null;
let bestN = -Infinity;
for (const el of candidates) {
if (el.children.length > 0) continue;
const t = (el.textContent || '').trim();
if (!/^[\d,]+(\.\d+)?$/.test(t)) continue;
const n = parseNumber(t);
if (n == null || n < 1) continue;
if (n > bestN) { best = el; bestN = n; }
} }
return best; return null;
}
/**
 * Read the numeric value shown inside one attribute <li>.
 *
 * @param {Element} li - An attribute <li> from the properties list.
 * @returns {number|null} The result of parseNumber() on the value span's
 *   text, or null when the <li> contains no value span.
 */
function extractValueFromLi(li) {
  // `class^=` is tested against the whole class attribute, so on its own it
  // only matches when the propertyValue class is listed first. The second
  // selector also accepts it after another class name.
  const valueSpan = li.querySelector(
    '[class^="propertyValue"], [class*=" propertyValue"]',
  );
  if (!valueSpan) return null;
  return parseNumber(valueSpan.textContent);
}
function findGymName() { function findGymName() {
// Look for a known set of Torn gym name fragments. Adjust as needed. // Gym names live in aria-labels of <button class="gymButton___HASH">.
// Prefer an element that looks like the gym panel so we don't match const buttons = document.querySelectorAll('button[class*="gymButton"]');
// against global widgets (news, sidebar, ads). for (const btn of buttons) {
const panel = document.querySelector('.gym, #gym, [class*="gym-"], [class*="Gym"]'); const label = btn.getAttribute('aria-label') || '';
const roots = panel ? [panel, document.body] : [document.body]; for (const name of KNOWN_GYMS) {
const known = [ // aria-label format: "Gym Name. Membership cost - $X. ..."
'Total Bastion', 'Frontline Fitness', 'Gym 300', 'Gym 500', if (label === name || label.startsWith(name + '.') || label.startsWith(name + ' ')) {
'Baldr\'s Gym', 'Sportscience Laboratory', 'Premier Fitness', return name;
'Chrome Gym', 'Mr. Miyagi\'s', 'Power House',
];
for (const root of roots) {
const all = root.querySelectorAll('h1, h2, h3, h4, p, span, div, li');
for (const el of all) {
if (el.children.length > 0) continue;
const t = (el.textContent || '').trim();
for (const name of known) {
if (t.includes(name)) return name;
} }
} }
} }