fix(dom): rewrite scraper to use Torn's CSS module structure (li class prefix, propertyValue span)

This commit is contained in:
dev
2026-06-05 08:38:39 -05:00
parent 76e3ba2488
commit 6dd5d2e3f2
+54 -79
View File
@@ -3,102 +3,77 @@
* { attr: 'strength'|'speed'|..., current: number, gym: string }
* or `null` if the page doesn't look like a Torn gym page.
*
* Torn's gym page is a React app using CSS modules with hash suffixes
* (e.g. class="strength___iXqEf", class="propertyValue___IYxjf"). The
* selectors below match on the class-name prefix in front of the hash
* rather than guessing at generic headings. They target torn.com/gym.php
* and will need adjustment if Torn changes the markup.
*/
// Attribute names recognised by the scraper. Each one is compared against
// CSS-module class prefixes of the form "<attr>___" and searched for in the
// lower-cased text of the "gained" message.
const KNOWN_ATTRS = [
  'strength',
  'defense',
  'speed',
  'dexterity',
  'endurance',
  'intelligence',
];

// Gym names that findGymName() compares against the aria-label of the gym
// buttons. Order is preserved on purpose: the first name that matches wins.
const KNOWN_GYMS = [
  'Total Bastion',
  'Frontline Fitness',
  'Premier Fitness',
  'Average Joes',
  "Woody's Workout Club",
  "Baldr's Gym",
  'Sportscience Laboratory',
  'Chrome Gym',
  "Mr. Miyagi's",
  'Power House',
  'Gym 300',
  'Gym 400',
  'Gym 500',
  'Gym 600',
  'Elite Gym',
  "David's Gym",
];
/**
 * Scrape the attribute currently shown on Torn's gym page.
 *
 * @returns {{ attr: string, current: number, gym: string } | null}
 *   The attribute name, its current value and the gym name, or `null` when
 *   no attribute <li> is found, its attribute name cannot be determined, or
 *   its value cannot be read.
 */
export function currentAttribute() {
  // Everything is read from one <li> of the properties list so the attribute
  // name and its value are guaranteed to belong together.
  const li = findActiveAttributeLi();
  if (!li) return null;

  const attr = extractAttrFromLi(li);
  if (!attr) return null;

  // `== null` on purpose: a legitimate value of 0 must not be rejected.
  const current = extractValueFromLi(li);
  if (current == null) return null;

  // The gym name is optional; fall back to a placeholder when none matches.
  const gym = findGymName() || 'Unknown gym';
  return { attr, current, gym };
}
/**
 * Locate the <li> of the properties list whose attribute should be reported.
 *
 * Candidates are tried in this order:
 *   1. an <li> carrying a "success" class (the attribute that was just trained);
 *   2. the <li> for the attribute named in the "gained" message;
 *   3. the first <li> of the list that carries a known attribute class.
 *
 * @returns {Element|null} The chosen <li>, or `null` when none qualifies.
 */
function findActiveAttributeLi() {
  // Priority 1: the <li> with the "success" class (just trained).
  const trained = document.querySelectorAll('ul[class*="properties"] > li[class*="success"]');
  for (const li of trained) {
    if (extractAttrFromLi(li)) return li;
  }

  const all = document.querySelectorAll('ul[class*="properties"] > li');

  // Priority 2: the <li> corresponding to the .gained message's attribute.
  const gained = document.querySelector('[class*="gained"]');
  if (gained) {
    const text = (gained.textContent || '').toLowerCase();
    for (const attr of KNOWN_ATTRS) {
      if (!text.includes(attr)) continue;
      // Match on class tokens (via extractAttrFromLi) instead of a
      // `[class^=...]` selector, which only matches when the attribute class
      // happens to be the first one listed on the element.
      for (const li of all) {
        if (extractAttrFromLi(li) === attr) return li;
      }
    }
  }

  // Priority 3: the first <li> in the properties list with a known attribute.
  for (const li of all) {
    if (extractAttrFromLi(li)) return li;
  }
  return null;
}

/**
 * Read the attribute name encoded in an <li>'s CSS-module class.
 *
 * @param {Element} li - Element whose `className` is inspected.
 * @returns {string|null} The first entry of KNOWN_ATTRS for which some class
 *   token starts with "<attr>___", or `null` when there is none.
 */
function extractAttrFromLi(li) {
  const tokens = (li.className || '').split(/\s+/);
  for (const attr of KNOWN_ATTRS) {
    const prefix = attr + '___';
    if (tokens.some((c) => c.startsWith(prefix))) return attr;
  }
  return null;
}
/**
 * Read the numeric value displayed inside an attribute <li>.
 *
 * @param {Element} li - The attribute <li> to search.
 * @returns {number|null} Whatever `parseNumber` yields for the value element's
 *   text, or `null` when the <li> has no value element.
 *   NOTE(review): `parseNumber` is defined elsewhere in this file; the caller
 *   treats a nullish result as "no value" — confirm that is what it returns
 *   for unparseable text.
 */
function extractValueFromLi(li) {
  // Substring match rather than `^=`: the CSS-module class is
  // "propertyValue___<hash>", but it is not guaranteed to be the first class
  // listed on the element, and `^=` only tests the start of the attribute.
  const valueSpan = li.querySelector('[class*="propertyValue"]');
  if (!valueSpan) return null;
  return parseNumber(valueSpan.textContent);
}
function findGymName() {
// Look for a known set of Torn gym name fragments. Adjust as needed.
// Prefer an element that looks like the gym panel so we don't match
// against global widgets (news, sidebar, ads).
const panel = document.querySelector('.gym, #gym, [class*="gym-"], [class*="Gym"]');
const roots = panel ? [panel, document.body] : [document.body];
const known = [
'Total Bastion', 'Frontline Fitness', 'Gym 300', 'Gym 500',
'Baldr\'s Gym', 'Sportscience Laboratory', 'Premier Fitness',
'Chrome Gym', 'Mr. Miyagi\'s', 'Power House',
];
for (const root of roots) {
const all = root.querySelectorAll('h1, h2, h3, h4, p, span, div, li');
for (const el of all) {
if (el.children.length > 0) continue;
const t = (el.textContent || '').trim();
for (const name of known) {
if (t.includes(name)) return name;
// Gym names live in aria-labels of <button class="gymButton___HASH">.
const buttons = document.querySelectorAll('button[class*="gymButton"]');
for (const btn of buttons) {
const label = btn.getAttribute('aria-label') || '';
for (const name of KNOWN_GYMS) {
// aria-label format: "Gym Name. Membership cost - $X. ..."
if (label === name || label.startsWith(name + '.') || label.startsWith(name + ' ')) {
return name;
}
}
}