fix(dom): rewrite scraper to use Torn's CSS module structure (li class prefix, propertyValue span)
This commit is contained in:
+54
-79
@@ -3,102 +3,77 @@
|
||||
* { attr: 'strength'|'speed'|..., current: number, gym: string }
|
||||
* or `null` if the page doesn't look like a Torn gym page.
|
||||
*
|
||||
* The selectors below are best-effort matches for torn.com/gym.php
|
||||
* and will need adjustment if Torn changes the markup.
|
||||
* Torn's gym page is a React app using CSS modules with hash suffixes
|
||||
* (e.g. class="strength___iXqEf", class="propertyValue___IYxjf"). This
|
||||
* scraper targets Torn's actual structure rather than guessing at selectors.
|
||||
*/
|
||||
/**
 * Attribute names recognised by the scraper. Each one is matched as a
 * CSS-module class prefix of the form `<attr>___<hash>` and as a plain word
 * inside the "gained" message text.
 * Frozen so the shared lookup list cannot be mutated by accident.
 */
const KNOWN_ATTRS = Object.freeze([
  'strength', 'defense', 'speed', 'dexterity', 'endurance', 'intelligence',
]);

/**
 * Gym names compared against the start of each gym button's aria-label.
 * Frozen so the shared lookup list cannot be mutated by accident.
 */
const KNOWN_GYMS = Object.freeze([
  'Total Bastion', 'Frontline Fitness', 'Premier Fitness', 'Average Joes',
  "Woody's Workout Club", "Baldr's Gym", 'Sportscience Laboratory',
  'Chrome Gym', "Mr. Miyagi's", 'Power House', 'Gym 300', 'Gym 400', 'Gym 500', 'Gym 600',
  'Elite Gym', "David's Gym",
]);
|
||||
|
||||
/**
 * Scrape the attribute shown in the active row of the gym page's
 * properties list.
 *
 * @returns {{ attr: string, current: number, gym: string } | null}
 *   The attribute name, its current value and the gym name, or `null` when
 *   no attribute row, attribute name or value can be found.
 */
export function currentAttribute() {
  // Everything is read from a single <li> so the attribute name and its
  // value always come from the same row.
  const li = findActiveAttributeLi();
  if (!li) return null;

  const attr = extractAttrFromLi(li);
  if (!attr) return null;

  // `== null` on purpose: a value of 0 is still a valid reading.
  const current = extractValueFromLi(li);
  if (current == null) return null;

  // The gym name is optional; fall back to a placeholder instead of failing.
  const gym = findGymName() || 'Unknown gym';

  return { attr, current, gym };
}
|
||||
|
||||
function findValueNear(el) {
|
||||
// Look at the element itself, then up to a few ancestors, then their descendants.
|
||||
// Prefer the largest formatted number within ~2 parent levels.
|
||||
const scope = [];
|
||||
let cur = el;
|
||||
for (let depth = 0; depth < 3 && cur; depth++) {
|
||||
scope.push(cur);
|
||||
cur = cur.parentElement;
|
||||
/**
 * Locate the <li> in the properties list that represents the attribute of
 * interest.
 *
 * Candidates are tried in priority order:
 *   1. an <li> whose class contains "success" and carries a known attribute;
 *   2. the <li> for the first known attribute named in the "gained" message;
 *   3. the first <li> in the list that carries a known attribute.
 *
 * @returns {Element|null} The matching <li>, or `null` if none qualifies.
 */
function findActiveAttributeLi() {
  const ITEM_SELECTOR = 'ul[class*="properties"] > li';

  // Priority 1: the <li> with the "success" class (just trained).
  for (const li of document.querySelectorAll(ITEM_SELECTOR + '[class*="success"]')) {
    if (extractAttrFromLi(li)) return li;
  }

  const items = Array.from(document.querySelectorAll(ITEM_SELECTOR));

  // Priority 2: the <li> corresponding to the .gained message's attribute.
  // Match through extractAttrFromLi rather than a `class^=` selector so the
  // row is found even when the module class is not the first class token.
  const gained = document.querySelector('[class*="gained"]');
  if (gained) {
    const text = (gained.textContent || '').toLowerCase();
    for (const attr of KNOWN_ATTRS) {
      if (!text.includes(attr)) continue;
      const match = items.find((li) => extractAttrFromLi(li) === attr);
      if (match) return match;
    }
  }

  // Priority 3: the first <li> in the properties list with a known attribute.
  return items.find((li) => extractAttrFromLi(li)) || null;
}
|
||||
|
||||
function findValueElement() {
|
||||
// Fallback only used when no element is found near the attribute.
|
||||
// Walk all elements; pick the largest formatted number on the page.
|
||||
const candidates = document.querySelectorAll('*');
|
||||
let best = null;
|
||||
let bestN = -Infinity;
|
||||
for (const el of candidates) {
|
||||
if (el.children.length > 0) continue;
|
||||
const t = (el.textContent || '').trim();
|
||||
if (!/^[\d,]+(\.\d+)?$/.test(t)) continue;
|
||||
const n = parseNumber(t);
|
||||
if (n == null || n < 1) continue;
|
||||
if (n > bestN) { best = el; bestN = n; }
|
||||
/**
 * Determine which known attribute an <li> represents from its class list.
 *
 * A class token of the form `<attr>___<suffix>` (e.g. "strength___iXqEf")
 * identifies the attribute; the token may appear anywhere in the class list.
 *
 * @param {Element} li - Candidate list item.
 * @returns {string|null} The first entry of KNOWN_ATTRS that matches, or
 *   `null` when no class token matches or `li` has no string className.
 */
function extractAttrFromLi(li) {
  const rawClass = li && typeof li.className === 'string' ? li.className : '';
  // Split once up front instead of once per attribute.
  const tokens = rawClass.split(/\s+/);

  for (const attr of KNOWN_ATTRS) {
    const prefix = attr + '___';
    if (tokens.some((token) => token.startsWith(prefix))) return attr;
  }
  return null;
}
|
||||
|
||||
/**
 * Read the numeric value displayed inside an attribute <li>.
 *
 * The value element is the first descendant (in document order) with a class
 * token starting with "propertyValue" (e.g. "propertyValue___IYxjf"). Tokens
 * are checked individually so the element is still found when another class
 * precedes the module class in the class attribute.
 *
 * @param {Element} li - Attribute list item to read from.
 * @returns {number|null} Whatever `parseNumber` yields for the element's
 *   text, or `null` when no value element exists.
 */
function extractValueFromLi(li) {
  // `*=` narrows the candidates cheaply; the per-token check below rejects
  // unrelated classes that merely contain the substring.
  const candidates = li.querySelectorAll('[class*="propertyValue"]');
  for (const el of candidates) {
    const rawClass = typeof el.className === 'string' ? el.className : '';
    const isValueEl = rawClass
      .split(/\s+/)
      .some((token) => token.startsWith('propertyValue'));
    if (isValueEl) return parseNumber(el.textContent);
  }
  return null;
}
|
||||
|
||||
function findGymName() {
|
||||
// Look for a known set of Torn gym name fragments. Adjust as needed.
|
||||
// Prefer an element that looks like the gym panel so we don't match
|
||||
// against global widgets (news, sidebar, ads).
|
||||
const panel = document.querySelector('.gym, #gym, [class*="gym-"], [class*="Gym"]');
|
||||
const roots = panel ? [panel, document.body] : [document.body];
|
||||
const known = [
|
||||
'Total Bastion', 'Frontline Fitness', 'Gym 300', 'Gym 500',
|
||||
'Baldr\'s Gym', 'Sportscience Laboratory', 'Premier Fitness',
|
||||
'Chrome Gym', 'Mr. Miyagi\'s', 'Power House',
|
||||
];
|
||||
for (const root of roots) {
|
||||
const all = root.querySelectorAll('h1, h2, h3, h4, p, span, div, li');
|
||||
for (const el of all) {
|
||||
if (el.children.length > 0) continue;
|
||||
const t = (el.textContent || '').trim();
|
||||
for (const name of known) {
|
||||
if (t.includes(name)) return name;
|
||||
// Gym names live in aria-labels of <button class="gymButton___HASH">.
|
||||
const buttons = document.querySelectorAll('button[class*="gymButton"]');
|
||||
for (const btn of buttons) {
|
||||
const label = btn.getAttribute('aria-label') || '';
|
||||
for (const name of KNOWN_GYMS) {
|
||||
// aria-label format: "Gym Name. Membership cost - $X. ..."
|
||||
if (label === name || label.startsWith(name + '.') || label.startsWith(name + ' ')) {
|
||||
return name;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user