fix(dom): rewrite scraper to use Torn's CSS module structure (li class prefix, propertyValue span)
This commit is contained in:
+54
-79
@@ -3,102 +3,77 @@
|
|||||||
* { attr: 'strength'|'speed'|..., current: number, gym: string }
|
* { attr: 'strength'|'speed'|..., current: number, gym: string }
|
||||||
* or `null` if the page doesn't look like a Torn gym page.
|
* or `null` if the page doesn't look like a Torn gym page.
|
||||||
*
|
*
|
||||||
* The selectors below are best-effort matches for torn.com/gym.php
|
* Torn's gym page is a React app using CSS modules with hash suffixes
|
||||||
* and will need adjustment if Torn changes the markup.
|
* (e.g. class="strength___iXqEf", class="propertyValue___IYxjf"). This
|
||||||
|
* scraper targets Torn's actual structure rather than guessing at selectors.
|
||||||
*/
|
*/
|
||||||
|
const KNOWN_ATTRS = ['strength', 'defense', 'speed', 'dexterity', 'endurance', 'intelligence'];
|
||||||
|
const KNOWN_GYMS = [
|
||||||
|
'Total Bastion', 'Frontline Fitness', 'Premier Fitness', 'Average Joes',
|
||||||
|
"Woody's Workout Club", "Baldr's Gym", 'Sportscience Laboratory',
|
||||||
|
'Chrome Gym', "Mr. Miyagi's", 'Power House', 'Gym 300', 'Gym 400', 'Gym 500', 'Gym 600',
|
||||||
|
'Elite Gym', "David's Gym",
|
||||||
|
];
|
||||||
|
|
||||||
export function currentAttribute() {
|
export function currentAttribute() {
|
||||||
// The attribute name is shown in the gym page header.
|
const li = findActiveAttributeLi();
|
||||||
// Torn displays it as a capitalized word (e.g. "Strength") near the
|
if (!li) return null;
|
||||||
// top of the gym form.
|
const attr = extractAttrFromLi(li);
|
||||||
const KNOWN = ['strength', 'defense', 'speed', 'dexterity', 'endurance', 'intelligence'];
|
|
||||||
const ATTR_RE = new RegExp('\\b(' + KNOWN.join('|') + ')\\b');
|
|
||||||
|
|
||||||
const headers = document.querySelectorAll('h1, h2, h3, h4, .title, .gym-title, [class*="gym"]');
|
|
||||||
let attr = null;
|
|
||||||
let attrEl = null;
|
|
||||||
for (const el of headers) {
|
|
||||||
const t = (el.textContent || '').trim().toLowerCase();
|
|
||||||
const m = t.match(ATTR_RE);
|
|
||||||
if (m) { attr = m[1]; attrEl = el; break; }
|
|
||||||
}
|
|
||||||
if (!attr) return null;
|
if (!attr) return null;
|
||||||
|
const current = extractValueFromLi(li);
|
||||||
// Current value: look for the prominent number on the page that is
|
|
||||||
// formatted like a Torn attribute (e.g. "14,328,501"). Search near
|
|
||||||
// the attribute element so we don't pick up unrelated global numbers.
|
|
||||||
let valEl = findValueNear(attrEl);
|
|
||||||
if (!valEl) valEl = findValueElement(); // fallback: whole-page scan
|
|
||||||
if (!valEl) return null;
|
|
||||||
const current = parseNumber(valEl.textContent);
|
|
||||||
if (current == null) return null;
|
if (current == null) return null;
|
||||||
|
|
||||||
// Gym name: any element on the page containing the word "Gym" or
|
|
||||||
// "Bastion" / "Frontline" / etc. Torn's gym names vary.
|
|
||||||
const gym = findGymName() || 'Unknown gym';
|
const gym = findGymName() || 'Unknown gym';
|
||||||
|
|
||||||
return { attr, current, gym };
|
return { attr, current, gym };
|
||||||
}
|
}
|
||||||
|
|
||||||
function findValueNear(el) {
|
function findActiveAttributeLi() {
|
||||||
// Look at the element itself, then up to a few ancestors, then their descendants.
|
// Priority 1: the <li> with the "success" class (just trained).
|
||||||
// Prefer the largest formatted number within ~2 parent levels.
|
const lis = document.querySelectorAll('ul[class*="properties"] > li[class*="success"]');
|
||||||
const scope = [];
|
for (const li of lis) {
|
||||||
let cur = el;
|
if (extractAttrFromLi(li)) return li;
|
||||||
for (let depth = 0; depth < 3 && cur; depth++) {
|
|
||||||
scope.push(cur);
|
|
||||||
cur = cur.parentElement;
|
|
||||||
}
|
}
|
||||||
let best = null;
|
// Priority 2: the <li> corresponding to the .gained message's attribute.
|
||||||
let bestN = -Infinity;
|
const gained = document.querySelector('[class*="gained"]');
|
||||||
for (const root of scope) {
|
if (gained) {
|
||||||
const candidates = root.querySelectorAll('*');
|
const text = (gained.textContent || '').toLowerCase();
|
||||||
for (const c of candidates) {
|
for (const attr of KNOWN_ATTRS) {
|
||||||
if (c.children.length > 0) continue;
|
if (text.includes(attr)) {
|
||||||
const t = (c.textContent || '').trim();
|
const li = document.querySelector('ul[class*="properties"] > li[class^="' + attr + '___"]');
|
||||||
if (!/^[\d,]+(\.\d+)?$/.test(t)) continue;
|
if (li) return li;
|
||||||
const n = parseNumber(t);
|
|
||||||
if (n == null || n < 1) continue;
|
|
||||||
if (n > bestN) { best = c; bestN = n; }
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return best;
|
}
|
||||||
|
// Priority 3: the first <li> in the properties list.
|
||||||
|
const all = document.querySelectorAll('ul[class*="properties"] > li');
|
||||||
|
for (const li of all) {
|
||||||
|
if (extractAttrFromLi(li)) return li;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
function findValueElement() {
|
function extractAttrFromLi(li) {
|
||||||
// Fallback only used when no element is found near the attribute.
|
const cls = li.className || '';
|
||||||
// Walk all elements; pick the largest formatted number on the page.
|
for (const attr of KNOWN_ATTRS) {
|
||||||
const candidates = document.querySelectorAll('*');
|
if (cls.split(/\s+/).some((c) => c.startsWith(attr + '___'))) return attr;
|
||||||
let best = null;
|
|
||||||
let bestN = -Infinity;
|
|
||||||
for (const el of candidates) {
|
|
||||||
if (el.children.length > 0) continue;
|
|
||||||
const t = (el.textContent || '').trim();
|
|
||||||
if (!/^[\d,]+(\.\d+)?$/.test(t)) continue;
|
|
||||||
const n = parseNumber(t);
|
|
||||||
if (n == null || n < 1) continue;
|
|
||||||
if (n > bestN) { best = el; bestN = n; }
|
|
||||||
}
|
}
|
||||||
return best;
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
function extractValueFromLi(li) {
|
||||||
|
const valueSpan = li.querySelector('[class^="propertyValue"]');
|
||||||
|
if (!valueSpan) return null;
|
||||||
|
return parseNumber(valueSpan.textContent);
|
||||||
}
|
}
|
||||||
|
|
||||||
function findGymName() {
|
function findGymName() {
|
||||||
// Look for a known set of Torn gym name fragments. Adjust as needed.
|
// Gym names live in aria-labels of <button class="gymButton___HASH">.
|
||||||
// Prefer an element that looks like the gym panel so we don't match
|
const buttons = document.querySelectorAll('button[class*="gymButton"]');
|
||||||
// against global widgets (news, sidebar, ads).
|
for (const btn of buttons) {
|
||||||
const panel = document.querySelector('.gym, #gym, [class*="gym-"], [class*="Gym"]');
|
const label = btn.getAttribute('aria-label') || '';
|
||||||
const roots = panel ? [panel, document.body] : [document.body];
|
for (const name of KNOWN_GYMS) {
|
||||||
const known = [
|
// aria-label format: "Gym Name. Membership cost - $X. ..."
|
||||||
'Total Bastion', 'Frontline Fitness', 'Gym 300', 'Gym 500',
|
if (label === name || label.startsWith(name + '.') || label.startsWith(name + ' ')) {
|
||||||
'Baldr\'s Gym', 'Sportscience Laboratory', 'Premier Fitness',
|
return name;
|
||||||
'Chrome Gym', 'Mr. Miyagi\'s', 'Power House',
|
|
||||||
];
|
|
||||||
for (const root of roots) {
|
|
||||||
const all = root.querySelectorAll('h1, h2, h3, h4, p, span, div, li');
|
|
||||||
for (const el of all) {
|
|
||||||
if (el.children.length > 0) continue;
|
|
||||||
const t = (el.textContent || '').trim();
|
|
||||||
for (const name of known) {
|
|
||||||
if (t.includes(name)) return name;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user