import { type AnyNode, load } from "cheerio";
import "dotenv/config";
import config from "../config";
import Scraper from "./scraper";
import type { IScraperReportsOptions } from "../interfaces/scaper-reports-options";
import type { IItem } from "../interfaces/item";
import LogLevels from "../enums/log-levels";
import Props from "../enums/props";

/**
 * Scrapes a page of reports and maps every matched DOM element to an
 * {@link IItem} (date, location, depth, magnitude, link and location image).
 *
 * Which elements and fields are read is driven entirely by the CSS selectors
 * supplied in {@link IScraperReportsOptions}.
 */
export default class ScraperReports {
  private readonly _name: string;
  private readonly _options: IScraperReportsOptions;
  private readonly _scraper: Scraper;

  /**
   * @param name - Label used as a prefix in every log line emitted by this instance.
   * @param options - Target URL plus the selectors used to locate reports and their fields.
   */
  constructor (name: string, options: IScraperReportsOptions) {
    this._name = name;
    this._options = options;
    this._scraper = new Scraper();
  }

  /**
   * Turns a caught value into a printable message.
   *
   * Anything can be thrown in JavaScript, so the value is narrowed before
   * `.message` is read instead of assuming it is an `Error`.
   */
  private static describeError (err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }

  /**
   * Reads a single value from inside `domNodeElement`.
   *
   * @param domNodeElement - Node that is loaded as the root of the lookup.
   * @param selector - CSS selector evaluated against that node.
   * @param prop - `Props.TEXT` reads the text content; any other value is used as an attribute name.
   * @returns The value found, or `""` when the attribute is missing or the lookup throws.
   */
  private getProperty (domNodeElement: AnyNode, selector: string, prop: Props = Props.TEXT): string {
    const $ = load(domNodeElement);
    let value: string | undefined = "";

    try {
      value = prop === Props.TEXT ? $(selector).text() : $(selector).attr(prop);
    } catch (err: unknown) {
      // Best effort: a field that cannot be read yields "" rather than aborting the item.
      if (config.LOG_LEVEL === LogLevels.DEBUG) {
        console.debug(`${this._name} | Error raised\n`);
        console.debug(`From ${domNodeElement.type} can't get value using selector '${selector}'`);
        console.error(ScraperReports.describeError(err));
      }
    }

    return value ?? "";
  }

  /**
   * Shared lookup for the optional selectors: returns the trimmed value, or
   * `""` when no selector is configured.
   */
  private getTrimmedValue (node: AnyNode, selector?: string | null, prop: Props = Props.TEXT): string {
    const effectiveSelector = selector ?? "";
    return effectiveSelector !== "" ? this.getProperty(node, effectiveSelector, prop).trim() : "";
  }

  /**
   * @returns The trimmed date text, or today's date formatted for the `es-CL`
   * locale when no date selector is configured.
   */
  private getDate (node: AnyNode): string {
    const selector = this._options.dateSelector ?? "";
    return selector !== ""
      ? this.getProperty(node, selector).trim()
      : new Date().toLocaleDateString("es-CL");
  }

  /** @returns The trimmed location text, or `""` when no location selector is configured. */
  private getLocation (node: AnyNode): string {
    return this.getTrimmedValue(node, this._options.locationSelector);
  }

  /** @returns The trimmed depth text, or `""` when no depth selector is configured. */
  private getDepth (node: AnyNode): string {
    return this.getTrimmedValue(node, this._options.depthSelector);
  }

  /**
   * @returns The magnitude text converted with `Number()`; `0` when no
   * selector is configured or the text is empty, `NaN` when the text is not numeric.
   */
  private getMagnitude (node: AnyNode): number {
    // Number("") is 0, so the "no selector" case needs no separate branch.
    return Number(this.getTrimmedValue(node, this._options.magnitudeSelector));
  }

  /**
   * @returns The configured URL with `"/index.html"` removed, followed by the
   * link attribute read from the node (nothing is appended when no link
   * selector is configured).
   */
  private getLink (node: AnyNode): string {
    const relativeLink = this.getTrimmedValue(node, this._options.linkSelector, Props.LINK);
    return this._options.url.replace("/index.html", "") + relativeLink;
  }

  /** Delegates to `Scraper.scrapeFile` for the given link and returns its result. */
  private async getLocationImg (link: string): Promise<IItem["locationImg"]> {
    return await this._scraper.scrapeFile(link);
  }

  /**
   * Scrapes the configured URL and builds one item per matched report element.
   *
   * Failures never reject: the error is logged and whatever items were built
   * before the failure are returned.
   *
   * @returns The items collected, possibly empty.
   */
  public async getItems (): Promise<IItem[]> {
    const items: IItem[] = [];
    const startTime = Date.now();

    try {
      console.info("Starting scraping", this._options);

      const response = await this._scraper.scrape({ url: this._options.url });
      const html = response.data.data.html;
      const $ = load(html as string);
      const domElements = $(this._options.reportsSelector);

      if (config.LOG_LEVEL === LogLevels.DEBUG) {
        console.debug(`${this._name} | Items obtained: ${domElements.length} `);
      }

      // NOTE(review): the first matched element is skipped, exactly as before —
      // presumably it is a header row; confirm against the target page.
      for (const node of domElements.toArray().slice(1)) {
        const link = this.getLink(node);

        items.push({
          date: this.getDate(node),
          location: this.getLocation(node),
          depth: this.getDepth(node),
          magnitude: this.getMagnitude(node),
          link,
          // NOTE(review): only the first "html" occurrence in the link is replaced;
          // this assumes it is the file extension — confirm.
          // Images are fetched one at a time, in document order.
          locationImg: await this.getLocationImg(link.replace("html", "jpeg"))
        });
      }
    } catch (err: unknown) {
      if (config.LOG_LEVEL === LogLevels.DEBUG) {
        console.debug(`${this._name} | Error\n`);
      }
      // Always surface the failure: otherwise a failed scrape is
      // indistinguishable from a page that simply has no reports.
      console.error(`${this._name} | ${ScraperReports.describeError(err)}`);
    } finally {
      const duration = (Date.now() - startTime) / 1000;
      console.info(`${this._name} | Execution time: ${duration}s`);
    }

    return items;
  }
}