| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111 |
- import { type AnyNode, load } from "cheerio";
- import "dotenv/config";
- import config from "../config";
- import Scraper from "./scraper";
- import type { IScraperReportsOptions } from "../interfaces/scaper-reports-options";
- import type { IItem } from "../interfaces/item";
- import LogLevels from "../enums/log-levels";
- import Props from "../enums/props";
/**
 * Scrapes a reports listing page and turns each matched DOM node into an
 * {@link IItem}, using the CSS selectors supplied in the options object.
 */
export default class ScraperReports {
  /** Label prefixed to the log lines emitted by this instance. */
  private readonly _name: string;
  /** Target URL plus the selectors used to locate each field. */
  private readonly _options: IScraperReportsOptions;
  /** Client used to fetch the listing page and the per-item image. */
  private readonly _scraper: Scraper;

  /**
   * @param name - Label used as a prefix in log output.
   * @param options - Page URL and field selectors for this scraper.
   */
  constructor (name: string, options: IScraperReportsOptions) {
    this._name = name;
    this._options = options;
    this._scraper = new Scraper();
  }

  /**
   * Produces a printable message from a caught value. Anything can be thrown
   * in JavaScript, so the value is narrowed before `.message` is read.
   *
   * @param err - The value received by a `catch` clause.
   * @returns The error message, or the stringified value for non-Error throws.
   */
  private static describeError (err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }

  /**
   * Reads the text content or an attribute of the elements matched by
   * `selector` inside `domNodeElement`.
   *
   * @param domNodeElement - Node loaded into cheerio as the search root.
   * @param selector - CSS selector evaluated against the loaded node.
   * @param prop - `Props.TEXT` to read text content; any other value is
   *   passed to `.attr()` as the attribute name.
   * @returns The value found, or `""` when the attribute is missing or the
   *   lookup throws (the failure is only logged at DEBUG level).
   */
  private getProperty (domNodeElement: AnyNode, selector: string, prop: Props = Props.TEXT): string {
    const $ = load(domNodeElement);
    let value: string | undefined = "";
    try {
      value = prop === Props.TEXT ? $(selector).text() : $(selector).attr(prop);
    } catch (err: unknown) {
      if (config.LOG_LEVEL === LogLevels.DEBUG) {
        console.debug(`${this._name} | Error raised\n`);
        console.debug(`From ${domNodeElement.type} can't get value using selector '${selector}'`);
        console.error(ScraperReports.describeError(err));
      }
    }
    return value ?? "";
  }

  /**
   * @param node - Report node to read from.
   * @returns The trimmed text matched by `dateSelector`, or today's date
   *   formatted with the `es-CL` locale when no selector is configured.
   */
  private getDate (node: AnyNode): string {
    const selector = this._options.dateSelector ?? "";
    if (selector === "") {
      return new Date().toLocaleDateString("es-CL");
    }
    return this.getProperty(node, selector).trim();
  }

  /**
   * @param node - Report node to read from.
   * @returns The trimmed text matched by `locationSelector`, or `""` when no
   *   selector is configured.
   */
  private getLocation (node: AnyNode): string {
    const selector = this._options.locationSelector ?? "";
    return selector !== "" ? this.getProperty(node, selector).trim() : "";
  }

  /**
   * @param node - Report node to read from.
   * @returns The trimmed text matched by `depthSelector`, or `""` when no
   *   selector is configured.
   */
  private getDepth (node: AnyNode): string {
    const selector = this._options.depthSelector ?? "";
    return selector !== "" ? this.getProperty(node, selector).trim() : "";
  }

  /**
   * @param node - Report node to read from.
   * @returns The text matched by `magnitudeSelector` converted with
   *   `Number()`, or `0` when no selector is configured. Text that is not a
   *   plain numeric literal yields `NaN`.
   */
  private getMagnitude (node: AnyNode): number {
    const selector = this._options.magnitudeSelector ?? "";
    return selector !== "" ? Number(this.getProperty(node, selector).trim()) : 0;
  }

  /**
   * Builds the report link by appending the `Props.LINK` attribute matched by
   * `linkSelector` to the configured URL with `/index.html` removed.
   *
   * @param node - Report node to read from.
   * @returns The concatenated link; only the stripped base URL when no
   *   selector is configured or nothing matched.
   */
  private getLink (node: AnyNode): string {
    const selector = this._options.linkSelector ?? "";
    const relative = selector !== "" ? this.getProperty(node, selector, Props.LINK).trim() : "";
    return this._options.url.replace("/index.html", "") + relative;
  }

  /**
   * @param link - URL of the image to download.
   * @returns Whatever `Scraper.scrapeFile` resolves to for that URL.
   */
  private async getLocationImg (link: string): Promise<File | null> {
    return await this._scraper.scrapeFile(link);
  }

  /**
   * Fetches the configured page, selects the report nodes and builds one item
   * per node, downloading each item's image sequentially.
   *
   * Any error aborts the loop; it is logged only at DEBUG level and the items
   * collected so far are returned. The elapsed time is always logged.
   *
   * @returns The items gathered before completion or the first failure.
   */
  public async getItems (): Promise<IItem[]> {
    const items: IItem[] = [];
    const startTime = Date.now();
    try {
      console.info("Starting scraping", this._options);
      const response = await this._scraper.scrape({ url: this._options.url });
      const html = response.data.data.html;
      const $ = load(html as string);
      const domElements = $(this._options.reportsSelector);
      if (config.LOG_LEVEL === LogLevels.DEBUG) {
        console.debug(`${this._name} | Items obtained: ${domElements.length} `);
      }
      // NOTE(review): index 0 is skipped on purpose in the original code,
      // presumably because the first match is a header row; confirm against
      // the pages being scraped.
      for (let i = 1; i < domElements.length; i++) {
        const node = domElements[i];
        const link = this.getLink(node);
        // Swap only the trailing ".html" extension (optionally followed by a
        // query or fragment) so an "html" elsewhere in the URL is untouched.
        const imageLink = link.replace(/\.html(?=$|[?#])/, ".jpeg");
        items.push({
          date: this.getDate(node),
          location: this.getLocation(node),
          depth: this.getDepth(node),
          magnitude: this.getMagnitude(node),
          link,
          locationImg: await this.getLocationImg(imageLink)
        });
      }
    } catch (err: unknown) {
      if (config.LOG_LEVEL === LogLevels.DEBUG) {
        console.debug(`${this._name} | Error\n`);
        console.error(ScraperReports.describeError(err));
      }
    } finally {
      const endTime = Date.now();
      const duration = (endTime - startTime) / 1000;
      console.info(`${this._name} | Execution time: ${duration}s`);
    }
    return items;
  }
}
|