|
@@ -0,0 +1,111 @@
|
|
|
+import { type AnyNode, load } from "cheerio";
|
|
|
+import "dotenv/config";
|
|
|
+
|
|
|
+import config from "../config";
|
|
|
+import Scraper from "./scraper";
|
|
|
+
|
|
|
+import type { IScraperReportsOptions } from "../interfaces/scaper-reports-options";
|
|
|
+import type { IItem } from "../interfaces/item";
|
|
|
+
|
|
|
+import LogLevels from "../enums/log-levels";
|
|
|
+import Props from "../enums/props";
|
|
|
+
|
|
|
/**
 * Scrapes a reports listing page and maps the matched DOM nodes to items.
 *
 * The page at `options.url` is fetched through the project `Scraper`, parsed
 * with cheerio, and the nodes matched by `options.reportsSelector` are turned
 * into `IItem` objects using the optional per-field selectors in the options.
 */
export default class ScraperReports {
  private readonly _name: string;
  private readonly _options: IScraperReportsOptions;
  private readonly _scraper: Scraper;

  /**
   * @param name Label prefixed to this instance's log lines.
   * @param options Target URL and the selectors used to extract each field.
   */
  constructor (name: string, options: IScraperReportsOptions) {
    this._name = name;
    this._options = options;
    this._scraper = new Scraper();
  }

  /**
   * Produces a printable message from a caught value.
   *
   * Catch variables are `unknown` under strict compiler settings, and anything
   * can be thrown at runtime, so the value is narrowed before reading `message`.
   */
  private static describeError (err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }

  /**
   * Reads the text or an attribute of the element matched by `selector`
   * inside `domNodeElement`.
   *
   * @param domNodeElement Node used as the root of the lookup.
   * @param selector Selector evaluated against that node.
   * @param prop `Props.TEXT` reads the text content; any other value is passed
   *   to cheerio's `attr()` as the attribute name.
   * @returns The extracted value, or `""` when nothing is found or the lookup throws.
   */
  private getProperty (domNodeElement: AnyNode, selector: string, prop: Props = Props.TEXT): string {
    const $ = load(domNodeElement);
    let value: string | undefined = "";
    try {
      value = prop === Props.TEXT ? $(selector).text() : $(selector).attr(prop);
    } catch (err: unknown) {
      // Extraction is best-effort: the failure is only reported in debug mode
      // and the caller receives an empty string.
      if (config.LOG_LEVEL === LogLevels.DEBUG) {
        console.debug(`${this._name} | Error raised\n`);
        console.debug(`From ${domNodeElement.type} can't get value using selector '${selector}'`);
        console.error(ScraperReports.describeError(err));
      }
    }
    return value ?? "";
  }

  /**
   * Returns the trimmed date text of `node`, or today's date formatted with
   * the `es-CL` locale when no `dateSelector` is configured.
   */
  private getDate (node: AnyNode): string {
    const selector = this._options.dateSelector ?? "";
    return selector !== "" ? this.getProperty(node, selector).trim() : new Date().toLocaleDateString("es-CL");
  }

  /** Returns the trimmed location text of `node`, or `""` when no `locationSelector` is configured. */
  private getLocation (node: AnyNode): string {
    const selector = this._options.locationSelector ?? "";
    return selector !== "" ? this.getProperty(node, selector).trim() : "";
  }

  /** Returns the trimmed depth text of `node`, or `""` when no `depthSelector` is configured. */
  private getDepth (node: AnyNode): string {
    const selector = this._options.depthSelector ?? "";
    return selector !== "" ? this.getProperty(node, selector).trim() : "";
  }

  /**
   * Returns the magnitude of `node` converted with `Number()`, or `0` when no
   * `magnitudeSelector` is configured.
   *
   * NOTE(review): text that is not purely numeric converts to `NaN` — confirm
   * the scraped cell never carries a unit suffix.
   */
  private getMagnitude (node: AnyNode): number {
    const selector = this._options.magnitudeSelector ?? "";
    return selector !== "" ? Number(this.getProperty(node, selector).trim()) : 0;
  }

  /**
   * Builds the report link by appending the scraped link attribute to the
   * configured URL with its `/index.html` part removed.
   *
   * When no `linkSelector` is configured the result is that base URL alone.
   */
  private getLink (node: AnyNode): string {
    const selector = this._options.linkSelector ?? "";
    let link = selector !== "" ? this.getProperty(node, selector, Props.LINK).trim() : "";
    link = this._options.url.replace("/index.html", "") + link;
    return link;
  }

  /** Fetches the file at `link` through the scraper and returns its result. */
  private async getLocationImg (link: string): Promise<File | null> {
    return await this._scraper.scrapeFile(link);
  }

  /**
   * Scrapes the configured page and builds items from the matched nodes.
   *
   * Failures are caught and, in debug mode, logged; the items collected up to
   * that point are returned. The execution time is always logged.
   *
   * @param limit Maximum number of nodes to convert, counted from index 1.
   *   Defaults to 1; pass `Infinity` to process every remaining node.
   * @returns The collected items, possibly empty.
   */
  public async getItems (limit = 1): Promise<IItem[]> {
    const items: IItem[] = [];
    const startTime = Date.now();

    try {
      console.info("Starting scraping", this._options);
      const response = await this._scraper.scrape({ url: this._options.url });
      const html = response.data.data.html;

      const $ = load(html as string);
      const domElements = $(this._options.reportsSelector);

      if (config.LOG_LEVEL === LogLevels.DEBUG) {
        console.debug(`${this._name} | Items obtained: ${domElements.length} `);
      }

      // Index 0 is never read — presumably a header row; TODO confirm.
      const firstIndex = 1;
      // Clamp to the number of matched nodes so a short result set never
      // hands `undefined` to the field extractors.
      const endIndex = Math.min(domElements.length, firstIndex + Math.max(0, limit));

      for (let i = firstIndex; i < endIndex; i++) {
        const node = domElements[i];
        if (node === undefined) {
          continue;
        }
        const link = this.getLink(node);
        // Swap only a trailing ".html" extension (optionally followed by a
        // query or fragment) so an "html" earlier in the URL is left alone.
        const imageLink = link.replace(/\.html(?=$|[?#])/, ".jpeg");
        items.push({
          date: this.getDate(node),
          location: this.getLocation(node),
          depth: this.getDepth(node),
          magnitude: this.getMagnitude(node),
          link,
          locationImg: await this.getLocationImg(imageLink)
        });
      }
    } catch (err: unknown) {
      if (config.LOG_LEVEL === LogLevels.DEBUG) {
        console.debug(`${this._name} | Error\n`);
        console.error(ScraperReports.describeError(err));
      }
    } finally {
      const endTime = Date.now();
      const duration = (endTime - startTime) / 1000;
      console.info(`${this._name} | Execution time: ${duration}s`);
    }

    return items;
  }
}
|