scraper-reports.ts 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. import { type AnyNode, load } from "cheerio";
  2. import "dotenv/config";
  3. import config from "../config";
  4. import Scraper from "./scraper";
  5. import type { IScraperReportsOptions } from "../interfaces/scaper-reports-options";
  6. import type { IItem } from "../interfaces/item";
  7. import LogLevels from "../enums/log-levels";
  8. import Props from "../enums/props";
  9. export default class ScraperReports {
  10. private readonly _name: string;
  11. private readonly _options: IScraperReportsOptions;
  12. private readonly _scraper: Scraper;
  13. constructor (name: string, options: IScraperReportsOptions) {
  14. this._name = name;
  15. this._options = options;
  16. this._scraper = new Scraper();
  17. }
  18. private getProperty (domNodeElement: AnyNode, selector: string, prop: Props = Props.TEXT): string {
  19. const $ = load(domNodeElement);
  20. let value: string | undefined = "";
  21. try {
  22. value = prop === Props.TEXT ? $(selector).text() : $(selector).attr(prop);
  23. } catch (err) {
  24. if (config.LOG_LEVEL === LogLevels.DEBUG) {
  25. console.debug(`${this._name} | Error raised\n`);
  26. console.debug(`From ${domNodeElement.type} can't get value using selector '${selector}'`);
  27. console.error(err.message);
  28. }
  29. }
  30. return value ?? "";
  31. }
  32. private getDate (node: AnyNode): string {
  33. const selector = this._options.dateSelector ?? "";
  34. return selector !== "" ? this.getProperty(node, selector).trim() : new Date(Date.now()).toLocaleDateString("es-CL");
  35. }
  36. private getLocation (node: AnyNode): string {
  37. const selector = this._options.locationSelector ?? "";
  38. return selector !== "" ? this.getProperty(node, selector).trim() : "";
  39. }
  40. private getDepth (node: AnyNode): string {
  41. const selector = this._options.depthSelector ?? "";
  42. return selector !== "" ? this.getProperty(node, selector).trim() : "";
  43. }
  44. private getMagnitude (node: AnyNode): number {
  45. const selector = this._options.magnitudeSelector ?? "";
  46. return selector !== "" ? Number(this.getProperty(node, selector).trim()) : 0;
  47. }
  48. private getLink (node: AnyNode): string {
  49. const selector = this._options.linkSelector ?? "";
  50. let link = selector !== "" ? this.getProperty(node, selector, Props.LINK).trim() : "";
  51. link = this._options.url.replace("/index.html", "") + link;
  52. return link;
  53. }
  54. private async getLocationImg (link: string): Promise<File | null> {
  55. return await this._scraper.scrapeFile(link);
  56. }
  57. public async getItems (): Promise<IItem[]> {
  58. const items: IItem[] = [];
  59. const startTime = Date.now();
  60. try {
  61. console.info("Starting scraping", this._options);
  62. const response = await this._scraper.scrape({ url: this._options.url });
  63. const html = response.data.data.html;
  64. const $ = load(html as string);
  65. const domElements = $(this._options.reportsSelector);
  66. if (config.LOG_LEVEL === LogLevels.DEBUG) {
  67. console.debug(`${this._name} | Items obtained: ${domElements.length} `);
  68. }
  69. for (let i = 1; i < domElements.length; i++) {
  70. const node = domElements[i];
  71. const link = this.getLink(node);
  72. items.push({
  73. date: this.getDate(node),
  74. location: this.getLocation(node),
  75. depth: this.getDepth(node),
  76. magnitude: this.getMagnitude(node),
  77. link,
  78. locationImg: await this.getLocationImg(link.replace("html", "jpeg"))
  79. });
  80. }
  81. } catch (err) {
  82. if (config.LOG_LEVEL === LogLevels.DEBUG) {
  83. console.debug(`${this._name} | Error\n`);
  84. console.error(err.message);
  85. }
  86. } finally {
  87. const endTime = Date.now();
  88. const duration = (endTime - startTime) / 1000;
  89. console.info(`${this._name} | Execution time: ${duration}s`);
  90. }
  91. return items;
  92. }
  93. }