Browse Source

added sismologia and metrodesantiago

Pablo Barrera Yaksic 2 months ago
parent
commit
c71352dbb8

+ 4 - 0
.env.example

@@ -13,6 +13,8 @@ ELMOSTRADOR = "https://www.elmostrador.cl/categoria/dia/"
 EMOL = "https://www.emol.com"
 INTERFERENCIA = "https://interferencia.cl/"
 LATERCERA = "https://www.latercera.com/canal/nacional/"
+# Keep a real URL here: an empty value still counts as "set", so the default in src/config.ts would not apply
+METRODESANTIAGO = "https://xcancel.com/metrodesantiago"
+SISMOLOGIA = "https://www.sismologia.cl/index.html"
 THECLINIC = "https://www.theclinic.cl/lo-ultimo/"
 
 # KEYS
@@ -24,6 +26,8 @@ MASTODON_KEY_ELMOSTRADOR = ""
 MASTODON_KEY_EMOL = ""
 MASTODON_KEY_INTERFERENCIA = ""
 MASTODON_KEY_LATERCERA = ""
+MASTODON_KEY_METRODESANTIAGO = ""
+MASTODON_KEY_SISMOLOGIA = ""
 MASTODON_KEY_THECLINIC = ""
 
 # Develop

BIN
bun.lockb


+ 5 - 0
package.json

@@ -14,7 +14,12 @@
   "author": "Pablo Barrera Yaksic",
   "devDependencies": {
     "@types/aws-lambda": "^8.10.146",
+    "@typescript-eslint/eslint-plugin": "^8.18.1",
+    "@typescript-eslint/parser": "^8.18.1",
+    "@typescript-eslint/typescript-estree": "^8.18.1",
     "@types/node": "^22.10.2",
+    "eslint-config-prettier": "^9.1.0",
+    "eslint-plugin-prettier": "^5.2.1",
     "eslint": "^9.17.0",
     "typescript": "^5.7.2"
   },

+ 4 - 0
src/config.ts

@@ -16,6 +16,8 @@ const config = {
   EMOL: process.env.EMOL ?? "https://www.emol.com",
   INTERFERENCIA: process.env.INTERFERENCIA ?? "https://interferencia.cl/",
   LATERCERA: process.env.LATERCERA ?? "https://www.latercera.com/canal/nacional/",
+  METRODESANTIAGO: process.env.METRODESANTIAGO ?? "https://xcancel.com/metrodesantiago",
+  SISMOLOGIA: process.env.SISMOLOGIA ?? "https://www.sismologia.cl/index.html",
   THECLINIC: process.env.THECLINIC ?? "https://www.theclinic.cl/lo-ultimo/",
   // KEYS
   MASTODON_KEY_CHILECULTURA: process.env.MASTODON_KEY_CHILECULTURA ?? "",
@@ -26,6 +28,8 @@ const config = {
   MASTODON_KEY_EMOL: process.env.MASTODON_KEY_EMOL ?? "",
   MASTODON_KEY_INTERFERENCIA: process.env.MASTODON_KEY_INTERFERENCIA ?? "",
   MASTODON_KEY_LATERCERA: process.env.MASTODON_KEY_LATERCERA ?? "",
+  MASTODON_KEY_METRODESANTIAGO: process.env.MASTODON_KEY_METRODESANTIAGO ?? "",
+  MASTODON_KEY_SISMOLOGIA: process.env.MASTODON_KEY_SISMOLOGIA ?? "",
   MASTODON_KEY_THECLINIC: process.env.MASTODON_KEY_THECLINIC ?? "",
   // Develop
   DEVELOP: !(process.env.DEVELOP === "false"),

+ 6 - 2
src/enums/emojis.ts

@@ -1,9 +1,13 @@
 enum Emojis {
+  CALENDAR = "📆",
+  DEPTH = "🕳️",
   LINK = "🔗",
+  LOCATION = "📍",
+  MAGNITUDE = "🎚️",
   NEWS = "📰",
+  PIN = "📍",
+  SIREN = "🚨",
   TAGS = "🏷️",
-  CALENDAR = "📆",
-  PIN = "📍"
 };
 
 export default Emojis

+ 2 - 0
src/index.ts

@@ -10,6 +10,7 @@ import { handler as elmostrador } from "./portales/elmostrador/handler";
 import { handler as emol } from "./portales/emol/handler";
 import { handler as interferencia } from "./portales/interferencia/handler";
 import { handler as latercera } from "./portales/latercera/handler";
+// import { handler as metrodesantiago } from "./portales/metrodesantiago/handler";
 import { handler as theclinic } from "./portales/theclinic/handler";
 
 const context: Context = {
@@ -36,6 +37,7 @@ const portalsHandlers = {
   "emol": emol,
   "interferencia": interferencia,
   "latercera": latercera,
+  // "metrodesantiago": metrodesantiago,
   "theclinic": theclinic,
 };
 

+ 8 - 0
src/interfaces/report.ts

@@ -0,0 +1,8 @@
+/**
+ * One seismic report as extracted from a single row of the scraped listing.
+ */
+export interface IReport {
+  /** Date text of the report (scraped from the row, or the current date when no date selector is set). */
+  date: string;
+  /** Text of the location cell of the row. */
+  location: string;
+  /** Text of the depth cell, kept as scraped. */
+  depth: string;
+  /** Magnitude parsed from the magnitude cell. */
+  magnitude: number;
+  /** URL of the detailed report page. */
+  link: string;
+  /** Downloaded location image for the report, or `null` when there is none. */
+  locationImg: File | null;
+}

+ 15 - 0
src/interfaces/scaper-reports-options.ts

@@ -0,0 +1,15 @@
+import ScraperMethods from "../enums/scraper-methods"
+
+/**
+ * Settings consumed by the reports scraper: where to fetch the listing from
+ * and which CSS selectors locate each field inside a report row.
+ */
+export interface IScraperReportsOptions {
+  /** Address of the page that lists the reports. */
+  url: string;
+  /** Selector matching one element per report. */
+  reportsSelector: string;
+  /** Selector (relative to a report element) for the date text. */
+  dateSelector: string;
+  /** Selector (relative to a report element) for the location text. */
+  locationSelector: string;
+  /** Selector (relative to a report element) for the depth text. */
+  depthSelector: string;
+  /** Selector (relative to a report element) for the magnitude text. */
+  magnitudeSelector: string;
+  /** Selector (relative to a report element) for the anchor whose href is the report link. */
+  linkSelector: string;
+  /** Selector for the location image. NOTE(review): not read by the scraper code in this change — confirm intended use. */
+  locationImgSelector: string;
+  /** Fetching strategy used to download the listing page. */
+  scraperMethod: ScraperMethods;
+  /** NOTE(review): unit and consumer are not visible in this change — confirm before relying on it. */
+  cacheExpiration?: number;
+  /** Hashtag names, without the leading `#`, appended to every post. */
+  hashtags?: string[];
+}

+ 4 - 0
src/portales/metrodesantiago/definition.yml

@@ -0,0 +1,4 @@
+# Function definition for the Metro de Santiago portal: the exported
+# `handler` of handler.ts is triggered by a schedule event once per hour.
+metrodesantiago:
+  handler: ./src/portales/metrodesantiago/handler.handler
+  events:
+    - schedule: rate(1 hour)

+ 20 - 0
src/portales/metrodesantiago/handler.ts

@@ -0,0 +1,20 @@
+import { type Handler } from "aws-lambda";
+
+import config from "../../config";
+import Portal from "../portal";
+import ScraperMethods from "../../enums/scraper-methods";
+
+// Label handed to Portal for this account.
+const portalName = "Metro de Santiago";
+
+// Portal configured to scrape the Metro de Santiago timeline page.
+// NOTE(review): `titleSelector` ends in `a.fullname`, which by its class name
+// looks like the author's display name rather than the tweet text — confirm.
+// NOTE(review): `linkPrefix` is the full profile URL; verify how Portal joins
+// it with the scraped href so the account path is not duplicated.
+const metroDeSantiago = new Portal(portalName, config.MASTODON_KEY_METRODESANTIAGO, {
+  url: config.METRODESANTIAGO,
+  linkPrefix: config.METRODESANTIAGO,
+  articlesSelector: "div.timeline div.timeline-item",
+  titleSelector: "div.tweet-body div.tweet-header div.tweet-name-row div.fullname-and-username a.fullname",
+  linkSelector: "a.tweet-link",
+  scraperMethod: ScraperMethods.PUPPETEER
+});
+
+/** Lambda entry point for the Metro de Santiago portal. */
+export const handler: Handler = metroDeSantiago.getHandler();

+ 2 - 1
src/portales/portal.ts

@@ -35,7 +35,8 @@ export default class Portal {
       const length = articles.length;
 
       // Order has to be reversed to appear in the correct order when posting
-      for (let i = length - 1; i >= 0; i--) {
+      articles.reverse();
+      for (let i = 0; i < length; i++) {
         const article = articles[i];
         const exists = await this._redisClient.retrieve(article.link);
 

+ 24 - 0
src/portales/sismologiauchile/handler.ts

@@ -0,0 +1,24 @@
+import { type Handler } from "aws-lambda";
+
+import config from "../../config";
+import SismologiaCL from ".";
+import ScraperMethods from "../../enums/scraper-methods";
+
+// Label handed to SismologiaCL, which uses it as the prefix of its log lines.
+const accountName = "Sismología UdeChile";
+
+// Publisher configured for the sismologia.cl listing. All field selectors
+// are evaluated relative to each row matched by `reportsSelector`.
+const sismologia = new SismologiaCL(accountName, config.MASTODON_KEY_SISMOLOGIA, {
+  url: config.SISMOLOGIA,
+  // Rows of the `sismologia` table carrying the `percibido` class.
+  reportsSelector: "table.sismologia tbody tr.percibido",
+  dateSelector: "td a",
+  locationSelector: "td:nth-child(1)",
+  depthSelector: "td:nth-child(2)",
+  magnitudeSelector: "td:nth-child(3)",
+  linkSelector: "td:nth-child(1) a",
+  locationImgSelector: "main aside img",
+  scraperMethod: ScraperMethods.PUPPETEER,
+  hashtags: ["Sismo", "SismoPercibido", "Chile"]
+});
+
+/** Lambda entry point for the Sismología UdeChile account. */
+export const handler: Handler = sismologia.getHandler();

+ 96 - 0
src/portales/sismologiauchile/index.ts

@@ -0,0 +1,96 @@
+import { type Handler } from "aws-lambda";
+import { createRestAPIClient } from "masto";
+
+import ScraperReports from "../../utils/scraper-reports";
+import RedisClient from "../../libs/redis-client";
+import LogLevels from "../../enums/log-levels";
+import Emojis from "../../enums/emojis";
+import config from "../../config";
+
+import { type IScraperReportsOptions } from "../../interfaces/scaper-reports-options";
+
+// Element type of the list returned by ScraperReports.getReports().
+type ScrapedReport = Awaited<ReturnType<ScraperReports["getReports"]>>[number];
+
+/**
+ * Publishes scraped seismic reports to a Mastodon account.
+ *
+ * Every report link is looked up in Redis first; only links that are not
+ * stored yet are posted, and each posted link is then stored for one week.
+ */
+export default class SismologiaCL {
+  private readonly _name: string;
+  private readonly _scraperReportsOptions: IScraperReportsOptions;
+  private readonly _redisClient: RedisClient;
+  private readonly _mastodonClient: ReturnType<typeof createRestAPIClient>;
+  private readonly _scraperReports: ScraperReports;
+
+  /**
+   * @param name Label used as prefix in every log line.
+   * @param accessToken Mastodon access token of the account to post with.
+   * @param scraperReportsOptions Scraper settings (URL, selectors, hashtags).
+   */
+  constructor (name: string, accessToken: string, scraperReportsOptions: IScraperReportsOptions) {
+    this._name = name;
+    this._scraperReportsOptions = scraperReportsOptions;
+    this._scraperReports = new ScraperReports(this._name, this._scraperReportsOptions);
+    this._redisClient = new RedisClient();
+    this._mastodonClient = createRestAPIClient({ url: config.MASTODON_URL, accessToken });
+  }
+
+  /**
+   * Builds the status text for one report.
+   *
+   * @param report Report to describe.
+   * @returns The message; the hashtag line is only added when hashtags are configured.
+   */
+  private buildMessage (report: ScrapedReport): string {
+    // The date text is removed from the location text; with the selectors
+    // configured for sismologia.cl the date link lives inside the location cell.
+    const location = report.location.replace(report.date, "").trim();
+
+    const lines = [
+      `${Emojis.SIREN} Reporte de #sismo`,
+      "",
+      `${Emojis.CALENDAR} ${report.date} (Hora local)`,
+      `${Emojis.MAGNITUDE} Magnitud: ${report.magnitude} - ${Emojis.DEPTH} Profundidad: ${report.depth}`,
+      `${Emojis.LOCATION} ${location}`,
+      `${Emojis.LINK} ${report.link}`
+    ];
+
+    // `hashtags` is optional: without this guard the text "undefined" would be posted.
+    const hashtags = this._scraperReportsOptions.hashtags ?? [];
+    if (hashtags.length > 0) {
+      lines.push(`${Emojis.TAGS} ${hashtags.map((hashtag) => `#${hashtag}`).join(" ")}`);
+    }
+
+    return lines.join("\n");
+  }
+
+  /**
+   * Scrapes the reports and posts the ones that have not been published yet.
+   * Errors are logged and never rethrown.
+   *
+   * @param event Invocation event; only logged on error when LOG_LEVEL is DEBUG.
+   * @param context Invocation context; only logged on error when LOG_LEVEL is DEBUG.
+   */
+  public async run (event?: unknown, context?: unknown): Promise<void> {
+    try {
+      const reports = await this._scraperReports.getReports();
+      if (config.LOG_LEVEL === LogLevels.DEBUG) {
+        console.log(`${this._name} | Reports`, reports);
+      }
+
+      let totalPublished = 0;
+      // Value stored in Redis next to every link published during this run.
+      const publishedOn = new Date().toLocaleDateString();
+
+      // Order has to be reversed to appear in the correct order when posting.
+      // A copy is reversed so the scraper's array is left untouched.
+      for (const report of [...reports].reverse()) {
+        const exists = await this._redisClient.retrieve(report.link);
+        if (exists !== null) {
+          continue;
+        }
+
+        const message = this.buildMessage(report);
+        console.log(`\n${this._name} | Sending\n`, message);
+
+        // In develop mode nothing is sent to Mastodon (no media upload either)
+        // and nothing is stored in Redis.
+        if (config.DEVELOP) {
+          continue;
+        }
+
+        const mediaIds: string[] = [];
+        if (report.locationImg !== null && report.locationImg !== undefined) {
+          const media = await this._mastodonClient.v2.media.create({ file: report.locationImg, description: report.location });
+          mediaIds.push(media.id);
+        }
+
+        await this._mastodonClient.v1.statuses.create({ status: message, mediaIds });
+        await this._redisClient.store(report.link, publishedOn, { EX: 60 * 60 * 168 }); // EX: 1 week
+        totalPublished++;
+      }
+      console.log(`${this._name} | Published ${totalPublished} new reports`);
+    } catch (err: unknown) {
+      console.log(`${this._name} | An error has occurred\n`);
+      console.error(err instanceof Error ? err.message : err);
+      if (config.LOG_LEVEL === LogLevels.DEBUG) {
+        if (event !== undefined) {
+          console.debug("\nEvent\n");
+          console.debug(event);
+        }
+
+        if (context !== undefined) {
+          console.debug("\nContext\n");
+          console.debug(context);
+        }
+      }
+    }
+    console.log(`${this._name} | Finished`);
+  }
+
+  /**
+   * @returns A Lambda handler that forwards its event and context to `run`.
+   */
+  public getHandler (): Handler {
+    return async (event, context) => {
+      await this.run(event, context);
+    };
+  }
+}

+ 22 - 0
src/portales/sismologiauchile/run.ts

@@ -0,0 +1,22 @@
+import config from "../../config";
+import SismologiaCL from ".";
+import ScraperMethods from "../../enums/scraper-methods";
+
+// Label handed to SismologiaCL, which uses it as the prefix of its log lines.
+const accountName = "Sismología UdeChile";
+
+// Same selectors as the Lambda handler, but the page is fetched with the
+// AXIOS scraper method for this standalone script.
+const sismologia = new SismologiaCL(accountName, config.MASTODON_KEY_SISMOLOGIA, {
+  url: config.SISMOLOGIA,
+  reportsSelector: "table.sismologia tbody tr.percibido",
+  dateSelector: "td a",
+  locationSelector: "td:nth-child(1)",
+  depthSelector: "td:nth-child(2)",
+  magnitudeSelector: "td:nth-child(3)",
+  linkSelector: "td:nth-child(1) a",
+  locationImgSelector: "main aside img",
+  scraperMethod: ScraperMethods.AXIOS,
+  hashtags: ["Sismo", "SismoPercibido", "Chile"]
+});
+
+// run() logs its own errors instead of rejecting, so the process exits
+// with code 0 as soon as it settles.
+sismologia.run().then(() => process.exit(0));

+ 113 - 0
src/utils/scraper-reports.ts

@@ -0,0 +1,113 @@
+import { load } from "cheerio";
+import { type AnyNode } from "domhandler";
+import "dotenv/config";
+
+import config from "../config";
+import Scraper from "./scraper";
+
+import type { IScraperReportsOptions } from "../interfaces/scaper-reports-options";
+import type { IReport } from "../interfaces/report";
+
+import LogLevels from "../enums/log-levels";
+import Props from "../enums/props";
+import ScraperMethods from "../enums/scraper-methods";
+
+/**
+ * Scrapes a page that lists seismic reports and converts every element
+ * matched by `reportsSelector` into an IReport, including the location
+ * image downloaded for that report.
+ */
+export default class ScraperReports {
+  private readonly _name: string;
+  private readonly _options: IScraperReportsOptions;
+  private readonly _scraper: Scraper;
+
+  /**
+   * @param name Label used as prefix in log output.
+   * @param options Target URL, CSS selectors and scraper method.
+   */
+  constructor (name: string, options: IScraperReportsOptions) {
+    this._name = name;
+    this._options = options;
+    this._scraper = new Scraper();
+  }
+
+  /**
+   * Reads the text or an attribute of the elements matching `selector`
+   * inside `domNodeElement`.
+   *
+   * @returns The value found, or "" when nothing matches or the lookup throws.
+   */
+  private getProperty (domNodeElement: AnyNode, selector: string, prop: Props = Props.TEXT): string {
+    const $ = load(domNodeElement);
+    let value: string | undefined = "";
+    try {
+      value = prop === Props.TEXT ? $(selector).text() : $(selector).attr(prop);
+    } catch (err: unknown) {
+      if (config.LOG_LEVEL === LogLevels.DEBUG) {
+        console.debug(`${this._name} | Error raised\n`);
+        console.debug(`From ${domNodeElement.type} can't get value using selector '${selector}'`);
+        console.error(err instanceof Error ? err.message : err);
+      }
+    }
+    return value ?? "";
+  }
+
+  /** Trimmed text for `selector` inside `node`; "" when the selector is missing or empty. */
+  private getText (node: AnyNode, selector: string | undefined): string {
+    if (selector === undefined || selector === "") {
+      return "";
+    }
+    return this.getProperty(node, selector).trim();
+  }
+
+  /** Date text of the row, or today's date (es-CL format) when no date selector is configured. */
+  private getDate (node: AnyNode): string {
+    const selector = this._options.dateSelector ?? "";
+    return selector !== "" ? this.getText(node, selector) : new Date().toLocaleDateString("es-CL");
+  }
+
+  /** Location text of the row. */
+  private getLocation (node: AnyNode): string {
+    return this.getText(node, this._options.locationSelector);
+  }
+
+  /** Depth text of the row, as scraped. */
+  private getDepth (node: AnyNode): string {
+    return this.getText(node, this._options.depthSelector);
+  }
+
+  /**
+   * Magnitude of the row. The leading number of the cell is parsed, so
+   * trailing text and a decimal comma are tolerated; 0 is returned when no
+   * number can be read (never NaN).
+   */
+  private getMagnitude (node: AnyNode): number {
+    const text = this.getText(node, this._options.magnitudeSelector);
+    const magnitude = Number.parseFloat(text.replace(",", "."));
+    return Number.isNaN(magnitude) ? 0 : magnitude;
+  }
+
+  /**
+   * Absolute URL of the report page, resolved against the listing URL.
+   *
+   * @returns The URL, or "" when the row has no href or it cannot be resolved.
+   */
+  private getLink (node: AnyNode): string {
+    const selector = this._options.linkSelector ?? "";
+    const href = selector !== "" ? this.getProperty(node, selector, Props.LINK).trim() : "";
+    if (href === "") {
+      return "";
+    }
+    try {
+      // Standard URL resolution: for a root-relative href on the default
+      // listing URL this yields the same result as stripping "/index.html"
+      // and concatenating, and it also handles relative and absolute hrefs.
+      return new URL(href, this._options.url).href;
+    } catch {
+      return "";
+    }
+  }
+
+  /**
+   * Downloads the location image of a report. The image is expected at the
+   * report URL with the `.html` extension replaced by `.jpeg`.
+   *
+   * @param reportLink Absolute URL of the report page.
+   * @returns The downloaded file, or `null` when the link is not an `.html`
+   *          page or the download fails (a failure must not drop the report).
+   */
+  private async getLocationImg (reportLink: string): Promise<File | null> {
+    try {
+      const imageUrl = new URL(reportLink);
+      if (!imageUrl.pathname.endsWith(".html")) {
+        return null;
+      }
+      // Only the file extension is swapped; an "html" elsewhere in the URL is left alone.
+      imageUrl.pathname = imageUrl.pathname.replace(/\.html$/, ".jpeg");
+      return await this._scraper.scrapeFile(imageUrl.href);
+    } catch (err: unknown) {
+      console.warn(`${this._name} | Could not get location image for ${reportLink}:`, err instanceof Error ? err.message : err);
+      return null;
+    }
+  }
+
+  /**
+   * Fetches the listing page and builds one report per matched element.
+   * Rows without a usable link are skipped because the link identifies the report.
+   *
+   * @returns The reports in page order; an empty or partial list when scraping
+   *          fails (the error is logged, never thrown).
+   */
+  public async getReports (): Promise<IReport[]> {
+    const reports: IReport[] = [];
+    const startTime = Date.now();
+
+    try {
+      console.info("Starting scraping", this._options);
+      const response = await this._scraper.scrape({ url: this._options.url, scraperMethod: this._options.scraperMethod });
+      const html = this._options.scraperMethod === ScraperMethods.PUPPETEER ? response.data.data.html : response.data;
+
+      const $ = load(html as string);
+      const rows = $(this._options.reportsSelector).toArray();
+
+      if (config.LOG_LEVEL === LogLevels.DEBUG) {
+        console.debug(`${this._name} | Reports obtained: ${rows.length} `);
+      }
+
+      for (const node of rows) {
+        const link = this.getLink(node);
+        if (link === "") {
+          if (config.LOG_LEVEL === LogLevels.DEBUG) {
+            console.debug(`${this._name} | Skipping report without link`);
+          }
+          continue;
+        }
+
+        reports.push({
+          date: this.getDate(node),
+          location: this.getLocation(node),
+          depth: this.getDepth(node),
+          magnitude: this.getMagnitude(node),
+          link,
+          locationImg: await this.getLocationImg(link)
+        });
+      }
+    } catch (err: unknown) {
+      // Always reported: otherwise a failed scrape looks like "no new reports".
+      console.error(`${this._name} | Error while scraping reports:`, err instanceof Error ? err.message : err);
+    } finally {
+      const endTime = Date.now();
+      const duration = (endTime - startTime) / 1000;
+      console.info(`${this._name} | Execution time: ${duration}s`);
+    }
+
+    return reports;
+  }
+}