Browse Source

first commit

Pablo Barrera Yaksic 1 month ago
commit
2f1a5fc93b

+ 7 - 0
.env.example

@@ -0,0 +1,7 @@
+LOG_LEVEL = "debug"
+REDIS_CONN = "redis://localhost:6379/10"
+PUPPETEER_URL = "http://localhost:8000/api/v1/visit"
+MASTODON_URL = "https://mastodon.cl/api/v1/"
+MASTODON_ACCESS_TOKEN = ""
+
+SISMOLOGIACL = "https://www.sismologia.cl/index.html"

+ 16 - 0
.eslintrc.json

@@ -0,0 +1,16 @@
+{
+  "env": {
+    "browser": false,
+    "commonjs": true,
+    "es2021": true
+  },
+  "extends": "standard-with-typescript",
+  "parserOptions": {
+    "ecmaVersion": "latest"
+  },
+  "rules": {
+    "quotes": "off",
+    "@typescript-eslint/quotes": 0,
+    "@typescript-eslint/semi": 0
+  }
+}

+ 6 - 0
.gitignore

@@ -0,0 +1,6 @@
+node_modules
+dist
+.env
+.env.production
+.build
+.serverless

BIN
bun.lockb


+ 40 - 0
package.json

@@ -0,0 +1,40 @@
+{
+  "name": "bot-sismos",
+  "version": "0.0.8",
+  "description": "Bot que busca reporte de sismos y publica en mastodon.cl",
+  "main": "dist/index.js",
+  "scripts": {
+    "build": "npx tsc",
+    "test": "exit 0"
+  },
+  "repository": {
+    "type": "git",
+    "url": "https://git.pablo.by/Mastodon/bot-sismos.git"
+  },
+  "author": "Pablo Barrera Yaksic",
+  "devDependencies": {
+    "@types/aws-lambda": "^8.10.136",
+    "@types/node": "^20.12.2",
+    "@typescript-eslint/eslint-plugin": "^6.21.0",
+    "eslint": "^8.57.0",
+    "eslint-config-standard-with-typescript": "^43.0.1",
+    "eslint-plugin-import": "^2.29.1",
+    "eslint-plugin-n": "^16.6.2",
+    "eslint-plugin-promise": "^6.1.1",
+    "serverless-offline": "^13.3.3",
+    "serverless-plugin-typescript": "^2.1.5",
+    "typescript": "^5.4.3"
+  },
+  "dependencies": {
+    "axios": "^1.6.8",
+    "cheerio": "^1.0.0-rc.12",
+    "dotenv": "^16.4.5",
+    "masto": "^6.7.0",
+    "redis": "^4.6.13"
+  },
+  "trustedDependencies": [
+    "aws-sdk",
+    "es5-ext",
+    "serverless"
+  ]
+}

+ 17 - 0
serverless.yml

@@ -0,0 +1,17 @@
+service: bot-sismos
+frameworkVersion: '3'
+useDotenv: true
+
+provider:
+  name: aws
+  runtime: nodejs20.x
+  region: us-east-1
+  timeout: 60
+
+plugins:
+  - serverless-offline
+  - serverless-plugin-typescript
+  - serverless-dotenv-plugin
+
+functions:
+  # Each source keeps its function definition next to its handler, under src/sources/.
+  - ${file(./src/sources/sismologiacl/definition.yml)}

+ 11 - 0
src/config.ts

@@ -0,0 +1,11 @@
+// Load .env before any variable is read below. The sismologiacl handler imports
+// this module before any other module has had a chance to load dotenv, so without
+// this line a local run would silently fall back to the defaults.
+import "dotenv/config";
+
+/**
+ * Runtime settings, read once from the environment when this module is first
+ * imported. Every entry falls back to a local-development default.
+ */
+const config = {
+  // Compared against the LogLevels enum to gate debug output.
+  LOG_LEVEL: process.env.LOG_LEVEL ?? "debug",
+  // Connection URL handed to the Redis client.
+  REDIS_CONN: process.env.REDIS_CONN ?? "redis://localhost:6379/10",
+  // Endpoint of the page-rendering service the scraper POSTs to.
+  PUPPETEER_URL: process.env.PUPPETEER_URL ?? "http://localhost:8000/api/v1/visit",
+  // Mastodon instance URL and access token used to create the REST client.
+  MASTODON_URL: process.env.MASTODON_URL ?? "https://mastodon.cl/api/v1/",
+  MASTODON_ACCESS_TOKEN: process.env.MASTODON_ACCESS_TOKEN ?? "",
+  // PORTALES
+  SISMOLOGIACL: process.env.SISMOLOGIACL ?? "https://www.sismologia.cl/index.html"
+};
+
+export default config;

+ 12 - 0
src/enums/emojis.ts

@@ -0,0 +1,12 @@
+enum Emojis { // Emoji glyphs used as visual markers in composed status text.
+  LINK = "🔗",
+  SIREN = "🚨", // opens the "Reporte de sismo" headline
+  CALENDAR = "📆", // precedes the report date
+  MAGNITUDE = "🎚️", // precedes the magnitude value
+  CLOCK = "🕒",
+  MAP = "🗺️",
+  LOCATION = "📍", // precedes the location text
+  DEPTH = "🕳️" // precedes the depth value
+};
+
+export default Emojis

+ 6 - 0
src/enums/log-levels.ts

@@ -0,0 +1,6 @@
+enum LogLevels { // Values compared against config.LOG_LEVEL to gate optional logging.
+  DEBUG = "debug", // enables the extra diagnostics guarded by LOG_LEVEL checks
+  INFO = "info"
+};
+
+export default LogLevels;

+ 7 - 0
src/enums/props.ts

@@ -0,0 +1,7 @@
+enum Props { // What to read from a selected element when scraping.
+  TEXT = "text", // sentinel: read the element's text content instead of an attribute
+  LINK = "href", // attribute name read for links
+  IMAGE = "src" // attribute name for image sources
+};
+
+export default Props

+ 36 - 0
src/index.ts

@@ -0,0 +1,36 @@
+import { type Context } from "aws-lambda";
+
+import { handler as sismologiacl } from "./sources/sismologiacl/handler";
+
+const context: Context = { // Stub Lambda context passed to handlers when they are invoked from this script.
+  callbackWaitsForEmptyEventLoop: false,
+  functionName: "",
+  functionVersion: "",
+  invokedFunctionArn: "",
+  memoryLimitInMB: "0",
+  awsRequestId: "",
+  logGroupName: "",
+  logStreamName: "",
+  done: () => {}, // no-op callbacks: nothing consumes the legacy completion hooks here
+  fail: () => {},
+  succeed: () => {},
+  getRemainingTimeInMillis: () => 1 // fixed placeholder, not a real countdown
+};
+
+// Handlers executed, in order, by the local runner.
+const sources = [sismologiacl];
+// Pause inserted after each handler, in milliseconds.
+const cooldown = 1000;
+
+/**
+ * Invokes every registered source handler sequentially with a null event and
+ * the stub context, waiting `cooldown` milliseconds after each one.
+ */
+async function main (): Promise<void> {
+  const pause = async (ms: number): Promise<void> => {
+    await new Promise((resolve) => setTimeout(resolve, ms));
+  };
+
+  for (let index = 0; index < sources.length; index++) {
+    await sources[index](null, context, () => {});
+    await pause(cooldown);
+  }
+}
+
+// Run every source once and terminate explicitly instead of waiting for the
+// event loop to drain. A failure is reported through a non-zero exit status so
+// that cron jobs and CI can detect it.
+main()
+  .catch((err) => {
+    console.error(err)
+    process.exitCode = 1;
+  })
+  .finally(() => {
+    // Without an argument, exit() uses process.exitCode (0 unless set above).
+    process.exit();
+  });

+ 8 - 0
src/interfaces/item.ts

@@ -0,0 +1,8 @@
+export interface IItem { // One report extracted by ScraperReports.getItems().
+  date: string // text matched by dateSelector, or today's date (es-CL) when no selector is set
+  location: string // trimmed text matched by locationSelector
+  depth: string // trimmed text matched by depthSelector
+  magnitude: number // Number() of the text matched by magnitudeSelector; 0 when no selector is set
+  link: string // options.url without "/index.html", followed by the href matched by linkSelector
+  locationImg: File | null // image downloaded for the report, or null when the download failed
+}

+ 10 - 0
src/interfaces/scaper-reports-options.ts

@@ -0,0 +1,10 @@
+export interface IScraperReportsOptions { // Page URL plus the CSS selectors consumed by ScraperReports.
+  url: string // page to scrape; also the base of report links once "/index.html" is removed
+  reportsSelector: string // selects the candidate report elements in the page
+  dateSelector: string // evaluated against each report element; its text becomes the date
+  locationSelector: string // evaluated against each report element; its text becomes the location
+  depthSelector: string // evaluated against each report element; its text becomes the depth
+  magnitudeSelector: string // evaluated against each report element; its text is converted with Number()
+  linkSelector: string // element whose href attribute is read
+  locationImgSelector: string // not read by any code visible in this commit
+};

+ 5 - 0
src/interfaces/scraper-options.ts

@@ -0,0 +1,5 @@
+export interface IScraperOptions { // Arguments accepted by Scraper.scrape().
+  url: string // page the rendering service is asked to visit
+  userAgent?: string // overrides the default User-Agent header of the request
+  incognito?: boolean // forwarded in the request body; false when omitted
+}

+ 65 - 0
src/libs/redis-client.ts

@@ -0,0 +1,65 @@
+import { createClient } from "redis";
+
+import config from "../config";
+import LogLevels from "../enums/log-levels";
+
+/**
+ * Small key/value facade over a node-redis client.
+ *
+ * A connection is opened eagerly by the constructor and lazily by
+ * `store`/`retrieve`; every operation closes the connection when it finishes.
+ * Neither `store` nor `retrieve` rejects: failures are logged and reported
+ * through the return value.
+ */
+export default class RedisClient {
+  private _client: any = null;
+  // In-flight connection attempt, shared so concurrent callers never open two sockets.
+  private _connecting: Promise<void> | null = null;
+
+  constructor () {
+    this.connect()
+      .then(() => {
+        if (config.LOG_LEVEL === LogLevels.DEBUG) {
+          console.debug("Redis connetion stablished");
+        }
+      })
+      .catch(() => {
+        // Already logged by openConnection(). Swallowed on purpose: a constructor
+        // cannot be awaited, and store()/retrieve() retry the connection themselves.
+      });
+  }
+
+  /**
+   * Ensures a connected client is available, joining any attempt already in progress.
+   *
+   * @throws whatever the underlying connection attempt rejected with.
+   */
+  private async connect (): Promise<void> {
+    if (this._client !== null) {
+      return;
+    }
+
+    if (this._connecting === null) {
+      this._connecting = this.openConnection().finally(() => {
+        this._connecting = null;
+      });
+    }
+
+    await this._connecting;
+  }
+
+  /** Creates a client, connects it and caches it; logs and rethrows on failure. */
+  private async openConnection (): Promise<void> {
+    try {
+      const client = createClient({ url: config.REDIS_CONN });
+      await client.connect();
+      this._client = client;
+    } catch (err) {
+      console.error("Redis connection error", err);
+      throw err;
+    }
+  }
+
+  /** Closes the cached client, if any. Never throws. */
+  private async disconnect (): Promise<void> {
+    const client = this._client;
+    if (client === null) {
+      return;
+    }
+
+    // Drop the reference first so a failed disconnect never leaves a dead client cached.
+    this._client = null;
+    try {
+      await client.disconnect();
+    } catch (err: any) {
+      // Called from `finally` blocks: throwing here would replace the operation's result.
+      console.error(err.message);
+    }
+  }
+
+  /**
+   * Writes `value` under `key`.
+   *
+   * @param options passed through to the client's `set` call (e.g. `{ EX: seconds }`).
+   * @returns true when the value was written, false when connecting or writing failed.
+   */
+  public async store (key: string, value: string, options?: Record<string, unknown>): Promise<boolean> {
+    try {
+      await this.connect();
+      await this._client.set(key, value, options);
+      return true;
+    } catch (err: any) {
+      console.error(err.message);
+      return false;
+    } finally {
+      await this.disconnect();
+    }
+  }
+
+  /**
+   * Reads the value stored under `key`.
+   *
+   * @returns the stored string, `null` when the key does not exist, or `""`
+   *   when connecting or reading failed (the error is logged).
+   */
+  public async retrieve (key: string): Promise<string | null> {
+    try {
+      await this.connect();
+      return await this._client.get(key);
+    } catch (err: any) {
+      console.error(err.message);
+      return "";
+    } finally {
+      await this.disconnect();
+    }
+  }
+}

+ 4 - 0
src/sources/sismologiacl/definition.yml

@@ -0,0 +1,4 @@
+# Scheduled function for the sismologiacl source; the handler is exported from
+# src/sources/sismologiacl/handler.ts.
+sismologiacl:
+  handler: ./src/sources/sismologiacl/handler.handler
+  events:
+    - schedule: rate(15 minutes)

+ 17 - 0
src/sources/sismologiacl/handler.ts

@@ -0,0 +1,17 @@
+import { type Handler } from "aws-lambda";
+
+import config from "../../config";
+import SismologiaCL from ".";
+
+// Display name of this source; it prefixes the source's log lines.
+const name = "Centro Sismológico Nacional 🇨🇱";
+
+// CSS selectors describing where each report field lives in the scraped page.
+const selectors = {
+  reportsSelector: "table.sismologia tbody tr",
+  dateSelector: "td a",
+  locationSelector: "td:nth-child(1)",
+  depthSelector: "td:nth-child(2)",
+  magnitudeSelector: "td:nth-child(3)",
+  linkSelector: "td:nth-child(1) a",
+  locationImgSelector: "main aside img"
+};
+
+const source = new SismologiaCL(name, { url: config.SISMOLOGIACL, ...selectors });
+
+// Lambda entry point for this source.
+export const handler: Handler = source.getHandler();

+ 95 - 0
src/sources/sismologiacl/index.ts

@@ -0,0 +1,95 @@
+import { type Handler } from "aws-lambda";
+import { createRestAPIClient } from "masto";
+
+import ScraperReports from "../../utils/scraper-reports";
+import RedisClient from "../../libs/redis-client";
+import LogLevels from "../../enums/log-levels";
+import Emojis from "../../enums/emojis";
+import config from "../../config";
+
+import { type IScraperReportsOptions } from "../../interfaces/scaper-reports-options";
+
+/**
+ * Report source: scrapes the configured page, skips reports whose link is
+ * already recorded in Redis and publishes the remaining ones to Mastodon.
+ */
+export default class SismologiaCL {
+  private readonly _name: string;
+  private readonly _scraperReportsOptions: IScraperReportsOptions
+  private readonly _redisClient: RedisClient;
+  private readonly _mastodonClient: any
+  private readonly _scraperReports: ScraperReports;
+
+  /**
+   * @param name display name used as the prefix of every log line.
+   * @param scraperReportsOptions page URL and selectors handed to ScraperReports.
+   */
+  constructor (name: string, scraperReportsOptions: IScraperReportsOptions) {
+    this._name = name;
+    this._scraperReportsOptions = scraperReportsOptions;
+    this._scraperReports = new ScraperReports(this._name, this._scraperReportsOptions);
+    this._redisClient = new RedisClient();
+    this._mastodonClient = createRestAPIClient({
+      url: config.MASTODON_URL,
+      accessToken: config.MASTODON_ACCESS_TOKEN
+    });
+  }
+
+  /** Builds the status text published for one report. */
+  private buildMessage (report: { date: string, location: string, depth: string, magnitude: number, link: string }): string {
+    // The location text also contains the date text, so the date is removed from it.
+    const place = report.location.replace(report.date, "").trim();
+    return [
+      `${Emojis.SIREN} Reporte de sismo`,
+      "",
+      `${Emojis.CALENDAR} ${report.date} (Hora local)`,
+      `${Emojis.MAGNITUDE} Magnitud: ${report.magnitude} - ${Emojis.DEPTH} Profundidad: ${report.depth}`,
+      `${Emojis.LOCATION}${place}`,
+      report.link
+    ].join("\n");
+  }
+
+  /**
+   * Scrapes the reports and publishes the ones not seen before.
+   *
+   * Never rejects: any error is logged, together with `event` and `context`
+   * when debug logging is enabled.
+   *
+   * @param event invocation event, only used for debug output.
+   * @param context invocation context, only used for debug output.
+   */
+  public async run (event?: any, context?: any): Promise<void> {
+    try {
+      const reports = await this._scraperReports.getItems();
+      if (config.LOG_LEVEL === LogLevels.DEBUG) {
+        console.log(`${this._name} | Reports`, reports);
+      }
+
+      let totalPublished = 0;
+
+      // Order has to be reversed to appear in the correct order when posting
+      for (let i = reports.length - 1; i >= 0; i--) {
+        const report = reports[i];
+
+        // Anything other than null counts as "already published". That includes the
+        // empty string returned when Redis is unreachable, so an outage never causes reposts.
+        const exists = await this._redisClient.retrieve(report.link);
+        if (exists !== null) {
+          continue;
+        }
+
+        const message = this.buildMessage(report);
+
+        const mediaIds: any[] = [];
+        if (report.locationImg !== null && report.locationImg !== undefined) {
+          const media = await this._mastodonClient.v2.media.create({ file: report.locationImg, description: report.location });
+          mediaIds.push(media.id);
+        }
+
+        console.log(`\n${this._name} | Sending`, message);
+
+        await this._mastodonClient.v1.statuses.create({ status: message, mediaIds });
+
+        const date = new Date().toLocaleDateString();
+        const stored = await this._redisClient.store(report.link, date, { EX: 60 * 60 * 24 }); // EX: 24 hrs expiration
+        if (!stored) {
+          // The status is already public; without the marker the next run will post it again.
+          console.error(`${this._name} | Could not record '${report.link}' as published`);
+        }
+        totalPublished++;
+      }
+      console.log(`${this._name} | Published ${totalPublished} new reports`);
+    } catch (err: any) {
+      console.log(`${this._name} | An error has occurred\n`)
+      console.error(err.message);
+      if (config.LOG_LEVEL === LogLevels.DEBUG) {
+        if (event !== undefined) {
+          console.debug("\nEvent\n");
+          console.debug(event);
+        }
+
+        if (context !== undefined) {
+          console.debug("\nContext\n");
+          console.debug(context);
+        }
+      }
+    }
+    console.log(`${this._name} | Finished`);
+  }
+
+  /** Returns a Lambda handler that delegates to `run`. */
+  public getHandler (): Handler {
+    return async (event, context) => {
+      await this.run(event, context);
+    }
+  }
+}

+ 111 - 0
src/utils/scraper-reports.ts

@@ -0,0 +1,111 @@
+import { type AnyNode, load } from "cheerio";
+import "dotenv/config";
+
+import config from "../config";
+import Scraper from "./scraper";
+
+import type { IScraperReportsOptions } from "../interfaces/scaper-reports-options";
+import type { IItem } from "../interfaces/item";
+
+import LogLevels from "../enums/log-levels";
+import Props from "../enums/props";
+
+/**
+ * Extracts report items from a page rendered by the scraping service, using
+ * the CSS selectors supplied in the options.
+ */
+export default class ScraperReports {
+  // Index of the first matched element that is treated as a report. Element 0 is
+  // skipped, as in the original loop. NOTE(review): presumably a header row - confirm.
+  private static readonly FIRST_ROW = 1;
+
+  private readonly _name: string;
+  private readonly _options: IScraperReportsOptions;
+  private readonly _scraper: Scraper;
+  private readonly _maxItems: number;
+
+  /**
+   * @param name display name used as the prefix of log lines.
+   * @param options page URL and selectors.
+   * @param maxItems maximum number of reports returned per call. Defaults to 1,
+   *   i.e. only the first report after the skipped element.
+   */
+  constructor (name: string, options: IScraperReportsOptions, maxItems = 1) {
+    this._name = name;
+    this._options = options;
+    this._scraper = new Scraper();
+    this._maxItems = maxItems;
+  }
+
+  /**
+   * Reads the text or an attribute of the element matched by `selector` inside `domNodeElement`.
+   *
+   * @returns the value, or "" when nothing matched or the lookup threw.
+   */
+  private getProperty (domNodeElement: AnyNode, selector: string, prop: Props = Props.TEXT): string {
+    const $ = load(domNodeElement);
+    let value: string | undefined = "";
+    try {
+      value = prop === Props.TEXT ? $(selector).text() : $(selector).attr(prop);
+    } catch (err: any) {
+      if (config.LOG_LEVEL === LogLevels.DEBUG) {
+        console.debug(`${this._name} | Error raised\n`);
+        console.debug(`From ${domNodeElement.type} can't get value using selector '${selector}'`);
+      }
+      // Always reported, so a broken selector is visible outside debug mode too.
+      console.error(err.message);
+    }
+    return value ?? "";
+  }
+
+  /** Trimmed text matched by `selector`, or "" when no selector is configured. */
+  private getText (node: AnyNode, selector: string): string {
+    const effective = selector ?? "";
+    return effective !== "" ? this.getProperty(node, effective).trim() : "";
+  }
+
+  /** Date text of the report; today's date (es-CL) when no date selector is configured. */
+  private getDate (node: AnyNode): string {
+    const selector = this._options.dateSelector ?? "";
+    return selector !== "" ? this.getText(node, selector) : new Date(Date.now()).toLocaleDateString("es-CL");
+  }
+
+  private getLocation (node: AnyNode): string {
+    return this.getText(node, this._options.locationSelector);
+  }
+
+  private getDepth (node: AnyNode): string {
+    return this.getText(node, this._options.depthSelector);
+  }
+
+  /** Magnitude converted with Number(); 0 when no selector is configured. */
+  private getMagnitude (node: AnyNode): number {
+    const selector = this._options.magnitudeSelector ?? "";
+    return selector !== "" ? Number(this.getText(node, selector)) : 0;
+  }
+
+  /** Report URL: the page URL without "/index.html" followed by the matched href. */
+  private getLink (node: AnyNode): string {
+    const selector = this._options.linkSelector ?? "";
+    const href = selector !== "" ? this.getProperty(node, selector, Props.LINK).trim() : "";
+    return this._options.url.replace("/index.html", "") + href;
+  }
+
+  private async getLocationImg (link: string): Promise<File | null> {
+    return await this._scraper.scrapeFile(link);
+  }
+
+  /**
+   * Scrapes the configured page and returns at most `maxItems` reports.
+   *
+   * Never rejects: on any failure the error is logged and the items collected
+   * so far (possibly none) are returned.
+   */
+  public async getItems (): Promise<IItem[]> {
+    const items: IItem[] = [];
+    const startTime = Date.now();
+
+    try {
+      console.info("Starting scraping", this._options);
+      const response = await this._scraper.scrape({ url: this._options.url });
+
+      // scrape() resolves to undefined when the request failed; fail with a clear message.
+      const html = response?.data?.data?.html;
+      if (typeof html !== "string") {
+        throw new Error(`No HTML received for '${this._options.url}'`);
+      }
+
+      const $ = load(html);
+      const domElements = $(this._options.reportsSelector);
+
+      if (config.LOG_LEVEL === LogLevels.DEBUG) {
+        console.debug(`${this._name} | Items obtained: ${domElements.length} `);
+      }
+
+      // Bounded by the number of matches so a short or empty table is not an error.
+      const end = Math.min(domElements.length, ScraperReports.FIRST_ROW + this._maxItems);
+      for (let i = ScraperReports.FIRST_ROW; i < end; i++) {
+        const node = domElements[i];
+        const link = this.getLink(node);
+        // The image shares the report's URL with a .jpeg extension. When the link has
+        // no .html extension there is nothing to derive, so no download is attempted.
+        const imgUrl = link.replace(/\.html(?=$|[?#])/, ".jpeg");
+        items.push({
+          date: this.getDate(node),
+          location: this.getLocation(node),
+          depth: this.getDepth(node),
+          magnitude: this.getMagnitude(node),
+          link,
+          locationImg: imgUrl !== link ? await this.getLocationImg(imgUrl) : null
+        });
+      }
+    } catch (err: any) {
+      // Always reported: an empty result with no log line is indistinguishable from "no reports".
+      console.error(`${this._name} | Error\n`);
+      console.error(err.message);
+    } finally {
+      const endTime = Date.now();
+      const duration = (endTime - startTime) / 1000;
+      console.info(`${this._name} | Execution time: ${duration}s`);
+    }
+
+    return items;
+  }
+}

+ 58 - 0
src/utils/scraper.ts

@@ -0,0 +1,58 @@
+import axios from "axios";
+
+import config from "../config";
+import LogLevels from "../enums/log-levels";
+
+import type { IScraperOptions } from "../interfaces/scraper-options";
+
+/**
+ * HTTP helpers: `scrape` asks the page-rendering service to visit a URL,
+ * `scrapeFile` downloads a file directly.
+ */
+export default class Scraper {
+  /**
+   * POSTs the target URL to the rendering service configured in PUPPETEER_URL.
+   *
+   * @returns the axios response, or undefined when the request failed (the error is logged).
+   */
+  public async scrape (options: IScraperOptions): Promise<any> {
+    let response: any;
+
+    try {
+      response = await axios.post(config.PUPPETEER_URL, {
+        url: options.url,
+        // A screenshot is only requested when it is going to be logged below.
+        screenshot: config.LOG_LEVEL === LogLevels.DEBUG,
+        incognito: options.incognito ?? false
+      }, {
+        headers: {
+          "User-Agent": options.userAgent ?? "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
+        }
+      });
+
+      if (config.LOG_LEVEL === LogLevels.DEBUG) {
+        console.debug(`From '${options.url}'\n`);
+        console.debug("HTML\n")
+        console.debug(response.data.data.html);
+        console.debug("\n");
+        console.debug("Screenshot (Base64)\n")
+        console.debug(response.data.data.screenshot);
+      }
+    } catch (err: any) {
+      console.error(err.message);
+    }
+
+    return response;
+  }
+
+  /**
+   * Downloads `url` and wraps the body in a File named after the last path segment.
+   *
+   * @returns the file, or null when the request failed or answered with a non-2xx status.
+   */
+  public async scrapeFile (url: string): Promise<File | null> {
+    try {
+      const response = await fetch(url);
+      if (!response.ok) {
+        // An error page must not be passed on as if it were the requested file.
+        console.error(`Could not download '${url}': HTTP ${response.status}`);
+        return null;
+      }
+
+      const blob = await response.blob();
+      const fileName = url.slice(url.lastIndexOf("/") + 1);
+      // File does not inherit the type of its blob parts, so pass it explicitly.
+      return new File([blob], fileName, { type: blob.type });
+    } catch (err: any) {
+      console.error(err.message);
+      return null;
+    }
+  }
+}

+ 19 - 0
tsconfig.json

@@ -0,0 +1,19 @@
+{
+  "compilerOptions": {
+    "rootDir": "src",
+    "outDir": "dist",
+    "target": "es2020",
+    "sourceMap": false,
+    "module":"commonjs",
+    "forceConsistentCasingInFileNames": true,
+    "isolatedModules": true,
+    "types": ["node"],
+    "strictNullChecks": true,
+    "skipLibCheck": true
+  },
+  "include": [
+    "./src/**/*",
+    "./node_modules/@types/node/index.d.ts"
+  ],
+  "exclude": ["node_modules", "**/*.test.ts"]
+}