Explorar o código

updated portals

Pablo Barrera Yaksic hai 1 día
pai
achega
a71699ce11

+ 15 - 13
.env.example

@@ -10,6 +10,20 @@ IMG_PLACEHOLDER = "https://placehold.co/600x400"
 CHILECULTURA = "https://chilecultura.gob.cl/events/"
 CIPER = "https://www.ciperchile.cl/actualidad/"
 CODEXVERDE = "https://codexverde.cl/"
+COOPERATIVA = "https://cooperativa.cl/noticias/site/cache/nroedic/todas/"
+LOG_LEVEL = "debug"
+REDIS_CONN = "redis://localhost:6379/10"
+PUPPETEER_URL = "http://localhost:8000/api/v1/visit"
+MASTODON_URL = "https://<mastodon-domain>"
+MASTODON_API_URL = "https://<mastodon-domain>/api/v1/"
+MASTODON_STREAMING_URL = "wss://<mastodon-domain>/api/v1/streaming"
+MASTODON_ACCESS_TOKEN = "<access-token>"
+IMG_PLACEHOLDER = "https://placehold.co/600x400"
+
+CHILECULTURA = "https://chilecultura.gob.cl/events/"
+CIPER = "https://www.ciperchile.cl/actualidad/"
+CODEXVERDE = "https://codexverde.cl/"
+COOPERATIVA = "https://cooperativa.cl/noticias/site/cache/nroedic/todas/"
 DF = "https://www.df.cl/ultimasnoticias"
 ELCIUDADANO = "https://www.elciudadano.com/"
 ELDESCONCIERTO = "https://eldesconcierto.cl"
@@ -29,6 +43,7 @@ THECLINIC = "https://www.theclinic.cl/lo-ultimo/"
 MASTODON_KEY_CHILECULTURA = ""
 MASTODON_KEY_CIPER = ""
 MASTODON_KEY_CODEXVERDE = ""
+MASTODON_KEY_COOPERATIVA = ""
 MASTODON_KEY_DF = ""
 MASTODON_KEY_ELCIUDADANO = ""
 MASTODON_KEY_ELDESCONCIERTO = ""
@@ -36,18 +51,5 @@ MASTODON_KEY_ELMOSTRADOR = ""
 MASTODON_KEY_EMOL = ""
 MASTODON_KEY_FASTCHECK = ""
 MASTODON_KEY_GLACIARESCHILENOS = ""
-MASTODON_KEY_INTERFERENCIA = ""
-MASTODON_KEY_LADERASUR = ""
-MASTODON_KEY_LATERCERA = ""
-MASTODON_KEY_METROSANTIAGO = ""
-MASTODON_KEY_SISMOLOGIA = ""
-MASTODON_KEY_TARREO
-MASTODON_KEY_THECLINIC = ""
-
-## AGENTS
-MASTODON_KEY_FORTUNE = ""
-
-# Develop
-DEVELOP = false
 DEV_ACTIVE_PORTALS = ""
 MASTODON_TEST_ACCESS_TOKEN = ""

+ 121 - 0
README.md

@@ -0,0 +1,121 @@
+<!--
+title: 'AWS Node Scheduled Cron example in NodeJS'
+description: 'This is an example of creating a function that runs as a cron job using the serverless ''schedule'' event.'
+layout: Doc
+framework: v3
+platform: AWS
+language: nodeJS
+priority: 1
+authorLink: 'https://github.com/0dj0bz'
+authorName: 'Rob Abbott'
+authorAvatar: 'https://avatars3.githubusercontent.com/u/5679763?v=4&s=140'
+-->
+
+# Serverless Framework Node Scheduled Cron on AWS
+
+This template demonstrates how to develop and deploy a simple cron-like service running on AWS Lambda using the traditional Serverless Framework.
+
+## Schedule event type
+
+This example defines two functions, `rateHandler` and `cronHandler`, both of which are triggered by an event of `schedule` type, which is used for configuring functions to be executed at a specific time or at specific intervals. For detailed information about the `schedule` event, please refer to the corresponding section of the Serverless [docs](https://serverless.com/framework/docs/providers/aws/events/schedule/).
+
+When defining `schedule` events, we need to use `rate` or `cron` expression syntax.
+
+### Rate expressions syntax
+
+```pseudo
+rate(value unit)
+```
+
+`value` - A positive number
+
+`unit` - The unit of time. ( minute | minutes | hour | hours | day | days )
+
+In the example below, we use `rate` syntax to define a `schedule` event that triggers our `rateHandler` function every minute:
+
+```yml
+functions:
+  rateHandler:
+    handler: handler.run
+    events:
+      - schedule: rate(1 minute)
+```
+
+Detailed information about rate expressions is available in official [AWS docs](https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/ScheduledEvents.html#RateExpressions).
+
+
+### Cron expressions syntax
+
+```pseudo
+cron(Minutes Hours Day-of-month Month Day-of-week Year)
+```
+
+All fields are required and time zone is UTC only.
+
+| Field         | Values         | Wildcards     |
+| ------------- |:--------------:|:-------------:|
+| Minutes       | 0-59           | , - * /       |
+| Hours         | 0-23           | , - * /       |
+| Day-of-month  | 1-31           | , - * ? / L W |
+| Month         | 1-12 or JAN-DEC| , - * /       |
+| Day-of-week   | 1-7 or SUN-SAT | , - * ? / L # |
+| Year          | 1970-2199      | , - * /       |
+
+In the example below, we use `cron` syntax to define a `schedule` event that triggers our `cronHandler` function every two minutes, Monday through Friday:
+
+```yml
+functions:
+  cronHandler:
+    handler: handler.run
+    events:
+      - schedule: cron(0/2 * ? * MON-FRI *)
+```
+
+Detailed information about cron expressions is available in the official [AWS docs](https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/ScheduledEvents.html#CronExpressions).
+
+
+## Usage
+
+### Deployment
+
+This example is made to work with the Serverless Framework dashboard, which includes advanced features such as CI/CD, monitoring, metrics, etc.
+
+In order to deploy with dashboard, you need to first login with:
+
+```
+serverless login
+```
+
+and then perform deployment with:
+
+```
+serverless deploy
+```
+
+After running deploy, you should see output similar to:
+
+```bash
+Deploying aws-node-scheduled-cron-project to stage dev (us-east-1)
+
+✔ Service deployed to stack aws-node-scheduled-cron-project-dev (205s)
+
+functions:
+  rateHandler: aws-node-scheduled-cron-project-dev-rateHandler (2.9 kB)
+  cronHandler: aws-node-scheduled-cron-project-dev-cronHandler (2.9 kB)
+```
+
+No additional steps are required. Your defined schedules become active right after deployment.
+
+### Local invocation
+
+In order to test out your functions locally, you can invoke them with the following command:
+
+```
+serverless invoke local --function rateHandler
+```
+
+After invocation, you should see output similar to:
+
+```bash
+Your cron function "aws-node-scheduled-cron-dev-rateHandler" ran at Fri Mar 05 2021 15:14:39 GMT+0100 (Central European Standard Time)
+```

+ 6 - 4
src/config.ts

@@ -9,11 +9,12 @@ const config = {
   MASTODON_STREAMING_URL: process.env.MASTODON_STREAMING_URL ?? "wss://mastodon.cl/api/v1/streaming",
   IMG_PLACEHOLDER: process.env.IMG_PLACEHOLDER ?? "https://placehold.co/600x400",
   // PORTALES
-  CHILECULTURA: process.env.CHILECULTURA ?? "https://chilecultura.gob.cl/events/",
+  CHILECULTURA: process.env.CHILECULTURA ?? "https://chilecultura.gob.cl/",
   CIPER: process.env.CIPER ?? "https://www.ciperchile.cl/actualidad/",
   CODEXVERDE: process.env.CODEXVERDE ?? "https://codexverde.cl/",
+  // COOPERATIVA: process.env.COOPERATIVA ?? "https://cooperativa.cl/noticias/site/cache/nroedic/todas/",
   DF: process.env.DF ?? "https://www.df.cl/ultimasnoticias",
-  ELCIUDADANO: process.env.ELCIUDADANO ?? "https://www.elciudadano.com/",
+  ELCIUDADANO: process.env.ELCIUDADANO ?? "https://www.elciudadano.com/chile/",
   ELDESCONCIERTO: process.env.ELDESCONCIERTO ?? "https://eldesconcierto.cl",
   ELMOSTRADOR: process.env.ELMOSTRADOR ?? "https://www.elmostrador.cl/categoria/dia/",
   EMOL: process.env.EMOL ?? "https://www.emol.com",
@@ -22,7 +23,7 @@ const config = {
   INTERFERENCIA: process.env.INTERFERENCIA ?? "https://interferencia.cl/",
   LADERASUR: process.env.LADERASUR ?? "https://laderasur.com/",
   LATERCERA: process.env.LATERCERA ?? "https://www.latercera.com/canal/nacional/",
-  METRODESANTIAGO: process.env.METRODESANTIAGO ?? "https://xcancel.com/metrodesantiago",
+  // METRODESANTIAGO: process.env.METRODESANTIAGO ?? "https://xcancel.com/metrodesantiago",
   SISMOLOGIA: process.env.SISMOLOGIA ?? "https://www.sismologia.cl/index.html",
   TARREO: process.env.TARREO ?? "https://www.tarreo.com/noticias/",
   THECLINIC: process.env.THECLINIC ?? "https://www.theclinic.cl/lo-ultimo/",
@@ -30,6 +31,7 @@ const config = {
   MASTODON_KEY_CHILECULTURA: process.env.MASTODON_KEY_CHILECULTURA ?? "",
   MASTODON_KEY_CIPER: process.env.MASTODON_KEY_CIPER ?? "",
   MASTODON_KEY_CODEXVERDE: process.env.MASTODON_KEY_CODEXVERDE ?? "",
+  // MASTODON_KEY_COOPERATIVA: process.env.MASTODON_KEY_COOPERATIVA ?? "",
   MASTODON_KEY_DF: process.env.MASTODON_KEY_DF ?? "",
   MASTODON_KEY_ELCIUDADANO: process.env.MASTODON_KEY_ELCIUDADANO ?? "",
   MASTODON_KEY_ELDESCONCIERTO: process.env.MASTODON_KEY_ELDESCONCIERTO ?? "",
@@ -40,7 +42,7 @@ const config = {
   MASTODON_KEY_INTERFERENCIA: process.env.MASTODON_KEY_INTERFERENCIA ?? "",
   MASTODON_KEY_LADERASUR: process.env.MASTODON_KEY_LADERASUR ?? "",
   MASTODON_KEY_LATERCERA: process.env.MASTODON_KEY_LATERCERA ?? "",
-  MASTODON_KEY_METRODESANTIAGO: process.env.MASTODON_KEY_METRODESANTIAGO ?? "",
+  // MASTODON_KEY_METRODESANTIAGO: process.env.MASTODON_KEY_METRODESANTIAGO ?? "",
   MASTODON_KEY_SISMOLOGIA: process.env.MASTODON_KEY_SISMOLOGIA ?? "",
   MASTODON_KEY_TARREO: process.env.MASTODON_KEY_TARREO ?? "",
   MASTODON_KEY_THECLINIC: process.env.MASTODON_KEY_THECLINIC ?? "",

+ 2 - 0
src/index.ts

@@ -5,6 +5,7 @@ import config from "./config";
 import { handler as chilecultura } from "./portales/chilecultura/handler";
 import { handler as ciper } from "./portales/ciper/handler";
 import { handler as codexverde } from "./portales/codexverde/handler";
+import { handler as cooperativa } from "./portales/cooperativa/handler";
 import { handler as df } from "./portales/df/handler";
 import { handler as elciudadano } from "./portales/elciudadano/handler";
 import { handler as eldesconcierto } from "./portales/eldesconcierto/handler";
@@ -38,6 +39,7 @@ const portalsHandlers = {
   "chilecultura": chilecultura,
   "ciper": ciper,
   "codexverde": codexverde,
+  "cooperativa": cooperativa,
   "df": df,
   "elciudadano": elciudadano,
   "eldesconcierto": eldesconcierto,

+ 5 - 5
src/portales/chilecultura/handler.ts

@@ -11,12 +11,12 @@ export const handler: Handler = new Portal(
   config.MASTODON_KEY_CHILECULTURA,
   {
     url: config.CHILECULTURA,
-    articlesSelector: "section.section-base-subsection#this_week_events .card-event, section.section-base-subsection#next_week_events .card-event",
-    titleSelector: "div.card-body a",
-    linkSelector: "div.card-body a",
-    imageSelector: "img.card-img",
+    articlesSelector: "div.event-list a.event-item",
+    titleSelector: "h5",
+    linkSelector: "a",
+    imageSelector: "img.event-image",
     imagePrefix: "https://chilecultura.gob.cl",
-    contentSelector: "p.card-discipline, p.card-description-icon",
+    contentSelector: "",
     linkPrefix: "https://chilecultura.gob.cl",
     scraperMethod: ScraperMethods.AXIOS,
     cacheExpiration: 60 * 60 * 24 * 7, // 7 días

+ 3 - 3
src/portales/codexverde/handler.ts

@@ -11,9 +11,9 @@ export const handler: Handler = new Portal(
   config.MASTODON_KEY_CODEXVERDE,
   {
     url: config.CODEXVERDE,
-    articlesSelector: "#tdi_110 > div:nth-child(1) > div:nth-child(1), #tdi_111 div.td-cpt-post",
-    titleSelector: "div.td-module-meta-info h3",
-    linkSelector: "div.td-module-meta-info h3 a",
+    articlesSelector: " #tdi_98 div.td-cpt-post",
+    titleSelector: "h3.entry-title",
+    linkSelector: "h3.entry-title a",
     scraperMethod: ScraperMethods.AXIOS,
     hashtags: ["Codexverde", "Gestionambiental", "Agua", "Aire", "Biodiversidad", "Cambioclimatico", "Energia", "Residuos"],
     cacheExpiration: 60 * 60 * 24 * 30, // 30 días

+ 4 - 0
src/portales/cooperativa/definition.yml

@@ -0,0 +1,4 @@
+# Scheduled function for the Cooperativa portal: runs the handler exported by
+# src/portales/cooperativa/handler.ts once per hour.
+# The function key is named after this portal (it was previously `emol`, which
+# would presumably clash with the emol portal's own definition when merged).
+cooperativa:
+  handler: ./src/portales/cooperativa/handler.handler
+  events:
+    - schedule: rate(1 hour)

+ 23 - 0
src/portales/cooperativa/handler.ts

@@ -0,0 +1,23 @@
+import { type Handler } from "aws-lambda";
+
+import config from "../../config";
+import Portal from "../portal";
+import ScraperMethods from "../../enums/scraper-methods";
+
+// UTC calendar date as YYYYMMDD: the first ten characters of the ISO timestamp
+// ("YYYY-MM-DD") with the hyphens removed.
+// NOTE(review): this is evaluated once, when the module is loaded. If the runtime
+// keeps the module loaded across invocations, the date in the URL will not
+// advance, and UTC may differ from the portal's local date — confirm.
+const isoDay = new Date().toISOString().slice(0, 10);
+const dateStamp = isoDay.replace(/-/g, '');
+
+// Name passed to Portal as its first argument.
+const portalName = "Cooperativa";
+
+// NOTE(review): this key is spelled `KET`, unlike the other `MASTODON_KEY_*`
+// entries — confirm that config actually exposes it (and `COOPERATIVA`).
+const accessKey = config.MASTODON_KET_COOPERATIVA;
+
+/**
+ * Exported handler (typed with aws-lambda's `Handler`) for the Cooperativa portal.
+ *
+ * Constructs a Portal from the name, access key and scraper settings below and
+ * exports whatever its `getHandler()` returns.
+ */
+const portal = new Portal(
+  portalName,
+  accessKey,
+  {
+    // Base URL from config followed by today's date stamp and ".html".
+    url: `${config.COOPERATIVA}${dateStamp}.html`,
+    articlesSelector: "article.art-todas",
+    // Title and link are read from the same anchor element.
+    titleSelector: "div.contenedor-datos div.fecha-publicacion a",
+    linkSelector: "div.contenedor-datos div.fecha-publicacion a",
+    scraperMethod: ScraperMethods.AXIOS,
+    hashtags: ["Cooperativa", "Noticias"],
+    cacheExpiration: 60 * 60 * 24 * 30, // 30 days
+    linkPrefix: "https://cooperativa.cl"
+  }
+);
+
+export const handler: Handler = portal.getHandler();

+ 3 - 3
src/portales/df/handler.ts

@@ -12,9 +12,9 @@ export const handler: Handler = new Portal(
   {
     url: config.DF,
     linkPrefix: "https://www.df.cl",
-    articlesSelector: "article",
-    titleSelector: "h3 a",
-    linkSelector: "h3 a",
+    articlesSelector: "section.tax-list article",
+    titleSelector: "div.card__content h3.card__title",
+    linkSelector: "div.card__content a:nth-child(2)",
     scraperMethod: ScraperMethods.AXIOS,
     hashtags: ["DiarioFinanciero", "DF", "DFMas", "Noticias"]
   }

+ 3 - 3
src/portales/emol/handler.ts

@@ -11,9 +11,9 @@ export const handler: Handler = new Portal(
   config.MASTODON_KEY_EMOL,
   {
     url: config.EMOL,
-    articlesSelector: "div.cont_736_e_2015 cont_300_e_2015 div.contenedor-destacados, div.cont_736_e_2015 div",
-    titleSelector: "h1, h3, div.contenedor-titulo a",
-    linkSelector: "h1 a, h3 a, div.contenedor-titulo a",
+    articlesSelector: "div.cont_378_e_2015 div.col_center_noticia2-390px, div.cont_378_e_2015 div.col_center_noticia4dest-360px",
+    titleSelector: "h1 a, h3 a",
+    linkSelector: "h1 a, h3 a",
     linkPrefix: config.EMOL,
     scraperMethod: ScraperMethods.AXIOS,
     hashtags: ["Emol", "ElMercurioOnline", "ElMercurio", "Noticias"]

+ 3 - 3
src/portales/fastcheck/handler.ts

@@ -12,9 +12,9 @@ export const handler: Handler = new Portal(
   config.MASTODON_KEY_FASTCHECK,
   {
     url: config.FASTCHECK,
-    articlesSelector: "div.elementor-element-9da95a3 article",
-    titleSelector: "h1.elementor-post__title, h3.elementor-post__title",
-    linkSelector: "a",
+    articlesSelector: "div.article-right-rail__body div.result-list div.story-card-ctn",
+    titleSelector: "h2.story-card__headline",
+    linkSelector: "h2.story-card__headline a",
     scraperMethod: ScraperMethods.AXIOS,
     hashtags: ["FastCheck", "Artículo", "Noticias"],
     cacheExpiration: 60 * 60 * 24 * 30, // 30 días

+ 4 - 0
src/portales/supergeek/definition.yml

@@ -0,0 +1,4 @@
+# Scheduled function for the supergeek portal: runs the handler exported by
+# this directory's handler.ts once per hour.
+# The key and handler path were previously copied from other portals (`emol`
+# and the cooperativa handler); they now refer to this portal.
+supergeek:
+  handler: ./src/portales/supergeek/handler.handler
+  events:
+    - schedule: rate(1 hour)

+ 23 - 0
src/portales/supergeek/handler.ts

@@ -0,0 +1,23 @@
+import { type Handler } from "aws-lambda";
+
+import config from "../../config";
+import Portal from "../portal";
+import ScraperMethods from "../../enums/scraper-methods";
+
+// NOTE(review): this file lives under portales/supergeek, yet every value below
+// (name, config keys, URL, selectors, hashtags) targets Cooperativa — it looks
+// like an unedited copy; confirm the intended supergeek settings.
+
+// Name passed to Portal as its first argument.
+const portalName = "Cooperativa";
+
+// UTC calendar date as YYYYMMDD: the ISO timestamp's first ten characters with
+// the hyphens removed. Evaluated once, when the module is loaded.
+const isoDay = new Date().toISOString().slice(0, 10);
+const dateStamp = isoDay.replace(/-/g, '');
+
+// NOTE(review): this key is spelled `KET`, unlike the other `MASTODON_KEY_*`
+// entries — confirm that config actually exposes it.
+const accessKey = config.MASTODON_KET_COOPERATIVA;
+
+/**
+ * Exported handler (typed with aws-lambda's `Handler`).
+ *
+ * Constructs a Portal from the name, access key and scraper settings below and
+ * exports whatever its `getHandler()` returns.
+ */
+const portal = new Portal(
+  portalName,
+  accessKey,
+  {
+    // Base URL from config followed by today's date stamp and ".html".
+    url: `${config.COOPERATIVA}${dateStamp}.html`,
+    articlesSelector: "article.art-todas",
+    // Title and link are read from the same anchor element.
+    titleSelector: "div.contenedor-datos div.fecha-publicacion a",
+    linkSelector: "div.contenedor-datos div.fecha-publicacion a",
+    scraperMethod: ScraperMethods.AXIOS,
+    hashtags: ["Cooperativa", "Noticias"],
+    cacheExpiration: 60 * 60 * 24 * 30, // 30 days
+    linkPrefix: "https://cooperativa.cl"
+  }
+);
+
+export const handler: Handler = portal.getHandler();