|
@@ -50,9 +50,22 @@ export default class ScraperArticles {
|
|
return selector !== "" ? this.getProperty(article, selector, Props.LINK) : this._options.url;
|
|
return selector !== "" ? this.getProperty(article, selector, Props.LINK) : this._options.url;
|
|
}
|
|
}
|
|
|
|
|
|
/**
 * Obtains the image of an article by passing its URL to `this._scraper.scrapeFile`.
 *
 * The URL is read from the article with the configured `imageSelector`.
 * The method never rejects: it resolves to `null` when no selector is
 * configured, when the extracted URL is empty, or when the scrape fails.
 *
 * @param article - Node of the article the image is read from.
 * @returns The scraped file, or `null` when no image could be obtained.
 */
private async getImage (article: AnyNode): Promise<File | null> {
    const selector = this._options.imageSelector ?? "";
    const imgUrl = selector !== "" ? this.getProperty(article, selector, Props.IMAGE) : "";

    // Nothing to fetch: answer with an explicit null rather than undefined,
    // so the resolved value matches the declared return type.
    if (imgUrl === "") {
        return null;
    }

    try {
        // Normalise a nullish result to null so callers only ever see File | null.
        return (await this._scraper.scrapeFile(imgUrl)) ?? null;
    } catch (err: unknown) {
        // Best effort: a failed image must not abort the article, so the
        // error is only reported (at debug level) and null is returned.
        if (config.LOG_LEVEL === LogLevels.DEBUG) {
            console.debug(`${this._name} | Error raised\n`);
            console.debug(`From ${article.type} can't get image using selector '${selector}'`);
            console.error(err instanceof Error ? err.message : String(err));
        }
        return null;
    }
}
|
|
|
|
|
|
private getAuthor (article: AnyNode): string {
|
|
private getAuthor (article: AnyNode): string {
|
|
@@ -62,7 +75,7 @@ export default class ScraperArticles {
|
|
|
|
|
|
/**
 * Produces the date string of an article.
 *
 * When `dateSelector` is configured, the value read from the article with
 * that selector is returned with surrounding whitespace trimmed. Otherwise
 * the current date formatted for the "es-CL" locale is returned.
 *
 * @param article - Node of the article the date is read from.
 * @returns The article date as a string.
 */
private getDate (article: AnyNode): string {
    const dateSelector = this._options.dateSelector ?? "";

    // No selector configured: fall back to the current date.
    if (dateSelector === "") {
        const now = new Date(Date.now());
        return now.toLocaleDateString("es-CL");
    }

    const rawDate = this.getProperty(article, dateSelector);
    return rawDate.trim();
}
|
|
|
|
|
|
public async getArticles (): Promise<IArticle[]> {
|
|
public async getArticles (): Promise<IArticle[]> {
|
|
@@ -70,6 +83,7 @@ export default class ScraperArticles {
|
|
const startTime = Date.now();
|
|
const startTime = Date.now();
|
|
|
|
|
|
try {
|
|
try {
|
|
|
|
+ console.info("Starting scraping", this._options);
|
|
const response = await this._scraper.scrape({ url: this._options.url });
|
|
const response = await this._scraper.scrape({ url: this._options.url });
|
|
const html = response.data.data.html;
|
|
const html = response.data.data.html;
|
|
|
|
|
|
@@ -80,16 +94,17 @@ export default class ScraperArticles {
|
|
console.debug(`${this._name} | Articles obtained: ${domElements.length} `);
|
|
console.debug(`${this._name} | Articles obtained: ${domElements.length} `);
|
|
}
|
|
}
|
|
|
|
|
|
- domElements.each((i, article) => {
|
|
|
|
|
|
+ for (let i = 0; i < domElements.length; i++) {
|
|
|
|
+ const article = domElements[i];
|
|
articles.push({
|
|
articles.push({
|
|
title: this.getTitle(article),
|
|
title: this.getTitle(article),
|
|
content: this.getContent(article),
|
|
content: this.getContent(article),
|
|
link: this.getLink(article),
|
|
link: this.getLink(article),
|
|
- image: this.getImage(article),
|
|
|
|
|
|
+ image: await this.getImage(article),
|
|
author: this.getAuthor(article),
|
|
author: this.getAuthor(article),
|
|
date: this.getDate(article)
|
|
date: this.getDate(article)
|
|
});
|
|
});
|
|
- });
|
|
|
|
|
|
+ }
|
|
} catch (err) {
|
|
} catch (err) {
|
|
if (config.LOG_LEVEL === LogLevels.DEBUG) {
|
|
if (config.LOG_LEVEL === LogLevels.DEBUG) {
|
|
console.debug(`${this._name} | Error raised\n`);
|
|
console.debug(`${this._name} | Error raised\n`);
|