|
|
@@ -15,13 +15,13 @@ export default class ScraperArticles {
|
|
|
private readonly _options: IScraperArticlesOptions;
|
|
|
private readonly _scraper: Scraper;
|
|
|
|
|
|
/**
 * Builds an article scraper for a single source.
 *
 * @param name - Source name, kept in `_name` (used as the fallback author).
 * @param options - Selector/prefix configuration, kept in `_options`.
 */
constructor(name: string, options: IScraperArticlesOptions) {
    // A fresh Scraper instance is created per ScraperArticles instance.
    this._scraper = new Scraper();
    this._options = options;
    this._name = name;
}
|
|
|
|
|
|
- private getProperty (domNodeElement: AnyNode, selector: string, prop: Props = Props.TEXT): string {
|
|
|
+ private getProperty(domNodeElement: AnyNode, selector: string, prop: Props = Props.TEXT): string {
|
|
|
const $ = load(domNodeElement);
|
|
|
let value: string | undefined = "";
|
|
|
try {
|
|
|
@@ -36,32 +36,32 @@ export default class ScraperArticles {
|
|
|
return value ?? "";
|
|
|
}
|
|
|
|
|
|
/**
 * Reads the article title from the given node.
 *
 * @param article - DOM node of a single article.
 * @returns The trimmed value found with `titleSelector`, or `""` when no
 *          title selector is configured.
 */
private getTitle(article: AnyNode): string {
    const titleSelector = this._options.titleSelector ?? "";

    // Without a selector there is nothing to look up.
    if (titleSelector === "") {
        return "";
    }

    return this.getProperty(article, titleSelector).trim();
}
|
|
|
|
|
|
/**
 * Reads the article body/summary from the given node.
 *
 * @param article - DOM node of a single article.
 * @returns The trimmed value found with `contentSelector`, or `""` when no
 *          content selector is configured.
 */
private getContent(article: AnyNode): string {
    const contentSelector = this._options.contentSelector ?? "";

    // Without a selector there is nothing to look up.
    if (contentSelector === "") {
        return "";
    }

    return this.getProperty(article, contentSelector).trim();
}
|
|
|
|
|
|
/**
 * Resolves the link of an article.
 *
 * When `linkSelector` is configured the link is read from the node through
 * `getProperty` with `Props.LINK`; otherwise the configured `url` option is
 * used. A value rejected by `isValidUrl` is prefixed with `linkPrefix` when
 * that option is defined.
 *
 * @param article - DOM node of a single article.
 * @returns The trimmed link, or `""` when the extracted link is blank.
 */
private getLink(article: AnyNode): string {
    const selector = this._options.linkSelector ?? "";
    const rawUrl = selector !== "" ? this.getProperty(article, selector, Props.LINK) : this._options.url;
    const url = rawUrl.trim();

    // A blank link must stay blank: prefixing it would return only the prefix,
    // a truthy string that hides the missing link from callers checking for emptiness.
    if (url === "") {
        return "";
    }

    if (!this.isValidUrl(rawUrl) && this._options.linkPrefix !== undefined) {
        return (this._options.linkPrefix + url).trim();
    }

    return url;
}
|
|
|
|
|
|
- private async getImage (article: AnyNode): Promise<File | null> {
|
|
|
+ private async getImage(article: AnyNode): Promise<File | null> {
|
|
|
let imgFile;
|
|
|
const selector = this._options.imageSelector ?? "";
|
|
|
let imgUrl = selector !== "" ? this.getProperty(article, selector, Props.IMAGE) : "";
|
|
|
|
|
|
if (imgUrl !== "") {
|
|
|
- if (! this.isValidUrl(imgUrl) && this._options.imagePrefix) {
|
|
|
+ if (!this.isValidUrl(imgUrl) && this._options.imagePrefix) {
|
|
|
imgUrl = this._options.imagePrefix + imgUrl.trim();
|
|
|
}
|
|
|
imgFile = this._scraper.scrapeFile(imgUrl)
|
|
|
@@ -76,17 +76,17 @@ export default class ScraperArticles {
|
|
|
return imgFile;
|
|
|
}
|
|
|
|
|
|
/**
 * Reads the article author from the given node.
 *
 * @param article - DOM node of a single article.
 * @returns The trimmed value found with `authorSelector`, or the scraper's
 *          own name when no author selector is configured.
 */
private getAuthor(article: AnyNode): string {
    const authorSelector = this._options.authorSelector ?? "";

    // No selector: attribute the article to the source itself.
    if (authorSelector === "") {
        return this._name;
    }

    return this.getProperty(article, authorSelector).trim();
}
|
|
|
|
|
|
/**
 * Reads the article date from the given node.
 *
 * The scraped value is returned as trimmed text, without any re-formatting.
 *
 * @param article - DOM node of a single article.
 * @returns The trimmed value found with `dateSelector`, or the current date
 *          formatted with the `es-CL` locale when no date selector is configured.
 */
private getDate(article: AnyNode): string {
    const dateSelector = this._options.dateSelector ?? "";

    if (dateSelector === "") {
        // No selector: fall back to the current date.
        const now = new Date(Date.now());
        return now.toLocaleDateString("es-CL");
    }

    return this.getProperty(article, dateSelector).trim();
}
|
|
|
|
|
|
- private isValidUrl (url: string): boolean {
|
|
|
+ private isValidUrl(url: string): boolean {
|
|
|
try {
|
|
|
const parsedUrl = new URL(url);
|
|
|
if (!parsedUrl.protocol || !parsedUrl.host) {
|
|
|
@@ -98,7 +98,7 @@ export default class ScraperArticles {
|
|
|
}
|
|
|
};
|
|
|
|
|
|
- public async getArticles (): Promise<IArticle[]> {
|
|
|
+ public async getArticles(): Promise<IArticle[]> {
|
|
|
const articles: IArticle[] = [];
|
|
|
const startTime = Date.now();
|
|
|
|
|
|
@@ -116,6 +116,11 @@ export default class ScraperArticles {
|
|
|
|
|
|
for (let i = 0; i < domElements.length; i++) {
|
|
|
const article = domElements[i];
|
|
|
+
|
|
|
+ if (!this.getLink(article)) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
articles.push({
|
|
|
title: this.getTitle(article),
|
|
|
content: this.getContent(article),
|