|
@@ -49,7 +49,8 @@ export default class ScraperArticles {
|
|
|
/**
 * Resolves the link for an article node.
 *
 * When a link selector is configured, the link is read from the article via
 * `getProperty`; otherwise the configured `url` option is used. If that value
 * does not pass `isValidUrl` and a `linkPrefix` is configured, the prefix is
 * prepended to the trimmed value.
 *
 * @param article - Node the link is read from.
 * @returns The trimmed link, with `linkPrefix` prepended when applicable.
 */
private getLink (article: AnyNode): string {
    const linkSelector = this._options.linkSelector ?? "";
    const rawLink = linkSelector === ""
        ? this._options.url
        : this.getProperty(article, linkSelector, Props.LINK);

    // Nothing to prepend when the value already parses as a URL with a host,
    // or when no prefix has been configured.
    if (this.isValidUrl(rawLink) || this._options.linkPrefix === undefined) {
        return rawLink.trim();
    }

    return (this._options.linkPrefix + rawLink.trim()).trim();
}
|
|
@@ -60,7 +61,7 @@ export default class ScraperArticles {
|
|
|
let imgUrl = selector !== "" ? this.getProperty(article, selector, Props.IMAGE) : "";
|
|
|
|
|
|
if (imgUrl !== "") {
|
|
|
- if (this._options.imagePrefix && ! imgUrl.includes(this._options.imagePrefix)) {
|
|
|
+ if (! this.isValidUrl(imgUrl) && this._options.imagePrefix) {
|
|
|
imgUrl = this._options.imagePrefix + imgUrl.trim();
|
|
|
}
|
|
|
imgFile = this._scraper.scrapeFile(imgUrl)
|
|
@@ -85,6 +86,18 @@ export default class ScraperArticles {
|
|
|
return selector !== "" ? this.getProperty(article, selector).trim() : new Date(Date.now()).toLocaleDateString("es-CL");
|
|
|
}
|
|
|
|
|
|
/**
 * Reports whether `url` parses as a URL that has a host.
 *
 * @param url - Candidate string to check.
 * @returns `true` when `new URL(url)` succeeds and the parsed URL has a
 * non-empty host; `false` when parsing throws or the host is empty.
 */
private isValidUrl (url: string): boolean {
    try {
        // A successfully parsed URL always has a non-empty protocol, so the
        // host is the only component that still needs checking.
        return new URL(url).host !== "";
    } catch {
        // `new URL` throws for input it cannot parse without a base
        // (for example a relative path), which is reported as "not valid".
        return false;
    }
}
|
|
|
+
|
|
|
public async getArticles (): Promise<IArticle[]> {
|
|
|
const articles: IArticle[] = [];
|
|
|
const startTime = Date.now();
|