|
@@ -5,31 +5,46 @@ import LogLevels from "../enums/log-levels";
|
|
|
|
|
|
import type { IScraperOptions } from "../interfaces/scraper-options";
|
|
|
|
|
|
+import ScraperMethods from "../enums/scraper-methods";
|
|
|
+
|
|
|
export default class Scraper {
|
|
|
private _options: IScraperOptions;
|
|
|
|
|
|
/**
 * Debug helper: dumps the outcome of a scrape via console.debug.
 *
 * Output order: the source URL (this._options.url), an "HTML" heading, then
 * - PUPPETEER method: response.data.data.html, followed by a
 *   "Screenshot (Base64)" heading and response.data.data.screenshot;
 * - any other method: response.data as-is.
 *
 * In the visible part of scrape() this is invoked only when
 * config.LOG_LEVEL === LogLevels.DEBUG.
 *
 * @param response Value assigned from axios.post / axios.get in scrape().
 *   NOTE(review): typed `any`; nothing here checks that `data` or
 *   `data.data` exists, so confirm the Puppeteer service always returns
 *   that envelope.
 */
+ private logScrape(response: any) {
|
|
|
+ console.debug(`From '${this._options.url}'\n`);
|
|
|
+ console.debug("HTML\n");
|
|
|
+
|
|
|
// Puppeteer branch reads html and screenshot from the nested response.data.data object.
// NOTE(review): loose `==` here; scrape() uses the same comparison for this enum.
+ if (this._options.scraperMethod == ScraperMethods.PUPPETEER) {
|
|
|
+ console.debug(response.data.data.html);
|
|
|
+ console.debug("\n");
|
|
|
+ console.debug("Screenshot (Base64)\n")
|
|
|
+ console.debug(response.data.data.screenshot);
|
|
|
+ } else {
|
|
|
// Every other scraper method: log response.data unchanged.
+ console.debug(response.data);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
public async scrape (options: IScraperOptions): Promise<any> {
|
|
|
this._options = options;
|
|
|
let response: any;
|
|
|
+ const userAgent = this._options.userAgent ?? "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36";
|
|
|
+ const headers = {
|
|
|
+ "User-Agent": userAgent
|
|
|
+ }
|
|
|
|
|
|
try {
|
|
|
- response = await axios.post(config.PUPPETEER_URL, {
|
|
|
- url: this._options.url,
|
|
|
- screenshot: config.LOG_LEVEL === LogLevels.DEBUG,
|
|
|
- incognito: this._options.incognito ?? false
|
|
|
- }, {
|
|
|
- headers: {
|
|
|
- "User-Agent": this._options.userAgent ?? "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
|
|
|
- }
|
|
|
- });
|
|
|
-
|
|
|
+ if (this._options.scraperMethod == ScraperMethods.PUPPETEER) {
|
|
|
+ response = await axios.post(config.PUPPETEER_URL, {
|
|
|
+ url: this._options.url,
|
|
|
+ screenshot: config.LOG_LEVEL === LogLevels.DEBUG,
|
|
|
+ incognito: this._options.incognito ?? false
|
|
|
+ }, { headers });
|
|
|
+ } else {
|
|
|
+ response = await axios.get(this._options.url, { headers });
|
|
|
+ }
|
|
|
+
|
|
|
if (config.LOG_LEVEL === LogLevels.DEBUG) {
|
|
|
- console.debug(`From '${this._options.url}'\n`);
|
|
|
- console.debug("HTML\n")
|
|
|
- console.debug(response.data.data.html);
|
|
|
- console.debug("\n");
|
|
|
- console.debug("Screenshot (Base64)\n")
|
|
|
- console.debug(response.data.data.screenshot);
|
|
|
+ this.logScrape(response);
|
|
|
}
|
|
|
} catch (err: any) {
|
|
|
console.error(err.message);
|