| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758 |
- import axios from "axios";
- import config from "../config";
- import LogLevels from "../enums/log-levels";
- import type { IScraperOptions } from "../interfaces/scraper-options";
/**
 * Client for retrieving remote content, either through the rendering
 * service configured at `config.PUPPETEER_URL` or by downloading a file
 * directly with `fetch`.
 *
 * Both public methods are best-effort: failures are logged with
 * `console.error` and reported through the return value rather than thrown.
 */
export default class Scraper {
  /** User-Agent header sent when the caller does not supply one. */
  private static readonly DEFAULT_USER_AGENT =
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36";

  /**
   * Options of the most recently started request. Kept for compatibility;
   * the methods below read from their own parameters so that overlapping
   * calls on one instance cannot observe each other's options.
   */
  private _options?: IScraperOptions;

  /**
   * Posts a scrape request for `options.url` to `config.PUPPETEER_URL`.
   *
   * A screenshot is requested only when the configured log level is DEBUG,
   * in which case the returned HTML and screenshot are also written to
   * `console.debug`.
   *
   * @param options - Target URL plus optional `incognito` and `userAgent`.
   * @returns The axios response, or `undefined` if the request failed
   *          (the failure is logged, not thrown).
   */
  public async scrape (options: IScraperOptions): Promise<any> {
    this._options = options;
    const debug = config.LOG_LEVEL === LogLevels.DEBUG;
    let response: any;
    try {
      response = await axios.post(config.PUPPETEER_URL, {
        url: options.url,
        screenshot: debug,
        incognito: options.incognito ?? false
      }, {
        headers: {
          "User-Agent": options.userAgent ?? Scraper.DEFAULT_USER_AGENT
        }
      });
      if (debug) {
        // Read the URL from the local parameter: `this._options` may have
        // been replaced by another call while this one was awaiting.
        console.debug(`From '${options.url}'\n`);
        console.debug("HTML\n");
        console.debug(response.data.data.html);
        console.debug("\n");
        console.debug("Screenshot (Base64)\n");
        console.debug(response.data.data.screenshot);
      }
    } catch (err: unknown) {
      console.error(Scraper.describeError(err));
    }
    return response;
  }

  /**
   * Downloads `url` with `fetch` and wraps the body in a `File`.
   *
   * The file name is the last path segment of the URL with any query
   * string or fragment removed, and the file keeps the MIME type reported
   * by the downloaded blob.
   *
   * @param url - Location of the file to download.
   * @returns The downloaded file, or `null` if the request failed or the
   *          server answered with a non-2xx status (the failure is logged).
   */
  public async scrapeFile (url: string): Promise<File | null> {
    this._options = { url };
    try {
      const response = await fetch(url);
      if (!response.ok) {
        // Without this check an HTTP error page would be returned as the file.
        throw new Error(`Request for '${url}' failed with status ${response.status} ${response.statusText}`);
      }
      const blob = await response.blob();
      return new File([blob], Scraper.fileNameFromUrl(url), { type: blob.type });
    } catch (err: unknown) {
      console.error(Scraper.describeError(err));
      return null;
    }
  }

  /** Returns the last path segment of `url`, ignoring `?query` and `#fragment`. */
  private static fileNameFromUrl (url: string): string {
    const withoutQueryOrHash = url.split(/[?#]/, 1)[0] ?? "";
    const segments = withoutQueryOrHash.split("/");
    return segments[segments.length - 1] ?? "";
  }

  /** Produces a loggable message for any thrown value, not only `Error`s. */
  private static describeError (err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }
}
|