// scraper.ts
  1. import axios from "axios";
  2. import config from "../config";
  3. import LogLevels from "../enums/log-levels";
  4. import type { IScraperOptions } from "../interfaces/scraper-options";
  5. export default class Scraper {
  6. private _options: IScraperOptions;
  7. public async scrape (options: IScraperOptions): Promise<any> {
  8. this._options = options;
  9. let response: any;
  10. try {
  11. response = await axios.post(config.PUPPETEER_URL, {
  12. url: this._options.url,
  13. screenshot: config.LOG_LEVEL === LogLevels.DEBUG,
  14. incognito: this._options.incognito ?? false
  15. }, {
  16. headers: {
  17. "User-Agent": this._options.userAgent ?? "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
  18. }
  19. });
  20. if (config.LOG_LEVEL === LogLevels.DEBUG) {
  21. console.debug(`From '${this._options.url}'\n`);
  22. console.debug("HTML\n")
  23. console.debug(response.data.data.html);
  24. console.debug("\n");
  25. console.debug("Screenshot (Base64)\n")
  26. console.debug(response.data.data.screenshot);
  27. }
  28. } catch (err: any) {
  29. console.error(err.message);
  30. }
  31. return response;
  32. }
  33. public async scrapeFile (url: string): Promise<File | null> {
  34. this._options = { url };
  35. let file: File | null = null;
  36. try {
  37. const response = await fetch(url);
  38. const blob = await response.blob();
  39. const fileUrlParts = url.split("/");
  40. const fileName = fileUrlParts[fileUrlParts.length - 1];
  41. file = new File([blob], fileName);
  42. } catch (err: any) {
  43. console.error(err.message);
  44. }
  45. return file;
  46. }
  47. }