website.py 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. from flask_restful import Resource, reqparse # type: ignore
  2. from controllers.console import api
  3. from controllers.console.datasets.error import WebsiteCrawlError
  4. from controllers.console.wraps import account_initialization_required, setup_required
  5. from libs.login import login_required
  6. from services.website_service import WebsiteService
  7. class WebsiteCrawlApi(Resource):
  8. @setup_required
  9. @login_required
  10. @account_initialization_required
  11. def post(self):
  12. parser = reqparse.RequestParser()
  13. parser.add_argument(
  14. "provider", type=str, choices=["firecrawl", "jinareader"], required=True, nullable=True, location="json"
  15. )
  16. parser.add_argument("url", type=str, required=True, nullable=True, location="json")
  17. parser.add_argument("options", type=dict, required=True, nullable=True, location="json")
  18. args = parser.parse_args()
  19. WebsiteService.document_create_args_validate(args)
  20. # crawl url
  21. try:
  22. result = WebsiteService.crawl_url(args)
  23. except Exception as e:
  24. raise WebsiteCrawlError(str(e))
  25. return result, 200
  26. class WebsiteCrawlStatusApi(Resource):
  27. @setup_required
  28. @login_required
  29. @account_initialization_required
  30. def get(self, job_id: str):
  31. parser = reqparse.RequestParser()
  32. parser.add_argument("provider", type=str, choices=["firecrawl", "jinareader"], required=True, location="args")
  33. args = parser.parse_args()
  34. # get crawl status
  35. try:
  36. result = WebsiteService.get_crawl_status(job_id, args["provider"])
  37. except Exception as e:
  38. raise WebsiteCrawlError(str(e))
  39. return result, 200
# Route registration for the console API blueprint.
api.add_resource(WebsiteCrawlApi, "/website/crawl")
api.add_resource(WebsiteCrawlStatusApi, "/website/crawl/status/<string:job_id>")