aws_s3_storage.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. import logging
  2. from collections.abc import Generator
  3. import boto3 # type: ignore
  4. from botocore.client import Config # type: ignore
  5. from botocore.exceptions import ClientError # type: ignore
  6. from configs import dify_config
  7. from extensions.storage.base_storage import BaseStorage
# Module-level logger named after this module; AwsS3Storage.__init__ uses it
# to record which credential mode was selected for the S3 client.
logger = logging.getLogger(__name__)
  9. class AwsS3Storage(BaseStorage):
  10. """Implementation for Amazon Web Services S3 storage."""
  11. def __init__(self):
  12. super().__init__()
  13. self.bucket_name = dify_config.S3_BUCKET_NAME
  14. if dify_config.S3_USE_AWS_MANAGED_IAM:
  15. logger.info("Using AWS managed IAM role for S3")
  16. session = boto3.Session()
  17. region_name = dify_config.S3_REGION
  18. self.client = session.client(service_name="s3", region_name=region_name)
  19. else:
  20. logger.info("Using ak and sk for S3")
  21. self.client = boto3.client(
  22. "s3",
  23. aws_secret_access_key=dify_config.S3_SECRET_KEY,
  24. aws_access_key_id=dify_config.S3_ACCESS_KEY,
  25. endpoint_url=dify_config.S3_ENDPOINT,
  26. region_name=dify_config.S3_REGION,
  27. config=Config(s3={"addressing_style": dify_config.S3_ADDRESS_STYLE}),
  28. )
  29. # create bucket
  30. try:
  31. self.client.head_bucket(Bucket=self.bucket_name)
  32. except ClientError as e:
  33. # if bucket not exists, create it
  34. if e.response["Error"]["Code"] == "404":
  35. self.client.create_bucket(Bucket=self.bucket_name)
  36. # if bucket is not accessible, pass, maybe the bucket is existing but not accessible
  37. elif e.response["Error"]["Code"] == "403":
  38. pass
  39. else:
  40. # other error, raise exception
  41. raise
  42. def save(self, filename, data):
  43. self.client.put_object(Bucket=self.bucket_name, Key=filename, Body=data)
  44. def load_once(self, filename: str) -> bytes:
  45. try:
  46. data: bytes = self.client.get_object(Bucket=self.bucket_name, Key=filename)["Body"].read()
  47. except ClientError as ex:
  48. if ex.response["Error"]["Code"] == "NoSuchKey":
  49. raise FileNotFoundError("File not found")
  50. else:
  51. raise
  52. return data
  53. def load_stream(self, filename: str) -> Generator:
  54. try:
  55. response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
  56. yield from response["Body"].iter_chunks()
  57. except ClientError as ex:
  58. if ex.response["Error"]["Code"] == "NoSuchKey":
  59. raise FileNotFoundError("file not found")
  60. elif "reached max retries" in str(ex):
  61. raise ValueError("please do not request the same file too frequently")
  62. else:
  63. raise
  64. def download(self, filename, target_filepath):
  65. self.client.download_file(self.bucket_name, filename, target_filepath)
  66. def exists(self, filename):
  67. try:
  68. self.client.head_object(Bucket=self.bucket_name, Key=filename)
  69. return True
  70. except:
  71. return False
  72. def delete(self, filename):
  73. self.client.delete_object(Bucket=self.bucket_name, Key=filename)