aws_s3_storage.py

import logging
from collections.abc import Generator

import boto3  # type: ignore
from botocore.client import Config  # type: ignore
from botocore.exceptions import ClientError  # type: ignore

from configs import dify_config
from extensions.storage.base_storage import BaseStorage

logger = logging.getLogger(__name__)


class AwsS3Storage(BaseStorage):
    """Implementation for Amazon Web Services S3 storage."""

    def __init__(self):
        super().__init__()
        self.bucket_name = dify_config.S3_BUCKET_NAME
        if dify_config.S3_USE_AWS_MANAGED_IAM:
            logger.info("Using AWS managed IAM role for S3")
            session = boto3.Session()
            region_name = dify_config.S3_REGION
            self.client = session.client(service_name="s3", region_name=region_name)
        else:
            logger.info("Using ak and sk for S3")
            self.client = boto3.client(
                "s3",
                aws_secret_access_key=dify_config.S3_SECRET_KEY,
                aws_access_key_id=dify_config.S3_ACCESS_KEY,
                endpoint_url=dify_config.S3_ENDPOINT,
                region_name=dify_config.S3_REGION,
                config=Config(
                    s3={"addressing_style": dify_config.S3_ADDRESS_STYLE},
                    # only compute and validate checksums when the service requires it
                    request_checksum_calculation="when_required",
                    response_checksum_validation="when_required",
                ),
            )
        # make sure the bucket exists, creating it if necessary
        try:
            self.client.head_bucket(Bucket=self.bucket_name)
        except ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the bucket does not exist, so create it
                self.client.create_bucket(Bucket=self.bucket_name)
            elif e.response["Error"]["Code"] == "403":
                # the bucket may exist but is not accessible with these credentials; ignore
                pass
            else:
                # any other error is unexpected, so re-raise it
                raise

    def save(self, filename, data):
        self.client.put_object(Bucket=self.bucket_name, Key=filename, Body=data)

    def load_once(self, filename: str) -> bytes:
        try:
            data: bytes = self.client.get_object(Bucket=self.bucket_name, Key=filename)["Body"].read()
        except ClientError as ex:
            if ex.response["Error"]["Code"] == "NoSuchKey":
                raise FileNotFoundError("File not found")
            else:
                raise
        return data

    def load_stream(self, filename: str) -> Generator:
        try:
            response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
            yield from response["Body"].iter_chunks()
        except ClientError as ex:
            if ex.response["Error"]["Code"] == "NoSuchKey":
                raise FileNotFoundError("file not found")
            elif "reached max retries" in str(ex):
                raise ValueError("please do not request the same file too frequently")
            else:
                raise

    def download(self, filename, target_filepath):
        self.client.download_file(self.bucket_name, filename, target_filepath)

    def exists(self, filename):
        try:
            self.client.head_object(Bucket=self.bucket_name, Key=filename)
            return True
        except ClientError:
            # a 404/403 (or any other client error) means the object is not available
            return False

    def delete(self, filename):
        self.client.delete_object(Bucket=self.bucket_name, Key=filename)
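

# Usage sketch (illustrative only, not part of the original module): assuming the
# S3_* settings in dify_config are populated, the backend could be exercised roughly
# as below. The object key "uploads/example.txt" is hypothetical.
if __name__ == "__main__":
    storage = AwsS3Storage()
    storage.save("uploads/example.txt", b"hello world")
    if storage.exists("uploads/example.txt"):
        content = storage.load_once("uploads/example.txt")
        for chunk in storage.load_stream("uploads/example.txt"):
            pass  # each chunk is a bytes object streamed from S3
    storage.delete("uploads/example.txt")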