Initial commit

This commit is contained in:
2025-10-14 14:17:21 +08:00
commit ac715a8b88
35011 changed files with 3834178 additions and 0 deletions

View File

View File

@@ -0,0 +1,67 @@
import json
import os
import threading
from flask import Response
from configs import dify_config
from dify_app import DifyApp
def init_app(app: DifyApp):
@app.after_request
def after_request(response):
"""Add Version headers to the response."""
response.headers.add("X-Version", dify_config.CURRENT_VERSION)
response.headers.add("X-Env", dify_config.DEPLOY_ENV)
return response
@app.route("/health")
def health():
return Response(
json.dumps({"pid": os.getpid(), "status": "ok", "version": dify_config.CURRENT_VERSION}),
status=200,
content_type="application/json",
)
@app.route("/threads")
def threads():
num_threads = threading.active_count()
threads = threading.enumerate()
thread_list = []
for thread in threads:
thread_name = thread.name
thread_id = thread.ident
is_alive = thread.is_alive()
thread_list.append(
{
"name": thread_name,
"id": thread_id,
"is_alive": is_alive,
}
)
return {
"pid": os.getpid(),
"thread_num": num_threads,
"threads": thread_list,
}
@app.route("/db-pool-stat")
def pool_stat():
from extensions.ext_database import db
engine = db.engine
# TODO: Fix the type error
# FIXME maybe its sqlalchemy issue
return {
"pid": os.getpid(),
"pool_size": engine.pool.size(), # type: ignore
"checked_in_connections": engine.pool.checkedin(), # type: ignore
"checked_out_connections": engine.pool.checkedout(), # type: ignore
"overflow_connections": engine.pool.overflow(), # type: ignore
"connection_timeout": engine.pool.timeout(), # type: ignore
"recycle_time": db.engine.pool._recycle, # type: ignore
}

View File

@@ -0,0 +1,48 @@
from configs import dify_config
from dify_app import DifyApp
def init_app(app: DifyApp):
# register blueprint routers
from flask_cors import CORS # type: ignore
from controllers.console import bp as console_app_bp
from controllers.files import bp as files_bp
from controllers.inner_api import bp as inner_api_bp
from controllers.service_api import bp as service_api_bp
from controllers.web import bp as web_bp
CORS(
service_api_bp,
allow_headers=["Content-Type", "Authorization", "X-App-Code"],
methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
)
app.register_blueprint(service_api_bp)
CORS(
web_bp,
resources={r"/*": {"origins": dify_config.WEB_API_CORS_ALLOW_ORIGINS}},
supports_credentials=True,
allow_headers=["Content-Type", "Authorization", "X-App-Code"],
methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
expose_headers=["X-Version", "X-Env"],
)
app.register_blueprint(web_bp)
CORS(
console_app_bp,
resources={r"/*": {"origins": dify_config.CONSOLE_CORS_ALLOW_ORIGINS}},
supports_credentials=True,
allow_headers=["Content-Type", "Authorization"],
methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
expose_headers=["X-Version", "X-Env"],
)
app.register_blueprint(console_app_bp)
CORS(files_bp, allow_headers=["Content-Type"], methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"])
app.register_blueprint(files_bp)
app.register_blueprint(inner_api_bp)

View File

@@ -0,0 +1,104 @@
from datetime import timedelta
import pytz
from celery import Celery, Task # type: ignore
from celery.schedules import crontab # type: ignore
from configs import dify_config
from dify_app import DifyApp
def init_app(app: DifyApp) -> Celery:
class FlaskTask(Task):
def __call__(self, *args: object, **kwargs: object) -> object:
with app.app_context():
return self.run(*args, **kwargs)
broker_transport_options = {}
if dify_config.CELERY_USE_SENTINEL:
broker_transport_options = {
"master_name": dify_config.CELERY_SENTINEL_MASTER_NAME,
"sentinel_kwargs": {
"socket_timeout": dify_config.CELERY_SENTINEL_SOCKET_TIMEOUT,
},
}
celery_app = Celery(
app.name,
task_cls=FlaskTask,
broker=dify_config.CELERY_BROKER_URL,
backend=dify_config.CELERY_BACKEND,
task_ignore_result=True,
)
# Add SSL options to the Celery configuration
ssl_options = {
"ssl_cert_reqs": None,
"ssl_ca_certs": None,
"ssl_certfile": None,
"ssl_keyfile": None,
}
celery_app.conf.update(
result_backend=dify_config.CELERY_RESULT_BACKEND,
broker_transport_options=broker_transport_options,
broker_connection_retry_on_startup=True,
worker_log_format=dify_config.LOG_FORMAT,
worker_task_log_format=dify_config.LOG_FORMAT,
worker_hijack_root_logger=False,
timezone=pytz.timezone(dify_config.LOG_TZ or "UTC"),
)
if dify_config.BROKER_USE_SSL:
celery_app.conf.update(
broker_use_ssl=ssl_options, # Add the SSL options to the broker configuration
)
if dify_config.LOG_FILE:
celery_app.conf.update(
worker_logfile=dify_config.LOG_FILE,
)
celery_app.set_default()
app.extensions["celery"] = celery_app
imports = [
"schedule.clean_embedding_cache_task",
"schedule.clean_unused_datasets_task",
"schedule.create_tidb_serverless_task",
"schedule.update_tidb_serverless_status_task",
"schedule.clean_messages",
"schedule.mail_clean_document_notify_task",
]
day = dify_config.CELERY_BEAT_SCHEDULER_TIME
beat_schedule = {
"clean_embedding_cache_task": {
"task": "schedule.clean_embedding_cache_task.clean_embedding_cache_task",
"schedule": timedelta(days=day),
},
"clean_unused_datasets_task": {
"task": "schedule.clean_unused_datasets_task.clean_unused_datasets_task",
"schedule": timedelta(days=day),
},
"create_tidb_serverless_task": {
"task": "schedule.create_tidb_serverless_task.create_tidb_serverless_task",
"schedule": crontab(minute="0", hour="*"),
},
"update_tidb_serverless_status_task": {
"task": "schedule.update_tidb_serverless_status_task.update_tidb_serverless_status_task",
"schedule": timedelta(minutes=10),
},
"clean_messages": {
"task": "schedule.clean_messages.clean_messages",
"schedule": timedelta(days=day),
},
# every Monday
"mail_clean_document_notify_task": {
"task": "schedule.mail_clean_document_notify_task.mail_clean_document_notify_task",
"schedule": crontab(minute="0", hour="10", day_of_week="1"),
},
}
celery_app.conf.update(beat_schedule=beat_schedule, imports=imports)
return celery_app

View File

@@ -0,0 +1,9 @@
from core.extension.extension import Extension
from dify_app import DifyApp
def init_app(app: DifyApp):
code_based_extension.init()
code_based_extension = Extension()

View File

@@ -0,0 +1,41 @@
from dify_app import DifyApp
def init_app(app: DifyApp):
from commands import (
add_qdrant_index,
clear_free_plan_tenant_expired_logs,
convert_to_agent_apps,
create_tenant,
extract_plugins,
extract_unique_plugins,
fix_app_site_missing,
install_plugins,
migrate_data_for_plugin,
old_metadata_migration,
reset_email,
reset_encrypt_key_pair,
reset_password,
upgrade_db,
vdb_migrate,
)
cmds_to_register = [
reset_password,
reset_email,
reset_encrypt_key_pair,
vdb_migrate,
convert_to_agent_apps,
add_qdrant_index,
create_tenant,
upgrade_db,
fix_app_site_missing,
migrate_data_for_plugin,
extract_plugins,
extract_unique_plugins,
install_plugins,
old_metadata_migration,
clear_free_plan_tenant_expired_logs,
]
for cmd in cmds_to_register:
app.cli.add_command(cmd)

View File

@@ -0,0 +1,13 @@
from configs import dify_config
from dify_app import DifyApp
def is_enabled() -> bool:
return dify_config.API_COMPRESSION_ENABLED
def init_app(app: DifyApp):
from flask_compress import Compress # type: ignore
compress = Compress()
compress.init_app(app)

View File

@@ -0,0 +1,6 @@
from dify_app import DifyApp
from models import db
def init_app(app: DifyApp):
db.init_app(app)

View File

@@ -0,0 +1,10 @@
from core.hosting_configuration import HostingConfiguration
hosting_configuration = HostingConfiguration()
from dify_app import DifyApp
def init_app(app: DifyApp):
hosting_configuration.init_app(app)

View File

@@ -0,0 +1,5 @@
from dify_app import DifyApp
def init_app(app: DifyApp):
from events import event_handlers # noqa: F401

View File

@@ -0,0 +1,71 @@
import logging
import os
import sys
import uuid
from logging.handlers import RotatingFileHandler
import flask
from configs import dify_config
from dify_app import DifyApp
def init_app(app: DifyApp):
log_handlers: list[logging.Handler] = []
log_file = dify_config.LOG_FILE
if log_file:
log_dir = os.path.dirname(log_file)
os.makedirs(log_dir, exist_ok=True)
log_handlers.append(
RotatingFileHandler(
filename=log_file,
maxBytes=dify_config.LOG_FILE_MAX_SIZE * 1024 * 1024,
backupCount=dify_config.LOG_FILE_BACKUP_COUNT,
)
)
# Always add StreamHandler to log to console
sh = logging.StreamHandler(sys.stdout)
sh.addFilter(RequestIdFilter())
log_handlers.append(sh)
logging.basicConfig(
level=dify_config.LOG_LEVEL,
format=dify_config.LOG_FORMAT,
datefmt=dify_config.LOG_DATEFORMAT,
handlers=log_handlers,
force=True,
)
log_tz = dify_config.LOG_TZ
if log_tz:
from datetime import datetime
import pytz
timezone = pytz.timezone(log_tz)
def time_converter(seconds):
return datetime.fromtimestamp(seconds, tz=timezone).timetuple()
for handler in logging.root.handlers:
if handler.formatter:
handler.formatter.converter = time_converter
def get_request_id():
if getattr(flask.g, "request_id", None):
return flask.g.request_id
new_uuid = uuid.uuid4().hex[:10]
flask.g.request_id = new_uuid
return new_uuid
class RequestIdFilter(logging.Filter):
# This is a logging filter that makes the request ID available for use in
# the logging format. Note that we're checking if we're in a request
# context, as we may want to log things before Flask is fully loaded.
def filter(self, record):
record.req_id = get_request_id() if flask.has_request_context() else ""
return True

View File

@@ -0,0 +1,62 @@
import json
import flask_login # type: ignore
from flask import Response, request
from flask_login import user_loaded_from_request, user_logged_in
from werkzeug.exceptions import Unauthorized
import contexts
from dify_app import DifyApp
from libs.passport import PassportService
from services.account_service import AccountService
login_manager = flask_login.LoginManager()
# Flask-Login configuration
@login_manager.request_loader
def load_user_from_request(request_from_flask_login):
"""Load user based on the request."""
if request.blueprint not in {"console", "inner_api"}:
return None
# Check if the user_id contains a dot, indicating the old format
auth_header = request.headers.get("Authorization", "")
if not auth_header:
auth_token = request.args.get("_token")
if not auth_token:
raise Unauthorized("Invalid Authorization token.")
else:
if " " not in auth_header:
raise Unauthorized("Invalid Authorization header format. Expected 'Bearer <api-key>' format.")
auth_scheme, auth_token = auth_header.split(None, 1)
auth_scheme = auth_scheme.lower()
if auth_scheme != "bearer":
raise Unauthorized("Invalid Authorization header format. Expected 'Bearer <api-key>' format.")
decoded = PassportService().verify(auth_token)
user_id = decoded.get("user_id")
logged_in_account = AccountService.load_logged_in_account(account_id=user_id)
return logged_in_account
@user_logged_in.connect
@user_loaded_from_request.connect
def on_user_logged_in(_sender, user):
"""Called when a user logged in."""
if user:
contexts.tenant_id.set(user.current_tenant_id)
@login_manager.unauthorized_handler
def unauthorized_handler():
"""Handle unauthorized requests."""
return Response(
json.dumps({"code": "unauthorized", "message": "Unauthorized."}),
status=401,
content_type="application/json",
)
def init_app(app: DifyApp):
login_manager.init_app(app)

View File

@@ -0,0 +1,97 @@
import logging
from typing import Optional
from flask import Flask
from configs import dify_config
from dify_app import DifyApp
class Mail:
def __init__(self):
self._client = None
self._default_send_from = None
def is_inited(self) -> bool:
return self._client is not None
def init_app(self, app: Flask):
mail_type = dify_config.MAIL_TYPE
if not mail_type:
logging.warning("MAIL_TYPE is not set")
return
if dify_config.MAIL_DEFAULT_SEND_FROM:
self._default_send_from = dify_config.MAIL_DEFAULT_SEND_FROM
match mail_type:
case "resend":
import resend # type: ignore
api_key = dify_config.RESEND_API_KEY
if not api_key:
raise ValueError("RESEND_API_KEY is not set")
api_url = dify_config.RESEND_API_URL
if api_url:
resend.api_url = api_url
resend.api_key = api_key
self._client = resend.Emails
case "smtp":
from libs.smtp import SMTPClient
if not dify_config.SMTP_SERVER or not dify_config.SMTP_PORT:
raise ValueError("SMTP_SERVER and SMTP_PORT are required for smtp mail type")
if not dify_config.SMTP_USE_TLS and dify_config.SMTP_OPPORTUNISTIC_TLS:
raise ValueError("SMTP_OPPORTUNISTIC_TLS is not supported without enabling SMTP_USE_TLS")
self._client = SMTPClient(
server=dify_config.SMTP_SERVER,
port=dify_config.SMTP_PORT,
username=dify_config.SMTP_USERNAME or "",
password=dify_config.SMTP_PASSWORD or "",
_from=dify_config.MAIL_DEFAULT_SEND_FROM or "",
use_tls=dify_config.SMTP_USE_TLS,
opportunistic_tls=dify_config.SMTP_OPPORTUNISTIC_TLS,
)
case _:
raise ValueError("Unsupported mail type {}".format(mail_type))
def send(self, to: str, subject: str, html: str, from_: Optional[str] = None):
if not self._client:
raise ValueError("Mail client is not initialized")
if not from_ and self._default_send_from:
from_ = self._default_send_from
if not from_:
raise ValueError("mail from is not set")
if not to:
raise ValueError("mail to is not set")
if not subject:
raise ValueError("mail subject is not set")
if not html:
raise ValueError("mail html is not set")
self._client.send(
{
"from": from_,
"to": to,
"subject": subject,
"html": html,
}
)
def is_enabled() -> bool:
return dify_config.MAIL_TYPE is not None and dify_config.MAIL_TYPE != ""
def init_app(app: DifyApp):
mail.init_app(app)
mail = Mail()

View File

@@ -0,0 +1,9 @@
from dify_app import DifyApp
def init_app(app: DifyApp):
import flask_migrate # type: ignore
from extensions.ext_database import db
flask_migrate.Migrate(app, db)

View File

@@ -0,0 +1,9 @@
from configs import dify_config
from dify_app import DifyApp
def init_app(app: DifyApp):
if dify_config.RESPECT_XFORWARD_HEADERS_ENABLED:
from werkzeug.middleware.proxy_fix import ProxyFix
app.wsgi_app = ProxyFix(app.wsgi_app, x_port=1) # type: ignore

View File

@@ -0,0 +1,98 @@
from typing import Any, Union
import redis
from redis.cluster import ClusterNode, RedisCluster
from redis.connection import Connection, SSLConnection
from redis.sentinel import Sentinel
from configs import dify_config
from dify_app import DifyApp
class RedisClientWrapper:
"""
A wrapper class for the Redis client that addresses the issue where the global
`redis_client` variable cannot be updated when a new Redis instance is returned
by Sentinel.
This class allows for deferred initialization of the Redis client, enabling the
client to be re-initialized with a new instance when necessary. This is particularly
useful in scenarios where the Redis instance may change dynamically, such as during
a failover in a Sentinel-managed Redis setup.
Attributes:
_client (redis.Redis): The actual Redis client instance. It remains None until
initialized with the `initialize` method.
Methods:
initialize(client): Initializes the Redis client if it hasn't been initialized already.
__getattr__(item): Delegates attribute access to the Redis client, raising an error
if the client is not initialized.
"""
def __init__(self):
self._client = None
def initialize(self, client):
if self._client is None:
self._client = client
def __getattr__(self, item):
if self._client is None:
raise RuntimeError("Redis client is not initialized. Call init_app first.")
return getattr(self._client, item)
redis_client = RedisClientWrapper()
def init_app(app: DifyApp):
global redis_client
connection_class: type[Union[Connection, SSLConnection]] = Connection
if dify_config.REDIS_USE_SSL:
connection_class = SSLConnection
redis_params: dict[str, Any] = {
"username": dify_config.REDIS_USERNAME,
"password": dify_config.REDIS_PASSWORD or None, # Temporary fix for empty password
"db": dify_config.REDIS_DB,
"encoding": "utf-8",
"encoding_errors": "strict",
"decode_responses": False,
}
if dify_config.REDIS_USE_SENTINEL:
assert dify_config.REDIS_SENTINELS is not None, "REDIS_SENTINELS must be set when REDIS_USE_SENTINEL is True"
sentinel_hosts = [
(node.split(":")[0], int(node.split(":")[1])) for node in dify_config.REDIS_SENTINELS.split(",")
]
sentinel = Sentinel(
sentinel_hosts,
sentinel_kwargs={
"socket_timeout": dify_config.REDIS_SENTINEL_SOCKET_TIMEOUT,
"username": dify_config.REDIS_SENTINEL_USERNAME,
"password": dify_config.REDIS_SENTINEL_PASSWORD,
},
)
master = sentinel.master_for(dify_config.REDIS_SENTINEL_SERVICE_NAME, **redis_params)
redis_client.initialize(master)
elif dify_config.REDIS_USE_CLUSTERS:
assert dify_config.REDIS_CLUSTERS is not None, "REDIS_CLUSTERS must be set when REDIS_USE_CLUSTERS is True"
nodes = [
ClusterNode(host=node.split(":")[0], port=int(node.split(":")[1]))
for node in dify_config.REDIS_CLUSTERS.split(",")
]
# FIXME: mypy error here, try to figure out how to fix it
redis_client.initialize(RedisCluster(startup_nodes=nodes, password=dify_config.REDIS_CLUSTERS_PASSWORD)) # type: ignore
else:
redis_params.update(
{
"host": dify_config.REDIS_HOST,
"port": dify_config.REDIS_PORT,
"connection_class": connection_class,
}
)
pool = redis.ConnectionPool(**redis_params)
redis_client.initialize(redis.Redis(connection_pool=pool))
app.extensions["redis"] = redis_client

View File

@@ -0,0 +1,40 @@
from configs import dify_config
from dify_app import DifyApp
def init_app(app: DifyApp):
if dify_config.SENTRY_DSN:
import openai
import sentry_sdk
from langfuse import parse_error # type: ignore
from sentry_sdk.integrations.celery import CeleryIntegration
from sentry_sdk.integrations.flask import FlaskIntegration
from werkzeug.exceptions import HTTPException
from core.model_runtime.errors.invoke import InvokeRateLimitError
def before_send(event, hint):
if "exc_info" in hint:
exc_type, exc_value, tb = hint["exc_info"]
if parse_error.defaultErrorResponse in str(exc_value):
return None
return event
sentry_sdk.init(
dsn=dify_config.SENTRY_DSN,
integrations=[FlaskIntegration(), CeleryIntegration()],
ignore_errors=[
HTTPException,
ValueError,
FileNotFoundError,
openai.APIStatusError,
InvokeRateLimitError,
parse_error.defaultErrorResponse,
],
traces_sample_rate=dify_config.SENTRY_TRACES_SAMPLE_RATE,
profiles_sample_rate=dify_config.SENTRY_PROFILES_SAMPLE_RATE,
environment=dify_config.DEPLOY_ENV,
release=f"dify-{dify_config.CURRENT_VERSION}-{dify_config.COMMIT_SHA}",
before_send=before_send,
)

View File

@@ -0,0 +1,6 @@
from configs import dify_config
from dify_app import DifyApp
def init_app(app: DifyApp):
app.secret_key = dify_config.SECRET_KEY

View File

@@ -0,0 +1,138 @@
import logging
from collections.abc import Callable, Generator
from typing import Literal, Union, overload
from flask import Flask
from configs import dify_config
from dify_app import DifyApp
from extensions.storage.base_storage import BaseStorage
from extensions.storage.storage_type import StorageType
logger = logging.getLogger(__name__)
class Storage:
def init_app(self, app: Flask):
storage_factory = self.get_storage_factory(dify_config.STORAGE_TYPE)
with app.app_context():
self.storage_runner = storage_factory()
@staticmethod
def get_storage_factory(storage_type: str) -> Callable[[], BaseStorage]:
match storage_type:
case StorageType.S3:
from extensions.storage.aws_s3_storage import AwsS3Storage
return AwsS3Storage
case StorageType.OPENDAL:
from extensions.storage.opendal_storage import OpenDALStorage
return lambda: OpenDALStorage(dify_config.OPENDAL_SCHEME)
case StorageType.LOCAL:
from extensions.storage.opendal_storage import OpenDALStorage
return lambda: OpenDALStorage(scheme="fs", root=dify_config.STORAGE_LOCAL_PATH)
case StorageType.AZURE_BLOB:
from extensions.storage.azure_blob_storage import AzureBlobStorage
return AzureBlobStorage
case StorageType.ALIYUN_OSS:
from extensions.storage.aliyun_oss_storage import AliyunOssStorage
return AliyunOssStorage
case StorageType.GOOGLE_STORAGE:
from extensions.storage.google_cloud_storage import GoogleCloudStorage
return GoogleCloudStorage
case StorageType.TENCENT_COS:
from extensions.storage.tencent_cos_storage import TencentCosStorage
return TencentCosStorage
case StorageType.OCI_STORAGE:
from extensions.storage.oracle_oci_storage import OracleOCIStorage
return OracleOCIStorage
case StorageType.HUAWEI_OBS:
from extensions.storage.huawei_obs_storage import HuaweiObsStorage
return HuaweiObsStorage
case StorageType.BAIDU_OBS:
from extensions.storage.baidu_obs_storage import BaiduObsStorage
return BaiduObsStorage
case StorageType.VOLCENGINE_TOS:
from extensions.storage.volcengine_tos_storage import VolcengineTosStorage
return VolcengineTosStorage
case StorageType.SUPBASE:
from extensions.storage.supabase_storage import SupabaseStorage
return SupabaseStorage
case _:
raise ValueError(f"unsupported storage type {storage_type}")
def save(self, filename, data):
try:
self.storage_runner.save(filename, data)
except Exception as e:
logger.exception(f"Failed to save file {filename}")
raise e
@overload
def load(self, filename: str, /, *, stream: Literal[False] = False) -> bytes: ...
@overload
def load(self, filename: str, /, *, stream: Literal[True]) -> Generator: ...
def load(self, filename: str, /, *, stream: bool = False) -> Union[bytes, Generator]:
try:
if stream:
return self.load_stream(filename)
else:
return self.load_once(filename)
except Exception as e:
logger.exception(f"Failed to load file {filename}")
raise e
def load_once(self, filename: str) -> bytes:
try:
return self.storage_runner.load_once(filename)
except Exception as e:
logger.exception(f"Failed to load_once file {filename}")
raise e
def load_stream(self, filename: str) -> Generator:
try:
return self.storage_runner.load_stream(filename)
except Exception as e:
logger.exception(f"Failed to load_stream file {filename}")
raise e
def download(self, filename, target_filepath):
try:
self.storage_runner.download(filename, target_filepath)
except Exception as e:
logger.exception(f"Failed to download file {filename}")
raise e
def exists(self, filename):
try:
return self.storage_runner.exists(filename)
except Exception as e:
logger.exception(f"Failed to check file exists {filename}")
raise e
def delete(self, filename):
try:
return self.storage_runner.delete(filename)
except Exception as e:
logger.exception(f"Failed to delete file {filename}")
raise e
storage = Storage()
def init_app(app: DifyApp):
storage.init_app(app)

View File

@@ -0,0 +1,11 @@
import os
import time
from dify_app import DifyApp
def init_app(app: DifyApp):
os.environ["TZ"] = "UTC"
# windows platform not support tzset
if hasattr(time, "tzset"):
time.tzset()

View File

@@ -0,0 +1,7 @@
from dify_app import DifyApp
def init_app(app: DifyApp):
import warnings
warnings.simplefilter("ignore", ResourceWarning)

View File

@@ -0,0 +1,54 @@
import posixpath
from collections.abc import Generator
import oss2 as aliyun_s3 # type: ignore
from configs import dify_config
from extensions.storage.base_storage import BaseStorage
class AliyunOssStorage(BaseStorage):
"""Implementation for Aliyun OSS storage."""
def __init__(self):
super().__init__()
self.bucket_name = dify_config.ALIYUN_OSS_BUCKET_NAME
self.folder = dify_config.ALIYUN_OSS_PATH
oss_auth_method = aliyun_s3.Auth
region = None
if dify_config.ALIYUN_OSS_AUTH_VERSION == "v4":
oss_auth_method = aliyun_s3.AuthV4
region = dify_config.ALIYUN_OSS_REGION
oss_auth = oss_auth_method(dify_config.ALIYUN_OSS_ACCESS_KEY, dify_config.ALIYUN_OSS_SECRET_KEY)
self.client = aliyun_s3.Bucket(
oss_auth,
dify_config.ALIYUN_OSS_ENDPOINT,
self.bucket_name,
connect_timeout=30,
region=region,
)
def save(self, filename, data):
self.client.put_object(self.__wrapper_folder_filename(filename), data)
def load_once(self, filename: str) -> bytes:
obj = self.client.get_object(self.__wrapper_folder_filename(filename))
data: bytes = obj.read()
return data
def load_stream(self, filename: str) -> Generator:
obj = self.client.get_object(self.__wrapper_folder_filename(filename))
while chunk := obj.read(4096):
yield chunk
def download(self, filename: str, target_filepath):
self.client.get_object_to_file(self.__wrapper_folder_filename(filename), target_filepath)
def exists(self, filename: str):
return self.client.object_exists(self.__wrapper_folder_filename(filename))
def delete(self, filename: str):
self.client.delete_object(self.__wrapper_folder_filename(filename))
def __wrapper_folder_filename(self, filename: str) -> str:
return posixpath.join(self.folder, filename) if self.folder else filename

View File

@@ -0,0 +1,87 @@
import logging
from collections.abc import Generator
import boto3 # type: ignore
from botocore.client import Config # type: ignore
from botocore.exceptions import ClientError # type: ignore
from configs import dify_config
from extensions.storage.base_storage import BaseStorage
logger = logging.getLogger(__name__)
class AwsS3Storage(BaseStorage):
"""Implementation for Amazon Web Services S3 storage."""
def __init__(self):
super().__init__()
self.bucket_name = dify_config.S3_BUCKET_NAME
if dify_config.S3_USE_AWS_MANAGED_IAM:
logger.info("Using AWS managed IAM role for S3")
session = boto3.Session()
region_name = dify_config.S3_REGION
self.client = session.client(service_name="s3", region_name=region_name)
else:
logger.info("Using ak and sk for S3")
self.client = boto3.client(
"s3",
aws_secret_access_key=dify_config.S3_SECRET_KEY,
aws_access_key_id=dify_config.S3_ACCESS_KEY,
endpoint_url=dify_config.S3_ENDPOINT,
region_name=dify_config.S3_REGION,
config=Config(s3={"addressing_style": dify_config.S3_ADDRESS_STYLE}),
)
# create bucket
try:
self.client.head_bucket(Bucket=self.bucket_name)
except ClientError as e:
# if bucket not exists, create it
if e.response["Error"]["Code"] == "404":
self.client.create_bucket(Bucket=self.bucket_name)
# if bucket is not accessible, pass, maybe the bucket is existing but not accessible
elif e.response["Error"]["Code"] == "403":
pass
else:
# other error, raise exception
raise
def save(self, filename, data):
self.client.put_object(Bucket=self.bucket_name, Key=filename, Body=data)
def load_once(self, filename: str) -> bytes:
try:
data: bytes = self.client.get_object(Bucket=self.bucket_name, Key=filename)["Body"].read()
except ClientError as ex:
if ex.response["Error"]["Code"] == "NoSuchKey":
raise FileNotFoundError("File not found")
else:
raise
return data
def load_stream(self, filename: str) -> Generator:
try:
response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
yield from response["Body"].iter_chunks()
except ClientError as ex:
if ex.response["Error"]["Code"] == "NoSuchKey":
raise FileNotFoundError("file not found")
elif "reached max retries" in str(ex):
raise ValueError("please do not request the same file too frequently")
else:
raise
def download(self, filename, target_filepath):
self.client.download_file(self.bucket_name, filename, target_filepath)
def exists(self, filename):
try:
self.client.head_object(Bucket=self.bucket_name, Key=filename)
return True
except:
return False
def delete(self, filename):
self.client.delete_object(Bucket=self.bucket_name, Key=filename)

View File

@@ -0,0 +1,84 @@
from collections.abc import Generator
from datetime import UTC, datetime, timedelta
from typing import Optional
from azure.identity import ChainedTokenCredential, DefaultAzureCredential
from azure.storage.blob import AccountSasPermissions, BlobServiceClient, ResourceTypes, generate_account_sas
from configs import dify_config
from extensions.ext_redis import redis_client
from extensions.storage.base_storage import BaseStorage
class AzureBlobStorage(BaseStorage):
"""Implementation for Azure Blob storage."""
def __init__(self):
super().__init__()
self.bucket_name = dify_config.AZURE_BLOB_CONTAINER_NAME
self.account_url = dify_config.AZURE_BLOB_ACCOUNT_URL
self.account_name = dify_config.AZURE_BLOB_ACCOUNT_NAME
self.account_key = dify_config.AZURE_BLOB_ACCOUNT_KEY
self.credential: Optional[ChainedTokenCredential] = None
if self.account_key == "managedidentity":
self.credential = DefaultAzureCredential()
else:
self.credential = None
def save(self, filename, data):
client = self._sync_client()
blob_container = client.get_container_client(container=self.bucket_name)
blob_container.upload_blob(filename, data)
def load_once(self, filename: str) -> bytes:
client = self._sync_client()
blob = client.get_container_client(container=self.bucket_name)
blob = blob.get_blob_client(blob=filename)
data: bytes = blob.download_blob().readall()
return data
def load_stream(self, filename: str) -> Generator:
client = self._sync_client()
blob = client.get_blob_client(container=self.bucket_name, blob=filename)
blob_data = blob.download_blob()
yield from blob_data.chunks()
def download(self, filename, target_filepath):
client = self._sync_client()
blob = client.get_blob_client(container=self.bucket_name, blob=filename)
with open(target_filepath, "wb") as my_blob:
blob_data = blob.download_blob()
blob_data.readinto(my_blob)
def exists(self, filename):
client = self._sync_client()
blob = client.get_blob_client(container=self.bucket_name, blob=filename)
return blob.exists()
def delete(self, filename):
client = self._sync_client()
blob_container = client.get_container_client(container=self.bucket_name)
blob_container.delete_blob(filename)
def _sync_client(self):
if self.account_key == "managedidentity":
return BlobServiceClient(account_url=self.account_url, credential=self.credential) # type: ignore
cache_key = "azure_blob_sas_token_{}_{}".format(self.account_name, self.account_key)
cache_result = redis_client.get(cache_key)
if cache_result is not None:
sas_token = cache_result.decode("utf-8")
else:
sas_token = generate_account_sas(
account_name=self.account_name or "",
account_key=self.account_key or "",
resource_types=ResourceTypes(service=True, container=True, object=True),
permission=AccountSasPermissions(read=True, write=True, delete=True, list=True, add=True, create=True),
expiry=datetime.now(UTC).replace(tzinfo=None) + timedelta(hours=1),
)
redis_client.set(cache_key, sas_token, ex=3000)
return BlobServiceClient(account_url=self.account_url or "", credential=sas_token)

View File

@@ -0,0 +1,57 @@
import base64
import hashlib
from collections.abc import Generator
from baidubce.auth.bce_credentials import BceCredentials # type: ignore
from baidubce.bce_client_configuration import BceClientConfiguration # type: ignore
from baidubce.services.bos.bos_client import BosClient # type: ignore
from configs import dify_config
from extensions.storage.base_storage import BaseStorage
class BaiduObsStorage(BaseStorage):
"""Implementation for Baidu OBS storage."""
def __init__(self):
super().__init__()
self.bucket_name = dify_config.BAIDU_OBS_BUCKET_NAME
client_config = BceClientConfiguration(
credentials=BceCredentials(
access_key_id=dify_config.BAIDU_OBS_ACCESS_KEY,
secret_access_key=dify_config.BAIDU_OBS_SECRET_KEY,
),
endpoint=dify_config.BAIDU_OBS_ENDPOINT,
)
self.client = BosClient(config=client_config)
def save(self, filename, data):
md5 = hashlib.md5()
md5.update(data)
content_md5 = base64.standard_b64encode(md5.digest())
self.client.put_object(
bucket_name=self.bucket_name, key=filename, data=data, content_length=len(data), content_md5=content_md5
)
def load_once(self, filename: str) -> bytes:
response = self.client.get_object(bucket_name=self.bucket_name, key=filename)
data: bytes = response.data.read()
return data
def load_stream(self, filename: str) -> Generator:
response = self.client.get_object(bucket_name=self.bucket_name, key=filename).data
while chunk := response.read(4096):
yield chunk
def download(self, filename, target_filepath):
self.client.get_object_to_file(bucket_name=self.bucket_name, key=filename, file_name=target_filepath)
def exists(self, filename):
res = self.client.get_object_meta_data(bucket_name=self.bucket_name, key=filename)
if res is None:
return False
return True
def delete(self, filename):
self.client.delete_object(bucket_name=self.bucket_name, key=filename)

View File

@@ -0,0 +1,32 @@
"""Abstract interface for file storage implementations."""
from abc import ABC, abstractmethod
from collections.abc import Generator
class BaseStorage(ABC):
"""Interface for file storage."""
@abstractmethod
def save(self, filename, data):
raise NotImplementedError
@abstractmethod
def load_once(self, filename: str) -> bytes:
raise NotImplementedError
@abstractmethod
def load_stream(self, filename: str) -> Generator:
raise NotImplementedError
@abstractmethod
def download(self, filename, target_filepath):
raise NotImplementedError
@abstractmethod
def exists(self, filename):
raise NotImplementedError
@abstractmethod
def delete(self, filename):
raise NotImplementedError

View File

@@ -0,0 +1,60 @@
import base64
import io
import json
from collections.abc import Generator
from google.cloud import storage as google_cloud_storage # type: ignore
from configs import dify_config
from extensions.storage.base_storage import BaseStorage
class GoogleCloudStorage(BaseStorage):
"""Implementation for Google Cloud storage."""
def __init__(self):
super().__init__()
self.bucket_name = dify_config.GOOGLE_STORAGE_BUCKET_NAME
service_account_json_str = dify_config.GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64
# if service_account_json_str is empty, use Application Default Credentials
if service_account_json_str:
service_account_json = base64.b64decode(service_account_json_str).decode("utf-8")
# convert str to object
service_account_obj = json.loads(service_account_json)
self.client = google_cloud_storage.Client.from_service_account_info(service_account_obj)
else:
self.client = google_cloud_storage.Client()
def save(self, filename, data):
bucket = self.client.get_bucket(self.bucket_name)
blob = bucket.blob(filename)
with io.BytesIO(data) as stream:
blob.upload_from_file(stream)
def load_once(self, filename: str) -> bytes:
bucket = self.client.get_bucket(self.bucket_name)
blob = bucket.get_blob(filename)
data: bytes = blob.download_as_bytes()
return data
def load_stream(self, filename: str) -> Generator:
bucket = self.client.get_bucket(self.bucket_name)
blob = bucket.get_blob(filename)
with blob.open(mode="rb") as blob_stream:
while chunk := blob_stream.read(4096):
yield chunk
def download(self, filename, target_filepath):
bucket = self.client.get_bucket(self.bucket_name)
blob = bucket.get_blob(filename)
blob.download_to_filename(target_filepath)
def exists(self, filename):
bucket = self.client.get_bucket(self.bucket_name)
blob = bucket.blob(filename)
return blob.exists()
def delete(self, filename):
bucket = self.client.get_bucket(self.bucket_name)
bucket.delete_blob(filename)

View File

@@ -0,0 +1,51 @@
from collections.abc import Generator
from obs import ObsClient # type: ignore
from configs import dify_config
from extensions.storage.base_storage import BaseStorage
class HuaweiObsStorage(BaseStorage):
"""Implementation for Huawei OBS storage."""
def __init__(self):
super().__init__()
self.bucket_name = dify_config.HUAWEI_OBS_BUCKET_NAME
self.client = ObsClient(
access_key_id=dify_config.HUAWEI_OBS_ACCESS_KEY,
secret_access_key=dify_config.HUAWEI_OBS_SECRET_KEY,
server=dify_config.HUAWEI_OBS_SERVER,
)
def save(self, filename, data):
self.client.putObject(bucketName=self.bucket_name, objectKey=filename, content=data)
def load_once(self, filename: str) -> bytes:
data: bytes = self.client.getObject(bucketName=self.bucket_name, objectKey=filename)["body"].response.read()
return data
def load_stream(self, filename: str) -> Generator:
response = self.client.getObject(bucketName=self.bucket_name, objectKey=filename)["body"].response
while chunk := response.read(4096):
yield chunk
def download(self, filename, target_filepath):
self.client.getObject(bucketName=self.bucket_name, objectKey=filename, downloadPath=target_filepath)
def exists(self, filename):
res = self._get_meta(filename)
if res is None:
return False
return True
def delete(self, filename):
self.client.deleteObject(bucketName=self.bucket_name, objectKey=filename)
def _get_meta(self, filename):
res = self.client.getObjectMetadata(bucketName=self.bucket_name, objectKey=filename)
if res.status < 300:
return res
else:
return None

View File

@@ -0,0 +1,82 @@
import logging
import os
from collections.abc import Generator
from pathlib import Path
import opendal # type: ignore[import]
from dotenv import dotenv_values
from extensions.storage.base_storage import BaseStorage
logger = logging.getLogger(__name__)
def _get_opendal_kwargs(*, scheme: str, env_file_path: str = ".env", prefix: str = "OPENDAL_"):
kwargs = {}
config_prefix = prefix + scheme.upper() + "_"
for key, value in os.environ.items():
if key.startswith(config_prefix):
kwargs[key[len(config_prefix) :].lower()] = value
file_env_vars: dict = dotenv_values(env_file_path) or {}
for key, value in file_env_vars.items():
if key.startswith(config_prefix) and key[len(config_prefix) :].lower() not in kwargs and value:
kwargs[key[len(config_prefix) :].lower()] = value
return kwargs
class OpenDALStorage(BaseStorage):
def __init__(self, scheme: str, **kwargs):
kwargs = kwargs or _get_opendal_kwargs(scheme=scheme)
if scheme == "fs":
root = kwargs.get("root", "storage")
Path(root).mkdir(parents=True, exist_ok=True)
self.op = opendal.Operator(scheme=scheme, **kwargs) # type: ignore
logger.debug(f"opendal operator created with scheme {scheme}")
retry_layer = opendal.layers.RetryLayer(max_times=3, factor=2.0, jitter=True)
self.op = self.op.layer(retry_layer)
logger.debug("added retry layer to opendal operator")
def save(self, filename: str, data: bytes) -> None:
self.op.write(path=filename, bs=data)
logger.debug(f"file {filename} saved")
def load_once(self, filename: str) -> bytes:
if not self.exists(filename):
raise FileNotFoundError("File not found")
content: bytes = self.op.read(path=filename)
logger.debug(f"file {filename} loaded")
return content
def load_stream(self, filename: str) -> Generator:
if not self.exists(filename):
raise FileNotFoundError("File not found")
batch_size = 4096
file = self.op.open(path=filename, mode="rb")
while chunk := file.read(batch_size):
yield chunk
logger.debug(f"file {filename} loaded as stream")
def download(self, filename: str, target_filepath: str):
if not self.exists(filename):
raise FileNotFoundError("File not found")
with Path(target_filepath).open("wb") as f:
f.write(self.op.read(path=filename))
logger.debug(f"file {filename} downloaded to {target_filepath}")
def exists(self, filename: str) -> bool:
res: bool = self.op.exists(path=filename)
return res
def delete(self, filename: str):
if self.exists(filename):
self.op.delete(path=filename)
logger.debug(f"file {filename} deleted")
return
logger.debug(f"file {filename} not found, skip delete")

View File

@@ -0,0 +1,59 @@
from collections.abc import Generator
import boto3 # type: ignore
from botocore.exceptions import ClientError # type: ignore
from configs import dify_config
from extensions.storage.base_storage import BaseStorage
class OracleOCIStorage(BaseStorage):
"""Implementation for Oracle OCI storage."""
def __init__(self):
super().__init__()
self.bucket_name = dify_config.OCI_BUCKET_NAME
self.client = boto3.client(
"s3",
aws_secret_access_key=dify_config.OCI_SECRET_KEY,
aws_access_key_id=dify_config.OCI_ACCESS_KEY,
endpoint_url=dify_config.OCI_ENDPOINT,
region_name=dify_config.OCI_REGION,
)
def save(self, filename, data):
self.client.put_object(Bucket=self.bucket_name, Key=filename, Body=data)
def load_once(self, filename: str) -> bytes:
try:
data: bytes = self.client.get_object(Bucket=self.bucket_name, Key=filename)["Body"].read()
except ClientError as ex:
if ex.response["Error"]["Code"] == "NoSuchKey":
raise FileNotFoundError("File not found")
else:
raise
return data
def load_stream(self, filename: str) -> Generator:
try:
response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
yield from response["Body"].iter_chunks()
except ClientError as ex:
if ex.response["Error"]["Code"] == "NoSuchKey":
raise FileNotFoundError("File not found")
else:
raise
def download(self, filename, target_filepath):
self.client.download_file(self.bucket_name, filename, target_filepath)
def exists(self, filename):
try:
self.client.head_object(Bucket=self.bucket_name, Key=filename)
return True
except:
return False
def delete(self, filename):
self.client.delete_object(Bucket=self.bucket_name, Key=filename)

View File

@@ -0,0 +1,16 @@
from enum import StrEnum
class StorageType(StrEnum):
ALIYUN_OSS = "aliyun-oss"
AZURE_BLOB = "azure-blob"
BAIDU_OBS = "baidu-obs"
GOOGLE_STORAGE = "google-storage"
HUAWEI_OBS = "huawei-obs"
LOCAL = "local"
OCI_STORAGE = "oci-storage"
OPENDAL = "opendal"
S3 = "s3"
TENCENT_COS = "tencent-cos"
VOLCENGINE_TOS = "volcengine-tos"
SUPBASE = "supabase"

View File

@@ -0,0 +1,59 @@
import io
from collections.abc import Generator
from pathlib import Path
from supabase import Client
from configs import dify_config
from extensions.storage.base_storage import BaseStorage
class SupabaseStorage(BaseStorage):
"""Implementation for supabase obs storage."""
def __init__(self):
super().__init__()
if dify_config.SUPABASE_URL is None:
raise ValueError("SUPABASE_URL is not set")
if dify_config.SUPABASE_API_KEY is None:
raise ValueError("SUPABASE_API_KEY is not set")
if dify_config.SUPABASE_BUCKET_NAME is None:
raise ValueError("SUPABASE_BUCKET_NAME is not set")
self.bucket_name = dify_config.SUPABASE_BUCKET_NAME
self.client = Client(supabase_url=dify_config.SUPABASE_URL, supabase_key=dify_config.SUPABASE_API_KEY)
self.create_bucket(id=dify_config.SUPABASE_BUCKET_NAME, bucket_name=dify_config.SUPABASE_BUCKET_NAME)
def create_bucket(self, id, bucket_name):
if not self.bucket_exists():
self.client.storage.create_bucket(id=id, name=bucket_name)
def save(self, filename, data):
self.client.storage.from_(self.bucket_name).upload(filename, data)
def load_once(self, filename: str) -> bytes:
content: bytes = self.client.storage.from_(self.bucket_name).download(filename)
return content
def load_stream(self, filename: str) -> Generator:
result = self.client.storage.from_(self.bucket_name).download(filename)
byte_stream = io.BytesIO(result)
while chunk := byte_stream.read(4096): # Read in chunks of 4KB
yield chunk
def download(self, filename, target_filepath):
result = self.client.storage.from_(self.bucket_name).download(filename)
Path(target_filepath).write_bytes(result)
def exists(self, filename):
result = self.client.storage.from_(self.bucket_name).list(filename)
if result.count() > 0:
return True
return False
def delete(self, filename):
self.client.storage.from_(self.bucket_name).remove(filename)
def bucket_exists(self):
buckets = self.client.storage.list_buckets()
return any(bucket.name == self.bucket_name for bucket in buckets)

View File

@@ -0,0 +1,43 @@
from collections.abc import Generator
from qcloud_cos import CosConfig, CosS3Client # type: ignore
from configs import dify_config
from extensions.storage.base_storage import BaseStorage
class TencentCosStorage(BaseStorage):
"""Implementation for Tencent Cloud COS storage."""
def __init__(self):
super().__init__()
self.bucket_name = dify_config.TENCENT_COS_BUCKET_NAME
config = CosConfig(
Region=dify_config.TENCENT_COS_REGION,
SecretId=dify_config.TENCENT_COS_SECRET_ID,
SecretKey=dify_config.TENCENT_COS_SECRET_KEY,
Scheme=dify_config.TENCENT_COS_SCHEME,
)
self.client = CosS3Client(config)
def save(self, filename, data):
self.client.put_object(Bucket=self.bucket_name, Body=data, Key=filename)
def load_once(self, filename: str) -> bytes:
data: bytes = self.client.get_object(Bucket=self.bucket_name, Key=filename)["Body"].get_raw_stream().read()
return data
def load_stream(self, filename: str) -> Generator:
response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
yield from response["Body"].get_stream(chunk_size=4096)
def download(self, filename, target_filepath):
response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
response["Body"].get_stream_to_file(target_filepath)
def exists(self, filename):
return self.client.object_exists(Bucket=self.bucket_name, Key=filename)
def delete(self, filename):
self.client.delete_object(Bucket=self.bucket_name, Key=filename)

View File

@@ -0,0 +1,46 @@
from collections.abc import Generator
import tos # type: ignore
from configs import dify_config
from extensions.storage.base_storage import BaseStorage
class VolcengineTosStorage(BaseStorage):
"""Implementation for Volcengine TOS storage."""
def __init__(self):
super().__init__()
self.bucket_name = dify_config.VOLCENGINE_TOS_BUCKET_NAME
self.client = tos.TosClientV2(
ak=dify_config.VOLCENGINE_TOS_ACCESS_KEY,
sk=dify_config.VOLCENGINE_TOS_SECRET_KEY,
endpoint=dify_config.VOLCENGINE_TOS_ENDPOINT,
region=dify_config.VOLCENGINE_TOS_REGION,
)
def save(self, filename, data):
self.client.put_object(bucket=self.bucket_name, key=filename, content=data)
def load_once(self, filename: str) -> bytes:
data = self.client.get_object(bucket=self.bucket_name, key=filename).read()
if not isinstance(data, bytes):
raise TypeError("Expected bytes, got {}".format(type(data).__name__))
return data
def load_stream(self, filename: str) -> Generator:
response = self.client.get_object(bucket=self.bucket_name, key=filename)
while chunk := response.read(4096):
yield chunk
def download(self, filename, target_filepath):
self.client.get_object_to_file(bucket=self.bucket_name, key=filename, file_path=target_filepath)
def exists(self, filename):
res = self.client.head_object(bucket=self.bucket_name, key=filename)
if res.status_code != 200:
return False
return True
def delete(self, filename):
self.client.delete_object(bucket=self.bucket_name, key=filename)