diff --git a/api/apps/llm_app.py b/api/apps/llm_app.py index 3ba56325ee0..71aaf3c77a9 100644 --- a/api/apps/llm_app.py +++ b/api/apps/llm_app.py @@ -24,7 +24,7 @@ from api.db import StatusEnum, LLMType from api.db.db_models import TenantLLM from api.utils.api_utils import get_json_result -from api.utils.base64_image import test_image +from common.base64_image import test_image from rag.llm import EmbeddingModel, ChatModel, RerankModel, CvModel, TTSModel diff --git a/api/utils/base64_image.py b/api/utils/base64_image.py index 25afcf33290..cd7307f51a5 100644 --- a/api/utils/base64_image.py +++ b/api/utils/base64_image.py @@ -1,56 +1,15 @@ -import base64 -import logging -from functools import partial -from io import BytesIO - -from PIL import Image - -test_image_base64 = "iVBORw0KGgoAAAANSUhEUgAAAGQAAABkCAIAAAD/gAIDAAAA6ElEQVR4nO3QwQ3AIBDAsIP9d25XIC+EZE8QZc18w5l9O+AlZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBT+IYAHHLHkdEgAAAABJRU5ErkJggg==" -test_image = base64.b64decode(test_image_base64) - - -async def image2id(d: dict, storage_put_func: partial, objname:str, bucket:str="imagetemps"): - import logging - from io import BytesIO - import trio - from rag.svr.task_executor import minio_limiter - if not d.get("image"): - return - - with BytesIO() as output_buffer: - if isinstance(d["image"], bytes): - output_buffer.write(d["image"]) - output_buffer.seek(0) - else: - # If the image is in RGBA mode, convert it to RGB mode before saving it in JPEG format. - if d["image"].mode in ("RGBA", "P"): - converted_image = d["image"].convert("RGB") - d["image"] = converted_image - try: - d["image"].save(output_buffer, format='JPEG') - except OSError as e: - logging.warning( - "Saving image exception, ignore: {}".format(str(e))) - - async with minio_limiter: - await trio.to_thread.run_sync(lambda: storage_put_func(bucket=bucket, fnm=objname, binary=output_buffer.getvalue())) - d["img_id"] = f"{bucket}-{objname}" - if not isinstance(d["image"], bytes): - d["image"].close() - del d["image"] # Remove image reference - - -def id2image(image_id:str|None, storage_get_func: partial): - if not image_id: - return - arr = image_id.split("-") - if len(arr) != 2: - return - bkt, nm = image_id.split("-") - try: - blob = storage_get_func(bucket=bkt, filename=nm) - if not blob: - return - return Image.open(BytesIO(blob)) - except Exception as e: - logging.exception(e) +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/api/utils/file_utils.py b/api/utils/file_utils.py index 45208da1e80..edf5b66fcc7 100644 --- a/api/utils/file_utils.py +++ b/api/utils/file_utils.py @@ -49,8 +49,6 @@ from api.db import FileType from common.file_utils import get_project_base_directory -PROJECT_BASE = os.getenv("RAG_PROJECT_BASE") or os.getenv("RAG_DEPLOY_BASE") - LOCK_KEY_pdfplumber = "global_shared_lock_pdfplumber" if LOCK_KEY_pdfplumber not in sys.modules: sys.modules[LOCK_KEY_pdfplumber] = threading.Lock() diff --git a/common/base64_image.py b/common/base64_image.py new file mode 100644 index 00000000000..e8e62240847 --- /dev/null +++ b/common/base64_image.py @@ -0,0 +1,72 @@ +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import base64 +import logging +from functools import partial +from io import BytesIO + +from PIL import Image + +test_image_base64 = "iVBORw0KGgoAAAANSUhEUgAAAGQAAABkCAIAAAD/gAIDAAAA6ElEQVR4nO3QwQ3AIBDAsIP9d25XIC+EZE8QZc18w5l9O+AlZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBWYFZgVmBT+IYAHHLHkdEgAAAABJRU5ErkJggg==" +test_image = base64.b64decode(test_image_base64) + + +async def image2id(d: dict, storage_put_func: partial, objname:str, bucket:str="imagetemps"): + import logging + from io import BytesIO + import trio + from rag.svr.task_executor import minio_limiter + if not d.get("image"): + return + + with BytesIO() as output_buffer: + if isinstance(d["image"], bytes): + output_buffer.write(d["image"]) + output_buffer.seek(0) + else: + # If the image is in RGBA mode, convert it to RGB mode before saving it in JPEG format. + if d["image"].mode in ("RGBA", "P"): + converted_image = d["image"].convert("RGB") + d["image"] = converted_image + try: + d["image"].save(output_buffer, format='JPEG') + except OSError as e: + logging.warning( + "Saving image exception, ignore: {}".format(str(e))) + + async with minio_limiter: + await trio.to_thread.run_sync(lambda: storage_put_func(bucket=bucket, fnm=objname, binary=output_buffer.getvalue())) + d["img_id"] = f"{bucket}-{objname}" + if not isinstance(d["image"], bytes): + d["image"].close() + del d["image"] # Remove image reference + + +def id2image(image_id:str|None, storage_get_func: partial): + if not image_id: + return + arr = image_id.split("-") + if len(arr) != 2: + return + bkt, nm = image_id.split("-") + try: + blob = storage_get_func(bucket=bkt, filename=nm) + if not blob: + return + return Image.open(BytesIO(blob)) + except Exception as e: + logging.exception(e) diff --git a/rag/flow/hierarchical_merger/hierarchical_merger.py b/rag/flow/hierarchical_merger/hierarchical_merger.py index 69d50c8f1c2..ded3cbead49 100644 --- a/rag/flow/hierarchical_merger/hierarchical_merger.py +++ b/rag/flow/hierarchical_merger/hierarchical_merger.py @@ -21,7 +21,7 @@ import trio from common.misc_utils import get_uuid -from api.utils.base64_image import id2image, image2id +from common.base64_image import id2image, image2id from deepdoc.parser.pdf_parser import RAGFlowPdfParser from rag.flow.base import ProcessBase, ProcessParamBase from rag.flow.hierarchical_merger.schema import HierarchicalMergerFromUpstream diff --git a/rag/flow/parser/parser.py b/rag/flow/parser/parser.py index d67253d881c..4c5fb7ba090 100644 --- a/rag/flow/parser/parser.py +++ b/rag/flow/parser/parser.py @@ -27,7 +27,7 @@ from api.db.services.file_service import FileService from api.db.services.llm_service import LLMBundle from common.misc_utils import get_uuid -from api.utils.base64_image import image2id +from common.base64_image import image2id from deepdoc.parser import ExcelParser from deepdoc.parser.mineru_parser import MinerUParser from deepdoc.parser.pdf_parser import PlainParser, RAGFlowPdfParser, VisionParser diff --git a/rag/flow/splitter/splitter.py b/rag/flow/splitter/splitter.py index da0ce8b9125..a62c4458019 100644 --- a/rag/flow/splitter/splitter.py +++ b/rag/flow/splitter/splitter.py @@ -18,7 +18,7 @@ import trio from common.misc_utils import get_uuid -from api.utils.base64_image import id2image, image2id +from common.base64_image import id2image, image2id from deepdoc.parser.pdf_parser import RAGFlowPdfParser from rag.flow.base import ProcessBase, ProcessParamBase from rag.flow.splitter.schema import SplitterFromUpstream diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py index dafaaf28fe7..d786b13c13d 100644 --- a/rag/svr/task_executor.py +++ b/rag/svr/task_executor.py @@ -27,7 +27,7 @@ from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.services.pipeline_operation_log_service import PipelineOperationLogService from api.utils.api_utils import timeout -from api.utils.base64_image import image2id +from common.base64_image import image2id from api.utils.log_utils import init_root_logger from common.file_utils import get_project_base_directory from api.utils.configs import show_configs