image_fitz.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
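"""
Fitz image preprocessing plugin.

Renders an input image (local path, HTTP(S) URL, Base64 data URI, or PIL.Image)
through PyMuPDF (fitz) into a new high-quality image at a requested DPI.

Main exported functions:
- render_image_at_dpi: the preferred, clearly named API.
- get_image_by_fitz_doc: a compatibility name kept in line with existing code.

The supporting helper is included as well so the module is self-contained:
- fitz_doc_to_image: renders a fitz.Page into a PIL.Image.
"""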
  11. from collections.abc import Generator
  12. from typing import Any, Union, Optional
  13. from io import BytesIO
  14. import os
  15. import base64
  16. import requests
  17. import fitz # PyMuPDF
  18. from PIL import Image
  19. from dify_plugin import Tool
  20. from dify_plugin.entities.tool import ToolInvokeMessage
  21. def _load_image_to_bytes(image: Union[str, Image.Image]) -> tuple[Image.Image, bytes]:
  22. """
  23. 将输入图片统一读取为 PIL.Image 和原始字节数据。
  24. 支持:
  25. - PIL.Image.Image 直接传入
  26. - 本地路径(以普通字符串表示)
  27. - HTTP(S) URL
  28. - Base64 数据 URI (data:image/xxx;base64,...)
  29. 返回:
  30. - (pil_image, image_bytes)
  31. """
  32. if isinstance(image, Image.Image):
  33. bio = BytesIO()
  34. image.save(bio, format='PNG')
  35. return image, bio.getvalue()
  36. assert isinstance(image, str), f"Unsupported image type: {type(image)}"
  37. # 处理 base64 数据 URI
  38. if image.startswith("data:image/"):
  39. try:
  40. # 解析 data URI: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...
  41. header, data = image.split(',', 1)
  42. if 'base64' in header:
  43. data_bytes = base64.b64decode(data)
  44. pil_img = Image.open(BytesIO(data_bytes))
  45. return pil_img, data_bytes
  46. else:
  47. raise ValueError("Only base64 encoded data URIs are supported")
  48. except Exception as e:
  49. raise ValueError(f"Failed to parse base64 data URI: {e}")
  50. # 处理 HTTP(S) URL
  51. if image.startswith("http://") or image.startswith("https://"):
  52. try:
  53. with requests.get(image, stream=True) as response:
  54. response.raise_for_status()
  55. data_bytes = response.content
  56. pil_img = Image.open(BytesIO(data_bytes))
  57. return pil_img, data_bytes
  58. except Exception as e:
  59. raise ValueError(f"Failed to download image from URL: {e}")
  60. # 默认当作本地路径处理
  61. try:
  62. with open(image, 'rb') as f:
  63. data_bytes = f.read()
  64. pil_img = Image.open(BytesIO(data_bytes))
  65. return pil_img, data_bytes
  66. except Exception as e:
  67. raise ValueError(f"Failed to load image from file path: {e}")
  68. def fitz_doc_to_image(doc: fitz.Page, target_dpi: int = 200, origin_dpi: Optional[tuple] = None) -> Image.Image:
  69. """
  70. 将 fitz.Page 按指定 DPI 渲染为 PIL.Image。
  71. 参数:
  72. - doc: fitz.Page
  73. - target_dpi: 目标渲染 DPI,默认 200
  74. - origin_dpi: 原始 DPI 信息(当前未使用,保留参数以兼容原始签名)
  75. """
  76. mat = fitz.Matrix(target_dpi / 72, target_dpi / 72)
  77. pm = doc.get_pixmap(matrix=mat, alpha=False)
  78. if pm.width > 4500 or pm.height > 4500:
  79. # 超大图回退到 fitz 默认 DPI,以避免内存和性能问题
  80. mat = fitz.Matrix(72 / 72, 72 / 72)
  81. pm = doc.get_pixmap(matrix=mat, alpha=False)
  82. image = Image.frombytes('RGB', (pm.width, pm.height), pm.samples)
  83. return image
  84. def render_image_at_dpi(image: Union[str, Image.Image], target_dpi: int = 200) -> Image.Image:
  85. """
  86. 使用 fitz 将任意输入图片按指定 DPI 渲染为新的 PIL.Image。
  87. 支持输入:
  88. - PIL.Image.Image
  89. - 本地路径
  90. - HTTP(S) URL
  91. 返回:
  92. - 渲染后的 PIL.Image
  93. """
  94. pil_img, data_bytes = _load_image_to_bytes(image)
  95. origin_dpi = pil_img.info.get('dpi', None)
  96. # 先将图片封装为 PDF,再用 fitz 渲染
  97. pdf_bytes = fitz.open(stream=data_bytes).convert_to_pdf()
  98. doc = fitz.open('pdf', pdf_bytes)
  99. page = doc[0]
  100. image_fitz = fitz_doc_to_image(page, target_dpi=target_dpi, origin_dpi=origin_dpi)
  101. return image_fitz
  102. def get_image_by_fitz_doc(image: Union[str, Image.Image], target_dpi: int = 200) -> Image.Image:
  103. """
  104. 兼容函数名:行为等同于 render_image_at_dpi。
  105. """
  106. return render_image_at_dpi(image, target_dpi=target_dpi)
  107. class ImageFitzTool(Tool):
  108. def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
  109. """
  110. 调用图像预处理工具
  111. """
  112. try:
  113. # 获取参数
  114. image_input = tool_parameters.get("image_input")
  115. target_dpi = tool_parameters.get("target_dpi", 200)
  116. if not image_input:
  117. raise ValueError("image_input parameter is required")
  118. # 处理图像
  119. processed_image = render_image_at_dpi(image_input, target_dpi)
  120. # 将处理后的图像转换为字节数据
  121. bio = BytesIO()
  122. processed_image.save(bio, format='PNG')
  123. image_bytes = bio.getvalue()
  124. # 创建文件元数据
  125. meta = {
  126. 'filename': 'processed_image.png',
  127. 'mime_type': 'image/png',
  128. 'width': processed_image.size[0],
  129. 'height': processed_image.size[1],
  130. 'dpi': target_dpi
  131. }
  132. # 返回 blob 消息(文件形式)
  133. yield self.create_blob_message(blob=image_bytes, meta=meta)
  134. except Exception as e:
  135. yield self.create_text_message(f"Error processing image: {str(e)}")