chore: process script now scales the previews so the html file is not too big

This commit is contained in:
Almamu 2025-04-08 03:50:31 +02:00
parent 1b33eff462
commit 915a89fb41

View File

@ -3,7 +3,6 @@ import re
import base64
import cv2
import numpy as np
import shutil
from bs4 import BeautifulSoup
def extract_base64_image(html_file):
@ -17,35 +16,50 @@ def extract_base64_image(html_file):
return match.group(1)
return None
def resize_image_base64(image_data, width=100):
    """Resize a base64-encoded image to ``width`` pixels wide and return it as base64 PNG.

    The height is scaled by the same factor as the width, so the aspect
    ratio is kept.

    Args:
        image_data: Base64 text of an encoded image readable by ``cv2.imdecode``.
        width: Target width in pixels (default 100).

    Returns:
        Base64 text (``str``) of the resized image encoded as PNG, or ``None``
        if the input cannot be decoded or the result cannot be encoded.
    """
    img_array = np.frombuffer(base64.b64decode(image_data), dtype=np.uint8)
    # IMREAD_UNCHANGED keeps the original channels (e.g. alpha) in the thumbnail.
    img = cv2.imdecode(img_array, cv2.IMREAD_UNCHANGED)
    if img is None:
        return None

    h, w = img.shape[:2]
    # Very wide images would truncate to a height of 0, which cv2.resize
    # rejects; keep at least one row of pixels.
    new_height = max(1, int((width / w) * h))
    resized_img = cv2.resize(img, (width, new_height), interpolation=cv2.INTER_AREA)

    ok, buffer = cv2.imencode('.png', resized_img)
    if not ok:
        # Encoding failed; report it the same way as an undecodable input.
        return None
    return base64.b64encode(buffer).decode('utf-8')
def categorize_image(image_data):
    """Categorize a base64-encoded image and return it together with a resized copy.

    Args:
        image_data: Base64 text of the encoded image, or a falsy value when
            no image is available.

    Returns:
        A ``(category, resized_image)`` tuple. ``category`` is ``"no_image"``
        when nothing was supplied or the data cannot be decoded, ``"grey"``
        when the image is bright and nearly uniform, and ``"content"``
        otherwise. ``resized_image`` is the result of
        ``resize_image_base64(image_data)``, or ``None`` for ``"no_image"``.
    """
    # Every exit returns a 2-tuple so callers can always unpack the result.
    if not image_data:
        return "no_image", None

    img_array = np.frombuffer(base64.b64decode(image_data), dtype=np.uint8)
    img = cv2.imdecode(img_array, cv2.IMREAD_GRAYSCALE)
    if img is None:
        return "no_image", None

    mean_value = cv2.mean(img)[0]  # Average pixel brightness
    std_dev = np.std(img)  # Standard deviation of pixel values

    # Bright (mean > 200) with little variation (std < 10) counts as "grey".
    category = "grey" if mean_value > 200 and std_dev < 10 else "content"
    resized_data = resize_image_base64(image_data)
    return category, resized_data
def process_html_files(input_folder, output_html="gallery.html"):
"""Processes all HTML files, categorizes them, and generates a gallery."""
categories = {"no_image": [], "grey": [], "content": []}
for file in os.listdir(input_folder):
if file.endswith(".html") and not file.startswith('report'):
file_path = os.path.join(input_folder, file)
image_data = extract_base64_image(file_path)
category = categorize_image(image_data)
categories[category].append((file, image_data))
category, resized_image = categorize_image(image_data)
categories[category].append((file, resized_image))
with open(output_html, "w", encoding="utf-8") as f:
f.write("""
<html>
@ -54,13 +68,13 @@ def process_html_files(input_folder, output_html="gallery.html"):
<style>
.gallery { display: flex; flex-wrap: wrap; }
.thumb { margin: 10px; text-align: center; }
img { width: 100px; height: auto; border: 1px solid black; }
img { border: 1px solid black; }
</style>
</head>
<body>
<h1>Image Gallery</h1>
""")
for category, files in categories.items():
f.write(f"<h2>{category.replace('_', ' ').title()}</h2>")
f.write("<div class='gallery'>")
@ -70,9 +84,9 @@ def process_html_files(input_folder, output_html="gallery.html"):
else:
f.write(f"<div class='thumb'><a href='{file_name}'>[No Image]</a></div>")
f.write("</div>")
f.write("</body></html>")
print(f"Gallery created at {output_html}")
if __name__ == "__main__":