# linux-wallpaperengine/tools/scripts/process.py
#
# 95 lines
# 3.4 KiB
# Python

import base64
import html
import os
import re

import cv2
import numpy as np
from bs4 import BeautifulSoup
def extract_base64_image(html_file):
    """Return the base64 payload of the first ``<img>`` tag in *html_file*.

    The file is read as UTF-8 and parsed with BeautifulSoup's ``html.parser``.
    Only the first ``<img>`` tag is inspected, and only a ``src`` attribute
    containing a ``data:image/png;base64,`` URI produces a result.

    Returns:
        The text that follows the ``base64,`` marker, or ``None`` when the
        document has no ``<img>`` tag, the tag has no ``src`` attribute, or
        the ``src`` does not contain a PNG data URI.
    """
    with open(html_file, "r", encoding="utf-8") as handle:
        document = BeautifulSoup(handle, "html.parser")

    first_img = document.find("img")
    if not first_img or 'src' not in first_img.attrs:
        return None

    found = re.search(r'data:image/png;base64,(.+)', first_img['src'])
    return found.group(1) if found else None
def resize_image_base64(image_data, width=100):
    """Resize a base64-encoded image to *width* pixels wide, as base64 PNG.

    The height is scaled by the same factor as the width so the aspect ratio
    is kept (but never drops below one pixel), and the result is re-encoded
    as PNG.

    Args:
        image_data: Base64 text (or bytes) of an encoded image.
        width: Target width in pixels.

    Returns:
        The resized image as a base64 string of PNG data, or ``None`` when
        the input is empty, is not valid base64, cannot be decoded as an
        image, or cannot be re-encoded.
    """
    if not image_data:
        return None
    try:
        raw = base64.b64decode(image_data)
    except ValueError:
        # binascii.Error (bad padding / bad length) is a ValueError subclass.
        return None
    if not raw:
        # Nothing to decode; do not hand OpenCV an empty buffer.
        return None

    img = cv2.imdecode(np.frombuffer(raw, dtype=np.uint8), cv2.IMREAD_UNCHANGED)
    if img is None:
        return None

    h, w = img.shape[:2]
    # Clamp to one pixel: for very wide images the scaled height truncates
    # to 0, which is not a valid target size for cv2.resize.
    new_height = max(1, int((width / w) * h))
    resized_img = cv2.resize(img, (width, new_height), interpolation=cv2.INTER_AREA)

    ok, buffer = cv2.imencode('.png', resized_img)
    if not ok:
        return None
    return base64.b64encode(buffer).decode('utf-8')
def categorize_image(image_data):
    """Classify a base64-encoded image and return it with a thumbnail.

    Args:
        image_data: Base64 text of an encoded image, or a falsy value when
            no image is available.

    Returns:
        A ``(category, thumbnail)`` tuple. ``category`` is one of:

        * ``"no_image"`` - the input is empty, is not valid base64, or cannot
          be decoded as an image; ``thumbnail`` is ``None``.
        * ``"grey"`` - the grayscale image has a mean above 200 and a
          standard deviation below 10.
        * ``"content"`` - any other decodable image.

        For ``"grey"`` and ``"content"``, ``thumbnail`` is whatever
        ``resize_image_base64(image_data)`` returns, which may be ``None``.
    """
    if not image_data:
        return "no_image", None
    try:
        raw = base64.b64decode(image_data)
    except ValueError:
        # Malformed base64 (binascii.Error is a ValueError subclass) is
        # treated as a missing image instead of aborting the caller.
        return "no_image", None
    if not raw:
        return "no_image", None

    img = cv2.imdecode(np.frombuffer(raw, dtype=np.uint8), cv2.IMREAD_GRAYSCALE)
    if img is None:
        return "no_image", None

    mean_value = cv2.mean(img)[0]
    std_dev = np.std(img)
    # Bright and nearly uniform: classified as a "grey" frame.
    category = "grey" if mean_value > 200 and std_dev < 10 else "content"
    return category, resize_image_base64(image_data)
def process_html_files(input_folder, output_html="gallery.html"):
    """Categorize the HTML files in *input_folder* and write a gallery page.

    Every entry of *input_folder* whose name ends in ``.html`` and does not
    start with ``report`` is passed through ``extract_base64_image`` and
    ``categorize_image``. The gallery groups the files under the
    ``no_image``, ``grey`` and ``content`` headings; each entry links to its
    source file and shows the thumbnail when one is available.

    Args:
        input_folder: Directory containing the HTML files to process.
        output_html: Path of the gallery page to write (UTF-8).

    Side effects:
        Writes *output_html* and prints its path to stdout.
    """
    output_path = os.path.abspath(output_html)
    output_dir = os.path.dirname(output_path)

    categories = {"no_image": [], "grey": [], "content": []}
    # os.listdir() returns entries in arbitrary order; sort so the gallery
    # layout is reproducible between runs.
    for file in sorted(os.listdir(input_folder)):
        if not file.endswith(".html") or file.startswith('report'):
            continue
        file_path = os.path.join(input_folder, file)
        if os.path.abspath(file_path) == output_path:
            # A gallery written by a previous run lives among the inputs;
            # it must not be listed inside itself.
            continue
        image_data = extract_base64_image(file_path)
        category, resized_image = categorize_image(image_data)
        # Link relative to the gallery's own directory so the links resolve
        # even when the gallery is written outside input_folder.
        href = os.path.relpath(file_path, output_dir).replace(os.sep, "/")
        categories[category].append((href, resized_image))

    parts = ["""
<html>
<head>
<title>Image Gallery</title>
<style>
.gallery { display: flex; flex-wrap: wrap; }
.thumb { margin: 10px; text-align: center; }
img { border: 1px solid black; }
</style>
</head>
<body>
<h1>Image Gallery</h1>
"""]
    for category, files in categories.items():
        parts.append(f"<h2>{category.replace('_', ' ').title()}</h2>")
        parts.append("<div class='gallery'>")
        for href, img_data in files:
            # Escape quotes and markup characters: the value is placed
            # inside a single-quoted attribute.
            link = html.escape(href, quote=True)
            if img_data:
                parts.append(f"<div class='thumb'><a href='{link}'><img src='data:image/png;base64,{img_data}'></a></div>")
            else:
                parts.append(f"<div class='thumb'><a href='{link}'>[No Image]</a></div>")
        parts.append("</div>")
    parts.append("</body></html>")

    with open(output_html, "w", encoding="utf-8") as f:
        f.write("".join(parts))
    print(f"Gallery created at {output_html}")
if __name__ == "__main__":
    input_folder = "./output"  # Change to your directory
    # Derive the gallery path from input_folder so that changing the
    # directory above also moves the gallery; for "./output" this is
    # "output/gallery.html".
    gallery_path = os.path.normpath(os.path.join(input_folder, "gallery.html"))
    process_html_files(input_folder, gallery_path)