img-spider/main.py
2025-02-11 15:16:15 +08:00

52 lines
2.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
import turtle
import time
import tkinter as tk
from tkinter import simpledialog, messagebox
import threading
def download_image_chunked(url, image_dir, image_name, timeout=30):
    """Download ``url`` and stream it to ``image_dir/image_name`` in 8 KiB chunks.

    Args:
        url: Address of the image to fetch.
        image_dir: Directory the file is written into; it must already exist.
        image_name: File name to use inside ``image_dir``.
        timeout: Seconds passed to ``requests.get`` as its timeout. Without a
            timeout a server that stops responding would block the calling
            thread indefinitely.

    Returns:
        None. Success and failure are both reported with ``print``.
    """
    try:
        # Using the response as a context manager releases the streamed
        # connection even when the status is not 200 or writing fails part-way.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print("Failed to download the image.")
                return
            file_path = os.path.join(image_dir, image_name)
            with open(file_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):  # 8192 bytes per chunk
                    if chunk:  # ignore empty chunks
                        f.write(chunk)
    except (requests.RequestException, OSError) as exc:
        # When this runs through executor.submit and the returned future is
        # discarded, an uncaught exception would vanish silently, so report it.
        print(f"Failed to download the image: {exc}")
        return
    print(f"Image saved as {file_path}")
def submit_tasks_with_delay(executor, url, image_dir, count, interval):
    """Schedule ``count`` downloads of ``url``, spaced ``interval`` seconds apart.

    Download number ``i`` is saved as ``image_{i}.png`` and is handed to
    ``executor`` ``interval * i`` seconds after this call. The function itself
    returns immediately; the waiting happens in timer threads.

    Args:
        executor: Executor whose ``submit`` method receives the download tasks.
        url: Address passed to every download task.
        image_dir: Directory passed to every download task.
        count: Number of downloads to schedule.
        interval: Delay in seconds between consecutive submissions.

    Returns:
        list[threading.Timer]: The started timers in submission order. Joining
        them guarantees every task has been handed to ``executor``.
    """
    timers = []
    for i in range(count):
        image_name = f'image_{i}.png'
        # A dedicated timer thread waits out the delay and then submits the task.
        timer = threading.Timer(
            interval * i,
            executor.submit,
            args=(download_image_chunked, url, image_dir, image_name),
        )
        timer.start()
        timers.append(timer)
    return timers


def main(url, image_dir, count, interval):
    """Download ``url`` ``count`` times into ``image_dir``, one every ``interval`` seconds.

    Creates ``image_dir`` if needed, shows a start dialog, then blocks until
    every scheduled download has been submitted and has finished.

    Args:
        url: Address of the image to fetch repeatedly.
        image_dir: Directory that receives the downloaded files.
        count: Number of downloads to perform.
        interval: Delay in seconds between consecutive downloads.
    """
    os.makedirs(image_dir, exist_ok=True)
    # Show the start message.
    messagebox.showinfo("开始", "开始拉取将保存在Image文件夹中")
    # Initialise the thread pool.
    with ThreadPoolExecutor(max_workers=64) as executor:
        # Submission happens on timer threads, one task per interval.
        timers = submit_tasks_with_delay(executor, url, image_dir, count, interval)
        # Join the timers instead of sleeping for an estimated duration: the
        # pool must not shut down before the last task has been submitted,
        # otherwise executor.submit raises RuntimeError in the timer thread.
        for timer in timers:
            timer.join()
    # Leaving the with-block waits for the submitted downloads, so everything
    # is finished here. No completion dialog is shown.
def delayed_download(url, image_dir, image_name, interval):
    """Wait ``interval`` seconds, then download ``url`` to ``image_dir/image_name``.

    The wait blocks the calling thread; the download itself is delegated to
    ``download_image_chunked``.
    """
    # Pause for the requested interval before starting the transfer.
    time.sleep(interval)
    download_image_chunked(url, image_dir, image_name)
def get_input():
    """Ask for the URL, image count and interval, then start the download.

    Three modal dialogs collect the values. If any dialog is cancelled, the
    URL is empty, the count is not positive or the interval is negative, an
    error dialog is shown instead of starting the download. Valid input is
    forwarded to ``main`` with ``'Image'`` as the target directory.
    """
    root = tk.Tk()
    root.withdraw()  # hide the empty main window; only the dialogs are shown
    try:
        url = simpledialog.askstring("输入", "请输入要爬取的链接:", parent=root)
        count = simpledialog.askinteger("输入", "请输入要爬取图片的数量:", parent=root)
        interval = simpledialog.askfloat("输入", "请输入图片下载间隔(秒):", parent=root)
        # A cancelled dialog yields None, and comparing None with a number
        # raises TypeError, so rule None out before the range checks.
        is_valid = (
            bool(url)
            and count is not None and count > 0
            and interval is not None and interval >= 0  # interval may be zero
        )
        if is_valid:
            main(url, 'Image', count, interval)
        else:
            messagebox.showerror("错误", "无效的输入,请重新输入。")
    finally:
        # Release the hidden root window once the dialogs are done.
        root.destroy()
# Start the interactive prompt only when the file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    get_input()