上传文件至 /

This commit is contained in:
mei 2025-02-11 15:12:20 +08:00
parent a3722737c0
commit 5fbcb0252c
4 changed files with 13651 additions and 0 deletions

12938
malicious_ips.txt Normal file

File diff suppressed because it is too large Load Diff

317
v2-cidr.py Normal file
View File

@ -0,0 +1,317 @@
import geoip2.database
import matplotlib.pyplot as plt
import geopandas as gpd
import pandas as pd
import numpy as np
import ipaddress
import os
import sys
from collections import defaultdict
import matplotlib.font_manager as fm
from adjustText import adjust_text
import warnings
# Input and output locations used by the whole script.
IP_FILE = 'malicious_ips.txt'
GEOIP_DB = 'GeoLite2-City.mmdb'
OUTPUT_CHART = 'ip_geo_distribution.png'
MAP_DATA_URL = 'https://naciscdn.org/naturalearth/10m/cultural/ne_10m_admin_0_countries.zip'

# Runtime tuning. The single-thread setting was added by the original author
# to silence a memory-leak warning.
# NOTE(review): this is assigned after the numeric libraries are imported at
# the top of the file - confirm it still takes effect at that point.
os.environ["OMP_NUM_THREADS"] = "1"
MAX_CACHE_SIZE = 10000  # upper bound on the number of cached IP lookups

# Font able to render the Chinese labels: a fixed path on Windows, another
# fixed path elsewhere, and Matplotlib's own sans-serif pick when the chosen
# file does not exist.
if os.name == 'nt':
    FONT_PATH = 'C:/Windows/Fonts/msyh.ttc'
else:
    FONT_PATH = '/System/Library/Fonts/Supplemental/Songti.ttc'
if not os.path.exists(FONT_PATH):
    FONT_PATH = fm.findfont(fm.FontProperties(family=['sans-serif']))
def configure_matplotlib():
    """Make Matplotlib use the font at FONT_PATH for sans-serif text.

    Also turns off the Unicode minus sign. Any error raised while applying
    the settings is reported as a warning instead of propagating.
    """
    try:
        family_name = fm.FontProperties(fname=FONT_PATH).get_name()
        settings = plt.rcParams
        settings['font.sans-serif'] = family_name
        settings['axes.unicode_minus'] = False
    except Exception as exc:
        warnings.warn(f"字体配置异常: {str(exc)}")
def cidr_to_ips(cidr, max_samples=4096):
    """Yield a sample of host addresses (as strings) from a CIDR block.

    Large networks are thinned with a prefix-dependent stride: IPv4 networks
    of /20 or shorter use a stride of ``2 ** (24 - prefixlen)`` and IPv6
    networks of /48 or shorter use ``2 ** (64 - prefixlen)``; everything else
    starts at a stride of 1. Addresses are computed by offset instead of
    walking every host, so the cost is proportional to the number of
    addresses yielded, not to the size of the network.

    Args:
        cidr: Network in CIDR notation; surrounding whitespace is ignored and
            host bits may be set (``strict=False``).
        max_samples: Upper bound on the number of addresses yielded. The
            stride is widened when the prefix-based stride alone would yield
            more than this (in practice only large IPv6 blocks). ``None`` or
            a non-positive value disables the cap.

    Yields:
        str: Sampled host addresses in ascending order. Nothing is yielded
        (and a warning is issued) when ``cidr`` cannot be parsed.
    """
    try:
        network = ipaddress.ip_network(cidr.strip(), strict=False)
    except ValueError as e:
        warnings.warn(f"无效CIDR格式: {cidr} - {str(e)}")
        return

    prefix = network.prefixlen
    if network.version == 4:
        step = 2 ** (24 - prefix) if prefix <= 20 else 1
    else:
        step = 2 ** (64 - prefix) if prefix <= 48 else 1

    # Same host range that ip_network.hosts() iterates: IPv4 drops the network
    # and broadcast addresses, IPv6 drops only the first address, and the two
    # longest prefixes keep every address.
    base = int(network.network_address)
    total = network.num_addresses
    if prefix >= network.max_prefixlen - 1:
        first, host_count = base, total
    elif network.version == 4:
        first, host_count = base + 1, total - 2
    else:
        first, host_count = base + 1, total - 1

    if max_samples is not None and max_samples > 0:
        # Ceiling division: smallest stride that keeps the yield count <= cap.
        step = max(step, -(-host_count // max_samples))

    make_address = type(network.network_address)
    for offset in range(0, host_count, step):
        yield str(make_address(first + offset))
def load_ips(filename):
    """Stream IP address strings from a UTF-8 text file, one entry per line.

    Blank lines are skipped. A line containing ``/`` is handed to
    ``cidr_to_ips`` and its addresses are yielded in turn. Any other line is
    yielded as-is when it parses as an IP address; otherwise a warning is
    issued and the line is dropped.

    Args:
        filename: Path of the file to read.

    Yields:
        str: One IP address per iteration.
    """
    with open(filename, 'r', encoding='utf-8') as handle:
        for raw in handle:
            entry = raw.strip()
            if not entry:
                continue
            if '/' in entry:
                # CIDR notation: expand into individual addresses.
                yield from cidr_to_ips(entry)
                continue
            try:
                ipaddress.ip_address(entry)
            except ValueError:
                warnings.warn(f"忽略无效IP: {entry}")
            else:
                yield entry
class GeoIPCache:
    """GeoIP city lookups backed by a bounded least-recently-used cache.

    Usable as a context manager; leaving the ``with`` block closes the
    underlying database reader.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, db_path, max_size=None):
        """Open the database at ``db_path``.

        Args:
            db_path: Path passed to ``geoip2.database.Reader``.
            max_size: Maximum number of cached results; defaults to the
                module-level ``MAX_CACHE_SIZE`` when omitted.
        """
        self.reader = geoip2.database.Reader(db_path)
        # Insertion-ordered dict doubles as the LRU queue: the first key is
        # the least recently used, the last key the most recently used.
        self.cache = {}
        self.max_size = MAX_CACHE_SIZE if max_size is None else max_size

    @property
    def lru(self):
        """Cached IPs ordered from most to least recently used."""
        return list(self.cache)[::-1]

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def lookup(self, ip):
        """Return ``(country, city, latitude, longitude)`` for ``ip``.

        Missing country or city names are reported as ``'Unknown'``; missing
        coordinates as ``None``. Coordinates are rounded to two decimals.
        A failed database query yields ``('Unknown', 'Unknown', None, None)``.
        Results, including failures, are cached.
        """
        try:
            result = self.cache.pop(ip)
        except KeyError:
            result = self._query(ip)
        # (Re)insert so this IP becomes the most recently used entry.
        self.cache[ip] = result
        if len(self.cache) > self.max_size:
            del self.cache[next(iter(self.cache))]
        return result

    def _query(self, ip):
        """Query the database for ``ip`` without touching the cache."""
        try:
            response = self.reader.city(ip)
        except Exception:
            # Deliberate best-effort: any reader failure maps to "unknown".
            return ('Unknown', 'Unknown', None, None)
        lat = response.location.latitude
        lon = response.location.longitude
        # Round each coordinate on its own so a missing value does not
        # discard the country and city that were found.
        return (
            response.country.name or 'Unknown',
            response.city.name or 'Unknown',
            None if lat is None else round(lat, 2),
            None if lon is None else round(lon, 2),
        )

    def close(self):
        """Close the underlying database reader."""
        self.reader.close()
def geo_lookup(ips):
    """Tally countries and cities for an iterable of IP addresses.

    Each address is resolved through a ``GeoIPCache`` opened on ``GEOIP_DB``.

    Args:
        ips: Iterable of IP address strings.

    Returns:
        tuple: ``(country_data, city_data, coord_data)`` where
        ``country_data`` maps country name -> count, ``city_data`` maps
        ``(city, country, lat, lon)`` -> count, and ``coord_data`` is a list
        of ``(lon, lat)`` pairs, one per address that resolved to a city.
    """
    country_data = defaultdict(int)
    city_data = defaultdict(int)
    coord_data = []
    with GeoIPCache(GEOIP_DB) as cache:
        for ip in ips:
            country, city, lat, lon = cache.lookup(ip)
            if country and country != 'Unknown':
                country_data[country] += 1
            # Compare against None explicitly: 0.0 is a valid coordinate
            # and must not be treated as "missing".
            has_coords = lat is not None and lon is not None
            if city and city != 'Unknown' and has_coords:
                city_data[(city, country, lat, lon)] += 1
                coord_data.append((lon, lat))
    return country_data, city_data, coord_data
def create_city_layer(city_data):
    """Build a point layer with one row per (city, country).

    Args:
        city_data: Mapping of ``(city, country, lat, lon)`` -> count.

    Returns:
        A ``GeoDataFrame`` with columns City, Country, Latitude, Longitude,
        Count and a point geometry, or ``None`` when there is nothing to
        plot. Entries sharing a city and country are merged: coordinates are
        averaged (unweighted) and counts are summed.
    """
    # Without this guard an empty mapping produces a frame with no columns
    # and the groupby below fails on the missing 'City' key.
    if not city_data:
        return None

    city_df = pd.DataFrame([
        {
            'City': city,
            'Country': country,
            'Latitude': lat,
            'Longitude': lon,
            'Count': count,
        }
        for (city, country, lat, lon), count in city_data.items()
    ])
    city_df = city_df.groupby(['City', 'Country']).agg({
        'Latitude': 'mean',
        'Longitude': 'mean',
        'Count': 'sum'
    }).reset_index()
    if city_df.empty:
        return None
    return gpd.GeoDataFrame(
        city_df,
        geometry=gpd.points_from_xy(city_df.Longitude, city_df.Latitude)
    )
def visualize_distribution(data):
    """Render the country choropleth plus city hot-spot layer to OUTPUT_CHART.

    Args:
        data: ``(country_data, city_data, coords)``. ``country_data`` maps
            country name -> count and ``city_data`` maps
            ``(city, country, lat, lon)`` -> count. The third element is
            accepted for compatibility but not used.

    Raises:
        Exception: Any failure while loading the map or drawing is printed
            and then re-raised unchanged. The figure is closed either way.
    """
    configure_matplotlib()
    country_data, city_data, _coords = data
    fig = None
    try:
        # Country outlines are read from MAP_DATA_URL on every call.
        world = gpd.read_file(MAP_DATA_URL)

        # Rewrite map labels so they join against the lookup's country names.
        # NOTE(review): the last entry renames the map's 'Taiwan'; confirm the
        # GeoIP database emits the same label, otherwise that row never joins.
        name_mapping = {
            'United States of America': 'United States',
            'Russian Federation': 'Russia',
            'Iran (Islamic Republic of)': 'Iran',
            'Viet Nam': 'Vietnam',
            'Korea, Republic of': 'South Korea',
            'Hong Kong S.A.R.': 'Hong Kong',
            'Taiwan': 'Taiwan Province of China'
        }
        world['NAME'] = world['NAME'].replace(name_mapping)

        # Left join keeps every country; those without data are drawn grey.
        country_df = world.merge(
            pd.DataFrame.from_dict(country_data, orient='index', columns=['Country_Count']),
            how="left",
            left_on='NAME',
            right_index=True
        )

        fig, ax = plt.subplots(figsize=(24, 16))

        # Country layer.
        country_df.plot(
            ax=ax,
            column='Country_Count',
            cmap='YlOrRd',
            edgecolor='#333333',
            linewidth=0.5,
            legend=True,
            legend_kwds={
                'label': "国家级别IP数量",
                'orientation': "horizontal",
                'shrink': 0.5
            },
            missing_kwds={"color": "lightgrey"}
        )

        # City layer.
        city_gdf = create_city_layer(city_data)
        if city_gdf is not None and not city_gdf.empty:
            counts = city_gdf['Count']
            min_count, max_count = counts.min(), counts.max()
            # Scale marker area linearly with count; a single fixed size is
            # used when every city has the same count.
            if min_count != max_count:
                sizes = np.interp(counts, (min_count, max_count), (20, 200))
            else:
                sizes = 100
            city_gdf.plot(
                ax=ax,
                markersize=sizes,
                color='darkred',
                alpha=0.6,
                edgecolor='black',
                linewidth=0.3,
                marker='o'
            )

            # Label the 20 busiest cities, then let adjust_text spread them.
            label_font = fm.FontProperties(fname=FONT_PATH, size=8)
            texts = [
                ax.text(
                    row.geometry.x,
                    row.geometry.y,
                    f"{row['City']}\n{row['Count']}",
                    fontproperties=label_font,
                    ha='center',
                    va='center',
                    bbox=dict(
                        facecolor='white',
                        alpha=0.8,
                        edgecolor='none',
                        boxstyle='round,pad=0.2'
                    )
                )
                for _, row in city_gdf.nlargest(20, 'Count').iterrows()
            ]
            adjust_text(
                texts,
                arrowprops=dict(
                    arrowstyle='-',
                    color='gray',
                    lw=0.5
                ),
                ax=ax
            )

        # Data-source credit.
        ax.annotate(
            '数据来源: MaxMind GeoLite2',
            xy=(0.72, 0.04),
            xycoords='figure fraction',
            fontproperties=fm.FontProperties(fname=FONT_PATH, size=9)
        )
        ax.set_title(
            '全球恶意IP分布热力图(国家/城市层级)',
            fontproperties=fm.FontProperties(fname=FONT_PATH, size=22),
            pad=20
        )
        ax.axis('off')
        fig.savefig(OUTPUT_CHART, dpi=400, bbox_inches='tight')
    except Exception as e:
        print(f"地图渲染失败: {str(e)}")
        raise
    finally:
        # Release the figure on failure as well as on success.
        if fig is not None:
            plt.close(fig)
if __name__ == '__main__':
    # NOTE(review): warnings.warn() calls made without a category are
    # UserWarning by default, so this filter presumably also hides this
    # script's own invalid-IP / invalid-CIDR messages - confirm that is
    # intended.
    warnings.filterwarnings("ignore", category=UserWarning)

    # Step 1: read and expand the address list.
    ips = list(load_ips(IP_FILE))
    print(f"成功加载 {len(ips)} 个IP地址")

    # Step 2: resolve every address.
    country_data, city_data, coords = geo_lookup(ips)

    # Step 3: print the ten largest countries and cities.
    print("\n国家统计TOP10:")
    ranked_countries = sorted(country_data.items(), key=lambda pair: pair[1], reverse=True)
    for country, count in ranked_countries[:10]:
        print(f"{country}: {count}")

    print("\n城市统计TOP10:")
    ranked_cities = sorted(city_data.items(), key=lambda pair: pair[1], reverse=True)
    sorted_cities = ranked_cities[:10]
    for (city, country, _, _), count in sorted_cities:
        print(f"{country}-{city}: {count}")

    # Step 4: draw the chart.
    visualize_distribution((country_data, city_data, coords))
    print(f"\n可视化结果已保存至 {OUTPUT_CHART}")

151
v2.py Normal file
View File

@ -0,0 +1,151 @@
import geoip2.database
import matplotlib
import matplotlib.pyplot as plt
import geopandas as gpd
import pandas as pd
import os
import sys
from collections import defaultdict
import matplotlib.font_manager as fm
import warnings
# Input and output locations used by the whole script.
IP_FILE = 'malicious_ips.txt'
GEOIP_DB = 'GeoLite2-City.mmdb'
OUTPUT_CHART = 'ip_geo_distribution.png'
MAP_DATA_URL = 'https://naciscdn.org/naturalearth/50m/cultural/ne_50m_admin_0_countries.zip'

# Single-thread setting added by the original author as a memory-leak fix.
# NOTE(review): this is assigned after the libraries at the top of the file
# are imported - confirm it still takes effect at that point.
os.environ["OMP_NUM_THREADS"] = "1"

# Font able to render the Chinese labels: a fixed path on Windows, another
# fixed path elsewhere, and Matplotlib's own sans-serif pick when the chosen
# file does not exist.
if os.name == 'nt':
    FONT_PATH = 'C:/Windows/Fonts/msyh.ttc'
else:
    FONT_PATH = '/System/Library/Fonts/Supplemental/Songti.ttc'
if not os.path.exists(FONT_PATH):
    FONT_PATH = fm.findfont(fm.FontProperties(family=['sans-serif']))
def configure_matplotlib():
    """Make Matplotlib use the font at FONT_PATH for sans-serif text.

    Also turns off the Unicode minus sign. Any error raised while applying
    the settings is reported as a warning instead of propagating.
    """
    try:
        family_name = fm.FontProperties(fname=FONT_PATH).get_name()
        settings = plt.rcParams
        settings['font.sans-serif'] = family_name
        settings['axes.unicode_minus'] = False
    except Exception as exc:
        warnings.warn(f"字体配置异常: {str(exc)}")
def version_aware_legend():
    """Return the legend keyword dict suited to the installed Matplotlib.

    Returns:
        dict: ``{'title': ...}`` when Matplotlib is version 3.4 or newer,
        ``{'label': ...}`` otherwise. When the ``packaging`` library cannot
        be imported, a hint is printed and the ``'label'`` form is returned.
    """
    try:
        from packaging.version import Version
    except ImportError:
        print("建议安装packaging库以获得更好的版本兼容性pip install packaging")
        return {'label': '恶意IP数量'}

    installed = Version(matplotlib.__version__)
    if installed >= Version('3.4'):
        return {'title': '恶意IP数量'}
    return {'label': '恶意IP数量'}
def visualize_distribution(data):
    """Render a per-country choropleth of IP counts to OUTPUT_CHART.

    Args:
        data: Mapping of country name -> count.

    Returns:
        bool: ``True`` when the chart was written, ``False`` when rendering
        failed. Failures are printed, not raised. The figure is closed in
        both cases.
    """
    fig = None
    try:
        configure_matplotlib()
        # Country outlines are read from MAP_DATA_URL on every call.
        world = gpd.read_file(MAP_DATA_URL)

        # Rewrite map labels so they join against the lookup's country names.
        # NOTE(review): the last entry renames the map's 'Taiwan'; confirm the
        # GeoIP database emits the same label, otherwise that row never joins.
        name_mapping = {
            'United States of America': 'United States',
            'Russian Federation': 'Russia',
            'Iran (Islamic Republic of)': 'Iran',
            'Viet Nam': 'Vietnam',
            'Korea, Republic of': 'South Korea',
            'Hong Kong S.A.R.': 'Hong Kong',
            'Taiwan': 'Taiwan Province of China'
        }
        world['NAME'] = world['NAME'].replace(name_mapping)

        # Left join keeps every country; those without data are drawn grey.
        df = world.merge(
            pd.DataFrame.from_dict(data, orient='index', columns=['count']),
            how="left",
            left_on='NAME',
            right_index=True
        )

        fig, ax = plt.subplots(figsize=(20, 15))

        plot_params = {
            'column': 'count',
            'ax': ax,
            'cmap': 'YlOrRd',
            'edgecolor': 'black',
            'linewidth': 0.3,
            'missing_kwds': {"color": "lightgrey"},
            'legend': True,
            'legend_kwds': version_aware_legend()
        }

        # Pick the classification scheme by whether mapclassify imports.
        # NOTE(review): verify the fallback scheme actually works without
        # mapclassify installed - geopandas may require it for any scheme.
        try:
            import mapclassify  # noqa: F401 - imported only as an availability probe
            plot_params['scheme'] = 'NaturalBreaks'
        except ImportError:
            plot_params['scheme'] = 'equal_interval'

        df.plot(**plot_params)

        # Label the 15 largest countries at their centroid. Labelling is
        # best-effort: a country that cannot be placed is skipped.
        label_font = fm.FontProperties(fname=FONT_PATH, size=8)
        top_countries = sorted(data.items(), key=lambda x: x[1], reverse=True)[:15]
        for country, count in top_countries:
            try:
                geom = df[df['NAME'] == country].geometry
                if geom.empty:
                    continue
                centroid = geom.centroid
                ax.text(
                    x=centroid.x.values[0],
                    y=centroid.y.values[0],
                    s=f"{country}\n{count}",
                    fontproperties=label_font,
                    ha='center',
                    va='center',
                    bbox=dict(facecolor='white', alpha=0.7, edgecolor='none')
                )
            except Exception:
                continue

        ax.set_title('全球恶意IP分布热力图',
                     fontproperties=fm.FontProperties(fname=FONT_PATH, size=18))
        ax.axis('off')
        fig.savefig(OUTPUT_CHART, dpi=400, bbox_inches='tight')
        return True
    except Exception as e:
        print(f"地图渲染失败: {str(e)}")
        if "legend_kwds" in str(e):
            print("解决方案:")
            print("1. 升级matplotlib: pip install matplotlib --upgrade")
            print("2. 或修改代码中version_aware_legend()的返回值")
        return False
    finally:
        # Release the figure on failure as well as on success.
        if fig is not None:
            plt.close(fig)
if __name__ == '__main__':
    warnings.filterwarnings("ignore", category=UserWarning)

    # Load the address list, one entry per non-blank line. The file is
    # closed as soon as it has been read.
    with open(IP_FILE, 'r', encoding='utf-8') as f:
        ips = [line.strip() for line in f if line.strip()]
    print(f"成功加载 {len(ips)} 个IP地址")

    # Count addresses per country; addresses the database cannot resolve
    # are skipped.
    geo_data = defaultdict(int)
    with geoip2.database.Reader(GEOIP_DB) as reader:
        for ip in ips:
            try:
                response = reader.city(ip)
            except Exception:
                # Best-effort per address; Ctrl-C still interrupts the run.
                continue
            if response.country.name:
                geo_data[response.country.name] += 1

    print("地理位置统计:")
    for country, count in sorted(geo_data.items(), key=lambda x: x[1], reverse=True)[:15]:
        print(f"{country}: {count}")

    # Only an explicit False result suppresses the "saved" message, so a
    # renderer that returns nothing is still treated as success.
    if visualize_distribution(geo_data) is not False:
        print(f"最终图表已保存至 {OUTPUT_CHART}")

245
v2_city.py Normal file
View File

@ -0,0 +1,245 @@
import geoip2.database
import matplotlib
import matplotlib.pyplot as plt
import geopandas as gpd
import pandas as pd
import numpy as np
import os
from collections import defaultdict
import matplotlib.font_manager as fm
from adjustText import adjust_text
# Input and output locations used by the whole script.
IP_FILE = 'malicious_ips.txt'
GEOIP_DB = 'GeoLite2-City.mmdb'
OUTPUT_CHART = 'ip_geo_distribution.png'
MAP_DATA_URL = 'https://naciscdn.org/naturalearth/10m/cultural/ne_10m_admin_0_countries.zip'

# Single-thread setting added by the original author as a memory optimisation.
# NOTE(review): this is assigned after the libraries at the top of the file
# are imported - confirm it still takes effect at that point.
os.environ["OMP_NUM_THREADS"] = "1"

# Font able to render the Chinese labels: a fixed path on Windows, another
# fixed path elsewhere, and Matplotlib's own sans-serif pick when the chosen
# file does not exist.
if os.name == 'nt':
    FONT_PATH = 'C:/Windows/Fonts/msyh.ttc'
else:
    FONT_PATH = '/System/Library/Fonts/Supplemental/Songti.ttc'
if not os.path.exists(FONT_PATH):
    FONT_PATH = fm.findfont(fm.FontProperties(family=['sans-serif']))
def configure_matplotlib():
    """Make Matplotlib use the font at FONT_PATH for sans-serif text.

    Also turns off the Unicode minus sign. Any error raised while applying
    the settings is printed instead of propagating.
    """
    try:
        family_name = fm.FontProperties(fname=FONT_PATH).get_name()
        settings = plt.rcParams
        settings['font.sans-serif'] = family_name
        settings['axes.unicode_minus'] = False
    except Exception as exc:
        print(f"字体配置异常: {str(exc)}")
def geo_lookup(ips, db_path):
    """Tally countries and cities for an iterable of IP addresses.

    Args:
        ips: Iterable of IP address strings.
        db_path: Path of the database opened with ``geoip2.database.Reader``.

    Returns:
        tuple: ``(country_data, city_data, coord_data)`` where
        ``country_data`` maps country name -> count, ``city_data`` maps
        ``(city, country, lat, lon)`` -> count with coordinates rounded to
        two decimals, and ``coord_data`` is a list of unrounded
        ``(longitude, latitude)`` pairs, one per address that resolved to a
        city. Addresses the reader cannot resolve are skipped.
    """
    city_data = defaultdict(int)
    country_data = defaultdict(int)
    coord_data = []
    with geoip2.database.Reader(db_path) as reader:
        for ip in ips:
            try:
                response = reader.city(ip)
            except Exception:
                # Best-effort per address: unresolvable entries are skipped.
                continue
            country = response.country.name
            city = response.city.name
            latitude = response.location.latitude
            longitude = response.location.longitude
            if country:
                country_data[country] += 1
            # Compare against None explicitly: 0.0 is a valid coordinate
            # and must not be treated as "missing".
            if city and latitude is not None and longitude is not None:
                city_key = (
                    city,
                    country,
                    round(latitude, 2),
                    round(longitude, 2)
                )
                city_data[city_key] += 1
                coord_data.append((longitude, latitude))
    return country_data, city_data, coord_data
def create_city_layer(city_data):
    """Build a point layer with one row per (city, country).

    Args:
        city_data: Mapping of ``(city, country, lat, lon)`` -> count.

    Returns:
        A ``GeoDataFrame`` with columns City, Country, Latitude, Longitude,
        Count and a point geometry, or ``None`` when there is nothing to
        plot. Entries sharing a city and country are merged: coordinates are
        averaged (unweighted) and counts are summed.
    """
    # Without this guard an empty mapping produces a frame with no columns
    # and the groupby below fails on the missing 'City' key.
    if not city_data:
        return None

    city_df = pd.DataFrame([
        {
            'City': city,
            'Country': country,
            'Latitude': lat,
            'Longitude': lon,
            'Count': count,
        }
        for (city, country, lat, lon), count in city_data.items()
    ])
    city_df = city_df.groupby(['City', 'Country']).agg({
        'Latitude': 'mean',
        'Longitude': 'mean',
        'Count': 'sum'
    }).reset_index()
    # Still needed: grouping can leave no rows (e.g. when a key is missing).
    if city_df.empty:
        return None
    city_gdf = gpd.GeoDataFrame(
        city_df,
        geometry=gpd.points_from_xy(city_df.Longitude, city_df.Latitude)
    )
    return city_gdf
def visualize_combined(data):
    """Render the country choropleth plus city hot-spot layer to OUTPUT_CHART.

    Args:
        data: ``(country_data, city_data, coords)``. ``country_data`` maps
            country name -> count and ``city_data`` maps
            ``(city, country, lat, lon)`` -> count. The third element is
            accepted for compatibility but not used.

    Raises:
        Exception: Errors from loading the map or drawing propagate to the
            caller; the figure is closed before they do.
    """
    configure_matplotlib()
    country_data, city_data, _coords = data
    # Country outlines are read from MAP_DATA_URL on every call.
    world = gpd.read_file(MAP_DATA_URL)

    # Rewrite map labels so they join against the lookup's country names.
    # NOTE(review): the last entry renames the map's 'Taiwan'; confirm the
    # GeoIP database emits the same label, otherwise that row never joins.
    name_mapping = {
        'United States of America': 'United States',
        'Russian Federation': 'Russia',
        'Iran (Islamic Republic of)': 'Iran',
        'Viet Nam': 'Vietnam',
        'Korea, Republic of': 'South Korea',
        'Hong Kong S.A.R.': 'Hong Kong',
        'Taiwan': 'Taiwan Province of China'
    }
    world['NAME'] = world['NAME'].replace(name_mapping)

    # Left join keeps every country; those without data are drawn grey.
    country_df = world.merge(
        pd.DataFrame.from_dict(country_data, orient='index', columns=['Country_Count']),
        how="left",
        left_on='NAME',
        right_index=True
    )
    city_gdf = create_city_layer(city_data)

    fig, ax = plt.subplots(figsize=(24, 16))
    try:
        # Country layer.
        country_df.plot(
            ax=ax,
            column='Country_Count',
            cmap='YlOrRd',
            edgecolor='black',
            linewidth=0.3,
            legend=True,
            legend_kwds={
                'label': "国家级别IP数量",
                'orientation': "horizontal",
                'shrink': 0.6
            },
            missing_kwds={"color": "lightgrey"}
        )

        # City layer.
        if city_gdf is not None and not city_gdf.empty:
            counts = city_gdf['Count']
            min_count, max_count = counts.min(), counts.max()
            # Scale marker area linearly with count; a single fixed size is
            # used when every city has the same count.
            if min_count == max_count:
                sizes = 100
            else:
                sizes = np.interp(counts, (min_count, max_count), (20, 200))
            city_gdf.plot(
                ax=ax,
                markersize=sizes,
                color='darkred',
                alpha=0.6,
                edgecolor='black',
                linewidth=0.3,
                marker='o',
                label='城市热点'
            )

            # Label the 20 busiest cities, then let adjust_text spread them.
            label_font = fm.FontProperties(fname=FONT_PATH, size=8)
            texts = [
                ax.text(
                    row.geometry.x,
                    row.geometry.y,
                    f"{row['City']}\n{row['Count']}",
                    fontproperties=label_font,
                    ha='center',
                    va='center',
                    bbox=dict(
                        facecolor='white',
                        alpha=0.8,
                        edgecolor='none',
                        boxstyle='round,pad=0.2'
                    )
                )
                for _, row in city_gdf.nlargest(20, 'Count').iterrows()
            ]
            adjust_text(
                texts,
                arrowprops=dict(
                    arrowstyle='-',
                    color='gray',
                    lw=0.5
                ),
                ax=ax
            )

        # Data-source credit.
        ax.annotate(
            '数据来源: MaxMind GeoLite2',
            xy=(0.68, 0.04),
            xycoords='figure fraction',
            fontproperties=fm.FontProperties(fname=FONT_PATH, size=9)
        )
        ax.set_title(
            '全球恶意IP分布热力图(国家/城市层级)',
            fontproperties=fm.FontProperties(fname=FONT_PATH, size=22),
            pad=20
        )
        ax.axis('off')
        fig.savefig(OUTPUT_CHART, dpi=400, bbox_inches='tight')
    finally:
        # Release the figure on failure as well as on success.
        plt.close(fig)
if __name__ == '__main__':
    # Step 1: read the address list, keeping only non-blank lines.
    ips = []
    with open(IP_FILE, 'r', encoding='utf-8') as source:
        for raw in source:
            entry = raw.strip()
            if entry:
                ips.append(entry)
    print(f"成功加载 {len(ips)} 个IP地址")

    # Step 2: resolve every address.
    country_data, city_data, coords = geo_lookup(ips, GEOIP_DB)

    # Step 3: print the ten largest countries and cities.
    print("\n国家统计TOP10:")
    ranked_countries = sorted(country_data.items(), key=lambda pair: pair[1], reverse=True)
    for country, count in ranked_countries[:10]:
        print(f"{country}: {count}")

    print("\n城市统计TOP10:")
    ranked_cities = sorted(city_data.items(), key=lambda pair: pair[1], reverse=True)
    sorted_cities = ranked_cities[:10]
    for (city, country, _, _), count in sorted_cities:
        print(f"{country}-{city}: {count}")

    # Step 4: draw the chart.
    visualize_combined((country_data, city_data, coords))
    print(f"\n可视化结果已保存至 {OUTPUT_CHART}")