From c09c33bcf18bc765e5f18e0c2b217a72b8a15561 Mon Sep 17 00:00:00 2001
From: iBug
Date: Mon, 27 Jul 2020 13:51:16 +0800
Subject: [PATCH] Slightly compact domain list

---
 code.js    |  2 ++
 gfwlist.py | 27 ++++++++++++++++++++++++---
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/code.js b/code.js
index 3ca92f6..817b525 100644
--- a/code.js
+++ b/code.js
@@ -44,6 +44,8 @@ function checkDomainType(host) {
         ptr = ptr[segment];
         if (ptr === undefined)
             break;
+        if (typeof ptr === "number")
+            return ptr;
         if (ptr["@"] !== undefined)
             type = ptr["@"];
     }
diff --git a/gfwlist.py b/gfwlist.py
index 5c35a15..64097b9 100755
--- a/gfwlist.py
+++ b/gfwlist.py
@@ -1,18 +1,25 @@
 #!/usr/bin/python3
 
+import os
 import base64
 import json
 import urllib.parse
 import requests
 
 
+GFWLIST_FILE = "gfwlist.txt"
 GFWLIST_URL = 'https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt'
 
 
 def get_gfwlist():
-    r = requests.get(GFWLIST_URL)
-    r.raise_for_status()
-    return base64.b64decode(r.text).decode("utf-8").rstrip("\n")
+    if os.path.isfile(GFWLIST_FILE):
+        with open(GFWLIST_FILE, "r") as f:
+            text = f.read()
+    else:
+        r = requests.get(GFWLIST_URL)
+        r.raise_for_status()
+        text = r.text
+    return base64.b64decode(text).decode("utf-8").rstrip("\n")
 
 
 def update_domains(domains, host, mode=0):
@@ -26,6 +33,19 @@ def update_domains(domains, host, mode=0):
     this["@"] = mode
 
 
+def postproc_domains(domains):
+    # Turn all {"@": 1} into 1 to save some text
+    keys = list(domains.keys())
+    for key in keys:
+        if key == "@":
+            continue
+        obj = domains[key]
+        if len(obj) == 1 and "@" in obj:
+            domains[key] = obj["@"]
+        else:
+            postproc_domains(obj)
+
+
 def parse_gfwlist(text):
     domains = {}
     blackpat = []  # blacklisted patterns
@@ -61,6 +81,7 @@ def parse_gfwlist(text):
                 blackpat.append(line)
             else:
                 whitepat.append(line)
+    postproc_domains(domains)
     return domains, blackpat, whitepat
 
 