mirror of
https://github.com/iBug/pac.git
synced 2025-07-13 12:52:15 +08:00
Add GFWList parser script
This commit is contained in:
parent
0df3228abd
commit
12304fe245
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,4 +1,5 @@
|
||||
# Output
|
||||
gfwlist.txt
|
||||
release-info.md
|
||||
*.pac
|
||||
dist/
|
||||
|
75
gfwlist.py
Executable file
75
gfwlist.py
Executable file
@ -0,0 +1,75 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import base64
|
||||
import json
|
||||
import urllib.parse
|
||||
import requests
|
||||
|
||||
|
||||
# Address the rule list is downloaded from; get_gfwlist() Base64-decodes
# the response body fetched from this URL.
GFWLIST_URL = 'https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt'
|
||||
|
||||
|
||||
def get_gfwlist():
    """Download the rule list from ``GFWLIST_URL`` and return it as text.

    The response body is Base64-decoded, interpreted as UTF-8, and has its
    trailing newline characters removed.

    Returns:
        str: The decoded rule list.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond in time.
    """
    # requests applies no timeout by default, so a stalled connection would
    # block the script forever; bound the wait explicitly.
    r = requests.get(GFWLIST_URL, timeout=30)
    r.raise_for_status()
    return base64.b64decode(r.text).decode("utf-8").rstrip("\n")
|
||||
|
||||
|
||||
def update_domains(domains, host, mode=0):
    """Insert *host* into the nested *domains* dictionary, in place.

    Leading and trailing dots are removed from *host*, which is then split
    on "." and walked from the last label to the first, descending one
    dictionary level per label (missing levels are created). The innermost
    dictionary gets the key "@" set to *mode*; callers in this file pass 0
    for blacklist rules and 1 for whitelist rules.
    """
    node = domains
    labels = host.strip(".").split(".")
    for label in reversed(labels):
        # setdefault keeps an existing subtree and creates one otherwise.
        node = node.setdefault(label, {})
    node["@"] = mode
|
||||
|
||||
|
||||
def parse_gfwlist(text):
    """Parse decoded GFWList rule text into domain and pattern collections.

    Each non-empty line is one rule. Lines are stripped of surrounding
    whitespace first; blank lines, "!" comments and "[...]" header lines
    (such as the "[AutoProxy x.y.z]" banner at the top of the list) are
    ignored. A leading "@@" marks a whitelist rule, everything else is a
    blacklist rule.

    Args:
        text: The decoded rule list, one rule per line.

    Returns:
        tuple: ``(domains, blackpat, whitepat)`` where *domains* is the
        nested dictionary built by ``update_domains`` from "||" rules, and
        *blackpat* / *whitepat* are lists of wildcard patterns built from
        all remaining blacklist / whitelist rules.
    """
    domains = {}
    blackpat = []  # blacklisted patterns
    whitepat = []  # whitelisted patterns

    for raw_line in text.splitlines():
        # Strip first so indented comments and trailing blanks are handled
        # the same way as their unindented forms.
        line = raw_line.strip()
        # "[" starts the list header, which is metadata rather than a rule;
        # without this check it would be emitted as a keyword pattern.
        if not line or line.startswith(("!", "[")):
            continue

        mode = 0  # default to blacklist
        if line.startswith("@@"):
            mode = 1  # now it's whitelist
            line = line[2:]

        if line.startswith("||"):
            # Domain prefix rule: store the host in the domain tree.
            update_domains(domains, line[2:], mode)
            continue

        # Keyword pattern. A single vertical bar at either side anchors the
        # pattern to the string boundary; an unanchored side gets a "*"
        # wildcard instead.
        if line.startswith("|"):
            line = line[1:]
        else:
            line = "*" + line
        if line.endswith("|"):
            line = line[:-1]
        else:
            line = line + "*"

        if mode == 0:
            blackpat.append(line)
        else:
            whitepat.append(line)

    return domains, blackpat, whitepat
|
||||
|
||||
|
||||
def generate_pac_partial():
    """Build the JavaScript variable declarations for the PAC file.

    Fetches the rule list with ``get_gfwlist``, parses it with
    ``parse_gfwlist``, and returns a string declaring ``DOMAINS``,
    ``BLACKPAT`` and ``WHITEPAT``, each assigned the JSON serialization
    (2-space indent) of the corresponding parse result.
    """
    template = "var DOMAINS = {};\n\nvar BLACKPAT = {};\n\nvar WHITEPAT = {};\n"
    parsed = parse_gfwlist(get_gfwlist())
    # parse_gfwlist returns (domains, blackpat, whitepat) in template order.
    serialized = [json.dumps(part, indent=2) for part in parsed]
    return template.format(*serialized)
|
||||
|
||||
|
||||
# When run as a script, write the generated PAC fragment to standard output.
if __name__ == '__main__':
    pac_partial = generate_pac_partial()
    print(pac_partial)
|
Loading…
Reference in New Issue
Block a user