mirror of
https://github.com/iBug/pac.git
synced 2025-07-13 21:02:16 +08:00
Add GFWList parser script
This commit is contained in:
parent
0df3228abd
commit
12304fe245
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,4 +1,5 @@
|
|||||||
# Output
|
# Output
|
||||||
|
gfwlist.txt
|
||||||
release-info.md
|
release-info.md
|
||||||
*.pac
|
*.pac
|
||||||
dist/
|
dist/
|
||||||
|
75
gfwlist.py
Executable file
75
gfwlist.py
Executable file
@ -0,0 +1,75 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
import urllib.parse
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
# URL that get_gfwlist() downloads; the response body is base64-decoded there.
GFWLIST_URL = 'https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt'
|
||||||
|
|
||||||
|
|
||||||
|
def get_gfwlist():
    """Download GFWList and return its decoded rule text.

    The body served at GFWLIST_URL is base64-decoded, interpreted as UTF-8
    and returned with trailing newlines removed.

    Returns:
        The decoded list as a single string, one rule per line.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not respond in time.
    """
    # requests applies no timeout by default, so a stalled connection would
    # block the script indefinitely; fail after 30 seconds instead.
    r = requests.get(GFWLIST_URL, timeout=30)
    r.raise_for_status()
    return base64.b64decode(r.text).decode("utf-8").rstrip("\n")
|
||||||
|
|
||||||
|
|
||||||
|
def update_domains(domains, host, mode=0):
    """Insert *host* into the nested *domains* tree, modifying it in place.

    The host name is stripped of leading/trailing dots and split into
    labels, which are walked from the rightmost label to the leftmost.
    Each label becomes a key one level deeper in the tree, and the node
    reached last gets *mode* stored under the ``"@"`` key.

    Args:
        domains: Nested dict acting as the tree root.
        host: Dotted host name, e.g. ``"www.example.com"``.
        mode: Value recorded for the host (defaults to 0).
    """
    labels = reversed(host.strip(".").split("."))

    node = domains
    for label in labels:
        # Descend into the child for this label, creating it when missing.
        node = node.setdefault(label, {})
    node["@"] = mode
|
||||||
|
|
||||||
|
|
||||||
|
def parse_gfwlist(text):
    """Split decoded GFWList text into a domain tree and keyword patterns.

    Lines are processed one by one:

    * blank lines, ``!`` comments and the ``[AutoProxy ...]`` header are
      skipped;
    * a leading ``@@`` marks a whitelist rule (mode 1) and is removed;
      every other rule is a blacklist rule (mode 0);
    * ``||host`` rules are recorded in the domain tree via
      ``update_domains``;
    * any other rule becomes a wildcard pattern. A single ``|`` at either
      end anchors that end and is removed; an unanchored end gets ``*``.

    Args:
        text: The decoded list, one rule per line.

    Returns:
        A ``(domains, blackpat, whitepat)`` tuple: the nested domain dict,
        the list of blacklist patterns and the list of whitelist patterns.
    """
    domains = {}
    blackpat = []  # blacklisted patterns
    whitepat = []  # whitelisted patterns

    for line in text.splitlines():
        if not line.strip() or line.startswith("!"):
            continue  # ignore comments and empty lines
        if line.startswith("[AutoProxy"):
            # Format header (e.g. "[AutoProxy 0.2.9]"), not a rule. Without
            # this check it would be emitted as a bogus blacklist pattern.
            continue

        mode = 0  # default to blacklist
        if line.startswith("@@"):
            mode = 1  # now it's whitelist
            line = line[2:]

        if line.startswith("||"):
            # Domain prefix
            update_domains(domains, line[2:], mode)
            continue

        # Keyword pattern
        # NOTE(review): regex rules written as /.../ also end up here and are
        # wrapped like plain keywords -- confirm the consumer copes with them.
        # Single vertical line at either side means string boundary
        if line.startswith("|"):
            line = line[1:]
        else:
            line = "*" + line
        if line.endswith("|"):
            line = line[:-1]
        else:
            line = line + "*"

        if mode == 0:
            blackpat.append(line)
        else:
            whitepat.append(line)

    return domains, blackpat, whitepat
|
||||||
|
|
||||||
|
|
||||||
|
def generate_pac_partial():
    """Build the JavaScript variable declarations for the parsed list.

    Fetches the list with ``get_gfwlist``, parses it with ``parse_gfwlist``
    and renders the three results as pretty-printed JSON assigned to the
    JavaScript variables ``DOMAINS``, ``BLACKPAT`` and ``WHITEPAT``.

    Returns:
        The JavaScript source fragment as a string.
    """
    template = "var DOMAINS = {};\n\nvar BLACKPAT = {};\n\nvar WHITEPAT = {};\n"
    # parse_gfwlist returns (domains, blackpat, whitepat), which is exactly
    # the order of the placeholders in the template.
    parsed = parse_gfwlist(get_gfwlist())
    rendered = [json.dumps(part, indent=2) for part in parsed]
    return template.format(*rendered)
|
||||||
|
|
||||||
|
|
||||||
|
# When run as a script, write the generated PAC fragment to standard output.
if __name__ == '__main__':
    pac_partial = generate_pac_partial()
    print(pac_partial)
|
Loading…
Reference in New Issue
Block a user