Merge branch 'gfwlist'

This commit is contained in:
iBug 2020-07-27 14:15:56 +08:00
commit 3707d2fca0
5 changed files with 179 additions and 5 deletions

1
.gitignore vendored
View File

@ -1,4 +1,5 @@
# Output # Output
gfwlist.txt
release-info.md release-info.md
*.pac *.pac
dist/ dist/

View File

@ -2,13 +2,22 @@
PAC scripts for proxies PAC scripts for proxies
## 特点
基于 IP 地址白名单设计,位于白名单中的 IP 地址走直连,白名单以外的 IP 地址走代理(暂不支持 IPv6)。
另有 GFWList 版本从 [gfwlist/gfwlist](https://github.com/gfwlist/gfwlist) 获取域名及 URL 列表,优先匹配列表中的黑白名单,有效防止 DNS 污染。
每周六 12:00 (UTC) 会自动使用 GitHub Actions 运行[生成脚本](build.py)从数据源获取 IP 地址列表并生成 PAC 文件。
## 使用 ## 使用
获取方式:[本仓库的 Releases](https://github.com/iBug/pac/releases/latest) 获取方式:[本仓库的 Releases](https://github.com/iBug/pac/releases/latest)
每周六 12:00 (UTC) 会自动使用 GitHub Actions 运行[生成脚本](build.py)从数据源获取 IP 地址列表并生成 PAC 文件。 - `pac-<name>.txt` 包含从数据源 `<name>` 获取的 IP 地址列表(白名单)
- `pac-gfwlist-<name>.txt` 在 IP 白名单的基础上添加了 GFWList 的匹配
本代码是为 Shadowsocks Windows 4.1.9 及以上版本设计的,若要在旧版本中使用请手动修改一处字符串,详情见 [shadowsocks-windows#2761](https://github.com/shadowsocks/shadowsocks-windows/issues/2761)。 本代码是为 Shadowsocks Windows 4.1.9 及以上版本设计的,若要在旧版本中使用(或使用其他代理软件)请手动修改代码第 4 行 `__PROXY__` 为你的代理地址,详情见 [shadowsocks-windows#2761](https://github.com/shadowsocks/shadowsocks-windows/issues/2761)。
## 贡献 ## 贡献

View File

@ -6,6 +6,8 @@ import ipaddress
import requests import requests
from requests.exceptions import RequestException, HTTPError from requests.exceptions import RequestException, HTTPError
import gfwlist
SOURCES = { SOURCES = {
'ipdeny.com': 'http://www.ipdeny.com/ipblocks/data/aggregated/cn-aggregated.zone', 'ipdeny.com': 'http://www.ipdeny.com/ipblocks/data/aggregated/cn-aggregated.zone',
@ -13,6 +15,9 @@ SOURCES = {
} }
OUT_DIR = "dist" OUT_DIR = "dist"
# Stub content to disable GFWList check
GFWLIST_STUB = "var DOMAINS = {};\nvar BLACKPAT = [];\nvar WHITEPAT = [];\n"
def fetch_and_convert(src): def fetch_and_convert(src):
response = requests.get(src) response = requests.get(src)
@ -36,6 +41,9 @@ def main():
code = f.read() code = f.read()
code = code.replace("@@TIME@@", now.isoformat()[:-7]) code = code.replace("@@TIME@@", now.isoformat()[:-7])
gfwlist_part = gfwlist.generate_pac_partial()
gfwlist_stub = GFWLIST_STUB
os.makedirs(OUT_DIR, mode=0o755, exist_ok=True) os.makedirs(OUT_DIR, mode=0o755, exist_ok=True)
for key in SOURCES: for key in SOURCES:
print(f"Generating PAC script from source {key}") print(f"Generating PAC script from source {key}")
@ -45,10 +53,19 @@ def main():
continue continue
except HTTPError: except HTTPError:
continue continue
filename = f"pac-{key}.txt" filename = f"pac-{key}.txt"
filename_gfwlist = f"pac-gfwlist-{key}.txt"
with open(os.path.join(OUT_DIR, filename), "w") as f: with open(os.path.join(OUT_DIR, filename), "w") as f:
f.write(code) f.write(code)
f.write(data) f.write(data)
f.write("\n")
f.write(gfwlist_stub)
with open(os.path.join(OUT_DIR, filename_gfwlist), "w") as f:
f.write(code)
f.write(data)
f.write("\n")
f.write(gfwlist_part)
if __name__ == '__main__': if __name__ == '__main__':

54
code.js
View File

@ -1,6 +1,9 @@
// Author: iBug <ibugone.com> // Author: iBug <ibugone.com>
// Time: @@TIME@@ // Time: @@TIME@@
var proxy = __PROXY__;
var direct = "DIRECT";
function belongsToSubnet(host, list) { function belongsToSubnet(host, list) {
var ip = host.split(".").map(Number); var ip = host.split(".").map(Number);
ip = 0x1000000 * ip[0] + 0x10000 * ip[1] + 0x100 * ip[2] + ip[3]; ip = 0x1000000 * ip[0] + 0x10000 * ip[1] + 0x100 * ip[2] + ip[3];
@ -23,6 +26,40 @@ function belongsToSubnet(host, list) {
return (masked ^ list[x][0]) == 0; return (masked ^ list[x][0]) == 0;
} }
// Return true when `text` matches at least one of the shell-style wildcard
// expressions in `patterns` (as judged by the PAC built-in shExpMatch),
// false otherwise. An empty pattern list never matches.
function hasMatchedPattern(text, patterns) {
    return patterns.some(function (pattern) {
        return shExpMatch(text, pattern);
    });
}
// Look up `host` in the DOMAINS trie and report its list membership.
//
// DOMAINS is keyed by domain labels from the top-level label downwards
// ("www.example.com" is reached via DOMAINS["com"]["example"]["www"]).
// A node is either a number (a verdict that also covers every subdomain
// below it) or an object whose optional "@" entry holds the verdict for
// that exact suffix. The caller treats 0 as "use proxy" and 1 as "direct".
//
// Returns the verdict of the deepest matching suffix, or DOMAINS["@"]
// (normally undefined) when no suffix of `host` is listed.
function checkDomainType(host) {
    // Only own properties count as trie entries. A plain `ptr[segment]`
    // would also find members inherited from Object.prototype, so labels
    // such as "constructor" or "toString" would wander off the trie (and
    // e.g. Object.length is even a number, which would be returned as a
    // verdict).
    var hasOwn = Object.prototype.hasOwnProperty;
    var segments = host.split(".").reverse();
    var ptr = DOMAINS;
    var type = DOMAINS["@"];
    for (var i = 0; i < segments.length; i++) {
        var segment = segments[i];
        if (!hasOwn.call(ptr, segment))
            break;
        ptr = ptr[segment];
        // A bare number is a collapsed leaf: final verdict for this suffix.
        if (typeof ptr === "number")
            return ptr;
        // Remember the most specific verdict seen so far and keep descending.
        if (ptr["@"] !== undefined)
            type = ptr["@"];
    }
    return type;
}
// True when `url` matches any entry of the WHITEPAT pattern list.
function hasWhitelistedPattern(url) {
    var whitelisted = hasMatchedPattern(url, WHITEPAT);
    return whitelisted;
}
// True when `url` matches any entry of the BLACKPAT pattern list.
function hasBlacklistedPattern(url) {
    var blacklisted = hasMatchedPattern(url, BLACKPAT);
    return blacklisted;
}
function isChina(host) { function isChina(host) {
return belongsToSubnet(host, CHINA); return belongsToSubnet(host, CHINA);
} }
@ -31,10 +68,21 @@ function isLan(host) {
return belongsToSubnet(host, LAN); return belongsToSubnet(host, LAN);
} }
var proxy = __PROXY__;
var direct = "DIRECT";
function FindProxyForURL(url, host) { function FindProxyForURL(url, host) {
if (hasWhitelistedPattern(url)) {
return direct;
}
if (hasBlacklistedPattern(url)) {
return proxy;
}
var domainType = checkDomainType(host);
if (domainType === 0) {
return proxy;
} else if (domainType === 1) {
return direct;
}
// Fallback to IP whitelist
var remote = dnsResolve(host); var remote = dnsResolve(host);
if (!remote || remote.indexOf(":") !== -1) { if (!remote || remote.indexOf(":") !== -1) {
// resolution failed or is IPv6 addr // resolution failed or is IPv6 addr

99
gfwlist.py Executable file
View File

@ -0,0 +1,99 @@
#!/usr/bin/python3
import os
import base64
import json
import urllib.parse
import requests
# Path of an optional local copy of the Base64-encoded list; when this file
# exists, get_gfwlist() reads it instead of downloading.
GFWLIST_FILE = "gfwlist.txt"
# URL the Base64-encoded list is downloaded from when no local copy exists.
GFWLIST_URL = 'https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt'
def get_gfwlist():
    """Return the decoded GFWList rule text.

    A local copy at ``GFWLIST_FILE`` is used when it exists; otherwise the
    list is downloaded from ``GFWLIST_URL``. In both cases the payload is
    Base64-decoded, interpreted as UTF-8, and stripped of trailing newlines.

    Returns:
        str: the plain-text rule list.

    Raises:
        requests.exceptions.RequestException: the download failed, timed
            out, or returned an HTTP error status.
        ValueError: the payload is not valid Base64 / UTF-8.
    """
    if os.path.isfile(GFWLIST_FILE):
        # Base64 text is pure ASCII, so an explicit UTF-8 decode is safe and
        # avoids depending on the platform's default encoding.
        with open(GFWLIST_FILE, "r", encoding="utf-8") as f:
            text = f.read()
    else:
        # Without a timeout requests waits forever on a stalled connection,
        # which would hang the whole build.
        response = requests.get(GFWLIST_URL, timeout=30)
        response.raise_for_status()
        text = response.text
    return base64.b64decode(text).decode("utf-8").rstrip("\n")
def update_domains(domains, host, mode=0):
    """Record ``host`` in the ``domains`` trie with the given ``mode``.

    The trie is a nested dict keyed by domain labels from the top-level
    label downwards, so ``"www.example.com"`` is stored at
    ``domains["com"]["example"]["www"]``. The mode is stored under the
    special key ``"@"`` of the final node. Missing intermediate nodes are
    created; an existing mode for the same host is overwritten.

    Args:
        domains: the trie to update in place.
        host: domain name; surrounding whitespace and leading/trailing dots
            are ignored. A host with nothing left after trimming is skipped
            instead of being stored under an empty label.
        mode: value to store for the host (callers use 0 for blacklist and
            1 for whitelist).
    """
    trimmed = host.strip().strip(".")
    if not trimmed:
        return
    node = domains
    for label in reversed(trimmed.split(".")):
        node = node.setdefault(label, {})
    node["@"] = mode
def postproc_domains(domains):
    """Collapse leaf nodes of the domain trie in place to shrink the output.

    Every child node that contains nothing but its ``"@"`` entry (for
    example ``{"@": 1}``) is replaced by the bare value (``1``). Nodes with
    further children are processed recursively. The ``"@"`` entry of
    ``domains`` itself is left untouched.

    Values that are not dicts (i.e. already collapsed) are skipped, so the
    function can safely be applied to the same trie more than once.

    Args:
        domains: nested dict as built by ``update_domains``.
    """
    # Iterate over a snapshot because values are replaced during the walk.
    for key, node in list(domains.items()):
        if key == "@" or not isinstance(node, dict):
            continue
        if len(node) == 1 and "@" in node:
            domains[key] = node["@"]
        else:
            postproc_domains(node)
def parse_gfwlist(text):
    """Parse decoded GFWList text into a domain trie and URL pattern lists.

    Rule handling, per line:

    * empty lines and lines starting with ``!`` are ignored;
    * a leading ``@@`` marks the rule as whitelist (mode 1), otherwise it is
      blacklist (mode 0);
    * ``||host`` rules are stored in the domain trie via ``update_domains``;
    * rules starting with ``/`` (regular expressions) are not supported and
      are skipped;
    * anything else becomes a wildcard pattern: a ``|`` at either end
      anchors that end of the string, and an unanchored end gets a ``*``.

    Rules that are empty once their markers are removed (``|``, ``@@``,
    ``||`` ...) are dropped: they would otherwise turn into the catch-all
    patterns ``*`` / ``**`` or an empty domain label.

    Args:
        text: the decoded rule list. The first line is always skipped
            (presumably the list's header line -- confirm against upstream).

    Returns:
        tuple: ``(domains, blackpat, whitepat)`` where ``domains`` is the
        post-processed trie and the other two are lists of pattern strings.
    """
    domains = {}
    blackpat = []  # blacklisted patterns
    whitepat = []  # whitelisted patterns
    for raw_line in text.splitlines()[1:]:
        # Strip once so stray whitespace never ends up inside a pattern.
        line = raw_line.strip()
        if not line or line.startswith("!"):
            continue  # ignore comments and empty lines

        mode = 0  # default to blacklist
        if line.startswith("@@"):
            mode = 1  # now it's whitelist
            line = line[2:]

        if line.startswith("||"):
            # Domain rule; skip it when no actual host name follows.
            host = line[2:]
            if host.strip().strip("."):
                update_domains(domains, host, mode)
            continue
        if line.startswith("/"):
            continue  # regex, can't handle yet

        # Keyword pattern: a single vertical bar at either side means
        # string boundary, otherwise that side is open-ended.
        start_anchored = line.startswith("|")
        if start_anchored:
            line = line[1:]
        end_anchored = line.endswith("|")
        if end_anchored:
            line = line[:-1]
        if not line:
            continue  # nothing left to match on
        pattern = "".join((
            "" if start_anchored else "*",
            line,
            "" if end_anchored else "*",
        ))
        if mode == 0:
            blackpat.append(pattern)
        else:
            whitepat.append(pattern)

    postproc_domains(domains)
    return domains, blackpat, whitepat
def generate_pac_partial():
    """Build the JavaScript fragment that defines the GFWList tables.

    Fetches and parses the list, then renders the domain trie and the two
    pattern lists as indented JSON assigned to the variables ``DOMAINS``,
    ``BLACKPAT`` and ``WHITEPAT``.

    Returns:
        str: three ``var ... = ...;`` statements separated by blank lines.
    """
    domains, blackpat, whitepat = parse_gfwlist(get_gfwlist())
    rendered = [
        json.dumps(table, indent=2)
        for table in (domains, blackpat, whitepat)
    ]
    template = "var DOMAINS = {};\n\nvar BLACKPAT = {};\n\nvar WHITEPAT = {};\n"
    return template.format(*rendered)
if __name__ == '__main__':
    # Run as a script: write the generated PAC fragment to standard output.
    pac_fragment = generate_pac_partial()
    print(pac_fragment)