mirror of
https://github.com/MetaCubeX/meta-rules-dat.git
synced 2024-12-25 06:14:14 +08:00
test
This commit is contained in:
parent
8026386706
commit
5273c5e5a2
24
.github/workflows/run.yml
vendored
24
.github/workflows/run.yml
vendored
@ -6,7 +6,6 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- hidden
|
||||
paths-ignore:
|
||||
- "**/README.md"
|
||||
jobs:
|
||||
@ -27,11 +26,10 @@ jobs:
|
||||
echo "WIN_EXTRA=https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/extra.txt" >> $GITHUB_ENV
|
||||
shell: bash
|
||||
|
||||
- name: Checkout the "hidden" branch
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
repository: MetaCubeX/meta-rules-dat
|
||||
ref: hidden
|
||||
|
||||
- name: Checkout Loyalsoldier/domain-list-custom
|
||||
uses: actions/checkout@v3
|
||||
@ -84,11 +82,11 @@ jobs:
|
||||
run: |
|
||||
# curl -sSL ${CUSTOM_DIRECT} | grep -v google | grep -v manhua | grep -v ooklaserver | grep -v "acg.rip" | perl -ne '/^((full|regexp|keyword):[^:]+)(\n$|:@.+)/ && print "$1\n"' | sort --ignore-case -u > direct-reserve.txt
|
||||
curl -sSL ${CUSTOM_PROXY} | grep -Ev ":@cn" | perl -ne '/^((full|regexp|keyword):[^:]+)(\n$|:@.+)/ && print "$1\n"' | sort --ignore-case -u > proxy-reserve.txt
|
||||
|
||||
|
||||
- name: Add proxy, direct and reject domains from "hidden" branch to appropriate temp files
|
||||
run: |
|
||||
cat proxy.txt >> temp-proxy.txt
|
||||
cat direct.txt >> temp-direct.txt
|
||||
cat ./resouces/proxy.txt >> temp-proxy.txt
|
||||
cat ./resouces/direct.txt >> temp-direct.txt
|
||||
# cat reject.txt >> temp-reject.txt
|
||||
|
||||
- name: Sort and generate redundant lists
|
||||
@ -99,20 +97,20 @@ jobs:
|
||||
|
||||
- name: Remove redundant domains
|
||||
run: |
|
||||
chmod +x *.py
|
||||
python ./findRedundantDomain.py ./direct-list-with-redundant ./direct-list-deleted-unsort
|
||||
python ./findRedundantDomain.py ./proxy-list-with-redundant ./proxy-list-deleted-unsort
|
||||
chmod +x ./resouces/*.py
|
||||
python ./resouces/findRedundantDomain.py ./direct-list-with-redundant ./direct-list-deleted-unsort
|
||||
python ./resouces/findRedundantDomain.py ./proxy-list-with-redundant ./proxy-list-deleted-unsort
|
||||
[ ! -f "direct-list-deleted-unsort" ] && touch direct-list-deleted-unsort
|
||||
[ ! -f "proxy-list-deleted-unsort" ] && touch proxy-list-deleted-unsort
|
||||
sort ./direct-list-deleted-unsort > ./direct-list-deleted-sort
|
||||
sort ./proxy-list-deleted-unsort > ./proxy-list-deleted-sort
|
||||
python ./removeFrom.py -remove ./direct-list-deleted-sort -from ./direct-list-with-redundant -out direct-list-without-redundant
|
||||
python ./removeFrom.py -remove ./proxy-list-deleted-sort -from ./proxy-list-with-redundant -out proxy-list-without-redundant
|
||||
python ./resouces/removeFrom.py -remove ./direct-list-deleted-sort -from ./direct-list-with-redundant -out direct-list-without-redundant
|
||||
python ./resouces/removeFrom.py -remove ./proxy-list-deleted-sort -from ./proxy-list-with-redundant -out proxy-list-without-redundant
|
||||
|
||||
- name: Remove domains from "need-to-remove" lists in "hidden" branch
|
||||
run: |
|
||||
python ./removeFrom.py -remove ./direct-need-to-remove.txt -from ./direct-list-without-redundant -out temp-cn.txt
|
||||
python ./removeFrom.py -remove ./proxy-need-to-remove.txt -from ./proxy-list-without-redundant -out temp-geolocation-\!cn.txt
|
||||
python ./resouces/removeFrom.py -remove ./resouces/direct-need-to-remove.txt -from ./direct-list-without-redundant -out ./temp-cn.txt
|
||||
python ./resouces/removeFrom.py -remove ./resouces/proxy-need-to-remove.txt -from ./proxy-list-without-redundant -out ./temp-geolocation-\!cn.txt
|
||||
|
||||
- name: Remove domains end with ".cn" in "temp-geolocation-!cn.txt" and write lists to data directory
|
||||
run: |
|
||||
|
35
resouces/direct-need-to-remove.txt
Normal file
35
resouces/direct-need-to-remove.txt
Normal file
@ -0,0 +1,35 @@
|
||||
103.com
|
||||
123cha.com
|
||||
95081.com
|
||||
airasia.com
|
||||
baid.us
|
||||
baidu.jp
|
||||
bussou.com
|
||||
busytrade.com
|
||||
cnbeta.com
|
||||
cnbetacdn.com
|
||||
cnpolitics.org
|
||||
dm530.net
|
||||
duanzhihu.com
|
||||
dysfz.cc
|
||||
emacs-china.org
|
||||
galaxymacau.com
|
||||
galstars.net
|
||||
haitum.com
|
||||
hostloc.com
|
||||
jiaoyou8.com
|
||||
kh.google.com
|
||||
laonanren.com
|
||||
mysinablog.com
|
||||
ntrqq.com
|
||||
nytlog.com
|
||||
shuangtv.net
|
||||
suppig.net
|
||||
top
|
||||
xclient.info
|
||||
xjp.cc
|
||||
yanghengjun.com
|
||||
ydy.com
|
||||
yslang.com
|
||||
yysub.net
|
||||
hamreus.com
|
0
resouces/direct.txt
Normal file
0
resouces/direct.txt
Normal file
67
resouces/findRedundantDomain.py
Normal file
67
resouces/findRedundantDomain.py
Normal file
@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env python3
''' Find redundant items in domain lists.

e.g. 'bar.foo.com' is redundant for 'foo.com'.
'''

import sys

# Echo the two command-line arguments (input list path, output file path).
# This runs at import time and raises IndexError when fewer than two
# arguments are supplied.
print(sys.argv[1], sys.argv[2])
|
||||
|
||||
def load(list):
    ''' Parse a domain list file and prepare the data structure.

    Blank lines and lines starting with '#' are skipped. Every other line
    is stripped, lower-cased (domain names are case-insensitive) and split
    into its labels on '.'.

    Args:
        list: Path of the domain list file. (The name shadows the builtin
            but is kept so existing callers keep working.)

    Returns:
        A list of label lists ordered by number of labels, shortest first;
        entries with the same number of labels keep their file order:
            [ ['abc', 'com'],
              ['bar', 'foo', 'com'],
              ... ]
    '''
    results = []
    # Decode explicitly as UTF-8 so parsing does not depend on the locale
    # of the machine running the script.
    with open(list, 'r', encoding='utf-8') as f:
        # Iterate the file lazily instead of materialising every line first.
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            results.append(line.lower().split('.'))

    # Stable sort by label count: shorter (parent) domains come first.
    results.sort(key=len)
    return results
|
||||
|
||||
def find(labelses, removedDomainFile):
    ''' Find redundant items by a tree of top-level domain label to sub-level.

    `tree` is like { 'com': { 'foo': { 'bar': LEAF },
                              'abc': LEAF },
                     'org': ... }

    Each domain is walked from its last label to its first. Reaching an
    existing LEAF means an already-registered domain covers the current
    one, so it is reported and recorded. An exact repeat of an earlier
    entry is reported the same way. A parent is only registered as a LEAF
    when it is seen before its subdomains, so callers should pass the list
    ordered shortest first.

    Args:
        labelses: Iterable of label lists, e.g. [['foo', 'com'], ...].
            The lists are not modified.
        removedDomainFile: Path that redundant domains are appended to,
            one per line. The file is only opened (and therefore only
            created) when at least one redundant domain is found.
    '''
    LEAF = 1
    tree = {}
    redundant = []

    for labels in labelses:
        domain = '.'.join(labels)
        # Consume a private copy so the caller's label lists stay intact.
        remaining = [*labels]
        # Init root node as current node
        node = tree
        while remaining:
            label = remaining.pop()
            child = node.get(label)
            if child is None:
                # Unknown label: a leaf if it is the last one, else a branch.
                child = node[label] = {} if remaining else LEAF
            elif child == LEAF:
                # An existing domain already covers the current one.
                print(f"Redundant found: {domain} at {'.'.join(remaining)}")
                redundant.append(domain)
                break
            # Iterate to child node
            node = child

    # Append all hits with a single open instead of reopening per domain.
    if redundant:
        with open(removedDomainFile, "a", encoding="utf-8") as f:
            f.writelines(f"{name}\n" for name in redundant)
|
||||
|
||||
if __name__ == '__main__':
    # argv[1]: domain list to scan; argv[2]: file that collects redundant domains.
    parsed_domains = load(sys.argv[1])
    find(parsed_domains, sys.argv[2])
|
3
resouces/proxy-need-to-remove.txt
Normal file
3
resouces/proxy-need-to-remove.txt
Normal file
@ -0,0 +1,3 @@
|
||||
ifanr.com
|
||||
weibo.com
|
||||
www.baidu.com
|
2
resouces/proxy.txt
Normal file
2
resouces/proxy.txt
Normal file
@ -0,0 +1,2 @@
|
||||
supertop.co
|
||||
hk.chinamobile.com
|
33
resouces/reject-need-to-remove.txt
Normal file
33
resouces/reject-need-to-remove.txt
Normal file
@ -0,0 +1,33 @@
|
||||
4paradigm.com
|
||||
addthis.com
|
||||
addthisedge.com
|
||||
alimama.alicdn.com
|
||||
alimama.com
|
||||
analytics.google.com
|
||||
app.chat.xiaomi.net
|
||||
bdtj.tagtic.cn
|
||||
cdn.onesignal.com
|
||||
click.discord.com
|
||||
click.redditmail.com
|
||||
ctrip.com
|
||||
d.ifengimg.com
|
||||
icons.mydrivers.com
|
||||
img.alibaba.com
|
||||
jav321.com
|
||||
knet.cn
|
||||
mail.tsinghua.edu.cn
|
||||
mtalk.google.com
|
||||
mx.technolutions.net
|
||||
newrelic.com
|
||||
offer.alibaba.com
|
||||
pingjs.qq.com
|
||||
qlogo.cn
|
||||
resolver.msg.xiaomi.net
|
||||
s.youtube.com
|
||||
sf3-ttcdn-tos.pstatp.com
|
||||
t.co
|
||||
tagtic.cn
|
||||
telegra.ph
|
||||
tongji.baidu.com
|
||||
tv.sohu.com
|
||||
ue.yeyoucdn.com
|
0
resouces/reject.txt
Normal file
0
resouces/reject.txt
Normal file
29
resouces/removeFrom.py
Normal file
29
resouces/removeFrom.py
Normal file
@ -0,0 +1,29 @@
|
||||
import argparse
|
||||
|
||||
|
||||
def remove_domains(file_to_remove, file_to_remove_from, output_file):
    """Write the domains of one list that do not appear in another.

    Lines of both inputs are stripped of surrounding whitespace and
    compared as exact strings; blank lines are ignored and duplicates
    collapse to a single entry.

    Args:
        file_to_remove: Path of the file listing domains to drop.
        file_to_remove_from: Path of the file to filter.
        output_file: Path the surviving domains are written to, one per
            line, sorted, with no trailing newline. The file is
            overwritten.
    """
    with open(file_to_remove, "r", encoding="utf-8") as f_remove, open(
        file_to_remove_from, "r", encoding="utf-8"
    ) as f_from:
        domains_to_remove = {line.strip() for line in f_remove}
        all_domains = {line.strip() for line in f_from}

    # Blank input lines strip to "" and must not leak into the output.
    remaining_domains = all_domains - domains_to_remove - {""}

    with open(output_file, "w", encoding="utf-8") as output:
        # Sort so the result is reproducible; set iteration order varies
        # between interpreter runs.
        output.write("\n".join(sorted(remaining_domains)))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: all three options are mandatory.
    cli = argparse.ArgumentParser(description="Remove domains from a file.")
    option_specs = (
        ("-remove", {"help": "File containing domains to be removed"}),
        # "from" is a Python keyword, so the value is stored as `from_file`.
        ("-from", {"dest": "from_file", "help": "File to remove domains from"}),
        ("-out", {"help": "Output file"}),
    )
    for flag, extra in option_specs:
        cli.add_argument(flag, required=True, **extra)

    options = cli.parse_args()
    remove_domains(options.remove, options.from_file, options.out)
|
Loading…
x
Reference in New Issue
Block a user