From bd0e1a3d7b719c28a5a7e7727de912cb187240e4 Mon Sep 17 00:00:00 2001
From: loyalsoldier <10487845+Loyalsoldier@users.noreply.github.com>
Date: Mon, 2 Mar 2020 01:05:01 +0800
Subject: [PATCH] Remove redundant domains

---
Review notes (commentary only, not part of the commit message):
- Purpose: the "Sort and generate" step now writes intermediate
  *-with-redundant lists, and a new step subtracts the domains reported by
  findRedundantDomain.py before writing the final lists under
  $GOPATH/src/$GEOSITE_REPO/data.
- Line breaks and YAML indentation were restored from a whitespace-collapsed
  copy of this patch; check the context lines against build.yml before
  applying.
- The *-deleted-unsort lists are now sorted with --ignore-case, the same key
  used to order the *-with-redundant lists. With plain sort the two orderings
  can disagree (names containing "_" or upper-case letters), and diff would
  then report a removed domain as a "<"/">" pair, leaking it into the output.
- Four shell comment lines were added to the new step, so the diffstat and
  the hunk header were adjusted; the post-image blob id on the index line
  was not recomputed.

 .github/workflows/build.yml | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 0e1f68f957..8aa36745a2 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -93,15 +93,32 @@ jobs:
           cat proxy.txt >> temp-proxy.txt
           cat direct.txt >> temp-direct.txt
 
-      - name: Sort and generate lists
+      - name: Sort and generate redundant lists
         run: |
-          cat temp-proxy.txt | sort --ignore-case -u | perl -ne '/^((?=^.{3,255})[a-zA-Z0-9][-_a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-_a-zA-Z0-9]{0,62})+)/ && print "$1\n"' > $GOPATH/src/$GEOSITE_REPO/data/proxy-list
+          cat temp-proxy.txt | sort --ignore-case -u | perl -ne '/^((?=^.{3,255})[a-zA-Z0-9][-_a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-_a-zA-Z0-9]{0,62})+)/ && print "$1\n"' > proxy-list-with-redundant
           cat temp-proxy.txt | sort --ignore-case -u | perl -ne 'print if not /^((?=^.{3,255})[a-zA-Z0-9][-_a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-_a-zA-Z0-9]{0,62})+)/' > proxy-excluse-list
-          cat temp-direct.txt | sort --ignore-case -u | perl -ne '/^((?=^.{3,255})[a-zA-Z0-9][-_a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-_a-zA-Z0-9]{0,62})+)/ && print "$1\n"' > $GOPATH/src/$GEOSITE_REPO/data/direct-list
+          cat temp-direct.txt | sort --ignore-case -u | perl -ne '/^((?=^.{3,255})[a-zA-Z0-9][-_a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-_a-zA-Z0-9]{0,62})+)/ && print "$1\n"' > direct-list-with-redundant
           cat temp-direct.txt | sort --ignore-case -u | perl -ne 'print if not /^((?=^.{3,255})[a-zA-Z0-9][-_a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-_a-zA-Z0-9]{0,62})+)/' > direct-excluse-list
-          cat temp-reject.txt | sort --ignore-case -u | perl -ne '/^((?=^.{3,255})[a-zA-Z0-9][-_a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-_a-zA-Z0-9]{0,62})+)/ && print "$1\n"' > $GOPATH/src/$GEOSITE_REPO/data/reject-list
+          cat temp-reject.txt | sort --ignore-case -u | perl -ne '/^((?=^.{3,255})[a-zA-Z0-9][-_a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-_a-zA-Z0-9]{0,62})+)/ && print "$1\n"' > reject-list-with-redundant
           cat temp-reject.txt | sort --ignore-case -u | perl -ne 'print if not /^((?=^.{3,255})[a-zA-Z0-9][-_a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-_a-zA-Z0-9]{0,62})+)/' > reject-excluse-list
 
+      - name: Remove redundant domains
+        run: |
+          chmod +x findRedundantDomain.py
+          # Presumably writes the redundant domains found in $1 to $2 (script not shown here; confirm).
+          ./findRedundantDomain.py ./direct-list-with-redundant ./direct-list-deleted-unsort
+          ./findRedundantDomain.py ./proxy-list-with-redundant ./proxy-list-deleted-unsort
+          ./findRedundantDomain.py ./reject-list-with-redundant ./reject-list-deleted-unsort
+          # Sort with the same --ignore-case key as the *-with-redundant lists: diff only behaves
+          # as a set difference when both of its inputs are in the same order.
+          sort --ignore-case ./direct-list-deleted-unsort > ./direct-list-deleted-sort
+          sort --ignore-case ./proxy-list-deleted-unsort > ./proxy-list-deleted-sort
+          sort --ignore-case ./reject-list-deleted-unsort > ./reject-list-deleted-sort
+          # Lines that diff marks with ">" exist only in *-with-redundant, i.e. the domains to keep.
+          diff ./direct-list-deleted-sort ./direct-list-with-redundant | awk '/^>/{print $2}' > $GOPATH/src/$GEOSITE_REPO/data/direct-list
+          diff ./proxy-list-deleted-sort ./proxy-list-with-redundant | awk '/^>/{print $2}' > $GOPATH/src/$GEOSITE_REPO/data/proxy-list
+          diff ./reject-list-deleted-sort ./reject-list-with-redundant | awk '/^>/{print $2}' > $GOPATH/src/$GEOSITE_REPO/data/reject-list
+
       - name: Add list into appropriate category file
         run: |
           cd $GOPATH/src/$GEOSITE_REPO/data