Skip to content

Commit

Permalink
Fix false positive extraction of the adblock decoder.
Browse files Browse the repository at this point in the history
This patch fixes #13.

Reverse:
  * Of the last patch for the way we check for URL.

Introduction:
  * Of new test cases.
  * Of the force update for all versions which are older than `0.94.3`.
    * Because of this patch.

Review:
  * Of the way we extract domain and URL from the given adblock file.

Deprecation:
  * Of all versions which are equal to or older than `0.109.0`.

Thanks:
  * To @dnmTX
  * @adblockplus for their documentation
    * cf: https://adblockplus.org/filter-cheatsheet
  • Loading branch information
funilrys committed Oct 2, 2018
1 parent 7c5989d commit 25b4f49
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 42 deletions.
2 changes: 1 addition & 1 deletion PyFunceble/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
# We set our project name.
NAME = "PyFunceble"
# We set our project version.
VERSION = "0.109.0.dev-beta (Sarcoline Puku / Mosquito)"
VERSION = "0.110.0.dev-beta (Sarcoline Puku / Mosquito)"

if "PYFUNCEBLE_OUTPUT_DIR" in environ: # pragma: no cover
# We handle the case that the `PYFUNCEBLE_OUTPUT_DIR` environment variable is set.
Expand Down
2 changes: 1 addition & 1 deletion PyFunceble/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def is_url_valid(self, url=None, return_formated=False):

try:
# We initiate a regex which will match the domain or the url base.
regex = r"(^([a-z]+:\/\/)(.+?(?=\/)|.+?$))"
regex = r"(^(http:\/\/|https:\/\/)(.+?(?=\/)|.+?$))"

# We extract the url base with the help of the initiated regex.
formated_base = Regex(
Expand Down
46 changes: 28 additions & 18 deletions PyFunceble/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,13 @@ def _format_adblock_decoded(cls, to_format, result=None):
if data:
# The currently read line is not empty.

if "^" in data:
# There is a caret in the currently read line.

# We call this method again, but with the current result state
# and the split data.
return cls._format_adblock_decoded(data.split("^"), result)

if "#" in data:
# There is a hash sign in the currently read line.

Expand All @@ -651,13 +658,6 @@ def _format_adblock_decoded(cls, to_format, result=None):
# and splited data.
return cls._format_adblock_decoded(data.split(","), result)

if "~" in data:
# There is a tilde in the currently read line.

# We recall this method but with the current result state
# and splited data.
return cls._format_adblock_decoded(data.split("~"), result)

if "!" in data:
# There is an exclamation mark in the currently read line.

Expand Down Expand Up @@ -719,28 +719,38 @@ def _adblock_decode(self, list_to_test):
# the element to format.
regex = r"^(?:.*\|\|)([^\/\$\^]{1,}).*$"

# We initiate the second regex we are going to use to get
# We initiate the third regex we are going to use to get
# the element to format.
regex_v2 = r"((.*\..*)(?:#{1,}.*))"
regex_v3 = (
r"(?:#+(?:[a-z]+?)?\[[a-z]+(?:\^|\*)\=(?:\'|\"))(.*\..*)(?:(?:\'|\")\])"
)

# We initiate the third regex we are going to use to get
# We initiate the fourth regex we are going to use to get
# the element to format.
regex_v3 = r"(?:#+(?:[a-z]+?)?\[[a-z]+\^\=(?:\'|\"))(.*\..*)(?:(?:\'|\")\])"
regex_v4 = r"^\|(.*\..*)\|$"

for line in list_to_test:
# We loop through the different line.

if line.startswith("!"):
if (
line.startswith("!")
or line.startswith("@@")
or line.startswith("/")
or line.startswith("[")
):
continue

# We extract the different group from our first regex.
rematch = Regex(
line, regex, return_data=True, rematch=True, group=0
).match()

# We extract the different group from our second regex.
rematch_v2 = Regex(
line, regex_v2, return_data=True, rematch=True, group=0
# We extract the different group from our fourth regex.
#
# Note: We execute the following second because it is more
# specific than the others.
rematch_v4 = Regex(
line, regex_v4, return_data=True, rematch=True, group=0
).match()

# We extract the different group from our third regex.
Expand All @@ -754,11 +764,11 @@ def _adblock_decode(self, list_to_test):
# We extend the result with the extracted elements.
result.extend(rematch)

if rematch_v2:
# The second extraction was successfull.
if rematch_v4:
# The fourth extraction was successful.

# We extend the formated elements from the extracted elements.
result.extend(List(self._format_adblock_decoded(rematch_v2)).format())
result.extend(List(self._format_adblock_decoded(rematch_v4)).format())

if rematch_v3:
# The third extraction was successful.
Expand Down
31 changes: 12 additions & 19 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,45 +353,38 @@ def setUp(self):

load_config(True)
self.lines = [
"||google.com$script,image",
"||twitter.com^",
"||funilrys.github.io$script,image",
"||google.com^$script,image",
"||twitter.com^helloworld.com",
"||api.google.com/papi/action$popup",
"facebook.com###player-above-2",
"~github.com,hello.world##.wrapper",
"@@||cnn.com/*ad.xml",
"!||world.hello/*ad.xml",
"bing.com,bingo.com#@##adBanner",
"!@@||funceble.world/js",
"yahoo.com,msn.com,api.hello.world#@#awesomeWorld",
"yahoo.com,~msn.com,api.hello.world#@#awesomeWorld",
"!funilrys.com##body",
"hello#@#badads",
"hubgit.com|oohay.com|ipa.elloh.dlorw#@#awesomeWorld",
'##[href^="https://funceble.funilrys.com/"]',
"[AdBlock Plus 2.0]",
'##div[href^="http://funilrys.com/"]',
'##[href^="ftp://funceble.funilrys.com/"]',
'com##[href^="ftp://funceble.funilrys-funceble.com/"]',
"/banner/*/img^" "|github.io|",
"|github.io|",
"||api.funilrys.com/widget/$",
]

self.expected = [
"funilrys.github.io",
"google.com",
"twitter.com",
"api.google.com",
"facebook.com",
"github.com",
"hello.world",
"cnn.com",
"world.hello",
"bing.com",
"bingo.com",
"funceble.world",
"api.hello.world",
"msn.com",
"yahoo.com",
"funilrys.com",
"hubgit.com",
"ipa.elloh.dlorw",
"oohay.com",
"funceble.funilrys.com",
"funilrys.com",
"github.io",
"api.funilrys.com",
]

def test_adblock_decode(self):
Expand Down
6 changes: 3 additions & 3 deletions version.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
current_version: 0.109.0.dev-beta (Sarcoline Puku / Mosquito)
current_version: 0.110.0.dev-beta (Sarcoline Puku / Mosquito)
deprecated: [0.0.0, 0.0.1, 0.65.0, 0.67.1, 0.68.0, 0.69.3, 0.69.5, 0.70.4, 0.71.2,
0.72.7, 0.73.1, 0.74.5, 0.75.1, 0.76.2, 0.77.0, 0.78.0, 0.79.1, 0.80.9, 0.81.8,
0.82.4, 0.83.2, 0.84.5, 0.85.0, 0.86.0, 0.87.1, 0.88.3, 0.89.3, 0.90.2, 0.91.1,
0.92.0, 0.93.0, 0.94.6, 0.95.1, 0.96.2, 0.97.1, 0.98.0, 0.99.4, 0.100.0, 0.101.23,
0.102.0, 0.103.3, 0.104.4, 0.105.1, 0.106.2, 0.107.1, 0.108.1]
0.102.0, 0.103.3, 0.104.4, 0.105.1, 0.106.2, 0.107.1, 0.108.1, 0.109.0]
force_update:
minimal_version: [0.0.0, 0.0.1, 0.82.5, 0.89.2]
minimal_version: [0.0.0, 0.0.1, 0.82.5, 0.89.2, 0.94.3]
status: true

0 comments on commit 25b4f49

Please sign in to comment.