Skip to content

Commit

Permalink
Add telephone number extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
battleoverflow committed Jul 18, 2023
1 parent ba1fd52 commit 43f7921
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 3 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ This library currently supports the following IOCs:
* SHA1
* SHA256
* SHA512
* Telephone numbers
* Custom regex
* With exactly one capture group

Expand Down
1 change: 1 addition & 0 deletions docs/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ This library currently supports the following IOCs:
* SHA1
* SHA256
* SHA512
* Telephone numbers
* Custom regex
* With exactly one capture group

Expand Down
15 changes: 15 additions & 0 deletions iocextract.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,8 @@ def ipv4_len(ip_len=3):
re.MULTILINE | re.DOTALL | re.VERBOSE,
)

# Telephone number pattern. Reading the expression left to right:
#   * optional prefix: a "1" (optionally written "+1"), followed by an
#     optional "." or "-" separator;
#   * optional 3-digit area code, either wrapped in parentheses (group 2) or
#     bare (group 3); its first digit must be 2-9;
#   * 3-digit exchange (group 4), first digit 2-9;
#   * 4-digit line number (group 5);
#   * optional extension introduced by "#", "x", "ext" or "extension", whose
#     digits land in group 6.
# Group 1 wraps the entire match. The whole prefix + area code portion is
# optional, so when the leading digits do not qualify as an area code
# (e.g. "123"), only the trailing 7-digit part ("456-7890") is matched.
# NOTE(review): the digit rules look like North American (NANP-style)
# numbering -- confirm before relying on this for other formats.
TELEPHONE_RE = re.compile(r"((?:(?:\+?1\s*(?:[.-]\s*)?)?(?:\(\s*([2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s*\)|([2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s*(?:[.-]\s*)?)?([2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s*(?:[.-]\s*)?([0-9]{4})(?:\s*(?:#|x\.?|ext\.?|extension)\s*(\d+))?)")


def extract_iocs(data, refang=False, strip=False):
"""
Expand All @@ -395,6 +397,7 @@ def extract_iocs(data, refang=False, strip=False):
extract_emails(data, refang=refang),
extract_hashes(data),
extract_yara_rules(data),
extract_telephone_nums(data)
)


Expand Down Expand Up @@ -647,6 +650,18 @@ def extract_emails(data, refang=False):
yield email


def extract_telephone_nums(data):
    """
    Extract telephone numbers!

    Scans ``data`` with ``TELEPHONE_RE`` and lazily yields capture group 1
    of each match, in the order the matches occur in the text.

    :param data: Input text
    :rtype: Iterator[:class:`str`]
    """
    matches = TELEPHONE_RE.finditer(data)
    for match in matches:
        # Group 1 is the outermost capture group of TELEPHONE_RE.
        number = match.group(1)
        yield number


def extract_hashes(data):
"""
Extract MD5/SHA hashes!
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@

setup(
name='iocextract',
version='1.15.2',
version='1.16.0',
include_package_data=True,
py_modules=['iocextract',],
install_requires=['regex',],
py_modules=['iocextract'],
install_requires=['regex'],
extras_require = {
':python_version <= "2.7"': [
'ipaddress',
Expand Down
3 changes: 3 additions & 0 deletions test_data/input.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,6 @@ hxxps://example[.]com/
short[.]is
tcp://example[.]com/
tcp://example[.]com:80/
123-456-7890
123.102.1215
(123) 456-7890
1 change: 1 addition & 0 deletions test_data/valid.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,4 @@ hxxps://example[.]com/
short[.]is
tcp://example[.]com/
tcp://example[.]com:80/
456-7890
4 changes: 4 additions & 0 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -852,3 +852,7 @@ def test_base64_error_when_whitespace_in_leading_characters(self):
content = 'ZyBodHRwOi8vIi4kd2ViLiRpbmouIm5uU3ByZWFkIFZpYSA6ICIuJHZpc2l0b3IuIm5uS2VybmVsIFZlcnNpb24gOiAiLiRhcmFuLiJublNhZmUgTW9kZSA6ICIuJHNhZmVtb2RlOyBtYWlsKCJrYW1laGFtZS5kcmFnb25AZ21haWwuY29tIiwiU2V0b3JhbiBCb3MgIi4kc2FmZW1vZGUsJGJvZHksJGZsb2F0KTs='

self.assertIn('http://".$web.$inj."nnSpread', list(iocextract.extract_urls(content, refang=True)))

def test_telephone_number_extraction(self):
    """Check the numbers extracted from three differently formatted inputs."""
    content = "123-456-7890 (123) 456-7890 123.456.7890"
    # Only the trailing 7-digit portion of each number is expected back;
    # the leading "123" is not part of any extracted value.
    expected = ['456-7890', '456-7890', '456.7890']
    extracted = list(iocextract.extract_telephone_nums(content))
    self.assertEqual(expected, extracted)

0 comments on commit 43f7921

Please sign in to comment.