-
Notifications
You must be signed in to change notification settings - Fork 48
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'development' of https://github.com/usc-isi-i2/etk into …
…development
- Loading branch information
Showing
16 changed files
with
438 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import warnings | ||
import sys | ||
import argparse | ||
|
||
from etk.extractors.bitcoin_address_extractor import BitcoinAddressExtractor | ||
|
||
# Module-level extractor instance, created once at import time and used by run(). | ||
bitcoin_address_extractor = BitcoinAddressExtractor() | ||
|
||
|
||
def add_arguments(parser):
    """
    Configure ``parser`` for the ``bitcoin_address_extractor`` command.

    Replaces the parser description with usage examples and registers one
    optional positional argument, ``input_file``.

    Args:
        parser (argparse.ArgumentParser): parser to configure in place.
    """
    parser.description = (
        'Examples:\n'
        'python -m etk bitcoin_address_extractor /tmp/input.txt\n'
        'cat /tmp/input.txt | python -m etk bitcoin_address_extractor'
    )
    # nargs='?' makes the path optional so the command also works in a pipe.
    parser.add_argument(
        'input_file',
        nargs='?',
        type=argparse.FileType('r'),
        default=sys.stdin,
        help='Input file to read; defaults to standard input.',
    )
|
||
|
||
def run(args):
    """
    Print the value of every extraction found in the input, one per line.

    Each line of ``args.input_file`` is passed to the module-level
    ``bitcoin_address_extractor`` and the ``value`` of every returned
    extraction is printed.

    Args:
        args (argparse.Namespace): parsed arguments; ``args.input_file`` is
            iterated line by line.
    """
    try:
        with warnings.catch_warnings():
            # Suppress any warnings raised while extracting.
            warnings.simplefilter('ignore')

            for line in args.input_file:
                for e in bitcoin_address_extractor.extract(line):
                    print(e.value)
    finally:
        # argparse.FileType opened the file for us; release it once we are
        # done, but never close the process-wide stdin.
        if args.input_file is not sys.stdin:
            args.input_file.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import warnings | ||
import sys | ||
import argparse | ||
|
||
from etk.extractors.cryptographic_hash_extractor import CryptographicHashExtractor | ||
|
||
# Module-level extractor instance, created once at import time and used by run(). | ||
cryptographic_hash_extractor = CryptographicHashExtractor() | ||
|
||
|
||
def add_arguments(parser):
    """
    Configure ``parser`` for the ``cryptographic_hash_extractor`` command.

    Replaces the parser description with usage examples and registers one
    optional positional argument, ``input_file``.

    Args:
        parser (argparse.ArgumentParser): parser to configure in place.
    """
    parser.description = (
        'Examples:\n'
        'python -m etk cryptographic_hash_extractor /tmp/input.txt\n'
        'cat /tmp/input.txt | python -m etk cryptographic_hash_extractor'
    )
    # nargs='?' makes the path optional so the command also works in a pipe.
    parser.add_argument(
        'input_file',
        nargs='?',
        type=argparse.FileType('r'),
        default=sys.stdin,
        help='Input file to read; defaults to standard input.',
    )
|
||
|
||
def run(args):
    """
    Print the value of every extraction found in the input, one per line.

    Each line of ``args.input_file`` is passed to the module-level
    ``cryptographic_hash_extractor`` and the ``value`` of every returned
    extraction is printed.

    Args:
        args (argparse.Namespace): parsed arguments; ``args.input_file`` is
            iterated line by line.
    """
    try:
        with warnings.catch_warnings():
            # Suppress any warnings raised while extracting.
            warnings.simplefilter('ignore')

            for line in args.input_file:
                for e in cryptographic_hash_extractor.extract(line):
                    print(e.value)
    finally:
        # argparse.FileType opened the file for us; release it once we are
        # done, but never close the process-wide stdin.
        if args.input_file is not sys.stdin:
            args.input_file.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import warnings | ||
import sys | ||
import argparse | ||
|
||
from etk.extractors.cve_extractor import CVEExtractor | ||
|
||
# Module-level extractor instance, created once at import time and used by run(). | ||
cve_extractor = CVEExtractor() | ||
|
||
|
||
def add_arguments(parser):
    """
    Configure ``parser`` for the ``cve_extractor`` command.

    Replaces the parser description with usage examples and registers one
    optional positional argument, ``input_file``.

    Args:
        parser (argparse.ArgumentParser): parser to configure in place.
    """
    # The examples previously named bitcoin_address_extractor (copy-paste
    # slip); they must advertise this module's own command.
    parser.description = (
        'Examples:\n'
        'python -m etk cve_extractor /tmp/input.txt\n'
        'cat /tmp/input.txt | python -m etk cve_extractor'
    )
    # nargs='?' makes the path optional so the command also works in a pipe.
    parser.add_argument(
        'input_file',
        nargs='?',
        type=argparse.FileType('r'),
        default=sys.stdin,
        help='Input file to read; defaults to standard input.',
    )
|
||
|
||
def run(args):
    """
    Print the value of every extraction found in the input, one per line.

    Each line of ``args.input_file`` is passed to the module-level
    ``cve_extractor`` and the ``value`` of every returned extraction is
    printed.

    Args:
        args (argparse.Namespace): parsed arguments; ``args.input_file`` is
            iterated line by line.
    """
    try:
        with warnings.catch_warnings():
            # Suppress any warnings raised while extracting.
            warnings.simplefilter('ignore')

            for line in args.input_file:
                for e in cve_extractor.extract(line):
                    print(e.value)
    finally:
        # argparse.FileType opened the file for us; release it once we are
        # done, but never close the process-wide stdin.
        if args.input_file is not sys.stdin:
            args.input_file.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import warnings | ||
import sys | ||
import argparse | ||
|
||
from etk.extractors.email_extractor import EmailExtractor | ||
|
||
# Module-level extractor instance, created once at import time and used by run(). | ||
email_extractor = EmailExtractor() | ||
|
||
|
||
def add_arguments(parser):
    """
    Configure ``parser`` for the ``email_extractor`` command.

    Replaces the parser description with usage examples and registers one
    optional positional argument, ``input_file``.

    Args:
        parser (argparse.ArgumentParser): parser to configure in place.
    """
    parser.description = (
        'Examples:\n'
        'python -m etk email_extractor /tmp/input.txt\n'
        'cat /tmp/input.txt | python -m etk email_extractor'
    )
    # nargs='?' makes the path optional so the command also works in a pipe.
    parser.add_argument(
        'input_file',
        nargs='?',
        type=argparse.FileType('r'),
        default=sys.stdin,
        help='Input file to read; defaults to standard input.',
    )
|
||
|
||
def run(args):
    """
    Print the value of every extraction found in the input, one per line.

    Each line of ``args.input_file`` is passed to the module-level
    ``email_extractor`` and the ``value`` of every returned extraction is
    printed.

    Args:
        args (argparse.Namespace): parsed arguments; ``args.input_file`` is
            iterated line by line.
    """
    try:
        with warnings.catch_warnings():
            # Suppress any warnings raised while extracting.
            warnings.simplefilter('ignore')

            for line in args.input_file:
                for e in email_extractor.extract(line):
                    print(e.value)
    finally:
        # argparse.FileType opened the file for us; release it once we are
        # done, but never close the process-wide stdin.
        if args.input_file is not sys.stdin:
            args.input_file.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import warnings | ||
import sys | ||
import argparse | ||
|
||
from etk.extractors.hostname_extractor import HostnameExtractor | ||
|
||
# Module-level extractor instance, created once at import time and used by run(). | ||
hostname_extractor = HostnameExtractor() | ||
|
||
|
||
def add_arguments(parser):
    """
    Configure ``parser`` for the ``hostname_extractor`` command.

    Replaces the parser description with usage examples and registers one
    optional positional argument, ``input_file``.

    Args:
        parser (argparse.ArgumentParser): parser to configure in place.
    """
    parser.description = (
        'Examples:\n'
        'python -m etk hostname_extractor /tmp/input.txt\n'
        'cat /tmp/input.txt | python -m etk hostname_extractor'
    )
    # nargs='?' makes the path optional so the command also works in a pipe.
    parser.add_argument(
        'input_file',
        nargs='?',
        type=argparse.FileType('r'),
        default=sys.stdin,
        help='Input file to read; defaults to standard input.',
    )
|
||
|
||
def run(args):
    """
    Print the value of every extraction found in the input, one per line.

    Each line of ``args.input_file`` is passed to the module-level
    ``hostname_extractor`` and the ``value`` of every returned extraction is
    printed.

    Args:
        args (argparse.Namespace): parsed arguments; ``args.input_file`` is
            iterated line by line.
    """
    try:
        with warnings.catch_warnings():
            # Suppress any warnings raised while extracting.
            warnings.simplefilter('ignore')

            for line in args.input_file:
                for e in hostname_extractor.extract(line):
                    print(e.value)
    finally:
        # argparse.FileType opened the file for us; release it once we are
        # done, but never close the process-wide stdin.
        if args.input_file is not sys.stdin:
            args.input_file.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import warnings | ||
import sys | ||
import argparse | ||
|
||
from etk.extractors.html_content_extractor import HTMLContentExtractor | ||
|
||
|
||
def add_arguments(parser):
    """
    Configure ``parser`` for the ``html_content_extractor`` command.

    Replaces the parser description with usage examples and registers one
    optional positional argument, ``input_file``.

    Args:
        parser (argparse.ArgumentParser): parser to configure in place.
    """
    parser.description = (
        'Examples:\n'
        'python -m etk html_content_extractor /tmp/input.html\n'
        'cat /tmp/input.html | python -m etk html_content_extractor'
    )
    # nargs='?' makes the path optional so the command also works in a pipe.
    parser.add_argument(
        'input_file',
        nargs='?',
        type=argparse.FileType('r'),
        default=sys.stdin,
        help='Input file to read; defaults to standard input.',
    )
|
||
|
||
def run(args):
    """
    Run the HTML content extractor over the whole input and print the results.

    The input is read in full and handed to ``HTMLContentExtractor.extract``
    as ``html_text``; the ``value`` of every returned extraction is printed.

    Args:
        args (argparse.Namespace): parsed arguments; ``args.input_file`` is
            an open text file (or ``sys.stdin``).
    """
    html_content_extractor = HTMLContentExtractor()

    try:
        # ``html_text`` is meant to be the document text, so read the file
        # instead of passing the file object itself.
        html_text = args.input_file.read()
    finally:
        # argparse.FileType opened the file for us; release it once read,
        # but never close the process-wide stdin.
        if args.input_file is not sys.stdin:
            args.input_file.close()

    with warnings.catch_warnings():
        # Suppress any warnings raised while extracting.
        warnings.simplefilter('ignore')

        for e in html_content_extractor.extract(html_text=html_text):
            print(e.value)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import warnings | ||
import sys | ||
import argparse | ||
|
||
from etk.extractors.html_metadata_extractor import HTMLMetadataExtractor | ||
|
||
|
||
def add_arguments(parser):
    """
    Configure ``parser`` for the ``html_metadata_extractor`` command.

    Replaces the parser description with usage examples and registers one
    optional positional argument, ``input_file``.

    Args:
        parser (argparse.ArgumentParser): parser to configure in place.
    """
    # The examples and a ``pattern`` positional had been copied from
    # regex_extractor. run() never reads ``pattern``, and as the first
    # optional positional it swallowed the file path, leaving ``input_file``
    # on stdin. Only ``input_file`` is registered now.
    parser.description = (
        'Examples:\n'
        'python -m etk html_metadata_extractor /tmp/input.html\n'
        'cat /tmp/input.html | python -m etk html_metadata_extractor'
    )
    # nargs='?' makes the path optional so the command also works in a pipe.
    parser.add_argument(
        'input_file',
        nargs='?',
        type=argparse.FileType('r'),
        default=sys.stdin,
        help='Input file to read; defaults to standard input.',
    )
|
||
|
||
def run(args):
    """
    Run the HTML metadata extractor over the whole input and print the results.

    The input is read in full and handed to ``HTMLMetadataExtractor.extract``
    as ``html_text``; the ``value`` of every returned extraction is printed.

    Args:
        args (argparse.Namespace): parsed arguments; ``args.input_file`` is
            an open text file (or ``sys.stdin``).
    """
    html_metadata_extractor = HTMLMetadataExtractor()

    try:
        # ``html_text`` is meant to be the document text, so read the file
        # instead of passing the file object itself.
        html_text = args.input_file.read()
    finally:
        # argparse.FileType opened the file for us; release it once read,
        # but never close the process-wide stdin.
        if args.input_file is not sys.stdin:
            args.input_file.close()

    with warnings.catch_warnings():
        # Suppress any warnings raised while extracting.
        warnings.simplefilter('ignore')

        for e in html_metadata_extractor.extract(html_text=html_text):
            print(e.value)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import warnings | ||
import sys | ||
import argparse | ||
|
||
from etk.extractors.ip_address_extractor import IPAddressExtractor | ||
|
||
# Module-level extractor instance, created once at import time and used by run(). | ||
ip_address_extractor = IPAddressExtractor() | ||
|
||
|
||
def add_arguments(parser):
    """
    Configure ``parser`` for the ``ip_address_extractor`` command.

    Replaces the parser description with usage examples and registers one
    optional positional argument, ``input_file``.

    Args:
        parser (argparse.ArgumentParser): parser to configure in place.
    """
    parser.description = (
        'Examples:\n'
        'python -m etk ip_address_extractor /tmp/input.txt\n'
        'cat /tmp/input.txt | python -m etk ip_address_extractor'
    )
    # nargs='?' makes the path optional so the command also works in a pipe.
    parser.add_argument(
        'input_file',
        nargs='?',
        type=argparse.FileType('r'),
        default=sys.stdin,
        help='Input file to read; defaults to standard input.',
    )
|
||
|
||
def run(args):
    """
    Print the value of every extraction found in the input, one per line.

    Each line of ``args.input_file`` is passed to the module-level
    ``ip_address_extractor`` and the ``value`` of every returned extraction
    is printed.

    Args:
        args (argparse.Namespace): parsed arguments; ``args.input_file`` is
            iterated line by line.
    """
    try:
        with warnings.catch_warnings():
            # Suppress any warnings raised while extracting.
            warnings.simplefilter('ignore')

            for line in args.input_file:
                for e in ip_address_extractor.extract(line):
                    print(e.value)
    finally:
        # argparse.FileType opened the file for us; release it once we are
        # done, but never close the process-wide stdin.
        if args.input_file is not sys.stdin:
            args.input_file.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
import warnings | ||
import sys | ||
import argparse | ||
|
||
from etk.extractors.regex_extractor import RegexExtractor | ||
|
||
|
||
def add_arguments(parser):
    """
    Configure ``parser`` for the ``regex_extractor`` command.

    Replaces the parser description with usage examples and registers two
    positional arguments: a required ``pattern`` and an optional
    ``input_file``.

    Args:
        parser (argparse.ArgumentParser): parser to configure in place.
    """
    parser.description = (
        'Examples:\n'
        'python -m etk regex_extractor pattern /tmp/date.txt\n'
        'cat /tmp/date.txt | python -m etk regex_extractor pattern'
    )
    # ``pattern`` used to be optional with ``sys.stdin`` (a file object) as
    # its default, which is never a usable pattern. Making it required turns
    # a late, obscure failure into a clear usage error.
    parser.add_argument(
        'pattern',
        type=str,
        help='Regular expression pattern handed to the extractor.',
    )
    # nargs='?' makes the path optional so the command also works in a pipe.
    parser.add_argument(
        'input_file',
        nargs='?',
        type=argparse.FileType('r'),
        default=sys.stdin,
        help='Input file to read; defaults to standard input.',
    )
|
||
|
||
def run(args):
    """
    Apply a regex extractor to every input line and print the results.

    A ``RegexExtractor`` is built from ``args.pattern``; each line of
    ``args.input_file`` is passed to it and the ``value`` of every returned
    extraction is printed.

    Args:
        args (argparse.Namespace): parsed arguments providing ``pattern`` and
            ``input_file``.
    """
    try:
        regex_extractor = RegexExtractor(pattern=args.pattern)

        with warnings.catch_warnings():
            # Suppress any warnings raised while extracting.
            warnings.simplefilter('ignore')

            for line in args.input_file:
                for e in regex_extractor.extract(line):
                    print(e.value)
    finally:
        # argparse.FileType opened the file for us; release it once we are
        # done, but never close the process-wide stdin.
        if args.input_file is not sys.stdin:
            args.input_file.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
<html> | ||
<head> | ||
<title> | ||
15 Sep 2018: The Dormouse's story | ||
</title> | ||
</head> | ||
<body> | ||
<p class="title"> | ||
<b> | ||
15 Sep 2018: The Dormouse's story | ||
</b> | ||
</p> | ||
<p class="story"> | ||
10 days ago, there were three little sisters; and their names were | ||
<a class="sister" href="http://example.com/elsie" id="link1"> | ||
Elsie | ||
</a> | ||
, | ||
<a class="sister" href="http://example.com/lacie" id="link2"> | ||
Lacie | ||
</a> | ||
and | ||
<a class="sister" href="http://example.com/tillie" id="link3"> | ||
Tillie | ||
</a> | ||
; and they lived at the bottom of a well. | ||
</p> | ||
<p class="story"> | ||
... | ||
</p> | ||
</body> | ||
</html> |
Oops, something went wrong.