Revert "Fixed error 'IOC_Parser' object has no attribute 'dedup_store'"

ttufts committed on Mar 30, 2015
commit 8cd34fc · 1 parent a7caf8f

Showing 3 changed files with 19 additions and 55 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,2 @@
 venv
-*.pyc
-*.swp
+*.pyc
diff --git a/ioc-parser.py b/ioc-parser.py
@@ -81,17 +81,12 @@
 class IOC_Parser(object):
     patterns = {}
 
-    def __init__(self, patterns_ini, input_format = 'pdf', output_format='csv', dedup=False, library='pypdf2', campaign='Unknown', campaign_confidence='low', confidence='low', impact='low', tags=[]):
+    def __init__(self, patterns_ini, input_format = 'pdf', output_format='csv', dedup=False, library='pypdf2'):
         basedir = os.path.dirname(os.path.abspath(__file__))
         self.load_patterns(patterns_ini)
         self.whitelist = WhiteList(basedir)
         self.handler = output.getHandler(output_format)
         self.dedup = dedup
-        self.campaign = campaign
-        self.campaign_confidence = campaign_confidence
-        self.confidence = confidence
-        self.impact = impact
-        self.tags = tags
 
         self.ext_filter = "*." + input_format
         parser_format = "parse_" + input_format
@@ -134,34 +129,23 @@ def is_whitelisted(self, ind_match, ind_type):
         return False
 
     def parse_page(self, fpath, data, page_num):
-        try:
-            if self.dedup:
-                self.dedup_store = set()
+        for ind_type, ind_regex in self.patterns.items():
+            matches = ind_regex.findall(data)
 
-            for ind_type, ind_regex in self.patterns.items():
-                matches = ind_regex.findall(data)
+            for ind_match in matches:
+                if isinstance(ind_match, tuple):
+                    ind_match = ind_match[0]
 
-                for ind_match in matches:
-                    if isinstance(ind_match, tuple):
-                        ind_match = ind_match[0]
+                if self.is_whitelisted(ind_match, ind_type):
+                    continue
 
-                    if self.is_whitelisted(ind_match, ind_type):
+                if self.dedup:
+                    if (ind_type, ind_match) in self.dedup_store:
                         continue
 
-                    if self.dedup:
-                        if (ind_type, ind_match) in self.dedup_store:
-                            continue
-
-                        self.dedup_store.add((ind_type, ind_match))
+                    self.dedup_store.add((ind_type, ind_match))
 
-                    tags = ""
-                    if len(self.tags) > 0:
-                        tags = ','.join(self.tags)
-                    self.handler.print_match(ind_match, ind_type, self.campaign, self.campaign_confidence, self.confidence, self.impact, tags)
-        except KeyError as e:
-            print "{0}".format(e)
-        except:
-            print "Unexpected error:", sys.exc_info()[0]
+                self.handler.print_match(fpath, page_num, ind_type, ind_match)
 
     def parse_pdf_pypdf2(self, f, fpath):
         try:
@@ -220,7 +204,7 @@ def parse_pdf(self, f, fpath):
         except AttributeError:
             e = 'Selected PDF parser library is not supported: %s' % (self.library)
             raise NotImplementedError(e)
-
+            
         self.parser_func(f, fpath)
 
     def parse_txt(self, f, fpath):
@@ -296,12 +280,7 @@ def parse(self, path):
     argparser.add_argument('-o', dest='OUTPUT_FORMAT', default='csv', help='Output format (csv/json/yara)')
     argparser.add_argument('-d', dest='DEDUP', action='store_true', default=False, help='Deduplicate matches')
     argparser.add_argument('-l', dest='LIB', default='pdfminer', help='PDF parsing library (pypdf2/pdfminer)')
-    argparser.add_argument('-c', dest='CAMPAIGN', default='Unknown', help='Campaign attribution')
-    argparser.add_argument('--camp-conf', dest='CAMPAIGN_CONFIDENCE', default='low', help='Campaign confidence for crits')
-    argparser.add_argument('--confidence', dest='INDICATOR_CONFIDENCE', default='low', help='Indicator confidence for crits')
-    argparser.add_argument('--impact', dest='INDICATOR_IMPACT', default='low', help='Indicator impact for crits')
-    argparser.add_argument('-t', action='append', dest='TAGS', default=[], help='Bucket list tags for crits. Multiple -t options are allowed.')
     args = argparser.parse_args()
 
-    parser = IOC_Parser(args.INI, args.INPUT_FORMAT, args.OUTPUT_FORMAT, args.DEDUP, args.LIB, args.CAMPAIGN, args.CAMPAIGN_CONFIDENCE, args.INDICATOR_CONFIDENCE, args.INDICATOR_IMPACT, args.TAGS)
+    parser = IOC_Parser(args.INI, args.INPUT_FORMAT, args.OUTPUT_FORMAT, args.DEDUP, args.LIB)
     parser.parse(args.PATH)
diff --git a/output.py b/output.py
@@ -4,19 +4,6 @@
 import json
 
 OUTPUT_FORMATS = ('csv', 'json', 'yara', )
-TYPE_CONVERSION = {
-    "CVE" : "CVE",
-    "Filename" : "Windows - FileName",
-    "Host" : "URI - Domain Name",
-    "MD5" : "Hash - MD5",
-    "SHA1" : "Hash - SHA1",
-    "URL" : "URI - URL",
-    "Email" : "Email - Address",
-    "Filepath" : "Windows - FilePath",
-    "IP" : "Address - ipv4-addr",
-    "Registry" : "Windows - Registry",
-    "SHA256" : "Hash - SHA256"
-    }
 
 def getHandler(output_format):
     output_format = output_format.lower()
@@ -44,14 +31,13 @@ def print_error(self, fpath, exception):
 
 class OutputHandler_csv(OutputHandler):
     def __init__(self):
-        self.csv_writer = csv.writer(sys.stdout, delimiter = ',')
-	self.csv_writer.writerow(('Indicator', 'Type', 'Campaign', 'Campaign Confidence', 'Confidence', 'Impact', 'Bucket List'))
+        self.csv_writer = csv.writer(sys.stdout, delimiter = '\t')
 
-    def print_match(self, match, ind_type, campaign, campaign_confidence, confidence, impact, tags):
-        self.csv_writer.writerow((match, TYPE_CONVERSION[ind_type], campaign, campaign_confidence, confidence, impact, tags))
+    def print_match(self, fpath, page, name, match):
+        self.csv_writer.writerow((fpath, page, name, match))
 
     def print_error(self, fpath, exception):
-        self.csv_writer.writerow((exception, 'error', 'error', 'low', 'low', 'low', 'error'))
+        self.csv_writer.writerow((fpath, '0', 'error', exception))
 
 class OutputHandler_json(OutputHandler):
     def print_match(self, fpath, page, name, match):
-Original file line number
+Diff line change
@@ -1,3 +1,2 @@
     venv
-    *.pyc
-    *.swp
+    *.pyc