diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 000000000..83d3ab663 --- /dev/null +++ b/alembic.ini @@ -0,0 +1,74 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +script_location = viper/alembic + +# template used to generate migration files +# file_template = %%(rev)s_%%(slug)s + +# timezone to use when rendering the date +# within the migration file as well as the filename. +# string value is passed to dateutil.tz.gettz() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the +# "slug" field +#truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; this defaults +# to alembic/versions. When using multiple version +# directories, initial revisions must be specified with --version-path +# version_locations = %(here)s/bar %(here)s/bat alembic/versions + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +# this setting is used and needed for developers (e.g. 
when making changes) +sqlalchemy.url = sqlite:///viper.db + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/docs/source/customize/db-schema-changes.rst b/docs/source/customize/db-schema-changes.rst new file mode 100644 index 000000000..553ca30b5 --- /dev/null +++ b/docs/source/customize/db-schema-changes.rst @@ -0,0 +1,95 @@ +Making changes to the database schema +===================================== + +Viper uses the Alembic framework (http://alembic.zzzcomputing.com/en/latest/) to support making +and deploying changes to the database schema. Refer to this tutorial for a detailed introduction +to why Alembic is used: http://alembic.zzzcomputing.com/en/latest/tutorial.html#running-our-first-migration + +Configuration/Usage +=================== + +The **alembic.ini** file in the Viper root directory contains the URL to the database that will be used +when running the native alembic command line tools. Adapt it to your environment as needed (e.g. other +path/database type):: + + # this setting is used and needed for developers (e.g. when making changes) + sqlalchemy.url = sqlite:///viper.db + + +Useful commands are:: + + alembic current [--verbose] + alembic history [--verbose] + alembic heads [--verbose] + alembic revision --autogenerate -m "Added account table" + alembic upgrade head + alembic downgrade -1 + +Please be aware that the autogeneration feature might not work perfectly in all cases! 
+ + +A simple example +================ + +Let's assume you want to add another column to the *Malware* table. First update the class in database.py:: + + diff --git a/viper/core/database.py b/viper/core/database.py + index 1798871..a0633a5 100644 + --- a/viper/core/database.py + +++ b/viper/core/database.py + @@ -71,6 +71,7 @@ class Malware(Base): + sha1 = Column(String(40), nullable=False) + sha256 = Column(String(64), nullable=False, index=True) + sha512 = Column(String(128), nullable=False) + + sha4711 = Column(String(47), nullable=True) + ssdeep = Column(String(255), nullable=True) + created_at = Column(DateTime(timezone=False), default=datetime.now(), nullable=False) + parent_id = Column(Integer(), ForeignKey('malware.id')) + + +Then run the alembic revision command with the autogenerate flag and provide a short and meaningful message:: + + alembic revision --autogenerate -m "add sha4711 column" + INFO [alembic.runtime.migration] Context impl SQLiteImpl. + INFO [alembic.runtime.migration] Will assume non-transactional DDL. + INFO [alembic.autogenerate.compare] Detected added column 'malware.sha4711' + Generating /home/viper/viper/alembic/versions/446173d7559f_add_sha4711_column.py ... done + + +The next step is to check the generated file (viper/alembic/versions/446173d7559f_add_sha4711_column.py) and, +if needed, make adjustments. When the file is OK, run the following to actually upgrade the database:: + + alembic upgrade head + INFO [alembic.runtime.migration] Context impl SQLiteImpl. + INFO [alembic.runtime.migration] Will assume non-transactional DDL. 
+ INFO [alembic.runtime.migration] Running upgrade 74c7becae858 -> 446173d7559f, add sha4711 column + + +The alembic history will now also show this revision:: + + alembic history --verbose + Rev: 446173d7559f (head) + Parent: 74c7becae858 + Path: /home/viper/viper/viper/alembic/versions/446173d7559f_add_sha4711_column.py + + add sha4711 column + + Revision ID: 446173d7559f + Revises: 74c7becae858 + Create Date: 2018-02-18 19:41:09.453930 + + Rev: 74c7becae858 + Parent: + Path: /home/viper/viper/viper/alembic/versions/74c7becae858_initial_alembic_migration.py + + initial alembic migration + + Revision ID: 74c7becae858 + Revises: + Create Date: 2017-05-09 20:52:15.401889 + + +Make sure to include the new files in viper/alembic/versions/ in your git commits. + + +Viper is set up to automatically upgrade a user's database when it is not at the latest revision. diff --git a/docs/source/customize/index.rst b/docs/source/customize/index.rst index d9f3fc45b..fa0731f7f 100644 --- a/docs/source/customize/index.rst +++ b/docs/source/customize/index.rst @@ -1,263 +1,6 @@ -Create new modules -================== - -Viper in itself is simply a framework, modules are what give it analytical capabilities. We receive and include new modules all the time from contributors, but there are always new features to add. If you have an idea, you should implement a module for it and contribute it back to the community. - -The following paragraphs introduce you to the first steps to create a new module. - - -First steps ------------ - -First thing first, you need to create your *.py* script under the ``modules/`` directory: all modules are dynamically loaded by Viper from that folder exclusively. You can create subfolders and place your modules anywhere, Viper will be able to find them. - -Any module needs to have some basic attributes that will make it recognizable. 
It needs to be a Python class inheriting ``Module``, it needs to have a ``cmd`` and ``description`` attribute and it needs to have a ``run()`` function. For example the following would be a valid, although not very useful, Viper module: - - .. code-block:: python - :linenos: - - from viper.common.abstracts import Module - - class MyModule(Module): - cmd = 'mycmd' - description = 'This module does this and that' - - def run(self): - print("Do something.") - - -Arguments ---------- - -When a module is invoked from the Viper shell it can be provided with a number of arguments and options. These should be parsed with the python ``argparse`` module as show in the example below. - - - .. code-block:: python - :linenos: - - from viper.common.abstracts import Module - - class MyModule(ModuleName): - cmd = 'mycmd' - description = 'This module does this and that' - authors = ['YourName'] - - def __init__(self): - super(ModuleName, self).__init__() - self.parser.add_argument('-t', '--this', action='store_true', help='Do This Thing') - self.parser.add_argument('-b', '--that', action='store_true', help='Do That') - - def run(self): - if self.args.this: - print("This is FOO") - elif self.args.that: - print("That is FOO") - -Using the Config File ---------------------- - -Viper provides a config file that will allow you to store user editable sections in a single file rather than inside the modules. - - /usr/share/viper/viper.conf.sample - -You can easily access the config file: - - .. code-block:: python - :linenos: - - from viper.core.config import __config__ - - cfg = __config__ - - -From here you can access any element in the config file by name: - - .. 
code-block:: python - :linenos: - - from viper.core.config import Config - - cfg = Config() - - config_item = cfg.modulename.config_item - - # Example Getting VirusTotal Key - - vt_key = cfg.virustotal.virustotal_key - - - -Using common config settings for outbound http connections ----------------------------------------------------------- - -A common use case for modules is to implement the API of an external web service (e.g. https://koodous.com/). -The (great!) requests library (https://github.com/requests/requests/) provides an easy interface for making -outbound http connections. -Viper provides a global configuration section ``[http_client]`` where certain requests options can be set -for Proxies, TLS Verfication, CA_BUNDLE and TLS Client Certificates. -Please check the current ``viper.conf.sample`` for more details. - -When implementing a custom module settings from the global ``[http_client]]`` can be overridden by specifying -them again in the configuration section of the custom module and then calling the ``Config.parse_http_client`` -method for the custom module configuration section. Example: - - .. code-block:: ini - :linenos: - - # viper.conf - - [http_client] - https_proxy = http://prx1.example.internal:3128 - tls_verify = True - - [mymodule] - base_url = https://myapi.example.internal - https_proxy = False - tls_verify = False - - - .. 
code-block:: python - :linenos: - - import requests - from viper.common.abstracts import Module - from viper.core.config import __config__ - - cfg = __config__ - cfg.parse_http_client(cfg.mymodule) - - class MyModule(Module): - cmd = 'mycmd' - description = 'This module does this and that' - - def run(self): - url = cfg.mymodule.base_url - r = requests.get(url=url, headers=headers, proxies=cfg.mymodule.proxies, verify=cfg.mymodule.verify, cert=cfg.mymodule.cert) - - -Accessing the session ---------------------- - -In most cases, you will probably want to execute some analysis function on the currently opened file and in order to do so you'll need to access the session. Sessions are internally made available through a global object called ``__sessions__``, which has the following attributes: - - * ``__sessions__.current``: a ``Session`` object for the currently opened file. - * ``__sessions__.sessions``: the list of all ``Session`` objects opened during the current Viper execution. - * ``__sessions__.find``: a list contains all the results from the last executed ``find`` command. - -A ``Session`` object has the following attributes: - - * ``Session.id``: an incremental ID for the session. - * ``Session.created_at``: the date and time when the session was opened. - * ``Session.file``: a ``File`` object containing common attributes of the currently opened file (generally speaking, the same information returned by the ``info`` command). 
- -Following are the information available on the opened file: - - * ``__sessions__.current.file.path`` - * ``__sessions__.current.file.name`` - * ``__sessions__.current.file.size`` - * ``__sessions__.current.file.type`` - * ``__sessions__.current.file.mime`` - * ``__sessions__.current.file.md5`` - * ``__sessions__.current.file.sha1`` - * ``__sessions__.current.file.sha256`` - * ``__sessions__.current.file.sha512`` - * ``__sessions__.current.file.crc32`` - * ``__sessions__.current.file.ssdeep`` - * ``__sessions__.current.file.tags`` - -Here is an example: - - .. code-block:: python - :linenos: - - from viper.common.abstracts import Module - from viper.core.session import __sessions__ - - class MyModule(Module): - cmd = 'mycmd' - description = 'This module does this and that' - - def run(self): - # Check if there is an open session. - if not __sessions__.is_set(): - # No open session. - return - - # Print attributes of the opened file. - print("MD5: " + __sessions__.current.file.md5) - - # Do something to the file. - do_something(__sessions__.current.file.path) - - -Accessing the database ----------------------- - -In case you're interested in automatically retreiving all files stored in the local repository or just a subset, you'll need to access the local database. Viper provides an interface called ``Database()`` to be imported from ``viper.core.database``. - -You can then use the ``find()`` function, specify a key and an optional value and you will obtain a list of objects you can loop through. For example: - - .. code-block:: python - :linenos: - - from viper.common.abstracts import Module - from viper.core.database import Database - - class MyModule(Module): - cmd = 'mycmd' - description = 'This module does this and that' - - def run(self): - db = Database() - # Obtain the list of all stored samples. - samples = db.find(key='all') - - # Obtain the list of all samples matching a tag. 
- samples = db.find(key='tag', value='apt') - - # Obtain the list of all samples with notes matching a pattern. - samples = db.find(key='note', value='maliciousdomain.tld') - - # Loop through results. - for sample in samples: - print("Sample " + sample.md5) - - -Printing results ----------------- - -Viper provides several function to facilitate and standardize the output of your modules. Viper uses a logging function to return the output to the console or web application. -The format is ``self.log('type', "Your Text")`` and the following types are made available in Viper. - - * ``info``: prints the message with a ``[*]`` prefix. - * ``warning``: prints the message with a yellow ``[!]`` prefix. - * ``error``: prints the message with a red ``[!]`` prefix. - * ``success``: prints the message with a green ``[+]`` prefix. - * ``item``: prints an item from a list. - * ``table``: prints a table with headers and rows. - -You can also easily print tables, such as in the following example: - - .. code-block:: python - :linenos: - - from viper.common.abstracts import Module - - class MyModule(Module): - cmd = 'mycmd' - description = 'This module does this and that' - - def run(self): - self.log('info', "This is Something") - self.log('warning', "This is the warning Text") - - # This is the header of the table. - header = ['Column 1', 'Column 2'] - # These are the rows. - rows = [ - ['Row 1', 'Row 1'], - ['Row 2', 'Row 2'] - ] - - self.log('table', dict(header=header, rows=rows)) +Usage +===== +.. toctree:: + new-modules + db-schema-changes diff --git a/docs/source/customize/new-modules.rst b/docs/source/customize/new-modules.rst new file mode 100644 index 000000000..d9f3fc45b --- /dev/null +++ b/docs/source/customize/new-modules.rst @@ -0,0 +1,263 @@ +Create new modules +================== + +Viper in itself is simply a framework, modules are what give it analytical capabilities. 
We receive and include new modules all the time from contributors, but there are always new features to add. If you have an idea, you should implement a module for it and contribute it back to the community. + +The following paragraphs introduce you to the first steps to create a new module. + + +First steps +----------- + +First thing first, you need to create your *.py* script under the ``modules/`` directory: all modules are dynamically loaded by Viper from that folder exclusively. You can create subfolders and place your modules anywhere, Viper will be able to find them. + +Any module needs to have some basic attributes that will make it recognizable. It needs to be a Python class inheriting ``Module``, it needs to have a ``cmd`` and ``description`` attribute and it needs to have a ``run()`` function. For example the following would be a valid, although not very useful, Viper module: + + .. code-block:: python + :linenos: + + from viper.common.abstracts import Module + + class MyModule(Module): + cmd = 'mycmd' + description = 'This module does this and that' + + def run(self): + print("Do something.") + + +Arguments +--------- + +When a module is invoked from the Viper shell it can be provided with a number of arguments and options. These should be parsed with the python ``argparse`` module as show in the example below. + + + .. 
code-block:: python + :linenos: + + from viper.common.abstracts import Module + + class MyModule(Module): + cmd = 'mycmd' + description = 'This module does this and that' + authors = ['YourName'] + + def __init__(self): + super(MyModule, self).__init__() + self.parser.add_argument('-t', '--this', action='store_true', help='Do This Thing') + self.parser.add_argument('-b', '--that', action='store_true', help='Do That') + + def run(self): + if self.args.this: + print("This is FOO") + elif self.args.that: + print("That is FOO") + +Using the Config File +--------------------- + +Viper provides a config file that will allow you to store user editable sections in a single file rather than inside the modules. + + /usr/share/viper/viper.conf.sample + +You can easily access the config file: + + .. code-block:: python + :linenos: + + from viper.core.config import __config__ + + cfg = __config__ + + +From here you can access any element in the config file by name: + + .. code-block:: python + :linenos: + + from viper.core.config import Config + + cfg = Config() + + config_item = cfg.modulename.config_item + + # Example Getting VirusTotal Key + + vt_key = cfg.virustotal.virustotal_key + + + +Using common config settings for outbound http connections +---------------------------------------------------------- + +A common use case for modules is to implement the API of an external web service (e.g. https://koodous.com/). +The (great!) requests library (https://github.com/requests/requests/) provides an easy interface for making +outbound http connections. +Viper provides a global configuration section ``[http_client]`` where certain requests options can be set +for Proxies, TLS Verification, CA_BUNDLE and TLS Client Certificates. +Please check the current ``viper.conf.sample`` for more details. 
+ +When implementing a custom module, settings from the global ``[http_client]`` section can be overridden by specifying +them again in the configuration section of the custom module and then calling the ``Config.parse_http_client`` +method for the custom module configuration section. Example: + + .. code-block:: ini + :linenos: + + # viper.conf + + [http_client] + https_proxy = http://prx1.example.internal:3128 + tls_verify = True + + [mymodule] + base_url = https://myapi.example.internal + https_proxy = False + tls_verify = False + + + .. code-block:: python + :linenos: + + import requests + from viper.common.abstracts import Module + from viper.core.config import __config__ + + cfg = __config__ + cfg.parse_http_client(cfg.mymodule) + + class MyModule(Module): + cmd = 'mycmd' + description = 'This module does this and that' + + def run(self): + url = cfg.mymodule.base_url + r = requests.get(url=url, headers=headers, proxies=cfg.mymodule.proxies, verify=cfg.mymodule.verify, cert=cfg.mymodule.cert) + + +Accessing the session +--------------------- + +In most cases, you will probably want to execute some analysis function on the currently opened file and in order to do so you'll need to access the session. Sessions are internally made available through a global object called ``__sessions__``, which has the following attributes: + + * ``__sessions__.current``: a ``Session`` object for the currently opened file. + * ``__sessions__.sessions``: the list of all ``Session`` objects opened during the current Viper execution. + * ``__sessions__.find``: a list containing all the results from the last executed ``find`` command. + +A ``Session`` object has the following attributes: + + * ``Session.id``: an incremental ID for the session. + * ``Session.created_at``: the date and time when the session was opened. + * ``Session.file``: a ``File`` object containing common attributes of the currently opened file (generally speaking, the same information returned by the ``info`` command). 
+ +Following are the information available on the opened file: + + * ``__sessions__.current.file.path`` + * ``__sessions__.current.file.name`` + * ``__sessions__.current.file.size`` + * ``__sessions__.current.file.type`` + * ``__sessions__.current.file.mime`` + * ``__sessions__.current.file.md5`` + * ``__sessions__.current.file.sha1`` + * ``__sessions__.current.file.sha256`` + * ``__sessions__.current.file.sha512`` + * ``__sessions__.current.file.crc32`` + * ``__sessions__.current.file.ssdeep`` + * ``__sessions__.current.file.tags`` + +Here is an example: + + .. code-block:: python + :linenos: + + from viper.common.abstracts import Module + from viper.core.session import __sessions__ + + class MyModule(Module): + cmd = 'mycmd' + description = 'This module does this and that' + + def run(self): + # Check if there is an open session. + if not __sessions__.is_set(): + # No open session. + return + + # Print attributes of the opened file. + print("MD5: " + __sessions__.current.file.md5) + + # Do something to the file. + do_something(__sessions__.current.file.path) + + +Accessing the database +---------------------- + +In case you're interested in automatically retreiving all files stored in the local repository or just a subset, you'll need to access the local database. Viper provides an interface called ``Database()`` to be imported from ``viper.core.database``. + +You can then use the ``find()`` function, specify a key and an optional value and you will obtain a list of objects you can loop through. For example: + + .. code-block:: python + :linenos: + + from viper.common.abstracts import Module + from viper.core.database import Database + + class MyModule(Module): + cmd = 'mycmd' + description = 'This module does this and that' + + def run(self): + db = Database() + # Obtain the list of all stored samples. + samples = db.find(key='all') + + # Obtain the list of all samples matching a tag. 
+ samples = db.find(key='tag', value='apt') + + # Obtain the list of all samples with notes matching a pattern. + samples = db.find(key='note', value='maliciousdomain.tld') + + # Loop through results. + for sample in samples: + print("Sample " + sample.md5) + + +Printing results +---------------- + +Viper provides several function to facilitate and standardize the output of your modules. Viper uses a logging function to return the output to the console or web application. +The format is ``self.log('type', "Your Text")`` and the following types are made available in Viper. + + * ``info``: prints the message with a ``[*]`` prefix. + * ``warning``: prints the message with a yellow ``[!]`` prefix. + * ``error``: prints the message with a red ``[!]`` prefix. + * ``success``: prints the message with a green ``[+]`` prefix. + * ``item``: prints an item from a list. + * ``table``: prints a table with headers and rows. + +You can also easily print tables, such as in the following example: + + .. code-block:: python + :linenos: + + from viper.common.abstracts import Module + + class MyModule(Module): + cmd = 'mycmd' + description = 'This module does this and that' + + def run(self): + self.log('info', "This is Something") + self.log('warning', "This is the warning Text") + + # This is the header of the table. + header = ['Column 1', 'Column 2'] + # These are the rows. 
+ rows = [ + ['Row 1', 'Row 1'], + ['Row 2', 'Row 2'] + ] + + self.log('table', dict(header=header, rows=rows)) + diff --git a/requirements-base.txt b/requirements-base.txt index a71c06ce9..4bdfd5e74 100644 --- a/requirements-base.txt +++ b/requirements-base.txt @@ -1,3 +1,4 @@ +alembic bitstring==3.1.5 pbkdf2==1.3 python-dateutil==2.6.1 diff --git a/viper-update b/viper-update index b1cc3c41e..7d2253dd6 100755 --- a/viper-update +++ b/viper-update @@ -5,11 +5,11 @@ import os import sys +import logging import hashlib from zipfile import ZipFile from optparse import OptionParser -from sqlalchemy import create_engine -from datetime import datetime +import glob try: from io import BytesIO @@ -20,9 +20,10 @@ from viper.common.out import print_info from viper.common.out import print_warning from viper.common.out import print_error from viper.common.out import print_success -from viper.common.out import print_item from viper.common.network import download from viper.common.objects import File +from viper.core.database import upgrade_database +from viper.core.logger import init_logger from viper.core.config import __config__ from viper.core.project import __project__ @@ -33,6 +34,8 @@ try: except NameError: pass +log = logging.getLogger('viper') + url = 'https://github.com/viper-framework/viper/archive/master.zip' @@ -145,74 +148,11 @@ def update(): zip_data.close() -def update_db(): - print_item("Backing up Sqlite DB") - - # backup of database name with a timestamp to avoid to be overwritten - db_backupname = "viper_{0}.db.bak".format(datetime.utcnow().strftime("%Y%m%d-%H%M%S")) - - try: - os.rename('viper.db', db_backupname) - except Exception as e: - print_error("Failed to Backup. 
{0} Stopping".format(e)) - return - - print_item("Creating New DataBase File") - from viper.core.database import Database - Database() - - print_item("Connecting to Viper Databases") - old_engine = create_engine('sqlite:///{0}'.format(db_backupname)) - db_path = os.path.join(__project__.get_path(), 'viper.db') - new_engine = create_engine('sqlite:///{0}'.format(db_path)) - - print_item("Reading data from Old Database") - malware = old_engine.execute('SELECT * FROM malware').fetchall() - association = old_engine.execute('SELECT * FROM association').fetchall() - notes = old_engine.execute('SELECT * FROM note').fetchall() - tags = old_engine.execute('SELECT * FROM tag').fetchall() - - print_item(" Adding rows to New Database") - - # Add all the rows back in - for row in notes: - new_engine.execute("INSERT INTO note VALUES ('{0}', '{1}', '{2}')".format(row[0], row[1], row[2])) - - for row in tags: - new_engine.execute("INSERT INTO tag VALUES ('{0}', '{1}')".format(row[0], row[1])) - - for row in malware: - new_engine.execute("INSERT INTO malware VALUES ('{0}', '{1}', '{2}', '{3}', '{4}', " - "'{5}', '{6}', '{7}', '{8}', '{9}', '{10}', '{11}', 'Null')".format(row[0], row[1], row[2], - row[3], row[4], row[5], - row[6], row[7], row[8], - row[9], row[10], row[11]) - ) - - # Rebuild association table with foreign keys - for row in association: - if row[0] is None: - tag_id = "Null" - else: - tag_id = "(SELECT id from tag WHERE id='{0}')".format(row[0]) - if row[1] is None: - note_id = "Null" - else: - note_id = "(SELECT id from note WHERE id='{0}')".format(row[1]) - if row[2] is None: - malware_id = "Null" - else: - malware_id = "(SELECT id from malware WHERE id='{0}')".format(row[2]) - - new_engine.execute("INSERT INTO association VALUES ({0}, {1}, {2}, 'Null')".format(tag_id, note_id, malware_id)) - - print_info("Update Complete") - - if __name__ == '__main__': - parser = OptionParser(usage='usage: %prog -c|-d') + parser = OptionParser(usage='usage: %prog -c|-d 
[--verbose]') parser.add_option("-d", "--db", action='store_true', default=False, help="Update DB Tables") parser.add_option("-c", "--core", action='store_true', default=False, help="Update Core Files") + parser.add_option("-v", "--verbose", action='store_true', default=False, help="Print more progress messages") (options, args) = parser.parse_args() @@ -226,12 +166,43 @@ if __name__ == '__main__': parser.print_help() sys.exit() + init_logger(log_file_path="./viper-web.log", debug=True) + log.info("starting viper-update") + if options.db: - print_warning("To update Projects you will need to copy their viper.db file in to the main viper folder") - print_warning("Run the DB update then move the new db file back to the project folder. ") - print_info("Updating to New DB format") + # look up the database type from viper.conf ([database] - connection) + cfg = __config__ + connection = cfg.database.connection + + if connection.startswith("mysql+pymysql"): + db_type = "mysql" + elif connection.startswith("mysql"): + db_type = "mysql" + elif connection.startswith("postgresql"): + db_type = "postgresql" + else: + db_type = "sqlite" + + if db_type == "sqlite": + print_info("Detected Database Backend: sqlite (Backups supported)") + + dbs = [("{}/viper.db".format(__project__.base_path))] + dbs.extend(glob.glob("{}/projects/*/viper.db".format(__project__.base_path))) - update_db() + for db_path in dbs: + db_url = "sqlite:///{}".format(db_path) + upgrade_database(db_url, db_type, verbose=options.verbose) + + else: + print_info("Detected Database Backend: Non sqlite (Backups are _not_ supported!)") + print_warning("Backups are currently only supported on sqlite DB. You need to create a backup manually!") + choice = input("Are you sure you want to proceed? 
[y/N] ") + + if choice.lower() != 'y': + sys.exit() + + upgrade_database(connection, db_type, verbose=options.verbose) if options.core: update() + diff --git a/viper/alembic/env.py b/viper/alembic/env.py new file mode 100644 index 000000000..bd7947378 --- /dev/null +++ b/viper/alembic/env.py @@ -0,0 +1,81 @@ +from __future__ import with_statement +from alembic import context +from sqlalchemy import create_engine, engine_from_config, pool +from logging.config import fileConfig + +import os +import sys + +parent_dir = os.path.abspath(os.path.join(os.getcwd())) +sys.path.append(parent_dir) +from viper.core.database import Base # noqa + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +try: + fileConfig(config.config_file_name) +except TypeError: + pass + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +target_metadata = Base.metadata + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + +def run_migrations_offline(): + """Run migrations in 'offline' mode. + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + Calls to context.execute() here emit the given string to the + script output. + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, target_metadata=target_metadata, literal_binds=True) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online(): + """Run migrations in 'online' mode. + In this scenario we need to create an Engine + and associate a connection with the context. 
+ """ + + cmd_line_url = context.get_x_argument(as_dictionary=True).get('dbname') + if cmd_line_url: + connectable = create_engine(cmd_line_url) + else: + connectable = engine_from_config( + config.get_section(config.config_ini_section), + prefix='sqlalchemy.', + poolclass=pool.NullPool) + + with connectable.connect() as connection: + context.configure( + connection=connection, + render_as_batch=True, + target_metadata=target_metadata + ) + + with context.begin_transaction(): + context.run_migrations() + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() + diff --git a/viper/alembic/script.py.mako b/viper/alembic/script.py.mako new file mode 100644 index 000000000..2c0156303 --- /dev/null +++ b/viper/alembic/script.py.mako @@ -0,0 +1,24 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade(): + ${upgrades if upgrades else "pass"} + + +def downgrade(): + ${downgrades if downgrades else "pass"} diff --git a/viper/alembic/versions/74c7becae858_initial_alembic_migration.py b/viper/alembic/versions/74c7becae858_initial_alembic_migration.py new file mode 100644 index 000000000..4d77ca1db --- /dev/null +++ b/viper/alembic/versions/74c7becae858_initial_alembic_migration.py @@ -0,0 +1,80 @@ +"""initial alembic migration + +Revision ID: 74c7becae858 +Revises: +Create Date: 2017-05-09 20:52:15.401889 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = '74c7becae858' +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade(): + # ### Viper Database on 2017-10-30 ### + op.create_table('analysis', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('cmd_line', sa.String(length=255), nullable=True), + sa.Column('results', sa.Text(), nullable=False), + sa.Column('stored_at', sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('malware', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=True), + sa.Column('size', sa.Integer(), nullable=False), + sa.Column('type', sa.Text(), nullable=True), + sa.Column('mime', sa.String(length=255), nullable=True), + sa.Column('md5', sa.String(length=32), nullable=False), + sa.Column('crc32', sa.String(length=8), nullable=False), + sa.Column('sha1', sa.String(length=40), nullable=False), + sa.Column('sha256', sa.String(length=64), nullable=False), + sa.Column('sha512', sa.String(length=128), nullable=False), + sa.Column('ssdeep', sa.String(length=255), nullable=True), + sa.Column('created_at', sa.DateTime(), nullable=False), + sa.Column('parent_id', sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(['parent_id'], ['malware.id'], ), + sa.PrimaryKeyConstraint('id') + ) + with op.batch_alter_table('malware', schema=None) as batch_op: + batch_op.create_index('hash_index', ['md5', 'crc32', 'sha1', 'sha256', 'sha512'], unique=True) + batch_op.create_index(batch_op.f('ix_malware_md5'), ['md5'], unique=False) + batch_op.create_index(batch_op.f('ix_malware_sha256'), ['sha256'], unique=False) + + op.create_table('note', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('title', sa.String(length=255), nullable=True), + sa.Column('body', sa.Text(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('tag', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('tag', sa.String(length=255), nullable=False), + 
sa.PrimaryKeyConstraint('id') + ) + with op.batch_alter_table('tag', schema=None) as batch_op: + batch_op.create_index(batch_op.f('ix_tag_tag'), ['tag'], unique=True) + + op.create_table('association', + sa.Column('tag_id', sa.Integer(), nullable=True), + sa.Column('note_id', sa.Integer(), nullable=True), + sa.Column('malware_id', sa.Integer(), nullable=True), + sa.Column('analysis_id', sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(['analysis_id'], ['analysis.id'], ), + sa.ForeignKeyConstraint(['malware_id'], ['malware.id'], ), + sa.ForeignKeyConstraint(['note_id'], ['note.id'], ), + sa.ForeignKeyConstraint(['tag_id'], ['tag.id'], ) + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### there is no downgrade from here ### + pass + # ### end Alembic commands ### diff --git a/viper/core/database.py b/viper/core/database.py index c7c94067d..44f88ac16 100644 --- a/viper/core/database.py +++ b/viper/core/database.py @@ -3,20 +3,27 @@ # See the file 'LICENSE' for copying permission. 
import os +import shutil import sys import json import logging from datetime import datetime from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, Text -from sqlalchemy import Table, Index, create_engine, and_ +from sqlalchemy import Table, Index, MetaData, create_engine, and_ + from sqlalchemy.pool import NullPool from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import relationship, backref, sessionmaker from sqlalchemy.orm import subqueryload from sqlalchemy.exc import SQLAlchemyError, IntegrityError -from viper.common.out import print_warning, print_error, print_success +from alembic import command +from alembic.config import Config as AlembicConfig +from alembic.migration import MigrationContext +from alembic.script import ScriptDirectory + +from viper.common.out import print_error, print_info, print_item, print_success, print_warning from viper.common.exceptions import Python2UnsupportedUnicode from viper.common.objects import File from viper.core.storage import get_sample_path, store_sample @@ -28,8 +35,19 @@ cfg = __config__ +INITIAL_ALEMBIC_DB_REVISION = "74c7becae858" + Base = declarative_base() +# http://alembic.zzzcomputing.com/en/latest/naming.html +Base.metadata = MetaData(naming_convention={ + "ix": 'ix_%(column_0_label)s', + "uq": "uq_%(table_name)s_%(column_0_name)s", + "ck": "ck_%(table_name)s_%(constraint_name)s", + "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s", + "pk": "pk_%(table_name)s" +}) + association_table = Table( 'association', Base.metadata, @@ -193,6 +211,9 @@ class Database: def __init__(self): + self.url = None + self.type = None # either sqlite, mysql or postgresql + if cfg.database and cfg.database.connection: self._connect_database(cfg.database.connection) else: @@ -204,6 +225,9 @@ def __init__(self): Base.metadata.create_all(self.engine) self.Session = sessionmaker(bind=self.engine) + if not check_database(self.url): + upgrade_database(self.url, self.type, 
# Tables of the pre-Alembic schema, in the order they are read and validated:
# (table name, label used in the debug log, expected number of columns)
_LEGACY_TABLES = (
    ("malware", "malware", 13),
    ("analysis", "analysis", 4),
    ("association", "association", 4),
    ("note", "notes", 3),
    ("tag", "tags", 2),
)

# Order in which the plain tables are filled again. The association table is
# handled separately afterwards because it references all of the others.
_LEGACY_REINSERT_ORDER = ("analysis", "note", "tag", "malware")

# DROP statements per database type. A type that is missing here cannot be
# migrated to Alembic management (yet).
_LEGACY_DROP_STATEMENTS = {
    "sqlite": (
        "DROP TABLE analysis;",
        "DROP TABLE note;",
        "DROP TABLE tag;",
        "DROP TABLE association;",
        "DROP TABLE malware;",
    ),
    "postgresql": (
        "DROP TABLE malware CASCADE;",
        "DROP TABLE association CASCADE;",
        "DROP TABLE analysis CASCADE;",
        "DROP TABLE note CASCADE;",
        "DROP TABLE tag CASCADE;",
    ),
}


def backup_database(database_url, sqlite=True, verbose=False):
    """ copy a SQLite database file to a timestamped backup next to it

    Databases other than SQLite are not backed up; only a notice is printed.

    :param database_url: e.g. sqlite:///viper.db
    :type database_url: String
    :param sqlite: True if the database is a SQLite file
    :type sqlite: Boolean
    :param verbose: If True, print the path of the backup file
    :type verbose: Boolean
    :returns: False if copying the file failed, True otherwise (backup
              written, or skipped for a non SQLite database)
    :rtype: Boolean
    :raises Exception: if sqlite is True and the URL does not start with sqlite:///
    """
    if not sqlite:
        print_info("Skipping DB backup for non sqlite DB (e.g. MariaDB/PostgreSQL)")
        return True

    sqlite_prefix = 'sqlite:///'
    if not database_url.startswith(sqlite_prefix):
        raise Exception("Malformed sqlite database URL (should start with sqlite:///): {}".format(database_url))

    # everything after the scheme prefix is the path of the database file
    database_path = database_url[len(sqlite_prefix):]

    # the timestamp in the name keeps earlier backups from being overwritten
    backup_name = "viper_db_backup_{0}.db".format(datetime.utcnow().strftime("%Y%m%d-%H%M%S"))
    db_backup_path = os.path.join(os.path.dirname(database_path), backup_name)
    if verbose:
        print_item("Backing up Sqlite DB to: {}".format(db_backup_path))

    try:
        shutil.copy(database_path, db_backup_path)
    except (IOError, OSError, shutil.Error) as e:
        print_error("Failed to Backup. {0} Stopping".format(e))
        # report the failure so that the caller can really stop
        return False

    return True


def _legacy_schema_is_valid(rows_by_table):
    """ check the column count of the first row of every legacy table

    Tables without rows cannot be checked and are accepted as they are.
    """
    is_valid = True
    for table_name, label, expected_columns in _LEGACY_TABLES:
        rows = rows_by_table[table_name]
        if not rows:
            log.debug("# cols {}: no rows".format(label))
            continue

        log.debug("# cols {}: {}".format(label, len(rows[0])))
        if len(rows[0]) != expected_columns:
            is_valid = False

    return is_valid


def _positional_params(row, column_count):
    """ map the first column_count values of a row to the bind names p0, p1, ... """
    return dict(("p{}".format(index), row[index]) for index in range(column_count))


# SQLAlchemy/Alembic database migration (update)
def _migrate_db_to_alembic_management(db_url, db_type, rev, alembic_cfg=None, engine=None, verbose=False):
    """ migrate a non alembic database to a specified revision

    All rows are read into memory, the old tables are dropped, the schema of
    the given revision is created by Alembic and the rows are written back.
    Values are written back positionally, i.e. the old tables are expected to
    have their columns in the same order as the tables of the given revision.

    The process is terminated (exit code 1) if the database type is not
    supported or if the old tables do not have the expected number of
    columns. In both cases nothing has been changed at that point.

    :param db_url: e.g. sqlite:///viper.db
    :type db_url: String
    :param db_type: e.g. sqlite, mysql, postgresql
    :type db_type: String
    :param rev: Alembic revision string which should be used
    :type rev: String
    :param alembic_cfg: configured AlembicConfig instance
    :type alembic_cfg: object
    :param engine: connected SQL Alchemy engine instance
    :type engine: object
    :param verbose: If True, print more status messages
    :type verbose: Boolean
    """
    from sqlalchemy import text  # local import: only needed for this one-off migration

    # Without DROP statements the old tables would still exist when Alembic
    # tries to create them. Refuse early instead of failing half way through.
    drop_statements = _LEGACY_DROP_STATEMENTS.get(db_type)
    if drop_statements is None:
        print_error("Migrating a {} database to Alembic is not supported - Exiting!".format(db_type))
        sys.exit(1)

    if not alembic_cfg:
        # set URL and setup Alembic config
        alembic_cfg = AlembicConfig()
        alembic_cfg.set_main_option("script_location", "viper:alembic")
        alembic_cfg.set_main_option("sqlalchemy.url", db_url)

    if not engine:
        # setup SQLAlchemy engine and connect to db
        engine = create_engine(db_url)

    if verbose:
        print_item("Reading data from Database")
    log.debug("Reading data from Database")

    rows_by_table = {}
    for table_name, _label, _expected_columns in _LEGACY_TABLES:
        rows_by_table[table_name] = engine.execute('SELECT * FROM {}'.format(table_name)).fetchall()

    if not _legacy_schema_is_valid(rows_by_table):
        log.debug("failed to validate old DB schema")
        print_error("Unsupported DB state - Exiting!")
        sys.exit(1)
    log.debug("successfully validated old DB schema")

    if verbose:
        print_item("Dropping tables from Database")
    for statement in drop_statements:
        engine.execute(statement)

    # re-create tables according to initial rev schema
    if verbose:
        print_item("Creating initial schema in Database (Revision: {})".format(rev))
    command.upgrade(alembic_cfg, rev)

    if verbose:
        print_item("Inserting data back into Database")

    column_counts = dict((table_name, count) for table_name, _label, count in _LEGACY_TABLES)

    # Bound parameters instead of string formatting: values containing quotes
    # survive and None is stored as NULL (not as the text 'None'). A single
    # transaction makes sure a failure does not leave half of the rows behind.
    with engine.begin() as connection:
        for table_name in _LEGACY_REINSERT_ORDER:
            rows = rows_by_table[table_name]
            if not rows:
                continue

            column_count = column_counts[table_name]
            placeholders = ", ".join(":p{}".format(index) for index in range(column_count))
            insert = text("INSERT INTO {} VALUES ({})".format(table_name, placeholders))
            connection.execute(insert, [_positional_params(row, column_count) for row in rows])

        # Rebuild association table with foreign keys. Each sub-select yields
        # NULL when the id is NULL or the referenced row does not exist.
        association_rows = rows_by_table["association"]
        if association_rows:
            insert = text(
                "INSERT INTO association VALUES ("
                "(SELECT id FROM tag WHERE id = :tag_id), "
                "(SELECT id FROM note WHERE id = :note_id), "
                "(SELECT id FROM malware WHERE id = :malware_id), "
                "(SELECT id FROM analysis WHERE id = :analysis_id))"
            )
            connection.execute(insert, [
                {"tag_id": row[0], "note_id": row[1], "malware_id": row[2], "analysis_id": row[3]}
                for row in association_rows
            ])


def _is_alembic_enabled(engine):
    """ return True if the database has a current Alembic revision """
    # the context manager gives the connection back as soon as we are done
    with engine.connect() as connection:
        context = MigrationContext.configure(connection)
        return bool(context.get_current_revision())


def _is_alembic_up2date_with_rev(engine, rev):
    """ return True if the current Alembic revision of the database equals rev """
    with engine.connect() as connection:
        context = MigrationContext.configure(connection)
        return context.get_current_revision() == rev


def _get_current_script_head(alembic_cfg):
    """ return the head revision of the migration scripts configured in alembic_cfg """
    script = ScriptDirectory.from_config(alembic_cfg)
    return script.get_current_head()


def check_database(database_url):
    """ check whether a database is at the head revision of the migration scripts

    :param database_url: e.g. sqlite:///viper.db
    :type database_url: String
    :returns: True if the database has an Alembic revision and it equals the
              current script head, False otherwise
    :rtype: Boolean
    """
    # set URL and setup Alembic config
    alembic_cfg = AlembicConfig()
    alembic_cfg.set_main_option("script_location", "viper:alembic")
    alembic_cfg.set_main_option("sqlalchemy.url", database_url)

    engine = create_engine(database_url)
    try:
        if not _is_alembic_enabled(engine):
            return False

        current_head = _get_current_script_head(alembic_cfg)
        return _is_alembic_up2date_with_rev(engine, current_head)
    finally:
        # this engine only exists for the check
        engine.dispose()


def upgrade_database(db_url, db_type, create_backup=True, verbose=False):
    """ bring a database to the head revision of the migration scripts

    A database that has never seen an Alembic migration is first converted to
    the initial Alembic revision. Nothing is changed if the database is
    already up to date or if the requested backup could not be created.

    :param db_url: e.g. sqlite:///viper.db
    :type db_url: String
    :param db_type: e.g. sqlite, mysql, postgresql
    :type db_type: String
    :param create_backup: If True, back up the database first (SQLite only)
    :type create_backup: Boolean
    :param verbose: If True, print more status messages
    :type verbose: Boolean
    """
    if check_database(db_url):
        print_info("Already up2date!")
        return

    if create_backup:
        # Tables may get dropped below - do not go on without the backup.
        # backup_database() has already printed the reason for the failure.
        if not backup_database(db_url, sqlite=(db_type == "sqlite"), verbose=verbose):
            return

    # set URL and setup Alembic config
    alembic_cfg = AlembicConfig()
    alembic_cfg.set_main_option("script_location", "viper:alembic")
    alembic_cfg.set_main_option("sqlalchemy.url", db_url)

    # setup SQLAlchemy engine and connect to db
    if verbose:
        print_item("Connecting to Viper Databases: {}".format(db_url))
    engine = create_engine(db_url)

    try:
        if not _is_alembic_enabled(engine):
            log.warning("Database ({}) has never seen an Alembic migration".format(db_url))
            if verbose:
                print_warning("Database ({}) has never seen an Alembic migration".format(db_url))

            # migrate to initial alembic revision for Viper
            _migrate_db_to_alembic_management(db_url, db_type, INITIAL_ALEMBIC_DB_REVISION,
                                              alembic_cfg, engine, verbose=verbose)
        else:
            log.debug("is_alembic_enabled: True")

        current_head = _get_current_script_head(alembic_cfg)
        if _is_alembic_up2date_with_rev(engine, current_head):
            log.debug("is_alembic_up2date_with_rev (Rev: {}): True".format(current_head))
            if verbose:
                print_item("Database is now up-to-date")
        else:
            log.debug("is_alembic_up2date_with_rev (Rev: {}): False".format(current_head))

            log.info("Migrating to head ({})".format(current_head))
            if verbose:
                print_warning("Migrating to head ({})".format(current_head))
            command.upgrade(alembic_cfg, current_head)
    finally:
        # this engine only exists for the upgrade
        engine.dispose()

    if verbose:
        print_success("DB update finished successfully")
logo(): db = Database() count = db.get_sample_count() - try: - db.find('all') - except Exception: - print_error("You need to update your Viper database. Run 'python update.py -d'") - sys.exit() - if __project__.name: name = __project__.name else: