From 8754ed45d2a5dcf3a7783056d16f5003bced2bce Mon Sep 17 00:00:00 2001
From: ChrisBAshton <chrisashtonweb@gmail.com>
Date: Fri, 25 Nov 2016 12:58:10 +0000
Subject: [PATCH 1/3] started adding 'spider' command

---
 lib/wraith/cli.rb                       |   9 ++
 lib/wraith/helpers/custom_exceptions.rb |   6 +
 lib/wraith/spider.rb                    | 197 +++++++++++++-----------
 lib/wraith/validate.rb                  |  10 ++
 lib/wraith/wraith.rb                    |  12 +-
 spec/configs/test_config--spider.yaml   |  46 ++++++
 spec/validate_spec.rb                   |  23 ++-
 7 files changed, 209 insertions(+), 94 deletions(-)
 create mode 100644 spec/configs/test_config--spider.yaml

diff --git a/lib/wraith/cli.rb b/lib/wraith/cli.rb
index 3d9333ed..97a0a02a 100644
--- a/lib/wraith/cli.rb
+++ b/lib/wraith/cli.rb
@@ -57,6 +57,15 @@ def setup
     end
   end
 
+  desc "spider [config_name]", "crawls a site for paths and stores them to YML file"
+  # Validates the config for spider mode (logging the result), then crawls via Wraith::Spider.
+  def spider(config)
+    within_acceptable_limits do
+      logger.info Wraith::Validate.new(config).validate("spider")
+      Wraith::Spider.new(config).crawl
+    end
+  end
+
   desc "reset_shots [config_name]", "removes all the files in the shots folder"
   def reset_shots(config_name)
     within_acceptable_limits do
diff --git a/lib/wraith/helpers/custom_exceptions.rb b/lib/wraith/helpers/custom_exceptions.rb
index 52b6d5db..0206aeb4 100644
--- a/lib/wraith/helpers/custom_exceptions.rb
+++ b/lib/wraith/helpers/custom_exceptions.rb
@@ -10,6 +10,12 @@ class MissingRequiredPropertyError < CustomError
 class ConfigFileDoesNotExistError < CustomError
 end
 
+class PropertyOutOfContextError < CustomError
+end
+
+class InvalidYamlError < CustomError
+end
+
 class MissingImageError < CustomError
   def initialize(msg = false)
     default_msg = "Something went wrong! It looks like you're missing some images. Check your output directory and make sure that each path has four files for every screen size (data.txt, diff, base, latest). If in doubt, delete your output directory and run Wraith again."
diff --git a/lib/wraith/spider.rb b/lib/wraith/spider.rb
index 76317c3f..b2044411 100644
--- a/lib/wraith/spider.rb
+++ b/lib/wraith/spider.rb
@@ -4,105 +4,120 @@
 require "nokogiri"
 require "uri"
 
-class Wraith::Spidering
-  include Logging
-  attr_reader :wraith
+class Wraith::Spider
 
   def initialize(config)
     @wraith = Wraith::Wraith.new(config)
   end
 
-  def check_for_paths
-    if wraith.paths.nil?
-      unless wraith.sitemap.nil?
-        logger.info "no paths defined in config, loading paths from sitemap"
-        spider = Wraith::Sitemap.new(wraith)
-      else
-        logger.info "no paths defined in config, crawling from site root"
-        spider = Wraith::Crawler.new(wraith)
-      end
-      spider.determine_paths
-    end
-  end
-end
-
-class Wraith::Spider
-  attr_reader :wraith
-
-  def initialize(wraith)
-    @wraith = wraith
-    @paths = {}
-  end
-
-  def determine_paths
-    spider
-    write_file
-  end
-
-  private
-
-  def write_file
-    File.open(wraith.spider_file, "w+") { |file| file.write(@paths) }
-  end
-
-  def add_path(path)
-    @paths[path == "/" ? "home" : path.gsub("/", "__").chomp("__").downcase] = path.downcase
+  def crawl
+    throw 'test'
   end
 
-  def spider
-  end
 end
 
-class Wraith::Crawler < Wraith::Spider
-  include Logging
 
-  EXT = %w(flv swf png jpg gif asx zip rar tar 7z \
-           gz jar js css dtd xsd ico raw mp3 mp4 \
-           wav wmv ape aac ac3 wma aiff mpg mpeg \
-           avi mov ogg mkv mka asx asf mp2 m1v \
-           m3u f4v pdf doc xls ppt pps bin exe rss xml)
 
-  def spider
-    if File.exist?(wraith.spider_file) && modified_since(wraith.spider_file, wraith.spider_days[0])
-      logger.info "using existing spider file"
-      @paths = eval(File.read(wraith.spider_file))
-    else
-      logger.info "creating new spider file"
-      Anemone.crawl(wraith.base_domain) do |anemone|
-        anemone.skip_links_like(/\.(#{EXT.join('|')})$/)
-        # Add user specified skips
-        anemone.skip_links_like(wraith.spider_skips)
-        anemone.on_every_page { |page| add_path(page.url.path) }
-      end
-    end
-  end
-
-  def modified_since(file, since)
-    (Time.now - File.ctime(file)) / (24 * 3600) < since
-  end
-end
-
-class Wraith::Sitemap < Wraith::Spider
-  include Logging
-
-  def spider
-    unless wraith.sitemap.nil?
-      logger.info "reading sitemap.xml from #{wraith.sitemap}"
-      if wraith.sitemap =~ URI.regexp
-        sitemap = Nokogiri::XML(open(wraith.sitemap))
-      else
-        sitemap = Nokogiri::XML(File.open(wraith.sitemap))
-      end
-      sitemap.css("loc").each do |loc|
-        path = loc.content
-        # Allow use of either domain in the sitemap.xml
-        wraith.domains.each do |_k, v|
-          path.sub!(v, "")
-        end
-        if wraith.spider_skips.nil? || wraith.spider_skips.none? { |regex| regex.match(path) }
-          add_path(path)
-        end
-      end
-    end
-  end
-end
+#
+# class Wraith::Spidering
+#   include Logging
+#   attr_reader :wraith
+#
+#   def initialize(config)
+#     @wraith = Wraith::Wraith.new(config)
+#   end
+#
+#   def check_for_paths
+#     if wraith.paths.nil?
+#       unless wraith.sitemap.nil?
+#         logger.info "no paths defined in config, loading paths from sitemap"
+#         spider = Wraith::Sitemap.new(wraith)
+#       else
+#         logger.info "no paths defined in config, crawling from site root"
+#         spider = Wraith::Crawler.new(wraith)
+#       end
+#       spider.determine_paths
+#     end
+#   end
+# end
+#
+# class Wraith::Spider
+#   attr_reader :wraith
+#
+#   def initialize(wraith)
+#     @wraith = wraith
+#     @paths = {}
+#   end
+#
+#   def determine_paths
+#     spider
+#     write_file
+#   end
+#
+#   private
+#
+#   def write_file
+#     File.open(wraith.spider_file, "w+") { |file| file.write(@paths) }
+#   end
+#
+#   def add_path(path)
+#     @paths[path == "/" ? "home" : path.gsub("/", "__").chomp("__").downcase] = path.downcase
+#   end
+#
+#   def spider
+#   end
+# end
+#
+# class Wraith::Crawler < Wraith::Spider
+#   include Logging
+#
+#   EXT = %w(flv swf png jpg gif asx zip rar tar 7z \
+#            gz jar js css dtd xsd ico raw mp3 mp4 \
+#            wav wmv ape aac ac3 wma aiff mpg mpeg \
+#            avi mov ogg mkv mka asx asf mp2 m1v \
+#            m3u f4v pdf doc xls ppt pps bin exe rss xml)
+#
+#   def spider
+#     if File.exist?(wraith.spider_file) && modified_since(wraith.spider_file, wraith.spider_days[0])
+#       logger.info "using existing spider file"
+#       @paths = eval(File.read(wraith.spider_file))
+#     else
+#       logger.info "creating new spider file"
+#       Anemone.crawl(wraith.base_domain) do |anemone|
+#         anemone.skip_links_like(/\.(#{EXT.join('|')})$/)
+#         # Add user specified skips
+#         anemone.skip_links_like(wraith.spider_skips)
+#         anemone.on_every_page { |page| add_path(page.url.path) }
+#       end
+#     end
+#   end
+#
+#   def modified_since(file, since)
+#     (Time.now - File.ctime(file)) / (24 * 3600) < since
+#   end
+# end
+#
+# class Wraith::Sitemap < Wraith::Spider
+#   include Logging
+#
+#   def spider
+#     unless wraith.sitemap.nil?
+#       logger.info "reading sitemap.xml from #{wraith.sitemap}"
+#       if wraith.sitemap =~ URI.regexp
+#         sitemap = Nokogiri::XML(open(wraith.sitemap))
+#       else
+#         sitemap = Nokogiri::XML(File.open(wraith.sitemap))
+#       end
+#       sitemap.css("loc").each do |loc|
+#         path = loc.content
+#         # Allow use of either domain in the sitemap.xml
+#         wraith.domains.each do |_k, v|
+#           path.sub!(v, "")
+#         end
+#         if wraith.spider_skips.nil? || wraith.spider_skips.none? { |regex| regex.match(path) }
+#           add_path(path)
+#         end
+#       end
+#     end
+#   end
+# end
diff --git a/lib/wraith/validate.rb b/lib/wraith/validate.rb
index e448deda..75fc53a0 100644
--- a/lib/wraith/validate.rb
+++ b/lib/wraith/validate.rb
@@ -37,6 +37,8 @@ def validate_mode_properties(mode)
     when "latest"
       validate_history_mode
       validate_base_shots_exist
+    when "spider"
+      validate_spider_mode
     else
       logger.warn "Wraith doesn't know how to validate mode '#{mode}'. Continuing..."
     end
@@ -56,6 +58,14 @@ def validate_history_mode
     fail InvalidDomainsError, "History mode requires exactly one domain. #{docs_prompt}" if wraith.domains.length != 1
   end
 
+  # Spider mode requires `imports` to be set and `paths` to be absent from the config.
+  def validate_spider_mode
+    unless wraith.imports
+      fail MissingRequiredPropertyError, "You must specify an `imports` YML before running `wraith spider`. #{docs_prompt}"
+    end
+    fail PropertyOutOfContextError, "Tried running `wraith spider` but you have already specified paths in your YML. #{docs_prompt}" if wraith.paths
+  end
+
   def validate_base_shots_exist
     unless File.directory?(wraith.history_dir)
       logger.error "You need to run `wraith history` at least once before you can run `wraith latest`!"
diff --git a/lib/wraith/wraith.rb b/lib/wraith/wraith.rb
index a689dbf3..b85e9a06 100644
--- a/lib/wraith/wraith.rb
+++ b/lib/wraith/wraith.rb
@@ -22,8 +22,12 @@ def open_config_file(config_name)
 
     possible_filenames.each do |filepath|
       if File.exist?(filepath)
-        config = File.open filepath
-        return YAML.load config
+        config = YAML.load_file(filepath)
+        if config
+          return config
+        else
+          fail InvalidYamlError, "could not parse \"#{config_name}\" as YAML"
+        end
       end
     end
     fail ConfigFileDoesNotExistError, "unable to find config \"#{config_name}\""
@@ -166,6 +170,10 @@ def phantomjs_options
     @config["phantomjs_options"]
   end
 
+  def imports
+    @config['imports'] || nil
+  end
+
   def verbose
     # @TODO - also add a `--verbose` CLI flag which overrides whatever you have set in the config
     @config["verbose"] || false
diff --git a/spec/configs/test_config--spider.yaml b/spec/configs/test_config--spider.yaml
new file mode 100644
index 00000000..bedb4983
--- /dev/null
+++ b/spec/configs/test_config--spider.yaml
@@ -0,0 +1,46 @@
+imports: "spider_paths.yaml"
+
+#Headless browser option
+browser: "phantomjs"
+
+# Type the name of the directory that shots will be stored in
+directory: "shots"
+
+# Add only 2 domains, key will act as a label
+domains:
+ english: "http://www.live.bbc.co.uk/news"
+ russian: "http://www.live.bbc.co.uk/russian"
+
+#Type screen widths below, here are a couple of examples
+screen_widths:
+ - 320
+ - 600
+ - 768x1500 # you can also specify the height, as we've done here
+ - 1024
+ - 1280
+
+#Amount of fuzz ImageMagick will use
+fuzz: "20%"
+
+#Set the filename of the spider file to use, if not specified it will fallback to spider.txt
+spider_file: bbc_co_uk_spider.txt
+
+#Set the number of days to keep the site spider file
+pider_days:
+ - 10
+
+#A list of URLs to skip when spidering. Ruby regular expressions can be
+#used, if prefixed with !ruby/regexp as defined in the YAML Cookbook
+#http://www.yaml.org/YAML_for_ruby.html#regexps
+#
+# spider_skips:
+#   - /foo/bar.html # Matches /foo/bar.html explcitly
+#   - !ruby/regexp /^\/baz\// # Matches any URLs that start with /baz
+
+#Choose how results are displayed, by default alphanumeric. Different screen widths are always grouped.
+#alphanumeric - all paths (with, and without, a difference) are shown, sorted by path
+#diffs_first - all paths (with, and without, a difference) are shown, sorted by difference size (largest first)
+#diffs_only - only paths with a difference are shown, sorted by difference size (largest first)
+mode: diffs_first
+
+threshold: 15
diff --git a/spec/validate_spec.rb b/spec/validate_spec.rb
index 150cf6fd..025446e9 100644
--- a/spec/validate_spec.rb
+++ b/spec/validate_spec.rb
@@ -53,7 +53,7 @@
       ')
       Wraith::Validate.new(config, true).validate("capture")
     end
-    
+
     it "should fail if no directory is specified" do
       config["domains"] = YAML.load('
           test:  http://something.bbc.com
@@ -88,4 +88,25 @@
       Wraith::Validate.new(history_conf, true).validate("history")
     end
   end
+
+  describe "validations specific to spider mode" do
+    let(:spider_conf) do
+      config.merge(YAML.load('
+        imports: "spider_paths.yml"
+      '))
+    end
+
+    it "should complain if imports is empty" do
+      spider_conf['imports'] = nil
+      expect { Wraith::Validate.new(spider_conf, true).validate("spider") }.to raise_error PropertyOutOfContextError
+    end
+
+    it "should complain if paths is set" do
+      spider_conf.merge(YAML.load('
+        paths:
+          home: /
+      '))
+      expect { Wraith::Validate.new(spider_conf, true).validate("spider") }.to raise_error PropertyOutOfContextError
+    end
+  end
 end

From b48e987414fc3622e257285ff219fa4356155461 Mon Sep 17 00:00:00 2001
From: ChrisBAshton <chrisashtonweb@gmail.com>
Date: Fri, 25 Nov 2016 13:43:39 +0000
Subject: [PATCH 2/3] got new spider mode working

---
 lib/wraith/cli.rb                     |  24 +----
 lib/wraith/helpers/utilities.rb       |   7 ++
 lib/wraith/spider.rb                  | 145 +++++++-------------------
 lib/wraith/wraith.rb                  |   2 +-
 spec/configs/test_config--spider.yaml |  12 +--
 spec/validate_spec.rb                 |  17 ++-
 wraith.gemspec                        |   1 -
 7 files changed, 60 insertions(+), 148 deletions(-)

diff --git a/lib/wraith/cli.rb b/lib/wraith/cli.rb
index 97a0a02a..1bebe4b2 100644
--- a/lib/wraith/cli.rb
+++ b/lib/wraith/cli.rb
@@ -22,26 +22,6 @@ def self.source_root
     File.expand_path("../../../", __FILE__)
   end
 
-  # define internal methods which user should not be able to run directly
-  no_commands do
-    def within_acceptable_limits
-      yield
-    rescue CustomError => e
-      logger.error e.message
-      # other errors, such as SystemError, will not be caught nicely and will give a stack trace (which we'd need)
-    end
-
-    def check_for_paths(config_name)
-      spider = Wraith::Spidering.new(config_name)
-      spider.check_for_paths
-    end
-
-    def copy_old_shots(config_name)
-      create = Wraith::FolderManager.new(config_name)
-      create.copy_old_shots
-    end
-  end
-
   desc "validate [config_name]", "checks your configuration and validates that all required properties exist"
   def validate(config_name)
     within_acceptable_limits do
@@ -140,7 +120,6 @@ def capture(config, multi = false)
     within_acceptable_limits do
       logger.info Wraith::Validate.new(config).validate("capture")
       reset_shots(config)
-      check_for_paths(config)
       setup_folders(config)
       save_images(config)
       crop_images(config)
@@ -165,10 +144,9 @@ def history(config)
     within_acceptable_limits do
       logger.info Wraith::Validate.new(config).validate("history")
       reset_shots(config)
-      check_for_paths(config)
       setup_folders(config)
       save_images(config)
-      copy_old_shots(config)
+      Wraith::FolderManager.new(config).copy_old_shots
     end
   end
 
diff --git a/lib/wraith/helpers/utilities.rb b/lib/wraith/helpers/utilities.rb
index e98e1aa0..f4ff6c6c 100644
--- a/lib/wraith/helpers/utilities.rb
+++ b/lib/wraith/helpers/utilities.rb
@@ -1,5 +1,12 @@
 require "wraith/helpers/custom_exceptions"
 
+# Yields to the block; a CustomError is logged via logger.error instead of raised. Any other error propagates with its stack trace (which we'd need).
+def within_acceptable_limits
+  yield
+rescue CustomError => e
+  logger.error e.message
+end
+
 def convert_to_absolute(filepath)
   if !filepath
     "false"
diff --git a/lib/wraith/spider.rb b/lib/wraith/spider.rb
index b2044411..68f0b56b 100644
--- a/lib/wraith/spider.rb
+++ b/lib/wraith/spider.rb
@@ -1,123 +1,52 @@
 require "wraith"
 require "wraith/helpers/logger"
+require "yaml"
 require "anemone"
-require "nokogiri"
 require "uri"
 
 class Wraith::Spider
+  include Logging
+
+  # File extensions the crawler never follows. No trailing backslashes here:
+  # inside %w an escaped newline becomes a "\n" element of the array.
+  EXT = %w(flv swf png jpg gif asx zip rar tar 7z gz jar js css dtd xsd ico raw
+           mp3 mp4 m4a wav wmv ape aac ac3 wma aiff mpg mpeg avi mov ogg mkv mka
+           asf mp2 m1v m3u f4v pdf doc xls ppt pps bin exe rss xml).freeze
+
+  attr_reader :wraith
 
   def initialize(config)
     @wraith = Wraith::Wraith.new(config)
+    @paths = {}
   end
 
   def crawl
-    throw 'test'
-  end
-
-end
+    logger.info "Crawling #{wraith.base_domain}"
+    Anemone.crawl(wraith.base_domain) do |anemone|
+      anemone.skip_links_like(/\.(#{EXT.join('|')})$/)
+      # Add user specified skips
+      anemone.skip_links_like(wraith.spider_skips)
+      anemone.on_every_page do |page|
+        logger.info "    #{page.url.path}"
+        add_path(page.url.path)
+      end
+    end
 
+    logger.info "Crawl complete."
+    write_file
+  end
 
+  def add_path(path)
+    @paths[path == "/" ? "home" : path.gsub("/", "__").chomp("__").downcase] = path.downcase
+  end
 
-#
-# class Wraith::Spidering
-#   include Logging
-#   attr_reader :wraith
-#
-#   def initialize(config)
-#     @wraith = Wraith::Wraith.new(config)
-#   end
-#
-#   def check_for_paths
-#     if wraith.paths.nil?
-#       unless wraith.sitemap.nil?
-#         logger.info "no paths defined in config, loading paths from sitemap"
-#         spider = Wraith::Sitemap.new(wraith)
-#       else
-#         logger.info "no paths defined in config, crawling from site root"
-#         spider = Wraith::Crawler.new(wraith)
-#       end
-#       spider.determine_paths
-#     end
-#   end
-# end
-#
-# class Wraith::Spider
-#   attr_reader :wraith
-#
-#   def initialize(wraith)
-#     @wraith = wraith
-#     @paths = {}
-#   end
-#
-#   def determine_paths
-#     spider
-#     write_file
-#   end
-#
-#   private
-#
-#   def write_file
-#     File.open(wraith.spider_file, "w+") { |file| file.write(@paths) }
-#   end
-#
-#   def add_path(path)
-#     @paths[path == "/" ? "home" : path.gsub("/", "__").chomp("__").downcase] = path.downcase
-#   end
-#
-#   def spider
-#   end
-# end
-#
-# class Wraith::Crawler < Wraith::Spider
-#   include Logging
-#
-#   EXT = %w(flv swf png jpg gif asx zip rar tar 7z \
-#            gz jar js css dtd xsd ico raw mp3 mp4 \
-#            wav wmv ape aac ac3 wma aiff mpg mpeg \
-#            avi mov ogg mkv mka asx asf mp2 m1v \
-#            m3u f4v pdf doc xls ppt pps bin exe rss xml)
-#
-#   def spider
-#     if File.exist?(wraith.spider_file) && modified_since(wraith.spider_file, wraith.spider_days[0])
-#       logger.info "using existing spider file"
-#       @paths = eval(File.read(wraith.spider_file))
-#     else
-#       logger.info "creating new spider file"
-#       Anemone.crawl(wraith.base_domain) do |anemone|
-#         anemone.skip_links_like(/\.(#{EXT.join('|')})$/)
-#         # Add user specified skips
-#         anemone.skip_links_like(wraith.spider_skips)
-#         anemone.on_every_page { |page| add_path(page.url.path) }
-#       end
-#     end
-#   end
-#
-#   def modified_since(file, since)
-#     (Time.now - File.ctime(file)) / (24 * 3600) < since
-#   end
-# end
-#
-# class Wraith::Sitemap < Wraith::Spider
-#   include Logging
-#
-#   def spider
-#     unless wraith.sitemap.nil?
-#       logger.info "reading sitemap.xml from #{wraith.sitemap}"
-#       if wraith.sitemap =~ URI.regexp
-#         sitemap = Nokogiri::XML(open(wraith.sitemap))
-#       else
-#         sitemap = Nokogiri::XML(File.open(wraith.sitemap))
-#       end
-#       sitemap.css("loc").each do |loc|
-#         path = loc.content
-#         # Allow use of either domain in the sitemap.xml
-#         wraith.domains.each do |_k, v|
-#           path.sub!(v, "")
-#         end
-#         if wraith.spider_skips.nil? || wraith.spider_skips.none? { |regex| regex.match(path) }
-#           add_path(path)
-#         end
-#       end
-#     end
-#   end
-# end
+  def write_file
+    logger.info "Writing to YML file:"
+    config = {}
+    config['paths'] = @paths
+    File.open(wraith.imports, "w+") do |file|
+      file.write(config.to_yaml)
+      logger.info "Spider paths written to #{wraith.imports}"
+    end
+  end
+end
diff --git a/lib/wraith/wraith.rb b/lib/wraith/wraith.rb
index b85e9a06..86c7977d 100644
--- a/lib/wraith/wraith.rb
+++ b/lib/wraith/wraith.rb
@@ -171,7 +171,7 @@ def phantomjs_options
   end
 
   def imports
-    @config['imports'] || nil
+    @config['imports'] || false
   end
 
   def verbose
diff --git a/spec/configs/test_config--spider.yaml b/spec/configs/test_config--spider.yaml
index bedb4983..ea99392b 100644
--- a/spec/configs/test_config--spider.yaml
+++ b/spec/configs/test_config--spider.yaml
@@ -8,8 +8,8 @@ directory: "shots"
 
 # Add only 2 domains, key will act as a label
 domains:
- english: "http://www.live.bbc.co.uk/news"
- russian: "http://www.live.bbc.co.uk/russian"
+ personal: "http://ashton.codes"
+ business: "http://webdapper.com"
 
 #Type screen widths below, here are a couple of examples
 screen_widths:
@@ -22,17 +22,9 @@ screen_widths:
 #Amount of fuzz ImageMagick will use
 fuzz: "20%"
 
-#Set the filename of the spider file to use, if not specified it will fallback to spider.txt
-spider_file: bbc_co_uk_spider.txt
-
-#Set the number of days to keep the site spider file
-pider_days:
- - 10
-
 #A list of URLs to skip when spidering. Ruby regular expressions can be
 #used, if prefixed with !ruby/regexp as defined in the YAML Cookbook
 #http://www.yaml.org/YAML_for_ruby.html#regexps
-#
 # spider_skips:
 #   - /foo/bar.html # Matches /foo/bar.html explcitly
 #   - !ruby/regexp /^\/baz\// # Matches any URLs that start with /baz
diff --git a/spec/validate_spec.rb b/spec/validate_spec.rb
index 025446e9..78e389bb 100644
--- a/spec/validate_spec.rb
+++ b/spec/validate_spec.rb
@@ -91,18 +91,25 @@
 
   describe "validations specific to spider mode" do
     let(:spider_conf) do
-      config.merge(YAML.load('
+      YAML.load('
+        domains:
+          test: http://www.bbc.com
+
+        browser: "casperjs"
+
+        directory: some/dir
+
         imports: "spider_paths.yml"
-      '))
+      ')
     end
 
     it "should complain if imports is empty" do
-      spider_conf['imports'] = nil
-      expect { Wraith::Validate.new(spider_conf, true).validate("spider") }.to raise_error PropertyOutOfContextError
+      spider_conf.delete 'imports'
+      expect { Wraith::Validate.new(spider_conf, true).validate("spider") }.to raise_error MissingRequiredPropertyError
     end
 
     it "should complain if paths is set" do
-      spider_conf.merge(YAML.load('
+      spider_conf.merge!(YAML.load('
         paths:
           home: /
       '))
diff --git a/wraith.gemspec b/wraith.gemspec
index 072c302c..2476304c 100644
--- a/wraith.gemspec
+++ b/wraith.gemspec
@@ -26,7 +26,6 @@ Gem::Specification.new do |spec|
   spec.add_runtime_dependency 'image_size'
   spec.add_runtime_dependency 'anemone'
   spec.add_runtime_dependency 'robotex'
-  spec.add_runtime_dependency 'nokogiri', '~> 1.6.7'
   spec.add_runtime_dependency 'log4r'
   spec.add_runtime_dependency 'thor'
   spec.add_runtime_dependency 'parallel'

From e792e0f7c7992bd84ae1fd898e66085176fe0e52 Mon Sep 17 00:00:00 2001
From: ChrisBAshton <chrisashtonweb@gmail.com>
Date: Fri, 25 Nov 2016 14:28:19 +0000
Subject: [PATCH 3/3] save spider paths file relative to main config file

---
 lib/wraith/spider.rb           | 10 ++++--
 lib/wraith/wraith.rb           |  8 +++--
 spec/configs/spider_paths.yaml | 66 ++++++++++++++++++++++++++++++++++
 3 files changed, 79 insertions(+), 5 deletions(-)
 create mode 100644 spec/configs/spider_paths.yaml

diff --git a/lib/wraith/spider.rb b/lib/wraith/spider.rb
index 68f0b56b..a0fb16db 100644
--- a/lib/wraith/spider.rb
+++ b/lib/wraith/spider.rb
@@ -41,10 +41,14 @@ def add_path(path)
   end
 
+  # Writes the crawled paths, nested under a top-level `paths` key, as YAML
+  # to the file named by the config's `imports` property.
   def write_file
-    logger.info "Writing to YML file:"
+    logger.info "Writing to YML file..."
     config = {}
     config['paths'] = @paths
-    File.open(wraith.imports, "w+") do |file|
+    # `imports` is resolved relative to the directory of the main config file
+    output_path = File.join(wraith.config_dir, wraith.imports)
+    File.open(output_path, "w+") do |file|
       file.write(config.to_yaml)
-      logger.info "Spider paths written to #{wraith.imports}"
+      logger.info "Spider paths written to #{output_path}"
     end
diff --git a/lib/wraith/wraith.rb b/lib/wraith/wraith.rb
index 8b4ede1c..47615afd 100644
--- a/lib/wraith/wraith.rb
+++ b/lib/wraith/wraith.rb
@@ -35,7 +35,7 @@ def determine_config_path(config_name)
 
     possible_filenames.each do |filepath|
       if File.exist?(filepath)
-        @config_dir = absolute_path_of_dir(convert_to_absolute filepath)
+        @calculated_config_dir = absolute_path_of_dir(convert_to_absolute filepath)
         return convert_to_absolute filepath
       end
     end
@@ -43,8 +43,12 @@ def determine_config_path(config_name)
     fail ConfigFileDoesNotExistError, "unable to find config \"#{config_name}\""
   end
 
+  def config_dir
+    @calculated_config_dir
+  end
+
   def apply_imported_config(config_to_import, config)
-    path_to_config = "#{@config_dir}/#{config_to_import}"
+    path_to_config = "#{config_dir}/#{config_to_import}"
     if File.exist?(path_to_config)
       yaml = YAML.load_file path_to_config
       return yaml.merge(config)
diff --git a/spec/configs/spider_paths.yaml b/spec/configs/spider_paths.yaml
new file mode 100644
index 00000000..65f514b3
--- /dev/null
+++ b/spec/configs/spider_paths.yaml
@@ -0,0 +1,66 @@
+---
+paths:
+  home: /
+  __about: /about
+  __skills: /skills/
+  __cv: /cv
+  __blog__category__dissertation: /blog/category/dissertation/
+  __blog__category__reports: /blog/category/reports/
+  __blog__category__portfolio: /blog/category/portfolio/
+  __blog__chorister: /blog/chorister/
+  __blog__category__updates: /blog/category/updates/
+  __contact: /contact
+  __terminal: /terminal/
+  __blog__dissertation: /blog/dissertation/
+  __blog__discipline-good-enough: /blog/discipline-good-enough/
+  __blog__it-is-done: /blog/it-is-done/
+  __blog__composer: /blog/composer/
+  __blog__mid-project-demonstration: /blog/mid-project-demonstration/
+  __blog__completing-the-core: /blog/completing-the-core/
+  __blog__commercial-viability: /blog/commercial-viability/
+  __blog__cooking-on-gas: /blog/cooking-on-gas/
+  __blog__choosing-bdd-framework: /blog/choosing-bdd-framework/
+  __blog__reading: /blog/reading/
+  __blog__following-plan: /blog/following-plan/
+  __blog__outline-project-specification: /blog/outline-project-specification/
+  __blog__php-frameworks: /blog/php-frameworks/
+  __blog__industrial-year-report: /blog/industrial-year-report/
+  __blog__category__dissertation__page__2: /blog/category/dissertation/page/2/
+  __blog__exploring-suitability-viola-jones-framework-counting-people: /blog/exploring-suitability-viola-jones-framework-counting-people/
+  __blog__smartresolution: /blog/smartresolution/
+  __blog__commonwealth-games-quiz: /blog/commonwealth-games-quiz/
+  __blog__magazine-parentchild-themes: /blog/magazine-parentchild-themes/
+  __blog__reff: /blog/reff/
+  __blog__voicecouncil-magazine: /blog/voicecouncil-magazine/
+  __blog__hover-bike: /blog/hover-bike/
+  __blog__3d-solar-system: /blog/3d-solar-system/
+  __blog__how-stressed-are-you: /blog/how-stressed-are-you/
+  __blog__studentmunch-com: /blog/studentmunch-com/
+  __blog__alice-holmes-anthropomorphism: /blog/alice-holmes-anthropomorphism/
+  __blog__nhs-winter: /blog/nhs-winter/
+  __blog__abermads-co-uk: /blog/abermads-co-uk/
+  __blog__tube-spotted: /blog/tube-spotted/
+  __blog__radioactive-evolution: /blog/radioactive-evolution/
+  __blog__when-do-i-need-a-non-javascript-solution: /blog/when-do-i-need-a-non-javascript-solution/
+  __blog__use-requirejs-wordpress-plugins-jquery-ui: /blog/use-requirejs-wordpress-plugins-jquery-ui/
+  __blog__high-impact-minimal-effort-cross-browser-testing: /blog/high-impact-minimal-effort-cross-browser-testing/
+  __blog__five-line-rule: /blog/five-line-rule/
+  __blog__the-pessimistic-ultimatum: /blog/the-pessimistic-ultimatum/
+  __blog__how-to-differentiate-between-links: /blog/how-to-differentiate-between-links/
+  __blog__stupidly-simple-programming-introduction: /blog/stupidly-simple-programming-introduction/
+  __blog__zaatari-refugee-camp-rebuilding-lives-in-the-desert: /blog/zaatari-refugee-camp-rebuilding-lives-in-the-desert/
+  __blog__category__updates__page__2: /blog/category/updates/page/2/
+  __blog__requirements-features: /blog/requirements-features/
+  __blog__building-shoulders-giants: /blog/building-shoulders-giants/
+  __blog__clarifying-requirements: /blog/clarifying-requirements/
+  __blog__orthodontic-refactoring: /blog/orthodontic-refactoring/
+  __blog__localised-css: /blog/localised-css/
+  __blog__project-clarified: /blog/project-clarified/
+  __blog__identifying-competitors: /blog/identifying-competitors/
+  __blog__and-so-it-begins: /blog/and-so-it-begins/
+  __blog__does-drm-promote-piracy: /blog/does-drm-promote-piracy/
+  __blog__what-is-the-surprise-app-for-blackberry: /blog/what-is-the-surprise-app-for-blackberry/
+  __blog__if-humans-were-computer-components-an-analogy: /blog/if-humans-were-computer-components-an-analogy/
+  __blog__redesigning-ip-datagrams-for-a-faster-internet: /blog/redesigning-ip-datagrams-for-a-faster-internet/
+  __blog__trinary-is-it-the-future: /blog/trinary-is-it-the-future/
+  __blog__the-demise-of-blackberry: /blog/the-demise-of-blackberry/