-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathParser.h
36 lines (29 loc) · 807 Bytes
/
Parser.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#ifndef PARSER_H
#define PARSER_H
#include <regex>
#include <set>
#include <deque>
#include <vector>
#include <string>
#include <sstream>
#include <stack>
#include <list>
#include "gumbo.h"
/// Pattern for text regarded as an image reference.
///
/// Accepts any run of characters followed by one or more repetitions of
/// "jpg", "png", "bmp" or "gif" at the end of the input. No '.' is required
/// before the extension and the comparison is case-sensitive, so "notajpg"
/// is accepted while "photo.JPG" is not.
const std::regex imageexpr("(.*(jpg|png|bmp|gif)+$)");

/// Pattern for URL-like text. In order, it requires:
///   1. one or more letters, '/' or ':'        (e.g. "http://example")
///   2. one or more '.' characters
///   3. any run of letters, '.', '/', '?' or '='
///   4. one final character that is not whitespace, ',', '@' or '"'
/// Digits and '-' are absent from the character classes, so they can only
/// appear as the very last character of a match.
const std::regex urlexpr("([a-zA-Z/:]+[\\.]+[a-zA-Z\\./?=]*[^\\s,@\\\"])");
/// HTML parsing helper built on the Gumbo parser (types from "gumbo.h").
///
/// Every member, including the constructor and destructor, is private; the
/// only code able to create or drive a Parser is the friend class Spider.
/// Member definitions live outside this header, so the notes below describe
/// the declared contract only.
///
/// NOTE(review): the class declares a destructor and stores a raw
/// `GumboOutput*`, yet its copy operations are left implicit. If ~Parser()
/// releases `output` (not visible here), copying a Parser would release it
/// twice -- confirm Spider never copies one, or delete the copy operations.
class Parser
{
    friend class Spider;

private:
    /// @param host              stored per-parser host string (presumably the
    ///                          site being crawled -- confirm in Parser.cpp).
    /// @param other_gather_urls presumably initialises `gather_urls` -- confirm.
    Parser(std::string host, bool other_gather_urls);
    ~Parser();

    /// Consumes `stream`; the bool result presumably reports whether parsing
    /// succeeded -- confirm against the definition.
    bool load_stream(std::stringstream& stream);

    /// Walks the parsed document; presumably starts from `output` -- confirm.
    void traverse_tree();

    /// Processes a single Gumbo node; presumably extracts its link -- confirm.
    void get_link(GumboNode* node);

    /// Returns a transformed copy of `link`; the exact normalisation is
    /// defined in the implementation file.
    std::string format_link(std::string link);

    /// Hands results to the caller-owned pools. Presumably moves
    /// `urls_gathered` into `url_pool` and `data_gathered` into `data_pool`
    /// -- confirm. The pointers themselves are const; the pointees are not.
    void return_data(std::list<std::string>* const url_pool, std::set<std::string>* const data_pool);

    // Default member initialisers guarantee a determinate state even if a
    // constructor path forgets to set these; a constructor's own
    // mem-initialiser still takes precedence.
    GumboOutput* output = nullptr;
    std::string host;
    bool gather_urls = false;
    std::deque<std::string> urls_gathered;
    std::deque<std::string> data_gathered;
};
#endif