-
Notifications
You must be signed in to change notification settings - Fork 0
/
token.cpp
115 lines (103 loc) · 3.06 KB
/
token.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
//
// Created by koji on 1/8/22.
//
#include <cassert>
#include <cctype>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "token.h"
/**
 * Reads the next token from `istream` and returns it with all following
 * tokens already attached through `next`; the chain ends with a TK_EOF token.
 *
 * Behaviour per tokenizer state:
 *  - ST_DATA: '<' switches to ST_TAG_OPEN; a failed read or a NUL character
 *    yields TK_EOF; every other character becomes a character token.
 *  - ST_TAG_OPEN: '/' switches to ST_END_TAG_OPEN; an alphanumeric character
 *    starts a start-tag name.
 *  - ST_END_TAG_OPEN: an alphanumeric character starts an end-tag name.
 *  - Anything else (unexpected character, end of input inside a tag, or a
 *    state not handled here): the offending character is dropped and
 *    tokenizing resumes in ST_DATA.
 *
 * Every path returns a token; control never runs off the end of the function.
 *
 * NOTE(review): the function recurses once per token, so stack depth grows
 * with the length of the input.
 *
 * @param istream stream to read characters from
 * @return head of the linked token chain starting at the current position
 */
std::shared_ptr<Token> Tokenizer::tokenize(std::istream &istream) {
    char c = 0;
    switch (state) {
        case ST_DATA: {
            // Test the extraction itself: eof() only becomes true after a read
            // has already failed, and a failed read leaves `c` untouched.
            if (!(istream >> c) || c == 0) {
                return std::make_shared<Token>(Token {TK_EOF});
            }
            if (c == '<') {
                state = ST_TAG_OPEN;
                return tokenize(istream);
            }
            // Any other character (letters, digits, punctuation, ...) is
            // emitted as a character token.
            auto token = std::make_shared<Token>(Token(c));
            token->next = tokenize(istream);
            return token;
        }
        case ST_TAG_OPEN:
            c = consume(istream);
            if (c == '/') {
                state = ST_END_TAG_OPEN;
                return tokenize(istream);
            }
            // The cast keeps std::isalnum defined for bytes >= 0x80.
            if (std::isalnum(static_cast<unsigned char>(c))) {
                StartOrEndTag start_tag{TAG_START};
                state = ST_TAG_NAME;
                auto token = std::make_shared<Token>(reconsume_tag_name_state(istream, start_tag, c));
                token->next = tokenize(istream);
                return token;
            }
            break;  // must not fall through into the end-tag state
        case ST_END_TAG_OPEN:
            c = consume(istream);
            if (std::isalnum(static_cast<unsigned char>(c))) {
                StartOrEndTag end_tag{TAG_END};
                state = ST_TAG_NAME;
                auto token = std::make_shared<Token>(reconsume_tag_name_state(istream, end_tag, c));
                token->next = tokenize(istream);
                return token;
            }
            break;
        default:
            break;
    }
    // Recovery path: go back to the data state and continue. If the input is
    // exhausted, the data state produces the terminating TK_EOF token.
    state = ST_DATA;
    return tokenize(istream);
}
/**
 * Extracts characters from `istream` until a non-whitespace one is found.
 *
 * @param istream stream to read from
 * @return the first non-whitespace character read, or 0 once extraction
 *         fails (for example at end of input). A literal NUL byte in the
 *         input is indistinguishable from that sentinel.
 */
char Tokenizer::consume(std::istream &istream) {
    char c;
    while (istream >> c) {
        // std::isspace requires a value representable as unsigned char;
        // passing a negative plain char is undefined behaviour.
        if (!std::isspace(static_cast<unsigned char>(c))) {
            return c;
        }
    }
    return 0;
}
/**
 * Placeholder: neither `istream` nor `size` is used and the result is
 * always the NUL character.
 */
char Tokenizer::re_consume(std::istream &istream, unsigned int size) {
    return '\0';
}
/**
 * Reports whether `str` is exactly one of the delimiters "<", ">" or "/".
 *
 * @param str candidate text
 * @return true for the three delimiters above, false for anything else
 */
bool Tokenizer::can_tokenize(std::string str) {
    return str == "<" || str == ">" || str == "/";
}
/**
 * Collects a tag name, starting with `current_char` and continuing with
 * characters from `istream` until '>' is reached.
 *
 * Must be called with `state == ST_TAG_NAME`. On return `state` is ST_DATA.
 *
 * @param istream      stream to read the remaining characters from
 * @param start_tag    tag record (start or end tag) whose `name` is filled in
 * @param current_char first character of the name, already consumed by the caller
 * @return a token built from `start_tag` with the lower-cased name, or a
 *         TK_EOF token when the input ends before '>' is seen
 */
Token Tokenizer::reconsume_tag_name_state(std::istream &istream, StartOrEndTag start_tag, char current_char) {
    assert(state == ST_TAG_NAME);
    std::string name;
    // The first character is appended by the loop below, so it is not pushed
    // up front (doing so would duplicate it).
    for (char c = current_char; c != 0; c = consume(istream)) {
        if (c == '>') {
            state = ST_DATA;
            start_tag.name = name;
            return Token(start_tag);
        }
        // The cast keeps std::tolower defined for bytes >= 0x80.
        name.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(c))));
    }
    // consume() returned 0: the input ended inside the tag. Leave the
    // tag-name state and report end of input instead of running off the end
    // of the function without a return value.
    state = ST_DATA;
    return Token {TK_EOF};
}
/**
 * Builds a token from a value-initialized StartOrEndTag.
 *
 * NOTE(review): the tag kind is whatever value-initialization yields;
 * confirm that this corresponds to a start tag.
 */
Token TokenFactory::createStartTagToken() {
    StartOrEndTag blank_tag{};
    return Token(blank_tag);
}