diff --git a/kernel/Makefile b/kernel/Makefile index 14fd57af6..ab6bb154f 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -1,7 +1,7 @@ TARGET = kernel.elf OBJS = main.o graphics.o mouse.o font.o hankaku.o newlib_support.o console.o \ pci.o asmfunc.o libcxx_support.o logger.o interrupt.o segment.o paging.o memory_manager.o \ - window.o layer.o timer.o frame_buffer.o acpi.o keyboard.o task.o terminal.o \ + window.o layer.o timer.o frame_buffer.o acpi.o keyboard.o task.o terminal.o tokenizer.o \ fat.o syscall.o file.o \ usb/memory.o usb/device.o usb/xhci/ring.o usb/xhci/trb.o usb/xhci/xhci.o \ usb/xhci/port.o usb/xhci/device.o usb/xhci/devmgr.o usb/xhci/registers.o \ diff --git a/kernel/terminal.cpp b/kernel/terminal.cpp index 9fdd5d99d..8e31a13a1 100644 --- a/kernel/terminal.cpp +++ b/kernel/terminal.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include "font.hpp" #include "layer.hpp" @@ -17,12 +18,14 @@ #include "keyboard.hpp" #include "logger.hpp" #include "uefi.hpp" +#include "tokenizer.hpp" #include "usb/classdriver/cdc.hpp" #include "usb/xhci/xhci.hpp" + namespace { -WithError MakeArgVector(char* command, char* first_arg, +WithError MakeArgVector(const std::vector &args, char** argv, int argv_len, char* argbuf, int argbuf_len) { int argc = 0; int argbuf_index = 0; @@ -39,36 +42,14 @@ WithError MakeArgVector(char* command, char* first_arg, return MAKE_ERROR(Error::kSuccess); }; - if (auto err = push_to_argv(command)) { - return { argc, err }; - } - if (!first_arg) { + if (!args.size()) { return { argc, MAKE_ERROR(Error::kSuccess) }; } - char* p = first_arg; - while (true) { - while (isspace(p[0])) { - ++p; - } - if (p[0] == 0) { - break; - } - const char* arg = p; - - while (p[0] != 0 && !isspace(p[0])) { - ++p; - } - // here: p[0] == 0 || isspace(p[0]) - const bool is_end = p[0] == 0; - p[0] = 0; - if (auto err = push_to_argv(arg)) { + for (int i = 0; i < args.size(); i++) { + if (auto err = push_to_argv(args[i].c_str())) { return { argc, err 
}; } - if (is_end) { - break; - } - ++p; } return { argc, MAKE_ERROR(Error::kSuccess) }; @@ -335,21 +316,37 @@ Rectangle Terminal::InputKey( if (ascii == '\n') { linebuf_[linebuf_index_] = 0; - if (linebuf_index_ > 0) { - cmd_history_.pop_back(); - cmd_history_.push_front(linebuf_); - } - linebuf_index_ = 0; - cmd_history_index_ = -1; + std::vector tokens; + int redir_idx = -1; + int pipe_idx = -1; + std::unique_ptr t = Tokenize(&linebuf_[0], tokens, &redir_idx, &pipe_idx, nullptr); + if (t) { // input not end + cursor_.x = 0; + if (cursor_.y < kRows - 1) { + ++cursor_.y; + } else { + Scroll1(); + } + linebuf_[linebuf_index_] = ascii; + ++linebuf_index_; + Print("<"); + } else { // input end + if (linebuf_index_ > 0) { + cmd_history_.pop_back(); + cmd_history_.push_front(linebuf_); + } + linebuf_index_ = 0; + cmd_history_index_ = -1; - cursor_.x = 0; - if (cursor_.y < kRows - 1) { - ++cursor_.y; - } else { - Scroll1(); + cursor_.x = 0; + if (cursor_.y < kRows - 1) { + ++cursor_.y; + } else { + Scroll1(); + } + ExecuteLine(tokens, redir_idx, pipe_idx); + Print(">"); } - ExecuteLine(); - Print(">"); draw_area.pos = ToplevelWindow::kTopLeftMargin; draw_area.size = window_->InnerSize(); } else if (ascii == '\b') { @@ -394,37 +391,35 @@ void Terminal::Scroll1() { {4, 4 + 16*cursor_.y}, {8*kColumns, 16}, {0, 0, 0}); } -void Terminal::ExecuteLine() { - char* command = &linebuf_[0]; - char* first_arg = strchr(&linebuf_[0], ' '); - char* redir_char = strchr(&linebuf_[0], '>'); - char* pipe_char = strchr(&linebuf_[0], '|'); - char* command_end = &linebuf_[strlen(&linebuf_[0])]; - - auto trim_space = [&command](char* end_ptr) { - while (command < end_ptr && isspace(end_ptr[-1])) { - *--end_ptr = 0; - } - }; - trim_space(command_end); - - if (first_arg) { - *first_arg = 0; - do { - ++first_arg; - } while (isspace(*first_arg)); - } +/** +* Terminal::ExecuteLine +* +* リダイレクトが複数含まれる場合、最初のリダイレクトのに有効となる +* リダイレクト以降の内容は処理しない簡易な実装となっているため、 +* コマンドの最後に書く必要がある。 +* リダイレクトを先頭に書くことはできない。 
+* +* @param[in] args ターミナルの入力をtokenに分割したもの +* @param[in] redir_idx リダイレクト'>'が最初に登場するargsのインデックス +* argsにリダイレクトが含まれない場合-1 +* @param[in] pipe_idx パイプ'|'が最初に登場するargsのインデックス +* argsにパイプが含まれない場合-1 +*/ +void Terminal::ExecuteLine(std::vector& args, int redir_idx, int pipe_idx) { + std::string command = args[0]; auto original_stdout = files_[1]; int exit_code = 0; - if (redir_char) { - *redir_char = 0; - trim_space(redir_char); - char* redir_dest = &redir_char[1]; - while (isspace(*redir_dest)) { - ++redir_dest; + if (redir_idx != -1 && redir_idx != 0) { + if (args.size() < redir_idx + 2) { + PrintToFD(*files_[2], + "failed to create a redirect file"); + return; } + char redir_dest[12]; // maximum file name size of FAT (8.3 format) is = 12 + args[redir_idx+1].copy(redir_dest, 12); + args.erase(args.begin() + redir_idx, args.end()); auto [ file, post_slash ] = fat::FindFile(redir_dest); if (file == nullptr) { @@ -445,13 +440,14 @@ void Terminal::ExecuteLine() { std::shared_ptr pipe_fd; uint64_t subtask_id = 0; - if (pipe_char) { - *pipe_char = 0; - trim_space(pipe_char); - char* subcommand = &pipe_char[1]; - while (isspace(*subcommand)) { - ++subcommand; + if (pipe_idx != -1 && pipe_idx != 0) { + std::string pipe_char = ""; + for(auto s = args.begin()+pipe_idx + 1; s != args.end(); ++s) { + pipe_char += *s; } + args.erase(args.begin() + pipe_idx-1, args.end()); + + const char* subcommand = pipe_char.c_str(); auto& subtask = task_manager->NewTask(); pipe_fd = std::make_shared(subtask); @@ -468,22 +464,30 @@ void Terminal::ExecuteLine() { (*layer_task_map)[layer_id_] = subtask_id; } - if (strcmp(command, "echo") == 0) { - if (first_arg && first_arg[0] == '$') { - if (strcmp(&first_arg[1], "?") == 0) { + if (command == ">") { + PrintToFD(*files_[2], "no command before > \n"); + exit_code = 1; + } else if (command == "|") { + PrintToFD(*files_[2], "no command before | \n"); + exit_code = 1; + } else if (command == "echo") { + if (args.size() > 1) { + if (args[1] == "$?") { 
PrintToFD(*files_[1], "%d", last_exit_code_); + } else { + for (int i = 1; i < args.size(); i++) { + PrintToFD(*files_[1], "%s ", args[i].c_str()); + } } - } else if (first_arg) { - PrintToFD(*files_[1], "%s", first_arg); } PrintToFD(*files_[1], "\n"); - } else if (strcmp(command, "clear") == 0) { + } else if (command == "clear") { if (show_window_) { FillRectangle(*window_->InnerWriter(), {4, 4}, {8*kColumns, 16*kRows}, {0, 0, 0}); } cursor_.y = 0; - } else if (strcmp(command, "lspci") == 0) { + } else if (command == "lspci") { for (int i = 0; i < pci::num_device; ++i) { const auto& dev = pci::devices[i]; auto vendor_id = pci::ReadVendorId(dev.bus, dev.device, dev.function); @@ -492,36 +496,32 @@ void Terminal::ExecuteLine() { dev.bus, dev.device, dev.function, vendor_id, dev.header_type, dev.class_code.base, dev.class_code.sub, dev.class_code.interface); } - } else if (strcmp(command, "ls") == 0) { - char* file_name = first_arg; + } else if (command == "ls") { bool verbose = false; - if (file_name && file_name[0] == '-') { - for (++file_name; *file_name && !isspace(*file_name); ++file_name) { - if (*file_name == 'l') verbose = true; - } - while (isspace(*file_name)) file_name++; - } - if (!file_name || file_name[0] == '\0') { + if (args.size() < 2) { ListAllEntries(*files_[1], fat::boot_volume_image->root_cluster, verbose); } else { - auto [ dir, post_slash ] = fat::FindFile(file_name); - if (dir == nullptr) { - PrintToFD(*files_[2], "No such file or directory: %s\n", file_name); - exit_code = 1; - } else if (dir->attr == fat::Attribute::kDirectory) { - ListAllEntries(*files_[1], dir->FirstCluster(), verbose); - } else { - char name[13]; - fat::FormatName(*dir, name); - if (post_slash) { - PrintToFD(*files_[2], "%s is not a directory\n", name); + if (args[1] == "-l") { + verbose = true; + args.erase(args.begin() + 1); + } + for (int i = 1; i < args.size(); i++) { + const char* file_name = args[i].c_str(); + auto [ dir, post_slash ] = fat::FindFile(file_name); + 
if (dir == nullptr) { + PrintToFD(*files_[2], "No such file or directory: %s\n", file_name); exit_code = 1; + } else if (dir->attr == fat::Attribute::kDirectory) { + ListAllEntries(*files_[1], dir->FirstCluster(), verbose); } else { - if (verbose) { - PrintFileAttr(*files_[1], *dir); + char name[13]; + fat::FormatName(*dir, name); + if (post_slash) { + PrintToFD(*files_[2], "%s is not a directory\n", name); + exit_code = 1; } else { - if (files_[1]->IsTerminal() && dir->attr == fat::Attribute::kDirectory) { - PrintToFD(*files_[1], "\033[94m%s\033[0m\n", name); + if (verbose) { + PrintFileAttr(*files_[1], *dir); } else { PrintToFD(*files_[1], "%s\n", name); } @@ -529,23 +529,24 @@ void Terminal::ExecuteLine() { } } } - } else if (strcmp(command, "cat") == 0) { + + } else if (command == "cat") { std::shared_ptr fd; - if (!first_arg || first_arg[0] == '\0') { + if (args.size() < 2) { fd = files_[0]; } else { - auto [ file_entry, post_slash ] = fat::FindFile(first_arg); - if (!file_entry) { - PrintToFD(*files_[2], "no such file: %s\n", first_arg); - exit_code = 1; - } else if (file_entry->attr != fat::Attribute::kDirectory && post_slash) { - char name[13]; - fat::FormatName(*file_entry, name); - PrintToFD(*files_[2], "%s is not a directory\n", name); - exit_code = 1; - } else { - fd = std::make_shared(*file_entry); - } + auto [ file_entry, post_slash ] = fat::FindFile(args[1].c_str()); + if (!file_entry) { + PrintToFD(*files_[2], "no such file: %s\n", args[1].c_str()); + exit_code = 1; + } else if (file_entry->attr != fat::Attribute::kDirectory && post_slash) { + char name[13]; + fat::FormatName(*file_entry, name); + PrintToFD(*files_[2], "%s is not a directory\n", name); + exit_code = 1; + } else { + fd = std::make_shared(*file_entry); + } } if (fd) { char u8buf[1024]; @@ -559,14 +560,19 @@ void Terminal::ExecuteLine() { } DrawCursor(true); } - } else if (strcmp(command, "noterm") == 0) { + } else if (command == "noterm") { + std::string first_arg = ""; + for(auto s = 
args.begin() + 1; s != args.end(); ++s) { + first_arg += *s; + } + auto term_desc = new TerminalDescriptor{ first_arg, true, false, files_ }; task_manager->NewTask() .InitContext(TaskTerminal, reinterpret_cast(term_desc)) .Wakeup(); - } else if (strcmp(command, "memstat") == 0) { + } else if (command == "memstat") { const auto p_stat = memory_manager->Stat(); PrintToFD(*files_[1], "Phys used : %lu frames (%llu MiB)\n", p_stat.allocated_frames, @@ -574,7 +580,7 @@ void Terminal::ExecuteLine() { PrintToFD(*files_[1], "Phys total: %lu frames (%llu MiB)\n", p_stat.total_frames, p_stat.total_frames * kBytesPerFrame / 1024 / 1024); - } else if (strcmp(command, "date") == 0) { + } else if (command == "date") { EFI_TIME t; uefi_rt->GetTime(&t, nullptr); if (t.TimeZone == EFI_UNSPECIFIED_TIMEZONE) { @@ -589,11 +595,11 @@ void Terminal::ExecuteLine() { PrintToFD(*files_[1], "-%02d%02d\n", -t.TimeZone / 60, -t.TimeZone % 60); } } - } else if (strcmp(command, "reboot") == 0) { + } else if (command == "reboot") { uefi_rt->ResetSystem(EfiResetWarm, EFI_SUCCESS, 0, nullptr); - } else if (strcmp(command, "poweroff") == 0) { + } else if (command == "poweroff") { uefi_rt->ResetSystem(EfiResetShutdown, EFI_SUCCESS, 0, nullptr); - } else if (strcmp(command, "lsusb") == 0) { + } else if (command == "lsusb") { auto devmgr = usb::xhci::controller->DeviceManager(); for (int slot = 1; slot < 256; ++slot) { auto dev = devmgr->FindBySlot(slot); @@ -608,18 +614,19 @@ void Terminal::ExecuteLine() { dev->DeviceDesc().device_sub_class, dev->DeviceDesc().device_protocol); } - } else if (strcmp(command, "usbtest") == 0) { + } else if (command == "usbtest") { [&]{ if (!usb::cdc::driver) { PrintToFD(*files_[2], "CDC device not exist\n"); exit_code = 1; return; } - size_t send_len; - if (first_arg && first_arg[0]) { - send_len = strlen(first_arg); - usb::cdc::driver->SendSerial(first_arg, send_len); + if (args.size() < 2) { + for (int i = 1; i < args.size(); i++) { + send_len = args[i].length(); + 
usb::cdc::driver->SendSerial(args[i].c_str(), send_len); + } } else { send_len = 1; usb::cdc::driver->SendSerial("a", 1); @@ -633,7 +640,7 @@ void Terminal::ExecuteLine() { files_[1]->Write(buf.data(), recv_len); PrintToFD(*files_[1], "\n"); }(); - } else if (strcmp(command, "setbaud") == 0) { + } else if (command == "setbaud") { [&]{ usb::cdc::LineCoding line_coding{ 9600, @@ -642,9 +649,9 @@ void Terminal::ExecuteLine() { 8 }; - if (first_arg && first_arg[0]) { + if (args.size() < 2) { char *endp; - line_coding.dte_rate = strtol(first_arg, &endp, 0); + line_coding.dte_rate = strtol(args[1].c_str(), &endp, 0); if (*endp != '\0') { PrintToFD(*files_[2], "Baud rate must be an integer"); exit_code = 1; @@ -661,7 +668,7 @@ void Terminal::ExecuteLine() { PrintToFD(*files_[2], "Setting baud rate to %u\n", line_coding.dte_rate); usb::cdc::driver->SetLineCoding(line_coding); }(); - } else if (strcmp(command, "comproc") == 0) { + } else if (command == "comproc") { [&]{ if (!usb::cdc::driver) { PrintToFD(*files_[2], "CDC device not exist\n"); @@ -671,7 +678,7 @@ void Terminal::ExecuteLine() { std::vector insn; - if (first_arg && strcmp(first_arg, "sample") == 0) { + if (args.size() < 2 && strcmp(args[1].c_str(), "sample") == 0) { const std::array kSample = { 0x00, 0x20, 0xC1, 0x00, @@ -711,13 +718,13 @@ void Terminal::ExecuteLine() { } PrintToFD(*files_[1], "exit_code=%d\n", code); }(); - } else if (command[0] != 0) { - auto file_entry = FindCommand(command); + } else if (!command.empty()) { + auto file_entry = FindCommand(command.c_str()); if (!file_entry) { - PrintToFD(*files_[2], "no such command: %s\n", command); + PrintToFD(*files_[2], "no such command: %s\n", command.c_str()); exit_code = 1; } else { - auto [ ec, err ] = ExecuteFile(*file_entry, command, first_arg); + auto [ ec, err ] = ExecuteFile(*file_entry, command.c_str(), args); if (err) { PrintToFD(*files_[2], "failed to exec file: %s\n", err.Name()); exit_code = -ec; @@ -744,7 +751,7 @@ void 
Terminal::ExecuteLine() { } WithError Terminal::ExecuteFile(fat::DirectoryEntry& file_entry, - char* command, char* first_arg) { + const char* command, std::vector& args) { __asm__("cli"); auto& task = task_manager->CurrentTask(); __asm__("sti"); @@ -762,7 +769,7 @@ WithError Terminal::ExecuteFile(fat::DirectoryEntry& file_entry, int argv_len = 32; // argv = 8x32 = 256 bytes auto argbuf = reinterpret_cast(args_frame_addr.value + sizeof(char*) * argv_len); int argbuf_len = 4096 - sizeof(char*) * argv_len; - auto argc = MakeArgVector(command, first_arg, argv, argv_len, argbuf, argbuf_len); + auto argc = MakeArgVector(args, argv, argv_len, argbuf, argbuf_len); if (argc.error) { return { 0, argc.error }; } diff --git a/kernel/terminal.hpp b/kernel/terminal.hpp index a00d4b56c..fe4570e23 100644 --- a/kernel/terminal.hpp +++ b/kernel/terminal.hpp @@ -68,9 +68,9 @@ class Terminal { std::array linebuf_{}; void Scroll1(); - void ExecuteLine(); + void ExecuteLine(std::vector& tokens, int redir, int pipes); WithError ExecuteFile(fat::DirectoryEntry& file_entry, - char* command, char* first_arg); + const char* command, std::vector& args); void Print(char32_t c); std::deque> cmd_history_{}; diff --git a/kernel/tests/.gitignore b/kernel/tests/.gitignore new file mode 100644 index 000000000..5e9c83484 --- /dev/null +++ b/kernel/tests/.gitignore @@ -0,0 +1,2 @@ +/tests +/*.o diff --git a/kernel/tests/Makefile b/kernel/tests/Makefile new file mode 100644 index 000000000..683af640f --- /dev/null +++ b/kernel/tests/Makefile @@ -0,0 +1,28 @@ +CFLAGS += -O2 -Wall -g +CPPFLAGS += -O2 -Wall -g -std=c++17 -MMD -MP -MF $(@:.o=.d) + +TARGET = tests +VPATH = ../ +SRC = main.cpp tokenizer.cpp tokenizer_test.cpp +OBJS = $(SRC:.cpp=.o) +DEPS = $(SRC:.cpp=.d) + +.PHONY: all +all: $(TARGET) + +.PHONY: clean +clean: + rm -rf *.o $(TARGET) $(DEPS) + +$(TARGET): $(OBJS) Makefile + clang++ $(LDFLAGS) -o $@ $(OBJS) -fuse-ld=lld + +%.o: %.cpp Makefile + clang++ $(CPPFLAGS) $(CFLAGS) -c $< -o $@ + 
+.PHONY: test +test: $(TARGET) + ./$(TARGET) + +-include $(DEPS) +.SECONDARY: $(OBJS) diff --git a/kernel/tests/main.cpp b/kernel/tests/main.cpp new file mode 100644 index 000000000..994acf9f8 --- /dev/null +++ b/kernel/tests/main.cpp @@ -0,0 +1,23 @@ + +#include +#include "tokenizer_test.hpp" + +int main() { + const int test_num = 1; + int success = 0; + const bool verbose = false; + + if (verbose) std::cout << "test: tokenizer" << std::endl; + if (TestTokenize(verbose)) { + success++; + } + + std::cout << "test: " << success << "/" << test_num << " passed" << std::endl; + if (test_num == success) { + std::cout << "\e[38;5;10mOK\e[0m\n" << std::endl; + return 0; + } else { + std::cout << "\e[38;5;9mERR\e[0m\n" << std::endl; + return -1; + } +} diff --git a/kernel/tests/tokenizer_test.cpp b/kernel/tests/tokenizer_test.cpp new file mode 100644 index 000000000..7277d5d73 --- /dev/null +++ b/kernel/tests/tokenizer_test.cpp @@ -0,0 +1,104 @@ +#include "tokenizer_test.hpp" + +#include +#include +#include +#include +#include + +#include "../tokenizer.hpp" + +bool IsTISSame(const std::unique_ptr &tis1, const std::unique_ptr &tis2) { + if (tis1 == nullptr ^ tis2 == nullptr) { return false; } + if (tis1 == nullptr && tis2 == nullptr) { return true; } + if (tis1->last_state != tis2->last_state) { return false; } + if (tis1->last_state != tis2->last_state) { return false; } + if (tis1->tmp_token != tis2->tmp_token) { return false; } + return true; +} + +void PrintTIS(const std::unique_ptr &tis) { + std::cout << " >>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; + if (!tis) { std::cout << " nullptr" << std::endl; } + else { + std::cout << " stete: " << tis->last_state << std::endl; + std::cout << " last_state: " << tis->last_state << std::endl; + std::cout << " tmp_token: `" << tis->tmp_token << "`" << std::endl; + std::cout << " len(tmp_token): " << tis->tmp_token.length() << std::endl; + } + std::cout << " <<<<<<<<<<<<<<<<<<<<<<<<<<<<" << std::endl; +} + +bool TestTokenize(bool 
verbose) { + bool ret = true; + auto t0 = std::make_unique(InToken, BackSlash, "hoge"); + auto t5 = std::make_unique(InDoubleQuoted, Init, "piyo"); + auto t2 = std::make_unique(Init, BackSlash, ""); + auto t9 = std::make_unique(InDoubleQuoted, Init, "ed> hoge"); + struct { + const int expected; + const char* result[32]; + int redir; + int pipe; + char linebuf[100]; + std::unique_ptr eis; // expected + std::unique_ptr iis; // input + } tbl[] = { + /* 00 */ {9, {"hoge", "|", "fuga", "|", "piyo", ">", "foo", ">", "bar"}, 5, 1, R"(hoge |fuga| piyo >foo> bar)", nullptr, nullptr}, + // パイプ, 空白の前後が空白、複数のリダイレクト + /* 01 */ {3, {"minied", "-p", "mini|ed>"}, -1, -1, R"(minied -p "mini|ed>" hoge\)", std::move(t0), nullptr}, + // ダブルクォート中のリダイレクト, パイプ、\終端 + /* 02 */ {4, {"hoge", ">", "piyo", "|"}, -1, -1, R"(hoge ">" piyo '|')", nullptr, nullptr}, + // クウォーテーションの中にパイプ、リダイレクト単体 + /* 03 */ {2, {"hoge fuga", "piyo"}, -1, -1, R"(hoge\ fuga \pi\yo)", nullptr, nullptr}, + // tokenの中のバックスラッシュ + /* 04 */ {3, {"ho'ge", "fuga", "piyo"}, -1, -1, R"(ho\'ge fu"ga" pi'yo)", nullptr, nullptr}, + // token中のクウォーテーション、token中の\' + /* 05 */ {1, {"hoge fuga"}, -1, -1, R"('hoge fuga' "piyo)", std::move(t5), nullptr}, + // 閉じていないクウォーテーション + /* 06 */ {2, {"hoge", "fugapiyo"}, -1, -1, R"(hoge fuga\ +piyo)", nullptr, nullptr}, + // token中の\,改行 + /* 07 */ {2, {"hoge", "fuga piyo"}, -1, -1, R"(hoge "fuga \ +piyo")", nullptr, nullptr}, + // ダブルクォート中のバックスラッシュ,改行 + /* 08 */ {2, {"hoge", "fuga\\ \npiyo"}, -1, -1, R"(hoge "fuga\ +piyo")", nullptr, nullptr}, + //ダブルクォート中のバックスラッシュ,スペース,改行 + }; + for (size_t i = 0; i < sizeof(tbl) / sizeof(tbl[0]); i++) { + if (verbose) printf("case %zd: `%s`\n", i, tbl[i].linebuf); + std::vector tokens; + int redir = -1, *p_redir = &redir; + int pipe = -1, *p_pipe = &pipe; + auto t = std::move(tbl[i].iis); + + t = Tokenize(tbl[i].linebuf, tokens, p_redir, p_pipe, std::move(t)); + // return val check + + if (!IsTISSame(t, tbl[i].eis)) { PrintTIS(t); PrintTIS(tbl[i].eis); printf(" 
\e[38;5;9mERR: invalid return val\e[0m\n"); ret = false; } + // size of tokens check + if (tokens.size() != tbl[i].expected) { + if (verbose) printf(" \e[38;5;9mERR: num of tokens. expected %d but %zu.\e[0m\n", tbl[i].expected, tokens.size()); + ret = false; + } + // redir & pipe check + if (redir != tbl[i].redir) { + if (verbose) { + printf(" `%d`, `%d`\n", tbl[i].redir, redir); + printf(" \e[38;5;9mredir ERR\e[0m\n"); + } + ret = false; + } + if (pipe != tbl[i].pipe) { if (verbose) printf(" \e[38;5;9mpipe ERR\e[0m\n"); ret = false; } + // token check + for (size_t j = 0; j < tokens.size(); j++) { + if (verbose) printf(" cmp `%s`, `%s`\n", tbl[i].result[j], tokens[j].c_str()); + if (tokens[j].c_str() == NULL || tbl[i].result[j] == NULL) { if (verbose) printf(" \e[38;5;9mERR\e[0m\n");ret = false; continue; } + if (strcmp(tokens[j].c_str(), tbl[i].result[j])) { + if (verbose) printf(" \e[38;5;9mERR\e[0m\n"); ret = false; + } + } + } + return ret; +} diff --git a/kernel/tests/tokenizer_test.hpp b/kernel/tests/tokenizer_test.hpp new file mode 100644 index 000000000..fefa1976d --- /dev/null +++ b/kernel/tests/tokenizer_test.hpp @@ -0,0 +1,7 @@ +#pragma once + +#include "../tokenizer.hpp" + +bool IsTisSame(struct TokenizerInnerState *tis1, struct TokenizerInnerState *tis2); +void PrintTis(struct TokenizerInnerState *tis); +bool TestTokenize(bool verbose); diff --git a/kernel/tokenizer.cpp b/kernel/tokenizer.cpp new file mode 100644 index 000000000..7e5b6aa8b --- /dev/null +++ b/kernel/tokenizer.cpp @@ -0,0 +1,134 @@ +#include "tokenizer.hpp" + +#include +#include +#include + +/** +* Tokenize +* +* ターミナルが受け取った文字列をtokenizeする +* +* @param[in] *c ターミナルからの入力文字列 +* @param[out] tokens tokenを格納するvector +* @param[out] *redir_idx リダイレクト文字'>'が最初に登場するtokenのインデックス + tokenize未完了から続きをtokenizeする場合には、Tokenizeが返した値、 + 新たにtokenizeする場合-1を初期値とする +* @param[out] *pipe_idx パイプ文字'|'が最初に登場するtokenのインデックス + tokenize未完了から続きをtokenizeする場合には、Tokenizeが返した値、 + 新たにtokenizeする場合-1を初期値とする +* @param[in] 
#include <cctype>
#include <memory>
#include <string>
#include <utility>
#include <vector>

/** States of the shell tokenizer automaton. */
enum State {
  Init,            // initial state: between tokens
  InToken,         // accumulating an unquoted token
  InDoubleQuoted,  // inside a "..." section
  InSingleQuoted,  // inside a '...' section
  BackSlash,       // the previous character was a backslash
};

/**
 * Snapshot of the tokenizer, returned when the input ended in the middle of
 * a token so that tokenizing can be resumed with more input.
 */
struct TokenizerInnerState {
  State state;            // current state
  State last_state;       // state before the most recent transition
  std::string tmp_token;  // characters accumulated for the unfinished token

  // A single constructor replaces the former (const char*) / (std::string)
  // pair: both had every argument defaulted, which made default construction
  // ambiguous. String literals still convert implicitly.
  TokenizerInnerState(State state = Init, State last_state = Init,
                      std::string tmp_token = {})
      : state(state), last_state(last_state), tmp_token(std::move(tmp_token)) {}
};
using tis_uniq = std::unique_ptr<TokenizerInnerState>;

tis_uniq Tokenize(const char *c, std::vector<std::string>& tokens,
                  int *redir_idx, int *pipe_idx, tis_uniq last_istate);

/**
 * Tokenize
 *
 * Splits a line received by the terminal into shell tokens.
 *
 * Unquoted '>' and '|' always become tokens of their own. Quote characters
 * delimit a token only when they appear between tokens; inside an unquoted
 * token they are dropped. A backslash escapes the next character: outside
 * quotes the character is taken literally, inside quotes both the backslash
 * and the character are kept, and backslash-newline is removed everywhere
 * (line continuation).
 *
 * @param[in]  c            NUL-terminated input (nullptr is treated as "")
 * @param[out] tokens       completed tokens are appended to this vector
 * @param[out] redir_idx    index in tokens of the first unquoted '>'.
 *                          Initialise to -1 for a fresh line; when resuming,
 *                          pass the value left by the previous call.
 *                          May be nullptr if the caller does not need it.
 * @param[out] pipe_idx     same as redir_idx, for the first unquoted '|'
 * @param[in]  last_istate  state returned by a previous incomplete call, or
 *                          nullptr to start from scratch
 * @return nullptr when the input formed a complete command line; otherwise
 *         the inner state needed to continue (open quote, unfinished token
 *         or trailing backslash)
 */
tis_uniq Tokenize(const char *c, std::vector<std::string>& tokens,
                  int *redir_idx, int *pipe_idx,
                  const tis_uniq last_istate) {
  if (c == nullptr) {
    c = "";
  }

  State state = Init;
  State last_state = Init;
  std::string tmp_token;
  if (last_istate) {
    state = last_istate->state;
    last_state = last_istate->last_state;
    tmp_token = last_istate->tmp_token;
  }

  auto update_state = [&](State new_state) {
    last_state = state;
    state = new_state;
  };
  auto revert_state = [&](State new_state) {
    state = last_state;
    last_state = new_state;
  };
  // <cctype> functions are undefined for negative values, which plain char
  // takes for non-ASCII (e.g. UTF-8) bytes.
  auto is_space = [](char ch) {
    return std::isspace(static_cast<unsigned char>(ch)) != 0;
  };
  // Emits an operator token and records the position of its first occurrence.
  auto push_operator = [&](char op) {
    tokens.push_back(std::string(1, op));
    const int idx = static_cast<int>(tokens.size()) - 1;
    if (op == '>' && redir_idx != nullptr && *redir_idx == -1) {
      *redir_idx = idx;
    }
    if (op == '|' && pipe_idx != nullptr && *pipe_idx == -1) {
      *pipe_idx = idx;
    }
  };
  // Finishes an unquoted token. An empty buffer produces no token; explicit
  // empty arguments come only from quotes and are pushed there.
  auto flush_token = [&] {
    if (!tmp_token.empty()) {
      tokens.push_back(tmp_token);
    }
    tmp_token.clear();
  };

  // The loop looks for a backslash only after advancing, so the very first
  // character has to be checked here.
  if (*c == '\\') {
    update_state(BackSlash);
    ++c;
  }

  while (true) {
    const char ch = *c;
    switch (state) {
      case BackSlash:
        if (ch == '\0') {
          // Input ended right after the backslash: report "incomplete".
          revert_state(BackSlash);
        } else {
          switch (last_state) {
            case Init:
            case BackSlash:  // only reachable from a malformed resumed state
              if (is_space(ch)) {
                // Escaped whitespace between tokens is dropped; nothing has
                // been accumulated, so stay between tokens.
                tmp_token.clear();
                state = Init;
                last_state = Init;
              } else {
                tmp_token = ch;
                state = InToken;
                last_state = BackSlash;
              }
              break;
            case InToken:
              if (ch != '\n') {
                tmp_token += ch;
              }
              revert_state(BackSlash);
              break;
            case InDoubleQuoted:
            case InSingleQuoted:
              if (ch != '\n') {
                tmp_token += '\\';
                tmp_token += ch;
              }
              revert_state(BackSlash);
              break;
          }
        }
        break;

      case Init:
        if (ch == '>' || ch == '|') {
          push_operator(ch);
        } else if (ch == '"') {
          // Start with an empty buffer; the character after the quote is
          // handled by the quoted state, so "" yields an empty token.
          update_state(InDoubleQuoted);
          tmp_token.clear();
        } else if (ch == '\'') {
          update_state(InSingleQuoted);
          tmp_token.clear();
        } else if (ch != '\0' && !is_space(ch)) {
          update_state(InToken);
          tmp_token = ch;
        }
        break;

      case InToken:
        if (ch == '>' || ch == '|') {
          update_state(Init);
          flush_token();
          push_operator(ch);
        } else if (ch == '\0' || is_space(ch)) {
          update_state(Init);
          flush_token();
        } else if (ch != '"' && ch != '\'') {
          tmp_token += ch;
        }
        break;

      case InDoubleQuoted:
        if (ch == '"') {
          update_state(Init);
          tokens.push_back(tmp_token);
          tmp_token.clear();
        } else if (ch != '\0') {
          tmp_token += ch;
        }
        break;

      case InSingleQuoted:
        if (ch == '\'') {
          update_state(Init);
          tokens.push_back(tmp_token);
          tmp_token.clear();
        } else if (ch != '\0') {
          tmp_token += ch;
        }
        break;
    }

    if (*c == '\0') {
      break;
    }
    ++c;
    if (*c == '\\') {
      update_state(BackSlash);
      ++c;
    }
  }

  // Incomplete when a quote or token is still open, or when the input ended
  // with a backslash.
  if (state != Init || last_state == BackSlash) {
    return std::make_unique<TokenizerInnerState>(state, last_state, tmp_token);
  }
  return nullptr;
}