Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement nvme driver #1284

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ post-includes-bsd += -isystem bsd/$(arch)
$(out)/musl/%.o: pre-include-api = -isystem include/api/internal_musl_headers -isystem musl/src/include

ifneq ($(werror),0)
CFLAGS_WERROR = -Werror
CFLAGS_WERROR = -Wall
endif
# $(call compiler-flag, -ffoo, option, file)
# returns option if file builds with -ffoo, empty otherwise
Expand Down Expand Up @@ -889,6 +889,13 @@ drivers += drivers/virtio-vring.o
ifeq ($(conf_drivers_mmio),1)
drivers += drivers/virtio-mmio.o
endif
ifeq ($(conf_drivers_nvme),1)
drivers += drivers/nvme.o
drivers += drivers/nvme-queue.o
endif
ifeq ($(conf_drivers_io_test),1)
drivers += drivers/io_test.o
endif
drivers += drivers/virtio-net.o
drivers += drivers/virtio-blk.o
drivers += drivers/virtio-scsi.o
Expand Down
6 changes: 6 additions & 0 deletions arch/x64/arch-setup.cc
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,9 @@ void arch_init_premain()
#if CONF_drivers_ide
#include "drivers/ide.hh"
#endif
#if CONF_drivers_nvme
#include "drivers/nvme.hh"
#endif

extern bool opt_pci_disabled;
void arch_init_drivers()
Expand Down Expand Up @@ -364,6 +367,9 @@ void arch_init_drivers()
#endif
#if CONF_drivers_ide
drvman->register_driver(ide::ide_drive::probe);
#endif
#if CONF_drivers_nvme
drvman->register_driver(nvme::probe);
#endif
boot_time.event("drivers probe");
drvman->load_all();
Expand Down
1 change: 1 addition & 0 deletions conf/profiles/x64/all.mk
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ include conf/profiles/$(arch)/virtio-mmio.mk
include conf/profiles/$(arch)/virtio-pci.mk
include conf/profiles/$(arch)/vmware.mk
include conf/profiles/$(arch)/xen.mk
include conf/profiles/$(arch)/nvme.mk

conf_drivers_vga?=1
6 changes: 6 additions & 0 deletions conf/profiles/x64/base.mk
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ export conf_drivers_pci?=1
export conf_drivers_scsi?=1
endif

export conf_drivers_nvme?=0
ifeq ($(conf_drivers_nvme),1)
export conf_drivers_pci?=1
endif

export conf_drivers_vmxnet3?=0
ifeq ($(conf_drivers_vmxnet3),1)
export conf_drivers_pci?=1
Expand Down Expand Up @@ -72,3 +77,4 @@ export conf_drivers_virtio?=0
export conf_drivers_pci?=0
export conf_drivers_mmio?=0
export conf_drivers_scsi?=0
export conf_drivers_io_test?=0
3 changes: 3 additions & 0 deletions conf/profiles/x64/nvme.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# NVMe driver profile: switches on conf_drivers_pci together with
# conf_drivers_nvme. Both use '?=' so a value already set by the caller
# (command line or an earlier profile) is left untouched.
conf_drivers_pci?=1

conf_drivers_nvme?=1
3 changes: 2 additions & 1 deletion core/debug.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,11 @@ bool logger::parse_configuration(void)
add_tag("virtio-blk", logger_warn);
add_tag("virtio-net", logger_warn);
add_tag("vmxnet3", logger_warn);
add_tag("pci", logger_info);
add_tag("pci", logger_debug);
add_tag("poll", logger_info);
add_tag("dhcp", logger_info);
add_tag("acpi", logger_error);
add_tag("nvme", logger_debug);

return (true);
}
Expand Down
59 changes: 59 additions & 0 deletions drivers/blk_ioctl.hh
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please add a copyright statement to all your files (see this one for example -

/*
* Copyright (C) 2017 Waldemar Kozaczuk
* Inspired by original MFS implementation by James Root from 2015
*
* This work is open source software, licensed under the terms of the
* BSD license as described in the LICENSE file in the top-level directory.
*/
).

Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#ifndef BLK_IOCTL_H
#define BLK_IOCTL_H

// Bit layout of an ioctl request number, low to high:
//   NR (8 bits) | TYPE (8 bits) | SIZE (14 bits) | DIR (2 bits)
// This is the generic Linux layout (asm-generic/ioctl.h). The previous
// 13/3 split for SIZE/DIR belongs to a few other architectures and made
// _IOC_DIR() decode the wrong value for requests built with the generic
// layout (e.g. a "read" request decoded as 4 instead of 2).
#define _IOC_NRBITS 8
#define _IOC_TYPEBITS 8
#define _IOC_SIZEBITS 14
#define _IOC_DIRBITS 2

// Field masks, applied after shifting the field down to bit 0.
#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1)
#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1)
#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1)
#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1)

// Bit position of each field; every field starts where the previous one ends.
#define _IOC_NRSHIFT 0
#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS)
#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS)
#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS)

// Field extractors for a full ioctl request number `nr`.
#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK)
#define _IOC_TYP(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK)
#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK)
#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK)

// Block ioctl command numbers. These are only the NR field of the request:
// blk_ioctl() compares them against _IOC_NR(io_cmd), not against the full
// request value.
// NOTE(review): BLKDISCARD is defined but has no case in blk_ioctl()'s
// switch, so it currently ends up in the default branch (EINVAL).
#define BLKGETSIZE64 114
#define BLKFLSBUF 97
#define BLKDISCARD 119

// Tracepoint fired on every blk_ioctl() call with the device name and the
// decoded type / nr / size / dir fields of the request.
TRACEPOINT(trace_blk_ioctl, "dev=%s type=%#x nr=%d size=%d, dir=%d", char*, int, int, int, int);

// Completion callback for a bio whose result nobody waits for: it simply
// releases the bio. Marked inline because this definition lives in a header;
// a non-inline definition would be emitted by every translation unit that
// includes it and clash at link time.
inline void no_bio_done(bio* b) { delete b; }

/*
 * Generic ioctl handler for block devices.
 *
 * dev    - device the request is addressed to (must not be null)
 * io_cmd - full ioctl request number; only its NR field selects the action
 * buf    - user argument; for BLKGETSIZE64 it receives the device size
 *
 * Returns 0 on success, EFAULT if a required buffer is missing, ENOMEM if
 * no bio could be allocated for a flush, and EINVAL for unknown commands.
 *
 * Marked inline because the definition lives in a header.
 */
inline int
blk_ioctl(struct device* dev, u_long io_cmd, void* buf)
{
    assert(dev);

    // Decode once and narrow to the types the tracepoint / printf
    // conversions actually expect (the macros yield u_long values).
    const unsigned type = static_cast<unsigned>(_IOC_TYP(io_cmd));
    const int nr = static_cast<int>(_IOC_NR(io_cmd));
    const int size = static_cast<int>(_IOC_SIZE(io_cmd));
    const int dir = static_cast<int>(_IOC_DIR(io_cmd));

    trace_blk_ioctl(dev->name, type, nr, size, dir);

    switch (nr) {
    case BLKGETSIZE64:
        // device capacity in bytes
        if (!buf) {
            return EFAULT;
        }
        *static_cast<off_t*>(buf) = dev->size;
        break;
    case BLKFLSBUF: {
        auto* bio = alloc_bio();
        if (!bio) {
            return ENOMEM;
        }
        bio->bio_dev = dev;
        // Fire and forget: the completion callback just frees the bio.
        bio->bio_done = no_bio_done;
        bio->bio_cmd = BIO_FLUSH;

        dev->driver->devops->strategy(bio);
    }
        break;
    default:
        printf("ioctl not defined; type:%#x nr:%d size:%d, dir:%d\n", type, nr, size, dir);
        return EINVAL;
    }
    return 0;
}

#endif
128 changes: 128 additions & 0 deletions drivers/io-test.cc
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please describe in comments what this test does and how one can run it? How does it differ from the various disk I/O-related tests located under tests/misc-*.cc?

I also suggest we move this to the tests/ folder.

Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#include "drivers/io-test.hh"
#include <osv/contiguous_alloc.hh>
#include <stdio.h>
#include <stdlib.h>
#include <random>
#include <machine/atomic.h>
#include <osv/clock.hh>

// Shared state of the I/O test; written and read by the threads started in
// test_block_device().
// NOTE(review): `volatile` does not provide inter-thread synchronization in
// C++; the counters below are only updated through atomic_add_64 /
// atomic_fetchadd_long, but `running` is a plain volatile flag.

// Loop flag: set to true before the worker threads start and to false once
// the timer thread has finished; polled by requesting() and reporting().
volatile bool running;
// Number of finished requests; incremented in io_done().
volatile u64 completed_io;
// Number of submitted requests; incremented in requesting().
volatile u64 requested_io;
// Requests submitted but not yet completed (+1 in requesting(), -1 in io_done()).
std::atomic<unsigned int> open_req;
// Upper bound for open_req; requesting() waits while it is reached.
u32 max_open;
// Upper bound for requested_io; requesting() stops once it is reached.
u64 max_ios;

// Runs a timed read benchmark against `dev` and prints the result.
//
// dev           - block device under test
// test_duration - run time; handed unchanged to usleep(), i.e. microseconds
// blcks_per_io  - blocks per request (io_size = blocksize * blcks_per_io)
// blocksize     - block size in bytes
// blockshift    - forwarded to requesting(), which uses it as the shift
//                 between block numbers and byte offsets
//
// Three threads are created: one issuing requests (requesting()), one that
// only sleeps for test_duration, and one printing periodic statistics
// (reporting()). The test ends when the sleeping thread has been joined.
void test_block_device(struct device *dev, int test_duration, int blcks_per_io, int blocksize, int blockshift)
{
// Interval between two progress reports, passed to usleep() by reporting().
int report_step = 1e6;
int io_size = blocksize * blcks_per_io;
// Reset the shared counters and limits for this run.
completed_io = 0;
requested_io = 0;
open_req.store(0);
max_open = 64;
max_ios = 1 << 30;

printf("Start IO test dev : %s, IO size : %d\n",dev->name,io_size);
sched::thread *t;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of OSv internal API to create threads I would suggest using the standard std::thread API so that the test can be built and run on Linux so that we can easily compare the results. The same applies to other OSv-specific API (memory::alloc_phys_contiguous_aligned). Unless there is a very specific reason to use those because the test is OSv specific or tests OSv internal API.

t = sched::thread::make([dev,io_size,blockshift] { requesting(dev,io_size,blockshift);},
sched::thread::attr().name("IO_Test Request"));

// Timer thread: its only job is to sleep for the test duration.
sched::thread *timer;
timer = sched::thread::make([test_duration] { usleep(test_duration);},
sched::thread::attr().name("IO_Test_Timer"));

// NOTE(review): the reporter thread reuses the name "IO_Test_Timer" of the
// timer thread above - looks like a copy/paste leftover.
sched::thread *repo;
repo = sched::thread::make([test_duration,report_step,io_size] { reporting(test_duration,report_step,io_size);},
sched::thread::attr().name("IO_Test_Timer"));
auto c = clock::get();

// `running` must be true before the workers start, they poll it in their loops.
running = true;
u64 start = c->time();
timer->start();
t->start();
repo->start();

// Block until the test duration has elapsed, then tell the workers to stop.
timer->join();
running = false;
// Snapshot the counter and the end time right away so that the shutdown of
// the worker threads is not part of the measurement.
u64 com = completed_io;
u64 end = c->time();
// The 1e9 factor presumably assumes time() returns nanoseconds - TODO confirm.
int iops = (com * 1e9)/ (end - start);

t->join();
repo->join();
// NOTE(review): t, timer and repo are joined but never deleted in this
// function - confirm the ownership rules of sched::thread::make.
// NOTE(review): %llu is used for u64 values; verify this matches u64's
// underlying type on the target.
printf("Test results runtime: %llu, completed IO : %llu, IOPS : %d\n",end-start,com,iops);
}

void reporting(int test_duration, int report_step, int io_size) {
u32 prev_compl = completed_io;
u32 compl_diff;
u32 compl_tem;
auto c = clock::get();
int time_diff;
int time_tem;
int prev_time = c->time();
while(running) {
usleep(report_step);
compl_tem = completed_io;
time_tem = c->time();

compl_diff = compl_tem - prev_compl;
prev_compl = compl_tem;
time_diff = time_tem - prev_time;
prev_time = time_tem;
double iops = (compl_diff * 1e9 ) / (double) time_diff;

printf("Timestep: %d, completed : %d, IOPS : %lf, open : %d\n",time_diff,compl_diff,iops,open_req.load());
}
}


void requesting(struct device *dev, u32 io_size, int blockshift) {
void* buff;
bio* bio;
off_t max_blocks = dev->size >> blockshift;
off_t max_offset = (max_blocks - 1) - (io_size >> blockshift);
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<> distrib(0, max_offset);

while(running) {
if(requested_io >= max_ios )
break;

buff = memory::alloc_phys_contiguous_aligned(io_size,2);
assert(buff);
memset(buff, 1, io_size);

bio = alloc_bio();
bio->bio_dev = dev;
bio->bio_data = buff;
bio->bio_done = io_done;
bio->bio_length = io_size;
bio->bio_bcount = io_size;
bio->bio_cmd = BIO_READ;

bio->bio_offset = ((off_t) distrib(gen)) << blockshift;

while(max_open<=open_req) {
usleep(10);
}
open_req.fetch_add(1);
atomic_add_64(&requested_io,1);
dev->driver->devops->strategy(bio);
}
}

void io_done(struct bio* bio) {

if(bio->bio_flags != BIO_DONE) {
printf("BIO_Error during IO Test: %x\n",bio->bio_flags);
}
u64 old = atomic_fetchadd_long(&completed_io, 1);

open_req.fetch_add(-1);

free(bio->bio_data);
delete bio;
}
12 changes: 12 additions & 0 deletions drivers/io-test.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#ifndef IO_TEST_H
#define IO_TEST_H

#include <osv/device.h>
#include <osv/bio.h>

// Request-generator thread body: issues random reads of io_size bytes to dev
// while the test is running. blockshift converts block numbers to byte
// offsets (the parameter was previously misnamed `blocksize` here, while the
// definition uses `blockshift`).
void requesting(struct device *dev, u32 io_size, int blockshift);
// Reporter thread body: prints completed requests and IOPS every report_step
// microseconds while the test is running.
void reporting(int test_duration, int report_step, int io_size);
// Completion callback for the bios created by requesting(); updates the
// counters and frees the bio together with its data buffer.
void io_done(struct bio* bio);
// Entry point: runs the benchmark on dev for test_duration (handed to
// usleep(), i.e. microseconds) with requests of blcks_per_io * blocksize bytes.
void test_block_device(struct device *dev, int test_duration,int blcks_per_io, int blocksize=512, int blockshift=9);

#endif
Loading