Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LibJVM: Start parsing class files #11500

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Userland/Libraries/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ add_subdirectory(LibIMAP)
add_subdirectory(LibImageDecoderClient)
add_subdirectory(LibIPC)
add_subdirectory(LibJS)
add_subdirectory(LibJVM)
add_subdirectory(LibKeyboard)
add_subdirectory(LibLine)
add_subdirectory(LibM)
Expand Down
6 changes: 6 additions & 0 deletions Userland/Libraries/LibJVM/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Translation units that make up LibJVM.
set(SOURCES
ClassFileParser.cpp
)

# Declare the LibJVM library target with the short name "jvm".
# NOTE(review): serenity_lib is a project helper; presumably it picks up SOURCES from the variable above - confirm.
serenity_lib(LibJVM jvm)
# LibJVM links against LibCore.
target_link_libraries(LibJVM LibCore)
63 changes: 63 additions & 0 deletions Userland/Libraries/LibJVM/ClassFile.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Copyright (c) 2021, Noah Haasis <haasis_noah@yahoo.de>
*
* SPDX-License-Identifier: BSD-2-Clause
*/

#pragma once

#include <AK/ByteBuffer.h>
#include <AK/Error.h>
#include <AK/FixedArray.h>
#include <AK/OwnPtr.h>
#include <AK/Types.h>

#include "ConstantPool.h"

namespace JVM {

// One entry of an `attributes` table (attribute_info, JVMS 4.7).
// The payload is not copied: `info` points into the class file buffer, so an
// AttributeInfo must not outlive the buffer it was parsed from.
struct AttributeInfo {
    // Index handed to ConstantPool::utf8_at() to look up the attribute's name.
    u16 attribute_name_index { 0 };
    // Number of payload bytes that `info` points at.
    u32 attribute_length { 0 };
    // Non-owning pointer to the first payload byte; null until the attribute has been parsed.
    u8 const* info { nullptr };

    // Resolves the attribute's name through the given constant pool.
    ConstantPool::Utf8 attribute_name(ConstantPool const& constant_pool) const { return constant_pool.utf8_at(attribute_name_index); }
};

// One entry of the class file's `fields` table (field_info, JVMS 4.5).
struct FieldInfo {
    // Raw access/property flag bits as read from the class file.
    u16 access_flags { 0 };
    // Index handed to ConstantPool::utf8_at() to look up the field's name.
    u16 name_index { 0 };
    // Index handed to ConstantPool::utf8_at() to look up the field's descriptor.
    u16 descriptor_index { 0 };
    // Attributes attached to this field.
    AK::FixedArray<AttributeInfo> attributes;

    // Resolves the field's name through the given constant pool.
    ConstantPool::Utf8 name(ConstantPool const& constant_pool) const { return constant_pool.utf8_at(name_index); }
    // Resolves the field's descriptor through the given constant pool.
    ConstantPool::Utf8 descriptor(ConstantPool const& constant_pool) const { return constant_pool.utf8_at(descriptor_index); }
};

// One entry of the class file's `methods` table (method_info, JVMS 4.6).
struct MethodInfo {
    // Raw access/property flag bits as read from the class file.
    u16 access_flags { 0 };
    // Index handed to ConstantPool::utf8_at() to look up the method's name.
    u16 name_index { 0 };
    // Index handed to ConstantPool::utf8_at() to look up the method's descriptor.
    u16 descriptor_index { 0 };
    // Attributes attached to this method.
    AK::FixedArray<AttributeInfo> attributes;

    // Resolves the method's name through the given constant pool.
    ConstantPool::Utf8 name(ConstantPool const& constant_pool) const { return constant_pool.utf8_at(name_index); }
    // Resolves the method's descriptor through the given constant pool.
    ConstantPool::Utf8 descriptor(ConstantPool const& constant_pool) const { return constant_pool.utf8_at(descriptor_index); }
};

// https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html
// In-memory representation of a parsed class file. The members mirror the
// top-level ClassFile structure of the class file format.
struct ClassFile {
    // Class file format version.
    u16 minor_version { 0 };
    u16 major_version { 0 };
    ConstantPool constant_pool;
    // Raw access/property flag bits of the class.
    u16 access_flags { 0 };
    ConstantPool::Class this_class;
    // Empty when the class file does not name a superclass.
    AK::Optional<ConstantPool::Class> super_class;
    FixedArray<ConstantPool::Class> interfaces;
    FixedArray<FieldInfo> fields;
    FixedArray<MethodInfo> methods;
    FixedArray<AttributeInfo> attributes;

    // The constant pool and attributes contain references into this buffer for e.g. strings or bytecode,
    // so the buffer has to stay alive (and unmodified) for as long as those references are used.
    ByteBuffer class_file_data;
};
}
195 changes: 195 additions & 0 deletions Userland/Libraries/LibJVM/ClassFileParser.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
/*
* Copyright (c) 2021, Noah Haasis <haasis_noah@yahoo.de>
*
* SPDX-License-Identifier: BSD-2-Clause
*/

#include "ClassFileParser.h"
#include <AK/Endian.h>
#include <AK/TypeCasts.h>

namespace JVM {

// Parses `buffer` as a class file and returns the resulting ClassFile.
// The ClassFile takes ownership of the buffer, because the constant pool and
// the attributes keep pointers into it.
// Returns an error if the buffer does not start with the class file magic
// number or if the parsed structures do not cover the buffer exactly.
ErrorOr<OwnPtr<ClassFile>> ClassFileParser::parse(ByteBuffer buffer)
{
    m_source = move(buffer);
    // Always start at the beginning, even if this parser instance was used before.
    m_offset = 0;
    m_classfile = make<ClassFile>();

    // Make sure the magic number can be read without running past the end of the buffer.
    if (m_source.size() < sizeof(u32))
        return Error::from_string_literal("Class file is too short to contain a magic number");

    u32 magic = read_u32();
    if (magic != 0xcafebabe)
        return Error::from_string_literal("Class file has an invalid magic number");

    // The class file format stores minor_version before major_version.
    m_classfile->minor_version = read_u16();
    m_classfile->major_version = read_u16();

    parse_constant_pool();

    m_classfile->access_flags = read_u16();

    parse_class_references();

    parse_interfaces();

    parse_fields();

    parse_methods();

    // Class-level attributes; this has to be the last parse_attributes() call
    // so that m_classfile->attributes ends up holding them.
    parse_attributes();

    // A well-formed class file is consumed exactly, with no bytes left over.
    if (static_cast<size_t>(m_offset) != m_source.size())
        return Error::from_string_literal("Class file size does not match its parsed contents");

    m_classfile->class_file_data = move(m_source);

    return move(m_classfile);
}

void ClassFileParser::parse_constant_pool()
{
u16 constant_pool_count = read_u16() - 1;

auto constants = FixedArray<ConstantPool::Constant>(constant_pool_count);
m_classfile->constant_pool.set_constants(constants);
for (auto& constant : m_classfile->constant_pool.constants())
constant = parse_constant_info();
}

// Parses a single constant pool entry starting at the current offset.
// Constant pool indices are 1-based in the class file; every index stored in a
// parsed constant is converted to be 0-based here.
// Tags that are not handled yet end up in TODO().
ConstantPool::Constant ClassFileParser::parse_constant_info()
{
    u8 tag = read_u8();
    switch (tag) {
    case ConstantPool::Constant::Tag::Class: {
        ConstantPool::Class class_constant;
        class_constant.name_index = read_u16() - 1;
        return ConstantPool::Constant(class_constant);
    }
    case ConstantPool::Constant::Tag::Utf8: {
        ConstantPool::Utf8 utf_8;
        utf_8.length = read_u16();
        // The string bytes are not copied; the constant points into the source buffer.
        utf_8.bytes = data_at_offset();
        advance((int)utf_8.length);
        return ConstantPool::Constant(utf_8);
    }
    case ConstantPool::Constant::Tag::NameAndType: {
        ConstantPool::NameAndType name_and_type;
        name_and_type.name_index = read_u16() - 1;
        name_and_type.descriptor_index = read_u16() - 1;
        return ConstantPool::Constant(name_and_type);
    }
    case ConstantPool::Constant::Tag::Integer: {
        ConstantPool::Integer integer;
        integer.bytes = read_u32();
        return ConstantPool::Constant(integer);
    }
    case ConstantPool::Constant::Tag::Methodref: {
        ConstantPool::Methodref methodref;
        // Store 0-based indices, like every other constant does.
        methodref.class_index = read_u16() - 1;
        methodref.name_and_type_index = read_u16() - 1;
        return ConstantPool::Constant(methodref);
    }
    default:
        TODO();
    }
}

// Reads the this_class and super_class constant pool references.
void ClassFileParser::parse_class_references()
{
    m_classfile->this_class = m_classfile->constant_pool.class_at(read_u16() - 1);

    // A stored super_class of zero means that the class has no superclass.
    // The check has to happen on the raw (1-based) value: checking after the
    // conversion to a 0-based index would mistake the first constant pool
    // entry for "no superclass" and turn a real zero into index 65535.
    u16 super_class_index = read_u16();
    if (super_class_index == 0)
        m_classfile->super_class = {};
    else
        m_classfile->super_class = m_classfile->constant_pool.class_at(super_class_index - 1);
}

void ClassFileParser::parse_interfaces()
{
u16 interfaces_count = read_u16();
auto interfaces = FixedArray<ConstantPool::Class>(interfaces_count);
for (auto& interface : interfaces)
interface = m_classfile->constant_pool.class_at(read_u16() - 1);

m_classfile->interfaces = interfaces;
}

// Reads fields_count followed by that many field_info entries and stores them
// in m_classfile->fields.
void ClassFileParser::parse_fields()
{
    u16 fields_count = read_u16();
    auto fields = FixedArray<FieldInfo>(fields_count);

    for (auto& field : fields) {
        FieldInfo field_info;
        field_info.access_flags = read_u16();
        // Name and descriptor are 1-based in the file and stored 0-based.
        field_info.name_index = read_u16() - 1;
        field_info.descriptor_index = read_u16() - 1;
        // parse_attributes() leaves the table it just read in
        // m_classfile->attributes. Copy it into the field so that the field's
        // attributes are not lost when the next table is parsed.
        parse_attributes();
        field_info.attributes = m_classfile->attributes;
        field = field_info;
    }

    m_classfile->fields = fields;
}

// Reads methods_count followed by that many method_info entries and stores
// them in m_classfile->methods.
void ClassFileParser::parse_methods()
{
    u16 methods_count = read_u16();
    auto methods = FixedArray<MethodInfo>(methods_count);

    for (auto& method : methods) {
        MethodInfo method_info;
        method_info.access_flags = read_u16();
        // Name and descriptor are 1-based in the file and stored 0-based.
        method_info.name_index = read_u16() - 1;
        method_info.descriptor_index = read_u16() - 1;
        // parse_attributes() leaves the table it just read in
        // m_classfile->attributes. Copy it into the method so that the
        // method's attributes are not lost when the next table is parsed.
        parse_attributes();
        method_info.attributes = m_classfile->attributes;
        method = method_info;
    }

    m_classfile->methods = methods;
}

void ClassFileParser::parse_attributes()
{
u16 attributes_count = read_u16();
auto attributes = FixedArray<AttributeInfo>(attributes_count);

for (auto& attribute : attributes) {
AttributeInfo attribute_info;
attribute_info.attribute_name_index = read_u16() - 1;
attribute_info.attribute_length = read_u32();
attribute_info.info = data_at_offset();
advance((int)attribute_info.attribute_length);
attribute = attribute_info;
}

m_classfile->attributes = attributes;
}

// Reads a big-endian u32 at the current offset and advances past it.
u32 ClassFileParser::read_u32()
{
    // Refuse to read past the end of the source buffer.
    VERIFY(m_offset >= 0 && static_cast<size_t>(m_offset) + sizeof(u32) <= m_source.size());

    // Assemble the value byte by byte: the offset is not necessarily aligned
    // for a u32 load, and this is independent of the host's byte order.
    u8 const* bytes = data_at_offset();
    u32 result = (static_cast<u32>(bytes[0]) << 24)
        | (static_cast<u32>(bytes[1]) << 16)
        | (static_cast<u32>(bytes[2]) << 8)
        | static_cast<u32>(bytes[3]);
    advance(4);
    return result;
}

// Reads a big-endian u16 at the current offset and advances past it.
u16 ClassFileParser::read_u16()
{
    // Refuse to read past the end of the source buffer.
    VERIFY(m_offset >= 0 && static_cast<size_t>(m_offset) + sizeof(u16) <= m_source.size());

    // Assemble the value byte by byte: the offset is not necessarily aligned
    // for a u16 load, and this is independent of the host's byte order.
    u8 const* bytes = data_at_offset();
    u16 result = static_cast<u16>((static_cast<u16>(bytes[0]) << 8) | bytes[1]);
    advance(2);
    return result;
}

// Reads the byte at the current offset and advances past it.
u8 ClassFileParser::read_u8()
{
    // Refuse to read past the end of the source buffer.
    VERIFY(m_offset >= 0 && static_cast<size_t>(m_offset) < m_source.size());

    auto result = *data_at_offset();
    advance(1);
    return result;
}

// Moves the read position forward by `amount` bytes.
void ClassFileParser::advance(int amount)
{
    m_offset = m_offset + amount;
}

// Returns a pointer to the byte of the source buffer at the current read position.
u8 const* ClassFileParser::data_at_offset() const
{
    auto const* cursor = m_source.offset_pointer(m_offset);
    return cursor;
}

}
42 changes: 42 additions & 0 deletions Userland/Libraries/LibJVM/ClassFileParser.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Copyright (c) 2021, Noah Haasis <haasis_noah@yahoo.de>
*
* SPDX-License-Identifier: BSD-2-Clause
*/

#pragma once

#include "ClassFile.h"
#include <AK/ByteBuffer.h>

namespace JVM {

// https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html
// Parser that turns the raw bytes of a class file into a ClassFile.
class ClassFileParser {
public:
// Takes ownership of the buffer, parses it and returns the resulting ClassFile.
ErrorOr<OwnPtr<ClassFile>> parse(ByteBuffer);

private:
// Each of these reads a big-endian value at the current offset and advances past it.
u16 read_u16();
u32 read_u32();
u8 read_u8();

// Moves the current offset forward by the given number of bytes.
void advance(int);

// Pointer to the byte of m_source at the current offset.
u8 const* data_at_offset() const;

// One helper per section of the class file; parse() calls them in file order.
// Each one reads from the current offset and stores its result in m_classfile.
void parse_constant_pool();
// Parses a single constant pool entry and returns it.
ConstantPool::Constant parse_constant_info();
void parse_class_references();
void parse_interfaces();
void parse_fields();
void parse_methods();
// Also called once per field and once per method to read their attribute tables.
void parse_attributes();

// The bytes being parsed; moved into the resulting ClassFile at the end of parse().
ByteBuffer m_source;
// Current read position within m_source, in bytes.
int m_offset = 0;

// The ClassFile under construction.
AK::OwnPtr<ClassFile> m_classfile;
};

}
Loading