Skip to content

Commit

Permalink
Merge branch 'main' into users/boomanaiden154/bhive-comparison-script
Browse files Browse the repository at this point in the history
  • Loading branch information
boomanaiden154 committed Jun 24, 2024
2 parents e3e32a5 + cb073f7 commit e8158b0
Show file tree
Hide file tree
Showing 35 changed files with 1,381 additions and 105 deletions.
1 change: 1 addition & 0 deletions .bazeliskrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
USE_BAZEL_VERSION=6.x
3 changes: 2 additions & 1 deletion .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ build \
--strict_proto_deps=off \
--cxxopt=-std=c++17 \
--host_cxxopt=-std=c++17 \
--repo_env=CC=clang
--repo_env=CC=clang \
--define=use_fast_cpp_protos=true
2 changes: 1 addition & 1 deletion .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ jobs:
python-version: '3.10'
cache: 'pip'
- name: Install python deps
run: pip3 install -r requirements.in
run: pip3 install -r requirements.txt
- name: Run build
run: bazel build ...
- name: Run tests
Expand Down
4 changes: 0 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
# Bazel directories.
/bazel-*

# requirements.txt is built from requirements.in via
# bazel run :requirements.update.
/requirements.txt

/compile_commands.json

# Editor temporary files
Expand Down
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ ARG bazelisk_version=1.19.0
RUN curl -L https://github.com/bazelbuild/bazelisk/releases/download/v${bazelisk_version}/bazelisk-linux-amd64 > /usr/bin/bazelisk && chmod +x /usr/bin/bazelisk && ln -s /usr/bin/bazelisk /usr/bin/bazel
WORKDIR /gematria
COPY . .
RUN pip3 install -r requirements.in
ENV USE_BAZEL_VERSION 6.4.0
RUN pip3 install -r requirements.txt

4 changes: 2 additions & 2 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,9 @@ new_git_repository(

# LLVM and its dependencies

LLVM_COMMIT = "718fbbef5f18a2b7e7fc4f842b1452ae9bee581a"
LLVM_COMMIT = "74a5e7784b32aba5670ff427b158d1e6e38012f1"

LLVM_SHA256 = "e17b455b320e5c09acecadf2fb0f9ce471d6668382569132d2c7f144ca10bafa"
LLVM_SHA256 = "a0d8932b90d5a423a7fcf2c70afee531c7f897153c2d7913a757cbecb52aec40"

http_archive(
name = "llvm-raw",
Expand Down
46 changes: 43 additions & 3 deletions gematria/datasets/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ cc_binary(
"//gematria/utils:string",
"@com_google_absl//absl/flags:flag",
"@com_google_absl//absl/flags:parse",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/strings:str_format",
],
)

Expand All @@ -74,38 +76,54 @@ cc_binary(
],
)

cc_binary(
name = "process_and_filter_bbs",
srcs = ["process_and_filter_bbs.cc"],
deps = [
"//gematria/llvm:disassembler",
"//gematria/llvm:llvm_architecture_support",
"//gematria/utils:string",
"@llvm-project//llvm:Support",
],
)

gematria_py_test(
name = "convert_bhive_to_llvm_exegesis_input_test",
size = "small",
srcs = ["convert_bhive_to_llvm_exegesis_input_test.py"],
data = [
"tests/lit.cfg.py",
"tests/lit.site.cfg.py",
"convert_bhive_to_llvm_exegesis_input_tests/lit.cfg.py",
"convert_bhive_to_llvm_exegesis_input_tests/lit.site.cfg.py",
":convert_bhive_to_llvm_exegesis_input",
"@llvm-project//llvm:FileCheck",
"@llvm-project//llvm:not",
"@llvm-project//llvm:split-file",
] + glob(["tests/*.test"]),
] + glob(["convert_bhive_to_llvm_exegesis_input_tests/*.test"]),
)

cc_library(
name = "find_accessed_addrs",
srcs = ["find_accessed_addrs.cc"],
hdrs = ["find_accessed_addrs.h"],
copts = ["-Iexternal/llvm-project/llvm"],
# This library uses various POSIX APIs. Only tested on Linux, and we'll likely use some
# Linux-only APIs in future.
target_compatible_with = [
"@platforms//os:linux",
],
deps = [
":block_wrapper",
"//gematria/llvm:disassembler",
"//gematria/llvm:llvm_architecture_support",
"@com_google_absl//absl/random",
"@com_google_absl//absl/random:distributions",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"@com_google_absl//absl/strings:str_format",
"@com_google_absl//absl/strings:string_view",
"@com_google_absl//absl/types:span",
"@llvm-project//llvm:MC",
"@llvm-project//llvm:X86UtilsAndDesc",
],
)

Expand Down Expand Up @@ -196,3 +214,25 @@ cc_binary(
"@llvm-project//llvm:Support",
],
)

cc_binary(
name = "extract_bbs_from_obj",
srcs = ["extract_bbs_from_obj.cc"],
deps = [
"@llvm-project//llvm:Object",
"@llvm-project//llvm:Support",
],
)

gematria_py_test(
name = "extract_bbs_from_obj_test",
size = "small",
srcs = ["extract_bbs_from_obj_test.py"],
data = [
"extract_bbs_from_obj_tests/lit.cfg.py",
"extract_bbs_from_obj_tests/lit.site.cfg.py",
":extract_bbs_from_obj",
"@llvm-project//llvm:FileCheck",
"@llvm-project//llvm:yaml2obj",
] + glob(["extract_bbs_from_obj_tests/*.test"]),
)
14 changes: 11 additions & 3 deletions gematria/datasets/convert_bhive_to_llvm_exegesis_input.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,14 @@ ABSL_FLAG(unsigned, max_bb_count, std::numeric_limits<unsigned>::max(),
"The maximum number of basic blocks to process");
ABSL_FLAG(unsigned, report_progress_every, std::numeric_limits<unsigned>::max(),
"The number of blocks after which to report progress.");
ABSL_FLAG(unsigned, max_annotation_attempts, 50,
"The maximum number of times to attempt to annotate a block before "
"giving up.");

absl::StatusOr<gematria::AccessedAddrs> GetAccessedAddrs(
absl::Span<const uint8_t> basic_block,
gematria::ExegesisAnnotator* exegesis_annotator) {
gematria::ExegesisAnnotator* exegesis_annotator,
const unsigned max_annotation_attempts) {
const AnnotatorType annotator_implementation =
absl::GetFlag(FLAGS_annotator_implementation);
switch (annotator_implementation) {
Expand All @@ -105,7 +109,8 @@ absl::StatusOr<gematria::AccessedAddrs> GetAccessedAddrs(
case AnnotatorType::kExegesis:
return gematria::LlvmExpectedToStatusOr(
exegesis_annotator->findAccessedAddrs(
llvm::ArrayRef(basic_block.begin(), basic_block.end())));
llvm::ArrayRef(basic_block.begin(), basic_block.end()),
max_annotation_attempts));
case AnnotatorType::kNone:
return gematria::AccessedAddrs();
}
Expand Down Expand Up @@ -226,6 +231,8 @@ int main(int argc, char* argv[]) {
const unsigned max_bb_count = absl::GetFlag(FLAGS_max_bb_count);
const unsigned report_progress_every =
absl::GetFlag(FLAGS_report_progress_every);
const unsigned max_annotation_attempts =
absl::GetFlag(FLAGS_max_annotation_attempts);
unsigned int file_counter = 0;
for (std::string line; std::getline(bhive_csv_file, line);) {
if (file_counter >= max_bb_count) break;
Expand Down Expand Up @@ -253,7 +260,8 @@ int main(int argc, char* argv[]) {
continue;
}

auto addrs = GetAccessedAddrs(*bytes, exegesis_annotator.get());
auto addrs = GetAccessedAddrs(*bytes, exegesis_annotator.get(),
max_annotation_attempts);

if (!addrs.ok()) {
std::cerr << "Failed to find addresses for block '" << hex
Expand Down
22 changes: 19 additions & 3 deletions gematria/datasets/convert_bhive_to_llvm_exegesis_input_test.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,27 @@
from lit.main import main
# Copyright 2024 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import lit.main
import sys

# Lit expects the test folder path to be specified on the command-line, which
# is usually passed in through CMake. Bazel doesn't support this configuration,
# so we manually add the path here.
sys.argv.append("./gematria/datasets/tests")
sys.argv.append(
"./gematria/datasets/convert_bhive_to_llvm_exegesis_input_tests"
)
sys.argv.append("-vv")

if __name__ == "__main__":
main()
lit.main.main()
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,6 +1,20 @@
# Copyright 2024 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import lit.formats

config.name = 'gematria'
config.name = 'convert_bhive_to_llvm_exegesis_input_tests'
config.test_format = lit.formats.ShTest(True)

config.suffixes = ['.test']
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Copyright 2024 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

# Site configuration for the lit suite in
# convert_bhive_to_llvm_exegesis_input_tests. All paths are built relative to
# the current working directory at the time lit loads this file.

# Directory that holds this suite's lit.cfg.py (loaded at the bottom).
config.obj_root = os.path.join(
os.getcwd(), 'gematria/datasets/convert_bhive_to_llvm_exegesis_input_tests'
)
# NOTE(review): presumably where the gematria tool binaries under test live --
# confirm against how lit.cfg.py consumes tools_root.
config.tools_root = os.path.join(os.getcwd(), 'gematria/datasets')
# NOTE(review): presumably where the LLVM helper tools (FileCheck, not,
# split-file) are found -- confirm against lit.cfg.py.
config.llvm_tools_root = os.path.join(os.getcwd(), 'external/llvm-project/llvm')

# Hand over to the main suite configuration located in obj_root.
lit_config.load_config(config, os.path.join(config.obj_root, 'lit.cfg.py'))
File renamed without changes.
98 changes: 98 additions & 0 deletions gematria/datasets/extract_bbs_from_obj.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// Copyright 2024 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "llvm/ADT/StringExtras.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ELFTypes.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/WithColor.h"

using namespace llvm;

// Positional command-line argument naming the input object file. When the
// argument is omitted the value defaults to "-".
// NOTE(review): "-" presumably makes createBinary() read from stdin -- confirm.
static cl::opt<std::string> InputFilename(cl::Positional,
cl::desc("Input object file"),
cl::init("-"));

int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv, "extract_bbs_from_obj\n");

ExitOnError ExitOnErr("extract_bbs_from_obj error: ");

object::OwningBinary<object::Binary> ObjBinary =
ExitOnErr(object::createBinary(InputFilename));
object::Binary &Binary = *ObjBinary.getBinary();
object::ObjectFile *Obj = cast<object::ObjectFile>(&Binary);

for (const auto &Section : Obj->sections()) {
if (!Section.isText()) continue;

DenseMap<uint64_t, object::BBAddrMap> BBAddrMap;
if (const auto *Elf = dyn_cast<object::ELFObjectFileBase>(Obj)) {
auto BBAddrMapping = ExitOnErr(Elf->readBBAddrMap(Section.getIndex()));
for (auto &BBAddr : BBAddrMapping) {
BBAddrMap.try_emplace(BBAddr.getFunctionAddress(), std::move(BBAddr));
}
} else {
ExitOnErr(make_error<StringError>(errc::invalid_argument,
"Specified object file is not ELF."));
}

std::vector<std::pair<uint64_t, uint64_t>> BasicBlocks;

for (const auto &[FunctionAddress, BasicBlockAddressMap] : BBAddrMap) {
for (const auto &BasicBlockEntry : BasicBlockAddressMap.getBBEntries()) {
uint64_t StartAddress = FunctionAddress + BasicBlockEntry.Offset;
BasicBlocks.push_back(
std::make_pair(StartAddress, BasicBlockEntry.Size));
}
}

std::sort(BasicBlocks.begin(), BasicBlocks.end(), [](auto &LHS, auto &RHS) {
return std::get<0>(LHS) < std::get<1>(RHS);
});

if (BasicBlocks.size() == 0) {
dbgs() << "No basic blocks present in section.\n";
continue;
}

size_t BasicBlockIndex = 0;

uint64_t SectionEndAddress = Section.getAddress() + Section.getSize();

uint64_t CurrentAddress = std::get<0>(BasicBlocks[BasicBlockIndex]);

if (SectionEndAddress < CurrentAddress) continue;

StringRef SectionContents = ExitOnErr(Section.getContents());

while (CurrentAddress < SectionEndAddress &&
BasicBlockIndex < BasicBlocks.size()) {
uint64_t OffsetInSection = CurrentAddress - Section.getAddress();
StringRef BasicBlock(SectionContents.data() + OffsetInSection,
std::get<1>(BasicBlocks[BasicBlockIndex]));
std::string BBHex = toHex(BasicBlock);
outs() << BBHex << "\n";
BasicBlockIndex++;
if (BasicBlockIndex >= BasicBlocks.size()) {
break;
}
CurrentAddress = std::get<0>(BasicBlocks[BasicBlockIndex]);
}
}

return 0;
}
25 changes: 25 additions & 0 deletions gematria/datasets/extract_bbs_from_obj_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright 2024 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import lit.main
import sys

# Lit expects the test folder path to be specified on the command-line, which
# is usually passed in through CMake. Bazel doesn't support this configuration,
# so we manually add the path here.
sys.argv.append("./gematria/datasets/extract_bbs_from_obj_tests")
sys.argv.append("-vv")

if __name__ == "__main__":
lit.main.main()
Loading

0 comments on commit e8158b0

Please sign in to comment.