Skip to content

Commit

Permalink
several additions to preprocessing and machine learning to reconstruc…
Browse files Browse the repository at this point in the history
…t words, some speed ups for netlist comparison
  • Loading branch information
SimonKlx committed Nov 11, 2024
1 parent 8a2485e commit ba8ea22
Show file tree
Hide file tree
Showing 9 changed files with 353 additions and 103 deletions.
2 changes: 1 addition & 1 deletion plugins/machine_learning/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ if(PL_MACHINE_LEARNING OR BUILD_ALL_PLUGINS)
SHARED
HEADER ${MACHINE_LEARNING_INC}
SOURCES ${MACHINE_LEARNING_SRC} ${MACHINE_LEARNING_PYTHON_SRC}
LINK_LIBRARIES nlohmann_json::nlohmann_json
LINK_LIBRARIES nlohmann_json::nlohmann_json netlist_preprocessing
)
endif()
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ namespace hal
/* Forward declarations */
class Gate;
class Netlist;
enum class PinDirection : int;

namespace machine_learning
{
Expand All @@ -34,12 +35,12 @@ namespace hal
/**
* @brief Maps word pairs to corresponding gates.
*/
std::map<const std::pair<const std::string, const std::string>, std::vector<const Gate*>> word_to_gates;
std::map<std::tuple<std::string, PinDirection, std::string>, std::vector<Gate*>> word_to_gates;

/**
* @brief Maps gates to associated word pairs.
*/
std::map<const Gate*, std::vector<std::pair<const std::string, const std::string>>> gate_to_words;
std::map<const Gate*, std::vector<std::tuple<std::string, PinDirection, std::string>>> gate_to_words;
};

/**
Expand Down
179 changes: 153 additions & 26 deletions plugins/machine_learning/src/labels/gate_pair_label.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "hal_core/netlist/netlist.h"
#include "hal_core/utilities/log.h"
#include "nlohmann/json.hpp"
#include "netlist_preprocessing/netlist_preprocessing.h"

#include <stdlib.h>

Expand All @@ -17,7 +18,7 @@ namespace hal
{
MultiBitInformation calculate_multi_bit_information(const std::vector<Gate*>& gates)
{
std::map<std::pair<std::string, std::string>, std::set<std::tuple<u32, Gate*>>> word_to_gates_unsorted;
std::map<std::tuple<std::string, PinDirection, std::string>, std::set<std::tuple<u32, Gate*>>> word_to_gates_unsorted;

for (const auto g : gates)
{
Expand All @@ -33,46 +34,67 @@ namespace hal
// std::cout << "Trying to parse string: " << json_string << std::endl;

// TODO catch exceptions and return result
const nlohmann::json j = nlohmann::json::parse(json_string);
const std::vector<std::tuple<std::string, u32, std::string, std::string>> index_information = j;
nlohmann::json j = nlohmann::json::parse(json_string);
std::vector<netlist_preprocessing::indexed_identifier> index_information = j.get<std::vector<netlist_preprocessing::indexed_identifier>>();

// TODO remove
// if (!index_information.empty())
// {
// std::cout << "For gate " << g->get_id() << " found " << std::get<0>(index_information.front()) << " - " << std::get<1>(index_information.front()) << std::endl;
// }
if (!index_information.empty())
{
std::cout << "For gate " << g->get_id() << " found " <<index_information.front().identifier << " - " << index_information.front().index << " - " << index_information.front().distance << std::endl;
}

for (const auto& [name, index, _origin, direction] : index_information)
// for each pin, only consider the index information with the least distance
std::map<std::string, u32> pin_to_min_distance;
for (const auto& [_name, _index, _origin, pin, _direction, distance] : index_information)
{
word_to_gates_unsorted[{name, direction}].insert({index, g});
if (const auto it = pin_to_min_distance.find(pin); it == pin_to_min_distance.end())
{
pin_to_min_distance.insert({pin, distance});
}
else
{
pin_to_min_distance.at(pin) = std::min(it->second, distance);
}
}


for (const auto& [name, index, _origin, pin, direction, distance] : index_information)
{
if (pin_to_min_distance.at(pin) == distance)
{
word_to_gates_unsorted[{name, direction, pin}].insert({index, g});
}
}
}

// 1. Sort out words with the same name by checking whether they contain duplicate indices
// 2. Dedupe all words by only keeping one word/name_direction for each multi_bit_signal/vector of gates.
std::map<std::vector<const Gate*>, std::pair<std::string, std::string>> gates_to_word;
std::map<std::vector<Gate*>, std::tuple<std::string, PinDirection, std::string>> gates_to_word;

for (const auto& [name_direction, word] : word_to_gates_unsorted)
{
std::set<u32> indices;
std::vector<const Gate*> gates;
std::set<Gate*> unique_gates;
std::vector<Gate*> gates;

// TODO remove
// std::cout << "Order Word: " << std::endl;
for (const auto& [index, gate] : word)
for (auto& [index, gate] : word)
{
// TODO remove
// std::cout << index << std::endl;

indices.insert(index);
unique_gates.insert(gate);

gates.push_back(gate);
}

// sanity check
if (indices.size() != word.size())
{
// TODO return result
log_error("machine_learning", "Found index double in word {}-{}!", name_direction.first, name_direction.second);
log_error("machine_learning", "Found index double in word {}-{} - !", std::get<0>(name_direction), enum_to_string(std::get<1>(name_direction)), std::get<2>(name_direction));

// TODO remove
std::cout << "Insane Word: " << std::endl;
Expand All @@ -84,25 +106,82 @@ namespace hal
continue;
}

if (unique_gates.size() != word.size())
{
continue;
}

if (unique_gates.size() <= 1)
{
continue;
}

std::cout << "Word [" << word.size() << "] " << std::get<0>(name_direction) << " - " << std::get<1>(name_direction) << " - " << std::get<2>(name_direction) << " : " << std::endl;
for (const auto& [index, gate] : word)
{
std::cout << index << ": " << gate->get_id() << std::endl;
}

if (const auto it = gates_to_word.find(gates); it == gates_to_word.end())
{
gates_to_word.insert({gates, name_direction});
}
// NOTE could think about a prioritization of shorter names or something similar
// else
}

MultiBitInformation mbi;

for (const auto& [gates, name_direction] : gates_to_word)
for (auto& [word_gates, name_direction] : gates_to_word)
{
mbi.word_to_gates[name_direction] = gates;
for (const auto g : gates)
mbi.word_to_gates[name_direction] = word_gates;
for (const auto g : word_gates)
{
mbi.gate_to_words[g].push_back(name_direction);
}
}

// filter words for each gate:
// 1) For each direction only take the biggest word
// 2) From all remaining only take the smallest word
// std::map<const Gate*, std::vector<const std::tuple<const std::string, const PinDirection, const std::string>>> filtered_gate_to_words;
// for (const auto g : gates)
// {
// const auto it = mbi.gate_to_words.find(g);
// if (it == mbi.gate_to_words.end())
// {
// continue;
// }

// std::set<u32> sizes;
// for (const auto& w : it->second)
// {
// sizes.insert(mbi.word_to_gates.at(w).size());
// }

// std::vector<const std::tuple<const std::string, const PinDirection, const std::string>> filtered_words;
// for (const auto& w : it->second)
// {
// if (mbi.word_to_gates.at(w).size() == *(sizes.begin()))
// {
// filtered_words.push_back(w);
// }
// }

// filtered_gate_to_words.insert({g, filtered_words});
// }

// std::map<const std::tuple<const std::string, const PinDirection, const std::string>, std::vector<const Gate*>> filtered_word_to_gates;
// for (const auto& [g, words] : filtered_gate_to_words)
// {
// for (const auto& w : words)
// {
// filtered_word_to_gates[w].push_back(g);
// }
// }

// mbi.gate_to_words = filtered_gate_to_words;
// mbi.word_to_gates = filtered_word_to_gates;

return mbi;
}
} // namespace
Expand All @@ -126,7 +205,7 @@ namespace hal
for (const auto& g : gates)
{
// positive labels
std::unordered_set<Gate*> pos_gates;
std::unordered_set<const Gate*> pos_gates;
if (mbi.gate_to_words.find(g) == mbi.gate_to_words.end())
{
// gate is only in a group with itself
Expand All @@ -138,16 +217,16 @@ namespace hal
// add all gates that are part of at least one other signal group as positive pair
for (const auto& name_direction : mbi.gate_to_words.at(g))
{
const auto& gates = mbi.word_to_gates.at(name_direction);
for (const auto g_i : gates)
const auto& word_gates = mbi.word_to_gates.at(name_direction);
for (const auto* g_i : word_gates)
{
if (g == g_i)
{
continue;
}

pairs.push_back({g, g_i});
pos_gates.insert(g);
pos_gates.insert(g_i);
}
}
}
Expand All @@ -156,13 +235,15 @@ namespace hal
const u64 pos_count = pos_gates.size();
const u64 neg_count = std::min(gates.size() - pos_count, pos_count);

std::cout << "Gate ID: " << g->get_id() << " " << pos_count << " vs. " << neg_count << std::endl;

std::set<Gate*> chosen_gates;
for (u32 i = 0; i < neg_count; i++)
{
const u32 start = std::rand() % lc.nl->get_gates().size();
for (u32 idx = start; idx < start + lc.nl->get_gates().size(); idx++)
const u32 start = std::rand() % gates.size();
for (u32 idx = start; idx < start + gates.size(); idx = (idx + 1) % gates.size())
{
const auto g_i = lc.nl->get_gates().at(idx % lc.nl->get_gates().size());
const auto g_i = gates.at(idx % gates.size());
if (pos_gates.find(g_i) == pos_gates.end() && chosen_gates.find(g_i) == chosen_gates.end())
{
pairs.push_back({g, g_i});
Expand All @@ -179,8 +260,54 @@ namespace hal
std::vector<u32> SharedSignalGroup::calculate_label(LabelContext& lc, const Gate* g_a, const Gate* g_b) const
{
const auto& mbi = lc.get_multi_bit_information();
const auto& words_a = mbi.gate_to_words.at(g_a);
const auto& words_b = mbi.gate_to_words.at(g_b);

const auto it_a = mbi.gate_to_words.find(g_a);
if (it_a == mbi.gate_to_words.end())
{
return {0};
}

const auto it_b = mbi.gate_to_words.find(g_b);
if (it_b == mbi.gate_to_words.end())
{
return {0};
}

const auto& words_a = it_a->second;
const auto& words_b = it_b->second;

// // only consider the smallest words a gate is part of
// std::set<u32> sizes_a;
// std::set<u32> sizes_b;

// for (const auto& w_a : words_a)
// {
// sizes_a.insert(mbi.word_to_gates.at(w_a).size());
// }

// for (const auto& w_b : words_b)
// {
// sizes_b.insert(mbi.word_to_gates.at(w_b).size());
// }

// std::vector<std::pair<std::string, PinDirection>> filtered_words_a;
// std::vector<std::pair<std::string, PinDirection>> filtered_words_b;

// for (const auto& w_a : words_a)
// {
// if (mbi.word_to_gates.at(w_a).size() == *(sizes_a.begin()))
// {
// filtered_words_a.push_back(w_a);
// }
// }

// for (const auto& w_b : words_b)
// {
// if (mbi.word_to_gates.at(w_b).size() == *(sizes_b.begin()))
// {
// filtered_words_b.push_back(w_b);
// }
// }

for (const auto& wa : words_a)
{
Expand Down
2 changes: 1 addition & 1 deletion plugins/machine_learning/src/plugin_machine_learning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ namespace hal

std::set<std::string> MachineLearningPlugin::get_dependencies() const
{
return {};
return {"netlist_preprocessing"};
}
} // namespace hal
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

#include "hal_core/defines.h"
#include "hal_core/utilities/result.h"
#include "nlohmann/json.hpp"

#include <map>
#include <vector>
Expand All @@ -40,6 +41,8 @@ namespace hal
class Module;
class Net;

enum class PinDirection;

namespace netlist_preprocessing
{
/**
Expand Down Expand Up @@ -152,6 +155,46 @@ namespace hal
*/
Result<u32> simplify_lut_inits(Netlist* nl);


/**
* Represents an identifier with an associated index and additional metadata, used for reconstructing and annotating names and indices
* for flip flops in synthesized netlists based on input and output net names as well as gate names.
*
* This struct is designed specifically for use with synthesized netlists. By analyzing net and gate names, we attempt to reconstruct a
* multi-bit word and index for each flip flop.
*
* The reconstructed identifiers, stored as `indexed_identifier` instances, are added to the gate data container in the netlist.
*
* Members:
* - identifier: The reconstructed name of the multi-bit word.
* - index: The index number associated with the identifier, if part of a multi-bit signal.
* - origin: The origin of the identifier within the netlist (either "gate_name" or "net_name").
* - pin: The name of the pin associated with the identifier.
* - direction: The direction of the pin (e.g., INPUT, OUTPUT, INOUT).
* - distance: The distance to the merged net whose name this index was derived from.
*
* NOTE: consumers may unpack instances positionally via structured bindings
* (`const auto& [name, index, origin, pin, direction, distance] : ...`), so the declaration order of the
* data members below is effectively part of the interface. Do not reorder them without updating such call sites.
*/
struct indexed_identifier
{
// Default constructor.
// NOTE(review): presumably required so that nlohmann::json can default-construct an instance before
// filling it via from_json() - confirm; the values it assigns to the members are defined in the implementation file.
indexed_identifier();
// Constructs an instance from one value per data member; the parameters are named after, and listed in the
// same order as, the data members declared below.
indexed_identifier(const std::string& identifier, const u32 index, const std::string& origin, const std::string& pin, const PinDirection& direction, const u32 distance);

std::string identifier; /**< The reconstructed name of the multi-bit word. */
u32 index; /**< The index associated with the identifier, used for multi-bit signals. */
std::string origin; /**< The origin of the identifier within the netlist (either "gate_name" or "net_name"). */
std::string pin; /**< The name of the pin associated with this identifier. */
PinDirection direction; /**< The direction of the pin. */
u32 distance; /**< The distance to the merged net whose name this index was derived from. */

// Overload of the < operator providing a strict weak ordering, as needed to store instances in ordered
// containers such as std::set or to use them as std::map keys.
// Which members take part in the comparison is defined in the implementation file.
bool operator<(const indexed_identifier& other) const;
};

// Serialization function for indexed_identifier: writes `id` into `j` as a list of values.
// NOTE(review): presumably the list is a JSON array holding the members in declaration order - confirm against the implementation.
// The name and signature follow the nlohmann::json conversion convention for user-defined types.
void to_json(nlohmann::json& j, const indexed_identifier& id);

// Deserialization function for indexed_identifier: fills `id` from the list of values stored in `j`.
// Counterpart of to_json(); the name and signature follow the nlohmann::json conversion convention for
// user-defined types.
// NOTE(review): behavior on malformed input (e.g. wrong element count or types) is defined in the implementation - confirm whether it throws.
void from_json(const nlohmann::json& j, indexed_identifier& id);

/**
* Tries to reconstruct a name and index for each flip flop that was part of a multi-bit wire in the verilog code.
* This is NOT a general netlist reverse engineering algorithm and ONLY works on synthesized netlists with names annotated by the synthesizer.
Expand Down
Loading

0 comments on commit ba8ea22

Please sign in to comment.