Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Program-support #664

Merged
merged 14 commits into from
Nov 26, 2024
96 changes: 96 additions & 0 deletions icicle/backend/cpu/include/cpu_program_executor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#pragma once

#include <vector>
#include <functional>
#include "icicle/program/symbol.h"
#include "icicle/program/program.h"

namespace icicle {

template <typename S>
class CpuProgramExecutor
{
public:
CpuProgramExecutor(Program<S>& program)
mickeyasa marked this conversation as resolved.
Show resolved Hide resolved
: m_program(program), m_variable_ptrs(program.get_nof_vars()), m_intermidites(program.m_nof_intermidiates)
{
// initialize m_variable_ptrs vector
int variable_ptrs_idx = program.m_nof_inputs + program.m_nof_outputs;
for (int idx = 0; idx < program.m_nof_constants; ++idx) {
m_variable_ptrs[variable_ptrs_idx++] = &(program.m_constants[idx]);
}
for (int idx = 0; idx < program.m_nof_intermidiates; ++idx) {
m_variable_ptrs[variable_ptrs_idx++] = &(m_intermidites[idx]);
}
}

// execute the program an return a program to the result
void execute()
{
const std::byte* instruction;
for (InstructionType instruction : m_program.m_instructions) {
const int func_select = (instruction & 0xFF);
mickeyasa marked this conversation as resolved.
Show resolved Hide resolved
mickeyasa marked this conversation as resolved.
Show resolved Hide resolved
(this->*m_function_arr[instruction & 0xFF])(instruction);
}
}

std::vector<S*> m_variable_ptrs;

private:
Program<S> m_program;
std::vector<S> m_intermidites;
mickeyasa marked this conversation as resolved.
Show resolved Hide resolved

// exe functions
void exe_add(const InstructionType instruction)
{
const std::byte* inst_arr = reinterpret_cast<const std::byte*>(&instruction);
*m_variable_ptrs[(int)inst_arr[3]] = *m_variable_ptrs[(int)inst_arr[1]] + *m_variable_ptrs[(int)inst_arr[2]];
}

void exe_mult(const InstructionType instruction)
{
const std::byte* inst_arr = reinterpret_cast<const std::byte*>(&instruction);
*m_variable_ptrs[(int)inst_arr[3]] = *m_variable_ptrs[(int)inst_arr[1]] * *m_variable_ptrs[(int)inst_arr[2]];
}

void exe_sub(const InstructionType instruction)
{
const std::byte* inst_arr = reinterpret_cast<const std::byte*>(&instruction);
*m_variable_ptrs[(int)inst_arr[3]] = *m_variable_ptrs[(int)inst_arr[1]] - *m_variable_ptrs[(int)inst_arr[2]];
}

void exe_inverse(const InstructionType instruction)
{
const std::byte* inst_arr = reinterpret_cast<const std::byte*>(&instruction);
*m_variable_ptrs[(int)inst_arr[3]] = S::inverse(*m_variable_ptrs[(int)inst_arr[1]]);
}

void exe_predef_ab_minus_c(const InstructionType instruction)
{
const std::byte* inst_arr = reinterpret_cast<const std::byte*>(&instruction);
const S& a = *m_variable_ptrs[0];
const S& b = *m_variable_ptrs[1];
const S& c = *m_variable_ptrs[2];
*m_variable_ptrs[3] = a * b - c;
}
void exe_predef_eq_x_ab_minus_c(const InstructionType instruction)
{
const std::byte* inst_arr = reinterpret_cast<const std::byte*>(&instruction);
const S& a = *m_variable_ptrs[0];
const S& b = *m_variable_ptrs[1];
const S& c = *m_variable_ptrs[2];
const S& eq = *m_variable_ptrs[3];
*m_variable_ptrs[4] = eq * (a * b - c);
}

using FunctionPtr = void (CpuProgramExecutor::*)(const InstructionType);
inline static const FunctionPtr m_function_arr[] = {
&CpuProgramExecutor::exe_add, // OP_ADD
&CpuProgramExecutor::exe_mult, // OP_MULT
&CpuProgramExecutor::exe_sub, // OP_SUB
&CpuProgramExecutor::exe_inverse, // OP_INV
// pre defined functions
&CpuProgramExecutor::exe_predef_ab_minus_c, &CpuProgramExecutor::exe_predef_eq_x_ab_minus_c};
};

} // namespace icicle
2 changes: 1 addition & 1 deletion icicle/backend/cpu/src/field/cpu_vec_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ class VectorOpTask : public TaskBase
public:
T m_intermidiate_res; // pointer to the output. Can be a vector or scalar pointer
uint64_t m_idx_in_batch; // index in the batch. Used in intermediate res tasks
}; // class VectorOpTask
}; // class VectorOpTask
mickeyasa marked this conversation as resolved.
Show resolved Hide resolved

#define NOF_OPERATIONS_PER_TASK 512
#define CONFIG_NOF_THREADS_KEY "n_threads"
Expand Down
2 changes: 1 addition & 1 deletion icicle/include/icicle/fields/quartic_extension.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ class QuarticExtensionField
FF::reduce(
(CONFIG::nonresidue_is_negative
? (FF::mul_wide(xs.real, x0) + FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im2, x2)))
: (FF::mul_wide(xs.real, x0)) - FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im2, x2)))),
: (FF::mul_wide(xs.real, x0))-FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im2, x2)))),
mickeyasa marked this conversation as resolved.
Show resolved Hide resolved
FF::reduce(
(CONFIG::nonresidue_is_negative
? FWide::neg(FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im3, x2)))
Expand Down
12 changes: 4 additions & 8 deletions icicle/include/icicle/fields/storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ struct
#ifdef __CUDA_ARCH__
__align__(LIMBS_ALIGNMENT(1))
#endif
storage<1>
{
storage<1> {
static constexpr unsigned LC = 1;
uint32_t limbs[1];
};
Expand All @@ -28,8 +27,7 @@ struct
#ifdef __CUDA_ARCH__
__align__(LIMBS_ALIGNMENT(1))
#endif
storage<3>
{
storage<3> {
static constexpr unsigned LC = 3;
uint32_t limbs[3];
};
Expand All @@ -40,8 +38,7 @@ struct
#ifdef __CUDA_ARCH__
__align__(LIMBS_ALIGNMENT(LIMBS_COUNT))
#endif
storage
{
storage {
static_assert(LIMBS_COUNT % 2 == 0, "odd number of limbs is not supported\n");
static constexpr unsigned LC = LIMBS_COUNT;
union { // works only with even LIMBS_COUNT
Expand All @@ -55,7 +52,6 @@ struct
#ifdef __CUDA_ARCH__
__align__(LIMBS_ALIGNMENT(LIMBS_COUNT))
#endif
storage_array
{
storage_array {
storage<LIMBS_COUNT> storages[OMEGAS_COUNT];
};
140 changes: 140 additions & 0 deletions icicle/include/icicle/program/program.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#pragma once

#include <vector>
#include <functional>
#include "icicle/program/symbol.h"

namespace icicle {

// A single encoded program instruction: a 32-bit word holding an 8-bit opcode in the low bits
// followed by three 8-bit selectors (operand 1, operand 2, result).
using InstructionType = uint32_t;

// Built-in programs that can be requested by name instead of being described by a lambda.
// A Program built from one of these holds a single instruction whose opcode is
// OpCode::NOF_OPERATIONS + the enumerator value, so the enumerator order is part of the encoding.
enum PreDefinedPrograms { AB_MINUS_C, EQ_X_AB_MINUS_C };
mickeyasa marked this conversation as resolved.
Show resolved Hide resolved

/**
* @brief A class that convert the function described by user into a program that can be executed
*
* This class recieves a Symbol instance that contains a DFG representing the required calculation.

Check failure on line 16 in icicle/include/icicle/program/program.h

View workflow job for this annotation

GitHub Actions / Check Spelling

recieves ==> receives
* It generates a vector of instructions that represent the calculation.
* Each instruction has the following format.
* bits 7:0 - opcode according to enum OpCode
* bits 15:8 - operand 1 selector from the input vector
* bits 23:16 - operand 2 selector from the input vector
* bits 31:24 - result selector
*/
template <typename S>
class Program
{
public:
// Generate a program based on a lambda function
Program(std::function<Symbol<S>(std::vector<Symbol<S>>&)> program_func, const int nof_inputs)
{
std::vector<Symbol<S>> program_inputs(nof_inputs);
set_as_inputs(program_inputs);
Symbol<S> result = program_func(program_inputs);
generate_program(result);
}

// Generate a program based on a PreDefinedPrograms
Program(PreDefinedPrograms pre_def)
{
switch (pre_def) {
mickeyasa marked this conversation as resolved.
Show resolved Hide resolved
case AB_MINUS_C:
m_nof_inputs = 3;
break;
case EQ_X_AB_MINUS_C:
m_nof_inputs = 4;
break;
default:
ICICLE_LOG_ERROR << "Illegal opcode: " << int(pre_def);
}
m_nof_outputs = 1;
int instruction = int(OpCode::NOF_OPERATIONS) + int(pre_def);
m_instructions.push_back(instruction);
}

// run over all symbols at the vector and set there gen process to OP_INPUT
mickeyasa marked this conversation as resolved.
Show resolved Hide resolved
void set_as_inputs(std::vector<Symbol<S>>& combine_inputs)
{
m_nof_inputs = combine_inputs.size();
for (int input_idx = 0; input_idx < m_nof_inputs; input_idx++) {
combine_inputs[input_idx].set_as_input(input_idx);
}
}

// run over the DFG held by result and gemerate the program
void generate_program(Symbol<S>& result)
mickeyasa marked this conversation as resolved.
Show resolved Hide resolved
{
m_nof_outputs = 1;
result.m_operation->m_mem_addr = m_nof_inputs;
Operation<S>::reset_visit();
allocate_constants(result.m_operation);
Operation<S>::reset_visit();
generate_program(result.m_operation);
}

// Program
std::vector<InstructionType> m_instructions;
std::vector<S> m_constants;
int m_nof_inputs = 0;
int m_nof_outputs = 0;
int m_nof_constants = 0;
int m_nof_intermidiates = 0;

const int get_nof_vars() const { return m_nof_inputs + m_nof_outputs + m_nof_constants + m_nof_intermidiates; }

private:
void generate_program(std::shared_ptr<Operation<S>> operation)
{
if (
operation == nullptr || operation->was_visited(true) || operation->m_opcode == OP_INPUT ||
operation->m_opcode == OP_CONST)
return;
generate_program(operation->m_operand1);
generate_program(operation->m_operand2);

// Build an instruction
std::byte int_arr[4] = {};
int_arr[0] = std::byte(operation->m_opcode);
int_arr[1] = std::byte(operation->m_operand1->m_mem_addr);
if (operation->m_operand2) { int_arr[2] = std::byte(operation->m_operand2->m_mem_addr); }
if (operation->m_mem_addr < 0) { operation->m_mem_addr = allocate_intermidiate(); }
int_arr[3] = std::byte(operation->m_mem_addr);
InstructionType instruction;
std::memcpy(&instruction, int_arr, 4);
m_instructions.push_back(instruction);
}

void allocate_constants(std::shared_ptr<Operation<S>> operation)
{
if (operation == nullptr || operation->was_visited(true)) return;
allocate_constants(operation->m_operand1);
allocate_constants(operation->m_operand2);
if (operation->m_opcode == OP_CONST) {
m_constants.push_back(*(operation->m_constant));
operation->m_mem_addr = allocate_constant();
}
}

int allocate_constant() { return (m_nof_inputs + m_nof_outputs + m_nof_constants++); }
int allocate_intermidiate() { return (m_nof_inputs + m_nof_outputs + m_nof_constants + m_nof_intermidiates++); }

public:
void print_program()
{
std::cout << "nof_inputs: " << m_nof_inputs << std::endl;
std::cout << "nof_outputs: " << m_nof_outputs << std::endl;
std::cout << "nof_constants: " << m_nof_constants << std::endl;
std::cout << "nof_intermidiates: " << m_nof_intermidiates << std::endl;
std::cout << "Constants:: " << std::endl;
for (auto constant : m_constants) {
std::cout << " " << constant << std::endl;
}
std::cout << "Instructions:: " << std::endl;
for (auto inst : m_instructions) {
std::cout << " Opcode: " << (inst & 0xFF) << ", op1: " << ((inst >> 8) & 0xFF)
<< ", op2: " << ((inst >> 16) & 0xFF) << ", Res: " << ((inst >> 24) & 0xFF) << std::endl;
}
}
};

} // namespace icicle
Loading
Loading