Skip to content

Commit

Permalink
Update some experiments
Browse files Browse the repository at this point in the history
  • Loading branch information
mwydmuch committed Oct 25, 2023
1 parent 03676cb commit 74452a6
Show file tree
Hide file tree
Showing 11 changed files with 2,053 additions and 1,764 deletions.
1 change: 1 addition & 0 deletions experiments/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
"PSP": {"func": psprecision_at_k, "inv_ps": True},
# "PSnDCG": {"func": psndcg_at_k, "inv_ps": True},
"MacP": {"func": macro_precision_at_k, "inv_ps": False},
"MacF1": {"func": macro_f1_measure_at_k, "inv_ps": False},
"C": {"func": coverage_at_k, "inv_ps": False},
"MacR": {"func": macro_recall_at_k, "inv_ps": False},
}
Expand Down
8 changes: 8 additions & 0 deletions experiments/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ if [[ ! -e $INV_PRIORS_FILE ]]; then
python3 ${SCRIPT_DIR}/calculate_inv_priors.py $TRAIN_FILE $INV_PRIORS_FILE
fi

# Calculate inverse priors from the test file (skipped when the output file already exists)
INV_PRIORS_TEST_FILE="${DATASET_FILE}.inv_priors_test"
if [[ ! -e "$INV_PRIORS_TEST_FILE" ]]; then
    # Quote every path so that spaces or glob characters in dataset paths
    # are passed to the script as single arguments.
    python3 "${SCRIPT_DIR}/calculate_inv_priors.py" "$TEST_FILE" "$INV_PRIORS_TEST_FILE"
fi

# Calculate other weights
W_POW_FILE="${DATASET_FILE}.w_pow"
if [[ ! -e $W_POW_FILE ]]; then
Expand Down Expand Up @@ -123,6 +129,8 @@ if [[ ! -e $TEST_RESULT_FILE ]] || [[ -e $TEST_LOCK_FILE ]]; then
TEST_ARGS="${TEST_ARGS} --labelsWeights ${INV_PS_FILE}"
elif [[ $TEST_ARGS == *"--labelsWeights invPTest"* ]]; then
    # Must be checked before the plain "invP" branch: the glob
    # *"--labelsWeights invP"* also matches "--labelsWeights invPTest",
    # so with the opposite order this branch could never be reached.
    TEST_ARGS="${TEST_ARGS} --labelsWeights ${INV_PRIORS_TEST_FILE}"
elif [[ $TEST_ARGS == *"--labelsWeights invP"* ]]; then
    TEST_ARGS="${TEST_ARGS} --labelsWeights ${INV_PRIORS_FILE}"
elif [[ $TEST_ARGS == *"--labelsWeights wPow"* ]]; then
TEST_ARGS="${TEST_ARGS} --labelsWeights ${W_POW_FILE}"
elif [[ $TEST_ARGS == *"--labelsWeights wLog"* ]]; then
Expand Down
7 changes: 4 additions & 3 deletions experiments/test_bc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ fi
TEST_ON_TRAIN=0
TEST_USING_NXC_TEST=0

TEST_RESULT_FILE=${RESULTS_DIR}/${TEST_CONFIG}_bc
TEST_RESULT_FILE=${RESULTS_DIR}/${TEST_CONFIG}_bcaPrec
TEST_LOCK_FILE=${RESULTS_DIR}/.test_lock_${TEST_CONFIG}
if [[ ! -e $TEST_RESULT_FILE ]] || [[ -e $TEST_LOCK_FILE ]]; then
mkdir -p $RESULTS_DIR
Expand All @@ -129,13 +129,13 @@ if [[ ! -e $TEST_RESULT_FILE ]] || [[ -e $TEST_LOCK_FILE ]]; then
TEST_ARGS="${TEST_ARGS} --labelsWeights ${W_LOG_FILE}"
fi

PRED_CONFIG=$(echo "${TEST_ARGS}" | tr " /" "__")
PRED_CONFIG=$(echo "${TEST_ARGS}_bcaPrec" | tr " /" "__")
PRED_FILE=${MODEL}/pred_${PRED_CONFIG}
PRED_LOCK_FILE=${MODEL}/.test_lock_${PRED_CONFIG}
PRED_RESULT_FILE=${MODEL}/pred_results_${PRED_CONFIG}
if [[ ! -e $PRED_FILE ]] || [[ -e $PRED_LOCK_FILE ]]; then
touch $PRED_LOCK_FILE
${ROOT_DIR}/nxc bc -i $TEST_FILE -o $MODEL $TEST_ARGS --prediction $PRED_FILE --measures "" | tee -a $PRED_RESULT_FILE
${ROOT_DIR}/nxc bcaPrec -i $TEST_FILE -o $MODEL $TEST_ARGS --prediction $PRED_FILE --measures "" | tee -a $PRED_RESULT_FILE
rm -rf $PRED_LOCK_FILE
fi

Expand All @@ -152,6 +152,7 @@ if [[ ! -e $TEST_RESULT_FILE ]] || [[ -e $TEST_LOCK_FILE ]]; then
fi
done

echo $PRED_FILE | tee -a $TEST_RESULT_FILE
python3 ${SCRIPT_DIR}/evaluate.py $TEST_FILE $PRED_FILE $INV_PS_FILE | tee -a $TEST_RESULT_FILE

echo "Test date: $(date)" | tee -a $TEST_RESULT_FILE
Expand Down
19 changes: 19 additions & 0 deletions run_all.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Runs the PLT experiment sweep on one dataset:
#   1. every test.sh configuration without a --threads option,
#   2. the same configurations again with "--threads 1" appended,
#   3. the test_bc.sh runs for topK 3 and 5.
DATASET=wiki10
model_args="-m plt --seed 13"

for threads_opt in "" " --threads 1"; do
    for test_args in \
        "--topK 100" \
        "--topK 3 --covWeights 1" \
        "--topK 5 --covWeights 1" \
        "--topK 5 --labelsWeights invP" \
        "--topK 5 --labelsWeights invPs" \
        "--topK 5 --labelsWeights wLog" \
        "--topK 5 --labelsWeights wPow"
    do
        bash ./experiments/test.sh ${DATASET} "${model_args}" "${test_args}${threads_opt}"
    done
done

for top_k in 3 5; do
    bash ./experiments/test_bc.sh ${DATASET} "${model_args}" "--topK ${top_k}"
done

1 change: 1 addition & 0 deletions src/args.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ Args::Args() {
batches = 10;

bcIters = 1;
bcGreedy = true;
}

// Parse args
Expand Down
1 change: 1 addition & 0 deletions src/args.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ class Args : public FileHelper {
int batches;

int bcIters;
bool bcGreedy;

private:
std::default_random_engine rngSeeder;
Expand Down
11 changes: 7 additions & 4 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ void ofo(Args& args) {
}


void bc(Args& args) {
void bcaPrec(Args& args) {
// Load model args
args.loadFromFile(joinPath(args.output, "args.bin"));
args.printArgs();
Expand All @@ -262,7 +262,7 @@ void bc(Args& args) {

auto resAfterData = getResources();

std::vector<std::vector<Prediction>> predictions = model->bc(features, labels, args);
std::vector<std::vector<Prediction>> predictions = model->bcaPrec(features, labels, args);
auto resAfterFo = getResources();

// Output predictions
Expand All @@ -272,6 +272,9 @@ void bc(Args& args) {
out.close();
}

// Print additional model statistics
model->printInfo();

// Print resources
auto realTime = static_cast<Real>(std::chrono::duration_cast<std::chrono::milliseconds>(
resAfterFo.timePoint - resAfterData.timePoint)
Expand Down Expand Up @@ -470,8 +473,8 @@ int main(int argc, char** argv) {
predict(args);
else if (command == "ofo")
ofo(args);
else if (command == "bc")
bc(args);
else if (command == "bcaPrec")
bcaPrec(args);
else if (command == "testPredictionTime")
testPredictionTime(args);
else {
Expand Down
88 changes: 88 additions & 0 deletions src/model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,94 @@ std::vector<std::vector<Prediction>> Model::bc(SRMatrix& features, SRMatrix& lab
}


/**
 * Iteratively re-predicts every sample while maintaining per-label running
 * sums in `tp` and `fp` (tp[l] += value, fp[l] += 1 - value for every
 * prediction of label l).
 *
 * When `args.bcGreedy` is false, an initial batch prediction seeds `tp`/`fp`
 * and is written to `<args.prediction>_0`. Afterwards `args.bcIters` passes
 * are made over the samples in a freshly shuffled order; in each pass every
 * sample is re-predicted with `predict()` and the sums are updated. The
 * predictions after pass i are written to `<args.prediction>_<i>`.
 *
 * Side effects: clears `thresholds` and `labelsWeights`, overwrites `n`,
 * resets `tp`/`fp`, resizes `fn`, and (non-greedy mode only) leaves
 * `args.sampleTopK` set to 0.
 *
 * @param features feature rows; `features[j]` is passed to `predict()`
 * @param labels   only `labels.rows()` is used, as the number of samples
 * @param args     run configuration (`bcGreedy`, `bcIters`, `prediction`, ...)
 * @return the predictions produced by the last pass, one vector per sample
 */
std::vector<std::vector<Prediction>> Model::bcaPrec(SRMatrix& features, SRMatrix& labels, Args& args) {

    thresholds.clear();
    labelsWeights.clear();

    Log(CERR) << "Starting initial prediction ...\n";

    n = labels.rows();
    std::vector<std::vector<Prediction>> predictions(n);

    // Use assign() instead of resize(): resize() keeps existing elements, so a
    // second call on the same model would start from the previous call's sums.
    // NOTE(review): the "+ 2" padding is kept from the original; its purpose is
    // not visible in this function.
    tp.assign(m + 2, 0.0);
    fp.assign(m + 2, 0.0);
    // fn is only sized here; this function never reads or updates it.
    fn.resize(m + 2, 0.0);

    // Adds (or removes) the contribution of one sample's predictions to tp/fp.
    auto updateCounts = [this](const std::vector<Prediction>& samplePredictions, bool add) {
        for (const auto& p : samplePredictions) {
            if (add) {
                tp[p.label] += p.value;
                fp[p.label] += (1 - p.value);
            } else {
                tp[p.label] -= p.value;
                fp[p.label] -= (1 - p.value);
            }
        }
    };

    // Non-greedy mode: seed the sums with a regular batch prediction.
    if (!args.bcGreedy) {
        // sampleTopK is forced to 0 both before and after predictBatch(); the
        // caller's original value is not restored.
        args.sampleTopK = 0;
        predictions = this->predictBatch(features, args);
        n = predictions.size();
        args.sampleTopK = 0;

        std::ofstream out0(args.prediction + "_0");
        outputPrediction2(predictions, out0);
        out0.close();

        for (const auto& samplePredictions : predictions) updateCounts(samplePredictions, true);
    }

    Log(CERR) << "Starting block cordinated algorithm ...\n";
    std::random_device rd;
    std::mt19937 g(rd());

    // Visiting order of the samples, reshuffled before every pass.
    std::vector<int> order(n, 0);
    for (int i = 0; i < n; ++i) order[i] = i;

    for (int iter = 0; iter < args.bcIters; ++iter) {
        Log(CERR) << " Iter " << iter + 1 << "/" << args.bcIters << " ...\n";

        std::shuffle(order.begin(), order.end(), g);

        for (int pos = 0; pos < n; ++pos) {
            const int j = order[pos];
            printProgress(pos, n);

            // Remove the sample's current contribution before re-predicting it.
            // NOTE(review): in greedy mode this step is skipped, so with
            // bcIters > 1 the contribution added in the previous pass stays in
            // tp/fp and the sample is counted again -- confirm this is intended.
            if (!args.bcGreedy) updateCounts(predictions[j], false);

            predictions[j].clear();
            this->predict(predictions[j], features[j], args);

            updateCounts(predictions[j], true);
        }

        std::ofstream out(args.prediction + "_" + std::to_string(iter + 1));
        outputPrediction2(predictions, out);
        out.close();
    }

    return predictions;
}


Base* Model::trainBase(ProblemData& problemsData, Args& args) {
Base* base = new Base();
base->train(problemsData, args);
Expand Down
5 changes: 5 additions & 0 deletions src/model.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ class Model {
Real microOfo(SRMatrix& features, SRMatrix& labels, Args& args);
std::vector<Real> macroOfo(SRMatrix& features, SRMatrix& labels, Args& args);
std::vector<std::vector<Prediction>> bc(SRMatrix& features, SRMatrix& labels, Args& args);
std::vector<std::vector<Prediction>> bcaPrec(SRMatrix& features, SRMatrix& labels, Args& args);

virtual void load(Args& args, std::string infile) = 0;
virtual void preload(Args& args, std::string infile) { preloaded = true; };
Expand All @@ -76,8 +77,12 @@ class Model {
std::vector<Real> thresholds; // For prediction with thresholds
std::vector<Real> labelsWeights; // For prediction with label weights

std::vector<Real> tp;
std::vector<Real> fp;
std::vector<Real> fn;
std::vector<double> a;
std::vector<double> b;

int n; // Number of samples

// Base utils
Expand Down
39 changes: 28 additions & 11 deletions src/models/plt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,27 +266,44 @@ void PLT::predict(std::vector<Prediction>& prediction, SparseVector& features, A
};
}

if(!a.empty() && !b.empty()){
// if(!a.empty() && !b.empty()){
// calculateValue = [&](TreeNode* node, Real prob) {

// Real score = -9999999;
// int bestL = -1;
// for(auto& l : nodesLabels[node->index]){
// //Real tmpScore = (b[l] * prob - a[l]) / (b[l] * (b[l] + 1.0/n));
// Real tmpScore = (b[l] * prob - a[l]) / (b[l] * b[l]);
// if(tmpScore >= score){
// bestL = l;
// score = tmpScore;
// }
// }

// if(bestL == -1) std::cerr << "Something is wrong bestL == -1!\n";

// // std::cout << node->index << " " << nodesLabels[node->index]
// // << prob << " " << bestL << " " << b[bestL] << " " << a[bestL] << " " << score << "\n";
// // int x;
// // std::cin >> x;

// return score;
// };
// }

if(!tp.empty() && !fp.empty()){
//Log(CERR) << "Using TP/FP scores ...\n";
calculateValue = [&](TreeNode* node, Real prob) {

Real score = -9999999;
int bestL = -1;
for(auto& l : nodesLabels[node->index]){
//Real tmpScore = (b[l] * prob - a[l]) / (b[l] * (b[l] + 1.0/n));
Real tmpScore = (b[l] * prob - a[l]) / (b[l] * b[l]);
Real tmpScore = (tp[l] + prob) / (fp[l] + 1) - tp[l] / (fp[l] + 0.000001);
if(tmpScore >= score){
bestL = l;
score = tmpScore;
}
}

if(bestL == -1) std::cerr << "Something is wrong bestL == -1!\n";

// std::cout << node->index << " " << nodesLabels[node->index]
// << prob << " " << bestL << " " << b[bestL] << " " << a[bestL] << " " << score << "\n";
// int x;
// std::cin >> x;

return score;
};
}
Expand Down
Loading

0 comments on commit 74452a6

Please sign in to comment.