Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: fix some bugs in the entire loop #274

Merged
Merged 2 commits on Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rdagent/core/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def clear(self) -> None:
"""
Clear the workspace
"""
shutil.rmtree(self.workspace_path)
shutil.rmtree(self.workspace_path, ignore_errors=True)
self.code_dict = {}

def execute(self) -> object | None:
Expand Down
12 changes: 10 additions & 2 deletions rdagent/scenarios/kaggle/developer/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from rdagent.components.coder.factor_coder.factor import FactorTask
from rdagent.components.runner import CachedRunner
from rdagent.components.runner.conf import RUNNER_SETTINGS
from rdagent.core.exception import ModelEmptyError
from rdagent.core.exception import FactorEmptyError, ModelEmptyError
from rdagent.core.experiment import ASpecificExp
from rdagent.oai.llm_utils import md5_hash
from rdagent.scenarios.kaggle.experiment.kaggle_experiment import (
Expand Down Expand Up @@ -41,12 +41,20 @@ class KGModelRunner(KGCachedRunner[KGModelExperiment]):
def develop(self, exp: KGModelExperiment) -> KGModelExperiment:
self.build_from_SOTA(exp)
if exp.sub_workspace_list[0].target_task.model_type == "XGBoost":
if exp.sub_workspace_list[0].code_dict == {}:
raise ModelEmptyError("No model is implemented")
exp.experiment_workspace.inject_code(**{"model_xgb.py": exp.sub_workspace_list[0].code_dict["model.py"]})
elif exp.sub_workspace_list[0].target_task.model_type == "RandomForest":
if exp.sub_workspace_list[0].code_dict == {}:
raise ModelEmptyError("No model is implemented")
exp.experiment_workspace.inject_code(**{"model_rf.py": exp.sub_workspace_list[0].code_dict["model.py"]})
elif exp.sub_workspace_list[0].target_task.model_type == "LightGBM":
if exp.sub_workspace_list[0].code_dict == {}:
raise ModelEmptyError("No model is implemented")
exp.experiment_workspace.inject_code(**{"model_lgb.py": exp.sub_workspace_list[0].code_dict["model.py"]})
elif exp.sub_workspace_list[0].target_task.model_type == "NN":
if exp.sub_workspace_list[0].code_dict == {}:
raise ModelEmptyError("No model is implemented")
exp.experiment_workspace.inject_code(**{"model_nn.py": exp.sub_workspace_list[0].code_dict["model.py"]})
if RUNNER_SETTINGS.cache_result:
cache_hit, result = self.get_cache_result(exp)
Expand Down Expand Up @@ -113,7 +121,7 @@ def develop(self, exp: KGFactorExperiment) -> KGFactorExperiment:
exp.experiment_workspace.data_description.append((sub_ws.target_task.get_task_information(), feature_shape))
current_feature_file_count += 1
if implemented_factor_count == 0:
raise ModelEmptyError("No factor is implemented")
raise FactorEmptyError("No factor is implemented")

if RUNNER_SETTINGS.cache_result:
cache_hit, result = self.get_cache_result(exp)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@
from sklearn.preprocessing import LabelEncoder, OneHotEncoder


def prepreprocess():
def prepreprocess(debug_mode=False):
"""
This method loads the data, drops the unnecessary columns, and splits it into train and validation sets.
"""
# Load and preprocess the data
data_df = pd.read_csv("/kaggle/input/train.csv")
data_df = data_df.head(1200)
if debug_mode:
data_df = data_df.sample(frac=0.1, random_state=42)
data_df = data_df
data_df = data_df.drop(["id"], axis=1)

X = data_df.drop(["class"], axis=1)
Expand Down Expand Up @@ -79,11 +81,11 @@ def preprocess_transform(X: pd.DataFrame, preprocessor):
return X_transformed


def preprocess_script():
def preprocess_script(debug_mode=False):
"""
This method applies the preprocessing steps to the training, validation, and test datasets.
"""
X_train, X_valid, y_train, y_valid = prepreprocess()
X_train, X_valid, y_train, y_valid = prepreprocess(debug_mode=debug_mode)

# Fit the preprocessor on the training data
preprocessor = preprocess_fit(X_train)
Expand All @@ -94,7 +96,8 @@ def preprocess_script():

# Load and preprocess the test data
submission_df = pd.read_csv("/kaggle/input/test.csv")
submission_df = submission_df.head(500)
if debug_mode:
data_df = data_df.sample(frac=0.1, random_state=42)
passenger_ids = submission_df["id"]
submission_df = submission_df.drop(["id"], axis=1)
X_test = preprocess_transform(submission_df, preprocessor)
Expand Down
2 changes: 1 addition & 1 deletion rdagent/scenarios/kaggle/experiment/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from fea_share_preprocess import preprocess_script

X_train, X_valid, y_train, y_valid, X_test, passenger_ids = preprocess_script()
X_train, X_valid, y_train, y_valid, X_test, passenger_ids = preprocess_script(debug_mode=True)

pickle.dump(X_train, open("X_train.pkl", "wb"))
pickle.dump(X_valid, open("X_valid.pkl", "wb"))
Expand Down
2 changes: 1 addition & 1 deletion rdagent/scenarios/kaggle/proposal/proposal.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def prepare_context(self, trace: Trace) -> Tuple[dict, bool]:

context_dict = {
"hypothesis_and_feedback": hypothesis_feedback,
"RAG": None,
"RAG": rag_content,
"hypothesis_output_format": prompt_dict["hypothesis_output_format"],
"hypothesis_specification": None,
}
Expand Down