Compile with -Werror and -Wall #1116

Merged: 9 commits, Oct 13, 2017
2 changes: 1 addition & 1 deletion src/common/cmake/Common.cmake
@@ -3,7 +3,7 @@
include(ExternalProject)
include(CMakeParseArguments)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -g -Werror -Wall -Wno-error=unused-function")

set(FLATBUFFERS_VERSION "1.7.1")
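Most of the source changes in this pull request exist to satisfy these flags. For C++ code, GCC's -Wall enables -Wsign-compare, so comparing a signed loop index against an unsigned size() now fails the build under -Werror. A minimal sketch of the pattern (a made-up example, not code from this repository):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> v = {1, 2, 3};

      // With -Werror -Wall this no longer compiles; GCC reports something like:
      //   comparison between signed and unsigned integer expressions [-Wsign-compare]
      // for (int i = 0; i < v.size(); ++i) { ... }

      // The fix used throughout this PR: give the index an unsigned type.
      for (size_t i = 0; i < v.size(); ++i) {
        std::printf("%d\n", v[i]);
      }
      return 0;
    }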

3 changes: 0 additions & 3 deletions src/common/common.cc
@@ -12,9 +12,6 @@
#include "io.h"
#include <functional>

/* This is used to define the array of object IDs. */
const UT_icd object_id_icd = {sizeof(ObjectID), NULL, NULL, NULL};

const UniqueID NIL_ID = UniqueID::nil();

const unsigned char NIL_DIGEST[DIGEST_SIZE] = {0};
2 changes: 1 addition & 1 deletion src/common/common_protocol.cc
@@ -19,7 +19,7 @@ to_flatbuf(flatbuffers::FlatBufferBuilder &fbb,
ObjectID object_ids[],
int64_t num_objects) {
std::vector<flatbuffers::Offset<flatbuffers::String>> results;
for (size_t i = 0; i < num_objects; i++) {
for (int64_t i = 0; i < num_objects; i++) {
results.push_back(to_flatbuf(fbb, object_ids[i]));
}
return fbb.CreateVector(results);
2 changes: 1 addition & 1 deletion src/common/io.cc
@@ -369,7 +369,7 @@ int64_t read_vector(int fd, int64_t *type, std::vector<uint8_t> &buffer) {
if (closed) {
goto disconnected;
}
if (length > buffer.size()) {
if (static_cast<size_t>(length) > buffer.size()) {
Contributor:
Why not just have length be uint64_t (I'm assuming size_t is not an option)?
It's better to let the data type match the semantics of the variable than to check for it. Otherwise, if you pass around signed variables for unsigned quantities, you'll keep having to worry about whether a negative value is possible and whether it might have a special meaning.

buffer.resize(length);
Contributor:
This would "work" but it's very confusing and wouldn't make sense if length is negative. There should at least be a cast here. But see my comment above.

}
closed = read_bytes(fd, buffer.data(), length);
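The two alternatives discussed in this thread, sketched with a simplified stand-in for the real read_vector (the function names and signatures here are assumptions, not the repository's API):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Option taken in the PR: keep length signed and cast after checking, so the
    // comparison against buffer.size() compiles cleanly under -Wall.
    void grow_checked(int64_t length, std::vector<uint8_t> &buffer) {
      if (length < 0) {
        return;  // a negative length would have to mean "error" or "disconnected"
      }
      if (static_cast<size_t>(length) > buffer.size()) {
        buffer.resize(static_cast<size_t>(length));
      }
    }

    // Option the reviewer suggests: make the quantity unsigned, so neither a cast
    // nor a "what would a negative value mean?" question ever comes up.
    void grow_unsigned(uint64_t length, std::vector<uint8_t> &buffer) {
      if (length > buffer.size()) {
        buffer.resize(length);
      }
    }

Either form compiles cleanly with the new flags; the difference is whether the invariant "length is never negative" lives in the type or in a runtime check.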
2 changes: 1 addition & 1 deletion src/common/lib/python/common_extension.h
@@ -8,7 +8,7 @@
#include "common.h"

typedef uint8_t TaskSpec;
struct TaskBuilder;
class TaskBuilder;

extern PyObject *CommonError;

8 changes: 8 additions & 0 deletions src/common/state/error_table.cc
@@ -1,6 +1,14 @@
#include "error_table.h"
#include "redis.h"

const char *error_types[] = {"object_hash_mismatch", "put_reconstruction",
"worker_died"};
const char *error_messages[] = {
"A nondeterministic task was reexecuted.",
"An object created by ray.put was evicted and could not be reconstructed. "
"The driver may need to be restarted.",
"A worker died or was killed while executing a task."};

void push_error(DBHandle *db_handle,
DBClientID driver_id,
int error_index,
9 changes: 2 additions & 7 deletions src/common/state/error_table.h
@@ -27,13 +27,8 @@ typedef enum {
} error_index;

/** Information about the error to be displayed to the user. */
static const char *error_types[] = {"object_hash_mismatch",
"put_reconstruction", "worker_died"};
static const char *error_messages[] = {
"A nondeterministic task was reexecuted.",
"An object created by ray.put was evicted and could not be reconstructed. "
"The driver may need to be restarted.",
"A worker died or was killed while executing a task."};
extern const char *error_types[];
extern const char *error_messages[];

/**
* Push an error to the given Python driver.
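Presumably the motivation for this move: when the arrays were defined in the header, every translation unit that included error_table.h carried its own static copy, and files that never read them can trip -Wall's unused-variable warning (the arrays hold pointers to const char, but the arrays themselves are not const, so they count as non-constant statics). Declaring them extern here and defining them once in error_table.cc, as shown above, sidesteps that. The pattern in miniature:

    // error_table.h: declare the names only; no storage is defined here.
    extern const char *error_types[];
    extern const char *error_messages[];

    // error_table.cc: the single shared definition.
    const char *error_types[] = {"object_hash_mismatch", "put_reconstruction",
                                 "worker_died"};
    const char *error_messages[] = {
        "A nondeterministic task was reexecuted.",
        "An object created by ray.put was evicted and could not be reconstructed. "
        "The driver may need to be restarted.",
        "A worker died or was killed while executing a task."};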
20 changes: 9 additions & 11 deletions src/common/state/redis.cc
@@ -128,7 +128,7 @@ void get_redis_shards(redisContext *context,
/* Try to read the Redis shard locations from the primary shard. If we find
* that all of them are present, exit. */
reply = (redisReply *) redisCommand(context, "LRANGE RedisShards 0 -1");
if (reply->elements == num_redis_shards) {
if (static_cast<int>(reply->elements) == num_redis_shards) {
break;
}

@@ -146,7 +146,7 @@
/* Parse the Redis shard addresses. */
char db_shard_address[16];
int db_shard_port;
for (int i = 0; i < reply->elements; ++i) {
for (size_t i = 0; i < reply->elements; ++i) {
/* Parse the shard addresses and ports. */
CHECK(reply->element[i]->type == REDIS_REPLY_STRING);
CHECK(parse_ip_addr_port(reply->element[i]->str, db_shard_address,
@@ -297,7 +297,7 @@ DBHandle *db_connect(const std::string &db_primary_address,
get_redis_shards(db->sync_context, db_shards_addresses, db_shards_ports);
CHECKM(db_shards_addresses.size() > 0, "No Redis shards found");
/* Connect to the shards. */
for (int i = 0; i < db_shards_addresses.size(); ++i) {
for (size_t i = 0; i < db_shards_addresses.size(); ++i) {
db_connect_shard(db_shards_addresses[i], db_shards_ports[i], client,
client_type, node_ip_address, num_args, args, db, &context,
&subscribe_context, &sync_context);
@@ -317,7 +317,7 @@ void DBHandle_free(DBHandle *db) {

/* Clean up the Redis shards. */
CHECK(db->contexts.size() == db->subscribe_contexts.size());
for (int i = 0; i < db->contexts.size(); ++i) {
for (size_t i = 0; i < db->contexts.size(); ++i) {
redisAsyncFree(db->contexts[i]);
redisAsyncFree(db->subscribe_contexts[i]);
}
@@ -360,7 +360,7 @@ void db_attach(DBHandle *db, event_loop *loop, bool reattach) {
}
/* Attach other redis shards to the event loop. */
CHECK(db->contexts.size() == db->subscribe_contexts.size());
for (int i = 0; i < db->contexts.size(); ++i) {
for (size_t i = 0; i < db->contexts.size(); ++i) {
int err = redisAeAttach(loop, db->contexts[i]);
/* If the database is reattached in the tests, redis normally gives
* an error which we can safely ignore. */
@@ -678,7 +678,7 @@ void redis_object_table_lookup_callback(redisAsyncContext *c,
/* Extract the manager IDs from the response into a vector. */
std::vector<DBClientID> manager_ids;

for (int j = 0; j < reply->elements; ++j) {
for (size_t j = 0; j < reply->elements; ++j) {
CHECK(reply->element[j]->type == REDIS_REPLY_STRING);
DBClientID manager_id;
memcpy(manager_id.id, reply->element[j]->str, sizeof(manager_id.id));
@@ -777,7 +777,7 @@ void redis_object_table_subscribe_to_notifications(
* src/common/redismodule/ray_redis_module.cc. */
const char *object_channel_prefix = "OC:";
const char *object_channel_bcast = "BCAST";
for (int i = 0; i < db->subscribe_contexts.size(); ++i) {
for (size_t i = 0; i < db->subscribe_contexts.size(); ++i) {
int status = REDIS_OK;
/* Subscribe to notifications from the object table. This uses the client ID
* as the channel name so this channel is specific to this client.
@@ -1075,8 +1075,6 @@ void redis_task_table_subscribe_callback(redisAsyncContext *c,
strcmp(message_type->str, "pmessage") == 0) {
/* Handle a task table event. Parse the payload and call the callback. */
auto message = flatbuffers::GetRoot<TaskReply>(payload->str);
/* Extract the task ID. */
TaskID task_id = from_flatbuf(message->task_id());
/* Extract the scheduling state. */
int64_t state = message->state();
/* Extract the local scheduler ID. */
@@ -1188,7 +1186,7 @@ void redis_db_client_table_scan(DBHandle *db,
}
/* Get all the database client information. */
CHECK(reply->type == REDIS_REPLY_ARRAY);
for (int i = 0; i < reply->elements; ++i) {
for (size_t i = 0; i < reply->elements; ++i) {
redisReply *client_reply = (redisReply *) redisCommand(
db->sync_context, "HGETALL %b", reply->element[i]->str,
reply->element[i]->len);
@@ -1198,7 +1196,7 @@
memset(&db_client, 0, sizeof(db_client));
int num_fields = 0;
/* Parse the fields into a DBClient. */
for (int j = 0; j < client_reply->elements; j = j + 2) {
for (size_t j = 0; j < client_reply->elements; j = j + 2) {
const char *key = client_reply->element[j]->str;
const char *value = client_reply->element[j + 1]->str;
if (strcmp(key, "ray_client_id") == 0) {
4 changes: 2 additions & 2 deletions src/common/task.cc
@@ -72,10 +72,10 @@ class TaskBuilder {
}

void SetRequiredResource(int64_t resource_index, double value) {
if (resource_index >= resource_vector_.size()) {
if (static_cast<size_t>(resource_index) >= resource_vector_.size()) {
/* Make sure the resource vector is constructed entry by entry,
* in order. */
CHECK(resource_index == resource_vector_.size());
CHECK(static_cast<size_t>(resource_index) == resource_vector_.size());
resource_vector_.resize(resource_index + 1);
Contributor (@mehrdadn), Oct 12, 2017:
This is especially confusing... are you adding 1 to a potentially negative value? Is that what you intend?
(Note that the check is indeed above, but the code should make sense without the CHECKs.)

Collaborator (author):
Are you saying that in general casting from signed to unsigned is bad? E.g., would it be better to do the following?

CHECK(resource_index == static_cast<int64_t>(resource_vector_.size()));

Contributor:
No, I'm saying (like in the previous comment) the variable should be unsigned. Right now you're adding 1 to a signed value and then resizing based on that, which doesn't make sense in the negative case.

Contributor (@pcmoritz), Oct 13, 2017:
So in our case we have decided to make all index variables signed and we try to keep it that way; this is also done by others like Google and Arrow so we are in good company. The reason is that it simplifies arithmetic and guards against the variable wrapping around zero (it is easier to detect a negative index than a large positive one).

Contributor (@mehrdadn), Oct 13, 2017:
Signed values don't actually simplify anything. (?) They only make it complicated to range-check, since now you have to check both < 0 and >= size() rather than just the latter. I'm also not sure what you mean about "guarding" against variables wrapping around zero—adding (ptrdiff_t)(-1) and adding ~(size_t)0 to a pointer p results in exactly the same address p - 1 regardless of which one you choose. In fact, if you have checked pointers enabled (_GLIBCXX_DEBUG may do this? not sure), the checking may actually catch large unsigned values since those would always be illegal even if they were to coincidentally land inside the array, but it cannot possibly catch bitwise-identical negative values if they happen to point inside the array.

Moreover, one real difference is that overflowing a signed value is undefined rather than well-defined behavior. That means your code could do all sorts of crazy things if you do happen to overflow accidentally, rather than merely writing to a lower spot than the one you intended.

More generally, I would not hold Google as the example of ideal C++ coding. They have guidelines that are pretty universally understood to be often bad advice (no need to take my word for this; just Google around...), and tailored to their own particular use cases. I don't know why Arrow might do this but I'm sure you won't have trouble finding people who use unsigned values either... =P

That said, you obviously don't have to change this if you'd rather not. Just letting you know what the issues are.

}
resource_vector_[resource_index] = value;
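A compilable sketch of the trade-off argued over in this thread, using a plain vector rather than the real TaskBuilder (the helper names are made up):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // With a signed index, a complete bounds check needs two comparisons, and the
    // negative case must be handled before any cast to an unsigned type.
    bool in_range_signed(int64_t i, const std::vector<double> &v) {
      return i >= 0 && static_cast<size_t>(i) < v.size();
    }

    // With an unsigned index, one comparison suffices: an accidental "negative"
    // value has already wrapped to a huge number and fails the same test.
    bool in_range_unsigned(size_t i, const std::vector<double> &v) {
      return i < v.size();
    }

The overflow point is separate: unsigned arithmetic wraps with well-defined behavior, whereas signed overflow is undefined behavior, which is why resource_index + 1 is only safe once the CHECK has established that resource_index equals the current vector size.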
2 changes: 1 addition & 1 deletion src/common/task.h
@@ -10,7 +10,7 @@

typedef uint8_t TaskSpec;

struct TaskBuilder;
class TaskBuilder;

#define NIL_TASK_ID NIL_ID
#define NIL_ACTOR_ID NIL_ID
12 changes: 5 additions & 7 deletions src/common/test/task_table_tests.cc
@@ -161,7 +161,7 @@ TEST subscribe_timeout_test(void) {
(void *) subscribe_timeout_context);
/* Disconnect the database to see if the subscribe times out. */
close(db->subscribe_context->c.fd);
for (int i = 0; i < db->subscribe_contexts.size(); ++i) {
for (size_t i = 0; i < db->subscribe_contexts.size(); ++i) {
close(db->subscribe_contexts[i]->c.fd);
}
aeProcessEvents(g_loop, AE_TIME_EVENTS);
@@ -176,7 +176,6 @@ TEST subscribe_timeout_test(void) {
/* === Test publish timeout === */

const char *publish_timeout_context = "publish_timeout";
const int publish_test_number = 272;
int publish_failed = 0;

void publish_done_callback(TaskID task_id, void *user_context) {
@@ -205,7 +204,7 @@ TEST publish_timeout_test(void) {
(void *) publish_timeout_context);
/* Disconnect the database to see if the publish times out. */
close(db->context->c.fd);
for (int i = 0; i < db->contexts.size(); ++i) {
for (size_t i = 0; i < db->contexts.size(); ++i) {
close(db->contexts[i]->c.fd);
}
aeProcessEvents(g_loop, AE_TIME_EVENTS);
@@ -227,7 +226,7 @@ int64_t reconnect_db_callback(event_loop *loop,
redisAsyncFree(db->subscribe_context);
db->subscribe_context = redisAsyncConnect("127.0.0.1", 6379);
db->subscribe_context->data = (void *) db;
for (int i = 0; i < db->subscribe_contexts.size(); ++i) {
for (size_t i = 0; i < db->subscribe_contexts.size(); ++i) {
redisAsyncFree(db->subscribe_contexts[i]);
db->subscribe_contexts[i] = redisAsyncConnect("127.0.0.1", 6380 + i);
db->subscribe_contexts[i]->data = (void *) db;
@@ -247,7 +246,6 @@ int64_t terminate_event_loop_callback(event_loop *loop,
/* === Test subscribe retry === */

const char *subscribe_retry_context = "subscribe_retry";
const int subscribe_retry_test_number = 273;
int subscribe_retry_succeeded = 0;

void subscribe_retry_done_callback(ObjectID object_id, void *user_context) {
@@ -277,7 +275,7 @@ TEST subscribe_retry_test(void) {
(void *) subscribe_retry_context);
/* Disconnect the database to see if the subscribe times out. */
close(db->subscribe_context->c.fd);
for (int i = 0; i < db->subscribe_contexts.size(); ++i) {
for (size_t i = 0; i < db->subscribe_contexts.size(); ++i) {
close(db->subscribe_contexts[i]->c.fd);
}
/* Install handler for reconnecting the database. */
@@ -329,7 +327,7 @@ TEST publish_retry_test(void) {
(void *) publish_retry_context);
/* Disconnect the database to see if the publish times out. */
close(db->subscribe_context->c.fd);
for (int i = 0; i < db->subscribe_contexts.size(); ++i) {
for (size_t i = 0; i < db->subscribe_contexts.size(); ++i) {
close(db->subscribe_contexts[i]->c.fd);
}
/* Install handler for reconnecting the database. */
4 changes: 2 additions & 2 deletions src/common/test/test_common.h
@@ -60,15 +60,15 @@ static inline void flushall_redis(void) {
/* Readd the shard locations. */
freeReplyObject(redisCommand(context, "SET NumRedisShards %d",
db_shards_addresses.size()));
for (int i = 0; i < db_shards_addresses.size(); ++i) {
for (size_t i = 0; i < db_shards_addresses.size(); ++i) {
freeReplyObject(redisCommand(context, "RPUSH RedisShards %s:%d",
db_shards_addresses[i].c_str(),
db_shards_ports[i]));
}
redisFree(context);

/* Flush the remaining shards. */
for (int i = 0; i < db_shards_addresses.size(); ++i) {
for (size_t i = 0; i < db_shards_addresses.size(); ++i) {
context = redisConnect(db_shards_addresses[i].c_str(), db_shards_ports[i]);
freeReplyObject(redisCommand(context, "FLUSHALL"));
redisFree(context);
2 changes: 2 additions & 0 deletions src/global_scheduler/CMakeLists.txt
@@ -4,5 +4,7 @@ project(global_scheduler)

include(${CMAKE_CURRENT_LIST_DIR}/../common/cmake/Common.cmake)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wall")

add_executable(global_scheduler global_scheduler.cc global_scheduler_algorithm.cc)
target_link_libraries(global_scheduler common ${HIREDIS_LIB})
6 changes: 3 additions & 3 deletions src/global_scheduler/global_scheduler.cc
@@ -290,7 +290,7 @@ void object_table_subscribe_callback(
ObjectID_to_string(object_id, id_string, ID_STRING_SIZE));
ARROW_UNUSED(id_string);
LOG_DEBUG("\tManagers<%d>:", manager_vector.size());
for (int i = 0; i < manager_vector.size(); i++) {
for (size_t i = 0; i < manager_vector.size(); i++) {
LOG_DEBUG("\t\t%s", manager_vector[i]);
}

@@ -304,7 +304,7 @@
LOG_DEBUG("New object added to object_info_table with id = %s",
ObjectID_to_string(object_id, id_string, ID_STRING_SIZE));
LOG_DEBUG("\tmanager locations:");
for (int i = 0; i < manager_vector.size(); i++) {
for (size_t i = 0; i < manager_vector.size(); i++) {
LOG_DEBUG("\t\t%s", manager_vector[i]);
}
}
@@ -314,7 +314,7 @@

/* In all cases, replace the object location vector on each callback. */
obj_info_entry.object_locations.clear();
for (int i = 0; i < manager_vector.size(); i++) {
for (size_t i = 0; i < manager_vector.size(); i++) {
obj_info_entry.object_locations.push_back(std::string(manager_vector[i]));
}
}
6 changes: 1 addition & 5 deletions src/global_scheduler/global_scheduler_algorithm.cc
@@ -141,8 +141,7 @@ double calculate_cost_pending(const GlobalSchedulerState *state,
TaskSpec *task_spec) {
/* Calculate how much data is already present on this machine. TODO(rkn): Note
* that this information is not being used yet. Fix this. */
int64_t data_size =
locally_available_data_size(state, scheduler->id, task_spec);
locally_available_data_size(state, scheduler->id, task_spec);
/* TODO(rkn): This logic does not load balance properly when the different
* machines have different sizes. Fix this. */
return scheduler->num_recent_tasks_sent + scheduler->info.task_queue_length;
@@ -157,11 +156,8 @@ bool handle_task_waiting(GlobalSchedulerState *state,
"task wait handler encounted a task with NULL spec");

bool task_feasible = false;
/* The total size of the task's data. */
int64_t task_object_size = 0;

/* Go through all the nodes, calculate the score for each, pick max score. */
LocalScheduler *scheduler = NULL;
double best_local_scheduler_score = INT32_MIN;
CHECKM(best_local_scheduler_score < 0,
"We might have a floating point underflow");
2 changes: 1 addition & 1 deletion src/local_scheduler/CMakeLists.txt
@@ -19,7 +19,7 @@ endif(APPLE)

include_directories("${PYTHON_INCLUDE_DIRS}")

# set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS} --std=c99 -Werror")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wall")

if(UNIX AND NOT APPLE)
link_libraries(rt)
5 changes: 2 additions & 3 deletions src/local_scheduler/local_scheduler_algorithm.cc
@@ -153,7 +153,7 @@ void SchedulingAlgorithmState_free(SchedulingAlgorithmState *algorithm_state) {
remove_actor(algorithm_state, actor_id);
}
/* Free the list of cached actor task specs and the task specs themselves. */
for (int i = 0; i < algorithm_state->cached_submitted_actor_tasks.size();
for (size_t i = 0; i < algorithm_state->cached_submitted_actor_tasks.size();
++i) {
TaskQueueEntry task = algorithm_state->cached_submitted_actor_tasks[i];
TaskQueueEntry_free(&task);
@@ -682,7 +682,7 @@ int reconstruct_object_timeout_handler(event_loop *loop,

int64_t max_num_to_reconstruct = 10000;
int64_t num_reconstructed = 0;
for (int64_t i = 0; i < object_ids_to_reconstruct.size(); i++) {
for (size_t i = 0; i < object_ids_to_reconstruct.size(); i++) {
ObjectID object_id = object_ids_to_reconstruct[i];
/* Only call reconstruct if we are still missing the object. */
if (state->algorithm_state->remote_objects.find(object_id) !=
@@ -1112,7 +1112,6 @@ void handle_actor_creation_notification(

for (int i = 0; i < num_cached_actor_tasks; ++i) {
TaskQueueEntry task = algorithm_state->cached_submitted_actor_tasks[i];
TaskSpec *spec = task.spec;
/* Note that handle_actor_task_submitted may append the spec to the end of
* the cached_submitted_actor_tasks array. */
handle_actor_task_submitted(state, algorithm_state, task.spec,
4 changes: 2 additions & 2 deletions src/local_scheduler/local_scheduler_client.cc
@@ -45,7 +45,7 @@ LocalSchedulerConnection *LocalSchedulerConnection_init(

/* Parse the reply object. */
auto reply_message = flatbuffers::GetRoot<RegisterClientReply>(reply);
for (int i = 0; i < reply_message->gpu_ids()->size(); ++i) {
for (size_t i = 0; i < reply_message->gpu_ids()->size(); ++i) {
result->gpu_ids.push_back(reply_message->gpu_ids()->Get(i));
}
/* If the worker is not an actor, there should not be any GPU IDs here. */
@@ -122,7 +122,7 @@ TaskSpec *local_scheduler_get_task(LocalSchedulerConnection *conn,
* actor methods. */
if (ActorID_equal(conn->actor_id, NIL_ACTOR_ID)) {
conn->gpu_ids.clear();
for (int i = 0; i < reply_message->gpu_ids()->size(); ++i) {
for (size_t i = 0; i < reply_message->gpu_ids()->size(); ++i) {
conn->gpu_ids.push_back(reply_message->gpu_ids()->Get(i));
}
}