-
Notifications
You must be signed in to change notification settings - Fork 0
/
impute_excercise.py
27 lines (22 loc) · 937 Bytes
/
impute_excercise.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import pandas
import numpy
def imputation(filename):
    """Load a baseball CSV and fill missing 'weight' values with the mean.

    Args:
        filename: Path to a CSV file readable by ``pandas.read_csv``. The
            file must contain a 'weight' column.

    Returns:
        The loaded DataFrame, with every missing entry in its 'weight'
        column replaced by the average of the weights that are present.

    Raises:
        KeyError: If the CSV has no 'weight' column.
    """
    baseball = pandas.read_csv(filename)
    # numpy.mean on a pandas Series defers to the Series' own mean, which
    # skips missing values, so the average covers the known weights only.
    average_weight = numpy.mean(baseball['weight'])
    baseball['weight'] = baseball['weight'].fillna(average_weight)
    return baseball
# Script entry point: run the imputation on the Lahman Master table.
# The returned DataFrame is not used here.
if __name__ == "__main__":
    imputation('baseballdatabank-2017.1/core/Master.csv')