forked from justmarkham/DAT7
-
Notifications
You must be signed in to change notification settings - Fork 1
/
03_file_reading.py
103 lines (80 loc) · 2.85 KB
/
03_file_reading.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
'''
Lesson on file reading using Airline Safety Data
https://github.com/fivethirtyeight/data/tree/master/airline-safety
'''
# read the whole file at once, return a single string (including newlines)
# on Python 3, text mode already converts different line endings into '\n';
# the old 'rU' mode was removed in Python 3.11 and now raises ValueError
f = open('airlines.csv', 'r')
try:
    data = f.read()
finally:
    # when you open a file by hand, you must close it yourself,
    # even if reading fails (otherwise the file handle is leaked)
    f.close()
# use a context manager to automatically close your file
# (the file is closed when the 'with' block ends, even if an error occurs)
# plain 'r' is used because the 'rU' mode raises ValueError on Python 3.11+
with open('airlines.csv', 'r') as f:
    data = f.read()
# read the file into a list (each list element is one row)
# iterating over a file yields one line at a time, with its trailing newline
# plain 'r' is used because the 'rU' mode raises ValueError on Python 3.11+
with open('airlines.csv', 'r') as f:
    data = []
    for row in f:
        data.append(row)

# do the same thing using a list comprehension
with open('airlines.csv', 'r') as f:
    data = [row for row in f]
# side note: splitting strings
'Hello DAT7 students'.split()       # no argument: split on whitespace
'apple,banana,cherry'.split(',')    # ['apple', 'banana', 'cherry']

# split each string (at the commas) into a list
# the line ending is stripped first, otherwise the last element of every
# row would keep a trailing '\n'
# plain 'r' is used because the 'rU' mode raises ValueError on Python 3.11+
with open('airlines.csv', 'r') as f:
    data = [row.rstrip('\n').split(',') for row in f]
# do the same thing using the csv module
# (unlike a plain split, it also handles quoted fields that contain commas)
import csv

# the csv documentation asks for files to be opened with newline='' so that
# the reader can handle line endings itself (this is Python 3 syntax; the old
# 'rU' mode raises ValueError on Python 3.11+)
with open('airlines.csv', newline='') as f:
    data = list(csv.reader(f))

# separate the header and data
header = data[0]    # first row holds the column names
data = data[1:]     # every remaining row is one airline
'''
EXERCISES:
1. Create a list containing the average number of incidents per year for each airline.
Example for Aer Lingus: (2 + 0)/30 = 0.07
Expected output: [0.07, 2.73, 0.23, ...]
2. Create a list of airline names (without the star).
Expected output: ['Aer Lingus', 'Aeroflot', 'Aerolineas Argentinas', ...]
3. Create a list (of the same length) that contains 1 if there's a star and 0 if not.
Expected output: [0, 1, 0, ...]
4. BONUS: Create a dictionary in which the key is the airline name (without the star)
and the value is the average number of incidents.
Expected output: {'Aer Lingus': 0.07, 'Aeroflot': 2.73, ...}
'''
# Part 1
# For every airline, add the two incident counts (columns 2 and 5), divide
# by 30 as in the exercise's example, and round to two decimal places.
incidents = []
for record in data:
    total_incidents = int(record[2]) + int(record[5])
    incidents.append(round(total_incidents / 30.0, 2))
# Parts 2 and 3
# airlines: the name from the first column, without its trailing star
# starred:  1 if the name ended with a star, 0 otherwise (same order/length)
airlines = []
starred = []
for row in data:
    name = row[0]
    # endswith() also works on an empty name, where name[-1] would raise
    # an IndexError
    if name.endswith('*'):
        starred.append(1)
        airlines.append(name[:-1])    # drop the star
    else:
        starred.append(0)
        airlines.append(name)
# Part 4
# Pair each airline name with its average number of incidents; the two lists
# are in the same order, so zip() lines them up element by element.
airline_incidents = {
    airline: average for airline, average in zip(airlines, incidents)
}
'''
A few extra things that will help you with the homework
'''
# a 'set' keeps only the unique elements of a collection
my_list = [1, 2, 1]
set(my_list)            # the set containing 1 and 2
len(set(my_list))       # 2 (the number of unique elements)

# the 'in' operator tests whether a value is an element of a list
1 in my_list            # True
3 in my_list            # False

# on strings, 'in' checks for a substring
my_string = 'hello there'
'the' in my_string      # True
'then' in my_string     # False

# on dictionaries, 'in' checks the keys only, never the values
my_dict = {'name': 'Kevin', 'title': 'instructor'}
'name' in my_dict       # True
'Kevin' in my_dict      # False

# the string method 'count' returns how many times a substring occurs
my_string.count('e')    # 3