-
Notifications
You must be signed in to change notification settings - Fork 3
/
mentionCount.py
106 lines (93 loc) · 4.04 KB
/
mentionCount.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import csv
import os
import sys
from re import escape, findall
# Directory holding the input files; the output files are written alongside them.
fileDir = "20150320-BUFNJ"
# INPUT FILE NAMES
# os.path.join uses the platform's separator. A hard-coded backslash only
# forms a directory path on Windows; elsewhere it becomes part of the file name.
transcriptFileName = os.path.join(fileDir, "HOME-P1.txt")
rosterFileName = os.path.join(fileDir, "roster.csv")
# OUTPUT FILE NAMES
mentionSeqFileName = os.path.join(fileDir, "OUT-mentionSeq.csv")
mentionSummaryFileName = os.path.join(fileDir, "OUT-mentionSummary.csv")
# Read roster file
# Set up dictionary to store game roster info, keyed by the first CSV column
# (the player identifier that is later searched for in the transcript).
# Columns read: 0 = identifier, 1 = team, 5 = position, 6 = TOI
# (presumably time on ice -- confirm against the roster file).
rosterDict = {}
# "with" closes the file even if a row fails to parse.
with open(rosterFileName) as rosterFile:
    rosterCSVreader = csv.reader(rosterFile, delimiter=",")
    # The first row is a header; the default keeps an empty file from raising.
    next(rosterCSVreader, None)
    for row in rosterCSVreader:
        # csv.reader yields an empty list for a blank line; skip it rather
        # than fail with IndexError on row[0].
        if not row:
            continue
        # Counters start at 0 and are updated while reading the transcript.
        rosterDict[row[0]] = {"TEAM": row[1],
                              "TOI": float(row[6]),
                              "POS": row[5],
                              "TOT-MENTIONS": 0,
                              "PORTION-MENTIONS": {},
                              "POSS_POS": 0,
                              "POSS_NEG": 0}
# Make a regular expression string with all the identifiers, for later use.
# Want all players separated by a | character.
# - Each identifier is escaped so that characters such as "." or "(" in a
#   roster entry are matched literally instead of being read as regex syntax.
# - Longer identifiers come first: alternation takes the first alternative
#   that matches, so with "AB" listed before "ABC" every "ABC" in the text
#   would be counted as "AB".
# - Blank identifiers are dropped; an empty alternative matches at every
#   position in the text.
idRegEx = "|".join(escape(playerId)
                   for playerId in sorted(rosterDict, key=len, reverse=True)
                   if playerId)
# With no identifiers the pattern would be "()", which matches the empty
# string everywhere and makes the later roster lookup fail. "(?!)" is an
# empty negative lookahead, i.e. a pattern that never matches.
fullRegEx = "(" + idRegEx + ")" if idRegEx else "(?!)"
# Read in transcript file.
# "with" closes the handle even if the read raises.
with open(transcriptFileName) as transcriptFile:
    transcriptStr = transcriptFile.read()
# Break up the file on blank lines (two consecutive newline characters).
# These designate stoppages in play.
transcriptList = transcriptStr.split("\n\n")
# Iterate over the transcript text by portion.
# NOTE(review): the source had lost its indentation; the nesting below (all
# work done once per portion) is reconstructed from the logic -- confirm.
# masterPortionList collects, per portion, the ordered list of mentioned
# player identifiers; portions are numbered from 1.
masterPortionList = []
for portionNum, portion in enumerate(transcriptList, 1):
    portionProgressionList = findall(fullRegEx, portion)
    masterPortionList.append(portionProgressionList)

    # Tally each mention, both overall and for this specific portion.
    for mentioned in portionProgressionList:
        playerStats = rosterDict[mentioned]
        playerStats["TOT-MENTIONS"] += 1
        perPortion = playerStats["PORTION-MENTIONS"]
        perPortion[portionNum] = perPortion.get(portionNum, 0) + 1

    if not portionProgressionList:
        continue

    # Define "positive possession" as one where your team has the puck after
    # your call: the next mention is a different player on the same team.
    # Anything else (other team, or the same player again) counts as negative.
    followers = portionProgressionList[1:]
    for current, following in zip(portionProgressionList, followers):
        sameTeam = rosterDict[current]["TEAM"] == rosterDict[following]["TEAM"]
        if sameTeam and current != following:
            rosterDict[current]["POSS_POS"] += 1
        else:
            rosterDict[current]["POSS_NEG"] += 1

    # The last touch of a portion is recorded as a positive possession.
    rosterDict[portionProgressionList[-1]]["POSS_POS"] += 1
# Create output csv with summary for player/portion:
# one row per (player, portion) pair in which the player was mentioned.
mentionSummaryOutList = [["Player", "Team", "Portion", "MentionCount"]]
for player in rosterDict:
    playerTeam = rosterDict[player]["TEAM"]
    for playerPortion, mentionCount in rosterDict[player]["PORTION-MENTIONS"].items():
        mentionSummaryOutList.append([player, playerTeam, playerPortion, mentionCount])
# csv.writer needs a binary-mode file on Python 2 but a text-mode file opened
# with newline="" on Python 3, where writing to a "wb" file raises TypeError.
if sys.version_info[0] >= 3:
    mentionSummaryFile = open(mentionSummaryFileName, "w", newline="")
else:
    mentionSummaryFile = open(mentionSummaryFileName, "wb")
with mentionSummaryFile:
    csvSummary = csv.writer(mentionSummaryFile)
    csvSummary.writerows(mentionSummaryOutList)
# Create output csv for mention sequence:
# one row per mention, in transcript order.
mentionSeqOutList = [["MentionID", "Portion", "Player", "MentionTeam"]]
mentionID = 1  # runs across all portions; it is not reset per portion
for portionID, portion in enumerate(masterPortionList, 1):
    for playerMention in portion:
        mentionSeqOutList.append([mentionID,
                                  portionID,
                                  playerMention,
                                  rosterDict[playerMention]["TEAM"]])
        mentionID += 1
# csv.writer needs a binary-mode file on Python 2 but a text-mode file opened
# with newline="" on Python 3, where writing to a "wb" file raises TypeError.
if sys.version_info[0] >= 3:
    mentionSeqFile = open(mentionSeqFileName, "w", newline="")
else:
    mentionSeqFile = open(mentionSeqFileName, "wb")
with mentionSeqFile:
    csvSeq = csv.writer(mentionSeqFile)
    csvSeq.writerows(mentionSeqOutList)