elo<-read_csv("C:/users/abbie/Desktop/MADA2023/WNBA-stats/wnba-team-elo-ratings.csv")
Rows: 10488 Columns: 16
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): date, team1, team2, name1, name2
dbl (11): season, neutral, playoff, score1, score2, elo1_pre, elo2_pre, elo1...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(elo)
Rows: 10,488
Columns: 16
$ season <dbl> 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, …
$ date <chr> "10/10/2019", "10/10/2019", "10/8/2019", "10/8/2019", "10/6/…
$ team1 <chr> "WAS", "CON", "WAS", "CON", "WAS", "CON", "WAS", "CON", "WAS…
$ team2 <chr> "CON", "WAS", "CON", "WAS", "CON", "WAS", "CON", "WAS", "CON…
$ name1 <chr> "Washington Mystics", "Connecticut Sun", "Washington Mystics…
$ name2 <chr> "Connecticut Sun", "Washington Mystics", "Connecticut Sun", …
$ neutral <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ playoff <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ score1 <dbl> 89, 78, 86, 90, 94, 81, 87, 99, 95, 86, 94, 90, 75, 78, 92, …
$ score2 <dbl> 78, 89, 90, 86, 81, 94, 99, 87, 86, 95, 90, 94, 92, 56, 75, …
$ elo1_pre <dbl> 1684, 1634, 1693, 1626, 1671, 1648, 1700, 1618, 1694, 1624, …
$ elo2_pre <dbl> 1634, 1684, 1626, 1693, 1648, 1671, 1618, 1700, 1624, 1694, …
$ elo1_post <dbl> 1692, 1627, 1684, 1634, 1693, 1626, 1671, 1648, 1700, 1618, …
$ elo2_post <dbl> 1627, 1692, 1634, 1684, 1626, 1693, 1648, 1671, 1618, 1700, …
$ prob1 <dbl> 0.718, 0.282, 0.476, 0.524, 0.399, 0.601, 0.763, 0.237, 0.74…
$ is_home1 <dbl> 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, …
# Convert the date column from character to Date. anydate() is used because
# the date strings vary in length (e.g. "10/8/2019" vs "10/10/2019"), which
# was not compatible with lubridate.
elo$date <- anydate(elo$date)

# Keep only Houston (HOU) games played before 2001.
elo_hou_raw <- elo %>%
  filter(
    team1 == "HOU" | team2 == "HOU",
    date < as.Date("2001-01-01")
  ) %>%
  # Sometimes there is a duplicate entry for the same game where the only
  # difference is that team1 and team2 are switched. De-duplicate only AFTER
  # subsetting to HOU: all conditions inside a single filter() call are
  # evaluated on the unfiltered data, so !duplicated(date) placed alongside
  # the team condition would also drop a HOU game whenever another team's row
  # for the same date appears earlier in the file.
  filter(!duplicated(date)) %>%
  arrange(date) %>%
  # Count the game number, since that is the x axis for the graph.
  mutate(game = row_number())
glimpse(elo_hou_raw)
Rows: 138
Columns: 17
$ season <dbl> 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997, …
$ date <date> 1997-06-21, 1997-06-24, 1997-06-26, 1997-06-28, 1997-06-30,…
$ team1 <chr> "HOU", "HOU", "NYL", "HOU", "HOU", "NYL", "NYL", "PHO", "HOU…
$ team2 <chr> "CLE", "PHO", "HOU", "LVA", "LAS", "HOU", "HOU", "HOU", "SAC…
$ name1 <chr> "Houston Comets", "Houston Comets", "New York Liberty", "Hou…
$ name2 <chr> "Cleveland Rockers", "Phoenix Mercury", "Houston Comets", "U…
$ neutral <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ playoff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ score1 <dbl> 76, 72, 62, 76, 71, 72, 65, 69, 89, 82, 77, 86, 64, 81, 63, …
$ score2 <dbl> 56, 55, 60, 58, 66, 67, 58, 64, 61, 60, 69, 76, 73, 57, 74, …
$ elo1_pre <dbl> 1500, 1530, 1539, 1535, 1558, 1556, 1564, 1554, 1547, 1542, …
$ elo2_pre <dbl> 1500, 1515, 1544, 1479, 1498, 1564, 1556, 1554, 1485, 1466, …
$ elo1_post <dbl> 1530, 1544, 1548, 1558, 1564, 1564, 1579, 1561, 1563, 1567, …
$ elo2_post <dbl> 1470, 1501, 1535, 1456, 1492, 1556, 1541, 1547, 1469, 1441, …
$ prob1 <dbl> 0.387, 0.634, 0.381, 0.466, 0.692, 0.603, 0.397, 0.614, 0.69…
$ is_home1 <dbl> 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, …
$ game <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1…

