Notes: setting up my R environment by loading the ‘tidyverse’, ‘lubridate’, ‘ggplot2’, ‘dplyr’, and ‘readr’ packages
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(lubridate)
## Loading required package: timechange
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(ggplot2)
library(dplyr)
library(readr)
getwd()
## [1] "/cloud/project/Google_Data_Analytics/Member_vs_Casual_Riders/CSV"
q1_2020 <- read_csv("Divvy_Trips_2020_Q1.csv")
## Rows: 215737 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): ride_id, rideable_type, started_at, ended_at, start_station_name, e...
## dbl (6): start_station_id, end_station_id, start_lat, start_lng, end_lat, en...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
q4_2019 <- read_csv("Divvy_Trips_2019_Q4.csv")
## Rows: 224770 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): start_time, end_time, from_station_name, to_station_name, usertype,...
## dbl (5): trip_id, bikeid, from_station_id, to_station_id, birthyear
## num (1): tripduration
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
q3_2019 <- read_csv("Divvy_Trips_2019_Q3.csv")
## Rows: 301024 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): start_time, end_time, from_station_name, to_station_name, usertype,...
## dbl (5): trip_id, bikeid, from_station_id, to_station_id, birthyear
## num (1): tripduration
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
q2_2019 <- read_csv("Divvy_Trips_2019_Q2.csv")
## Rows: 306200 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): 01 - Rental Details Local Start Time, 01 - Rental Details Local End...
## dbl (5): 01 - Rental Details Rental ID, 01 - Rental Details Bike ID, 03 - Re...
## num (1): 01 - Rental Details Duration In Seconds Uncapped
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
colnames(q1_2020)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual"
colnames(q2_2019)
## [1] "01 - Rental Details Rental ID"
## [2] "01 - Rental Details Local Start Time"
## [3] "01 - Rental Details Local End Time"
## [4] "01 - Rental Details Bike ID"
## [5] "01 - Rental Details Duration In Seconds Uncapped"
## [6] "03 - Rental Start Station ID"
## [7] "03 - Rental Start Station Name"
## [8] "02 - Rental End Station ID"
## [9] "02 - Rental End Station Name"
## [10] "User Type"
## [11] "Member Gender"
## [12] "05 - Member Details Member Birthday Year"
colnames(q3_2019)
## [1] "trip_id" "start_time" "end_time"
## [4] "bikeid" "tripduration" "from_station_id"
## [7] "from_station_name" "to_station_id" "to_station_name"
## [10] "usertype" "gender" "birthyear"
colnames(q4_2019)
## [1] "trip_id" "start_time" "end_time"
## [4] "bikeid" "tripduration" "from_station_id"
## [7] "from_station_name" "to_station_id" "to_station_name"
## [10] "usertype" "gender" "birthyear"
q1_2020 <- q1_2020 %>%
select(-c(start_lat, start_lng, end_lat, end_lng))
q4_2019 <- q4_2019 %>%
select(-c(birthyear, gender,"tripduration"))
q3_2019 <- q3_2019 %>%
select(-c(birthyear, gender,"tripduration"))
q2_2019 <- q2_2019 %>%
select(-c("01 - Rental Details Duration In Seconds Uncapped", "Member Gender","05 - Member Details Member Birthday Year"))
(q4_2019 <- rename(q4_2019
,ride_id = trip_id
,rideable_type = bikeid
,started_at = start_time
,ended_at = end_time
,start_station_name = from_station_name
,start_station_id = from_station_id
,end_station_name = to_station_name
,end_station_id = to_station_id
,member_casual = usertype))
## # A tibble: 224,770 × 9
## ride_id started_at ended…¹ ridea…² start…³ start…⁴ end_s…⁵ end_s…⁶ membe…⁷
## <dbl> <chr> <chr> <dbl> <dbl> <chr> <dbl> <chr> <chr>
## 1 25223640 10/1/2019 0… 10/1/2… 2215 20 Sheffi… 309 Leavit… Subscr…
## 2 25223641 10/1/2019 0… 10/1/2… 6328 19 Throop… 241 Morgan… Subscr…
## 3 25223642 10/1/2019 0… 10/1/2… 3003 84 Milwau… 199 Wabash… Subscr…
## 4 25223643 10/1/2019 0… 10/1/2… 3275 313 Lakevi… 290 Kedzie… Subscr…
## 5 25223644 10/1/2019 0… 10/1/2… 5294 210 Ashlan… 382 Wester… Subscr…
## 6 25223645 10/1/2019 0… 10/1/2… 1891 156 Clark … 226 Racine… Subscr…
## 7 25223646 10/1/2019 0… 10/1/2… 1061 84 Milwau… 142 McClur… Subscr…
## 8 25223647 10/1/2019 0… 10/1/2… 1274 156 Clark … 463 Clark … Subscr…
## 9 25223648 10/1/2019 0… 10/1/2… 6011 156 Clark … 463 Clark … Subscr…
## 10 25223649 10/1/2019 0… 10/1/2… 2957 336 Cottag… 336 Cottag… Custom…
## # … with 224,760 more rows, and abbreviated variable names ¹ended_at,
## # ²rideable_type, ³start_station_id, ⁴start_station_name, ⁵end_station_id,
## # ⁶end_station_name, ⁷member_casual
(q3_2019 <- rename(q3_2019
,ride_id = trip_id
,rideable_type = bikeid
,started_at = start_time
,ended_at = end_time
,start_station_name = from_station_name
,start_station_id = from_station_id
,end_station_name = to_station_name
,end_station_id = to_station_id
,member_casual = usertype))
## # A tibble: 301,024 × 9
## ride_id started_at ended…¹ ridea…² start…³ start…⁴ end_s…⁵ end_s…⁶ membe…⁷
## <dbl> <chr> <chr> <dbl> <dbl> <chr> <dbl> <chr> <chr>
## 1 23479388 7/1/2019 0:… 7/1/20… 3591 117 Wilton… 497 Kimbal… Subscr…
## 2 23479389 7/1/2019 0:… 7/1/20… 5353 381 Wester… 203 Wester… Custom…
## 3 23479390 7/1/2019 0:… 7/1/20… 6180 313 Lakevi… 144 Larrab… Custom…
## 4 23479391 7/1/2019 0:… 7/1/20… 5540 313 Lakevi… 144 Larrab… Custom…
## 5 23479392 7/1/2019 0:… 7/1/20… 6014 168 Michig… 62 McCorm… Custom…
## 6 23479393 7/1/2019 0:… 7/1/20… 4941 300 Broadw… 232 Pine G… Subscr…
## 7 23479394 7/1/2019 0:… 7/1/20… 3770 168 Michig… 62 McCorm… Custom…
## 8 23479395 7/1/2019 0:… 7/1/20… 5442 313 Lakevi… 144 Larrab… Custom…
## 9 23479396 7/1/2019 0:… 7/1/20… 2957 43 Michig… 195 Columb… Custom…
## 10 23479397 7/1/2019 0:… 7/1/20… 6091 43 Michig… 195 Columb… Custom…
## # … with 301,014 more rows, and abbreviated variable names ¹ended_at,
## # ²rideable_type, ³start_station_id, ⁴start_station_name, ⁵end_station_id,
## # ⁶end_station_name, ⁷member_casual
(q2_2019 <- rename(q2_2019
,ride_id = "01 - Rental Details Rental ID"
,rideable_type = "01 - Rental Details Bike ID"
,started_at = "01 - Rental Details Local Start Time"
,ended_at = "01 - Rental Details Local End Time"
,start_station_name = "03 - Rental Start Station Name"
,start_station_id = "03 - Rental Start Station ID"
,end_station_name = "02 - Rental End Station Name"
,end_station_id = "02 - Rental End Station ID"
,member_casual = "User Type"))
## # A tibble: 306,200 × 9
## ride_id started_at ended…¹ ridea…² start…³ start…⁴ end_s…⁵ end_s…⁶ membe…⁷
## <dbl> <chr> <chr> <dbl> <dbl> <chr> <dbl> <chr> <chr>
## 1 22178529 4/1/2019 0:… 4/1/20… 6251 81 Daley … 56 Despla… Subscr…
## 2 22178530 4/1/2019 0:… 4/1/20… 6226 317 Wood S… 59 Wabash… Subscr…
## 3 22178531 4/1/2019 0:… 4/1/20… 5649 283 LaSall… 174 Canal … Subscr…
## 4 22178532 4/1/2019 0:… 4/1/20… 4151 26 McClur… 133 Kingsb… Subscr…
## 5 22178533 4/1/2019 0:… 4/1/20… 3270 202 Halste… 129 Blue I… Subscr…
## 6 22178534 4/1/2019 0:… 4/1/20… 3123 420 Ellis … 426 Ellis … Subscr…
## 7 22178535 4/1/2019 0:… 4/1/20… 6418 503 Drake … 500 Centra… Subscr…
## 8 22178536 4/1/2019 0:… 4/1/20… 4513 260 Kedzie… 499 Kosciu… Subscr…
## 9 22178537 4/1/2019 0:… 4/1/20… 3280 211 St. Cl… 211 St. Cl… Custom…
## 10 22178538 4/1/2019 0:… 4/1/20… 5534 211 St. Cl… 211 St. Cl… Custom…
## # … with 306,190 more rows, and abbreviated variable names ¹ended_at,
## # ²rideable_type, ³start_station_id, ⁴start_station_name, ⁵end_station_id,
## # ⁶end_station_name, ⁷member_casual
str(q1_2020)
## tibble [215,737 × 9] (S3: tbl_df/tbl/data.frame)
## $ ride_id : chr [1:215737] "EACB19130B0CDA4A" "8FED874C809DC021" "789F3C21E472CA96" "C9A388DAC6ABF313" ...
## $ rideable_type : chr [1:215737] "docked_bike" "docked_bike" "docked_bike" "docked_bike" ...
## $ started_at : chr [1:215737] "1/21/2020 20:06" "1/30/2020 14:22" "1/9/2020 19:29" "1/6/2020 16:17" ...
## $ ended_at : chr [1:215737] "1/21/2020 20:14" "1/30/2020 14:26" "1/9/2020 19:32" "1/6/2020 16:25" ...
## $ start_station_name: chr [1:215737] "Western Ave & Leland Ave" "Clark St & Montrose Ave" "Broadway & Belmont Ave" "Clark St & Randolph St" ...
## $ start_station_id : num [1:215737] 239 234 296 51 66 212 96 96 212 38 ...
## $ end_station_name : chr [1:215737] "Clark St & Leland Ave" "Southport Ave & Irving Park Rd" "Wilton Ave & Belmont Ave" "Fairbanks Ct & Grand Ave" ...
## $ end_station_id : num [1:215737] 326 318 117 24 212 96 212 212 96 100 ...
## $ member_casual : chr [1:215737] "member" "member" "member" "member" ...
str(q4_2019)
## tibble [224,770 × 9] (S3: tbl_df/tbl/data.frame)
## $ ride_id : num [1:224770] 25223640 25223641 25223642 25223643 25223644 ...
## $ started_at : chr [1:224770] "10/1/2019 0:01" "10/1/2019 0:02" "10/1/2019 0:04" "10/1/2019 0:04" ...
## $ ended_at : chr [1:224770] "10/1/2019 0:17" "10/1/2019 0:06" "10/1/2019 0:18" "10/1/2019 0:43" ...
## $ rideable_type : num [1:224770] 2215 6328 3003 3275 5294 ...
## $ start_station_id : num [1:224770] 20 19 84 313 210 156 84 156 156 336 ...
## $ start_station_name: chr [1:224770] "Sheffield Ave & Kingsbury St" "Throop (Loomis) St & Taylor St" "Milwaukee Ave & Grand Ave" "Lakeview Ave & Fullerton Pkwy" ...
## $ end_station_id : num [1:224770] 309 241 199 290 382 226 142 463 463 336 ...
## $ end_station_name : chr [1:224770] "Leavitt St & Armitage Ave" "Morgan St & Polk St" "Wabash Ave & Grand Ave" "Kedzie Ave & Palmer Ct" ...
## $ member_casual : chr [1:224770] "Subscriber" "Subscriber" "Subscriber" "Subscriber" ...
str(q3_2019)
## tibble [301,024 × 9] (S3: tbl_df/tbl/data.frame)
## $ ride_id : num [1:301024] 23479388 23479389 23479390 23479391 23479392 ...
## $ started_at : chr [1:301024] "7/1/2019 0:00" "7/1/2019 0:01" "7/1/2019 0:01" "7/1/2019 0:02" ...
## $ ended_at : chr [1:301024] "7/1/2019 0:20" "7/1/2019 0:18" "7/1/2019 0:27" "7/1/2019 0:27" ...
## $ rideable_type : num [1:301024] 3591 5353 6180 5540 6014 ...
## $ start_station_id : num [1:301024] 117 381 313 313 168 300 168 313 43 43 ...
## $ start_station_name: chr [1:301024] "Wilton Ave & Belmont Ave" "Western Ave & Monroe St" "Lakeview Ave & Fullerton Pkwy" "Lakeview Ave & Fullerton Pkwy" ...
## $ end_station_id : num [1:301024] 497 203 144 144 62 232 62 144 195 195 ...
## $ end_station_name : chr [1:301024] "Kimball Ave & Belmont Ave" "Western Ave & 21st St" "Larrabee St & Webster Ave" "Larrabee St & Webster Ave" ...
## $ member_casual : chr [1:301024] "Subscriber" "Customer" "Customer" "Customer" ...
str(q2_2019)
## tibble [306,200 × 9] (S3: tbl_df/tbl/data.frame)
## $ ride_id : num [1:306200] 22178529 22178530 22178531 22178532 22178533 ...
## $ started_at : chr [1:306200] "4/1/2019 0:02" "4/1/2019 0:03" "4/1/2019 0:11" "4/1/2019 0:13" ...
## $ ended_at : chr [1:306200] "4/1/2019 0:09" "4/1/2019 0:20" "4/1/2019 0:15" "4/1/2019 0:18" ...
## $ rideable_type : num [1:306200] 6251 6226 5649 4151 3270 ...
## $ start_station_id : num [1:306200] 81 317 283 26 202 420 503 260 211 211 ...
## $ start_station_name: chr [1:306200] "Daley Center Plaza" "Wood St & Taylor St" "LaSalle St & Jackson Blvd" "McClurg Ct & Illinois St" ...
## $ end_station_id : num [1:306200] 56 59 174 133 129 426 500 499 211 211 ...
## $ end_station_name : chr [1:306200] "Desplaines St & Kinzie St" "Wabash Ave & Roosevelt Rd" "Canal St & Madison St" "Kingsbury St & Kinzie St" ...
## $ member_casual : chr [1:306200] "Subscriber" "Subscriber" "Subscriber" "Subscriber" ...
q4_2019 <- mutate(q4_2019, ride_id = as.character(ride_id)
,rideable_type = as.character(rideable_type))
q3_2019 <- mutate(q3_2019, ride_id = as.character(ride_id)
,rideable_type = as.character(rideable_type))
q2_2019 <- mutate(q2_2019, ride_id = as.character(ride_id)
,rideable_type = as.character(rideable_type))
all_trips <- bind_rows(q2_2019, q3_2019, q4_2019, q1_2020)
remove(q1_2020, q2_2019, q4_2019, q3_2019)
colnames(all_trips)
## [1] "ride_id" "started_at" "ended_at"
## [4] "rideable_type" "start_station_id" "start_station_name"
## [7] "end_station_id" "end_station_name" "member_casual"
nrow(all_trips)
## [1] 2546189
dim(all_trips)
## [1] 2546189 9
head(all_trips)
## # A tibble: 6 × 9
## ride_id started_at ended…¹ ridea…² start…³ start…⁴ end_s…⁵ end_s…⁶ membe…⁷
## <chr> <chr> <chr> <chr> <dbl> <chr> <dbl> <chr> <chr>
## 1 22178529 4/1/2019 0:02 4/1/20… 6251 81 Daley … 56 Despla… Subscr…
## 2 22178530 4/1/2019 0:03 4/1/20… 6226 317 Wood S… 59 Wabash… Subscr…
## 3 22178531 4/1/2019 0:11 4/1/20… 5649 283 LaSall… 174 Canal … Subscr…
## 4 22178532 4/1/2019 0:13 4/1/20… 4151 26 McClur… 133 Kingsb… Subscr…
## 5 22178533 4/1/2019 0:19 4/1/20… 3270 202 Halste… 129 Blue I… Subscr…
## 6 22178534 4/1/2019 0:19 4/1/20… 3123 420 Ellis … 426 Ellis … Subscr…
## # … with abbreviated variable names ¹ended_at, ²rideable_type,
## # ³start_station_id, ⁴start_station_name, ⁵end_station_id, ⁶end_station_name,
## # ⁷member_casual
str(all_trips)
## tibble [1,047,731 × 9] (S3: tbl_df/tbl/data.frame)
## $ ride_id : chr [1:2546189] "22178529" "22178530" "22178531" "22178532" ...
## $ started_at : chr [1:2546189] "4/1/2019 0:02" "4/1/2019 0:03" "4/1/2019 0:11" "4/1/2019 0:13" ...
## $ ended_at : chr [1:2546189] "4/1/2019 0:09" "4/1/2019 0:20" "4/1/2019 0:15" "4/1/2019 0:18" ...
## $ rideable_type : chr [1:2546189] "6251" "6226" "5649" "4151" ...
## $ start_station_id : num [1:2546189] 81 317 283 26 202 420 503 260 211 211 ...
## $ start_station_name: chr [1:2546189] "Daley Center Plaza" "Wood St & Taylor St" "LaSalle St & Jackson Blvd" "McClurg Ct & Illinois St" ...
## $ end_station_id : num [1:2546189] 56 59 174 133 129 426 500 499 211 211 ...
## $ end_station_name : chr [1:2546189] "Desplaines St & Kinzie St" "Wabash Ave & Roosevelt Rd" "Canal St & Madison St" "Kingsbury St & Kinzie St" ...
## $ member_casual : chr [1:2546189] "Subscriber" "Subscriber" "Subscriber" "Subscriber" ...
summary(all_trips)
## started_at ended_at member_casual
## Length:2546189 Length:2546189 Length:2546189
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
table(all_trips$member_casual)
##
## casual Customer member Subscriber
## 183728 345675 378407 1638379
all_trips <- all_trips %>%
mutate(member_casual = recode(member_casual
,"Subscriber" = "member"
,"Customer" = "casual"))
table(all_trips$member_casual)
##
## casual member
## 529403 2016786
all_trips$date <- as.Date(all_trips$started_at) #The default format is yyyy-mm-dd
all_trips$month <- format(as.Date(all_trips$date), "%m")
all_trips$day <- format(as.Date(all_trips$date), "%d")
all_trips$year <- format(as.Date(all_trips$date), "%Y")
all_trips$day_of_week <- format(as.Date(all_trips$date), "%A")
all_trips <- mutate(all_trips, started_at = mdy_hm(started_at)
, ended_at = mdy_hm(ended_at))
all_trips$ride_length <- difftime(all_trips$ended_at,all_trips$started_at, units='mins')
str(all_trips)
## tibble [1,047,731 × 15] (S3: tbl_df/tbl/data.frame)
## $ ride_id : chr [1:2546189] "22178529" "22178530" "22178531" "22178532" ...
## $ started_at : POSIXct[1:2546189], format: "2019-04-01 00:02:00" "2019-04-01 00:03:00" ...
## $ ended_at : POSIXct[1:2546189], format: "2019-04-01 00:09:00" "2019-04-01 00:20:00" ...
## $ rideable_type : chr [1:2546189] "6251" "6226" "5649" "4151" ...
## $ start_station_id : num [1:2546189] 81 317 283 26 202 420 503 260 211 211 ...
## $ start_station_name: chr [1:2546189] "Daley Center Plaza" "Wood St & Taylor St" "LaSalle St & Jackson Blvd" "McClurg Ct & Illinois St" ...
## $ end_station_id : num [1:2546189] 56 59 174 133 129 426 500 499 211 211 ...
## $ end_station_name : chr [1:2546189] "Desplaines St & Kinzie St" "Wabash Ave & Roosevelt Rd" "Canal St & Madison St" "Kingsbury St & Kinzie St" ...
## $ member_casual : chr [1:2546189] "member" "member" "member" "member" ...
## $ date : Date[1:2546189], format: "4-01-20" "4-01-20" ...
## $ month : chr [1:2546189] "01" "01" "01" "01" ...
## $ day : chr [1:2546189] "20" "20" "20" "20" ...
## $ year : chr [1:2546189] "4" "4" "4" "4" ...
## $ day_of_week : chr [1:2546189] "Tuesday" "Tuesday" "Tuesday" "Tuesday" ...
## $ ride_length : 'difftime' num [1:2546189] 7 17 4 5 ...
## ..- attr(*, "units")= chr "mins"
nrow(all_trips)
## [1] 2546189
all_trips <- na.omit(all_trips)
all_trips <- distinct(all_trips)
all_trips <- all_trips[!(all_trips$ride_length <=0),]
nrow(all_trips)
## [1] 2540092
mean(all_trips$ride_length) #straight average (total ride length / rides)
## Time difference of 23.86972 mins
median(all_trips$ride_length) #midpoint number in the ascending array of ride lengths
## Time difference of 11 mins
max(all_trips$ride_length) #longest ride
## Time difference of 156450 mins
min(all_trips$ride_length) #shortest ride
## Time difference of 1 mins
summary(all_trips$ride_length)
## Length Class Mode
## 2540092 difftime numeric
table(all_trips$member_casual)
##
## casual member
## 525712 2014380
aggregate(all_trips$ride_length ~ all_trips$member_casual, FUN = mean) #straight average (total ride length / rides)
## all_trips$member_casual all_trips$ride_length
## 1 casual 62.26152 mins
## 2 member 13.85024 mins
aggregate(all_trips$ride_length ~ all_trips$member_casual, FUN = median) #midpoint number in the ascending array of ride lengths
## all_trips$member_casual all_trips$ride_length
## 1 casual 26 mins
## 2 member 10 mins
aggregate(all_trips$ride_length ~ all_trips$member_casual, FUN = max) #longest ride
## all_trips$member_casual all_trips$ride_length
## 1 casual 156450 mins
## 2 member 150944 mins
aggregate(all_trips$ride_length ~ all_trips$member_casual, FUN = min) #shortest ride
## all_trips$member_casual all_trips$ride_length
## 1 casual 1 mins
## 2 member 1 mins
aggregate(all_trips$ride_length ~ all_trips$member_casual + all_trips$day_of_week, FUN = mean)
## all_trips$member_casual all_trips$day_of_week all_trips$ride_length
## 1 casual Friday 52.70968 mins
## 2 member Friday 13.68401 mins
## 3 casual Monday 53.82897 mins
## 4 member Monday 13.48617 mins
## 5 casual Saturday 70.13204 mins
## 6 member Saturday 12.78588 mins
## 7 casual Sunday 67.81096 mins
## 8 member Sunday 13.49104 mins
## 9 casual Thursday 76.56711 mins
## 10 member Thursday 13.87723 mins
## 11 casual Tuesday 54.19205 mins
## 12 member Tuesday 14.01073 mins
## 13 casual Wednesday 67.37661 mins
## 14 member Wednesday 13.83719 mins
all_trips$day_of_week <- ordered(all_trips$day_of_week, levels=c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"))
aggregate(all_trips$ride_length ~ all_trips$member_casual + all_trips$day_of_week, FUN = mean)
## all_trips$member_casual all_trips$day_of_week all_trips$ride_length
## 1 casual Sunday 67.81096 mins
## 2 member Sunday 13.49104 mins
## 3 casual Monday 53.82897 mins
## 4 member Monday 13.48617 mins
## 5 casual Tuesday 54.19205 mins
## 6 member Tuesday 14.01073 mins
## 7 casual Wednesday 67.37661 mins
## 8 member Wednesday 13.83719 mins
## 9 casual Thursday 76.56711 mins
## 10 member Thursday 13.87723 mins
## 11 casual Friday 52.70968 mins
## 12 member Friday 13.68401 mins
## 13 casual Saturday 70.13204 mins
## 14 member Saturday 12.78588 mins
all_trips %>%
mutate(weekday = wday(started_at, label = TRUE)) %>% #creates weekday field using wday()
group_by(member_casual, weekday) %>% #groups by usertype and weekday
summarise(number_of_rides = n() #calculates the number of rides and average duration
,average_duration = mean(ride_length)) %>% # calculates the average duration
arrange(member_casual, weekday)
## `summarise()` has grouped output by 'member_casual'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'member_casual'. You can override using the
## `.groups` argument.
## # A tibble: 14 × 4
## # Groups: member_casual [2]
## member_casual weekday number_of_rides average_duration
## <chr> <ord> <int> <drtn>
## 1 casual Sun 116201 59.26285 mins
## 2 casual Mon 61917 57.25147 mins
## 3 casual Tue 54226 64.46007 mins
## 4 casual Wed 60343 67.73447 mins
## 5 casual Thu 56161 67.83439 mins
## 6 casual Fri 65851 68.38865 mins
## 7 casual Sat 111013 57.69203 mins
## 8 member Sun 183539 15.36367 mins
## 9 member Mon 331667 13.80016 mins
## 10 member Tue 362604 13.48410 mins
## 11 member Wed 355027 13.07827 mins
## 12 member Thu 309712 13.52449 mins
## 13 member Fri 292576 13.34896 mins
## 14 member Sat 179255 16.04394 mins
all_trips %>%
mutate(weekday = wday(started_at, label = TRUE)) %>%
group_by(member_casual, weekday) %>%
summarise(number_of_rides = n()
,average_duration = mean(ride_length)) %>%
arrange(member_casual, weekday) %>%
ggplot() +
geom_col(aes(x = weekday, y = number_of_rides, fill = member_casual), position = "dodge")
## `summarise()` has grouped output by 'member_casual'. You can override using the
## `.groups` argument.
all_trips %>%
mutate(weekday = wday(started_at, label = TRUE)) %>%
group_by(member_casual, weekday) %>%
summarise(number_of_rides = n()
,average_duration_in_minutes = mean(ride_length)) %>%
arrange(member_casual, weekday) %>%
ggplot() +
geom_col(aes(x = weekday, y = average_duration_in_minutes, fill = member_casual), position = "dodge")
## `summarise()` has grouped output by 'member_casual'. You can override using the
## `.groups` argument.
## Don't know how to automatically pick scale for object of type <difftime>.
## Defaulting to continuous.
counts <- aggregate(all_trips$ride_length ~ all_trips$member_casual + all_trips$day_of_week, FUN = mean)
write.csv(counts, file = '/cloud/project/Google_Data_Analytics/Member_vs_Casual_Riders/CSV/avg_ride_length.csv')