# ------------------------------------------------------------------------
# Data wrangling with the Tidyverse
# Written by Dominic Henry (dominichenry@gmail.com) - 29/03/2018

# Code used to illustrate key functions and operations from the 
# core Tidyverse packages. 

# Data are based on waterbird counts conducted in KwaZulu-Natal at 
# 60 sites over several successive months (missions). Each site falls 
# within a wetland cluster and has a protection status (full, partial, 
# non-protected). Habitat and weather measurements were taken after 
# each point count. 
# ------------------------------------------------------------------------


# Install and load libraries ----------------------------------------------
# Install each package only if it is not already available, so re-running
# the script does not download and reinstall it every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse", dependencies = TRUE)
}
if (!requireNamespace("here", quietly = TRUE)) {
  install.packages("here", dependencies = TRUE)
}

library(tidyverse)
library(here)

# Import data using R projects and here package ---------------------------
# In this case the csv files are stored within a subfolder
# called "data" in my R project folder

# Option 1: let here() build the path to the csv inside the "data" folder
counts <- readr::read_csv(file = here("data", "bird_counts.csv"))

## OR

# Import data by setting your own working directory -----------------------
# Option 2: switch the working directory, then read the csv by file name.
# NOTE(review): "C:/Users/..." looks like a placeholder -- substitute the
# folder that actually holds the file before running this option.
setwd(dir = "C:/Users/...")
counts <- readr::read_csv(file = "bird_counts.csv")


# Tibble ------------------------------------------------------------------
# Typing the object name prints the tibble directly
counts  # Say goodbye to head()!

# print() arguments: n sets the number of rows, width = Inf removes the
# limit on output width
counts %>% print(n = 20)
counts %>% print(width = Inf)

# Tidy column names -------------------------------------------------------
# Apply tolower() to every column name
counts <- counts %>% dplyr::rename_all(tolower)
counts

# Select & reorder --------------------------------------------------------
# Put the key columns first; everything() appends all remaining columns
counts <- counts %>%
  dplyr::select(
    site, protection, year, month, abundance, richness,
    everything()
  )
names(counts)

# Drop humidity and every column whose name matches "sl_" or "aq_"
counts <- counts %>%
  dplyr::select(-matches("sl_|aq_"), -humidity)
names(counts)

# Split site variable -----------------------------------------------------
# Split site at each underscore into mission, cluster and site columns
counts <- counts %>%
  tidyr::separate(
    col = site,
    into = c("mission", "cluster", "site"),
    sep = "_"
  )
counts

# Arrange -----------------------------------------------------------------
# Sort rows by mission, then cluster, then site
counts <- counts %>% dplyr::arrange(mission, cluster, site)
counts

# Mutate ------------------------------------------------------------------
# Overwrite year with its value shifted forward by 10
counts <- counts %>% dplyr::mutate(year = year + 10)
counts

# Rescale air_temp as (x - 32) / 1.8 (presumably Fahrenheit to Celsius)
counts <- counts %>% dplyr::mutate(air_temp = (air_temp - 32) / 1.8)
counts

# What about the "$"?

# Filter ------------------------------------------------------------------
# Extract the ph column on its own to inspect its values
counts[["ph"]]
# Keep only the rows where ph is not missing
counts <- counts %>% dplyr::filter(!is.na(ph))
counts

# All together ------------------------------------------------------------
# Every step from the sections above, chained into one pipeline
counts <- here("data", "bird_counts.csv") %>%
  read_csv() %>%
  # tidy column names
  rename_all(tolower) %>%
  # reorder, then drop unwanted columns
  select(site, protection, year, month, abundance, richness, everything()) %>%
  select(-matches("sl_|aq_"), -humidity) %>%
  # split the site variable
  separate(site, into = c("mission", "cluster", "site"), sep = "_") %>%
  # sort rows
  arrange(mission, cluster, site) %>%
  # adjust year and rescale air_temp
  mutate(year = year + 10, air_temp = (air_temp - 32) / 1.8) %>%
  # remove rows with missing ph
  filter(!is.na(ph))

counts # Success!


# Mutate multiple columns -------------------------------------------------
# The columns from ph through sal, before any transformation
select(counts, ph:sal)

# Apply scale() to each column in ph:sal, then show the result
counts %>%
  mutate_at(vars(ph:sal), scale) %>%
  select(ph:sal)

## Base R ##
# NOTE(review): assumes columns 16 to 20 are ph:sal -- confirm the positions
as.data.frame(apply(counts[, 16:20], MARGIN = 2, FUN = scale))


# Group and summarise -----------------------------------------------------
# Grouping on its own: print the tibble grouped by cluster
group_by(counts, cluster)

# Mean and standard deviation of abundance for each cluster
counts %>%
  group_by(cluster) %>%
  summarise(
    mean_abun = mean(abundance),
    sd_abun = sd(abundance)
  )

# Group, summarise and plot -----------------------------------------------
# Mean and sd of richness per cluster and year, drawn as dodged bars.
# year is converted to a factor before it is mapped to fill.
counts %>%
  mutate(year = as.factor(year)) %>%
  group_by(cluster, year) %>%
  summarise(
    mean_rich = mean(richness),
    sd_rich = sd(richness)
  ) %>%
  ggplot(aes(x = cluster, y = mean_rich, fill = year)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  # error bars start at the mean and extend one sd upwards only
  geom_errorbar(
    aes(ymin = mean_rich, ymax = mean_rich + sd_rich),
    width = .2,
    position = position_dodge(.9)
  )


# Reorder factor levels ---------------------------------------------------
# Bar chart of mean abundance per cluster, with the clusters on the x axis
# ordered by mean_abun via fct_reorder()
counts %>%
  group_by(cluster) %>%
  summarise(mean_abun = mean(abundance)) %>%
  ggplot(aes(x = fct_reorder(cluster, mean_abun), y = mean_abun)) +
  geom_bar(stat = "identity")

## Forcats ##
# Outside a pipeline the columns have to be referenced through counts.
# counts has no mean_abun column, so the clusters are ordered by the mean
# of abundance directly (fct_reorder() summarises with the median unless
# .fun is given).
fct_reorder(counts$cluster, counts$abundance, .fun = mean)

## Base R ##
# Same ordering without forcats: tapply() gives one mean per cluster,
# sort() puts them in ascending order, and their names become the levels.
counts$cluster <- factor(
  counts$cluster,
  levels = names(sort(tapply(counts$abundance, counts$cluster, mean)))
)


# Filter and summarise ----------------------------------------------------
# Rows with protection "FP" and air_temp below 30, summarised per cluster
# and year; only groups whose total abundance exceeds 500 are kept
counts %>%
  filter(protection == "FP", air_temp < 30) %>%
  group_by(cluster, year) %>%
  summarise(
    total = sum(abundance),
    max = max(abundance),
    n_counts = n()
  ) %>%
  filter(total > 500)


## Bird community data ##

# Import data -------------------------------------------------------------
# Read through here() like the bird_counts.csv import, so the path does not
# depend on the current working directory.
# NOTE(review): assumes birds.csv sits in the same "data" subfolder -- confirm
birds <- read_csv(here("data", "birds.csv"))
names(birds)
birds

# Select ------------------------------------------------------------------
# Pick columns by name pattern with the select helpers
birds %>% select(starts_with("Purple"))      # names beginning with "Purple"
birds %>% select(matches("teal|duck|goose")) # names matching the regex
birds %>% select(contains("Tern"))           # names containing "Tern"
birds %>% select(ends_with("fisher"))        # names ending in "fisher"

# Transform from wide to long ---------------------------------------------
# Keep the columns Mission through Site plus those matching "goose"
birds_wide <- select(birds, Mission:Site, matches("goose"))
birds_wide

# Stack every column except Mission:Site into species / abundance pairs
birds_long <- birds_wide %>%
  tidyr::gather(key = species, value = abundance, -(Mission:Site))
arrange(birds_long, desc(abundance))

# Wide to long to diversity -----------------------------------------------
library(vegan)

# Reshape all columns outside Mission:Site to long form, then compute one
# Shannon diversity value per Mission / Cluster / Site combination
bird_div <- birds %>%
  gather(key = species, value = abundance, -(Mission:Site)) %>%
  group_by(Mission, Cluster, Site) %>%
  summarise(shan_div = diversity(abundance, index = "shannon"))
bird_div

# Mean Shannon diversity for each Cluster, as a bar chart
bird_div %>%
  group_by(Cluster) %>%
  summarise(mean_div = mean(shan_div)) %>%
  ggplot(aes(x = Cluster, y = mean_div)) +
  geom_bar(stat = "identity")

# Mean Shannon diversity for each Mission, as a bar chart
bird_div %>%
  group_by(Mission) %>%
  summarise(mean_div = mean(shan_div)) %>%
  ggplot(aes(x = Mission, y = mean_div)) +
  geom_bar(stat = "identity")

# Quick tibble diversion --------------------------------------------------
# A small tibble whose third column is a list holding vectors of different
# types and lengths
tibtest <- tibble(
  first = 1:4,
  second = c("big", "small", "huge", "medium"),
  third = list(1:10, c("a", "b", "c"), 100:200, 1:5)
)
tibtest

tibtest[3]    # single bracket: a tibble containing only the third column
tibtest[[3]]  # double bracket: the list stored in the third column

# Misc --------------------------------------------------------------------

## Sections, snippets, keyboard shortcuts, cheatsheets
