Chapter 2 R Programming

R is a programming language with strong functional programming features. It is especially popular among academics and data scientists.

2.1 General Information

2.1.1 Cleaning the environment

rm(list = ls())

2.1.2 Installing a package

# install
install.packages("ggplot2")  

# detaching (unloading) the package
detach(ggplot2, unload = TRUE)  

2.1.3 Browsing help on packages

browseVignettes("ggplot2")

2.1.4 Directories

Get the working directory. You don’t need to worry about this if you have already created a project and are working in the project directory.

I can assign the path to an object.

wd <- getwd()
wd

Listing environment objects

ls()

I can list files in the working directory and assign it to an object

files <- list.files()
head(files)

Alternative way to list files in the working directory

files2 <- dir()
head(files2)

2.1.5 List specific files

Let’s check the arguments of the dir function.

args(dir)
dir(pattern = "^R_", full.names = F, ignore.case = T )

Create a new folder in the WD

old.dir <- getwd()
dir.create("testdir")

2.1.6 Working Directory

As the warning shows, this is not the best way to change the working directory (WD) inside a code chunk. Setting it in the global options is a better way, unless you work in a project folder.

setwd("testdir")

2.1.7 Create a new file

file.create("testdir/mytest.R")
## [1] TRUE

Check if a file exists (interactive use).

file.exists("testdir/mytest.R")

### Sample usage
if(!file.exists("testdir/mytest.R")){
  print("File not exist!")
  } else {
    "File exists!"}

2.1.8 Show file info

file.info("testdir/mytest.R")
##                  size isdir mode               mtime               ctime
## testdir/mytest.R    0 FALSE  644 2022-07-31 00:33:48 2022-07-31 00:33:48
##                                atime uid gid  uname grname
## testdir/mytest.R 2022-07-31 00:32:35 501  20 deayan  staff

2.1.9 Listing files in a directory

args(list.files)
## function (path = ".", pattern = NULL, all.files = FALSE, full.names = FALSE, 
##     recursive = FALSE, ignore.case = FALSE, include.dirs = FALSE, 
##     no.. = FALSE) 
## NULL

Let’s list all the files whose names start with “my”.

myfiles <- list.files(path="testdir", 
                      pattern = "^my")


### print the second file in myfiles
myfiles[2]
## [1] "mytest10.R"

2.1.10 renaming a file

args(file.rename)
## function (from, to) 
## NULL
file.rename("testdir/mytest.R", "testdir/mytest10.R")

2.1.11 copy a file from and to

file.copy("testdir/mytest2.R", "testdir/mytest3.R")

2.1.12 file path

Assign a file path to a name (it does not matter whether the file exists or not).

path1 <- file.path(to = "new/mytest3.R")
path1

Alternative way to assign name to a file path

abc <- file.path(from="new2", to="mytest3.R")
abc

Create a directory: testdir/new

dir.create(file.path("testdir", "new"), recursive = TRUE )

2.1.13 Show arguments of a function

str(file.path)
args((file.path))

2.1.14 this needs more context and examples

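Attaching a data frame changes the search path so that you can use its variable names directly.

However, this is not good practice: attached variables can mask, or be masked by, other objects with the same name.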

attach(mtcars)
detach(mtcars)

The best alternative is to use the with() function, e.g. with(mtcars, mean(mpg)).

2.2 Create Data

2.2.1 Create sequence of numbers

a <- seq(from = 5, to = 14, by = 2)
a
## [1]  5  7  9 11 13
seq(10, 20) ## default increment is 1
##  [1] 10 11 12 13 14 15 16 17 18 19 20
seq(10, 30, by =2)
##  [1] 10 12 14 16 18 20 22 24 26 28 30

along.with: take the length from the length of this argument.

So, this will create a sequence from 1 to 100 with length = 4 (the length of 1:4).

seq(1, 100, along.with = 1:4)
## [1]   1  34  67 100

length.out: desired length of the sequence.

seq(1, 100, length.out = 4)
## [1]   1  34  67 100

2.2.2 short cuts

This will create a sequence starting from 1 with length equal to the length of the argument.

seq_along(1:10)
##  [1]  1  2  3  4  5  6  7  8  9 10

This will create a sequence starting from 1 with length equal to the argument.

seq_len(20)
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20

2.2.3 %in% statement

This creates a logical vector by testing whether each element of vector “a” matches any element of vector “b”.

## lets create a sequence
a = seq_len(10)

## test which elements of a are included in the given vector
a %in% c(2, 4, 6, 8, 0)
##  [1] FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE FALSE

Print the elements of vector a that are included in the given vector.

a[a %in% c(2, 4, 6, 8, 0)]
## [1] 2 4 6 8

We can negate this logical statement

# sub-setting property

a[ ! (a %in% c(2, 4, 6, 8, 0))]
## [1]  1  3  5  7  9 10

2.2.4 which() function

which(x, arr.ind = FALSE, useNames = TRUE)

x: a logical vector, usually the result of a logical expression

Returns a vector with the location indices of the TRUE values

Here, the which function returns the indices of the elements of vector a that are greater than 3.

which(a > 3)
## [1]  4  5  6  7  8  9 10

Let’s define two character vectors.

d <- LETTERS[1:10]
d
##  [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J"
e <- LETTERS[7:10]
e
## [1] "G" "H" "I" "J"

This shows whether each element of vector d matches any element of vector e.

d %in% e
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE

And here are the locations of TRUE values of vector d (matches vector e)

which(d %in% e)   
## [1]  7  8  9 10
array1 = 1:12

which( array1 %% 2 == 0, arr.ind = F)   ## location in the array (1:12)
## [1]  2  4  6  8 10 12

2.2.5 Where is the min, max, first true/false?

which.min() which.max()

# Here is the vector (mixing numbers with "4" makes it a character vector)
a = c(2, 4, 1, 7, 9, 1, 3, 5, 9, NA, "4")

## print the location of the min element 
which.min(a)
## [1] 3
## print the min element itself
a[which.min(a)]
## [1] "1"
## print the location of the max element
which.max(a)
## [1] 5
## print the max element itself
a[which.max(a)]
## [1] "9"

If the input is a logical vector, which.max will indicate the first TRUE, and which.min will indicate the first FALSE.

which.max(a > 3) ## the second element
## [1] 2
## print the second element
a[which.max(a > 3)]
## [1] "4"

2.2.6 match(a, b) function

match returns an integer vector giving the position in table (the second argument) of the first match if there is a match, otherwise nomatch (NA by default).

a = 1:15
b = seq(1, 20, by=3)

match(a, b)  ## for each element of a, returns its position in b (NA if no match)
##  [1]  1 NA NA  2 NA NA  3 NA NA  4 NA NA  5 NA NA
a %in% b
##  [1]  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE
## [13]  TRUE FALSE FALSE

2.3 Create a Data Frame

There are multiple options and tools here.

  • data frame
  • tibble
  • data table
df <- cars
head(df)
##   speed dist
## 1     4    2
## 2     4   10
## 3     7    4
## 4     7   22
## 5     8   16
## 6     9   10
# test if value 5 in speed column
5 %in% df$speed
## [1] FALSE
# create a dataframe
df2 <- data.frame(Type = c("fruit", "fruit","fruit", "veggie","veggie"),
                  Name = c("red apple", "green apple", "red apple", "green apple" ,"red apple"), Color = c(NA, "red", "blue", "yellow", "red"))

df2
##     Type        Name  Color
## 1  fruit   red apple   <NA>
## 2  fruit green apple    red
## 3  fruit   red apple   blue
## 4 veggie green apple yellow
## 5 veggie   red apple    red
df2 <- within(df2, 
              { newcol = "No"
              newcol[Type %in% c("fruit")] = "No"
              newcol[Name %in% c( "green apple")] = "Yes"
})

head(df2, 3)
##    Type        Name Color newcol
## 1 fruit   red apple  <NA>     No
## 2 fruit green apple   red    Yes
## 3 fruit   red apple  blue     No

subsetting

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
df3 <- c("home", "veggie", "fruit")

df2 %>%
    filter(df2$Type %in% df3)
##     Type        Name  Color newcol
## 1  fruit   red apple   <NA>     No
## 2  fruit green apple    red    Yes
## 3  fruit   red apple   blue     No
## 4 veggie green apple yellow    Yes
## 5 veggie   red apple    red   <NA>

dropping columns

df2[, !(colnames(df2) %in% c("Name", "Color")) ]
##     Type newcol
## 1  fruit     No
## 2  fruit    Yes
## 3  fruit     No
## 4 veggie    Yes
## 5 veggie   <NA>

selecting columns

df2[, (colnames(df2) %in% c("Name", "Color")) ]
##          Name  Color
## 1   red apple   <NA>
## 2 green apple    red
## 3   red apple   blue
## 4 green apple yellow
## 5   red apple    red

creating custom operator

`%notin%` <- Negate(`%in%`)

numbs <- rep(seq(3), 4)
numbs
##  [1] 1 2 3 1 2 3 1 2 3 1 2 3
4 %notin% numbs
## [1] TRUE