Chapter 2 R Programming
R is a functional programming language. It is especially popular in academia and among data scientists.
2.1 General Information
2.1.2 Installing a package
# install
install.packages("ggplot2")
# detaching (unloading) the package; detach() expects the search-path name, e.g. "package:ggplot2"
detach(ggplot2, unload = TRUE) 2.1.4 Directories
Get the working directory. You do not need to worry about this if you have already created a project and are working in the project directory.
I can assign the path to an object.
wd <- getwd()
wdListing environment objects
ls()I can list files in the working directory and assign it to an object
files <- list.files()
head(files)Alternative way to list files in the working directory
files2 <- dir()
head(files2)2.1.5 List specific files
Let’s check the arguments of dir function.
args(dir)dir(pattern = "^R_", full.names = F, ignore.case = T )Create a new folder in the WD
old.dir <- getwd()
dir.create("testdir")2.1.6 Working Directory
As the warning shows, this is not the best way to change the working directory (WD) in a code chunk. Changing it in the global options is a better way, unless you are working in a project folder.
setwd("testdir")2.1.7 Create a new file
file.create("testdir/mytest.R")## [1] TRUE
Check whether a file exists (interactive use).
file.exists("testdir/mytest.R")
### Sample usage
if(!file.exists("testdir/mytest.R")){
print("File not exist!")
} else {
"File exists!"}2.1.8 Show file info
file.info("testdir/mytest.R")## size isdir mode mtime ctime
## testdir/mytest.R 0 FALSE 644 2022-07-31 00:33:48 2022-07-31 00:33:48
## atime uid gid uname grname
## testdir/mytest.R 2022-07-31 00:32:35 501 20 deayan staff
2.1.9 Listing files in a directory
args(list.files)## function (path = ".", pattern = NULL, all.files = FALSE, full.names = FALSE,
## recursive = FALSE, ignore.case = FALSE, include.dirs = FALSE,
## no.. = FALSE)
## NULL
Let’s list all the files whose names start with “my”.
myfiles <- list.files(path="testdir",
pattern = "^my")
### print the second file in myfiles
myfiles[2]## [1] "mytest10.R"
2.1.10 renaming a file
args(file.rename)## function (from, to)
## NULL
file.rename("testdir/mytest.R", "testdir/mytest10.R")2.1.12 file path
Build a file path and assign it to an object (it does not matter whether the file exists or not).
path1 <- file.path(to = "new/mytest3.R")
path1Alternative way to assign name to a file path
abc <- file.path(from="new2", to="mytest3.R")
abcCreate a directory: testdir/new
dir.create(file.path("testdir", "new"), recursive = TRUE )2.2 Create Data
2.2.1 Create sequence of numbers
a <- seq(from = 5, to = 14, by = 2)
a## [1] 5 7 9 11 13
seq(10, 20) ## default increment is 1## [1] 10 11 12 13 14 15 16 17 18 19 20
seq(10, 30, by =2)## [1] 10 12 14 16 18 20 22 24 26 28 30
along.with: take the length of the sequence from the length of this argument.
So, this will create a sequence from 1 to 100 with length = 4 (the length of 1:4).
seq(1, 100, along.with = 1:4)## [1] 1 34 67 100
length.out: the desired length of the sequence.
seq(1, 100, length.out = 4)## [1] 1 34 67 100
2.2.2 short cuts
This will create a sequence starting from 1 with length equal to the length of the argument.
seq_along(1:10)## [1] 1 2 3 4 5 6 7 8 9 10
This will create a sequence starting from 1 with length equal to the argument.
seq_len(20)## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
2.2.3 %in% statement
This creates a logical vector by testing, for each element of vector “a”, whether it matches any element of vector “b”.
## lets create a sequence
a = seq_len(10)
## test which elements of a are included in the given vector
a %in% c(2, 4, 6, 8, 0)## [1] FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE FALSE
Print the elements of vector a that are included in the given vector.
a[a %in% c(2, 4, 6, 8, 0)]## [1] 2 4 6 8
We can negate this logical statement
# sub-setting property
a[ ! (a %in% c(2, 4, 6, 8, 0))]## [1] 1 3 5 7 9 10
2.2.4 which() function
which(x, arr.ind = FALSE, useNames = TRUE)
x: a logical vector (typically the result of a logical expression).
Returns: a vector with the position indices of the TRUE values.
Here, which() returns the indices of the elements of vector a that are greater than 3.
which(a > 3)## [1] 4 5 6 7 8 9 10
Let’s define two character vectors.
d <- LETTERS[1:10]
d## [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J"
e <- LETTERS[7:10]
e## [1] "G" "H" "I" "J"
This shows, for each element of vector d, whether it matches any element of vector e.
d %in% e## [1] FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE
And here are the locations of TRUE values of vector d (matches vector e)
which(d %in% e) ## [1] 7 8 9 10
array1 = 1:12
which( array1 %% 2 == 0, arr.ind = F) ## location in the array (1:12)## [1] 2 4 6 8 10 12
2.2.5 Where is the min, max, first true/false?
which.min() which.max()
# Here is the vector (note: including "4" makes every element a character string, so results print in quotes)
a = c(2, 4, 1, 7, 9, 1, 3, 5, 9, NA, "4")
## print the location of the min element
which.min(a)## [1] 3
## print the min element itself
a[which.min(a)]## [1] "1"
## print the location of the max element
which.max(a)## [1] 5
## print the max element itself
a[which.max(a)]## [1] "9"
If the input is a logical vector, which.max() returns the index of the first TRUE, and which.min() returns the index of the first FALSE.
which.max(a > 3) ## the second element## [1] 2
## print the second element
a[which.max(a > 3)]## [1] "4"
2.2.6 match(a, b) function
match: An integer vector giving the position in table of the first match if there is a match, otherwise nomatch.
a = 1:15
b = seq(1, 20, by=3)
match(a, b) ## returns location of true values of vector a## [1] 1 NA NA 2 NA NA 3 NA NA 4 NA NA 5 NA NA
a %in% b## [1] TRUE FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE
## [13] TRUE FALSE FALSE
2.3 Create a Data Frame
There are multiple options and tools here.
- data frame
- tibble
- data table
df <- cars
head(df)## speed dist
## 1 4 2
## 2 4 10
## 3 7 4
## 4 7 22
## 5 8 16
## 6 9 10
# test whether the value 5 is in the speed column
5 %in% df$speed## [1] FALSE
# create a dataframe
df2 <- data.frame(Type = c("fruit", "fruit","fruit", "veggie","veggie"),
Name = c("red apple", "green apple", "red apple", "green apple" ,"red apple"), Color = c(NA, "red", "blue", "yellow", "red"))
df2## Type Name Color
## 1 fruit red apple <NA>
## 2 fruit green apple red
## 3 fruit red apple blue
## 4 veggie green apple yellow
## 5 veggie red apple red
df2 <- within(df2,
{ newcol = "No"
newcol[Type %in% c("fruit")] = "No"
newcol[Name %in% c( "green apple")] = "Yes"
})
head(df2, 3)## Type Name Color newcol
## 1 fruit red apple <NA> No
## 2 fruit green apple red Yes
## 3 fruit red apple blue No
subsetting
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
df3 <- c("home", "veggie", "fruit")
df2 %>%
filter(df2$Type %in% df3)## Type Name Color newcol
## 1 fruit red apple <NA> No
## 2 fruit green apple red Yes
## 3 fruit red apple blue No
## 4 veggie green apple yellow Yes
## 5 veggie red apple red <NA>
dropping columns
df2[, !(colnames(df2) %in% c("Name", "Color")) ]## Type newcol
## 1 fruit No
## 2 fruit Yes
## 3 fruit No
## 4 veggie Yes
## 5 veggie <NA>
selecting columns
df2[, (colnames(df2) %in% c("Name", "Color")) ]## Name Color
## 1 red apple <NA>
## 2 green apple red
## 3 red apple blue
## 4 green apple yellow
## 5 red apple red
creating custom operator
`%notin%` <- Negate(`%in%`)
numbs <- rep(seq(3), 4)
numbs## [1] 1 2 3 1 2 3 1 2 3 1 2 3
4 %notin% numbs## [1] TRUE