Chapter 2 R Programming
R is a functional programming language. It is most popular among academics and data scientists.
2.1 General Information
2.1.2 Installing a package
# install
install.packages("ggplot2")
# detaching the package (it must currently be attached, e.g. via library(ggplot2))
detach("package:ggplot2", unload = TRUE)
2.1.4 Directories
Get the working directory. Don’t worry about this if you have already created a project and are working in the project directory.
I can assign the path to an object.
wd <- getwd()
wd
Listing environment objects
ls()
I can list files in the working directory and assign it to an object
files <- list.files()
head(files)
Alternative way to list files in the working directory
files2 <- dir()
head(files2)
2.1.5 List specific files
Let’s check the arguments of the `dir` function.
args(dir)
dir(pattern = "^R_", full.names = F, ignore.case = T )
Create a new folder in the WD
old.dir <- getwd()
dir.create("testdir")
2.1.6 Working Directory
As the warning shows, this is not the best way to change the WD in a code chunk. Changing it in the global options is a better way, unless you work in a project folder.
setwd("testdir")
2.1.7 Create a new file
file.create("testdir/mytest.R")
## [1] TRUE
Check if a file exists (interactive use).
file.exists("testdir/mytest.R")
### Sample usage
if(!file.exists("testdir/mytest.R")){
  print("File not exist!")
} else {
  "File exists!"}
2.1.8 Show file info
file.info("testdir/mytest.R")
## size isdir mode mtime ctime
## testdir/mytest.R 0 FALSE 644 2022-07-31 00:33:48 2022-07-31 00:33:48
## atime uid gid uname grname
## testdir/mytest.R 2022-07-31 00:32:35 501 20 deayan staff
2.1.9 Listing files in a directory
args(list.files)
## function (path = ".", pattern = NULL, all.files = FALSE, full.names = FALSE,
## recursive = FALSE, ignore.case = FALSE, include.dirs = FALSE,
## no.. = FALSE)
## NULL
Let’s list all the files whose names start with “my”
myfiles <- list.files(path="testdir",
                      pattern = "^my")
### print the second file in myfiles
myfiles[2]
## [1] "mytest10.R"
2.1.10 renaming a file
args(file.rename)
## function (from, to)
## NULL
file.rename("testdir/mytest.R", "testdir/mytest10.R")
2.1.12 file path
Assign a name to a file path (It does not matter if file exists or not)
path1 <- file.path(to = "new/mytest3.R")
path1
Alternative way to assign name to a file path
abc <- file.path(from="new2", to="mytest3.R")
abc
Create a directory: testdir/new
dir.create(file.path("testdir", "new"), recursive = TRUE )
2.2 Create Data
2.2.1 Create sequence of numbers
a <- seq(from = 5, to = 14, by = 2)
a
## [1] 5 7 9 11 13
seq(10, 20) ## default increment is 1
## [1] 10 11 12 13 14 15 16 17 18 19 20
seq(10, 30, by =2)
## [1] 10 12 14 16 18 20 22 24 26 28 30
`along.with`: take the length from the length of this argument.
So, this will create a sequence from 1 to 100 whose length equals the length of `1:4`, i.e. 4.
seq(1, 100, along.with = 1:4)
## [1] 1 34 67 100
`length.out`: desired length of the sequence.
seq(1, 100, length.out = 4)
## [1] 1 34 67 100
2.2.2 short cuts
This will create a sequence starting from 1 with length equal to the length of the argument.
seq_along(1:10)
## [1] 1 2 3 4 5 6 7 8 9 10
This will create a sequence starting from 1 with length equal to the argument.
seq_len(20)
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
2.2.3 %in% statement
This creates a logical vector by testing, for each element of vector “a”, whether it matches any element of vector “b”.
## lets create a sequence
a = seq_len(10)
## test which elements of a is included in the list
a %in% c(2, 4, 6, 8, 0)
## [1] FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE FALSE
Print the elements of vector `a` that are included in the given list
a[a %in% c(2, 4, 6, 8, 0)]
## [1] 2 4 6 8
We can negate this logical statement
# sub-setting property
a[! (a %in% c(2, 4, 6, 8, 0))]
## [1] 1 3 5 7 9 10
2.2.4 `which()` function
which(x, arr.ind = FALSE, useNames = TRUE)
x: the input is a logical vector (the result of a logical statement); `which()` returns the location indices of its TRUE values.
Here, the `which` function returns the indices of the elements of `a` that are greater than 3
which(a > 3)
## [1] 4 5 6 7 8 9 10
Let’s define two character vectors.
d <- LETTERS[1:10]
d
## [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J"
e <- LETTERS[7:10]
e
## [1] "G" "H" "I" "J"
This shows whether each element of vector `d` matches any element of vector `e`
d %in% e
## [1] FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE
And here are the locations of TRUE values of vector d (matches vector e)
which(d %in% e)
## [1] 7 8 9 10
array1 = 1:12
which( array1 %% 2 == 0, arr.ind = F) ## location in the array (1:12)
## [1] 2 4 6 8 10 12
2.2.5 Where is the min, max, first true/false?
which.min() which.max()
# Here is the list
a = c(2, 4, 1, 7, 9, 1, 3, 5, 9, NA, "4")
## print the location of the min element
which.min(a)
## [1] 3
## print the min element itself
a[which.min(a)]
## [1] "1"
## print the location of the max element
which.max(a)
## [1] 5
## print the max element itself
a[which.max(a)]
## [1] "9"
If the input is a logical vector, `which.max` will indicate the first TRUE, and `which.min` will indicate the first FALSE.
which.max(a > 3) ## the second element
## [1] 2
## print the second element
a[which.max(a > 3)]
## [1] "4"
2.2.6 `match(a, b)` function
`match`: returns an integer vector giving the position in `table` (the second argument) of the first match if there is a match, otherwise `nomatch`.
a = 1:15
b = seq(1, 20, by=3)
match(a, b) ## for each element of a, its position in b (NA if there is no match)
## [1] 1 NA NA 2 NA NA 3 NA NA 4 NA NA 5 NA NA
a %in% b
## [1] TRUE FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE
## [13] TRUE FALSE FALSE
2.3 Create a Data Frame
There are multiple options and tools here.
- data frame
- tibble
- data table
df <- cars
head(df)
## speed dist
## 1 4 2
## 2 4 10
## 3 7 4
## 4 7 22
## 5 8 16
## 6 9 10
# test if value 5 in speed column
5 %in% df$speed
## [1] FALSE
# create a dataframe
df2 <- data.frame(Type = c("fruit", "fruit","fruit", "veggie","veggie"),
                  Name = c("red apple", "green apple", "red apple", "green apple" ,"red apple"), Color = c(NA, "red", "blue", "yellow", "red"))
df2
## Type Name Color
## 1 fruit red apple <NA>
## 2 fruit green apple red
## 3 fruit red apple blue
## 4 veggie green apple yellow
## 5 veggie red apple red
df2 <- within(df2,
{ newcol = "No"
  newcol[Type %in% c("fruit")] = "No"
  newcol[Name %in% c( "green apple")] = "Yes"
})
head(df2, 3)
## Type Name Color newcol
## 1 fruit red apple <NA> No
## 2 fruit green apple red Yes
## 3 fruit red apple blue No
subsetting
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
df3 <- c("home", "veggie", "fruit")
df2 %>%
  filter(df2$Type %in% df3)
## Type Name Color newcol
## 1 fruit red apple <NA> No
## 2 fruit green apple red Yes
## 3 fruit red apple blue No
## 4 veggie green apple yellow Yes
## 5 veggie red apple red <NA>
dropping columns
df2[, !(colnames(df2) %in% c("Name", "Color")) ]
## Type newcol
## 1 fruit No
## 2 fruit Yes
## 3 fruit No
## 4 veggie Yes
## 5 veggie <NA>
selecting columns
df2[, (colnames(df2) %in% c("Name", "Color")) ]
## Name Color
## 1 red apple <NA>
## 2 green apple red
## 3 red apple blue
## 4 green apple yellow
## 5 red apple red
creating custom operator
`%notin%` <- Negate(`%in%`)
numbs <- rep(seq(3), 4)
numbs
## [1] 1 2 3 1 2 3 1 2 3 1 2 3
4 %notin% numbs
## [1] TRUE