dsaravanan
/
reference


			
							# R reference

which R  /usr/lib/R
R RHOME  /usr/lib/R

# run bash commands from within R shell
> system('cd Reference; vi -M rref.txt')
> system('pwd')
> system('ulimit -s')
> system('ulimit -s -H')

# capture the output from shell commands and save it into R
> nlines <- system('wc -l < /home/saran/Analytics/DataSet/Hr5m.csv', intern=TRUE)

licence()
RShowDoc("COPYING")
demo()
demo(scoping)

# to get information on specific function
help(solve)/?solve
help.search("solve")/??solve

# special characters and syntactic meaning
help('[[')
help('if')
help('for')
help('function')
help.search('function')
?help.search

# help in HTML format
help.start()

# readline
help('readline')

example(solve)
example(print)

# list of arithmetic and logical operators in R
+       Addition
-       Subtraction
*       Multiplication
/       Division
^ / **  Exponentiation
x %% y  Modulo (remainder of integer division)
x %/% y Integer division
==      Equal to
!=      Differs from
>       Greater than
<       Less than
>=      Greater than or equal to
<=      Less than or equal to
&       Logical and
|       Logical or
!       Logical not

# mathematical functions
abs(x)              Absolute value
sqrt(x)             Square root
ceiling(x)          Nearest integer >= x
floor(x)            Nearest integer <= x
trunc(x)            Integer part
round(x, digits=n)  Round x to n digits
sin(x),cos(x),tan(x) Trigonometric functions
log(x)              Natural logarithm
log10(x)            Base 10 logarithm
exp(x)              e raised to the power x

# R basic data structures
A data structure is either homogeneous (all elements are of the same data type) or
heterogeneous (elements can be of more than one data type).

Dimension   Homogeneous     Heterogeneous
1           Vector          List
2           Matrix          Data Frame
3+          Array


If commands are stored in an external file, they may be executed at any time in an R
session with the command
> source('/path/file.R')

The function sink will divert all subsequent output from the console to an external
file, e.g. output.txt
> sink('output.txt')
The command sink() restores it to the console once again.

The entities that R creates and manipulates are known as objects. These may be variables,
arrays of numbers, character strings, functions or more general structures built from such
components. During an R session, objects are created and stored. The R command
> objects() (alternatively ls()) 
can be used to display the names of the objects which are currently stored within R. The
collection of objects currently stored is called the workspace. To remove objects the 
function rm is available.
> rm(x, y)

objects are written to a file -> .RData (in the current directory)
command lines used in the session are saved to a file -> .Rhistory (in the current directory)

It is recommended that you should use separate working directories for analysis conducted 
with R. (similar to virtual environments in Python)

# assign of a vector
x <- c(10.4, 5.6, 3.1, 6.4, 21.7)
assign('x', c(10.4, 5.6, 3.1, 6.4, 21.7))
c(10.4, 5.6, 3.1, 6.4, 21.7) -> x

x <- c(1, 2, 3)
y <- c(4, 5)
v <- 2*x + y + 1
7 10 11

# elementary arithmetic operators
x + y - z * x/y + z^4
sum(x), prod(x), log(x), exp(x)
sin(x), cos(x), tan(x), sqrt(x)
max(x), min(x), log10(x), length(x)

# parallel maximum/minimum
x <- c(1, 2, 3); y <- c(4, 5)
max(x, y) 
5
min(x, y)
1
pmax(x, y)
4 5 4
pmin(x, y)
1 2 3

# sample mean
mean(x) = sum(x)/length(x)
# sample variance
var(x) = sum((x-mean(x))^2)/(length(x)-1)

# sort
sort(x)      # returns the sorted vector
order(x)     # returns the positions (indices) of the values in sorted order
sort.list(x) # returns the positions (indices) of the values in sorted order

For most purposes the user will not be concerned if the 'numbers' in a numeric vector are
integers, reals or even complex. Internally calculations are done as double precision real
numbers, or double precision complex numbers if the input data are complex.

# integers
x <- c(1L, 2L, 3L, 4L, 5L)

# complex number
sqrt(-17+0i)

# sequence
x <- 1:10 is equivalent to x <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
The colon operator has high priority within an expression.
x <- 2*1:15 is equivalent to x <- c(2, 4, 6, ..., 26, 28, 30)

seq(2,10) is equivalent to c(2, 3, 4, 5, 6, 7, 8, 9, 10)

1:30 == seq(1,30) == seq(from=1, to=30) == seq(to=30, from=1)

seq(from=1, to=10, by=2) == c(1, 3, 5, 7, 9)
seq(from=1, by=2, length=5) == c(1, 3, 5, 7, 9)
seq(-5, 5, .2) == c(-5.0, -4.8, -4.6, ..., 4.6, 4.8, 5.0)

# backward sequence
x <- 30:1 is equivalent to x <- c(30, 29, 28, ..., 1)

# replicating an object
x <- c(1, 2, 3, 4, 5)
rep(x, times=5) == c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5)
rep(x, each=5)  == c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5)

# logical vectors
TRUE && FALSE gives FALSE
TRUE || FALSE gives TRUE
TRUE == TRUE  gives TRUE
TRUE != TRUE  gives FALSE

logical vectors are generated by conditions
for example: 
x <- 5 
temp <- x > 13
temp   # sets temp as a vector of the same length as x with values FALSE corresponding to 
         the elements of x where the condition is not met and TRUE where it is

comparison operators are <, <=, >, >=, ==, != (each yields a logical vector)
if c1 and c2 are logical expressions, then c1 & c2 is their intersection ("and"), 
c1 | c2 is their union ("or"), and !c1 is the negation of c1

# missing values
In general any operation on an NA becomes an NA. The motivation for this rule is simply 
that if the specification of an operation is incomplete, the result cannot be known and
hence is not available.

The function is.na(x) gives a logical vector of the same size as x with value TRUE if and
only if the corresponding element in x is NA.

z <- c(1:3, NA)
z
1 2 3 NA
is.na(z)
FALSE FALSE FALSE TRUE

Logical expression z == NA is quite different from is.na(z) since NA is not really a value but
a marker for a quantity that is not available. Thus z == NA is a vector of the same length as z
all of whose values are NA as the logical expression itself is incomplete and hence undecidable.

z == NA
NA NA NA NA

# NaN - Not a Number
0/0 = NaN
Inf - Inf = NaN

is.na() is TRUE both for NA and NaN values

is.na(0/0)
TRUE
is.na(Inf-Inf)
TRUE

to differentiate these is.nan() is only TRUE for NaN

is.na(NA)
TRUE
is.nan(0/0)
TRUE
is.nan(NA)
FALSE

missing values are sometimes printed as <NA> when character vectors are printed without quotes

# character vectors
Character strings are entered using either matching double (") or single (') quotes. Escape 
sequences are \n - newline, \t - tab and \b - backspace. (see ?Quotes for a full list)

paste() function takes an arbitrary number of arguments and concatenates them one by one into
character strings.

labs <- paste(c("X", "Y"), 1:10, sep="") 
makes labs into the character vector
c("X1", "Y2", "X3", "Y4", "X5", "Y6", "X7", "Y8", "X9", "Y10")
c("X", "Y") is repeated 5 times to match the sequence 1:10

xnames <- paste(c('x'), 1:3, sep="")  # "x1" "x2" "x3"
ynames <- paste(c('y'), 1:3, sep="")  # "y1" "y2" "y3"
znames <- paste(c('z'), 1:3, sep=",") # "z,1" "z,2" "z,3"

# index vectors; selecting and modifying subsets of a data set

1. logical vector:
index vector is recycled to the same length as the vector from which elements are to be selected.
values corresponding to TRUE in the index vector are selected and those corresponding to FALSE are 
omitted.

y <- x[!is.na(x)] 
creates (or re-creates) an object y which will contain the non-missing values of x, in the
same order. Note that if x has missing values, y will be shorter than x.

(x+1)[(!is.na(x)) & x>0] -> z
creates an object z and places in it the values of the vector x+1 for which the corresponding
values in x were both non-missing and positive.

2. vector of positive integral quantities
the values in the index vector must lie in the set {1, 2, ..., length(x)}

x[6] # sixth component of x
x[1:10] # selects the first 10 elements of x (assuming length(x) is not less than 10)

c("X","Y")[rep(c(1,2,2,1), times=4)] # produces a character vector of length 16 consisting of
"X", "Y", "Y", "X" repeated four times

3. vector of negative integral quantities
such an index vector specifies the values to be excluded rather than included

y <- x[-(1:5)] # gives y all but the first five elements of x

4. vector of character strings
names attribute to identify its components

fruit <- c(5, 10, 1, 20)
> fruit
5 10 1 20
names(fruit) <- c("orange", "banana", "apple", "peach")
> fruit
orange banana apple peach
     5     10     1    20


An indexed expression can also appear on the receiving end of an assignment, in which case the
assignment operation is performed only on those elements of the vector.
x[is.na(x)] <- 0 # replaces any missing values in x by zeros
y[y<0] <- -y[y<0] has the same effect as y <- abs(y)


$ sudo R
# install packages
install.packages("reshape")
install.packages("dplyr")
install.packages("ggplot2")
install.packages("tidyr")
install.packages("readr")
install.packages("purrr")
install.packages("tibble")
install.packages("stringr")
install.packages("forcats")
install.packages("mlr")
install.packages("jsonlite")
install.packages("rmarkdown")
install.packages(c("pkg1", "pkg2", "pkg3"),
lib = file.path("/home/ndayalan/.R/x86_64-pc-linux-gnu-library/3.2/"))

# working directory
getwd()

# package version
packageVersion("rmarkdown")
packageVersion("knitr")

# get library location
.libPaths()

# view all installed packages
library()

# view packages currently loaded
search()

# new packages
new.packages()

# old packages
old.packages()

# update packages
update.packages()
update.packages(ask = FALSE)

# remove packages
remove.packages("pkgname")
remove.packages(c("pkg1", "pkg2", "pkg3"),
lib = file.path("/home/ndayalan/.R/x86_64-pc-linux-gnu-library/3.2/"))

# help packages
help(package="pkgname")
help(package="datasets") # list of all in-built datasets in R

# view in-built datasets
ls("package:datasets")
help(package="datasets")
data()                                          # list of all the available datasets
data(package = .packages(all.available = TRUE)) # list of all datasets in the available pkgs
data(package = "package name")  # list the datasets from the "package name" package
data(package = "datasets")      # list the datasets from the "datasets" package

# load in-built dataset
df <- dataset_name

# read data from a file
read.csv(file="/path/of/directory/file.csv")
read.table(file="/path/of/directory/file.csv", sep=",", header=TRUE)
data.table::fread("/path/of/directory/file.csv")

# head(), tail()
head(df, n=4)
tail(df, n=4)

# write data to a file
write.csv(x=df, file="/path/of/directory/file.csv")
write.csv(x=df, file="/path/of/directory/file.csv", row.names=FALSE)

# check a file exists
file.exists(file="/path/of/directory/file.csv")

CRAN Mirrors - https://cran.r-project.org/mirrors.html

# RMarkdown
pandoc -f markdown -t latex -o output.pdf input.Rmd   # output to pdf file
pandoc -f markdown -t html -o output.html input.Rmd   # output to html file

# to run .Rmd file on R
> require(rmarkdown)
> render('notebook.Rmd')

# list the names of all Language engines
> names(knitr::knit_engines$get())

# check python config in R (py_config() is provided by the reticulate package)
> py_config()

# Themes in RMarkdown - https://bootswatch.com/3/
default, cerulean, journal, flatly, darkly, readable, spacelab, united, cosmo, lumen, paper,
sandstone, simplex, yeti

# highlight 
default, tango, pygments, kate, monochrome, espresso, zenburn, haddock, breezedark, textmate

# Error in library("devtools") : there is no package called 'devtools'
$ sudo apt-get install libssl-dev
$ sudo apt-get install libxml2-dev
$ sudo apt-get install libcurl4-openssl-dev
> install.packages("devtools")
> install.packages("usethis")
> library('devtools')

# Statistics in R

# Generate a population of 10000 integers drawn at random, with replacement,
# from 1 to 100.
population <- sample.int(100, 10000, replace = TRUE)
# length of population
length(population)
# maximum value
max(population)
# minimum value
min(population)
# first ten values
head(population, 10)
population[1:10]
# last ten values (the population has 10000 elements, so indices 9991 to 10000;
# indexing past the end would return NA for every out-of-range position)
tail(population, 10)
population[9991:10000]

# draw a sample of size 100 from the population
# (sample() draws without replacement unless replace = TRUE is given)
sample1 <- sample(population, 100)
# length of sample
length(sample1)
# first ten values
head(sample1, 10)
sample1[1:10]
# last ten values
tail(sample1, 10)
sample1[91:100]

# mean
mean(sample1)

# median
median(sample1)

# mode
Mode <- function(x) {
    # Statistical mode: the most frequently occurring value of x.
    # When several values share the highest count, the one that appears
    # first in x is returned (which.max picks the first maximum).
    distinct_values <- unique(x)
    # Position of each element of x within the distinct values.
    positions <- match(x, distinct_values)
    # Occurrence count for each distinct value, in order of first appearance.
    counts <- tabulate(positions)
    distinct_values[which.max(counts)]
}

# R resources
Official page - https://www.r-project.org
Download page - https://cran.r-project.org
Stat Methods - https://www.statmethods.net
R seek - https://www.rseek.org
UCLA R - https://www.ats.ucla.edu/stat/r/
UPenn R - https://finzi.psych.upenn.edu/search.html
RStudio - https://www.rstudio.com
Rattle - https://rattle.togaware.com

# statmod package for gauss quadrature
library(statmod)
gauss.quad(2, kind="legendre")