123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501 |
- # R reference
- which R /usr/lib/R
- R RHOME /usr/lib/R
- # run bash commands from within R shell
- > system('cd Reference; vi -M rref.txt')
- > system('pwd')
- > system('ulimit -s')
- > system('ulimit -s -H')
- # capture the output from shell commands and save it into R
- > nlines <- system('wc -l < /home/saran/Analytics/DataSet/Hr5m.csv', intern=TRUE)
- licence()
- RShowDoc("COPYING")
- demo()
- demo(scoping)
- # to get information on specific function
- help(solve)/?solve
- help.search("solve")/??solve
- # special characters and syntactic meaning
- help('[[')
- help('if')
- help('for')
- help('function')
- help.search('function')
- ?help.search
- # help in HTML format
- help.start()
- # readline
- help('readline')
- example(solve)
- example(print)
- # list of arithmetic and logical operators in R
- + Addition
- - Subtraction
- * Multiplication
- / Division
- ^ / ** Exponentiation
- x %% y Modulo (remainder of integer division)
- x %/% y Integer division
- == Equal to
- != Differs from
- > Greater than
- < Less than
- >= Greater than or equal to
- <= Less than or equal to
- & Logical and
- | Logical or
- ! Logical not
- # mathematical functions
- abs(x) Absolute value
- sqrt(x) Square root
- ceiling(x) Nearest integer >= x
- floor(x) Nearest integer <= x
- trunc(x) Integer part
- round(x, digits=n) Round x to n digits
- sin(x),cos(x),tan(x)Trigonometric functions
- log(x) Natural logarithm
- log10(x) Base 10 logarithm
- exp(x) e raised to the power x
- # R basic data structures
- A data structure is either homogeneous (all elements are of the same data type) or
- heterogeneous (elements can be of more than one data type).
- Dimension Homogeneous Heterogeneous
- 1 Vector List
- 2 Matrix Data Frame
- 3+ Array
- If commands are stored in an external file, they may be executed at any time in an R
- session with the command
- > source('/path/file.R')
- The function sink, will divert all subsequent output from the console to an external
- file, output.txt
- > sink('output.txt')
- The command sink() restores it to the console once again.
- The entities that R creates and manipulates are known as objects. These may be variables,
- arrays of numbers, character strings, functions or more general structures built from such
- components. During an R session, objects are created and stored. The R command
- > objects() (alternatively ls())
- can be used to display the names of the objects which are currently stored within R. The
- collection of objects currently stored is called the workspace. To remove objects the
- function rm is available.
- > rm(x, y)
- objects are written to a file -> .RData (in the current directory)
- command lines used in the session are saved to a file -> .Rhistory (in the current directory)
- It is recommended that you should use separate working directories for analysis conducted
- with R. (similar to virtual environments in Python)
- # assign of a vector
- x <- c(10.4, 5.6, 3.1, 6.4, 21.7)
- assign('x', c(10.4, 5.6, 3.1, 6.4, 21.7))
- c(10.4, 5.6, 3.1, 6.4, 21.7) -> x
- x <- c(1, 2, 3)
- y <- c(4, 5)
- v <- 2*x + y + 1
- 7 10 11
- # elementary arithmetic operators
- x + y - z * x/y + z^4
- sum(x), prod(x), log(x), exp(x)
- sin(x), cos(x), tan(x), sqrt(x)
- max(x), min(x), log10(x), length(x)
- # parallel maximum/minimum
- x <- c(1, 2, 3); y <- c(4, 5)
- max(x, y)
- 5
- min(x, y)
- 1
- pmax(x, y)
- 4 5 4
- pmin(x, y)
- 1 2 3
- # sample mean
- mean(x) = sum(x)/length(x)
- # sample variance
- var(x) = sum((x-mean(x))^2)/(length(x)-1)
- # sort
- sort(x) # returns the values of x sorted in increasing order
- order(x) # returns the index positions that would sort x, i.e. x[order(x)] == sort(x)
- sort.list(x) # same as order(x) for a single vector
- For most purposes the user will not be concerned if the 'numbers' in a numeric vector are
- integers, reals or even complex. Internally calculations are done as double precision real
- numbers, or double precision complex numbers if the input data are complex.
- # integers
- x <- c(1L, 2L, 3L, 4L, 5L)
- # complex number
- sqrt(-17+0i)
- # sequence
- x <- 1:10 is equivalent to x <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
- The colon operator has high priority within an expression.
- x <- 2*1:15 is equivalent to x <- c(2, 4, 6, ..., 26, 28, 30)
- seq(2,10) is equivalent to c(2, 3, 4, 5, 6, 7, 8, 9, 10)
- 1:30 == seq(1,30) == seq(from=1, to=30) == seq(to=30, from=1)
- seq(from=1, to=10, by=2) == c(1, 3, 5, 7, 9)
- seq(from=1, by=2, length=5) == c(1, 3, 5, 7, 9)
- seq(-5, 5, .2) == c(-5.0, -4.8, -4.6, ..., 4.6, 4.8, 5.0)
- # backward sequence
- x <- 30:1 is equivalent to x <- c(30, 29, 28, ..., 1)
- # replicating an object
- x <- c(1, 2, 3, 4, 5)
- rep(x, times=5) == c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5)
- rep(x, each=5) == c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5)
- # logical vectors
- TRUE && FALSE gives FALSE
- TRUE || FALSE gives TRUE
- TRUE == TRUE gives TRUE
- TRUE != TRUE gives FALSE
- logical vectors are generated by conditions
- for example:
- x <- 5
- temp <- x > 13
- temp # sets temp as a vector of the same length as x with values FALSE corresponding to
- the elements of x where the condition is not met and TRUE where it is
- logical operators are <, <=, >, >=, ==, !=
- if c1 and c2 are logical expressions, then c1 & c2 is their intersection ("and"),
- c1 | c2 is their union ("or"), and !c1 is the negation of c1
- # missing values
- In general any operation on an NA becomes an NA. The motivation for this rule is simply
- that if the specification of an operation is incomplete, the result cannot be known and
- hence is not available.
- The function is.na(x) gives a logical vector of the same size as x with value TRUE if and
- only if the corresponding element in x is NA.
- z <- c(1:3, NA)
- z
- 1 2 3 NA
- is.na(z)
- FALSE FALSE FALSE TRUE
- Logical expression z == NA is quite different from is.na(z) since NA is not really a value but
- a marker for a quantity that is not available. Thus z == NA is a vector of the same length as z
- all of whose values are NA as the logical expression itself is incomplete and hence undecidable.
- z == NA
- NA NA NA NA
- # NaN - Not a Number
- 0/0 = NaN
- Inf - Inf = NaN
- is.na() is TRUE both for NA and NaN values
- is.na(0/0)
- TRUE
- is.na(Inf-Inf)
- TRUE
- to differentiate these is.nan() is only TRUE for NaN
- is.na(NA)
- TRUE
- is.nan(0/0)
- TRUE
- is.nan(NA)
- FALSE
- missing values are sometimes printed as <NA> when character vectors are printed without quotes
- # character vectors
- Character strings are entered using either matching double (") or single (') quotes. Escape
- sequences are \n - newline, \t - tab and \b - backspace. (see ?Quotes for a full list)
- paste() function takes an arbitrary number of arguments and concatenates them one by one into
- character strings.
- labs <- paste(c("X", "Y"), 1:10, sep="")
- makes labs into the character vector
- c("X1", "Y2", "X3", "Y4", "X5", "Y6", "X7", "Y8", "X9", "Y10")
- c("X", "Y") is repeated 5 times to match the sequence 1:10
- xnames <- paste(c('x'), 1:3, sep="") # "x1" "x2" "x3"
- ynames <- paste(c('y'), 1:3, sep="") # "y1" "y2" "y3"
- znames <- paste(c('z'), 1:3, sep=",") # "z,1" "z,2" "z,3"
- # index vectors; selecting and modifying subsets of a data set
- 1. logical vector:
- index vector is recycled to the same length as the vector from which elements are to be selected.
- values corresponding to TRUE in the index vector are selected and those corresponding to FALSE are
- omitted.
- y <- x[!is.na(x)]
- creates (or re-creates) an object y which will contain the non-missing values of x, in the
- same order. Note that if x has missing values, y will be shorter than x.
- (x+1)[(!is.na(x)) & x>0] -> z
- creates an object z and places in it the values of the vector x+1 for which the corresponding
- value in x was both non-missing and positive.
- 2. vector of positive integral quantities
- the values in the index vector must lie in the set {1, 2, ..., length(x)}
- x[6] # sixth component of x
- x[1:10] # selects the first 10 elements of x (assuming length(x) is not less than 10)
- c("X","Y")[rep(c(1,2,2,1), times=4)] # produces a character vector of length 16 consisting of
- "X", "Y", "Y", "X" repeated four times
- 3. vector of negative integral quantities
- such an index vector specifies the values to be excluded rather than included
- y <- x[-(1:5)] # gives y all but the first five elements of x
- 4. vector of character strings
- names attribute to identify its components
- fruit <- c(5, 10, 1, 20)
- > fruit
- 5 10 1 20
- names(fruit) <- c("orange", "banana", "apple", "peach")
- > fruit
- orange banana apple peach
- 5 10 1 20
- An indexed expression can also appear on the receiving end of an assignment, in which case the
- assignment operation is performed only on those elements of the vector.
- x[is.na(x)] <- 0 # replaces any missing values in x by zeros
- y[y<0] <- -y[y<0] has the same effect as y <- abs(y)
- $ sudo R
- # install packages
- install.packages("reshape")
- install.packages("dplyr")
- install.packages("ggplot2")
- install.packages("tidyr")
- install.packages("readr")
- install.packages("purrr")
- install.packages("tibble")
- install.packages("stringr")
- install.packages("forcats")
- install.packages("mlr")
- install.packages("jsonlite")
- install.packages("rmarkdown")
- install.packages(c("pkg1", "pkg2", "pkg3"),
- lib = file.path("/home/ndayalan/.R/x86_64-pc-linux-gnu-library/3.2/"))
- # working directory
- getwd()
- # package version
- packageVersion("rmarkdown")
- packageVersion("knitr")
- # get library location
- .libPaths()
- # view all installed packages
- library()
- # view packages currently loaded
- search()
- # new packages
- new.packages()
- # old packages
- old.packages()
- # update packages
- update.packages()
- update.packages(ask = FALSE)
- # remove packages
- remove.packages("pkgname")
- remove.packages(c("pkg1", "pkg2", "pkg3"),
- lib = file.path("/home/ndayalan/.R/x86_64-pc-linux-gnu-library/3.2/"))
- # help packages
- help(package="pkgname")
- help(package="datasets") # list of all in-built datasets in R
- # view in-built datasets
- ls("package:datasets")
- help(package="datasets")
- data() # list of all the available datasets
- data(package = .packages(all.available = TRUE)) # list of all datasets in the available pkgs
- data(package = "package name") # list the datasets from the "package name" package
- data(package = "datasets") # list the datasets from the "datasets" package
- # load in-built dataset
- df <- dataset_name
- # read data from a file
- read.csv(file="/path/of/directory/file.csv")
- read.table(file="/path/of/directory/file.csv", sep=",", header=T)
- data.table::fread("/path/of/directory/file.csv")
- # head(), tail()
- head(df, n=4)
- tail(df, n=4)
- # write data to a file
- write.csv(x=df, file="/path/of/directory/file.csv")
- write.csv(x=df, file="/path/of/directory/file.csv", row.names=FALSE)
- # check a file exists
- file.exists(file="/path/of/directory/file.csv")
- CRAN Mirrors - https://cran.r-project.org/mirrors.html
- # RMarkdown
- pandoc -f markdown -t latex -o output.pdf input.Rmd # output to pdf file
- pandoc -f markdown -t html -o output.html input.Rmd # output to html file
- # to run .Rmd file on R
- > require(rmarkdown)
- > render('notebook.Rmd')
- # list the names of all Language engines
- > names(knitr::knit_engines$get())
- # check python config in R
- > py_config()
- # Themes in RMarkdown - https://bootswatch.com/3/
- default, cerulean, journal, flatly, darkly, readable, spacelab, united, cosmo, lumen, paper,
- sandstone, simplex, yeti
- # highlight
- default, tango, pygments, kate, monochrome, espresso, zenburn, haddock, breezedark, textmate
- # Error in library("devtools") : there is no package called 'devtools'
- $ sudo apt-get install libssl-dev
- $ sudo apt-get install libxml2-dev
- $ sudo apt-get install libcurl4-openssl-dev
- > install.packages("devtools")
- > install.packages("usethis")
- > library('devtools')
- # Statistics in R
- # generate a population of size 10000 from 1 to 100 randomly
- population <- sample.int(100, 10000, replace=TRUE)
- # length of population
- length(population)
- # maximum value
- max(population)
- # minimum value
- min(population)
- # first ten values
- head(population, 10)
- population[1:10]
- # last ten values
- tail(population, 10)
- population[9991:10000]
- # draw a sample from population of size 100
- sample1 <- sample(population, 100)
- # length of sample
- length(sample1)
- # first ten values
- head(sample1, 10)
- sample1[1:10]
- # last ten values
- tail(sample1, 10)
- sample1[91:100]
- # mean
- mean(sample1)
- # median
- median(sample1)
- # mode
- Mode <- function(x) {
- ux <- unique(x)
- ux[which.max(tabulate(match(x, ux)))]
- }
- # R resources
- Official page - https://www.r-project.org
- Download page - https://www.cran.r-project.org
- Stat Methods - https://www.statmethods.net
- R seek - https://www.rseek.org
- UCLA R - https://www.ats.ucla.edu/stat/r/
- UPenn R - https://finzi.psych.upenn.edu/search.html
- RStudio - https://www.rstudio.com
- Rattle - https://rattle.togaware.com
- # statmod package for gauss quadrature
- library(statmod)
- gauss.quad(2, kind="legendre")
|