rref.txt 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501
  1. # R reference
  2. which R /usr/lib/R
  3. R RHOME /usr/lib/R
  4. # run bash commands from within R shell
  5. > system('cd Reference; vi -M rref.txt')
  6. > system('pwd')
  7. > system('ulimit -s')
  8. > system('ulimit -s -H')
  9. # capture the output from shell commands and save it into R
  10. > nlines <- system('wc -l < /home/saran/Analytics/DataSet/Hr5m.csv', intern=TRUE)
  11. licence()
  12. RShowDoc("COPYING")
  13. demo()
  14. demo(scoping)
  15. # to get information on specific function
  16. help(solve)/?solve
  17. help.search("solve")/??solve
  18. # special characters and syntactic meaning
  19. help('[[')
  20. help('if')
  21. help('for')
  22. help('function')
  23. help.search('function')
  24. ?help.search
  25. # help in HTML format
  26. help.start()
  27. # readline
  28. help('readline')
  29. example(solve)
  30. example(print)
  31. # list of arithmetic and logical operators in R
  32. + Addition
  33. - Subtraction
  34. * Multiplication
  35. / Division
  36. ^ / ** Exponentiation
  37. x %% y Modulo (remainder of integer division)
  38. x %/% y Integer division
  39. == Equal to
  40. != Differs from
  41. > Greater than
  42. < Less than
  43. >= Greater than or equal to
  44. <= Less than or equal to
  45. & Logical and
  46. | Logical or
  47. ! Logical not
  48. # mathematical functions
  49. abs(x) Absolute value
  50. sqrt(x) Square root
  51. ceiling(x) Nearest integer >= x
  52. floor(x) Nearest integer <= x
  53. trunc(x) Integer part
  54. round(x, digits=n) Round x to n digits
  55. sin(x),cos(x),tan(x) Trigonometric functions
  56. log(x) Natural logarithm
  57. log10(x) Base 10 logarithm
  58. exp(x) e raised to the power x
  59. # R basic data structures
  60. A data structure is either homogeneous (all elements are of the same data type) or
  61. heterogeneous (elements can be of more than one data type).
  62. Dimension Homogeneous Heterogeneous
  63. 1 Vector List
  64. 2 Matrix Data Frame
  65. 3+ Array
  66. If commands are stored in an external file, they may be executed at any time in an R
  67. session with the command
  68. > source('/path/file.R')
  69. The function sink, will divert all subsequent output from the console to an external
  70. file, output.txt
  71. > sink('output.txt')
  72. The command sink() restores it to the console once again.
  73. The entities that R creates and manipulates are known as objects. These may be variables,
  74. arrays of numbers, character strings, functions or more general structures built from such
  75. components. During an R session, objects are created and stored. The R command
  76. > objects() (alternatively ls())
  77. can be used to display the names of the objects which are currently stored within R. The
  78. collection of objects currently stored is called the workspace. To remove objects the
  79. function rm is available.
  80. > rm(x, y)
  81. objects are written to a file -> .RData (in the current directory)
  82. command lines used in the session are saved to a file -> .Rhistory (in the current directory)
  83. It is recommended that you should use separate working directories for analysis conducted
  84. with R. (similar to virtual environments in Python)
  85. # assign of a vector
  86. x <- c(10.4, 5.6, 3.1, 6.4, 21.7)
  87. assign('x', c(10.4, 5.6, 3.1, 6.4, 21.7))
  88. c(10.4, 5.6, 3.1, 6.4, 21.7) -> x
  89. x <- c(1, 2, 3)
  90. y <- c(4, 5)
  91. v <- 2*x + y + 1
  92. 7 10 11
  93. # elementary arithmetic operators
  94. x + y - z * x/y + z^4
  95. sum(x), prod(x), log(x), exp(x)
  96. sin(x), cos(x), tan(x), sqrt(x)
  97. max(x), min(x), log10(x), length(x)
  98. # parallel maximum/minimum
  99. x <- c(1, 2, 3); y <- c(4, 5)
  100. max(x, y)
  101. 5
  102. min(x, y)
  103. 1
  104. pmax(x, y)
  105. 4 5 4
  106. pmin(x, y)
  107. 1 2 3
  108. # sample mean
  109. mean(x) = sum(x)/length(x)
  110. # sample variance
  111. var(x) = sum((x-mean(x))^2)/(length(x)-1)
  112. # sort
  113. sort(x) # returns the values of x in sorted order (a vector, not a list)
  114. order(x) # returns the indices that sort x, so x[order(x)] is x in sorted order
  115. sort.list(x) # same as order(x) for a single vector
  116. For most purposes the user will not be concerned if the 'numbers' in a numeric vector are
  117. integers, reals or even complex. Internally calculations are done as double precision real
  118. numbers, or double precision complex numbers if the input data are complex.
  119. # integers
  120. x <- c(1L, 2L, 3L, 4L, 5L)
  121. # complex number
  122. sqrt(-17+0i)
  123. # sequence
  124. x <- 1:10 is equivalent to x <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
  125. The colon operator has high priority within an expression.
  126. x <- 2*1:15 is equivalent to x <- c(2, 4, 6, ..., 26, 28, 30)
  127. seq(2,10) is equivalent to c(2, 3, 4, 5, 6, 7, 8, 9, 10)
  128. 1:30 == seq(1,30) == seq(from=1, to=30) == seq(to=30, from=1)
  129. seq(from=1, to=10, by=2) == c(1, 3, 5, 7, 9)
  130. seq(from=1, by=2, length=5) == c(1, 3, 5, 7, 9)
  131. seq(-5, 5, .2) == c(-5.0, -4.8, -4.6, ..., 4.6, 4.8, 5.0)
  132. # backward sequence
  133. x <- 30:1 is equivalent to x <- c(30, 29, 28, ..., 1)
  134. # replicating an object
  135. x <- c(1, 2, 3, 4, 5)
  136. rep(x, times=5) == c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5)
  137. rep(x, each=5) == c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5)
  138. # logical vectors
  139. TRUE && FALSE gives FALSE
  140. TRUE || FALSE gives TRUE
  141. TRUE == TRUE gives TRUE
  142. TRUE != TRUE gives FALSE
  143. logical vectors are generated by conditions
  144. for example:
  145. x <- 5
  146. temp <- x > 13
  147. temp # sets temp as a vector of the same length as x with values FALSE corresponding to
  148. the elements of x where the condition is not met and TRUE where it is
  149. comparison operators are <, <=, >, >=, ==, !=
  150. if c1 and c2 are logical expressions, then c1 & c2 is their intersection ("and"),
  151. c1 | c2 is their union ("or"), and !c1 is the negation of c1
  152. # missing values
  153. In general any operation on an NA becomes an NA. The motivation for this rule is simply
  154. that if the specification of an operation is incomplete, the result cannot be known and
  155. hence is not available.
  156. The function is.na(x) gives a logical vector of the same size as x with value TRUE if and
  157. only if the corresponding element in x is NA.
  158. z <- c(1:3, NA)
  159. z
  160. 1 2 3 NA
  161. is.na(z)
  162. FALSE FALSE FALSE TRUE
  163. Logical expression z == NA is quite different from is.na(z) since NA is not really a value but
  164. a marker for a quantity that is not available. Thus z == NA is a vector of the same length as z
  165. all of whose values are NA as the logical expression itself is incomplete and hence undecidable.
  166. z == NA
  167. NA NA NA NA
  168. # NaN - Not a Number
  169. 0/0 = NaN
  170. Inf - Inf = NaN
  171. is.na() is TRUE both for NA and NaN values
  172. is.na(0/0)
  173. TRUE
  174. is.na(Inf-Inf)
  175. TRUE
  176. to differentiate these is.nan() is only TRUE for NaN
  177. is.na(NA)
  178. TRUE
  179. is.nan(0/0)
  180. TRUE
  181. is.nan(NA)
  182. FALSE
  183. missing values are sometimes printed as <NA> when character vectors are printed without quotes
  184. # character vectors
  185. Character strings are entered using either matching double (") or single (') quotes. Escape
  186. sequences are \n - newline, \t - tab and \b - backspace. (see ?Quotes for a full list)
  187. paste() function takes an arbitrary number of arguments and concatenates them one by one into
  188. character strings.
  189. labs <- paste(c("X", "Y"), 1:10, sep="")
  190. makes labs into the character vector
  191. c("X1", "Y2", "X3", "Y4", "X5", "Y6", "X7", "Y8", "X9", "Y10")
  192. c("X", "Y") is repeated 5 times to match the sequence 1:10
  193. xnames <- paste(c('x'), 1:3, sep="") # "x1" "x2" "x3"
  194. ynames <- paste(c('y'), 1:3, sep="") # "y1" "y2" "y3"
  195. znames <- paste(c('z'), 1:3, sep=",") # "z,1" "z,2" "z,3"
  196. # index vectors; selecting and modifying subsets of a data set
  197. 1. logical vector:
  198. index vector is recycled to the same length as the vector from which elements are to be selected.
  199. values corresponding to TRUE in the index vector are selected and those corresponding to FALSE are
  200. omitted.
  201. y <- x[!is.na(x)]
  202. creates (or re-creates) an object y which will contain the non-missing values of x, in the
  203. same order. Note that if x has missing values, y will be shorter than x.
  204. (x+1)[(!is.na(x)) & x>0] -> z
  205. creates an object z and places in it the values of the vector x+1 for which the corresponding
  206. values in x were both non-missing and positive.
  207. 2. vector of positive integral quantities
  208. the values in the index vector must lie in the set {1, 2, ..., length(x)}
  209. x[6] # sixth component of x
  210. x[1:10] # selects the first 10 elements of x (assuming length(x) is not less than 10)
  211. c("X","Y")[rep(c(1,2,2,1), times=4)] # produces a character vector of length 16 consisting of
  212. "X", "Y", "Y", "X" repeated four times
  213. 3. vector of negative integral quantities
  214. such an index vector specifies the values to be excluded rather than included
  215. y <- x[-(1:5)] # gives y all but the first five elements of x
  216. 4. vector of character strings
  217. names attribute to identify its components
  218. fruit <- c(5, 10, 1, 20)
  219. > fruit
  220. 5 10 1 20
  221. names(fruit) <- c("orange", "banana", "apple", "peach")
  222. > fruit
  223. orange banana apple peach
  224. 5 10 1 20
  225. An indexed expression can also appear on the receiving end of an assignment, in which case the
  226. assignment operation is performed only on those elements of the vector.
  227. x[is.na(x)] <- 0 # replaces any missing values in x by zeros
  228. y[y<0] <- -y[y<0] has the same effect as y <- abs(y)
  229. $ sudo R
  230. # install packages
  231. install.packages("reshape")
  232. install.packages("dplyr")
  233. install.packages("ggplot2")
  234. install.packages("tidyr")
  235. install.packages("readr")
  236. install.packages("purrr")
  237. install.packages("tibble")
  238. install.packages("stringr")
  239. install.packages("forcats")
  240. install.packages("mlr")
  241. install.packages("jsonlite")
  242. install.packages("rmarkdown")
  243. install.packages(c("pkg1", "pkg2", "pkg3"),
  244. lib = file.path("/home/ndayalan/.R/x86_64-pc-linux-gnu-library/3.2/"))
  245. # working directory
  246. getwd()
  247. # package version
  248. packageVersion("rmarkdown")
  249. packageVersion("knitr")
  250. # get library location
  251. .libPaths()
  252. # view all installed packages
  253. library()
  254. # view packages currently loaded
  255. search()
  256. # new packages
  257. new.packages()
  258. # old packages
  259. old.packages()
  260. # update packages
  261. update.packages()
  262. update.packages(ask = FALSE)
  263. # remove packages
  264. remove.packages("pkgname")
  265. remove.packages(c("pkg1", "pkg2", "pkg3"),
  266. lib = file.path("/home/ndayalan/.R/x86_64-pc-linux-gnu-library/3.2/"))
  267. # help packages
  268. help(package="pkgname")
  269. help(package="datasets") # list of all in-built datasets in R
  270. # view in-built datasets
  271. ls("package:datasets")
  272. help(package="datasets")
  273. data() # list of all the available datasets
  274. data(package = .packages(all.available = TRUE)) # list of all datasets in the available pkgs
  275. data(package = "package name") # list the datasets from the "package name" package
  276. data(package = "datasets") # list the datasets from the "datasets" package
  277. # load in-built dataset
  278. df <- dataset_name
  279. # read data from a file
  280. read.csv(file="/path/of/directory/file.csv")
  281. read.table(file="/path/of/directory/file.csv", sep=",", header=TRUE)
  282. data.table::fread("/path/of/directory/file.csv")
  283. # head(), tail()
  284. head(df, n=4)
  285. tail(df, n=4)
  286. # write data to a file
  287. write.csv(x=df, file="/path/of/directory/file.csv")
  288. write.csv(x=df, file="/path/of/directory/file.csv", row.names=FALSE)
  289. # check a file exists
  290. file.exists(file="/path/of/directory/file.csv")
  291. CRAN Mirrors - https://cran.r-project.org/mirrors.html
  292. # RMarkdown
  293. pandoc -f markdown -t latex -o output.pdf input.Rmd # output to pdf file
  294. pandoc -f markdown -t html -o output.html input.Rmd # output to html file
  295. # to run .Rmd file on R
  296. > require(rmarkdown)
  297. > render('notebook.Rmd')
  298. # list the names of all Language engines
  299. > names(knitr::knit_engines$get())
  300. # check python config in R (py_config() is provided by the reticulate package)
  301. > py_config()
  302. # Themes in RMarkdown - https://bootswatch.com/3/
  303. default, cerulean, journal, flatly, darkly, readable, spacelab, united, cosmo, lumen, paper,
  304. sandstone, simplex, yeti
  305. # highlight
  306. default, tango, pygments, kate, monochrome, espresso, zenburn, haddock, breezedark, textmate
  307. # Error in library("devtools") : there is no package called 'devtools'
  308. $ sudo apt-get install libssl-dev
  309. $ sudo apt-get install libxml2-dev
  310. $ sudo apt-get install libcurl4-openssl-dev
  311. > install.packages("devtools")
  312. > install.packages("usethis")
  313. > library('devtools')
  314. # Statistics in R
  # Build a population of 10000 integers drawn at random, with replacement,
  # from 1 to 100.
  population <- sample.int(100, 10000, replace = TRUE)

  # length of population
  length(population)

  # maximum value
  max(population)

  # minimum value
  min(population)

  # first ten values (two equivalent forms)
  head(population, 10)
  population[1:10]

  # last ten values (two equivalent forms); the index range must stop at
  # length(population) = 10000, since indexing past the end yields NA
  tail(population, 10)
  population[9991:10000]

  # draw a sample of size 100 from the population (without replacement)
  sample1 <- sample(population, 100)

  # length of sample
  length(sample1)

  # first ten values
  head(sample1, 10)
  sample1[1:10]

  # last ten values
  tail(sample1, 10)
  sample1[91:100]

  # mean
  mean(sample1)

  # median
  median(sample1)
  343. # mode
  # Return the most frequent value of x (the statistical mode).
  # When several values tie for the highest count, the one that appears
  # first in x is returned, because which.max() picks the first maximum.
  Mode <- function(x) {
    distinct_values <- unique(x)
    # Map every element of x to the position of its distinct value, then
    # count how many times each position occurs.
    positions <- match(x, distinct_values)
    counts <- tabulate(positions)
    distinct_values[which.max(counts)]
  }
  348. # R resources
  349. Official page - https://www.r-project.org
  350. Download page - https://cran.r-project.org
  351. Stat Methods - https://www.statmethods.net
  352. R seek - https://www.rseek.org
  353. UCLA R - https://www.ats.ucla.edu/stat/r/
  354. UPenn R - https://finzi.psych.upenn.edu/search.html
  355. RStudio - https://www.rstudio.com
  356. Rattle - https://rattle.togaware.com
  357. # statmod package for gauss quadrature
  358. library(statmod)
  359. gauss.quad(2, kind="legendre")