# histStock.R
  # Script setup: attach lubridate, then collect the run parameters and data.
  library(lubridate)

  # Trailing command-line arguments, consumed by myHist() as:
  # [1] metric numerator column, [2] metric denominator column,
  # [3] minimum metric value, [4] number of months for the price return.
  listIn <- commandArgs(trailingOnly = TRUE)

  # Raw table that myHist() filters and pairs up.
  data <- read.csv(file = "/path/to/assignment/explainer")
  #' Histogram of forward price returns for rows passing a metric screen
  #'
  #' Keeps the rows of `df` whose metric exceeds a minimum value, pairs each
  #' kept row with the same ticker's price a given number of calendar months
  #' later, plots a histogram of the percentage returns and returns the
  #' per-bin counts.
  #'
  #' @param df Data frame with columns `ticker_exchange`, `price`, `date`
  #'   (parsed with format "%d/%m/%Y") and the metric column(s) named in
  #'   `listIn`.
  #' @param listIn Character vector with at least four entries:
  #'   [1] metric numerator column, [2] metric denominator column (the literal
  #'   "1" means "no denominator"), [3] minimum metric value (rows are kept
  #'   when metric > minimum), [4] number of months between the two prices.
  #' @param nbins Number of bins spanning min..max of the returns; one extra
  #'   bin of the same width is added on each side. Defaults to 100.
  #' @return Data frame with columns `Var1` (bin interval) and `Freq` (count).
  #'   When no usable price pair exists, a warning is raised, nothing is
  #'   plotted and a zero-row data frame with the same columns is returned.
  myHist <- function(df, listIn, nbins = 100L) {
    listIn <- as.character(listIn)
    if (length(listIn) < 4L) {
      stop("listIn needs 4 entries: numerator, denominator, minimum, months",
           call. = FALSE)
    }
    numerator <- listIn[1]
    denominator <- listIn[2]
    min_value <- suppressWarnings(as.numeric(listIn[3]))
    n_months <- suppressWarnings(as.integer(listIn[4]))
    if (is.na(min_value) || is.na(n_months)) {
      stop("listIn[3] and listIn[4] must be numeric", call. = FALSE)
    }
    nbins <- suppressWarnings(as.integer(nbins))
    if (length(nbins) != 1L || is.na(nbins) || nbins < 1L) {
      stop("nbins must be a single positive integer", call. = FALSE)
    }

    has_denominator <- !identical(denominator, "1")
    metric_cols <- if (has_denominator) c(numerator, denominator) else numerator
    needed <- c("ticker_exchange", "price", "date", metric_cols)
    absent <- setdiff(needed, names(df))
    if (length(absent) > 0L) {
      stop("df is missing column(s): ", paste(absent, collapse = ", "),
           call. = FALSE)
    }

    # Columns may arrive as factor or character; go through character so
    # factor codes are never mistaken for values.
    to_number <- function(x) as.numeric(as.character(x))

    # "mm-YYYY" key of the calendar month `offset` months after each date.
    # Only the month and year of the shifted date are ever compared, so integer
    # month arithmetic gives the same key as a month-end-clamped date addition
    # (lubridate's %m+%) without needing that package attached.
    month_key <- function(dates, offset = 0L) {
      lt <- as.POSIXlt(dates)
      idx <- (lt$year + 1900L) * 12L + lt$mon + offset
      key <- rep(NA_character_, length(idx))
      ok <- !is.na(idx)
      key[ok] <- sprintf("%02d-%04d",
                         as.integer(idx[ok] %% 12L + 1L),
                         as.integer(idx[ok] %/% 12L))
      key
    }

    # Every observed price (unscreened), keyed by ticker and calendar month.
    ends <- data.frame(
      ticker_exchange = df[["ticker_exchange"]],
      month = month_key(as.Date(df[["date"]], format = "%d/%m/%Y")),
      end_price = to_number(df[["price"]]),
      stringsAsFactors = FALSE
    )

    # Apply the metric screen to rows that have all needed fields.
    screened <- df[, needed, drop = FALSE]
    screened <- screened[complete.cases(screened), , drop = FALSE]
    metric <- to_number(screened[[numerator]])
    if (has_denominator) {
      metric <- metric / to_number(screened[[denominator]])
    }
    screened <- screened[!is.na(metric) & metric > min_value, , drop = FALSE]

    # Screened rows, keyed by the month in which the later price is needed.
    starts <- data.frame(
      ticker_exchange = screened[["ticker_exchange"]],
      month = month_key(as.Date(screened[["date"]], format = "%d/%m/%Y"),
                        n_months),
      start_price = to_number(screened[["price"]]),
      stringsAsFactors = FALSE
    )

    # Unparseable dates must not pair up with each other through NA keys.
    starts <- starts[!is.na(starts[["month"]]), , drop = FALSE]
    ends <- ends[!is.na(ends[["month"]]), , drop = FALSE]

    pairs <- merge(starts, ends, by = c("month", "ticker_exchange"))
    pairs$returns <- (pairs$end_price / pairs$start_price - 1) * 100
    # Drop NAs, infinite returns (zero start price) and exact duplicates.
    usable <- complete.cases(pairs) & is.finite(pairs$returns)
    pairs <- unique(pairs[usable, , drop = FALSE])

    returns <- pairs$returns
    if (length(returns) == 0L) {
      warning("no price pairs passed the screen; nothing to plot",
              call. = FALSE)
      return(data.frame(Var1 = factor(character(0)), Freq = integer(0)))
    }

    lo <- min(returns)
    hi <- max(returns)
    spread <- hi - lo
    if (spread == 0) {
      # All returns identical: fall back to a scale-aware non-zero spread so
      # the breaks are still a proper increasing sequence.
      spread <- if (lo != 0) abs(lo) else 1
    }
    # Pad by exactly one bin width on each side so the breaks always span the
    # data whatever nbins is; length.out fixes the break count independently of
    # floating-point accumulation.
    width <- spread / nbins
    breaks <- seq(lo - width, hi + width, length.out = nbins + 3L)

    hist(returns, breaks = breaks,
         main = "Histogram of returns",
         xlab = sprintf("%d-month price return (%%)", n_months))

    # table(cut(...)) is kept inline so the columns are named Var1 / Freq.
    as.data.frame(table(cut(returns, breaks)))
  }
  # Run the analysis on the loaded table with the command-line parameters;
  # as a top-level expression the returned bin counts are printed.
  myHist(df = data, listIn = listIn)