# Date-handling helpers.
library(lubridate)

# Command-line parameters, in order:
#   [1] numerator metric column name
#   [2] denominator metric column name ("1" means: use the numerator alone)
#   [3] minimum value the metric (or ratio) must exceed
#   [4] number of months over which the price return is measured
listIn <- commandArgs(trailingOnly = TRUE)

# Input table; myHist() reads its "ticker_exchange", "price" and "date"
# columns plus the metric column(s) named in listIn.
data <- read.csv("/path/to/assignment/explainer")
# Histogram of forward price returns for the rows that pass a metric screen.
#
# Arguments:
#   df      Data frame with columns "ticker_exchange", "price" and "date"
#           (dates written as "%d/%m/%Y") plus the metric column(s) named
#           in listIn.
#   listIn  Character vector:
#             [1] numerator metric column name
#             [2] denominator metric column name, or "1" to screen on the
#                 numerator alone
#             [3] minimum value the metric (or numerator/denominator ratio)
#                 must exceed
#             [4] number of months between the two prices of a return
#   nbins   Number of equal-width bins laid across the observed return
#           range; one extra padding bin is added on each side.
#
# Returns a data frame with one row per bin (Var1 = interval label,
# Freq = count). As a side effect the histogram is drawn on the active
# graphics device. When no price pair can be formed, a warning is raised
# and an empty data frame is returned without plotting.
myHist <- function(df, listIn, nbins = 100) {
  if (length(listIn) < 4L || anyNA(listIn[1:4])) {
    stop("listIn must hold: numerator, denominator, minValue, months",
         call. = FALSE)
  }
  min_value <- as.numeric(listIn[3])
  horizon <- as.integer(listIn[4])
  if (is.na(min_value) || is.na(horizon)) {
    stop("listIn[3] and listIn[4] must be numeric", call. = FALSE)
  }
  stopifnot(is.numeric(nbins), length(nbins) == 1L, nbins >= 1)

  # A denominator of "1" means the numerator is screened on its own.
  metric_cols <- if (listIn[2] == "1") listIn[1] else listIn[1:2]
  needed <- c("ticker_exchange", "price", "date", metric_cols)
  absent <- setdiff(needed, names(df))
  if (length(absent) > 0L) {
    stop("df is missing column(s): ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  # Columns may arrive as factors or strings, so go through character.
  to_number <- function(x) as.numeric(as.character(x))

  # "%m-%Y" label of each date moved forward by `shift` whole months.
  # Only the month and year are ever matched, so integer month arithmetic
  # is enough and day-of-month overflow cannot occur.
  month_label <- function(dates, shift = 0L) {
    parts <- as.POSIXlt(as.Date(dates, format = "%d/%m/%Y"))
    month_index <- (parts$year + 1900L) * 12L + parts$mon + shift
    label <- sprintf("%02d-%04d", month_index %% 12L + 1L, month_index %/% 12L)
    label[is.na(month_index)] <- NA_character_
    label
  }

  # Every price in the input, keyed by ticker and month: the later price
  # (P1) of each return is looked up here.
  later <- data.frame(
    ticker_exchange = df[["ticker_exchange"]],
    month = month_label(df[["date"]]),
    P1 = to_number(df[["price"]]),
    stringsAsFactors = FALSE
  )

  # Rows that pass the metric screen supply the earlier price (P2).
  screened <- df[, needed, drop = FALSE]
  screened <- screened[complete.cases(screened), , drop = FALSE]
  metric <- to_number(screened[[4]])
  if (length(metric_cols) == 2L) {
    metric <- metric / to_number(screened[[5]])
  }
  passes <- !is.na(metric) & metric > min_value
  screened <- screened[passes, , drop = FALSE]

  # Key each screened row by the month `horizon` months ahead so the merge
  # pairs it with the same ticker's price in that later month.
  earlier <- data.frame(
    ticker_exchange = screened[["ticker_exchange"]],
    month = month_label(screened[["date"]], horizon),
    P2 = to_number(screened[["price"]]),
    stringsAsFactors = FALSE
  )
  paired <- merge(earlier, later, by = c("month", "ticker_exchange"))

  # Percentage return from the earlier price (P2) to the later price (P1).
  paired$returns <- (paired$P1 / paired$P2 - 1) * 100
  paired <- unique(paired[complete.cases(paired), , drop = FALSE])
  # A zero earlier price gives an infinite return, which cannot be binned.
  returns <- paired$returns[is.finite(paired$returns)]

  if (length(returns) == 0L) {
    warning("no price pairs matched; nothing to plot", call. = FALSE)
    return(data.frame(Var1 = factor(character(0)), Freq = integer(0)))
  }

  lowest <- min(returns)
  bin_width <- (max(returns) - lowest) / nbins
  if (!(bin_width > 0)) {
    # Every return is identical: fall back to unit-wide bins so the break
    # vector stays a proper increasing sequence.
    bin_width <- 1
  }
  # nbins bins across the data plus one padding bin on each side.
  breaks <- seq(from = lowest - bin_width, by = bin_width,
                length.out = nbins + 3L)

  hist(returns, breaks = breaks,
       main = "Histogram of returns", xlab = "returns (%)")
  as.data.frame(table(cut(returns, breaks)))
}
# Run the analysis on the loaded table with the command-line parameters.
myHist(data, listIn)