#!/bin/sh

# Argument guard: abort unless at least the year and the month were given.
# Only the first two arguments are checked here; the remaining positional
# arguments are read further down without validation.
# The usage text goes to stderr and the exit status is non-zero so that a
# calling script or scheduler can tell that nothing was processed.
if [ -z "$2" ]; then
  printf 'Usage: %s YYYY MM STI ENI STJ ENJ GAMMA COST EPSILON DAY TIME\n' "$0" >&2
  exit 1
fi
#
# --- Positional arguments -------------------------------------------------
# Target period (the generated R code compares YEAR against 2007..2020).
YEAR="$1"
MONTH="$2"
# First/last grid index of the outer (lon) loop.
STI="$3"
ENI="$4"
# First/last grid index of the inner (lat) loop.
STJ="$5"
ENJ="$6"
# Hyper-parameter tags; they only appear in the names of the input files.
gamma2="$7"
cost2="$8"
epsilon2="$9"
# Day/time tag appended to YEAR+MONTH in the output directory and file names.
# Arguments beyond $9 need the braces in POSIX sh.
current_day="${10}"
current_time="${11}"

# Number of time steps handled per grid cell: the generated R code builds its
# index column and its correction loop as 1..TIME2.
# (Original note: "31days: 243 ** 247" -- TODO confirm how 39 relates to it.)
TIME2=39

# Directory the script is started from; per-run input and output live below it.
DIR2=$(pwd)
# Location of the R executable used to run the generated program.
RDIR=/home/yoshikane/R-3.6.3/bin

# Root of the historical per-year tables used to build the CDFs.
# Earlier location:
#DIR3="/home/gyin/ML/ml_siteC_Jul"
# NOTE (original author): this directory should not really be used, because
# its results come from a cross-validation that includes year 2020; to be
# changed later.
DIR3=/data37/gyin/siteC_07_m1v1

# Historical years, 13 in total (2007-2019).
YEAR1=2007;  YEAR2=2008;  YEAR3=2009;  YEAR4=2010
YEAR5=2011;  YEAR6=2012;  YEAR7=2013;  YEAR8=2014
YEAR9=2015;  YEAR10=2016; YEAR11=2017; YEAR12=2018
YEAR13=2019
##
# Create the per-run output directory that the generated R code writes into.
# -p also creates ./data-out-cdf itself when it is missing and is a no-op
# when the directory already exists.  Stop here if the directory cannot be
# created: every later write would fail anyway.
cdf_outdir="./data-out-cdf/${YEAR}${MONTH}${current_day}${current_time}"
if ! mkdir -p "${cdf_outdir}"; then
  printf 'cannot create output directory %s\n' "${cdf_outdir}" >&2
  exit 1
fi
#
# Progress banner (same text as before: "CDF, YYYY,MM,DD,TT").
printf 'CDF, %s,%s,%s,%s\n' "${YEAR}" "${MONTH}" "${current_day}" "${current_time}"
#
#rm ./data-svm/*
#
#######################################
# Print the R program that CDF-corrects one grid cell.
#
# The program is produced by an unquoted heredoc, so every shell variable is
# substituted into the R text before R sees it.
#
# Globals (read):
#   YEAR MONTH current_day current_time   target period / run tag
#   gamma2 cost2 epsilon2                 tags in the historical file names
#   TIME2                                 number of rows to process
#   DIR2 DIR3                             run directory / historical data root
#   YEAR1 .. YEAR13                       historical years
#   lon lat lon2 lat2                     zero-padded grid indices of the cell
# Outputs:
#   R source code on stdout.  Nothing is executed or written here.
#######################################
emit_cdf_rscript() {
cat <<EOF
# NOTE(review): neither package is used by the active code below; they are
# loaded as in the original script (MLmetrics served the disabled RMSE/MAE
# verification) -- confirm before removing.
library( e1071 )
library(MLmetrics)

# Index column of the output table.
x1=(1:${TIME2})

# Historical years and the per-cell table of each of them.
hist_years <- c("${YEAR1}", "${YEAR2}", "${YEAR3}", "${YEAR4}", "${YEAR5}", "${YEAR6}", "${YEAR7}", "${YEAR8}", "${YEAR9}", "${YEAR10}", "${YEAR11}", "${YEAR12}", "${YEAR13}")
hist_file <- function(yr) {
  paste0("${DIR3}/data-out/", yr, "${MONTH}/output-", yr, "${MONTH}-${lon2}_${lat2}_${gamma2}_${cost2}_${epsilon2}.csv")
}
hist_tabs <- lapply(hist_years, function(yr) read.table(hist_file(yr), sep=",", skip=1))

# dat1a.df: table of the target period.
# dat1.df : training table, i.e. all historical years except the target year.
target_year <- "${YEAR}"
is_target <- hist_years == target_year
if (any(is_target)) {
  # Leave-one-out: a historical target year is excluded from the training set.
  dat1a.df <- hist_tabs[[which(is_target)[1]]]
  dat1.df <- do.call(rbind, hist_tabs[!is_target])
} else if (target_year == "2020") {
  # Year 2020: read the table of this run and train on every historical year.
  dat1a.df <- read.table("${DIR2}/data-out/${YEAR}${MONTH}${current_day}${current_time}/output-${YEAR}${MONTH}${current_day}${current_time}-${lon}_${lat}.csv",sep=",",skip=1)
  dat1.df <- do.call(rbind, hist_tabs)
} else {
  stop("unsupported target year: ", target_year)
}

# Values below 0.1 are treated as zero (applied to every column).
dat1.df[dat1.df < 0.1] = 0
dat1.df <- data.frame(dat1.df)
dat1a.df[dat1a.df < 0.1] = 0
dat1a.df <- data.frame(dat1a.df)

# Empirical CDFs of the positive training values:
# column 2 = ML (SVM) estimate, column 3 = observation.
aaa <- subset(dat1.df[,2],dat1.df[,2] >0)
bbb <- subset(dat1.df[,3],dat1.df[,3] >0)
simcdf <- ecdf(aaa)
obscdf <- ecdf(bbb)

# Sorted observed probabilities and the equally sorted observed values.
# The ECDF is monotone, so position k of one vector belongs to position k
# of the other.
obscdf3a <- sort(obscdf(dat1.df[,3]))  # obs probability, training period
obscdf3b <- sort(dat1.df[,3])          # obs value, training period

# Smallest non-zero observed probability.
minobs <- min(subset(obscdf3a,obscdf3a>0))

# Probability of each target-period ML value under the ML CDF; non-zero
# probabilities below the smallest observed one are raised to it.
simcdf4a <- simcdf(dat1a.df[,2])
simcdf4 <- data.frame(simcdf4a)
simcdf4[simcdf4 < minobs & simcdf4 > 0 ] = minobs

# CDF matching: for every time step take the observed value whose probability
# is closest to the ML probability.  which.min() returns the first index of
# the minimum, which is the element the former which(... == min(...)) lookup
# ended up using, without the length-mismatch warning on ties.
csim2 <- numeric(${TIME2})
for (i in seq_len(${TIME2})) {
  nearest <- which.min(abs(obscdf3a - simcdf4[i, 1]))
  csim2[i] <- obscdf3b[nearest]
}

svmcdf <- csim2
svm <- dat1a.df[,2]
# NOTE(review): column 3 is used as the observation in the training table but
# is exported as sim for the target table -- confirm the target file layout.
sim <- dat1a.df[,3]

svmcdf[svmcdf < 0.1] = 0
svm[svm < 0.1] = 0
sim[sim < 0.1] = 0

x2 <- data.frame(x1,svmcdf,svm,sim)

write.csv(x2, "${DIR2}/data-out-cdf/${YEAR}${MONTH}${current_day}${current_time}/output-${YEAR}${MONTH}${current_day}${current_time}-${lon}_${lat}.csv")
EOF
}

# Run the correction for every cell of the requested index rectangle.
# Indices are zero-padded to three digits because they are part of file names.
for lon in $(seq -f "%03g" "${STI}" "${ENI}"); do
  for lat in $(seq -f "%03g" "${STJ}" "${ENJ}"); do
    # Index tags used in the historical file names (currently the full index).
    lon2=${lon}
    lat2=${lat}

    # The program is piped straight into R instead of going through a shared
    # scratch file, so runs started in parallel from the same directory cannot
    # overwrite each other's script.  --vanilla already implies --no-save.
    # A failing cell is reported and the loop carries on with the next one.
    if ! emit_cdf_rscript | "${RDIR}/R" --vanilla; then
      printf 'R failed for cell %s_%s\n' "${lon}" "${lat}" >&2
    fi
  done
done










