Taking too long? Close loading screen.

Programming Syntax

[mathjax]

Basics

Define a Function






 
function(<arg_1>, <arg_2>, ...) {
    <statement_1>
    <statement_2>
    ...
    return(<value>)
}
FUNCTION <function_name>(<arg_1>, ...)
    <statement_1>
    <statement_2>
    ...
    RETURN <value>
ENDFUNCTION






 
function(<arg_1>, <arg_2>, ...) {
    <statement_1>
    <statement_2>
    ...
    return(<value>)
}
FUNCTION <function_name>(<arg_1>, ...)
    <statement_1>
    <statement_2>
    ...
    RETURN <value>
ENDFUNCTION

Logical Operator

Definition Python R DCS Prophet
Exactly equal to   ==    
Not equal to   !=    
Negation of a logical vector x   !x    
and <cond_1> and <cond_2> <cond_1> & <cond_2> <cond_1> AND <cond_2> <cond_1> AND <cond_2>
or <cond_1> or <cond_2> <cond_1> | <cond_2> <cond_1> OR <cond_2> <cond_1> OR <cond_2>
Is a an element of the vector c (a, b, c)?   a %in% c(a, b, c)    

Subsetting a Dataframe

[icon name=”python” style=”brands”]

 

[icon name=”r-project” style=”brands”] <df>[<vec_1>, <vec_2>]

Table of Contents

Statements

If-Elseif-Else-Endif Statement








 
if (<condition>) {
    <statements>
} else if (<condition>) {
    <statements>
} else {
    <statements>
}

Inline Statement

ifelse(<condition>, <true_value>, <false_value>)
If <condition> Then
    <statements>
Elseif <condition> Then
    <statements>
Else
    <statements>
Endif
IF <condition> THEN
    <statements>
ELSEIF <condition> THEN
    <statements>
ELSE
    <statements>
ENDIF








 
if (<condition>) {
    <statements>
} else if (<condition>) {
    <statements>
} else {
    <statements>
}

Inline Statement

ifelse(<condition>, <true_value>, <false_value>)
If <condition> Then
    <statements>
Elseif <condition> Then
    <statements>
Else
    <statements>
Endif
IF <condition> THEN
    <statements>
ELSEIF <condition> THEN
    <statements>
ELSE
    <statements>
ENDIF

Looping Statement

For-Loop Statement








 
for (<var> in <vec>) {
    <statements>
    if (<condition>) {
        break
    }
    if (<condition>) {
        next
    }
}
For Each <vec>
    If <condition> Then
        QUITLOOP
    EndIf
Next
FOR <var> := <start> TO <end> STEP <step>
    <statement>
NEXT
FOR <var> FROM <start> TO <end> STEP <step>
    <statement>
ENDFOR








 
for (<var> in <vec>) {
    <statements>
    if (<condition>) {
        break
    }
    if (<condition>) {
        next
    }
}
For Each <vec>
    If <condition> Then
        QUITLOOP
    EndIf
Next
FOR <var> := <start> TO <end> STEP <step>
    <statement>
NEXT
FOR <var> FROM <start> TO <end> STEP <step>
    <statement>
ENDFOR

Do-While Statement








 
while (<condition>) {
    <statements>
    if (<condition>) {
        break
    }
    if (<condition>) {
        next
    }
}
Do While <condition>
    <statement>
Loop
Do
    <statement>
Loop While <condition>
DO WHILE (<condition>)
    <statements>
LOOP
DO
    <statements>
LOOP WHILE (<condition>)








 
while (<condition>) {
    <statements>
    if (<condition>) {
        break
    }
    if (<condition>) {
        next
    }
}
Do While <condition>
    <statement>
Loop
Do
    <statement>
Loop While <condition>
DO WHILE (<condition>)
    <statements>
LOOP
DO
    <statements>
LOOP WHILE (<condition>)

Functions

Applying a function

[icon name=”python” style=”brands”]

 

[icon name=”r-project” style=”brands”] apply (<df>, <margin>, <func>)

Sorting a Dataframe

[icon name=”python” style=”brands”]

[icon name=”r-project” style=”brands”] <df>[order(<df>$<var_1>, ..., <df>$<var_n>, decreasing = FALSE), ]

Applications

Construct a confidence interval


Python


R

 
# Monte Carlo check of how often the 95% confidence interval for a normal
# mean (known standard deviation) covers the true mean.

# Initialization step
n <- 100
n_sim <- 1000 # number of replications
mu <- 5 # true mean value
sigma <- 2
count <- rep(NA, n_sim) # one coverage indicator per replication

# Repetition step
set.seed(0) # to make results reproducible
for (i in seq_len(n_sim)) {
    # Draw a random sample of size n from a normal distribution
    # with mean mu and standard deviation sigma
    x <- rnorm(n, mean = mu, sd = sigma)
    # TRUE when mu lies within qnorm(0.975) * sigma / sqrt(n) of the sample mean
    count[i] <- (abs(mean(x) - mu) <= qnorm(0.975) * sigma / sqrt(n))
}
# Final result
> mean(count)
[1] 0.952

Define a Loglikelihood Function

The maximized loglikelihood function is defined as:

\(l(\hat{\mu}_1,\ldots,\hat{\mu}_n)=\sum^{n}_{i=1}{y_{i}\ln{\hat{\mu}_i}}-\sum^{n}_{i=1}{\hat{\mu}_i}+c\)

where \(c\) is a constant not involving the \(\hat{\mu}_i\)'s.


Python


R

 
# Loglikelihood sum(y * log(mu) - mu), omitting the additive constant c.
#   observed:  numeric vector of observed values y_i
#   predicted: numeric vector of fitted means mu_i
# Returns a single numeric value.
LL <- function(observed, predicted) {
    # log() is not finite for non-positive means, so replace them with a
    # tiny positive value before taking logs.
    predicted_pos <- ifelse(predicted <= 0, 0.000001, predicted)
    return(sum(observed * log(predicted_pos) - predicted))
}
observed <- c(2, 3, 6, 7, 8, 9, 10, 12, 15)
predicted <- c(2.516332, 2.516332, 7.451633, 7.451633, 7.451633, 7.451633,
               12.386934, 12.386934, 12.386934)
LL(observed, predicted)
[1] 85.98277

Constructing the Fibonacci sequence

The sequence of Fibonacci numbers \(F_1,F_2,\ldots\) is defined by \(F_1=F_2=1\) and:

\(F_n=F_{n-1}+F_{n-2}\) for \(n\ge 3\)


Python


R

 

Based on Recursion

# Return the n-th Fibonacci number (F_1 = F_2 = 1) by direct recursion.
#   n: a single positive whole number.
Fib <- function(n) {
    # Reject inputs that would never reach a base case (n < 1, non-integer).
    stopifnot(is.numeric(n), length(n) == 1, n >= 1, n == round(n))
    if (n == 1 || n == 2) {
        x <- 1
    } else {
        x <- Fib(n - 1) + Fib(n - 2)
    }
    return(x)
}

Based on For-Loop

# Return the n-th Fibonacci number (F_1 = F_2 = 1) using a for-loop.
#   n: a single positive whole number.
Fib <- function(n) {
    # Fail early with a clear message instead of an indexing error later on.
    stopifnot(is.numeric(n), length(n) == 1, n >= 1, n == round(n))
    if (n == 1 || n == 2) {
        return(1)
    }
    # From here on n >= 3, so 3:n is always an increasing sequence.
    x <- rep(NA, n)
    x[1] <- x[2] <- 1
    for (i in 3:n) {
        x[i] <- x[i - 1] + x[i - 2]
    }
    return(x[n])
}

Simple Linear Regression

The simple linear regression model is defined as:

\(y_i=\beta_0+\beta_{1}x_i+\varepsilon_i\), \(\varepsilon_{i}\sim N(0,\sigma^2),\ \text{i.i.d.}\), \(i=1,2,\ldots,n\)

Prediction Interval


Python


R

 
# Monte Carlo check of how often the 95% prediction interval from a simple
# linear regression contains a new response drawn at x0.
n <- 100
n_sim <- 10000 # Total number of simulations
beta0 <- 2 # True intercept
beta1 <- 4 # True slope
sigma <- 1
count <- rep(NA, n_sim) # Vector of n_sim NA placeholders, one per simulation
set.seed(0) # Set a seed so that results will be reproducible
x <- runif(n) # Generate n uniform random numbers
x0 <- 0.8 # Predictor value of interest
new_point <- data.frame(x = x0) # Same in every iteration, so built once
for (i in seq_len(n_sim)) {
    y <- rnorm(n, mean = beta0 + beta1 * x, sd = sigma)
    m <- lm(y ~ x)
    # Target of prediction
    y0 <- rnorm(1, mean = beta0 + beta1 * x0, sd = sigma)
    # A single predict() call returns both bounds of the prediction interval
    pred <- predict(m, newdata = new_point, interval = "prediction")
    # Lower bound of 95% prediction interval
    l <- pred[, "lwr"]
    # Upper bound of 95% prediction interval
    u <- pred[, "upr"]
    # TRUE when the new response falls inside the interval
    count[i] <- (l <= y0) && (y0 <= u)
}
# The mean of all simulations should be close to 0.95
> mean(count)
[1] 0.951