We have an example of using R to come up with the pricing for non-life products, specifically mopeds, at http://www.cybaea.net/journal/2012/03/13/R-code-for-Chapter-2-of-Non_Life-Insurance-Pricing-with-GLM/. The code first creates a table of the statistics available for the product line, then compares the pricing to actual statistics in use.
The first part of the code that accumulates the data is as follows:
con <- url("http://www2.math.su.se/~esbj/GLMbook/moppe.sas")
data <- readLines(con, n = 200L, warn = FALSE, encoding = "unknown")
close(con)
## Find the data range
data.start <- grep("^cards;", data) + 1L
data.end <- grep("^;", data[data.start:999L]) + data.start - 2L
table.1.2 <- read.table(text = data[data.start:data.end],
header = FALSE,
sep = "",
quote = "",
col.names = c("premiekl", "moptva", "zon", "dur",
"medskad", "antskad", "riskpre", "helpre", "cell"),
na.strings = NULL,
colClasses = c(rep("factor", 3), "numeric",
rep("integer", 4), "NULL"),
comment.char = "")
rm(con, data, data.start, data.end)
# Remainder of Script adds comments/descriptions
comment(table.1.2) <-
c("Title: Partial casco moped insurance from Wasa insurance, 1994--1999",
"Source: http://www2.math.su.se/~esbj/GLMbook/moppe.sas",
"Copyright: http://www2.math.su.se/~esbj/GLMbook/")
## See the SAS code for this derived field
table.1.2$skadfre = with(table.1.2, antskad / dur)
## English language column names as comments:
comment(table.1.2$premiekl) <-
c("Name: Class",
"Code: 1=Weight over 60kg and more than 2 gears",
"Code: 2=Other")
comment(table.1.2$moptva) <-
c("Name: Age",
"Code: 1=At most 1 year",
"Code: 2=2 years or more")
comment(table.1.2$zon) <-
c("Name: Zone",
"Code: 1=Central and semi-central parts of Sweden's three largest cities",
"Code: 2=suburbs and middle-sized towns",
"Code: 3=Lesser towns, except those in 5 or 7",
"Code: 4=Small towns and countryside, except 5--7",
"Code: 5=Northern towns",
"Code: 6=Northern countryside",
"Code: 7=Gotland (Sweden's largest island)")
comment(table.1.2$dur) <-
c("Name: Duration",
"Unit: year")
comment(table.1.2$medskad) <-
c("Name: Claim severity",
"Unit: SEK")
comment(table.1.2$antskad) <- "Name: No. claims"
comment(table.1.2$riskpre) <-
c("Name: Pure premium",
"Unit: SEK")
comment(table.1.2$helpre) <-
c("Name: Actual premium",
"Note: The premium for one year according to the tariff in force 1999",
"Unit: SEK")
comment(table.1.2$skadfre) <-
c("Name: Claim frequency",
"Unit: /year")
## Save results for later
save(table.1.2, file = "table.1.2.RData")
## Print the table (not as pretty as the book)
print(table.1.2)
The resultant first 10 rows of the table are as follows:
premiekl moptva zon dur medskad antskad riskpre helpre skadfre
1 1 1 1 62.9 18256 17 4936 2049 0.27027027
2 1 1 2 112.9 13632 7 845 1230 0.06200177
3 1 1 3 133.1 20877 9 1411 762 0.06761833
4 1 1 4 376.6 13045 7 242 396 0.01858736
5 1 1 5 9.4 0 0 0 990 0.00000000
6 1 1 6 70.8 15000 1 212 594 0.01412429
7 1 1 7 4.4 8018 1 1829 396 0.22727273
8 1 2 1 352.1 8232 52 1216 1229 0.14768532
9 1 2 2 840.1 7418 69 609 738 0.08213308
10 1 2 3 1378.3 7318 75 398 457 0.05441486
Then, we go through each product/statistics to determine whether the pricing for a product is in line with others. Note, the repos = clause on the install.packages statement is a fairly new addition to R:
# make sure the packages we want to use are installed
install.packages(c("data.table", "foreach", "ggplot2"), dependencies = TRUE, repos = "http://cran.us.r-project.org")
# load the data table we need
if (!exists("table.1.2"))
load("table.1.2.RData")
library("foreach")
## We are looking to reproduce table 2.7 which we start building here,
## add columns for our results.
table27 <-
data.frame(rating.factor =
c(rep("Vehicle class", nlevels(table.1.2$premiekl)),
rep("Vehicle age", nlevels(table.1.2$moptva)),
rep("Zone", nlevels(table.1.2$zon))),
class =
c(levels(table.1.2$premiekl),
levels(table.1.2$moptva),
levels(table.1.2$zon)),
stringsAsFactors = FALSE)
## Calculate duration per rating factor level and also set the
## contrasts (using the same idiom as in the code for the previous
## chapter). We use foreach here to execute the loop both for its
## side-effect (setting the contrasts) and to accumulate the sums.
# new.cols are set to claims, sums, levels
new.cols <-
foreach (rating.factor = c("premiekl", "moptva", "zon"),
.combine = rbind) %do%
{
nclaims <- tapply(table.1.2$antskad, table.1.2[[rating.factor]], sum)
sums <- tapply(table.1.2$dur, table.1.2[[rating.factor]], sum)
n.levels <- nlevels(table.1.2[[rating.factor]])
contrasts(table.1.2[[rating.factor]]) <-
contr.treatment(n.levels)[rank(-sums, ties.method = "first"), ]
data.frame(duration = sums, n.claims = nclaims)
}
table27 <- cbind(table27, new.cols)
rm(new.cols)
#build frequency distribution
model.frequency <-
glm(antskad ~ premiekl + moptva + zon + offset(log(dur)),
data = table.1.2, family = poisson)
rels <- coef( model.frequency )
rels <- exp( rels[1] + rels[-1] ) / exp( rels[1] )
table27$rels.frequency <-
c(c(1, rels[1])[rank(-table27$duration[1:2], ties.method = "first")],
c(1, rels[2])[rank(-table27$duration[3:4], ties.method = "first")],
c(1, rels[3:8])[rank(-table27$duration[5:11], ties.method = "first")])
# note the severities involved
model.severity <-
glm(medskad ~ premiekl + moptva + zon,
data = table.1.2[table.1.2$medskad > 0, ],
family = Gamma("log"), weights = antskad)
rels <- coef( model.severity )
rels <- exp( rels[1] + rels[-1] ) / exp( rels[1] )
## Aside: For the canonical link function use
## rels <- rels[1] / (rels[1] + rels[-1])
table27$rels.severity <-
c(c(1, rels[1])[rank(-table27$duration[1:2], ties.method = "first")],
c(1, rels[2])[rank(-table27$duration[3:4], ties.method = "first")],
c(1, rels[3:8])[rank(-table27$duration[5:11], ties.method = "first")])
table27$rels.pure.premium <- with(table27, rels.frequency * rels.severity)
print(table27, digits = 2)
The resultant display is as follows:
rating.factor class duration n.claims rels.frequency rels.severity
1 Vehicle class 1 9833 391 1.00 1.00
2 Vehicle class 2 8825 395 0.78 0.55
11 Vehicle age 1 1918 141 1.55 1.79
21 Vehicle age 2 16740 645 1.00 1.00
12 Zone 1 1451 206 7.10 1.21
22 Zone 2 2486 209 4.17 1.07
3 Zone 3 2889 132 2.23 1.07
4 Zone 4 10069 207 1.00 1.00
5 Zone 5 246 6 1.20 1.21
6 Zone 6 1369 23 0.79 0.98
7 Zone 7 148 3 1.00 1.20
rels.pure.premium
1 1.00
2 0.42
11 2.78
21 1.00
12 8.62
22 4.48
3 2.38
4 1.00
5 1.46
6 0.78
7 1.20
Here, we can see that some vehicle classes (2,6) are priced very low in comparison to statistics for that vehicle where as other are overpriced (12, 22).