## ftse100.R - Display employee productivity for FTSE-100 constituents
## Copyright © 2010 Allan Engelhardt <http://www.cybaea.net/>
## All Rights Reserved.
## Fetch the ticker symbols of the index constituents.
## The feed is read as a header-less CSV, so its column is named afterwards.
## NOTE(review): this endpoint may no longer be served - verify before relying
## on it.
constituents.url <-
  "http://uk.old.finance.yahoo.com/d/quotes.csv?s=@%5EFTSE&f=s&e=.csv"
ftse.100 <- read.csv(file = constituents.url, header = FALSE)
names(ftse.100) <- "symbol"
## Scrape employees, net profit, and sector for every constituent and collect
## the successfully parsed companies in `data` (one row per symbol).

## Return the first capture group of `pattern` in `page`, or NA when the
## pattern does not match.  A bare sub() hands its input back unchanged on a
## non-match, which would otherwise store a whole HTML page as the "value".
extract_field <- function(pattern, page, ignore.case = FALSE) {
  hit <- sub(pattern, "\\1", page, ignore.case = ignore.case)
  if (length(hit) != 1L || identical(hit, page)) {
    return(NA_character_)
  }
  hit
}

## Convert a string such as "12,345" to an integer.  Anything that cannot be
## converted cleanly (conversion warning, NA input) yields NA.
parse_count <- function(x) {
  tryCatch(
    as.integer(gsub(",", "", x)),
    warning = function(w) NA_integer_
  )
}

## Download one page as a single string.  Returns NA if the URL cannot be
## opened or read; the connection is closed on every path.
fetch_page <- function(page.url) {
  con <- tryCatch(url(page.url, open = "r"), error = function(e) NULL)
  if (is.null(con)) {
    return(NA_character_)
  }
  on.exit(close(con), add = TRUE)
  tryCatch(
    readChar(con, 2^24), # enough bytes
    error = function(e) NA_character_
  )
}

employees_pattern <-
  ".*Number of employees:</td><td.*?>[[:space:]]*([[:digit:],]+).*"
profit_pattern <-
  ".*Net Profit.*?</td><td.*?>[[:space:]]*([+-]?[[:digit:],]+).*"
sector_pattern <- ".*Sector:</td><td.*?>(.*?)</td>.*"

## For each stock symbol, get employees, profit, and sector.  Rows are
## collected in a preallocated list and bound once after the loop.
symbols <- as.character(ftse.100$symbol)
rows <- vector("list", length(symbols))
for (i in seq_along(symbols)) {
  symbol <- symbols[i]
  profile.url <- paste0("http://uk.finance.yahoo.com/q/pr?s=", symbol)
  page <- fetch_page(profile.url)
  if (length(page) != 1L || is.na(page)) {
    ## A failed download skips this symbol instead of aborting the run.
    cat("Error downloading symbol", symbol, "see", profile.url, "\n")
  } else {
    empl <- parse_count(
      extract_field(employees_pattern, page, ignore.case = TRUE)
    )
    ## The page reports profit in millions.
    profit <- parse_count(extract_field(profit_pattern, page)) * 1e6
    sector <- extract_field(sector_pattern, page)
    parsed_ok <- !is.na(empl) && !is.na(profit) && !is.na(sector) &&
      empl > 0 && profit > 0
    if (parsed_ok) {
      rows[[i]] <- data.frame(
        symbol = symbol,
        employees = empl,
        profit = profit,
        sector = sector,
        stringsAsFactors = FALSE
      )
    } else {
      cat("Error parsing symbol", symbol, "see", profile.url, "\n")
    }
  }
  Sys.sleep(1) # be polite to the server
}
rows <- Filter(Negate(is.null), rows)
data <- if (length(rows) > 0) {
  do.call(rbind, rows)
} else {
  data.frame(
    symbol = character(0),
    employees = integer(0),
    profit = numeric(0),
    sector = character(0),
    stringsAsFactors = FALSE
  )
}
## The plotting code iterates over levels(data$sector), so sector must be a
## factor regardless of the session's stringsAsFactors default.
data$sector <- factor(data$sector)
## Cache the scraped table on disk so later runs need not query Yahoo again.
save(list = "data", file = "data.RData")
## Draw profit per employee against number of employees on log-log axes, with
## one colour/marker per sector, a legend, and a fitted power-law trend line.
## Save plot to file (uncomment to write a PNG instead of the current device):
# png(filename="ftse100.png", width=800, height=800, pointsize=14, bg="white", res=100)
if (nrow(data) == 0) {
  stop("No company data available to plot", call. = FALSE)
}
opar <- par(
  cex.sub = sqrt(sqrt(2)),
  font.sub = 3,
  font.lab = 2
)
## x and y coordinates of plot; limits are rounded out to whole decades
x <- data$employees
y <- data$profit / data$employees
xlim <- 10^c(floor(log10(min(x))), ceiling(log10(max(x))))
ylim <- 10^c(floor(log10(min(y))), ceiling(log10(max(y))))
## Set up to display different color and symbols.
## factor() makes this work whether sector is stored as character or factor;
## levels() on a plain character column would be NULL.
sectors <- levels(factor(data$sector))
markers <- 21:25
pchs <- rep_len(markers, length(sectors))
## palette() returns the previous palette, kept so it can be restored below.
opal <- palette(rainbow(length(sectors), start = 3 / 6, end = 6 / 6))
## Make empty plot (plot() starts its own frame, so no plot.new() is needed):
plot(
  profit / employees ~ employees,
  data = data[FALSE, ],
  type = "n",
  log = "xy",
  xaxp = c(xlim, 1),
  yaxp = c(ylim, 1),
  xlim = xlim,
  ylim = ylim,
  main = "Profit per employee (FTSE 100)",
  xlab = "Employees",
  ylab = "Profit per employees (GBP)"
)
## Plot each sector with its own marker and palette colour
for (i in seq_along(sectors)) {
  in_sector <- which(data$sector == sectors[i])
  points(
    x[in_sector],
    y[in_sector],
    pch = pchs[i],
    col = i,
    bg = i
  )
}
legend(
  x = "bottomleft",
  legend = sectors,
  title = "Industry Sectors",
  col = palette(),
  pt.bg = palette(),
  pch = pchs,
  cex = 2 / 3,
  pt.cex = 1,
  ncol = 2
)
## Fit a linear model to the log-log data.  predict() looks up `x` in the new
## data frame, so the variable names in the formula must stay `x` and `y`.
m <- lm(log10(y) ~ log10(x))
xl <- c(xlim[1] * 5, xlim[2] / 5)
yl <- 10^predict(m, data.frame(x = xl))
lines(
  xl,
  yl,
  col = "darkred",
  lty = "dashed",
  lwd = 2
)
## The slope of the log-log fit is the exponent of the power law
power_label <- sprintf("Power = %0.3g", coef(m)[2])
text(
  xl[2],
  yl[2],
  power_label,
  adj = c(0.25, -1.5),
  col = "darkred",
  font = 2
)
## All done: restore global graphics state and close the device.
palette(opal)
par(opar)
dev.off()