GCD 4.0.2 and paleofire 1.2.2

We are please to announce that the GCD and paleofire R packages have been updated to their 4.0.2 and 1.2.2 versions, respectively. GCD major update from 3.X.X to 4.X.X version number is associated to extensive changes in the way data is added into the package. GCD 4.X.X is now mirroring the online GCD SQL database on a monthly basis or whenever a significant number of new charcoal sites are added to the database.

Three new fields in the paleofiresites table (see ?paleofiresites for details) enable to perform analyses with specific database versions or enable to select sites according to the date at which they have been added into the database. See the quick example below for explanations:

par(mfrow=c(2,2))
plot( pfSiteSel(num_version < 400 ) ) # All sites in GCD version before 4.0.0
plot( pfSiteSel(gcd_version == "GCD1" ) ) # All sites in GCDv1
plot( pfSiteSel(update_date < "2016-01-01" ) ) # Sites included before 2016-01-01
plot( pfSiteSel(update_date > "2018-01-01" ) ) # Sites included since 2018-01-01

The map above that is used to display all GCD charcoal sites has been produced by adapting the code available at https://seethedatablog.wordpress.com/2016/12/23/r-simple-world-map-robinson-ggplot/ please follow that link for additional references and explanations. The modified code is copied below:

# ======================================================================================
# Create a simple world map in Robinson projection with labeled graticules using ggplot
# ======================================================================================

# Set a working directory with setwd() or work with an RStudio project

# __________ Set libraries
library(rgdal) # for spTransform() &amp;amp; project()
library(ggplot2) # for ggplot()
library(paleofire) #

setwd("somewhere...")

# __________ Load ready to use data from GitHub
load(url("https://github.com/valentinitnelav/RandomScripts/blob/master/NaturalEarth.RData?raw=true"))
# This will load 6 objects:
# xbl.X &amp;amp; lbl.Y are two data.frames that contain labels for graticule lines
# They can be created with the code at this link:
# https://gist.github.com/valentinitnelav/8992f09b4c7e206d39d00e813d2bddb1
# NE_box is a SpatialPolygonsDataFrame object and represents a bounding box for Earth
# NE_countries is a SpatialPolygonsDataFrame object representing countries
# NE_graticules is a SpatialLinesDataFrame object that represents 10 dg latitude lines and 20 dg longitude lines
# (for creating graticules check also the graticule package or gridlines fun. from sp package)
# (or check this gist: https://gist.github.com/valentinitnelav/a7871128d58097e9d227f7a04e00134f)
# NE_places - SpatialPointsDataFrame with city and town points
# NOTE: data downloaded from http://www.naturalearthdata.com/
# here is a sample script how to download, unzip and read such shapefiles:
# https://gist.github.com/valentinitnelav/a415f3fbfd90f72ea06b5411fb16df16

# __________ Project from long-lat (unprojected data) to Robinson projection
# spTransform() is used for shapefiles and project() in the case of data frames
# for more PROJ.4 strings check the followings
# http://proj4.org/projections/index.html
# https://epsg.io/

PROJ &amp;lt;- "+proj=robin +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
# or use the short form "+proj=robin"
NE_countries_rob &amp;lt;- spTransform(NE_countries, CRSobj = PROJ)
NE_graticules_rob &amp;lt;- spTransform(NE_graticules, CRSobj = PROJ)
NE_box_rob &amp;lt;- spTransform(NE_box, CRSobj = PROJ)

# Add lakes http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_lakes.zip

download.file("http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_lakes.zip", destfile="ne_10m_lakes.zip")
unzip("ne_10m_lakes.zip")
NE_lakes &amp;lt;- readOGR('ne_10m_lakes.shp',
'ne_10m_lakes')
NE_lakes &amp;lt;- spTransform(NE_lakes, CRSobj = PROJ)
NE_lakes &amp;lt;- NE_lakes[NE_lakes@data$scalerank==0,]

# project long-lat coordinates for graticule label data frames
# (two extra columns with projected XY are created)
prj.coord &amp;lt;- project(cbind(lbl.Y$lon, lbl.Y$lat), proj=PROJ)
lbl.Y.prj &amp;lt;- cbind(prj.coord, lbl.Y)
names(lbl.Y.prj)[1:2] &amp;lt;- c("X.prj","Y.prj")

prj.coord &amp;lt;- project(cbind(lbl.X$lon, lbl.X$lat), proj=PROJ)
lbl.X.prj &amp;lt;- cbind(prj.coord, lbl.X)
names(lbl.X.prj)[1:2] &amp;lt;- c("X.prj","Y.prj")

# Project GCD sites

gcd_rob=project(cbind(paleofiresites$long,paleofiresites$lat),PROJ)
gcd=data.frame(long=gcd_rob[,1],lat=gcd_rob[,2],GCD_version=paleofiresites$gcd_version)

# __________ Plot layers
ggplot() +
# add Natural Earth countries projected to Robinson, give black border and fill with gray
geom_polygon(data=NE_box_rob, aes(x=long, y=lat), colour="black", fill="grey90", size = 0.25) +
geom_polygon(data=NE_countries_rob, aes(long,lat, group=group), colour="black", fill="white", size = 0.25) +
geom_polygon(data=NE_lakes, aes(long,lat, group=group), colour="black", fill="white", size = 0.25) +
# Note: "Regions defined for each Polygons" warning has to do with fortify transformation. Might get deprecated in future!
# alternatively, use use map_data(NE_countries) to transform to data frame and then use project() to change to desired projection.
# add Natural Earth box projected to Robinson
geom_polygon(data=NE_box_rob, aes(x=long, y=lat), colour="black", fill="transparent", size = 0.25) +
# add graticules projected to Robinson
geom_path(data=NE_graticules_rob, aes(long, lat, group=group), linetype="dotted", color="grey50", size = 0.25) +
# add graticule labels - latitude and longitude
geom_text(data = lbl.Y.prj, aes(x = X.prj, y = Y.prj, label = lbl), color="grey50", size=2) +
geom_text(data = lbl.X.prj, aes(x = X.prj, y = Y.prj, label = lbl), color="grey50", size=2) +
# the default, ratio = 1 in coord_fixed ensures that one unit on the x-axis is the same length as one unit on the y-axis
geom_point(data=gcd, aes(long,lat, col= GCD_version),pch=2)+
coord_fixed(ratio = 1) +
# remove the background and default gridlines
theme_void()+
theme(legend.title = element_text(colour="black", size=8, face="bold"), # adjust legend title
legend.position = c(0.1, 0.2), # relative position of legend
plot.margin = unit(c(t=0, r=0, b=0, l=0), unit="cm"),
legend.background = element_rect(fill="white",
size=0.5, linetype="solid",
colour ="black")) # adjust margins

# save to pdf and png file
ggsave("map_draft_1.pdf", width=28, height=13.5, units="cm")
ggsave("map_1.png", width=26, height=13.5, units="cm", dpi=300)

# REFERENCES:
# This link was useful for graticule idea
# http://stackoverflow.com/questions/38532070/how-to-add-lines-of-longitude-and-latitude-on-a-map-using-ggplot2
# Working with shapefiles, projections and world maps in ggplot
# http://rpsychologist.com/working-with-shapefiles-projections-and-world-maps-in-ggplot

ggplot2 pollen diagram (and dendrogram)

In R, excellent packages provide ways to plot pollen or more generally stratigraphic diagrams. The two main tools come from the rioja package with “strat.plot” and the analogue package with “Stratiplot”. Althought those two functions are very comprehensive (you can include a dendrogram, pollen zones, etc.), easy to use, and highly customizable; I was still wondering if there is a way in R to plot a simple pollen diagram using only general plot syntax an preferably ggplot2. I came up with this simple solution that involve only ggplot2 syntax. Probably there is a possibility to add a dendrogram with grid.arrange and to overlay additionnal information but right now this approach seems sufficient to cover my very basic plotting needs:

library(ggplot2)
library(grid)
library(neotoma)
library(analogue)
## Loading required package: vegan
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.4-1
## analogue version 0.17-0

First extract data from Neotoma using neotoma package, we will have a look at “Le Grand Etang de Suze-La-Rousse” in Provençal Drome by: “Argant, J. 1990. Climat et environnement au Quaternaire dans le bassin du Rhône d’après les données palynologiques. Documents du Laboratoire de Géologie de Lyon 3:1-199.”

suze <- get_site(sitename = 'Le Grand Etang%')
suze_pollen=get_dataset(suze)
suze_data=get_download(suze_pollen)

# head(suze_data[[1]]$counts)

core.pct <- data.frame(tran(suze_data[[1]]$counts, method = 'percent'))

age <- suze_data[[1]]$sample.meta$age
core.pct <- chooseTaxa(core.pct, max.abun = 10) 

Stratiplot(age ~ ., core.pct, sort = 'wa', type = 'poly',
 ylab ='Years Before Present')
           

Now we create a simple data.frame with information for plotting a pollen diagram in ggplot2:

df=data.frame(yr=rep(age,ncol(core.pct)),
 per=as.vector(as.matrix(core.pct)),
 taxa=as.factor(rep(colnames(core.pct),each=nrow(core.pct))))

We define a theme without gridlines, etc. and elements that will decrease readability of the diagram

theme_new <- theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), # remove grids
 panel.background = element_blank(), axis.line = element_line(colour = "black"),
 strip.text.x = element_text(size=10, angle=90, vjust=0), # Taxa names 
 strip.background = element_blank(),
 strip.text.y = element_text(angle = 0),
 legend.position="none",panel.border = element_blank(),
 axis.text.x=element_text(angle=45,hjust=1)) # Axis tick label angle

And finally the pollen diagram:

ggplot(df)+
 geom_line(aes(yr,per))+
 geom_area(aes(yr,per))+
 scale_x_reverse(breaks =seq(0,100000,1000))+
 scale_y_continuous(breaks =seq(0,100,10))+
 xlab("Age (cal. BP)")+ylab("%")+
 coord_flip()+
 theme_new+
 facet_grid(~df$taxa,scales = "free", space = "free")

Add an exageration line (the quick and dirty way):

df$exag=df$per*10
id=unique(df$taxa)
for(i in 1:length(id)){
 uplim=which(df$exag[df$taxa==id[i]]>max(df$per[df$taxa==id[i]]))
 df$exag[df$taxa==id[i]][uplim]<-max(df$per[df$taxa==id[i]])
} 

diag=ggplot(df)+
 geom_line(aes(yr,per))+
 geom_area(aes(yr,exag,fill=taxa))+
 geom_area(aes(yr,per))+
 scale_x_reverse(breaks =seq(0,1e8,1000))+
 scale_y_continuous(breaks =seq(0,100,10))+
 xlab("Age (cal. BP)")+ylab("%")+
 coord_flip(xlim=c(3000,14000))+
 theme_new+
 facet_grid(~df$taxa,scales = "free", space = "free")

diag

 

Because only ggplot2 syntax is used you can play around and add geom_point, geom_bar layouts etc. and customize your diagram the way you want with the theme() function.

UPDATE: Adding a dendrogram is not straightforward but definitively doable. We will use a combinason of rioja and ggdendro package such as:

diss <- dist(sqrt(core.pct/100)^2)
clust <- chclust(diss, method="coniss")
# bstick(clust) # optionnally look at number of optimal zones

Now we will use ggdendro and modify its output in order to inject ages that will replace sample numbers:

dendro <- as.dendrogram(clust)
ddata <- dendro_data(dendro, type="rectangle")

## MOD the segment data frame for age instead of value order -----------------------
ddata$segments->yo
yo$xx=NA
yo$xxend=NA

for(i in 1:length(age)){
 yo$xx[which(yo$x == i)]=age[i]
 yo$xxend[which(yo$xend == i)]=age[i]
 da=age[i+1]-age[i]
 yo$xx[yo$x>i & yo$x<i+1]<-age[i]+(yo$x[yo$x>i & yo$x<i+1]-i)*da
 yo$xxend[yo$xend>i & yo$xend<i+1]<-age[i]+(yo$xend[yo$xend>i & yo$xend<i+1]-i)*da
} # Please dont ask....
ddata$segments$x<-yo$xx
ddata$segments$xend<-yo$xxend

# -----------------------------------------------------------------------------------

We finally can plot the dendrogram along with the diagram with gridExtra

dendro=ggplot(segment(ddata)) +
 geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) +
 coord_flip(xlim=c(3000,14000)) +
 ylab("Distance")+
 scale_x_reverse(breaks =seq(0,1e8,1000))+
 theme_new+theme(axis.title.y=element_blank(),
 axis.text.y=element_blank(),
 axis.ticks.y=element_blank(),
 axis.line.y = element_blank())


g1 = ggplotGrob(diag)
g2 = ggplotGrob(dendro)
g2$heights[5:7]<-g1$heights[6:8] # Important to align plots!
grid.arrange(g1, g2, widths =c(1,0.3),ncol=2,newpage = TRUE)

You can follow the overall logic to add any type of additional plot as long as its a ggplot2 object!

paleofire 1.1.9 !

A new version of the R package paleofire 1.1.9 is available on CRAN (https://CRAN.R-project.org/package=paleofire) and GitHub (https://github.com/oblarquez/paleofire). The release includes few bug fixes and improvements of existing functions, including more flexibility in pfAddData function and new options in kdffreq, see examples below:

library(paleofire)
# Control the type and format of user defined charcoal files
# here for examples csv files with three columns 
#(depth, age and char) separated with semicolon and with "." 
# character used in the file for decimal points:

head(read.csv("http://blarquez.com/public/data/Ben_area.csv"
              ,sep=";"))
files=c("http://blarquez.com/public/data/Ben_area.csv",
 "http://blarquez.com/public/data/Small_area.csv")
mydata=pfAddData(files=files, sep=";", dec=".")
# Transform and compositing:
TR=pfTransform(add=mydata, method=c("MinMax","Box-Cox","Z-Score"),
 BasePeriod=c(200,2000))
COMP=pfComposite(TR1, bins=seq(0,8000,500))
plot(COMP)

screen-shot-2016-10-13-at-9-44-55-am


# Estimate the frequency of armed conflicts from 1946 to 2014 
 # using kernel density estimation from kdffreq
 # Data from the The Uppsala Conflict Data Program (UCDP) available at: https://www.prio.org

 dat=read.csv('http://ucdp.uu.se/downloads/ucdpprio/ucdp-prio-acd-4-2016.csv')
 res=kdffreq(dat$Year,bandwidth = "bw.ucv", nbboot=1000, up = 1946, lo = 2014, interval=1, pseudo=T)
 plot(res, ylab="# armed conflict/year")

screen-shot-2016-10-13-at-10-03-22-am

paleofire 1.1.5

A new version of the R package paleofire is available on CRAN at http://cran.r-project.org/web/packages/paleofire/index.html

This release includes a new vignette on the basic usage of paleofire for reconstructing regional charcoal composite curves and a new function to easily export GCD sites to Google Earth .kml file.

Screen Shot 2014-12-01 at 3.28.06 PM

The australasian sites above have been exported using:

library(paleofire)
x=pfSiteSel(id_region=="AUST")
require(sp)
pfToKml(x, file='/Users/Olivier/Desktop/sites.kml')

 

 

paleofire package highlighted in the PAGES Magazine

The paleofire package has been highlighted in the April issue of the PAGES Magazine :

PAGES_April2014Vannière B., O. Blarquez, J. Marlon, A.-L. Daniau and M. Power. 2014. Multi-Scale Analyses of fire-climate- Vegetation Interactions on Millennial Scales. PAGES Magazine, Volume 22, Page 40.

The article presents the workshop of the Global Paleofire Working Group held in Frasne (France) on 2-6 October 2013 that was supported by PAGES. The pdf of the article can be downloaded here or directly from the PAGES website.

Introducing the paleofire package

The new R package, paleofire, has been released on CRAN. The package is dedicated to the analysis and synthesis of charcoal series contained in the Global Charcoal Database (GCD) to reconstruct past biomass burning.

paleofire is an initiative of the Global Paleofire Working Group core team, whose aim is to encourage the use of sedimentary charcoal series to develop regional to global synthesis of paleofire activity, and to ease access to the GCD data through a common research framework that will be useful to the paleofire research community. Currently, paleofire features are organized into three different parts related to (i) site selection and charcoal series extraction from GCD data; (ii) charcoal data transformation and homogenization; and (iii) charcoal series compositing and syntheses.

Installation:

paleofire is available from both CRAN website and GitHub

To install the official release (v1.0, 01/2014) from CRAN you just need to type this line at the R prompt:

 install.packages("paleofire")

To install the development version of paleofire from the GitHub repository the devtools package is required: on Windows platform the Rtools.exe program is required in order to install the devtools package. Rtools.exe can be downloaded for a specific R version on http://cran.r-project.org/bin/windows/Rtools/

Once devtools is installed type the following lines at R prompt to install paleofire:

library(devtools) 
install_github("paleofire","paleofire") 
library(paleofire)

To test everything is working you can plot a map of all charcoal records included in the Global Charcoal Database v03:

plot(pfSiteSel()) 

For details and examples about paleofire please refer to the included manual.

More examples of the capabilities of paleofire to come in the near future…

Replicated rarefied richness

This example is intended to present the methods develloped for the paper Blarquez et al. (2013). We will use the macroremain record of the Lac du Loup a small subalpine lake with continuous macoremain counts for the last 11 750 years. More details on this site can be found in Blarquez et al. (2010).

First load the macroremain data:

dataMacro=read.csv("http://blarquez.com/public/code/dataMacro.csv")
# Macroremain counts
paramMacro=read.csv("http://blarquez.com/public/code/paramMacro.csv")
# Associated depths, ages and sample volumes
dataMacro[is.na(dataMacro)]=0
# Set missing values to zero

We now caluculates the median resolution of the record and the interpolated ages at which the influx will be calculated:

resMed=median(diff(paramMacro[,3]))
Ages=seq(-50, 11750, resMed)

We reconstruct the influx matrix using the pretreatment function which is the R implementation of the CharAnalysis CharPretreatment.m function originally develloped by P. Higuera and available at https://sites.google.com/site/charanalysis
Requires a matrix as input with the following columns:
CmTop CmBot AgeTop AgeBot Volume (params)
A serie from which to calculate accumulation rates (serie)

source("http://blarquez.com/public/code/pretreatment.R")
infMacro=matrix(ncol=length(dataMacro[1,]),nrow=length(Ages))
for (i in 1:length(dataMacro[1,])){
 + infMacro[,i]=c(pretreatment(params=paramMacro,serie=dataMacro[,i],
 + yrInterp=resMed,first=-50,last=11750,
 + plotit=F,Int=F)$accI)
 + }

Calculate samples macroremain sums and the minimun influx sum:

S=rowSums(infMacro)
nMin=min(na.omit(S[1:length(S)-1]))

We exclude the last sample (not calculated because of unknown accumulation rate) and samples with NA (because of vol==0):

del=which(S==0 | is.na(S))
infMacro=infMacro[-del,]
Ages=Ages[-del]
S=S[-del]

We calculate a proportion matrix which is used to replicate the rarefaction procedure:

S_=matrix(ncol=ncol(infMacro),nrow=nrow(infMacro))
for (i in 1:length(infMacro[1,])){
 + S_[,i]=c(S)
 + }
propMat=infMacro/S_

Replicate rarefaction with min influx n=1,2,…,500.
We will use the rarefy function from the package vegan:

library(vegan)
rare_=matrix(nrow=nrow(infMacro),ncol=500)
for (n in 1:500){
 + infN=ceiling(S_*n/nMin*propMat)
 + rare_[,n]=c(rarefy(infN,sample=n))
 + }

And finnally calculates the 90 percent confidence intervals and plot the rarefied richness using a 500 years locally weighted scatter plot smoother:

CI_=t(apply(rare_, 1, quantile, probs = c(0.05, 0.5, .95), na.rm = TRUE))
span=500/resMed/length(Ages)
plot(Ages,lowess(CI_[,2], f =span)$y,type="l",ylab="E[Tn]",ylim=c(min(CI_),
 + max(CI_)),xlab="Age",main="Rarefied Richness",xlim=c(12000,-100))
lines(Ages,lowess(CI_[,1], f =span)$y,type="l",lty=2)
lines(Ages,lowess(CI_[,3], f =span)$y,type="l",lty=2)

Screen Shot 2014-02-09 at 12.41.53 PM
References:
Blarquez O., Finsinger W. and C. Carcaillet. 2013. Assessing paleo-biodiversity using low proxy influx. PLoS ONE 8(4): e65852
Blarquez O., Carcaillet C., Mourier B., Bremond L. and O. Radakovitch. 2010. Trees in the subalpine belt since 11 700 cal. BP : origin, expansion and alteration of the modern forest. The Holocene 20 : 139-146. DOI : 10.1177/0959683609348857

Download pdf vignette