<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing with OASIS Tables v3.0 20080202//EN" "journalpub-oasis3.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:oasis="http://docs.oasis-open.org/ns/oasis-exchange/table" xml:lang="en" dtd-version="3.0">
  <front>
    <journal-meta><journal-id journal-id-type="publisher">ESSD</journal-id><journal-title-group>
    <journal-title>Earth System Science Data</journal-title>
    <abbrev-journal-title abbrev-type="publisher">ESSD</abbrev-journal-title><abbrev-journal-title abbrev-type="nlm-ta">Earth Syst. Sci. Data</abbrev-journal-title>
  </journal-title-group><issn pub-type="epub">1866-3516</issn><publisher>
    <publisher-name>Copernicus Publications</publisher-name>
    <publisher-loc>Göttingen, Germany</publisher-loc>
  </publisher></journal-meta>
    <article-meta>
      <article-id pub-id-type="doi">10.5194/essd-10-1613-2018</article-id><title-group><article-title>A rescued dataset of sub-daily meteorological observations for Europe and
the southern <?xmltex \hack{\break}?>Mediterranean region, 1877–2012</article-title><alt-title>Meteorological observations for Europe and
the southern Mediterranean region</alt-title>
      </title-group><?xmltex \runningtitle{Meteorological observations for Europe and
the southern Mediterranean region}?><?xmltex \runningauthor{L. Ashcroft et al.}?>
      <contrib-group>
        <contrib contrib-type="author" corresp="yes" rid="aff1 aff2">
          <name><surname>Ashcroft</surname><given-names>Linden</given-names></name>
          <email>linden.ashcroft@bom.gov.au</email>
        <ext-link>https://orcid.org/0000-0003-3898-6648</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Coll</surname><given-names>Joan Ramon</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Gilabert</surname><given-names>Alba</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Domonkos</surname><given-names>Peter</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1 aff3">
          <name><surname>Brunet</surname><given-names>Manola</given-names></name>
          
        <ext-link>https://orcid.org/0000-0002-9386-710X</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Aguilar</surname><given-names>Enric</given-names></name>
          
        <ext-link>https://orcid.org/0000-0002-8384-377X</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Castella</surname><given-names>Mercè</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Sigro</surname><given-names>Javier</given-names></name>
          
        <ext-link>https://orcid.org/0000-0003-0969-0338</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff3">
          <name><surname>Harris</surname><given-names>Ian</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff4">
          <name><surname>Unden</surname><given-names>Per</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff3 aff5">
          <name><surname>Jones</surname><given-names>Phil</given-names></name>
          
        <ext-link>https://orcid.org/0000-0001-5032-5493</ext-link></contrib>
        <aff id="aff1"><label>1</label><institution>Centre for Climate Change, Department of Geography, Universitat Rovira
i Virgili, Tarragona, Spain</institution>
        </aff>
        <aff id="aff2"><label>2</label><institution>Australian Bureau of Meteorology, Melbourne, Australia</institution>
        </aff>
        <aff id="aff3"><label>3</label><institution>Climate Research Unit, School of Environmental Sciences, University of
East Anglia, Norwich, UK</institution>
        </aff>
        <aff id="aff4"><label>4</label><institution>Swedish Meteorological and Hydrological Institute, Folkborgsvägen,
Norrköping, Sweden</institution>
        </aff>
        <aff id="aff5"><label>5</label><institution>Center of Excellence for Climate Change Research, Department of
Meteorology, King Abdulaziz University, Jeddah, Saudi Arabia</institution>
        </aff>
      </contrib-group>
      <author-notes><corresp id="corr1">Linden Ashcroft (linden.ashcroft@bom.gov.au)</corresp></author-notes><pub-date><day>10</day><month>September</month><year>2018</year></pub-date>
      
      <volume>10</volume>
      <issue>3</issue>
      <fpage>1613</fpage><lpage>1635</lpage>
      <history>
        <date date-type="received"><day>27</day><month>March</month><year>2018</year></date>
           <date date-type="rev-request"><day>3</day><month>May</month><year>2018</year></date>
           <date date-type="rev-recd"><day>8</day><month>August</month><year>2018</year></date>
           <date date-type="accepted"><day>10</day><month>August</month><year>2018</year></date>
      </history>
      <permissions>
        
        
      <license license-type="open-access"><license-p>This work is licensed under the Creative Commons Attribution 4.0 International License. To view a copy of this licence, visit <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link></license-p></license></permissions><self-uri xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018.html">This article is available from https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018.html</self-uri><self-uri xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018.pdf">The full text article is available as a PDF file from https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018.pdf</self-uri>
      <abstract>
    <p id="d1e201">Sub-daily meteorological observations are needed for input to and assessment
of high-resolution reanalysis products to improve understanding of weather
and climate variability. While there are millions of such weather
observations that have been collected by various organisations, many are yet
to be transcribed into a useable format.</p>
    <p id="d1e204">Under the auspices of the Uncertainties in Ensembles of Regional
ReAnalyses (UERRA) project, we
describe the compilation and development of a digital dataset of 8.8 million
meteorological observations of essential climate variables (ECVs) rescued
across the European and southern Mediterranean region. By presenting the
entire chain of data preparation, from the identification of regions lacking
in digitised sub-daily data and the location of original sources, through the
digitisation of the observations to the quality control procedures applied,
we provide a rescued dataset that is as traceable as possible for use by the
research community.</p>
    <p id="d1e207">Data from 127 stations and of 15 climate variables in the northern African and
European sectors have been prepared for the period 1877 to 2012. Quality
control of the data using a two-step semi-automatic statistical approach
identified 3.5 % of observations that required correction or removal, on
par with previous data rescue efforts.</p>
    <p id="d1e210">In addition to providing a new sub-daily meteorological dataset for the
research community, our experience in the development of this
sub-daily dataset gives us an
opportunity to share some suggestions for future data rescue projects.</p>
    <p id="d1e213">All versions of the dataset, from the raw digitised data to data that have
been quality controlled and converted to standard units, are available on
PANGAEA: <ext-link xlink:href="https://doi.org/10.1594/PANGAEA.886511" ext-link-type="DOI">10.1594/PANGAEA.886511</ext-link> (Ashcroft et al., 2018).</p>
  </abstract>
    </article-meta>
  </front>
<body>
      

<?pagebreak page1614?><sec id="Ch1.S1" sec-type="intro">
  <title>Introduction</title>
      <p id="d1e226">Digitising meteorological observations into a useable modern format is
crucial for long-term climate monitoring and meteorological service
development. High-quality observations are needed for almost all aspects of
meteorological and climatological research, but many spatial and temporal
gaps still exist in data products currently used by the international
research community (Brunet and Jones, 2011). For this reason,
meteorological data rescue and recovery is becoming increasingly important,
particularly in developing countries and for the early instrumental period,
as data are often only available in paper format and are at great risk of
being permanently lost (Brunet and Jones, 2011;
Page et al., 2004; World Meteorological Organization, 2016).</p>
      <p id="d1e229">In the last 20 years, many initiatives have been established to recover and
digitise land-based meteorological observations at national, regional and
international scales. The Atmospheric Circulation Reconstructions over the
Earth initiative (ACRE, Allan et al., 2011) coordinates climate data rescue
across the globe, while other projects such as MEditerranean DAta REscue
(MEDARE, <uri>www.omm.urv.cat/MEDARE/index.html</uri>, last access:
4 February 2018) and Historical Instrumental Climatological Surface Time
Series Of The Greater Alpine Region (HISTALP, <uri>www.zamg.ac.at/histalp</uri>,
last access: 6 May 2018) focus on particular regions (Auer et al., 2007;
Brunet et al., 2014a, b). Additional initiatives on a national to regional
scale, led by meteorological agencies (e.g. Kaspar et al., 2015) and research
projects (e.g. Ashcroft et al., 2014; Brunet et al., 2006, 2014a), have also
located and digitised meteorological observations, and ensured that they are
made available to the scientific community.</p>
      <p id="d1e238">Many of these projects have focused on the rescue of daily, monthly and/or
annually-averaged data, as these observations form the basis of long-term
climate analysis. Daily maximum temperature, minimum temperature and
precipitation totals are often the top priority for digitisation, because
these variables are used to monitor changes in climate and the incidences of
extreme weather events, both of which are important for the economic and
agricultural sectors (Brunet et al., 2006; Moberg et al., 2006). The
development of the 20th Century Reanalysis product – which uses only
sub-daily atmospheric pressure observations as input for a global reanalysis
– has also benefited from national and regional data rescue activities,
resulting in an increase in atmospheric pressure data recovery in recent
years (Compo et al., 2011; Cram et al., 2015).</p>
      <p id="d1e241">Far fewer recovery efforts have been made to uncover sub-daily meteorological
observations of other variables. We define sub-daily variables here as
variables observed at least once a day, up to every half an hour. These data,
rather than daily values or monthly
averages, are necessary input for global and regional reanalysis products,
which can greatly improve understanding of atmospheric circulation and of
high-temporal resolution extreme events (e.g. Cannon et al., 2015; Stickler
et al., 2014).</p>
      <p id="d1e245">This paper presents the experience and resultant dataset of a 2-year
digitisation effort aimed at recovering sub-daily meteorological data across
the European region. Our work formed part of Uncertainties in Ensembles of
Regional ReAnalyses (UERRA,
<uri>http://uerra.eu/</uri>, last access: 27 August 2018), a project under the
European Union 7th Framework Programme. The goal of UERRA was to produce
ensembles of European regional reanalyses at high temporal resolution for
several decades, with an estimate of the associated uncertainties in the
resulting datasets. A key component of UERRA was the recovery of sub-daily
surface meteorological observations to provide input to and assess the
quality of future regional reanalysis products.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F1" specific-use="star"><caption><p id="d1e253">Stations with monthly mean sea level pressure data in MARS across
the three identified regions of interest: <bold>(a)</bold> the Mediterranean,
<bold>(b)</bold> eastern Europe and <bold>(c)</bold> Scandinavia. The shade and size
of the symbols indicate the percentage of data available for 1957–2010.</p></caption>
        <?xmltex \igopts{width=341.433071pt}?><graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-f01.jpg"/>

      </fig>

      <p id="d1e271">In this paper we describe our complete data rescue process to provide
as much detail as possible for a fully traceable dataset. We
present the methods used to minimise errors in the digitisation process and
the steps required to take the data from a disparate set of sources to a
unified database. In Sect. 2 we explain how we identified target regions and
likely sources for data rescue across Europe and the neighbouring southern
Mediterranean region to maximise improvement in spatial and temporal coverage
of existing data. In Sect. 3 we provide details on the quality assurance and
control procedures used to reduce errors in the dataset, including visual
checks, semi-automatic statistical methods and an automatic spatial
comparison method. We present the dataset and quality control (QC) results in
Sect. 4. Finally, we give some practical ideas for future data recovery
projects based on our experiences with this particular project, as well as
details about how to access the data.</p>
</sec>
<sec id="Ch1.S2">
  <title>Methods and materials</title>
<sec id="Ch1.S2.SS1">
  <title>Identifying gaps in sub-daily data availability</title>
      <p id="d1e285">The primary goal of the data rescue efforts within UERRA was to improve
spatial coverage of input data for future regional gridded and reanalysis
climate products over the European domain. Our aim was not to develop
single, long-term data series for particular stations, but rather improve
the availability of sub-daily observations anywhere that may be
underrepresented in the current observational data used for European
reanalysis products. This involved, as a first step, identifying the basic
station data used in current reanalysis products available at the European
Centre for Medium-Range Weather Forecasts (ECMWF) and other relevant databases
that contain digitised observations.</p>
      <p id="d1e288">To identify gaps in the available sub-daily climate record, we first
conducted a visual examination of the data holdings of the International
Surface Pressure Databank (ISPD, Cram et al., 2015) and the Koninklijk
Nederlands Meteorologisch Instituut (KNMI) European Climate Assessment and
Dataset (ECA&amp;D: <uri>http://eca.knmi.nl/</uri>, last access: 12<?pagebreak page1615?> January 2018).
These databanks provide station lists, regularly updated datasets and online
visualisation tools, making it relatively straightforward to identify the
regions lacking in sub-daily data. We also examined the holdings of the
national climate data systems of countries whose data may not yet be in a
multi-national repository. In particular, we checked the data available from
the national climate data management systems of countries that had not been
included in previous regional data rescue projects, namely the Romanian
Meteorological Administration (NMA-RO) and the national meteorological and
hydrological services (NMHSs) of countries in the western Balkans, including
Albania, Bosnia-Herzegovina, the Republic of Macedonia, Montenegro and the
Republic of Serbia. With this data availability information, we identified
the Mediterranean, eastern Europe and Scandinavia as three key sub-regions
within the European sector as lacking in sub-daily data.</p>
      <p id="d1e294">We then conducted an extensive examination of the data available for these
regions within the Meteorological Archival and Retrieval System (MARS) at
ECMWF. MARS is home to the primary data input for the current European
reanalysis products available from ECMWF (e.g. Dahlgren et al., 2016), and so
stations that are identified in data sources (see Sect. 2.2) but not present
in MARS, or stations with low percentages of sub-daily data, are likely
candidates for data recovery. Interrogating the MARS holdings is not as
straightforward as ISPD or ECA&amp;D due to the extremely large number of data
sources stored in the system and the registrations required, which is why we
conducted our search in this order.</p>
      <p id="d1e297">We focussed our search on the three data-sparse sub-regions in the post-1957
period, to align with the temporal focus of the proposed UERRA regional
reanalysis products and ECMWF historical reanalyses such as ERA-20C
(<uri>https://www.ecmwf.int/en/research/climate-reanalysis/era-20c</uri>, last
access: 22 July 2018). The variables of interest were several atmospheric and
terrestrial essential climate variables (ECVs) as defined by the Global
Climate Observing System (GCOS, World Meteorological Organization, 2015) that
were identified as important for the development and verification of regional
reanalyses: air temperature (TT), atmospheric pressure (sea level pressure,
PP, and station level pressure, SP), wind speed (WS), wind direction (WD),
relative humidity (RH), dew point temperature (DP), daily rainfall (RR),
fresh snowfall (FS) and snow depth (SD).</p>
      <?pagebreak page1616?><p id="d1e304">The high percentage of stations with data for less than 60 % of the
1957–2010 period in MARS (Fig. 1) illustrates the lack of sub-daily
observations in these sectors. Gaps are clear in the southern and eastern
Mediterranean countries, Sweden, and Norway for the 1960s and 1970s
(Table S1 in the Supplement), as well as across the Balkan region. The relatively dense spatial
coverage of the stations with less than 60 % data coverage also suggests
that sub-daily observations may have been taken at many places in these
regions, but have not yet been made available in a standardised format.</p>

<?xmltex \floatpos{t}?><table-wrap id="Ch1.T1" specific-use="star"><caption><p id="d1e310">An overview of the data sources used in this project. More
information on the precise temporal coverage of each location and units are
provided with the dataset, available on PANGAEA (Ashcroft et al., 2018).
Variables given in bold in the variable column have been digitised as part of
this project: not all available variables and time periods were digitised in
this project due to time and funding constraints. Each source can be found at
<uri>ftp://130.206.36.123</uri> (last access: 4 June 2018), u: C3_UERRA, p:
c3uerra17, folder: C3_UERRA_datasources_images, where the sources are
listed under their source code. In the source location column, NOAA-CDMP
represents the National Oceanic and Atmospheric Administration Climate Data
Modernisation Project. The variables are represented by acronyms similar to
those used in the main text: temperature (TT), relative humidity (RH), dew
point temperature (DP), mean sea level pressure (PP), station level
pressure (SP), wind direction (WD), wind speed (WS), wet bulb temperature
(WB), precipitation (RR), snow depth (SD) and fresh snow
(FS), maximum temperature (Tmax) and minimum temperature (Tmin).</p></caption><oasis:table frame="topbot"><?xmltex \begin{scaleboxenv}{.7}[.7]?><oasis:tgroup cols="8">
     <oasis:colspec colnum="1" colname="col1" align="justify" colwidth="65.441339pt"/>
     <oasis:colspec colnum="2" colname="col2" align="justify" colwidth="71.13189pt"/>
     <oasis:colspec colnum="3" colname="col3" align="justify" colwidth="65.441339pt"/>
     <oasis:colspec colnum="4" colname="col4" align="justify" colwidth="85.358268pt"/>
     <oasis:colspec colnum="5" colname="col5" align="justify" colwidth="56.905512pt"/>
     <oasis:colspec colnum="6" colname="col6" align="justify" colwidth="56.905512pt"/>
     <oasis:colspec colnum="7" colname="col7" align="justify" colwidth="56.905512pt"/>
     <oasis:colspec colnum="8" colname="col8" align="justify" colwidth="142.26378pt"/>
     <oasis:thead>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Source code and <?xmltex \hack{\hfill\break}?>name</oasis:entry>
         <oasis:entry colname="col2">Country (or<?xmltex \hack{\hfill\break}?>countries) <?xmltex \hack{\hfill\break}?>covered</oasis:entry>
         <oasis:entry colname="col3">Time period <?xmltex \hack{\hfill\break}?>covered<?xmltex \hack{\hfill\break}?>(continuous)</oasis:entry>
         <oasis:entry colname="col4">Source provider</oasis:entry>
         <oasis:entry colname="col5">Primary or <?xmltex \hack{\hfill\break}?>secondary</oasis:entry>
         <oasis:entry colname="col6">Typed or hand <?xmltex \hack{\hfill\break}?>written</oasis:entry>
         <oasis:entry colname="col7">Surface <?xmltex \hack{\hfill\break}?>variables</oasis:entry>
         <oasis:entry colname="col8">Details</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">So04: Bulletin <?xmltex \hack{\hfill\break}?>Météorologique<?xmltex \hack{\hfill\break}?>de l'Algérie</oasis:entry>
         <oasis:entry colname="col2">Algeria</oasis:entry>
         <oasis:entry colname="col3">1877–1938</oasis:entry>
         <oasis:entry colname="col4">NOAA-CDMP</oasis:entry>
         <oasis:entry colname="col5">Secondary</oasis:entry>
         <oasis:entry colname="col6">Hand-written</oasis:entry>
         <oasis:entry colname="col7"><bold>TT, PP, WS, WD</bold>, Cloud,<?xmltex \hack{\hfill\break}?>weather conditions, RR,<?xmltex \hack{\hfill\break}?>Tmax, Tmin</oasis:entry>
         <oasis:entry colname="col8">Sub-daily observations, multiple variables and stations per day on each page, average readability and some issues with chronological order of pages. One file per year.</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">So12: Annales <?xmltex \hack{\hfill\break}?>de l'Observatoire<?xmltex \hack{\hfill\break}?>de Ksara</oasis:entry>
         <oasis:entry colname="col2">Lebanon</oasis:entry>
         <oasis:entry colname="col3">1921–1971</oasis:entry>
         <oasis:entry colname="col4">NOAA-CDMP</oasis:entry>
         <oasis:entry colname="col5">Secondary</oasis:entry>
         <oasis:entry colname="col6">Hand-written</oasis:entry>
         <oasis:entry colname="col7"><bold>TT, PP, RH</bold>, summaries of<?xmltex \hack{\hfill\break}?>wind, sunshine, evaporation, rainfall, clouds,<?xmltex \hack{\hfill\break}?>weather</oasis:entry>
         <oasis:entry colname="col8">Hourly observations, one variable per month per station on each page, good readability and source in good chronological order. One file per year.</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">So06: Bulletin <?xmltex \hack{\hfill\break}?>Meteorologique du Maroc</oasis:entry>
         <oasis:entry colname="col2">Morocco, Algeria</oasis:entry>
         <oasis:entry colname="col3">1953–1968, 1977–1978</oasis:entry>
         <oasis:entry colname="col4">NOAA-CDMP</oasis:entry>
         <oasis:entry colname="col5">Secondary</oasis:entry>
         <oasis:entry colname="col6">Typed</oasis:entry>
         <oasis:entry colname="col7"><bold>TT, PP, ST, DP, WS, WD</bold>, Cloud, RR,<?xmltex \hack{\hfill\break}?>Tmax, Tmin</oasis:entry>
         <oasis:entry colname="col8">Sub-daily observations, multiple variables and stations per day on each page, good readability and source in good chronological order. One file per year.</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">So63: Cyprus <?xmltex \hack{\hfill\break}?>Meteorological Returns</oasis:entry>
         <oasis:entry colname="col2">Cyprus</oasis:entry>
         <oasis:entry colname="col3">1881–1922</oasis:entry>
         <oasis:entry colname="col4">UK Met Office</oasis:entry>
         <oasis:entry colname="col5">Primary</oasis:entry>
         <oasis:entry colname="col6">Hand-written</oasis:entry>
         <oasis:entry colname="col7"><bold>TT</bold>, ST, WS,<?xmltex \hack{\hfill\break}?>WD, Cloud,<?xmltex \hack{\hfill\break}?>RR</oasis:entry>
         <oasis:entry colname="col8">Sub-daily observations, multiple variables for one station on each page, good readability and in good chronological order. One file per month.</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">So62: German <?xmltex \hack{\hfill\break}?>station observing <?xmltex \hack{\hfill\break}?>books</oasis:entry>
         <oasis:entry colname="col2">Germany</oasis:entry>
         <oasis:entry colname="col3">1958–1978</oasis:entry>
         <oasis:entry colname="col4">German Meteorological Service DWD</oasis:entry>
         <oasis:entry colname="col5">Primary</oasis:entry>
         <oasis:entry colname="col6">Hand-written</oasis:entry>
         <oasis:entry colname="col7"><bold>TT, PP, ST, DP, RH, WB, WD, WS, RR, FS, SD</bold></oasis:entry>
         <oasis:entry colname="col8">Daily snowfall data for some stations, one station per month on each page, hourly or half-hourly observations provided for two locations (Dresden and Brocken), multiple variables for one station per day on each page. One file per year. Good readability and in good chronological order.</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">So16: Egypt daily weather report</oasis:entry>
         <oasis:entry colname="col2">Egypt</oasis:entry>
         <oasis:entry colname="col3">1907–1957</oasis:entry>
         <oasis:entry colname="col4">NOAA-CDMP</oasis:entry>
         <oasis:entry colname="col5">Secondary</oasis:entry>
         <oasis:entry colname="col6">Hand-written</oasis:entry>
         <oasis:entry colname="col7"><bold>TT, PP, RH, DP, WS, WD</bold>, RR, Cloud, Visibility, Weather</oasis:entry>
         <oasis:entry colname="col8">Sub-daily observations from multiple stations and variables for 1 day on each page, average readability and some issues with chronological order. One file per year.</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">So54: Instituto <?xmltex \hack{\hfill\break}?>Nacional de Meteorología Banco <?xmltex \hack{\hfill\break}?>de Datos</oasis:entry>
         <oasis:entry colname="col2">Spain</oasis:entry>
         <oasis:entry colname="col3">1954–1984</oasis:entry>
         <oasis:entry colname="col4">Provided by MeteoCat, but containing data <?xmltex \hack{\hfill\break}?>from the Spanish Meteorological Agency <?xmltex \hack{\hfill\break}?>(AEMET)</oasis:entry>
         <oasis:entry colname="col5">Secondary</oasis:entry>
         <oasis:entry colname="col6">Hand-written and typed</oasis:entry>
         <oasis:entry colname="col7"><bold>TT, PP, ST, RH, DP, WD, WS</bold>, Cloud, RR</oasis:entry>
         <oasis:entry colname="col8">Sub-daily data one station per month on each page, good readability and in good chronological order. One file per month.</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">So56: Meteo- <?xmltex \hack{\hfill\break}?>roloski godisnjak<?xmltex \hack{\hfill\break}?>1 – klimatoloski <?xmltex \hack{\hfill\break}?>podaci</oasis:entry>
         <oasis:entry colname="col2">Bosnia-Herzegovina, Croatia, <?xmltex \hack{\hfill\break}?>Republic of Serbia</oasis:entry>
         <oasis:entry colname="col3">1949–2012</oasis:entry>
         <oasis:entry colname="col4">Provided by the Republic Hydrometeorological Institute of Serbia</oasis:entry>
         <oasis:entry colname="col5">Secondary</oasis:entry>
         <oasis:entry colname="col6">Typed</oasis:entry>
         <oasis:entry colname="col7"><bold>TT, ST, RH, WS, WD</bold>,<?xmltex \hack{\hfill\break}?>Vapour pressure, RR, SD, Cloud, Visibility, Weather</oasis:entry>
         <oasis:entry colname="col8">Sub-daily data one station per month on each page, good readability and in good chronological order. One file per year.</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">So64: Rocenka-annuaire</oasis:entry>
         <oasis:entry colname="col2">Czech Republic, Slovak Republic</oasis:entry>
         <oasis:entry colname="col3">1940–1968</oasis:entry>
         <oasis:entry colname="col4">NOAA-CDMP</oasis:entry>
         <oasis:entry colname="col5">Secondary</oasis:entry>
         <oasis:entry colname="col6">Hand-written</oasis:entry>
         <oasis:entry colname="col7"><bold>TT, PP, ST, RH, WS,</bold><?xmltex \hack{\hfill\break}?> <bold>WD</bold>, Visibility, Cloud, RR,<?xmltex \hack{\hfill\break}?>Weather</oasis:entry>
         <oasis:entry colname="col8">Sub-daily data one station per month on each page, good readability and in good chronological order. One file per year.</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">So65: Slovenian <?xmltex \hack{\hfill\break}?>meteorological observing books</oasis:entry>
         <oasis:entry colname="col2">Slovenia</oasis:entry>
         <oasis:entry colname="col3">1950–1978</oasis:entry>
         <oasis:entry colname="col4">Provided by the Slovenian Environmental <?xmltex \hack{\hfill\break}?>Agency</oasis:entry>
         <oasis:entry colname="col5">Primary</oasis:entry>
         <oasis:entry colname="col6">Hand-written</oasis:entry>
         <oasis:entry colname="col7"><bold>TT, PP, ST, RH, DP, SD, FS, RR, WS, WD</bold>, WB,<?xmltex \hack{\hfill\break}?>Cloud, Visibility, Weather</oasis:entry>
         <oasis:entry colname="col8">Hourly data, one station per day on each page, in good chronological order but difficult to read at times. One file per day.</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">So61: Yillik Meteoroloji Bülteni</oasis:entry>
         <oasis:entry colname="col2">Turkey</oasis:entry>
         <oasis:entry colname="col3">1962–1971</oasis:entry>
         <oasis:entry colname="col4">NOAA-CDMP</oasis:entry>
         <oasis:entry colname="col5">Secondary</oasis:entry>
         <oasis:entry colname="col6">Typed</oasis:entry>
         <oasis:entry colname="col7"><bold>TT, ST, RH, WS, WD</bold>,<?xmltex \hack{\hfill\break}?>Tmax, Tmin,<?xmltex \hack{\hfill\break}?>Cloud, Evaporation, RR, <?xmltex \hack{\hfill\break}?>Weather</oasis:entry>
         <oasis:entry colname="col8">Sub-daily data one station per month on each page, good readability and in good chronological order. One file per year.</oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup><?xmltex \end{scaleboxenv}?></oasis:table></table-wrap>

</sec>
<sec id="Ch1.S2.SS2">
  <title>Locating and assessing scans of sub-daily data sources</title>
      <p id="d1e790">As well as identifying gaps in the digitised sub-daily record available for
Europe, we also needed to locate sources of undigitised sub-daily data. We
undertook extensive consultation with NMHSs across the three identified
regions of poor data coverage, in an attempt to identify and recover paper or
scanned data sources suitable for digitisation. Priority was given to data
sources already available as scanned images, stations with data from the
post-1957 period, and stations where the selected ECVs were recorded (see
Sect. 2.1). Recovered precipitation observations from NMA-RO were digitised
internally, and then provided to us in digitised quality-controlled format,
using a similar quality control format to that used in this study (see
Sect. 3.2). Discussion with the Norwegian and Swedish NMHSs uncovered data for
these countries that had been digitised, but were not yet provided to
international data repositories. Similarly, the Catalan Meteorological
Service (MeteoCat), which has an open data policy, allowed their digitised
data for the recent 1998–2015 period to be transferred to relevant global
repositories through our effort. Data sharing was organised between these
regions and ECMWF without the need for observations to be transcribed from
paper format and will therefore not be discussed further here. Political and
financial difficulties prevented many other countries we contacted,
particularly in northern Africa and the Balkan regions, from providing
original data sources to us for digitisation.</p>
      <p id="d1e793">Original data sources were provided in scanned format by Deutscher
Wetterdienst (the German Meteorological Office, DWD), the Slovenian
Environmental Agency (SEA), and Agencia Estatal de Meteorología (the
Spanish Meteorological Service, AEMET), via MeteoCat. Close consultation
with these NMHSs enabled us to identify valuable and previously undigitised
data sources. From these sources, stations with minimal data available in
MARS were selected for digitisation.</p>
      <p id="d1e796">The World Meteorological Organization (WMO) Mediterranean data rescue
initiative MEDARE and the precursor project to UERRA, the European Reanalysis
and Observations for Monitoring project (EURO4M, <uri>http://www.euro4m.eu/</uri>,
last access: 12 January 2018), located key records of data for the Middle
Eastern, Balkan and southern Mediterranean regions from the Serbian NMHS
online climatological scanned repository
(<uri>http://www.hidmet.gov.rs/ciril/meteorologija/klimatologija_godisnjaci.php</uri>,
last access: 4 June 2018), the United States of America's National Oceanic
and Atmospheric Administration/National Climatic Data Center (NOAA/NCDC)
Climate Data Modernization Project (CDMP:
<uri>http://library.noaa.gov/Collections/Digital-Documents/Foreign-Climate-Data-Home</uri>,
last access: 8 August 2018), the British Atmospheric Data Centre (BADC,
<uri>http://badc.nerc.ac.uk/browse/badc/corral/images/metobs</uri>, last access:
8 August 2018), and other national meteorological services (see Brunet et
al., 2014a, b for details). Daily maximum and minimum temperature,
precipitation, and sub-daily atmospheric air pressure observations from some
of these sources were digitised under the auspices of EURO4M and MEDARE, but
many other observations were unable to be transcribed due to project
constraints. UERRA therefore provides a valuable opportunity to rescue the
previously undigitised values from these sources (Brunet et al., 2014b).</p>
      <p id="d1e811">Table 1 provides details of the data sources identified for digitisation,
while Fig. 2 shows several examples of the data sources used. All of the
variables included in each source are listed in Table 1, although not all
were digitised under the auspices of UERRA. The majority of data sources from
CDMP are secondary, meaning that they are collations or summaries of
observations that have been prepared in a central location. Unfortunately,
secondary data sources are more prone to transcription errors than original
series, as they have been transferred from the original readings. Many were
handwritten, although a small subset was typed.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F2" specific-use="star"><caption><p id="d1e817">Examples of the different data source formats found for
digitisation: <bold>(a)</bold> Egypt, 1939, where each row is observations from a
different station on 1 day; <bold>(b)</bold> Morocco, 1968, where each row is
observations from a different station on 1 day; <bold>(c)</bold> Kredarica,
Slovenia, 1970, where each row is observations of a different variable for one
station on 1 day; <bold>(d)</bold> Ksara, Lebanon, 1939, where each row is
atmospheric pressure data for 1 day at one station. Data images are
available online from the Universitat Rovira i Virgili's Centre for Climate Change
(see Sect. 6).</p></caption>
          <?xmltex \igopts{width=455.244094pt}?><graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-f02.jpg"/>

        </fig>

</sec>
<sec id="Ch1.S2.SS3">
  <title>Digitising method</title>
      <p id="d1e844">Once data sources had been identified and catalogued, a group of 11
digitisers were employed for 15 h a week over a 2-year period to digitise
the data. The digitisation team was made up of undergraduate and postgraduate
geography students from the Universitat Rovira i Virgili (URV), who all had
some knowledge of meteorological variables and European climate. The
digitisers worked on desktop computers in a computer lab, with large screens
and standard keyboards. They were also given the option of working from home
on their personal laptops.</p>
      <?pagebreak page1618?><p id="d1e847">The digitisers received initial training sessions, online instructions and
monthly in-person meetings to discuss issues and introduce new digitisation
tasks. Digitisation was done using a “key as you see” method, meaning that
the digitisers typed the values they could read in the data images, rather
than using any coding system. This follows standard best practice outlined by
the WMO (2016). Clear, unambiguous errors in the data sources were generally
retained by the digitisers and recorded in station metadata files, which were
later used when quality controlling the data (see Sect. 3). If a digitiser
could not read a value due to poor handwriting or scanning issues, they
represented it by a value of <inline-formula><mml:math id="M1" display="inline"><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">88.8</mml:mn></mml:mrow></mml:math></inline-formula>, while missing values were set to
<inline-formula><mml:math id="M2" display="inline"><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">99.9</mml:mn></mml:mrow></mml:math></inline-formula>.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F3" specific-use="star"><caption><p id="d1e872">Examples of the templates used in data digitisation. Shaded rows and
columns in the templates represent data that are not to be digitised.
<bold>(a)</bold> The template for the Slovenian data sources picks out the rows
that require digitising: wind direction (WD), wind speed (WS), atmospheric
pressure (SLP), temperature (T), relative humidity (RH), precipitation (P),
snow depth (SD) and fresh snow (FS). Note that rows for the daily values are
formatted to match the location of the data in the original source.
<bold>(b)</bold> The template for temperature data from Spanish data sources with
the columns labelled with variables and hours: dry bulb temperature (TD),
relative humidity (HU) and dew point temperature
(PR).</p></caption>
          <?xmltex \igopts{width=312.980315pt}?><graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-f03.png"/>

        </fig>

<?xmltex \floatpos{t}?><table-wrap id="Ch1.T2" specific-use="star"><caption><p id="d1e891">List of conversions applied to digitised data, where <inline-formula><mml:math id="M3" display="inline"><mml:mi>x</mml:mi></mml:math></inline-formula> represents
the original unit and <inline-formula><mml:math id="M4" display="inline"><mml:mi>y</mml:mi></mml:math></inline-formula> is the converted value. Full details of the
conversions applied to data from each station are given in Table S3.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="4">
     <oasis:colspec colnum="1" colname="col1" align="justify" colwidth="65.441339pt"/>
     <oasis:colspec colnum="2" colname="col2" align="justify" colwidth="71.13189pt"/>
     <oasis:colspec colnum="3" colname="col3" align="left"/>
     <oasis:colspec colnum="4" colname="col4" align="justify" colwidth="256.074803pt"/>
     <oasis:thead>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">Original units</oasis:entry>
         <oasis:entry colname="col3">Final units</oasis:entry>
         <oasis:entry colname="col4">Details</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>
         <oasis:entry colname="col1">Wind speed <?xmltex \hack{\hfill\break}?>conversions</oasis:entry>
         <oasis:entry rowsep="1" colname="col2">Beaufort scale</oasis:entry>
         <oasis:entry rowsep="1" colname="col3">m s<inline-formula><mml:math id="M5" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></oasis:entry>
         <oasis:entry rowsep="1" colname="col4">Replacement of <inline-formula><mml:math id="M6" display="inline"><mml:mi>x</mml:mi></mml:math></inline-formula> with <inline-formula><mml:math id="M7" display="inline"><mml:mi>y</mml:mi></mml:math></inline-formula> using the following map: <inline-formula><mml:math id="M8" display="inline"><mml:mrow><mml:mn mathvariant="normal">0</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">0.0</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M9" display="inline"><mml:mrow><mml:mn mathvariant="normal">1</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">1.0</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M10" display="inline"><mml:mrow><mml:mn mathvariant="normal">2</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">2.6</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M11" display="inline"><mml:mrow><mml:mn mathvariant="normal">3</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">4.6</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M12" display="inline"><mml:mrow><mml:mn mathvariant="normal">4</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">6.7</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M13" display="inline"><mml:mrow><mml:mn mathvariant="normal">5</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">9.3</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M14" display="inline"><mml:mrow><mml:mn mathvariant="normal">6</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">12.3</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M15" display="inline"><mml:mrow><mml:mn mathvariant="normal">7</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">15.4</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M16" display="inline"><mml:mrow><mml:mn 
mathvariant="normal">8</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">19</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M17" display="inline"><mml:mrow><mml:mn mathvariant="normal">9</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">22.6</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M18" display="inline"><mml:mrow><mml:mn mathvariant="normal">10</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">26.8</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M19" display="inline"><mml:mrow><mml:mn mathvariant="normal">11</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">30.9</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M20" display="inline"><mml:mrow><mml:mn mathvariant="normal">12</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">35</mml:mn></mml:mrow></mml:math></inline-formula>, from WMO Code 1100 (Da Silva et al., 1995)</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry rowsep="1" colname="col2">Turkish 17-point <?xmltex \hack{\hfill\break}?>power scale</oasis:entry>
         <oasis:entry rowsep="1" colname="col3">m s<inline-formula><mml:math id="M21" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></oasis:entry>
         <oasis:entry rowsep="1" colname="col4">Replacement of <inline-formula><mml:math id="M22" display="inline"><mml:mi>x</mml:mi></mml:math></inline-formula> with <inline-formula><mml:math id="M23" display="inline"><mml:mi>y</mml:mi></mml:math></inline-formula> using the following map: <inline-formula><mml:math id="M24" display="inline"><mml:mrow><mml:mn mathvariant="normal">0</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">0.0</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M25" display="inline"><mml:mrow><mml:mn mathvariant="normal">1</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">0.9</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M26" display="inline"><mml:mrow><mml:mn mathvariant="normal">2</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">2.4</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M27" display="inline"><mml:mrow><mml:mn mathvariant="normal">3</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">4.4</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M28" display="inline"><mml:mrow><mml:mn mathvariant="normal">4</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">6.7</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M29" display="inline"><mml:mrow><mml:mn mathvariant="normal">5</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">9.3</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M30" display="inline"><mml:mrow><mml:mn mathvariant="normal">6</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">12.3</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M31" display="inline"><mml:mrow><mml:mn mathvariant="normal">7</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">15.5</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M32" display="inline"><mml:mrow><mml:mn 
mathvariant="normal">8</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">18.9</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M33" display="inline"><mml:mrow><mml:mn mathvariant="normal">9</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">22.6</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M34" display="inline"><mml:mrow><mml:mn mathvariant="normal">10</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">26.4</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M35" display="inline"><mml:mrow><mml:mn mathvariant="normal">11</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">30.5</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M36" display="inline"><mml:mrow><mml:mn mathvariant="normal">12</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">34.8</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M37" display="inline"><mml:mrow><mml:mn mathvariant="normal">13</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">39.2</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M38" display="inline"><mml:mrow><mml:mn mathvariant="normal">14</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">43.8</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M39" display="inline"><mml:mrow><mml:mn mathvariant="normal">15</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">48.6</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M40" display="inline"><mml:mrow><mml:mn mathvariant="normal">16</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">53.5</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M41" display="inline"><mml:mrow><mml:mn mathvariant="normal">17</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">58.6</mml:mn></mml:mrow></mml:math></inline-formula> taken from data source (average of wind 
range used)</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry rowsep="1" colname="col2">9-point power scale</oasis:entry>
         <oasis:entry rowsep="1" colname="col3">m s<inline-formula><mml:math id="M42" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></oasis:entry>
         <oasis:entry rowsep="1" colname="col4">Replacement of <inline-formula><mml:math id="M43" display="inline"><mml:mi>x</mml:mi></mml:math></inline-formula> with <inline-formula><mml:math id="M44" display="inline"><mml:mi>y</mml:mi></mml:math></inline-formula> using the following map: <inline-formula><mml:math id="M45" display="inline"><mml:mrow><mml:mn mathvariant="normal">0</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">0.0</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M46" display="inline"><mml:mrow><mml:mn mathvariant="normal">1</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">1.0</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M47" display="inline"><mml:mrow><mml:mn mathvariant="normal">2</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">2.6</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M48" display="inline"><mml:mrow><mml:mn mathvariant="normal">3</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">4.6</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M49" display="inline"><mml:mrow><mml:mn mathvariant="normal">4</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">6.7</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M50" display="inline"><mml:mrow><mml:mn mathvariant="normal">5</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">9.3</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M51" display="inline"><mml:mrow><mml:mn mathvariant="normal">6</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">12.3</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M52" display="inline"><mml:mrow><mml:mn mathvariant="normal">7</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">15.4</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M53" display="inline"><mml:mrow><mml:mn 
mathvariant="normal">8</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">19</mml:mn></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M54" display="inline"><mml:mrow><mml:mn mathvariant="normal">9</mml:mn><mml:mo>=</mml:mo><mml:mn mathvariant="normal">28.8</mml:mn></mml:mrow></mml:math></inline-formula>, from 1931 French instruction book (Angot, 1931)</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry rowsep="1" colname="col2">km h<inline-formula><mml:math id="M55" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></oasis:entry>
         <oasis:entry rowsep="1" colname="col3">m s<inline-formula><mml:math id="M56" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></oasis:entry>
         <oasis:entry rowsep="1" colname="col4"><inline-formula><mml:math id="M57" display="inline"><mml:mrow><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mi>x</mml:mi><mml:mo>/</mml:mo><mml:mn mathvariant="normal">3.6</mml:mn></mml:mrow></mml:math></inline-formula> rounded to 1 decimal place</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">knots</oasis:entry>
         <oasis:entry colname="col3">m s<inline-formula><mml:math id="M58" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col4"><inline-formula><mml:math id="M59" display="inline"><mml:mrow><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mi>x</mml:mi><mml:mo>×</mml:mo><mml:mn mathvariant="normal">0.514444</mml:mn></mml:mrow></mml:math></inline-formula> rounded to 1 decimal place</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Wind direction <?xmltex \hack{\hfill\break}?>conversions</oasis:entry>
         <oasis:entry rowsep="1" colname="col2">16-point compass<?xmltex \hack{\hfill\break}?>scale</oasis:entry>
         <oasis:entry rowsep="1" colname="col3">degrees</oasis:entry>
         <oasis:entry rowsep="1" colname="col4">Replacement of <inline-formula><mml:math id="M60" display="inline"><mml:mi>x</mml:mi></mml:math></inline-formula> with <inline-formula><mml:math id="M61" display="inline"><mml:mi>y</mml:mi></mml:math></inline-formula> using the following map: C <inline-formula><mml:math id="M62" display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula> 361, NNE <inline-formula><mml:math id="M63" display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula> 22.5, NE <inline-formula><mml:math id="M64" display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula> 45, ENE <inline-formula><mml:math id="M65" display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula> 67.5, E <inline-formula><mml:math id="M66" display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula> 90, ESE <inline-formula><mml:math id="M67" display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula> 112.5, SE <inline-formula><mml:math id="M68" display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula> 135, SSE <inline-formula><mml:math id="M69" display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula> 157.5, S <inline-formula><mml:math id="M70" display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula> 180, SSW <inline-formula><mml:math id="M71" display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula> 202.5, SW <inline-formula><mml:math id="M72" display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula> 225, WSW <inline-formula><mml:math id="M73" display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula> 247.5, W <inline-formula><mml:math id="M74" display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula> 270, WNW <inline-formula><mml:math id="M75" display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula> 292.5, NW <inline-formula><mml:math id="M76" display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula> 315, NNW <inline-formula><mml:math id="M77" 
display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula> 337.5, N <inline-formula><mml:math id="M78" display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula> 360</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry rowsep="1" colname="col2">32-point direction <?xmltex \hack{\hfill\break}?>scale</oasis:entry>
         <oasis:entry rowsep="1" colname="col3">degrees</oasis:entry>
         <oasis:entry rowsep="1" colname="col4"><inline-formula><mml:math id="M79" display="inline"><mml:mrow><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mi>x</mml:mi><mml:mo>×</mml:mo><mml:mn mathvariant="normal">11.25</mml:mn></mml:mrow></mml:math></inline-formula></oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">degrees <inline-formula><mml:math id="M80" display="inline"><mml:mo>/</mml:mo></mml:math></inline-formula> 10</oasis:entry>
         <oasis:entry colname="col3">degrees</oasis:entry>
         <oasis:entry colname="col4"><inline-formula><mml:math id="M81" display="inline"><mml:mrow><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mi>x</mml:mi><mml:mo>×</mml:mo><mml:mn mathvariant="normal">10</mml:mn></mml:mrow></mml:math></inline-formula></oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Pressure</oasis:entry>
         <oasis:entry rowsep="1" colname="col2">mmHg</oasis:entry>
         <oasis:entry rowsep="1" colname="col3">hPa</oasis:entry>
         <oasis:entry rowsep="1" colname="col4"><inline-formula><mml:math id="M82" display="inline"><mml:mrow><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mi>x</mml:mi><mml:mo>×</mml:mo><mml:mn mathvariant="normal">1.33224</mml:mn></mml:mrow></mml:math></inline-formula> rounded to 1 decimal place</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">conversions</oasis:entry>
         <oasis:entry colname="col2">hPa <inline-formula><mml:math id="M83" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 10</oasis:entry>
         <oasis:entry colname="col3">hPa</oasis:entry>
         <oasis:entry colname="col4"><inline-formula><mml:math id="M84" display="inline"><mml:mrow><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mi>x</mml:mi><mml:mo>/</mml:mo><mml:mn mathvariant="normal">10</mml:mn></mml:mrow></mml:math></inline-formula></oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Temperature <?xmltex \hack{\hfill\break}?>conversions</oasis:entry>
         <oasis:entry colname="col2"><inline-formula><mml:math id="M85" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula>F</oasis:entry>
         <oasis:entry colname="col3"><inline-formula><mml:math id="M86" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula>C</oasis:entry>
         <oasis:entry colname="col4"><inline-formula><mml:math id="M87" display="inline"><mml:mrow><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mo>(</mml:mo><mml:mi>x</mml:mi><mml:mo>-</mml:mo><mml:mn mathvariant="normal">32</mml:mn><mml:mo>)</mml:mo><mml:mo>×</mml:mo><mml:mo>(</mml:mo><mml:mn mathvariant="normal">5</mml:mn><mml:mo>/</mml:mo><mml:mn mathvariant="normal">9</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> rounded to 1 decimal place</oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table></table-wrap>

      <p id="d1e2006">Budget constraints made it unfeasible to use double-keying, a suggested
method of improving digitised data quality where the same data are
transcribed twice (Brönnimann et al., 2006; World Meteorological
Organization, 2016). We tested optical character recognition (OCR) and speech
recognition technologies, but the diverse nature of each task and the time
and cost associated with training the software to each data source made these
options unfeasible. However, the digitisers were trained in self-assessment
techniques aimed at reducing data errors. Digitisers were asked to carefully
cross-check their values with the original source values for the 10th, 20th
and 30th day of each month to make sure that no days had been skipped or
repeated. Days with missing data were recorded in metadata files, along with
any<?pagebreak page1619?> other variations in the data source, such as repeated pages in the
scanned file or temporary changes in the table structure. Where data sources
included monthly totals and summaries, digitisers were also instructed to
calculate these values from their daily transcribed data, to check accuracy.</p>
      <p id="d1e2009">The data sources were in a number of different formats. The two main formats
were 1 month (or day) to a page for a single station, and 1 day to a page
for a network of stations. Depending on the source structure, each digitiser
was in charge of digitising values from a station (e.g. Egyptian and Moroccan
sources, Fig. 2a and b), a time period (e.g. Slovenia, Fig. 2c) or a
variable (e.g. Lebanon, Fig. 2d). English and Catalan translations of the
relevant column and row headings were provided to the digitisers for each
source, as well as the various wind strength scales (see Sect. 2.4).</p>
      <p id="d1e2012">In several cases, not all of the data on a sheet were required to be
digitised, as they had already been transcribed as part of EURO4M and MEDARE.
To help digitisers with the complex layout of the source images, templates
were developed in Microsoft Excel for some sources that were as close<?pagebreak page1620?> as
possible to the format of the original data source (see Fig. 3 for several
examples). Borders and shading within the files were used to help the
digitiser keep track of their work, and date columns were pre-filled with the
correct dates to reduce the occurrence of errors associated with leap years.
While the development of templates was
not always possible due to time constraints, templates were used for all
sources with very high-resolution data (e.g. observations every hour, see
Table 1).</p>
      <p id="d1e2015">The digitisers were required to upload their data to a central server every
15 days, include a count of the number of values digitised and include an up-to-date
copy of the data transcribed. This method ensured that the digitisers were
making progress, the data were being regularly backed up and the
digitised observations could be regularly checked (see Sect. 3).</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F4" specific-use="star"><caption><p id="d1e2021">A schematic of the quality assurance and quality control procedures
used in the development of the dataset.</p></caption>
          <?xmltex \igopts{width=341.433071pt}?><graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-f04.pdf"/>

        </fig>

</sec>
<sec id="Ch1.S2.SS4">
  <title>Conversion to standard units</title>
      <?pagebreak page1621?><p id="d1e2036">While visual quality control and assessment were applied to the data in their
original units, the data were also converted to standard units, to be used in
widespread meteorological products and statistical quality control procedures
(Table 2). Data sources and available metadata were examined closely to
ensure the conversions were as accurate as possible, and any changes to units
within the same source were captured. Many atmospheric pressure observations
in particular needed to be converted from millimetres of mercury to
hectopascals, and station level pressure data reduced to sea level pressure
for quality control testing. This step involved a detailed examination of the
data sources to identify station height information and any instrument
movements that may have occurred. In most cases, only the station height
information could be located, but any changes identified were recorded in the
coordinates accompanying the final dataset.<?xmltex \hack{\newpage}?></p>
</sec>
</sec>
<sec id="Ch1.S3">
  <title>Quality assessment of digitised data</title>
      <p id="d1e2047">Quality control procedures are crucial to identify non-systematic
errors or shed light on systematic biases in a time series. This is
particularly the case for daily or sub-daily data, as these observations are
used in the calculation of monthly and annual means. Errors can occur as a
result of issues with original sources, the method of data collection,
transcription in the original source or the digitisation process.</p>
      <p id="d1e2050">An ideal QC procedure must be transparent and rigorous to ensure internal
data consistency, temporal and spatial coherence, and traceability for future
data users. A well-defined and well-executed QC routine will be able to flag data
errors from time series that could compromise the analysis of natural climate
variability and anthropogenic climate change, including the study of extreme
events (Aguilar et al., 2003; Brunet et al., 2006).</p>
      <p id="d1e2053">An exhaustive QC application was vital for our study, but given the large
number of observations, completely manual QC by cross-checking all
observations against the original source was not feasible. However, a
completely automated procedure that tests data against that of neighbouring
stations, such as that used for global databases (Dunn et al., 2012), would
also be sub-optimal, as the digitised data do not cover a wide geographic
area and consistent time period. We therefore decided that a multiple-step
process would be the best approach. A different version of the dataset
was produced after each step,
enabling users to ultimately access the original data, as well as data that
had undergone one or two rounds of quality testing.</p>
      <p id="d1e2056">Figure 4 outlines the multiple steps of the data quality assurance and
control procedures used in the development of the dataset. As outlined in
Sect. 2, efforts were made before digitisation to minimise the introduction
of errors, including a detailed assessment of each data source, the
development of templates for many sources, and the selection of qualified
digitisers. During and after digitisation, the digitised data were then
subjected to quality control and assurance testing. The structure of the
testing (Fig. 4) can be summarised as a basic visual check, statistical
testing at the individual station level and spatial testing across
comparable networks.</p>
      <p id="d1e2060">Note that homogenisation is not included in this QC procedure. Although the
homogenisation of data to remove non-climatic features of a long-term
instrumental record is crucial for the assessment of climate variability and
change (e.g. Peterson et al., 1998), homogeneity assessment of sub-daily data
is a highly complex task that is still under development within the research
community (Venema et al., 2012).</p>
<sec id="Ch1.S3.SS1">
  <title>Visual cross-checking</title>
      <p id="d1e2068">A selection of values uploaded by digitisers were systematically compared to
the original source images by postgraduate researchers and other digitisers
at the Centre for Climate Change at URV familiar with the sources.
The aims of these initial visual cross-checks were to provide timely feedback
to the digitisers if common digitisation errors were occurring, identify
subtle errors in the order of the data that may not be picked up in
statistical procedures and also make a preliminary assessment of the quality
of the data from each particular source (Table 1). Additionally, regular
reporting of data completed helped us identify any digitisers who were having
trouble with their tasks and needed extra assistance.</p>
      <p id="d1e2071">For every fourth year of data, 2 or 3 days of observations were
selected at three-monthly intervals for visual cross-checking with the
original source. This was completed for data from all sources. Additional ad
hoc checks were made if a known issue existed in the data source, e.g. if the
period covered by the data source contained a leap year, or the source pages
were known to be out of order. Although these<?pagebreak page1622?> checks only covered a small
percentage of the total digitised data, we felt they were sufficient to
identify the general quality of work done by individual digitisers and for
each source.</p>
      <p id="d1e2074">In more than 60 % of stations tested, only a small number (less than
5 %) of the checked values required correction. Visual cross-checking of
data from stations with a larger number of errors identified the occasional
skipped day or duplicated value, which meant that a large percentage of
observations needed to be shifted by one time step. The majority of these
errors were found in data for Egypt and Algeria, from sources that had
already been flagged as difficult to read and containing date order errors.
In two cases, digitisers were asked to repeat their work.</p>
</sec>
<sec id="Ch1.S3.SS2">
  <title>Individual station quality control (SAQC method)</title>
      <p id="d1e2083">After the basic visual cross-checks, the digitised data were subjected to a
range of statistical quality control tests. Due to the highly variable nature
of the different data sources, and their disparate geographical spread, data
from each station were examined individually. Data were also examined in
their original temporal resolution, and not converted to daily averages, as
averaging the sub-daily values would make it difficult to identify the
erroneous value. Statistical quality control was conducted using a
semi-automatic quality control (SAQC) procedure (Universitat Rovira i
Virgili, 2014). The SAQC method was largely adapted from existing automatic
quality control procedures developed for sub-daily data at a global scale
(e.g. Dunn et al., 2012; Durre et al., 2010), but was modified for our
dataset to enable more manual
examination of the resultant flags. Full details of the procedure, the
relevant software and instructions for use are available from the A.Q.C. Software
menu at <uri>http://www.c3.urv.cat/softdata.php</uri> (last access:
8 August 2018).</p>
      <p id="d1e2089">SAQC comprises three separate programs that can be applied to the data at
their original time resolution in text file format: one examining
temperature, wind, relative humidity and dewpoint observations; another
assessing sea level pressure data; and a final check on sub-daily rainfall
data, daily snow depth and snow fall data. The tests applied within SAQC
(Table 3) can be largely grouped into four groups depending on the degree of
QC applied (Aguilar et al., 2003):
<list list-type="bullet"><list-item>
      <p id="d1e2094"><italic>Gross errors tests</italic>. These are QC tests that detect and flag obviously
erroneous values (date order check, date errors, unrealistic values, data
repetitions and non-numeric value tests).</p></list-item><list-item>
      <p id="d1e2100"><italic>Tolerance tests</italic>. These are QC tests that detect and flag those values considered outliers with
respect to their own defined upper and lower limits (climatic outliers, bivariate comparisons, monthly
mean of absolute increments, and unusual distribution of values tests).</p></list-item><list-item>
      <p id="d1e2106"><italic>Inter-variable check</italic>. These are QC tests which detect and flag inconsistencies between associated elements
within each record (interval and DP/FS/SD inconsistency test, RH/DP/TT
comparison tests, precipitation and snow totals test).</p></list-item><list-item>
      <p id="d1e2112"><italic>Temporal coherency</italic>. These are QC tests which detect and flag a given value that is not consistent with
the amount of change that might be expected in a variable in any time
interval according to adjacent values (flat line test, big jump test, summer
snow test and irregular temporal evolution).</p></list-item></list></p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F5"><caption><p id="d1e2119">Air temperature evolution (in <inline-formula><mml:math id="M88" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula>C) in Port Said station
(Egypt) taken at 08:00 (in black) and 14:00 (in grey) for the period
1939–1940. Different errors flagged by SAQC are marked with solid coloured
squares: an outlier (pink); outlier and inter-variable (IV) error (yellow); IV
error (orange); and big jump, IV error and outlier (red). The decision made
by manual checking is shown by rectangular outlines: values identified as
transcription errors are outlined by a red border, values flagged due to a
data duplication error are outlined in blue, and values that were found to be
valid extremes are outlined in green. Values found to be errors were
corrected and given a flag of fl12 in the quality-controlled version of the dataset, and
values found to be correct were retained and given a flag of fl14.</p></caption>
          <?xmltex \igopts{width=213.395669pt}?><graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-f05.pdf"/>

        </fig>

<?xmltex \floatpos{p}?><table-wrap id="Ch1.T3" specific-use="star"><caption><p id="d1e2141">Descriptions of the SAQC tests applied for each climate variable.
Variable acronyms are as those described in Table 1. The programs used to
apply each test are available at <uri>http://www.c3.urv.cat/softdata.php</uri>
(last access: 8 August 2018).</p></caption><oasis:table frame="topbot"><?xmltex \begin{scaleboxenv}{.9}[.9]?><oasis:tgroup cols="4">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="justify" colwidth="99.584646pt"/>
     <oasis:colspec colnum="3" colname="col3" align="justify" colwidth="199.169291pt"/>
     <oasis:colspec colnum="4" colname="col4" align="left"/>
     <oasis:thead>
       <oasis:row rowsep="1">

         <oasis:entry namest="col1" nameend="col2">SAQC test </oasis:entry>

         <oasis:entry colname="col3">Brief description</oasis:entry>

         <oasis:entry colname="col4">Variables</oasis:entry>

       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>

         <oasis:entry rowsep="1" colname="col1" morerows="1"><?xmltex \igopts{width=56.905512pt}?><inline-graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-g01.jpg"/></oasis:entry>

         <oasis:entry colname="col2">Date order check</oasis:entry>

         <oasis:entry colname="col3">Detect erroneous calendar date order</oasis:entry>

         <oasis:entry colname="col4">TT/DP/RH/WD/WS/RR/PP</oasis:entry>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row>

         <oasis:entry rowsep="1" colname="col1" morerows="1"><?xmltex \igopts{width=56.905512pt}?><inline-graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-g02.jpg"/></oasis:entry>

         <oasis:entry colname="col2">Data repetitions</oasis:entry>

         <oasis:entry colname="col3">Flag repeated entire months</oasis:entry>

         <oasis:entry colname="col4">TT/DP/RH/WD/WS/PP</oasis:entry>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row>

         <oasis:entry rowsep="1" colname="col1" morerows="1"><?xmltex \igopts{width=48.369685pt}?><inline-graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-g03.jpg"/></oasis:entry>

         <oasis:entry colname="col2">Unrealistic values</oasis:entry>

         <oasis:entry colname="col3">Flag values outside world record limits and physically impossible values</oasis:entry>

         <oasis:entry colname="col4">TT/DP/RH/WD/WS/PP</oasis:entry>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row>

         <oasis:entry rowsep="1" colname="col1" morerows="2"><?xmltex \igopts{width=56.905512pt}?><inline-graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-g04.jpg"/></oasis:entry>

         <oasis:entry colname="col2">Climatic outliers</oasis:entry>

         <oasis:entry colname="col3">Flag values out of an established threshold</oasis:entry>

         <oasis:entry colname="col4">TT/DP/RH/WS/PP/RR/FS/SD</oasis:entry>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row>

         <oasis:entry rowsep="1" colname="col1" morerows="1"><?xmltex \igopts{width=56.905512pt}?><inline-graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-g05.jpg"/></oasis:entry>

         <oasis:entry colname="col2">Duplicate values</oasis:entry>

         <oasis:entry colname="col3">Detect at least 30 identical consecutive values</oasis:entry>

         <oasis:entry colname="col4">TT/DP/RH/WD/WS</oasis:entry>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col1" morerows="1"><?xmltex \igopts{width=56.905512pt}?><inline-graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-g06.png"/></oasis:entry>

         <oasis:entry colname="col2">Big jumps and sharp spikes</oasis:entry>

         <oasis:entry colname="col3">Large differences between adjacent values</oasis:entry>

         <oasis:entry colname="col4">TT/DP</oasis:entry>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col1"/>

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col1"/>

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row>

         <oasis:entry rowsep="1" colname="col1" morerows="1"><?xmltex \igopts{width=56.905512pt}?><inline-graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-g07.jpg"/></oasis:entry>

         <oasis:entry colname="col2">Bivariate outliers</oasis:entry>

         <oasis:entry colname="col3">Differences between adjacent values that are larger than the bivariate distribution</oasis:entry>

         <oasis:entry colname="col4">PP</oasis:entry>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row>

         <oasis:entry rowsep="1" colname="col1" morerows="1"><?xmltex \igopts{width=48.369685pt}?><inline-graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-g08.jpg"/></oasis:entry>

         <oasis:entry colname="col2">Inter-variable inconsistency</oasis:entry>

         <oasis:entry colname="col3">Flag internal inconsistencies among variables</oasis:entry>

         <oasis:entry colname="col4">TT/RH/WS</oasis:entry>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row>

         <oasis:entry rowsep="1" colname="col1" morerows="1"><?xmltex \igopts{width=56.905512pt}?><inline-graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-g09.jpg"/></oasis:entry>

         <oasis:entry colname="col2">DP inconsistency</oasis:entry>

         <oasis:entry colname="col3">Flag differences between observed and calculated DP</oasis:entry>

         <oasis:entry colname="col4">TT/RH/DP</oasis:entry>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row>

         <oasis:entry rowsep="1" colname="col1" morerows="1"><?xmltex \igopts{width=56.905512pt}?><inline-graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-g10.jpg"/></oasis:entry>

         <oasis:entry colname="col2">Monthly mean of absolute <?xmltex \hack{\hfill\break}?>increments</oasis:entry>

         <oasis:entry colname="col3">Flag all values when mean monthly increments are <?xmltex \hack{\hfill\break}?>below/above the climatic normal increment</oasis:entry>

         <oasis:entry colname="col4">TT/RH/WD/WS</oasis:entry>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row>

         <oasis:entry rowsep="1" colname="col1" morerows="1"><?xmltex \igopts{width=56.905512pt}?><inline-graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-g11.jpg"/></oasis:entry>

         <oasis:entry colname="col2">Irregular temporal <?xmltex \hack{\hfill\break}?>evolution</oasis:entry>

         <oasis:entry colname="col3">Flag values that show unexpected temporal evolution</oasis:entry>

         <oasis:entry colname="col4">TT/RH/WS</oasis:entry>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row>

         <oasis:entry rowsep="1" colname="col1" morerows="1"><?xmltex \igopts{width=56.905512pt}?><inline-graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-g12.jpg"/></oasis:entry>

         <oasis:entry colname="col2">Unit changes</oasis:entry>

         <oasis:entry colname="col3">Automatic unit changes from millimetres of mercury (mmHg) to hectopascals (hPa)</oasis:entry>

         <oasis:entry colname="col4">PP</oasis:entry>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row>

         <oasis:entry rowsep="1" colname="col1" morerows="1"><?xmltex \igopts{width=56.905512pt}?><inline-graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-g13.jpg"/></oasis:entry>

         <oasis:entry colname="col2">Unusual distribution of <?xmltex \hack{\hfill\break}?>values</oasis:entry>

         <oasis:entry colname="col3">Flag values where the distribution in each month <?xmltex \hack{\hfill\break}?>includes a secondary peak</oasis:entry>

         <oasis:entry colname="col4">TT, DP, VV</oasis:entry>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row>

         <oasis:entry rowsep="1" colname="col1" morerows="1"><?xmltex \igopts{width=56.905512pt}?><inline-graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-g14.jpg"/></oasis:entry>

         <oasis:entry colname="col2">Precipitation totals</oasis:entry>

         <oasis:entry colname="col3">Flag values when sum of sub-daily RR data does not <?xmltex \hack{\hfill\break}?>equal daily RR total</oasis:entry>

         <oasis:entry colname="col4">RR</oasis:entry>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row>

         <oasis:entry rowsep="1" colname="col1" morerows="1"><?xmltex \igopts{width=56.905512pt}?><inline-graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-g15.jpg"/></oasis:entry>

         <oasis:entry colname="col2">Snow totals</oasis:entry>

         <oasis:entry colname="col3">Flag values when sum of fresh snow <inline-formula><mml:math id="M89" display="inline"><mml:mrow><mml:mo>&lt;</mml:mo><mml:mo>=</mml:mo></mml:mrow></mml:math></inline-formula> total snow depth</oasis:entry>

         <oasis:entry colname="col4">FS/SD</oasis:entry>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row>

         <oasis:entry rowsep="1" colname="col1" morerows="1"><?xmltex \igopts{width=56.905512pt}?><inline-graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-g16.jpg"/></oasis:entry>

         <oasis:entry colname="col2">Summer snow</oasis:entry>

         <oasis:entry colname="col3">Flag snowfall between May and October</oasis:entry>

         <oasis:entry colname="col4">FS/SD</oasis:entry>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row>

         <oasis:entry rowsep="1" colname="col1" morerows="1"><?xmltex \igopts{width=56.905512pt}?><inline-graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-g17.jpg"/></oasis:entry>

         <oasis:entry colname="col2">Non-numeric values</oasis:entry>

         <oasis:entry colname="col3">Flag non-numeric values</oasis:entry>

         <oasis:entry colname="col4">PP/RR/FS/SD</oasis:entry>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row>

         <oasis:entry rowsep="1" colname="col1" morerows="1"><?xmltex \igopts{width=56.905512pt}?><inline-graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-g18.jpg"/></oasis:entry>

         <oasis:entry colname="col2">FS/SD inconsistency</oasis:entry>

         <oasis:entry colname="col3">Flag total SD that increases without a FS fall and/or decreases with a FS value/ <?xmltex \hack{\hfill\break}?>flag FS that is not accompanied by SD</oasis:entry>

         <oasis:entry colname="col4">FS/SD</oasis:entry>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col1" morerows="1"><?xmltex \igopts{width=56.905512pt}?><inline-graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-g17.jpg"/></oasis:entry>

         <oasis:entry colname="col2">Non-numeric values</oasis:entry>

         <oasis:entry colname="col3">Flag non-numeric values</oasis:entry>

         <oasis:entry colname="col4">PP/RR/FS/SD</oasis:entry>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col2"/>

         <oasis:entry colname="col3"/>

         <oasis:entry colname="col4"/>

       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup><?xmltex \end{scaleboxenv}?></oasis:table></table-wrap>

<?xmltex \floatpos{t}?><table-wrap id="Ch1.T4" specific-use="star"><caption><p id="d1e2756">Description of quality control flags applied to data during the SAQC
and HQC procedures.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="3">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="justify" colwidth="369.885827pt"/>
     <oasis:colspec colnum="3" colname="col3" align="left"/>
     <oasis:thead>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Flag</oasis:entry>
         <oasis:entry colname="col2">Flag description</oasis:entry>
         <oasis:entry colname="col3">Expert decision</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>
         <oasis:entry colname="col1">fl10</oasis:entry>
         <oasis:entry colname="col2">Passed all SAQC tests</oasis:entry>
         <oasis:entry colname="col3">Retained</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">fl11</oasis:entry>
         <oasis:entry colname="col2">Identified as suspect and removed due to gross digitiser error</oasis:entry>
         <oasis:entry colname="col3">Removed</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">fl12</oasis:entry>
         <oasis:entry colname="col2">Identified as suspect, found to be a digitisation error, corrected</oasis:entry>
         <oasis:entry colname="col3">Corrected</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">fl13</oasis:entry>
         <oasis:entry colname="col2">Identified as suspect, found to be a digitisation error, removed</oasis:entry>
         <oasis:entry colname="col3">Removed</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">fl14</oasis:entry>
         <oasis:entry colname="col2">Identified as suspect but retained as correct after expert examination</oasis:entry>
         <oasis:entry colname="col3">Retained</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">fl15</oasis:entry>
         <oasis:entry colname="col2">Identified as suspect, found to be a source error and removed</oasis:entry>
         <oasis:entry colname="col3">Removed</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">fl17</oasis:entry>
         <oasis:entry colname="col2">Identified as suspect, no observation found in source, removed</oasis:entry>
         <oasis:entry colname="col3">Removed</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">fl30</oasis:entry>
         <oasis:entry colname="col2">Passed SAQC and HQC</oasis:entry>
         <oasis:entry colname="col3">Retained</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">fl32</oasis:entry>
         <oasis:entry colname="col2">Corrected in SAQC, passed HQC</oasis:entry>
         <oasis:entry colname="col3">Retained</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">fl34</oasis:entry>
         <oasis:entry colname="col2">Retained as correct in SAQC, passed HQC</oasis:entry>
         <oasis:entry colname="col3">Retained</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">fl36</oasis:entry>
         <oasis:entry colname="col2">Identified as suspect in HQC, removed</oasis:entry>
         <oasis:entry colname="col3">Removed</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">fl40</oasis:entry>
         <oasis:entry colname="col2">Passed statistical quality control but updated to correct units after location of accurate metadata</oasis:entry>
         <oasis:entry colname="col3">Retained</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">fl42</oasis:entry>
         <oasis:entry colname="col2">Identified as suspect, found to be a digitisation error and corrected, then updated to correct units after location of accurate metadata</oasis:entry>
         <oasis:entry colname="col3">Corrected</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">fl44</oasis:entry>
         <oasis:entry colname="col2">Identified as suspect but retained as correct after expert examination, then updated to correct units after location of accurate metadata</oasis:entry>
         <oasis:entry colname="col3">Retained</oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table></table-wrap>

      <p id="d1e2958">Each program produced a list of values flagged by each test at each station.
The combined key results were then manually cross-referenced against the
original source data, and corrected or removed from the quality-controlled
version of the dataset. The removal or correction of each value was recorded
using a flag system, to clearly document the nature of the identified errors
and results (Table 4). An example of the air temperature evolution in Port
Said (Egypt) taken at 08:00 and 14:00 local time for the short period
1939–1940 and resultant QC flags is shown in Fig. 5, highlighting various
types of errors, outliers and extreme values over a short time period.</p>
      <?pagebreak page1624?><p id="d1e2961">In the initial testing of the SAQC procedure, the tests for duplicate
values, monthly mean of absolute increments and unusual distribution of
values were found to be overly sensitive, resulting in many valid
observations being flagged for assessment. Many of the legitimate errors
identified by these tests were also found by others, so the thresholds on
these tests were relaxed to make the task of checking flagged values more
manageable.</p>
</sec>
<sec id="Ch1.S3.SS3">
  <title>Spatial and automatic quality assurance (HQC method)</title>
      <p id="d1e2970">The final QC procedure consisted of subjecting data from neighbouring stations
to spatial quality control tests, as well as rerunning several individual
station checks in a fully automated way as a second-round check for gross
errors that may have slipped through SAQC. Only data that had been checked by
visual means and SAQC were subjected to this procedure and, as with SAQC, the data
were examined in their original temporal format to avoid removing valid data.
This QC process (Hadley quality control, or HQC) was conducted using an
adapted version of the procedure used in the development of the UK Met Office
Hadley Centre Global Sub-Daily Station Observations dataset (HadISD
v2.0.1.2016p; Dunn et al., 2012, 2016). Due to time constraints, only data
digitised as part of this project were used in the spatial quality
assessment, although future work could make use of the existing HadISD
dataset as a reference network.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F6"><caption><p id="d1e2975">Percentage of flagged values using the standard QC tests developed
for the UK Met Office Hadley Centre Global Sub-Daily Station Observations
(HadISD), and the percentage of values flagged using HadISD tests
specifically adapted for this dataset (HQC). The variable acronyms are the
same as those given in the text: temperature (TT), dew point temperature
(DP), mean sea level pressure (PP), wind direction (WD), and wind speed
(WS).</p></caption>
          <?xmltex \igopts{width=184.942913pt}?><graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-f06.png"/>

        </fig>

<?xmltex \floatpos{t}?><table-wrap id="Ch1.T5" specific-use="star"><caption><p id="d1e2987">The networks used in the spatial and automatic quality control
analysis (HQC), including the period, variables and observing times examined.
Note that not all observing times were examined in HQC due to neighbouring
data availability.</p></caption><oasis:table frame="topbot"><?xmltex \begin{scaleboxenv}{.9}[.9]?><oasis:tgroup cols="7">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="left"/>
     <oasis:colspec colnum="3" colname="col3" align="right"/>
     <oasis:colspec colnum="4" colname="col4" align="left"/>
     <oasis:colspec colnum="5" colname="col5" align="left"/>
     <oasis:colspec colnum="6" colname="col6" align="left"/>
     <oasis:colspec colnum="7" colname="col7" align="right"/>
     <oasis:thead>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2"/>
         <oasis:entry colname="col3">Number of</oasis:entry>
         <oasis:entry colname="col4">Period checked</oasis:entry>
         <oasis:entry colname="col5">Variables checked</oasis:entry>
         <oasis:entry colname="col6">Observing times</oasis:entry>
         <oasis:entry colname="col7">% of complete</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Network</oasis:entry>
         <oasis:entry colname="col2">Countries</oasis:entry>
         <oasis:entry colname="col3">stations used</oasis:entry>
         <oasis:entry colname="col4">by HQC</oasis:entry>
         <oasis:entry colname="col5">by HQC</oasis:entry>
         <oasis:entry colname="col6">examined</oasis:entry>
         <oasis:entry colname="col7">data checked</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Morocco</oasis:entry>
         <oasis:entry colname="col2">Morocco</oasis:entry>
         <oasis:entry colname="col3">8</oasis:entry>
         <oasis:entry colname="col4">1953–1968</oasis:entry>
         <oasis:entry colname="col5">PP, TT, WD, FF, DP</oasis:entry>
         <oasis:entry colname="col6">6 or 7 h/18 h (2)</oasis:entry>
         <oasis:entry colname="col7">94.5 %</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">North Africa</oasis:entry>
         <oasis:entry colname="col2">Algeria, Tunisia</oasis:entry>
         <oasis:entry colname="col3">25</oasis:entry>
         <oasis:entry colname="col4">1886–1938</oasis:entry>
         <oasis:entry colname="col5">PP, TT, WD, FF</oasis:entry>
         <oasis:entry colname="col6">7 h (1)</oasis:entry>
         <oasis:entry colname="col7">91.5 %</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Egypt</oasis:entry>
         <oasis:entry colname="col2">Egypt</oasis:entry>
         <oasis:entry colname="col3">21</oasis:entry>
         <oasis:entry colname="col4">1907–1957</oasis:entry>
         <oasis:entry colname="col5">PP, TT, WD, FF, DP</oasis:entry>
         <oasis:entry colname="col6">6 h or 8 h/12 h or</oasis:entry>
         <oasis:entry colname="col7">69.6 %</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2"/>
         <oasis:entry colname="col3"/>
         <oasis:entry colname="col4"/>
         <oasis:entry colname="col5"/>
         <oasis:entry colname="col6">14 h/18 h or 20 h (4)</oasis:entry>
         <oasis:entry colname="col7"/>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Turkey</oasis:entry>
         <oasis:entry colname="col2">Turkey</oasis:entry>
         <oasis:entry colname="col3">25</oasis:entry>
         <oasis:entry colname="col4">1962–1971</oasis:entry>
         <oasis:entry colname="col5">PP, TT, WD, FF</oasis:entry>
         <oasis:entry colname="col6">7 h/14 h/21 h (3)</oasis:entry>
         <oasis:entry colname="col7">100 %</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Central Balkans</oasis:entry>
         <oasis:entry colname="col2">Slovakia, Croatia, Bosnia-</oasis:entry>
         <oasis:entry colname="col3">10</oasis:entry>
         <oasis:entry colname="col4">1950–2012</oasis:entry>
         <oasis:entry colname="col5">PP, TT, WD, FF</oasis:entry>
         <oasis:entry colname="col6">7 h/14 h/21 h (3)</oasis:entry>
         <oasis:entry colname="col7">72.7 %</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">Herzegovina, Serbia</oasis:entry>
         <oasis:entry colname="col3"/>
         <oasis:entry colname="col4"/>
         <oasis:entry colname="col5"/>
         <oasis:entry colname="col6"/>
         <oasis:entry colname="col7"/>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Central Europe</oasis:entry>
         <oasis:entry colname="col2">Slovenia, Czech Republic,</oasis:entry>
         <oasis:entry colname="col3">11</oasis:entry>
         <oasis:entry colname="col4">1948–1968</oasis:entry>
         <oasis:entry colname="col5">PP, TT, WD, FF</oasis:entry>
         <oasis:entry colname="col6">7 h/14 h/21 h (3)</oasis:entry>
         <oasis:entry colname="col7">46.99 %</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">Germany (2 stations)</oasis:entry>
         <oasis:entry colname="col3"/>
         <oasis:entry colname="col4"/>
         <oasis:entry colname="col5"/>
         <oasis:entry colname="col6"/>
         <oasis:entry colname="col7"/>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup><?xmltex \end{scaleboxenv}?></oasis:table></table-wrap>

      <?xmltex \floatpos{t}?><fig id="Ch1.F7" specific-use="star"><caption><p id="d1e3275">Spatial coverage of 8.8 million observations digitised, showing the
station locations. The approximate length of the record at each station is
indicated by the size of the pie symbol; the number of observations per day
is represented by the colour of the pie pieces; and the different variables
available at each station are indicated by which wedges are shaded based on
the legend in the top right corner. Variable acronyms are as those described
in the caption to Table 1, apart from SLP, which represents station and sea
level pressure.</p></caption>
          <?xmltex \igopts{width=312.980315pt}?><graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-f07.pdf"/>

        </fig>

      <p id="d1e3284">Automatically running HQC with the standard thresholds used in the
development of the global HadISD dataset led to a large number of false
positive flags being identified (Fig. 6), as the rescued dataset had low
spatial coverage and included observations taken at inconsistent times, often
converted from units with coarse resolution. To reduce the number of false
positive flags and increase the number of stations that could be checked,
some of the HadISD tests were adapted (Table S2). The minimum number of
neighbouring stations required for HQC testing was reduced from 10 to 5, and
the percentage of non-missing observations per month allowed was reduced from
75 % to 66 %. Tests that looked for streaks of identical values, or
non-uniform distributions in the frequency of values, were also slackened to
account for the fact that many observations were converted from different
units.</p>

<?xmltex \floatpos{t}?><table-wrap id="Ch1.T6" specific-use="star"><caption><p id="d1e3290">Summary of stations digitised as part of this project. The variables
are temperature (TT), relative humidity (RH), dew point temperature (DP),
wind speed (WS), wind direction (WD), air pressure (PP, including sea level
pressure and station level pressure), wet bulb temperature (WB), total snow
depth (SD), fresh snow (FS) and precipitation (RR). The digitised dataset is
available through the World Data Center PANGAEA
(<uri>https://doi.pangaea.de/10.1594/PANGAEA.886511</uri>), in the format of one
file for each variable and country.</p></caption><oasis:table frame="topbot"><?xmltex \begin{scaleboxenv}{.9}[.9]?><oasis:tgroup cols="9">
     <oasis:colspec colnum="1" colname="col1" align="justify" colwidth="56.905512pt"/>
     <oasis:colspec colnum="2" colname="col2" align="left"/>
     <oasis:colspec colnum="3" colname="col3" align="right"/>
     <oasis:colspec colnum="4" colname="col4" align="left"/>
     <oasis:colspec colnum="5" colname="col5" align="justify" colwidth="85.358268pt"/>
     <oasis:colspec colnum="6" colname="col6" align="right"/>
     <oasis:colspec colnum="7" colname="col7" align="right"/>
     <oasis:colspec colnum="8" colname="col8" align="right"/>
     <oasis:colspec colnum="9" colname="col9" align="right"/>
     <oasis:thead>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">Country</oasis:entry>
         <oasis:entry colname="col3">Number of</oasis:entry>
         <oasis:entry colname="col4">Period</oasis:entry>
         <oasis:entry colname="col5"/>
         <oasis:entry colname="col6">Number of</oasis:entry>
         <oasis:entry colname="col7">Total</oasis:entry>
         <oasis:entry colname="col8">Total</oasis:entry>
         <oasis:entry colname="col9">Percentage of</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Country</oasis:entry>
         <oasis:entry colname="col2">code</oasis:entry>
         <oasis:entry colname="col3">stations</oasis:entry>
         <oasis:entry colname="col4">covered</oasis:entry>
         <oasis:entry colname="col5">Variables</oasis:entry>
         <oasis:entry colname="col6">observations</oasis:entry>
         <oasis:entry colname="col7">digitised</oasis:entry>
         <oasis:entry colname="col8">after</oasis:entry>
         <oasis:entry colname="col9">data removed</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2"/>
         <oasis:entry colname="col3"/>
         <oasis:entry colname="col4"/>
         <oasis:entry colname="col5"/>
         <oasis:entry colname="col6">per day</oasis:entry>
         <oasis:entry colname="col7"/>
         <oasis:entry colname="col8">QC</oasis:entry>
         <oasis:entry colname="col9">in SAQC</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Algeria</oasis:entry>
         <oasis:entry colname="col2">ALG</oasis:entry>
         <oasis:entry colname="col3">21</oasis:entry>
         <oasis:entry colname="col4">1877–1968</oasis:entry>
         <oasis:entry colname="col5">DP, WS, WD, SLP, TT</oasis:entry>
         <oasis:entry colname="col6">4</oasis:entry>
         <oasis:entry colname="col7">684 114</oasis:entry>
         <oasis:entry colname="col8">665 369</oasis:entry>
         <oasis:entry colname="col9">2.74</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Bosnia- <?xmltex \hack{\hfill\break}?>Herzegovina</oasis:entry>
         <oasis:entry colname="col2">BOH</oasis:entry>
         <oasis:entry colname="col3">2</oasis:entry>
         <oasis:entry colname="col4">1953–1984</oasis:entry>
         <oasis:entry colname="col5">WS, WD, PP, RH, TT</oasis:entry>
         <oasis:entry colname="col6">3</oasis:entry>
         <oasis:entry colname="col7">125 831</oasis:entry>
         <oasis:entry colname="col8">115 894</oasis:entry>
         <oasis:entry colname="col9">7.90</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Croatia</oasis:entry>
         <oasis:entry colname="col2">CRO</oasis:entry>
         <oasis:entry colname="col3">2</oasis:entry>
         <oasis:entry colname="col4">1949–1984</oasis:entry>
         <oasis:entry colname="col5">WS, WD, PP, RH, TT</oasis:entry>
         <oasis:entry colname="col6">3</oasis:entry>
         <oasis:entry colname="col7">391 789</oasis:entry>
         <oasis:entry colname="col8">390 141</oasis:entry>
         <oasis:entry colname="col9">0.42</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Cyprus</oasis:entry>
         <oasis:entry colname="col2">CYP</oasis:entry>
         <oasis:entry colname="col3">2</oasis:entry>
         <oasis:entry colname="col4">1881–1922</oasis:entry>
         <oasis:entry colname="col5">TT</oasis:entry>
         <oasis:entry colname="col6">2</oasis:entry>
         <oasis:entry colname="col7">45 070</oasis:entry>
         <oasis:entry colname="col8">45 068</oasis:entry>
         <oasis:entry colname="col9">0.00</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Czech Republic</oasis:entry>
         <oasis:entry colname="col2">CZE</oasis:entry>
         <oasis:entry colname="col3">7</oasis:entry>
         <oasis:entry colname="col4">1948–1968</oasis:entry>
         <oasis:entry colname="col5">WS, WD, PP, RH, TT</oasis:entry>
         <oasis:entry colname="col6">6</oasis:entry>
         <oasis:entry colname="col7">379 582</oasis:entry>
         <oasis:entry colname="col8">377 843</oasis:entry>
         <oasis:entry colname="col9">0.46</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Egypt</oasis:entry>
         <oasis:entry colname="col2">EGY</oasis:entry>
         <oasis:entry colname="col3">18</oasis:entry>
         <oasis:entry colname="col4">1907–1957</oasis:entry>
         <oasis:entry colname="col5">DP, WS, WD, PP, RH, TT</oasis:entry>
         <oasis:entry colname="col6">6</oasis:entry>
         <oasis:entry colname="col7">1 371 436</oasis:entry>
         <oasis:entry colname="col8">1 336 281</oasis:entry>
         <oasis:entry colname="col9">2.56</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Germany</oasis:entry>
         <oasis:entry colname="col2">GER</oasis:entry>
         <oasis:entry colname="col3">23</oasis:entry>
         <oasis:entry colname="col4">1958–1978</oasis:entry>
         <oasis:entry colname="col5">WS, WD, FS, PP, RH, RR, SD, TT, WB, DP</oasis:entry>
         <oasis:entry colname="col6">Up to 24</oasis:entry>
         <oasis:entry colname="col7">697 308</oasis:entry>
         <oasis:entry colname="col8">692 750</oasis:entry>
         <oasis:entry colname="col9">0.65</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Lebanon</oasis:entry>
         <oasis:entry colname="col2">LBN</oasis:entry>
         <oasis:entry colname="col3">1</oasis:entry>
         <oasis:entry colname="col4">1930–1939</oasis:entry>
         <oasis:entry colname="col5">PP, RH, TT</oasis:entry>
         <oasis:entry colname="col6">24</oasis:entry>
         <oasis:entry colname="col7">262 944</oasis:entry>
         <oasis:entry colname="col8">254 044</oasis:entry>
         <oasis:entry colname="col9">3.38</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Morocco</oasis:entry>
         <oasis:entry colname="col2">MAR</oasis:entry>
         <oasis:entry colname="col3">8</oasis:entry>
         <oasis:entry colname="col4">1910–1968</oasis:entry>
         <oasis:entry colname="col5">DP, WS, WD, PP, TT</oasis:entry>
         <oasis:entry colname="col6">4</oasis:entry>
         <oasis:entry colname="col7">340 563</oasis:entry>
         <oasis:entry colname="col8">336 170</oasis:entry>
         <oasis:entry colname="col9">1.29</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Serbia</oasis:entry>
         <oasis:entry colname="col2">SER</oasis:entry>
         <oasis:entry colname="col3">3</oasis:entry>
         <oasis:entry colname="col4">1949–2012</oasis:entry>
         <oasis:entry colname="col5">WS, WD, PP, RH, TT</oasis:entry>
         <oasis:entry colname="col6">3</oasis:entry>
         <oasis:entry colname="col7">358 898</oasis:entry>
         <oasis:entry colname="col8">356 058</oasis:entry>
         <oasis:entry colname="col9">0.79</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Slovak <?xmltex \hack{\hfill\break}?>Republic</oasis:entry>
         <oasis:entry colname="col2">SLO</oasis:entry>
         <oasis:entry colname="col3">2</oasis:entry>
         <oasis:entry colname="col4">1940–1967</oasis:entry>
         <oasis:entry colname="col5">WS, WD, PP, RH, TT</oasis:entry>
         <oasis:entry colname="col6">6</oasis:entry>
         <oasis:entry colname="col7">248 751</oasis:entry>
         <oasis:entry colname="col8">247 541</oasis:entry>
         <oasis:entry colname="col9">0.49</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Slovenia</oasis:entry>
         <oasis:entry colname="col2">SLV</oasis:entry>
         <oasis:entry colname="col3">3</oasis:entry>
         <oasis:entry colname="col4">1950–1978</oasis:entry>
         <oasis:entry colname="col5">DP, WS, WD, FS, PP, RH, RR, SD, TT</oasis:entry>
         <oasis:entry colname="col6">Up to 24</oasis:entry>
         <oasis:entry colname="col7">2 507 878</oasis:entry>
         <oasis:entry colname="col8">2 437 163</oasis:entry>
         <oasis:entry colname="col9">2.82</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Spain</oasis:entry>
         <oasis:entry colname="col2">ESP</oasis:entry>
         <oasis:entry colname="col3">5</oasis:entry>
         <oasis:entry colname="col4">1954–1984</oasis:entry>
         <oasis:entry colname="col5">WS, WD, PP, RH, TT, DP</oasis:entry>
         <oasis:entry colname="col6">5</oasis:entry>
         <oasis:entry colname="col7">194 274</oasis:entry>
         <oasis:entry colname="col8">192 670</oasis:entry>
         <oasis:entry colname="col9">0.83</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Tunisia</oasis:entry>
         <oasis:entry colname="col2">TUN</oasis:entry>
         <oasis:entry colname="col3">5</oasis:entry>
         <oasis:entry colname="col4">1886–1938</oasis:entry>
         <oasis:entry colname="col5">WS, WD, PP, TT</oasis:entry>
         <oasis:entry colname="col6">1</oasis:entry>
         <oasis:entry colname="col7">174 900</oasis:entry>
         <oasis:entry colname="col8">170 480</oasis:entry>
         <oasis:entry colname="col9">2.53</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Turkey</oasis:entry>
         <oasis:entry colname="col2">TUR</oasis:entry>
         <oasis:entry colname="col3">25</oasis:entry>
         <oasis:entry colname="col4">1962–1971</oasis:entry>
         <oasis:entry colname="col5">WS, WD, PP, RH, TT</oasis:entry>
         <oasis:entry colname="col6">3</oasis:entry>
         <oasis:entry colname="col7">1 028 898</oasis:entry>
         <oasis:entry colname="col8">1 017 871</oasis:entry>
         <oasis:entry colname="col9">1.07</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Total</oasis:entry>
         <oasis:entry colname="col2"/>
         <oasis:entry colname="col3">127</oasis:entry>
         <oasis:entry colname="col4"/>
         <oasis:entry colname="col5"/>
         <oasis:entry colname="col6"/>
         <oasis:entry colname="col7">8 812 236</oasis:entry>
         <oasis:entry colname="col8">8 627 338</oasis:entry>
         <oasis:entry colname="col9">2.00</oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup><?xmltex \end{scaleboxenv}?></oasis:table></table-wrap>

      <p id="d1e3910">Data from each country were then split into networks according to their
correlation, spatial distance, observing times, overlapping observing periods
and variables observed. Six appropriate networks were identified (Table 5),
but it was not possible to include all stations, periods, variables and
observing times. The heterogeneous characteristics of the dataset, the high
spatial separation and irregular distribution of the stations, and the
inconsistent coverage of the variables<?pagebreak page1625?> included in the dataset meant that
only about 4.3 million observations (over 48 % of the total dataset)
could be subjected to HQC.</p>
      <p id="d1e3913">For example, it was not possible to apply HQC to data from Cyprus, Lebanon
and Spain due to the low number of stations in each country and the large
distance between the stations of neighbouring countries. We were also unable
to automatically analyse fresh snow and snow depth, precipitation, or
relative humidity data, as the HadISD QC does not assess these variables as
raw input. Moreover, several stations (such as those in Germany and Slovenia,
which form part of the central Europe network in Table 5) provided
hourly data, but there were not enough neighbouring stations with
sufficiently high<?pagebreak page1626?> temporal resolution to allow for more than a subset of
observing times per day to be checked.</p>
</sec>
</sec>
<sec id="Ch1.S4">
  <title>Results</title>
<sec id="Ch1.S4.SS1">
  <title>Spatial and temporal data distribution</title>
      <p id="d1e3928">A total of 8.8 million observations were digitised from 127 stations in 15
countries (Tables 6 and S3). Long records (<inline-formula><mml:math id="M90" display="inline"><mml:mrow><mml:mo>&gt;</mml:mo><mml:mn mathvariant="normal">30</mml:mn></mml:mrow></mml:math></inline-formula> years) of many variables
were successfully recovered from stations in Egypt, Tunisia and Algeria,
although only the Egyptian stations provided observations more than once a
day (Fig. 7). Shorter but more widespread observations were rescued across
Morocco, Turkey and the Balkans region, while the snowfall observations in
Germany only covered the west of the country.</p>
      <p id="d1e3941">The largest number of observations (more than 28 %) came from Slovenia
(Fig. 8a); even though we only had data for three stations in Slovenia, the
observations were hourly, included nine variables and covered more than
20 years. Around 15 % of the rescued observations came from Egypt, and
almost 12 % from Turkey. Both of these countries have a large number of
stations in the recovered network, and a variety of variables over a long
period of time (Fig. 7).</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F8" specific-use="star"><caption><p id="d1e3946">Distribution of the digitised observations by <bold>(a)</bold> country,
<bold>(b)</bold> variable, <bold>(c)</bold> decade and <bold>(d)</bold> hour of
observation. The length of each bar shows the number of observations
digitised (in millions), with orange indicating any observations flagged and
removed during SAQC. Variable acronyms are as those described in Table 1.
Country codes are as those listed in Table 6.</p></caption>
          <?xmltex \igopts{width=341.433071pt}?><graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-f08.png"/>

        </fig>

      <p id="d1e3967">More than 21 % (1.8 million) of the rescued observations were sub-daily
temperature measurements, with wind speed and direction measurements
totalling over 17 % (Fig. 8b). There were around 20 000 more wind
direction observations than wind speed observations; this is because very early Tunisian and Egyptian wind speed
observations were qualitative (e.g. light, moderate) and were not digitised.
Relative humidity data made up around 16 % of the rescued dataset, while
sea level pressure and station level pressure contributed a similar amount at
just over 15 % (around 1.4 million values). Over 160 000 fresh snow and
160 000 snow depth values (more than 3.5 % of the full dataset combined)
were also recovered from Germany and Slovenia from as early as the 1950s,
representing a significant increase in snow observations across the region.</p>
      <?pagebreak page1627?><p id="d1e3971">Due to the temporal coverage of the Slovenian data (1950–1978), as well as
the dedicated focus of the UERRA project on post-1957 observations, the
mid-20th century was the best-represented period in the rescued dataset
(Fig. 8c). Almost 60 % of the dataset covered the 20 years from 1950 to
1969. Observations from Cyprus and northern Africa provided data from the
late 19th century, and records from Serbia were recovered up to 2012.</p>
      <p id="d1e3974">Finally, the most common observing times for the variables rescued were
07:00, 14:00 and 21:00, reflecting standard observing practices over the
European region in the 20th century. Tunisian observations were only
available for 07:00, and for many other countries where observations were
only available once a day in the early part of the record, these observations
were also inevitably in the morning. Two German stations included a small
number of half-hourly observations (Fig. 8d).</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F9" specific-use="star"><caption><p id="d1e3979">Percentages of flagged and not flagged values derived from SAQC
application to this dataset. Panel <bold>(a)</bold> shows all datasets, while
<bold>(b)</bold> breaks down data that were flagged as possible errors by SAQC.
Flag codes given are explained in Table 4.</p></caption>
          <?xmltex \igopts{width=398.338583pt}?><graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-f09.png"/>

        </fig>

      <?xmltex \floatpos{t}?><fig id="Ch1.F10" specific-use="star"><caption><p id="d1e3996">Total counts (in percentage) of error flags by
countries <bold>(a)</bold>, variables <bold>(b)</bold>, observation
times <bold>(c)</bold> and decades <bold>(d)</bold> derived from SAQC application to
the dataset. Purple indicates values that were flagged but verified; blue
indicates values that were flagged and corrected; and red and orange indicate
values that were flagged and removed as errors. Variable acronyms are as
those described in Table 1. Flag descriptions are given in Table 4.</p></caption>
          <?xmltex \igopts{width=341.433071pt}?><graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-f10.png"/>

        </fig>

</sec>
<sec id="Ch1.S4.SS2">
  <title>Semi-automatic quality control (SAQC) results</title>
      <p id="d1e4023">All rescued sub-daily data were subjected to quality control routines to
identify erroneous values or chains of values in the time series (Sect. 3). A
total of 3.2 % of observations, around 268 000, were flagged as
suspicious for the whole dataset using SAQC (Fig. 9).</p>
      <p id="d1e4026">Flagging correct values (false positives) is a common QC issue, and manual
examination ensured that these important observations – often of extreme
events – are retained for future studies. The majority of the values flagged
(1.5 % of the total number of values) were corrected after manual
examination, with just over 1 % of the total number of<?pagebreak page1628?> observations
removed from the quality-controlled version of the dataset due to errors in the source
image or issues with the readability of the original values. This includes
observations recorded as <inline-formula><mml:math id="M91" display="inline"><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">88.8</mml:mn></mml:mrow></mml:math></inline-formula> by digitisers (hard to read, see Sect. 2.3).
Over 27 000, or 0.3 % of the total number of observations, were flagged
but then found to be correct after examination.</p>
      <p id="d1e4039">Despite being among the countries with the smallest number of observations,
Bosnia-Herzegovina and the Czech Republic had the largest percentages of flagged values
(<inline-formula><mml:math id="M92" display="inline"><mml:mrow><mml:mo>∼</mml:mo><mml:mn mathvariant="normal">8</mml:mn></mml:mrow></mml:math></inline-formula> % of the total number of data digitised, Fig. 10a). For
Bosnia-Herzegovina a large section of observations from one station was given
a flag of fl11 and removed due to an extensive digitiser error that could not
be reconciled. A digitisation error in the Czech Republic observations could
be corrected by shifting data by 1 day, resulting in a large number
of fl12 flags (corrected based on original source). The hand-written nature
of the Czech data, together with the absence of data templates (only used in
Slovenian, Spanish and German data sources) may go some way to explaining the
large number of flagged values for both countries. The countries with the
largest number of observations (Egypt and Slovenia) had about 3 % of
their observations corrected or verified and less than 2 % removed under
the SAQC procedure.</p>
      <p id="d1e4052">A similar proportion of values was flagged for each variable in the rescued
observations, except for precipitation (RR,
Fig. 10b), which was only available for Slovenian stations. The high number
of precipitation flags is due to two factors. Firstly, several digitisers
inadvertently recorded zero rainfall values as missing, or missing rainfall
as zero. The format of the Slovenian data sources changed over the period,
with some years having hourly rainfall data and others only providing
observations 3 or 4 times a day. Reporting no rainfall as missing data
could significantly affect any future analysis of rainfall frequency using
these data, and so these values were corrected, resulting in a number of fl12
(corrected based on original source) flags. Secondly, during the latter part
of the Slovenian record, some daily rainfall totals were calculated
inconsistently, using a midnight-to-midnight sum occasionally rather than a
07:00–07:00 total. The 6-hourly observations from the same stations were
quality-controlled based on these totals, but the daily rainfall totals calculated in this
way were removed from the final version of the dataset, to ensure consistency,
and given a flag of fl15 (removed due to source error).</p>
      <p id="d1e4056">SAQC flags distributed by decade show a similar pattern to the distribution
of observations, with a peak in the mid-20th century (Fig. 10c). The higher
number of fl17 flags (observations set to missing as no value could be found
in the source image) during the 1940s may reflect data issues during the
Second World War, particularly for Egypt and Algeria, where some original
source files were ordered incorrectly. This resulted in a number of values
being ascribed to the wrong date. Flagged values were relatively evenly
distributed across observation times (Fig. 10d), although the lower absolute
numbers of half-hourly observations made for a higher proportion of flagged
observations during these times (compare Fig. 8d and 10d).</p>
</sec>
<sec id="Ch1.S4.SS3">
  <title>Spatial quality control results (HQC)</title>
      <p id="d1e4065">In total about 64 000 values were flagged and subsequently removed by HQC,
around 0.7 % of the total dataset. Temperature was the
variable with the smallest number of flagged values overall by HQC, with the
exception of the northern African network, where data source resolution and the high number of missing
values caused HQC to flag and remove extra values (Fig. 11). The variable
with the highest<?pagebreak page1629?> proportion of flagged values in the northern African network was sea level pressure.</p>
      <p id="d1e4068">Given the automatic nature of the HQC tests, all values flagged by this step
were removed from the final version of the dataset and given a flag of fl36.
Values that were subjected to HQC were therefore marked with an additional
flag (a prefix of 3), to clearly identify the level of testing applied to
each individual observation (see Table 4 and Fig. 12). For example,
observations which were corrected or verified in the SAQC round of testing
and given an initial flag of fl12 or fl14 but passed the HQC procedure had a
final flag of fl32 or fl34, ensuring that information from both rounds of QC
was retained.</p>
      <p id="d1e4071">While the HQC tests were unable to be applied to all of the observations,
these results are similar to the findings of the HadISD spatial QC analyses
(Dunn et al., 2012). Around 3.9 %, or about 330 000 observations, were
flagged by both QC procedures (Fig. 12). A total of 2.1 % of the data
were removed as a result of SAQC and HQC testing, with 1.5 % corrected
during the SAQC process. Only 0.3 % were flagged but later verified
during SAQC, although this includes many legitimate extreme events that are
crucial for calibrating and verifying the tails of atmospheric behaviour
which can have the largest societal impact. These percentages of flagged
values are similar to those identified by Brönnimann et al. (2006), who
found transcription error rates of<?pagebreak page1630?> 0.2 % to 3 % for hourly
temperature and upper air observations.</p>
</sec>
<sec id="Ch1.S4.SS4">
  <title>Additional digitisation quality assurance checks</title>
      <p id="d1e4080">In the final data check, a small conversion problem was detected with the
atmospheric pressure at two Slovenian stations (around 318 000 values). The
vast majority of these observations passed both SAQC and HQC, with large
errors identified and flagged appropriately. However, these observations were
marked with a prefix of “4” rather than “1” (subjected to SAQC) or “3”
(subjected to SAQC and HQC) in the final dataset, to signify that additional
QC may be required by future users.</p>
      <p id="d1e4083">Incidental errors throughout the digitisation process, namely digitisers
keying the same data twice, gave us an additional opportunity to examine the
quality of several data sources. In particular, these opportunistic analyses
allowed us to identify the likely percentage of errors that would be
identified using a double keying technique.</p>
<sec id="Ch1.S4.SS4.SSS1">
  <title>Zagazig, Egypt, 1932</title>
      <p id="d1e4091">The 08:00 WD, WS and RH data for Zagazig, Egypt, in 1932 were digitised twice
by different digitisers: once using a template where every station on a page
was digitised together, and once without a template but extracting only data
from Zagazig from each source page (see Fig. 2a). A total of 70 disagreements
were found out of 1098 values, just over 6 % of the overlapping data.
Interestingly, all but one disagreement was due to errors in the data
digitised using the template. A total of eight values were entered into an
incorrect row, six values were misread by the digitiser as they were hard to
read, and 55 errors were as a result of skipped days, i.e. entire pages of
data were skipped. All of the skipped days errors occurred in relative
humidity, indicating that the digitiser worked through the source by
digitising one complete column at a time, rather than reading across each
row. The one error in the non-templated data was due to an incorrect row
being read.</p>
</sec>
<sec id="Ch1.S4.SS4.SSS2">
  <title>Egypt 1931</title>
      <p id="d1e4100">Two digitisers inadvertently digitised 08:00 SLP, TT, WS, WD and RH data for
11 stations in Egypt in 1931, both using the same template. A total of 308
differences were found between the two versions, 1.6 % of the 19 800
values digitised. Checking the differences with the original source images
revealed that 79 % were errors from one digitiser, and 21 % from the
second digitiser. The most common error type was an incorrect row or column
being read (54 % of errors), or the misreading of a value that was hard
to decipher (43 %). Only 4 % of the errors identified were put down
to gross typographical errors (e.g. 999 instead of 99).<?xmltex \hack{\newpage}?></p>
      <p id="d1e4104">These two Egyptian examples highlight a number of key issues with data
digitisation. The first is that the reliability of digitised data depends to
a large extent on the reliability of the person digitising those data. In
both cases there was a clear separation between the two digitisers, even
though (in the case of Egypt 1931) both digitisers used the same method.
The second is that templates created without input from digitisers may not
always achieve the best result. Indeed, follow-up surveys with the
digitisers suggested that several of the digitisers did not enjoy using
templates, and preferred to work on spreadsheets they designed themselves.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F11"><caption><p id="d1e4109">The percentage of values flagged within each network (see Table 5)
tested using the HQC automatic procedure. Variable acronyms are as explained
in the caption for Table 1, noting that not all variables were included in
each network.</p></caption>
            <?xmltex \igopts{width=236.157874pt}?><graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-f11.pdf"/>

          </fig>

      <p id="d1e4118">Finally, these opportunistic analyses show that many of the errors made in
the digitisation process are small. Reading the value from a nearby station
that is given in the row below the station of interest, or accidentally
shifting the data by 1 day is very difficult to identify using automatic or
semi-automatic quality control procedures. Double-keying, which is considered
standard practice for many data entry activities (Barchard and Pace, 2011),
would be the best way to overcome these issues, or even triple-keying, which
is the method used by a number of citizen science activities (e.g. Old
Weather, <uri>www.oldweather.org</uri>, last access: 8 August 2018). However, this
was simply not feasible for this digitisation project due to limited resources.
While we cannot say that the final version of the dataset from this study is
free from errors, the methods we have used have removed or flagged the
majority of suspect values.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F12" specific-use="star"><caption><p id="d1e4127">The percentage distribution of quality control flags in the dataset.
Values that have passed QC are represented in green (QC flags fl10, fl40 and
fl30); values that were flagged but verified as correct are shown in purple
(fl14, fl44 and fl34); values that were flagged but corrected are shown in
blue (fl12, fl42 and fl32); and values that were flagged and removed are shown in
orange (fl11, fl13, fl15, fl17 and fl36). The darkness of the colours
indicates the level of QC applied for each flag. Lighter colours represent
values that were only subjected to semi-automatic quality control (SAQC, fl
codes that begin with 1), darker colours indicate values subjected to both
SAQC and spatial HQC procedures (fl codes that begin with 3), and the colours
in the middle represent the small number of values that may need to be
rechecked (fl codes that begin with 4). See Table 4 for additional flag
details.</p></caption>
            <?xmltex \igopts{width=341.433071pt}?><graphic xlink:href="https://essd.copernicus.org/articles/10/1613/2018/essd-10-1613-2018-f12.png"/>

          </fig>

</sec>
</sec>
</sec>
<sec id="Ch1.S5">
  <title>Discussion</title>
      <p id="d1e4145">Procedures used in this study to identify, digitise and quality control data
are an example of the effort required to prepare<?pagebreak page1631?> an observational dataset for
analysis. Meteorological data come in a wide range of formats, and preparing
these data to be ingested into a national database, or shared among the
research community, is not a trivial task. It can be time consuming,
expensive and difficult (Brönnimann et al., 2006). In particular, the
transcription of the original observations (referred to here as
digitisation) requires a lot of work hours and resources. Without a reliable
method of digitisation and a standard method to assess the quality of
sources, the accuracy and usability of the final dataset can be jeopardised.</p>
      <p id="d1e4148">There are some overarching guidelines currently available to assist
organisations and communities who are conducting their own data recovery
projects. However, they are generally brief when it comes to specifics of the
digitisation method. Original WMO guidelines on climate data rescue
(Tan et al., 2004), for example, include minimal
information on the best method of data digitisation, but instead focus on
locating original data sources and data management.</p>
      <p id="d1e4151">In their guide for digitising manuscript climate data, Brönnimann et
al. (2006) describe the use of speech recognition, optical character
recognition (OCR) and manual key entry. On balance, they found key entry to
be the most efficient method of digitising data, in terms of speed, error
rate and the amount of post-processing required. The WMO updated data rescue
guidelines (World Meteorological Organization, 2016) support this finding,
suggesting that OCR techniques are expensive and only appropriate for certain
sources, while the human eye is still better when translating hand-written
observations.</p>
      <p id="d1e4154">The currently accepted best practice for manual data digitisation is to
double- or sometimes triple-key data using a “key what you see” method
that employs templates which match the data source (Healy et al., 2004; Ryan
et al., 2018; World Meteorological Organization, 2016). Citizen science
efforts that make use of large numbers of volunteers in fact require a value
to be keyed at least 3, and up to 5, times (Eveleigh et al., 2013).
Coupled with an automatic quality control procedure, these features of the
digitisation process are important for providing the best possible
opportunity for data accuracy.</p>
      <p id="d1e4158">However, in reality it is prohibitively expensive and not feasible for many
small data recovery projects to use all of these features. Single data entry
with visual checking is often the most cost-effective way of recovering
valuable climate data for analysis, even though there are known issues
around the resultant data quality. Based on our experience, we provide five
key recommendations for other data rescue initiatives that might lack the
resources to employ double or triple keying techniques:
<list list-type="bullet"><list-item>
      <p id="d1e4163"><italic>Conduct a complete assessment of each data source before digitisation</italic>.</p>
      <p id="d1e4168">It is vital to understand the limitations and issues of original data images
and sources before the digitisation process begins
(Brönnimann et al., 2006), particularly if<?pagebreak page1632?> the
data are provided in pre-scanned format. Checking every page of the original
data source before providing it for digitisation will save time and effort
in the long term. Identify any mistakes in the page order, missing pages,
images that are too dark or light to be read, or any changes in format or
data units, to make an assessment of the data source quality. With this
information it then becomes possible to provide improved instructions to
digitisers, develop better templates and tools for digitisation, or even
re-scan data sources if possible.</p></list-item><list-item>
      <p id="d1e4172"><italic>Develop user-friendly templates</italic>.</p>
      <p id="d1e4177">Our examination of duplicated data for Zagazig (Sect. 4.4) does not align
with the recommendations made by WMO (2016) about the use of templates. In
this case study, one digitiser was asked to key data for more than 20
stations into a template, while the other digitised observations from only
1 station (1 row per page of data source) without a template. More errors
were made using a template than not using a template, although it must be
noted that the template style was unfamiliar to the digitisers, and different
digitisers completed the tasks. Clearly there is a balance between the
repetitive nature of keying in multiple rows of data, and the high chance of
error associated with picking out one row of data in a complex table.</p>
      <p id="d1e4180">Despite this finding, we still believe that the use of templates acts to
reduce the number of digitisation errors. Although templates do not remove
issues associated with the original source, they do give the digitiser the
best chance to replicate what they see on the page. Templates that include
automatic visualisation of the observations, highlight outliers, or enforce
regular breaks would help to improve the quality of the resultant data.
Another suggestion could be to develop the templates in collaboration with
the digitisation team.</p></list-item><list-item>
      <p id="d1e4184"><italic>Involve digitisers in quality control procedures</italic>.</p>
      <p id="d1e4189">One potentially time-saving method that can be employed to reduce
digitisation errors is to involve the digitisers in the quality assurance and
quality control of the data. It is true that unreliable digitisers may also
make unreliable quality control assessors, but by asking digitisers to run QC
on data keyed by others, they will become more aware of common errors they
may make in their own work. This step can also help to identify errors within
the data source, as poor observational practices may lead to erroneous
instrument readings or other mistakes when transcribing the data if the data
are secondary sources (Brönnimann et al., 2006; Hunziker et al., 2017).</p></list-item><list-item>
      <p id="d1e4193"><italic>Do not underestimate the value of manually checking quality control results</italic>.</p>
      <p id="d1e4198">Most QC procedures are based on statistical tests and are intended to
identify individual errors or a chain of erroneous values. Visual QC checks
are an alternative, but they are neither well developed nor widely employed;
as a result, data quality issues that appear systematically can remain
undetected in the data series (Hunziker et al., 2017).</p>
      <p id="d1e4201">While manually checking the results of any QC procedure is very time
consuming and tedious, our work suggests that for data rescue projects –
particularly for critical spatial or temporal gaps – it is a necessary step
to minimise the number of observations incorrectly removed as errors.
Completely automated QC procedures used for global products run the risk of
removing large swathes of data that can be corrected by a close examination
of the reasons behind the flag. For example, if data from a station are out
by 1 day due to a digitisation error, it will likely be removed in any
automatic spatial analysis with neighbours. Flagging and manually examining
these errors allows all of the affected observations to be retained by one
correction. Automatic quality control procedures can also remove real
extreme events or other observations that are correct but trigger flags, as
they have been converted from a coarser unit to those used in modern
observations.</p>
      <p id="d1e4204">The value of manually assessing QC results means that it is also necessary
to use an appropriate QC procedure. A QC tool that produces a large number
of false quality flags may cause a project to lose a lot of time validating
observations. For that reason it may be appropriate to tailor the QC
procedure for different sources, provided that any variations are recorded.</p></list-item><list-item>
      <p id="d1e4208"><italic>Provide all versions of the final dataset to enable traceability</italic>.</p>
      <p id="d1e4213">Finally, as with all dataset development, it is crucial to retain all
versions of the data, from the original images to the raw keyed data,
through all of the quality control iterations and any conversions applied.
Manual checking of values and decisions based on expert knowledge may mean
that it is not possible to create a truly reproducible product, but
accompanying each data value with a quality flag and keeping every version
of the data can create, as much as possible, a dataset that is traceable.</p></list-item></list></p>
</sec>

      
      </body>
    <back><notes notes-type="dataavailability">

      <p id="d1e4220">All versions of the digitised dataset are available through
the World Data Center PANGAEA (<ext-link xlink:href="https://doi.org/10.1594/PANGAEA.886511" ext-link-type="DOI">10.1594/PANGAEA.886511</ext-link>, Ashcroft et al.,
2018). Version 1 contains the raw digitised data, which in the original
format includes typographical errors and other issues subsequently identified
in the quality control<?pagebreak page1633?> procedure. We have retained this information to ensure
transparency of the process, in case it is useful for future users of the
dataset. Version 2 contains the data with SAQC applied. Version 3 contains
the data with statistical and spatial automated quality control applied,
while Version 4 (labelled “convertedvalue”) contains the Version 3 data
converted to SI units. Full details of the quality control flags, data
sources and station information are also provided.</p>

      <p id="d1e4226">These files have also been provided to international data repositories,
including the International Surface Pressure Databank, the International
Surface Temperature Initiative, the C3S 311a Lot 2 Global Land and Marine
Observations Database service through the British Science and Technology
Facilities Council (STFC)/Centre for Environmental Data Analysis (CEDA),
ECMWF's MARS Catalogue, the Global
Precipitation Climatology Centre Dataset, the ECA&amp;D, and HadISD. Through
these repositories and their connections to ECMWF's MARS holdings, future
users should be able to develop long-term composite time series of these and
other observations from the European sector. The original data scans are
available through each data repository (Table S3) and through the Universitat
Rovira i Virgili Centre for Climate Change (<uri>ftp://130.206.36.123</uri>, user:
C3_UERRA, password: c3uerra17).</p>
  </notes>
<sec id="Ch1.S6" sec-type="conclusions">
  <title>Conclusions</title>
      <p id="d1e4238">This study describes our process of identifying, digitising and quality
controlling an extensive set of sub-daily meteorological observations across
Europe and the southern Mediterranean for use by the wider research
community. The multiple, complex steps associated with dataset development
are often overlooked when data are used for research, and yet without them,
there would be no data to analyse. The data we have rescued as part of the
UERRA project total 8.8 million observations from 15 countries, spanning
1877 to 2012. The observations cover the Mediterranean region, as well as
eastern and central Europe, addressing data scarcity in these regions as
identified in currently existing weather and climate data repositories.</p>
      <p id="d1e4241">Observations of several ECVs, including temperature, atmospheric pressure,
wind, humidity and precipitation, have been recovered from a wide range of
original sources, from field books to daily weather registers kept for an
entire country. Some sources were typed while others were hand-written; some
were provided in standard meteorological units, while others needed
extensive conversion to be comparable with modern data.</p>
      <p id="d1e4244">These observations have also been subjected to extensive semi-automatic and
automatic quality control, making them useful for the development and
verification of regional reanalysis, as well as potential studies of
high-resolution weather at a station level. The QC procedure flagged
3.9 % of the total number of observations digitised, with 2.1 % of
the total number removed, 1.5 % corrected and 0.3 % retained as
correct observations. These QC results are on par with other data rescue
activities. It is our hope that these observations support and improve the
next generation of international and European weather and climate services.</p><supplementary-material position="anchor"><p id="d1e4246"><bold>The Supplement related to this article is available online at <inline-supplementary-material xlink:href="https://doi.org/10.5194/essd-10-1613-2018-supplement" xlink:title="zip">https://doi.org/10.5194/essd-10-1613-2018-supplement</inline-supplementary-material>.</bold></p></supplementary-material>
</sec><notes notes-type="authorcontribution">

      <p id="d1e4253">PU was the coordinator of the UERRA project, and MB and PJ coordinated the
data rescue component of WP1. LA, JRC, AG and MC managed the digitisation and
visual cross-check procedure. PD, EA, LA and JS developed the SAQC procedure.
AG developed and ran the HQC procedure. IH provided information and analysis
on the MARS data availability. The initial draft of the paper was written by
LA with contributions from JRC for Sects. 3.2, 4.2 and 5; AG for Sects. 3.3,
4.3 and 5; and MB for Sect. 5. All authors contributed to the revision of the
text.</p>
  </notes><notes notes-type="competinginterests">

      <p id="d1e4259">The authors declare that they have no conflict of
interest.</p>
  </notes><ack><title>Acknowledgements</title><p id="d1e4265">This work was undertaken as part of the Uncertainties in Ensembles of
Regional ReAnalysis project (UERRA, SPAce grant number 607193). Many thanks
to the UERRA digitisation team: Irene López, David Azuara,
Javier Paradinas, Antoni Domènech, Eulàlia Pla, Ester Romero,
Alba Robert, Jordi Tarragó, Carla Bonfill, Guillem Vandellòs,
Ruben Guerra, Aitor Àvila, Adrià Balart, Marc Martin,
Ezequiel Cebrián, Pau Sabaté and Anna Boqué. We would like to
thank the National Meteorological and Hydrological Services for searching
their archives and sharing data sources, in particular Marc Prohom
(MeteoCat), Frank Kaspar (DWD) and Matija Klančar (SEA). We are also very
grateful to Robert Dunn for his help with the use of the HadISD quality
control code, Lynda Chambers and Blair Trewin for their feedback on earlier
versions of the manuscript, and two anonymous reviewers whose comments
greatly improved the final version of this paper.<?xmltex \hack{\newline}?><?xmltex \hack{\newline}?> Edited by: David Carlson<?xmltex \hack{\newline}?> Reviewed by: two
anonymous referees</p></ack><ref-list>
    <title>References</title>

      <ref id="bib1.bib1"><label>1</label><mixed-citation>
Aguilar, E., Auer, I., Brunet, M., Peterson, T. C., and Wieringa, J.:
Guidelines on Climate Metadata and Homogenization, World Meteorological
Organization, WMO/TD No. 1186, 55 pp., 2003.</mixed-citation></ref>
      <ref id="bib1.bib2"><label>2</label><mixed-citation>Allan, R., Brohan, P., Compo, G. P., Stone, R., Luterbacher, J.,
and Brönnimann, S.: The International Atmospheric
Circulation Reconstructions over the Earth (ACRE) Initiative, B. Am.
Meteorol. Soc., 92, 1421–1425, <ext-link xlink:href="https://doi.org/10.1175/2011BAMS3218.1" ext-link-type="DOI">10.1175/2011BAMS3218.1</ext-link>, 2011.</mixed-citation></ref>
      <ref id="bib1.bib3"><label>3</label><mixed-citation>Angot, A.: Instructions météorologiques, Meteofrance, Paris, France,
available at: <uri>http://bibliotheque.meteo.fr/exl-php/vue-consult/mf_-_recherche_avancee/ISO00008704</uri> (last access: 4 June 2018), 1931.</mixed-citation></ref>
      <ref id="bib1.bib4"><label>4</label><mixed-citation>Ashcroft, L., Gergis, J., and Karoly, D. J.: A historical climate dataset for
southeastern Australia, 1788–1859, Geosci. Data J., 1, 158–178,
<ext-link xlink:href="https://doi.org/10.1002/gdj3.19" ext-link-type="DOI">10.1002/gdj3.19</ext-link>, 2014.</mixed-citation></ref>
      <ref id="bib1.bib5"><label>5</label><mixed-citation>Ashcroft, L., Coll, J. R., Gilabert, A., Domonkos, P., Aguilar, E., Sigro, J.,
Castella, M., Unden, P., Harris, I., Jones, P., and Brunet, M.:
Meteorological observations for Europe and the southern Mediterranean region,
1877–2012, PANGAEA, <ext-link xlink:href="https://doi.org/10.1594/PANGAEA.886511" ext-link-type="DOI">10.1594/PANGAEA.886511</ext-link>, 2018.</mixed-citation></ref>
      <ref id="bib1.bib6"><label>6</label><mixed-citation>Auer, I., Böhm, R., Jurkovic, A., Lipa, W., Orlik, A., Potzmann, R.,
Schöner, W., Ungersböck, M., Matulla, C., Briffa, K., Jones, P.,
Efthymiadis, D., Brunetti, M., Nanni, T., Maugeri, M., Mercalli, L., Mestre,
O., Moisselin, J.-M., Begert, M., Müller-Westermeier, G., Kveton, V.,
Bochnicek, O., Stastny, P., Lapin, M., Szalai, S., Szentimrey, T., Cegnar,
T., Dolinar, M., Gajic-Capka, M., Zaninovic, K., Majstorovic, Z., and
Nieplova, E.: HISTALP—historical instrumental climatological surface time
series of the Greater Alpine Region, Int. J. Climatol., 27, 17–46,
<ext-link xlink:href="https://doi.org/10.1002/joc.1377" ext-link-type="DOI">10.1002/joc.1377</ext-link>, 2007.</mixed-citation></ref>
      <ref id="bib1.bib7"><label>7</label><mixed-citation>Barchard, K. A. and Pace, L. A.: Preventing human error: The impact of data
entry methods on data accuracy and statistical results, Comput. Human Behav.,
27(5), 1834–1839, <ext-link xlink:href="https://doi.org/10.1016/J.CHB.2011.04.004" ext-link-type="DOI">10.1016/J.CHB.2011.04.004</ext-link>, 2011.</mixed-citation></ref>
      <ref id="bib1.bib8"><label>8</label><mixed-citation>Brönnimann, S., Annis, J., Dann, W., Ewen, T., Grant, A. N., Griesser,
T., Krähenmann, S., Mohr, C., Scherer, M., and Vogler, C.: A guide for
digitising manuscript climate data, Clim. Past, 2, 137–144,
<ext-link xlink:href="https://doi.org/10.5194/cp-2-137-2006" ext-link-type="DOI">10.5194/cp-2-137-2006</ext-link>, 2006.</mixed-citation></ref>
      <ref id="bib1.bib9"><label>9</label><mixed-citation>Brunet, M. and Jones, P.: Data rescue initiatives: bringing historical
climate data into the 21st century, Clim. Res., 47, 29–40,
<ext-link xlink:href="https://doi.org/10.3354/cr00960" ext-link-type="DOI">10.3354/cr00960</ext-link>, 2011.</mixed-citation></ref>
      <ref id="bib1.bib10"><label>10</label><mixed-citation>Brunet, M., Saladié, O., Jones, P., Sigró, J., Aguilar, E., Moberg,
A., Lister, D., Walther, A., Lopez, D., and Almarza, C.: The development of a
new dataset of Spanish Daily Adjusted Temperature Series (SDATS)
(1850–2003), Int. J. Climatol., 26, 1777–1802, <ext-link xlink:href="https://doi.org/10.1002/joc.1338" ext-link-type="DOI">10.1002/joc.1338</ext-link>, 2006.</mixed-citation></ref>
      <ref id="bib1.bib11"><label>11</label><mixed-citation>Brunet, M., Gilabert, A., Jones, P., and Efthymiadis, D.: A historical
surface climate dataset from station observations in Mediterranean North
Africa and Middle East areas, Geosci. Data J., 1, 121–128,
<ext-link xlink:href="https://doi.org/10.1002/gdj3.12" ext-link-type="DOI">10.1002/gdj3.12</ext-link>, 2014a.</mixed-citation></ref>
      <ref id="bib1.bib12"><label>12</label><mixed-citation>Brunet, M., Jones, P. D., Jourdain, S., Efthymiadis, D., Kerrouche, M., and
Boroneant, C.: Data sources for rescuing the rich heritage of Mediterranean
historical surface climate data, Geosci. Data J., 1, 61–73,
<ext-link xlink:href="https://doi.org/10.1002/gdj3.4" ext-link-type="DOI">10.1002/gdj3.4</ext-link>, 2014b.</mixed-citation></ref>
      <ref id="bib1.bib13"><label>13</label><mixed-citation>Cannon, D. J., Brayshaw, D. J., Methven, J., Coker, P. J., and Lenaghan, D.:
Using reanalysis data to quantify extreme wind power generation statistics: A
33 year case study in Great Britain, Renew. Energy, 75, 767–778,
<ext-link xlink:href="https://doi.org/10.1016/j.renene.2014.10.024" ext-link-type="DOI">10.1016/j.renene.2014.10.024</ext-link>, 2015.</mixed-citation></ref>
      <ref id="bib1.bib14"><label>14</label><mixed-citation>Compo, G. P., Whitaker, J. S., Sardeshmukh, P. D., Matsui, N., Allan, R. J.,
Yin, X., Gleason, B. E., Vose, R. S., Rutledge, G., Bessemoulin, P.,
Brönnimann, S., Brunet, M., Crouthamel, R. I., Grant, A. N., Groisman, P.
Y., Jones, P. D., Kruk, M. C., Kruger, A. C., Marshall, G. J., Maugeri, M.,
Mok, H. Y., Nordli, Ø., Ross, T. F., Trigo, R. M., Wang, X. L., Woodruff,
S. D., and Worley, S. J.: The Twentieth Century Reanalysis Project, Q. J.
Roy. Meteor. Soc., 137, 1–28, <ext-link xlink:href="https://doi.org/10.1002/qj.776" ext-link-type="DOI">10.1002/qj.776</ext-link>, 2011.</mixed-citation></ref>
      <ref id="bib1.bib15"><label>15</label><mixed-citation>Cram, T. A., Compo, G. P., Yin, X., Allan, R. J., McColl, C., Vose, R. S.,
Whitaker, J. S., Matsui, N., Ashcroft, L., Auchmann, R., Bessemoulin, P.,
Brandsma, T., Brohan, P., Brunet, M., Comeaux, J., Crouthamel, R., Gleason,
B. E., Groisman, P. Y., Hersbach, H., Jones, P. D., Jónsson, T.,
Jourdain, S., Kelly, G., Knapp, K. R., Kruger, A., Kubota, H., Lentini, G.,
Lorrey, A., Lott, N., Lubker, S. J., Luterbacher, J., Marshall, G. J.,
Maugeri, M., Mock, C. J., Mok, H. Y., Nordli, Ø., Rodwell, M. J., Ross, T.
F., Schuster, D., Srnec, L., Valente, M. A., Vizi, Z., Wang, X. L., Westcott,
N., Woollen, J. S., and Worley, S. J.: The International Surface Pressure
Databank version 2, Geosci. Data J., 2, 31–46, <ext-link xlink:href="https://doi.org/10.1002/gdj3.25" ext-link-type="DOI">10.1002/gdj3.25</ext-link>, 2015.</mixed-citation></ref>
      <ref id="bib1.bib16"><label>16</label><mixed-citation>Dahlgren, P., Landelius, T., Kållberg, P., and Gollvik, S.: A
high-resolution regional reanalysis for Europe, Part 1: Three-dimensional
reanalysis with the regional HIgh-Resolution Limited-Area Model (HIRLAM), Q.
J. Roy. Meteor. Soc., 142, 2119–2131, <ext-link xlink:href="https://doi.org/10.1002/qj.2807" ext-link-type="DOI">10.1002/qj.2807</ext-link>, 2016.</mixed-citation></ref>
      <ref id="bib1.bib17"><label>17</label><mixed-citation>Dunn, R. J. H., Willett, K. M., Thorne, P. W., Woolley, E. V., Durre, I.,
Dai, A., Parker, D. E., and Vose, R. S.: HadISD: a quality-controlled global
synoptic report database for selected variables at long-term stations from
1973–2011, Clim. Past, 8, 1649–1679, <ext-link xlink:href="https://doi.org/10.5194/cp-8-1649-2012" ext-link-type="DOI">10.5194/cp-8-1649-2012</ext-link>,
2012.</mixed-citation></ref>
      <ref id="bib1.bib18"><label>18</label><mixed-citation>
Da Silva, A. M., Young, C. C., and Levitus, S.: Toward a revised Beaufort
equivalent scale, in Proc. Int. COADS Winds Workshop, NOAA-ELR, Kiel,
270–286, 1995.</mixed-citation></ref>
      <ref id="bib1.bib19"><label>19</label><mixed-citation>Dunn, R. J. H., Willett, K. M., Parker, D. E., and Mitchell, L.: Expanding
HadISD: quality-controlled, sub-daily station data from 1931, Geosci.
Instrum. Method. Data Syst., 5, 473–491,
<ext-link xlink:href="https://doi.org/10.5194/gi-5-473-2016" ext-link-type="DOI">10.5194/gi-5-473-2016</ext-link>, 2016.</mixed-citation></ref>
      <ref id="bib1.bib20"><label>20</label><mixed-citation>Durre, I., Menne, M. J., Gleason, B. E., Houston, T. G., and Vose, R. S.:
Comprehensive Automated Quality Assurance of Daily Surface Observations, J.
Appl. Meteorol. Climatol., 49, 1615–1633, <ext-link xlink:href="https://doi.org/10.1175/2010JAMC2375.1" ext-link-type="DOI">10.1175/2010JAMC2375.1</ext-link>, 2010.</mixed-citation></ref>
      <ref id="bib1.bib21"><label>21</label><mixed-citation>Eveleigh, A., Jennett, C., Lynn, S., and Cox, A. L.: “I want to be a
captain! I want to be a captain!”: gamification in the old weather citizen
science project, Proc. First Int. Conf. Gameful Des. Res. Appl. –
Gamification '13, 79–82, <ext-link xlink:href="https://doi.org/10.1145/2583008.2583019" ext-link-type="DOI">10.1145/2583008.2583019</ext-link>, 2013.</mixed-citation></ref>
      <ref id="bib1.bib22"><label>22</label><mixed-citation>Healy, A. F., Kole, J. A., Buck-Gengler, C. J., and Bourne, L. E.: Effects of
Prolonged Work on Data Entry Speed and Accuracy, J. Exp. Psychol. Appl., 10,
188–199, <ext-link xlink:href="https://doi.org/10.1037/1076-898X.10.3.188" ext-link-type="DOI">10.1037/1076-898X.10.3.188</ext-link>, 2004.</mixed-citation></ref>
      <ref id="bib1.bib23"><label>23</label><mixed-citation>Hunziker, S., Gubler, S., Calle, J., Moreno, I., Andrade, M., Velarde, F.,
Ticona, L., Carrasco, G., Castellón, Y., Oria, C., Croci-Maspoli, M.,
Konzelmann, T., Rohrer, M., and Brönnimann, S.: Identifying, attributing,
and overcoming common data quality issues of manned station observations,
Int. J. Climatol., 37, 4131–4145, <ext-link xlink:href="https://doi.org/10.1002/joc.5037" ext-link-type="DOI">10.1002/joc.5037</ext-link>, 2017.</mixed-citation></ref>
      <ref id="bib1.bib24"><label>24</label><mixed-citation>Kaspar, F., Tinz, B., Mächel, H., and Gates, L.: Data rescue of national
and international meteorological observations at Deutscher Wetterdienst, Adv.
Sci. Res., 12, 57–61, <ext-link xlink:href="https://doi.org/10.5194/asr-12-57-2015" ext-link-type="DOI">10.5194/asr-12-57-2015</ext-link>, 2015.</mixed-citation></ref>
      <ref id="bib1.bib25"><label>25</label><mixed-citation>Moberg, A., Jones, P. D., Lister, D., Walther, A., Brunet, M., Jacobeit, J.,
Alexander, L. V., Della-Marta, P. M., Luterbacher, J., Yiou, P., Chen, D.,
Klein Tank, A. M. G., Saladié, O., Sigró, J., Aguilar, E.,
Alexandersson, H., Almarza, C., Auer, I., Barriendos, M., Begert, M.,
Bergström, H., Böhm, R., Butler, C. J., Caesar, J., Drebs, A.,
Founda, D., Gerstengarbe,<?pagebreak page1635?> F.-W., Micela, G., Maugeri, M., Österle, H.,
Pandzic, K., Petrakis, M., Srnec, L., Tolasz, R., Tuomenvirta, H., Werner, P.
C., Linderholm, H., Philipp, A., Wanner, H., and Xoplaki, E.: Indices for
daily temperature and precipitation extremes in Europe analyzed for the
period 1901–2000, J. Geophys. Res., 111, D22106, <ext-link xlink:href="https://doi.org/10.1029/2006JD007103" ext-link-type="DOI">10.1029/2006JD007103</ext-link>,
2006.</mixed-citation></ref>
      <ref id="bib1.bib26"><label>26</label><mixed-citation>Page, C., Nicholls, N., Plummer, N., Trewin, B., Manton, M., Alexander, L.,
Chambers, L., Choi, L. E., Collins, D. A., Gosai, A., Della-Marta, P.,
Haylock, M. R., Inape, K., Laurent, V., Maitrepierre, L., Makmur, E. P.,
Nagamigawa, H., Ouprasitwond, N., McGree, S., Pahalad, J., Salinger, J.,
Tibig, L., Tran, T. D., Vediapan, K., and Zhai, P.: Data Rescue in the
Southeast Asia and South Pacific Region: Challenges and Opportunities, B. Am.
Meteorol. Soc., 85, 1483–1489, <ext-link xlink:href="https://doi.org/10.1175/BAMS-85-10-1483" ext-link-type="DOI">10.1175/BAMS-85-10-1483</ext-link>, 2004.</mixed-citation></ref>
      <ref id="bib1.bib27"><label>27</label><mixed-citation>Peterson, T. C., Easterling, D. R., Karl, T. R., Groisman, P., Nicholls, N.,
Plummer, N., Torok, S., Auer, I., Boehm, R., Gullett, D., Vincent, L., Heino,
R., Tuomenvirta, H., Mestre, O., Szentimrey, T., Salinger, J., Førland, E.
J., Hanssen-Bauer, I., Alexandersson, H., Jones, P., and Parker, D.:
Homogeneity adjustments of in situ atmospheric climate data: a review, Int. J.
Climatol., 18, 1493–1517,
<ext-link xlink:href="https://doi.org/10.1002/(SICI)1097-0088(19981115)18:13&lt;1493::AID-JOC329&gt;3.0.CO;2-T" ext-link-type="DOI">10.1002/(SICI)1097-0088(19981115)18:13&lt;1493::AID-JOC329&gt;3.0.CO;2-T</ext-link>,
1998.</mixed-citation></ref>
      <ref id="bib1.bib28"><label>28</label><mixed-citation>Ryan, C., Duffy, C., Broderick, C., Thorne, P. W., Curley, M., Walsh, S.,
Daly, C., Treanor, M., and Murphy, C.:
Integrating data rescue into the classroom, B. Am. Meteorol. Soc.,
<ext-link xlink:href="https://doi.org/10.1175/BAMS-D-17-0147.1" ext-link-type="DOI">10.1175/BAMS-D-17-0147.1</ext-link>, online first, 2018.</mixed-citation></ref>
      <ref id="bib1.bib29"><label>29</label><mixed-citation>Stickler, A., Brönnimann, S., Valente, M. A., Bethke, J., Sterin, A.,
Jourdain, S., Roucaute, E., Vasquez, M. V., Reyes, D. A., Allan, R., and Dee,
D.: ERA-CLIM: Historical Surface and Upper-Air Data for Future Reanalyses, B.
Am. Meteorol. Soc., 95, 1419–1430, <ext-link xlink:href="https://doi.org/10.1175/BAMS-D-13-00147.1" ext-link-type="DOI">10.1175/BAMS-D-13-00147.1</ext-link>, 2014.
</mixed-citation></ref><?xmltex \hack{\newpage}?>
      <ref id="bib1.bib30"><label>30</label><mixed-citation>Tan, L. S., Burton, S., Crouthamel, R., van Engelen, A., Hutchinson, R.,
Nicodemus, L., Peterson, T. C., and Rahimzadeh, F.: Guidelines on Climate
Data Rescue, WMO/TD No. 1210, available at:
<uri>http://www.wmo.int/pages/prog/wcp/wcdmp/documents/WCDMP-55.pdf</uri> (last
access: 4 June 2018), 2004.</mixed-citation></ref>
      <ref id="bib1.bib31"><label>31</label><mixed-citation>Universitat Rovira i Virgili: Original scans of data sources used in the
development of the UERRA sub-daily dataset, available at:
<uri>ftp://130.206.36.123</uri> (last access: 8 August 2018), 2018.</mixed-citation></ref>
      <ref id="bib1.bib32"><label>32</label><mixed-citation>Venema, V. K. C., Mestre, O., Aguilar, E., Auer, I., Guijarro, J. A.,
Domonkos, P., Vertacnik, G., Szentimrey, T., Stepanek, P., Zahradnicek, P.,
Viarre, J., Müller-Westermeier, G., Lakatos, M., Williams, C. N., Menne,
M. J., Lindau, R., Rasol, D., Rustemeier, E., Kolokythas, K., Marinova, T.,
Andresen, L., Acquaotta, F., Fratianni, S., Cheval, S., Klancar, M.,
Brunetti, M., Gruber, C., Prohom Duran, M., Likso, T., Esteban, P., and
Brandsma, T.: Benchmarking homogenization algorithms for monthly data, Clim.
Past, 8, 89–115, <ext-link xlink:href="https://doi.org/10.5194/cp-8-89-2012" ext-link-type="DOI">10.5194/cp-8-89-2012</ext-link>, 2012.</mixed-citation></ref>
      <ref id="bib1.bib33"><label>33</label><mixed-citation>World Meteorological Organization: GCOS-195: Status of the Global Observing
System for Climate, available at:
<uri>https://unfccc.int/sites/default/files/541.pdf</uri> (last access:
4 June 2018), 2015.</mixed-citation></ref>
      <ref id="bib1.bib34"><label>34</label><mixed-citation>World Meteorological Organization: Guidelines on Best Practices for Climate
Data Rescue 2016, WMO-No. 1182, available at:
<uri>https://public.wmo.int/en/resources/library/guidelines-best-practices-climate-data-rescue</uri>
(last access: 8 August 2018), 2016.</mixed-citation></ref>

  </ref-list></back>
    <!--<article-title-html>A rescued dataset of sub-daily meteorological observations for Europe and the southern Mediterranean region, 1877–2012</article-title-html>
<abstract-html><p>Sub-daily meteorological observations are needed for input to and assessment
of high-resolution reanalysis products to improve understanding of weather
and climate variability. While there are millions of such weather
observations that have been collected by various organisations, many are yet
to be transcribed into a useable format.</p><p>Under the auspices of the Uncertainties in Ensembles of Regional
ReAnalyses (UERRA) project, we
describe the compilation and development of a digital dataset of 8.8 million
meteorological observations of essential climate variables (ECVs) rescued
across the European and southern Mediterranean region. By presenting the
entire chain of data preparation, from the identification of regions lacking
in digitised sub-daily data and the location of original sources, through the
digitisation of the observations to the quality control procedures applied,
we provide a rescued dataset that is as traceable as possible for use by the
research community.</p><p>Data from 127 stations and of 15 climate variables in the northern African and
European sectors have been prepared for the period 1877 to 2012. Quality
control of the data using a two-step semi-automatic statistical approach
identified 3.5&thinsp;% of observations that required correction or removal, on
par with previous data rescue efforts.</p><p>In addition to providing a new sub-daily meteorological dataset for the
research community, our experience in the development of this
sub-daily dataset gives us an
opportunity to share some suggestions for future data rescue projects.</p><p>All versions of the dataset, from the raw digitised data to data that have
been quality controlled and converted to standard units, are available on
PANGAEA: <a href="https://doi.org/10.1594/PANGAEA.886511" target="_blank">https://doi.org/10.1594/PANGAEA.886511</a> (Ashcroft et al., 2018).</p></abstract-html>
<ref-html id="bib1.bib1"><label>1</label><mixed-citation>
Aguilar, E., Auer, I., Brunet, M., Peterson, T. C., and Wieringa, J.:
Guidelines on Climate Metadata and Homogenization, World Meteorological
Organization, WMO/TD No. 1186, 55 pp., 2003.
</mixed-citation></ref-html>
<ref-html id="bib1.bib2"><label>2</label><mixed-citation>
Allan, R., Brohan, P., Compo, G. P., Stone, R., Luterbacher, J., and
Brönnimann, S.: The International Atmospheric
Circulation Reconstructions over the Earth (ACRE) Initiative, B. Am.
Meteorol. Soc., 92, 1421–1425, <a href="https://doi.org/10.1175/2011BAMS3218.1" target="_blank">https://doi.org/10.1175/2011BAMS3218.1</a>, 2011.
</mixed-citation></ref-html>
<ref-html id="bib1.bib3"><label>3</label><mixed-citation>
Angot, A.: Instructions météorologiques, Meteofrance, Paris, France,
available at: <a href="http://bibliotheque.meteo.fr/exl-php/vue-consult/mf_-_recherche_avancee/ISO00008704" target="_blank">http://bibliotheque.meteo.fr/exl-php/vue-consult/mf_-_recherche_avancee/ISO00008704</a> (last access: 4 June 2018), 1931.
</mixed-citation></ref-html>
<ref-html id="bib1.bib4"><label>4</label><mixed-citation>
Ashcroft, L., Gergis, J., and Karoly, D. J.: A historical climate dataset for
southeastern Australia, 1788–1859, Geosci. Data J., 1, 158–178,
<a href="https://doi.org/10.1002/gdj3.19" target="_blank">https://doi.org/10.1002/gdj3.19</a>, 2014.
</mixed-citation></ref-html>
<ref-html id="bib1.bib5"><label>5</label><mixed-citation>
Ashcroft, L., Coll, J. R., Gilabert, A., Domonkos, P., Aguilar, E., Sigro, J.,
Castella, M., Unden, P., Harris, I., Jones, P., and Brunet, M.:
Meteorological observations for Europe and the southern Mediterranean region,
1877–2012, PANGAEA, <a href="https://doi.org/10.1594/PANGAEA.886511" target="_blank">https://doi.org/10.1594/PANGAEA.886511</a>, 2018.
</mixed-citation></ref-html>
<ref-html id="bib1.bib6"><label>6</label><mixed-citation>
Auer, I., Böhm, R., Jurkovic, A., Lipa, W., Orlik, A., Potzmann, R.,
Schöner, W., Ungersböck, M., Matulla, C., Briffa, K., Jones, P.,
Efthymiadis, D., Brunetti, M., Nanni, T., Maugeri, M., Mercalli, L., Mestre,
O., Moisselin, J.-M., Begert, M., Müller-Westermeier, G., Kveton, V.,
Bochnicek, O., Stastny, P., Lapin, M., Szalai, S., Szentimrey, T., Cegnar,
T., Dolinar, M., Gajic-Capka, M., Zaninovic, K., Majstorovic, Z., and
Nieplova, E.: HISTALP—historical instrumental climatological surface time
series of the Greater Alpine Region, Int. J. Climatol., 27, 17–46,
<a href="https://doi.org/10.1002/joc.1377" target="_blank">https://doi.org/10.1002/joc.1377</a>, 2007.
</mixed-citation></ref-html>
<ref-html id="bib1.bib7"><label>7</label><mixed-citation>
Barchard, K. A. and Pace, L. A.: Preventing human error: The impact of data
entry methods on data accuracy and statistical results, Comput. Human Behav.,
27(5), 1834–1839, <a href="https://doi.org/10.1016/J.CHB.2011.04.004" target="_blank">https://doi.org/10.1016/J.CHB.2011.04.004</a>, 2011.
</mixed-citation></ref-html>
<ref-html id="bib1.bib8"><label>8</label><mixed-citation>
Brönnimann, S., Annis, J., Dann, W., Ewen, T., Grant, A. N., Griesser,
T., Krähenmann, S., Mohr, C., Scherer, M., and Vogler, C.: A guide for
digitising manuscript climate data, Clim. Past, 2, 137–144,
<a href="https://doi.org/10.5194/cp-2-137-2006" target="_blank">https://doi.org/10.5194/cp-2-137-2006</a>, 2006.
</mixed-citation></ref-html>
<ref-html id="bib1.bib9"><label>9</label><mixed-citation>
Brunet, M. and Jones, P.: Data rescue initiatives: bringing historical
climate data into the 21st century, Clim. Res., 47, 29–40,
<a href="https://doi.org/10.3354/cr00960" target="_blank">https://doi.org/10.3354/cr00960</a>, 2011.
</mixed-citation></ref-html>
<ref-html id="bib1.bib10"><label>10</label><mixed-citation>
Brunet, M., Saladié, O., Jones, P., Sigró, J., Aguilar, E., Moberg,
A., Lister, D., Walther, A., Lopez, D., and Almarza, C.: The development of a
new dataset of Spanish Daily Adjusted Temperature Series (SDATS)
(1850–2003), Int. J. Climatol., 26, 1777–1802, <a href="https://doi.org/10.1002/joc.1338" target="_blank">https://doi.org/10.1002/joc.1338</a>, 2006.
</mixed-citation></ref-html>
<ref-html id="bib1.bib11"><label>11</label><mixed-citation>
Brunet, M., Gilabert, A., Jones, P., and Efthymiadis, D.: A historical
surface climate dataset from station observations in Mediterranean North
Africa and Middle East areas, Geosci. Data J., 1, 121–128,
<a href="https://doi.org/10.1002/gdj3.12" target="_blank">https://doi.org/10.1002/gdj3.12</a>, 2014a.
</mixed-citation></ref-html>
<ref-html id="bib1.bib12"><label>12</label><mixed-citation>
Brunet, M., Jones, P. D., Jourdain, S., Efthymiadis, D., Kerrouche, M., and
Boroneant, C.: Data sources for rescuing the rich heritage of Mediterranean
historical surface climate data, Geosci. Data J., 1, 61–73,
<a href="https://doi.org/10.1002/gdj3.4" target="_blank">https://doi.org/10.1002/gdj3.4</a>, 2014b.
</mixed-citation></ref-html>
<ref-html id="bib1.bib13"><label>13</label><mixed-citation>
Cannon, D. J., Brayshaw, D. J., Methven, J., Coker, P. J., and Lenaghan, D.:
Using reanalysis data to quantify extreme wind power generation statistics: A
33 year case study in Great Britain, Renew. Energy, 75, 767–778,
<a href="https://doi.org/10.1016/j.renene.2014.10.024" target="_blank">https://doi.org/10.1016/j.renene.2014.10.024</a>, 2015.
</mixed-citation></ref-html>
<ref-html id="bib1.bib14"><label>14</label><mixed-citation>
Compo, G. P., Whitaker, J. S., Sardeshmukh, P. D., Matsui, N., Allan, R. J.,
Yin, X., Gleason, B. E., Vose, R. S., Rutledge, G., Bessemoulin, P.,
Brönnimann, S., Brunet, M., Crouthamel, R. I., Grant, A. N., Groisman, P.
Y., Jones, P. D., Kruk, M. C., Kruger, A. C., Marshall, G. J., Maugeri, M.,
Mok, H. Y., Nordli, Ø., Ross, T. F., Trigo, R. M., Wang, X. L., Woodruff,
S. D., and Worley, S. J.: The Twentieth Century Reanalysis Project, Q. J.
Roy. Meteor. Soc., 137, 1–28, <a href="https://doi.org/10.1002/qj.776" target="_blank">https://doi.org/10.1002/qj.776</a>, 2011.
</mixed-citation></ref-html>
<ref-html id="bib1.bib15"><label>15</label><mixed-citation>
Cram, T. A., Compo, G. P., Yin, X., Allan, R. J., McColl, C., Vose, R. S.,
Whitaker, J. S., Matsui, N., Ashcroft, L., Auchmann, R., Bessemoulin, P.,
Brandsma, T., Brohan, P., Brunet, M., Comeaux, J., Crouthamel, R., Gleason,
B. E., Groisman, P. Y., Hersbach, H., Jones, P. D., Jónsson, T.,
Jourdain, S., Kelly, G., Knapp, K. R., Kruger, A., Kubota, H., Lentini, G.,
Lorrey, A., Lott, N., Lubker, S. J., Luterbacher, J., Marshall, G. J.,
Maugeri, M., Mock, C. J., Mok, H. Y., Nordli, Ø., Rodwell, M. J., Ross, T.
F., Schuster, D., Srnec, L., Valente, M. A., Vizi, Z., Wang, X. L., Westcott,
N., Woollen, J. S., and Worley, S. J.: The International Surface Pressure
Databank version 2, Geosci. Data J., 2, 31–46, <a href="https://doi.org/10.1002/gdj3.25" target="_blank">https://doi.org/10.1002/gdj3.25</a>, 2015.
</mixed-citation></ref-html>
<ref-html id="bib1.bib16"><label>16</label><mixed-citation>
Dahlgren, P., Landelius, T., Kållberg, P., and Gollvik, S.: A
high-resolution regional reanalysis for Europe, Part 1: Three-dimensional
reanalysis with the regional HIgh-Resolution Limited-Area Model (HIRLAM), Q.
J. Roy. Meteor. Soc., 142, 2119–2131, <a href="https://doi.org/10.1002/qj.2807" target="_blank">https://doi.org/10.1002/qj.2807</a>, 2016.
</mixed-citation></ref-html>
<ref-html id="bib1.bib17"><label>17</label><mixed-citation>
Dunn, R. J. H., Willett, K. M., Thorne, P. W., Woolley, E. V., Durre, I.,
Dai, A., Parker, D. E., and Vose, R. S.: HadISD: a quality-controlled global
synoptic report database for selected variables at long-term stations from
1973–2011, Clim. Past, 8, 1649–1679, <a href="https://doi.org/10.5194/cp-8-1649-2012" target="_blank">https://doi.org/10.5194/cp-8-1649-2012</a>,
2012.
</mixed-citation></ref-html>
<ref-html id="bib1.bib18"><label>18</label><mixed-citation>
Da Silva, A. M., Young, C. C., and Levitus, S.: Toward a revised Beaufort
equivalent scale, in Proc. Int. COADS Winds Workshop, NOAA-ELR, Kiel,
270–286, 1995.
</mixed-citation></ref-html>
<ref-html id="bib1.bib19"><label>19</label><mixed-citation>
Dunn, R. J. H., Willett, K. M., Parker, D. E., and Mitchell, L.: Expanding
HadISD: quality-controlled, sub-daily station data from 1931, Geosci.
Instrum. Method. Data Syst., 5, 473–491,
<a href="https://doi.org/10.5194/gi-5-473-2016" target="_blank">https://doi.org/10.5194/gi-5-473-2016</a>, 2016.
</mixed-citation></ref-html>
<ref-html id="bib1.bib20"><label>20</label><mixed-citation>
Durre, I., Menne, M. J., Gleason, B. E., Houston, T. G., and Vose, R. S.:
Comprehensive Automated Quality Assurance of Daily Surface Observations, J.
Appl. Meteorol. Climatol., 49, 1615–1633, <a href="https://doi.org/10.1175/2010JAMC2375.1" target="_blank">https://doi.org/10.1175/2010JAMC2375.1</a>, 2010.
</mixed-citation></ref-html>
<ref-html id="bib1.bib21"><label>21</label><mixed-citation>
Eveleigh, A., Jennett, C., Lynn, S., and Cox, A. L.: “I want to be a
captain! I want to be a captain!”: gamification in the old weather citizen
science project, Proc. First Int. Conf. Gameful Des. Res. Appl. –
Gamification '13, 79–82, <a href="https://doi.org/10.1145/2583008.2583019" target="_blank">https://doi.org/10.1145/2583008.2583019</a>, 2013.
</mixed-citation></ref-html>
<ref-html id="bib1.bib22"><label>22</label><mixed-citation>
Healy, A. F., Kole, J. A., Buck-Gengler, C. J., and Bourne, L. E.: Effects of
Prolonged Work on Data Entry Speed and Accuracy, J. Exp. Psychol. Appl., 10,
188–199, <a href="https://doi.org/10.1037/1076-898X.10.3.188" target="_blank">https://doi.org/10.1037/1076-898X.10.3.188</a>, 2004.
</mixed-citation></ref-html>
<ref-html id="bib1.bib23"><label>23</label><mixed-citation>
Hunziker, S., Gubler, S., Calle, J., Moreno, I., Andrade, M., Velarde, F.,
Ticona, L., Carrasco, G., Castellón, Y., Oria, C., Croci-Maspoli, M.,
Konzelmann, T., Rohrer, M., and Brönnimann, S.: Identifying, attributing,
and overcoming common data quality issues of manned station observations,
Int. J. Climatol., 37, 4131–4145, <a href="https://doi.org/10.1002/joc.5037" target="_blank">https://doi.org/10.1002/joc.5037</a>, 2017.
</mixed-citation></ref-html>
<ref-html id="bib1.bib24"><label>24</label><mixed-citation>
Kaspar, F., Tinz, B., Mächel, H., and Gates, L.: Data rescue of national
and international meteorological observations at Deutscher Wetterdienst, Adv.
Sci. Res., 12, 57–61, <a href="https://doi.org/10.5194/asr-12-57-2015" target="_blank">https://doi.org/10.5194/asr-12-57-2015</a>, 2015.
</mixed-citation></ref-html>
<ref-html id="bib1.bib25"><label>25</label><mixed-citation>
Moberg, A., Jones, P. D., Lister, D., Walther, A., Brunet, M., Jacobeit, J.,
Alexander, L. V., Della-Marta, P. M., Luterbacher, J., Yiou, P., Chen, D.,
Klein Tank, A. M. G., Saladié, O., Sigró, J., Aguilar, E.,
Alexandersson, H., Almarza, C., Auer, I., Barriendos, M., Begert, M.,
Bergström, H., Böhm, R., Butler, C. J., Caesar, J., Drebs, A.,
Founda, D., Gerstengarbe, F.-W., Micela, G., Maugeri, M., Österle, H.,
Pandzic, K., Petrakis, M., Srnec, L., Tolasz, R., Tuomenvirta, H., Werner, P.
C., Linderholm, H., Philipp, A., Wanner, H., and Xoplaki, E.: Indices for
daily temperature and precipitation extremes in Europe analyzed for the
period 1901–2000, J. Geophys. Res., 111, D22106, <a href="https://doi.org/10.1029/2006JD007103" target="_blank">https://doi.org/10.1029/2006JD007103</a>,
2006.
</mixed-citation></ref-html>
<ref-html id="bib1.bib26"><label>26</label><mixed-citation>
Page, C., Nicholls, N., Plummer, N., Trewin, B., Manton, M., Alexander, L.,
Chambers, L., Choi, L. E., Collins, D. A., Gosai, A., Della-Marta, P.,
Haylock, M. R., Inape, K., Laurent, V., Maitrepierre, L., Makmur, E. P.,
Nagamigawa, H., Ouprasitwond, N., McGree, S., Pahalad, J., Salinger, J.,
Tibig, L., Tran, T. D., Vediapan, K., and Zhai, P.: Data Rescue in the
Southeast Asia and South Pacific Region: Challenges and Opportunities, B. Am.
Meteorol. Soc., 85, 1483–1489, <a href="https://doi.org/10.1175/BAMS-85-10-1483" target="_blank">https://doi.org/10.1175/BAMS-85-10-1483</a>, 2004.
</mixed-citation></ref-html>
<ref-html id="bib1.bib27"><label>27</label><mixed-citation>
Peterson, T. C., Easterling, D. R., Karl, T. R., Groisman, P., Nicholls, N.,
Plummer, N., Torok, S., Auer, I., Boehm, R., Gullett, D., Vincent, L., Heino,
R., Tuomenvirta, H., Mestre, O., Szentimrey, T., Salinger, J., Førland, E.
J., Hanssen-Bauer, I., Alexandersson, H., Jones, P., and Parker, D.:
Homogeneity adjustments of in situ atmospheric climate data: a review, Int. J.
Climatol., 18, 1493–1517,
<a href="https://doi.org/10.1002/(SICI)1097-0088(19981115)18:13&lt;1493::AID-JOC329&gt;3.0.CO;2-T" target="_blank">https://doi.org/10.1002/(SICI)1097-0088(19981115)18:13&lt;1493::AID-JOC329&gt;3.0.CO;2-T</a>,
1998.
</mixed-citation></ref-html>
<ref-html id="bib1.bib28"><label>28</label><mixed-citation>
Ryan, C., Duffy, C., Broderick, C., Thorne, P. W., Curley, M., Walsh, S.,
Daly, C., Treanor, M., and Murphy, C.:
Integrating data rescue into the classroom, B. Am. Meteorol. Soc.,
<a href="https://doi.org/10.1175/BAMS-D-17-0147.1" target="_blank">https://doi.org/10.1175/BAMS-D-17-0147.1</a>, online first, 2018.
</mixed-citation></ref-html>
<ref-html id="bib1.bib29"><label>29</label><mixed-citation>
Stickler, A., Brönnimann, S., Valente, M. A., Bethke, J., Sterin, A.,
Jourdain, S., Roucaute, E., Vasquez, M. V., Reyes, D. A., Allan, R., and Dee,
D.: ERA-CLIM: Historical Surface and Upper-Air Data for Future Reanalyses, B.
Am. Meteorol. Soc., 95, 1419–1430, <a href="https://doi.org/10.1175/BAMS-D-13-00147.1" target="_blank">https://doi.org/10.1175/BAMS-D-13-00147.1</a>, 2014.

</mixed-citation></ref-html>
<ref-html id="bib1.bib30"><label>30</label><mixed-citation>
Tan, L. S., Burton, S., Crouthamel, R., van Engelen, A., Hutchinson, R.,
Nicodemus, L., Peterson, T. C., and Rahimzadeh, F.: Guidelines on Climate
Data Rescue, WMO/TD No. 1210, available at:
<a href="http://www.wmo.int/pages/prog/wcp/wcdmp/documents/WCDMP-55.pdf" target="_blank">http://www.wmo.int/pages/prog/wcp/wcdmp/documents/WCDMP-55.pdf</a> (last
access: 4 June 2018), 2004.
</mixed-citation></ref-html>
<ref-html id="bib1.bib31"><label>31</label><mixed-citation>
Universitat Rovira i Virgili: Original scans of data sources used in the
development of the UERRA sub-daily dataset, available at:
<a href="ftp://130.206.36.123" target="_blank">ftp://130.206.36.123</a> (last access: 8 August 2018), 2018.
</mixed-citation></ref-html>
<ref-html id="bib1.bib32"><label>32</label><mixed-citation>
Venema, V. K. C., Mestre, O., Aguilar, E., Auer, I., Guijarro, J. A.,
Domonkos, P., Vertacnik, G., Szentimrey, T., Stepanek, P., Zahradnicek, P.,
Viarre, J., Müller-Westermeier, G., Lakatos, M., Williams, C. N., Menne,
M. J., Lindau, R., Rasol, D., Rustemeier, E., Kolokythas, K., Marinova, T.,
Andresen, L., Acquaotta, F., Fratianni, S., Cheval, S., Klancar, M.,
Brunetti, M., Gruber, C., Prohom Duran, M., Likso, T., Esteban, P., and
Brandsma, T.: Benchmarking homogenization algorithms for monthly data, Clim.
Past, 8, 89–115, <a href="https://doi.org/10.5194/cp-8-89-2012" target="_blank">https://doi.org/10.5194/cp-8-89-2012</a>, 2012.
</mixed-citation></ref-html>
<ref-html id="bib1.bib33"><label>33</label><mixed-citation>
World Meteorological Organization: GCOS-195: Status of the Global Observing
System for Climate, available at:
<a href="https://unfccc.int/sites/default/files/541.pdf" target="_blank">https://unfccc.int/sites/default/files/541.pdf</a> (last access:
4 June 2018), 2015.
</mixed-citation></ref-html>
<ref-html id="bib1.bib34"><label>34</label><mixed-citation>
World Meteorological Organization: Guidelines on Best Practices for Climate
Data Rescue 2016, WMO-No. 1182, available at:
<a href="https://public.wmo.int/en/resources/library/guidelines-best-practices-climate-data-rescue" target="_blank">https://public.wmo.int/en/resources/library/guidelines-best-practices-climate-data-rescue</a>
(last access: 8 August 2018), 2016.
</mixed-citation></ref-html>--></article>
