# $Id: dbschema.sql,v 1.17 2006/05/04 22:50:42 andres Exp $

CREATE TABLE datalogger (
    name                   VARCHAR(20) PRIMARY KEY, # PS6-SC
    description            VARCHAR(80),             # EarthData PS6-24 w/SeisComP
    digitizer_model        VARCHAR(20),             # PS6-24
    digitizer_manufacturer VARCHAR(50),             # EarthData
    recorder_model         VARCHAR(20),             # SeisComP
    recorder_manufacturer  VARCHAR(50),             # Alpha2000
    clock_model            VARCHAR(20),   # for DHI, can be dummy
    clock_manufacturer     VARCHAR(50),   # for DHI, can be dummy
    clock_type             VARCHAR(10),   # for DHI, can be dummy
    sample_rate            INT UNSIGNED,            # 3000
    sample_rate_div        INT UNSIGNED,            # 1
    gain                   DOUBLE UNSIGNED,         # 392000
    max_clock_drift        DOUBLE UNSIGNED,         # 0
    last_modified          TIMESTAMP
);

CREATE TABLE decimation (
    datalogger            VARCHAR(20),              # PS6-SC
    sample_rate           INT UNSIGNED,             # 20
    sample_rate_div       INT UNSIGNED,             # 1
    analogue_filter_chain VARCHAR(255),
    digital_filter_chain  VARCHAR(255),             # fir:edd_deci6 fir:edd_deci5 fir:scp_deci5.1
    last_modified         TIMESTAMP,
    PRIMARY KEY (datalogger, sample_rate, sample_rate_div)
);

# Response (poles & zeros) could be embedded in the seismometer table.

CREATE TABLE seismometer (
    name                  VARCHAR(20) PRIMARY KEY,  # STS-2
    description           VARCHAR(80),              # Streckeisen STS-2
    model                 VARCHAR(20),              # STS-2
    manufacturer          VARCHAR(50),              # Streckeisen
    type                  VARCHAR(10),              # VBB
    low_freq              DOUBLE UNSIGNED,  # nominal low freq for DHI
    high_freq             DOUBLE UNSIGNED,  # nominal high freq for DHI
    response              VARCHAR(25),              # paz:sts2
    last_modified         TIMESTAMP
);

CREATE TABLE resp_fir (
    name                  VARCHAR(20) PRIMARY KEY,  # edd_deci6
    gain                  DOUBLE UNSIGNED,          # 1.0
    deci_fac              SMALLINT UNSIGNED,        # 6
    delay                 DOUBLE UNSIGNED,          # 179
    correction            DOUBLE,                   # -179
    symmetry              CHAR(1),                  # B
    ncoeff                SMALLINT UNSIGNED,        # 180
    coeff                 BLOB,
    last_modified         TIMESTAMP
);

CREATE TABLE resp_paz (
    name                  VARCHAR(20) PRIMARY KEY,  # sts2
    type                  CHAR(1),                  # A (analogue)
    gain                  DOUBLE UNSIGNED,          # 1500
    norm_fac              DOUBLE UNSIGNED,          # 60077000
    norm_freq             DOUBLE UNSIGNED,          # 1.0
    nzeros                TINYINT UNSIGNED,         # 2
    npoles                TINYINT UNSIGNED,         # 6
    zeros                 BLOB,
    poles                 BLOB,
    last_modified         TIMESTAMP
);

# Here we possibly need additional fields:
# date - for multiple calibrations per unit
# chgroup(?) - for 6-channel devices chgroup=0/1
#
# In case we use 'date', we have to decide how to find the applicable
# calibration. Either use 'date' as a key and put 'calibration_date' into
# streams table or search through the list of all calibrations of a
# device. In the latter case, it would be easier if we use effective date
# range instead of a single date.
#
# On the other hand, since 'sn' is an arbitrary string, we could play
# with that, eg., for a 6-channel device with serial number 12345 we
# could use sn="12345/1" for ch 1..3 and sn="12345/2" for ch 4..6.
# If the device is re-calibrated on 16/01/2005 we could use
# sn="12345/2_c16012005" and so on.
#
# Make we should use, eg., 'id' instead of 'sn' to make it more obvious?

CREATE TABLE calibration (
    device                VARCHAR(20),      # Q380-M         # CMG-6T/30
    sn                    VARCHAR(50),      # QT940603       # T6357
    comp1_gain            DOUBLE UNSIGNED,  # 397156.0       # 1201.2
    comp2_gain            DOUBLE UNSIGNED,  # 407278.0       # 1180.1
    comp3_gain            DOUBLE UNSIGNED,  # 389777.0       # 1203.3
    last_modified         TIMESTAMP,
    PRIMARY KEY (device, sn)
);

# Network codes for temporary nets are allocated on yearly basis,
# therefore we need year (start date) as key. See also
# http://www.iris.edu/stations/networks.portable.txt
#
# How are we going to map our internal codes of temporary nets
# into official ones?
#
# net_class is currently 'p' for permanent and 't' for temporary. Are
# there other classes planned?? Otherwise I would use a binary flag
# permanent=true/false.

CREATE TABLE network (
    net_code              CHAR(2),        # GE
    net_start             DATETIME,       # 01/01/1993
    net_end               DATETIME,       # NULL if open
    description           VARCHAR(80),    # GEOFON
    institutions          VARCHAR(100),   # GFZ/partners
    region                VARCHAR(100),   # euromed global
    type                  VARCHAR(50),    # VBB
    net_class             CHAR(1),        # p
    restricted            BIT NOT NULL,   # TRUE, if contains restricted stations
    remark                VARCHAR(255),
    last_modified         TIMESTAMP,
    PRIMARY KEY (net_code, net_start)
);

CREATE TABLE station (
    net_code              CHAR(2),        # GE
    net_start             DATETIME,
    sta_code              CHAR(5),        # RGN
    sta_start             DATETIME,
    sta_end               DATETIME,       # NULL if open
    description           VARCHAR(80),    # "GEOFON station Ruegen, Germany" (7-bit ASCII for SEED)
    latitude              DECIMAL(10,6),
    longitude             DECIMAL(11,6),
    elevation             DECIMAL(7,1),
    depth                 DECIMAL(5,1),
    place                 VARCHAR(80),    # ? "Rügen" (UTF-8 for WWW)   ?
    country               VARCHAR(50),    # ? "Germany" (UTF-8 for WWW) ?
    affiliation           VARCHAR(50),    # ? "GEOFON" (UTF-8 for WWW)  ?
    restricted            BIT NOT NULL,   # TRUE, if contains restricted streams
    last_modified         TIMESTAMP,
    PRIMARY KEY (net_code, net_start, sta_code, sta_start) 
);

# In our nomenclature, stream code of seismic stream is 2 characters long
# and the stream has 3 components. That means, it is *not* possible to use
# different type of seismometer or digitizer for each component, but that
# does not make sense anyway. It is also not possible to use a seismometer
# with more than 3 components. (Didn't somebody from Guralp mention they
# are developing such?)
#
# In case of 6-channel digitizers, 2 seis_stream records are used.
# One-component streams can be defined by setting comp2_code and
# comp3_code to NULL

CREATE TABLE seis_stream (
    net_code              CHAR(2),        # GE
    net_start             DATETIME,
    sta_code              CHAR(5),        # RGN
    sta_start             DATETIME,
    str_code              CHAR(2),        # BH
    str_start             DATETIME,
    str_end               DATETIME,       # NULL if open
    loc_code              CHAR(2),
    datalogger            VARCHAR(20),    # PS6-SC
    datalogger_sn         VARCHAR(50),    # 12345
    clock_sn              VARCHAR(50),    # for DHI, can be dummy
    seismometer           VARCHAR(20),    # STS-2
    seismometer_sn        VARCHAR(50),    # 12345
    sample_rate           INT UNSIGNED,   # 20
    sample_rate_div       INT UNSIGNED,   # 1
    gain                  DECIMAL(8,1),   # overall sensitivity; convenience for AutoLoc
    comp1_code            CHAR(1),        # Z
    comp1_azimuth         DECIMAL(5,1),   # 0
    comp1_dip             DECIMAL(5,1),   # -90
    comp2_code            CHAR(1),        # N
    comp2_azimuth         DECIMAL(5,1),   # 0
    comp2_dip             DECIMAL(5,1),   # 0
    comp3_code            CHAR(1),        # E
    comp3_azimuth         DECIMAL(5,1),   # 90
    comp3_dip             DECIMAL(5,1),   # 0
    depth                 DECIMAL(5,1),   # 3
    format                VARCHAR(20),    # MSEED/Steim2
    flags                 VARCHAR(20),    # SEED flags: GC or GT (geophysical, continuous or triggered)
    restricted            BIT NOT NULL,   # if TRUE, look into access table
    last_modified         TIMESTAMP,
    PRIMARY KEY (net_code, net_start, sta_code, sta_start, str_code, str_start, loc_code)     
);

# aux_device records should be defined also for dataloggers, in order to
# specify streams like supply voltage, etc.

CREATE TABLE aux_device (
    name                  VARCHAR(20) PRIMARY KEY,  # WS2300                         # PS6-SC
    description           VARCHAR(80),              # LaCrosse 2300 Weather Station  # EarthData PS6-24 w/SeisComP
    model                 VARCHAR(20),              # 2300                           # PS6-24
    manufacturer          VARCHAR(50),              # LaCrosse                       # EarthData
    last_modified         TIMESTAMP
);

CREATE TABLE aux_source (
    device                VARCHAR(20),              # WS2300                         # PS6-SC
    source_id             VARCHAR(20),              # 11                             # 6
    description           VARCHAR(80),              # Absolute air pressure          # Supply voltage
    unit                  VARCHAR(20),              # hPa                            # V
    conversion            VARCHAR(80),              # C / 10                         # C * 10.9 / 2700.0 + 5.0
    sample_rate           INT UNSIGNED,             # 1                              # 1
    sample_rate_div       INT UNSIGNED,             # 10                             # 1
    last_modified         TIMESTAMP,
    PRIMARY KEY (device, source_id)
);

# Unlike seis_stream, one aux_stream record defines one stream only;
# therefore str_code has 3 letters and there is no compX_code.
#
# source_id is used to select a particular channel of auxilliary device.
# The description of this channel can be found in aux_source.

CREATE TABLE aux_stream (
    net_code              CHAR(2),                  # GE                             # GE
    net_start             DATETIME,
    sta_code              CHAR(5),                  # RGN                            # RGN
    sta_start             DATETIME,
    str_code              CHAR(3),                  # WDI                            # AE1
    str_start             DATETIME,
    str_end               DATETIME,     # NULL if open
    loc_code              CHAR(2),
    device                VARCHAR(20),              # WS2300                         # PS6-SC
    device_sn             VARCHAR(50),              # 12345                          # 12345
    source_id             VARCHAR(20),              # 11                             # 6
    format                VARCHAR(20),              # MSEED/Steim2                   # MSEED/Steim2
    flags                 VARCHAR(20),  # SEED flags: WC (weather, continuous)       # HC (state-of-health, continuous)
    restricted            BIT NOT NULL, # if TRUE, look into access table
    last_modified         TIMESTAMP,
    PRIMARY KEY (net_code, net_start, sta_code, sta_start, str_code, str_start, loc_code)     
);

# Keyed to component or aux stream

CREATE TABLE outage (
    net_code              CHAR(2),
    net_start             DATETIME,
    sta_code              CHAR(5),
    sta_start             DATETIME,
    str_code              CHAR(3),      # HH, WDI
    str_start             DATETIME,
    loc_code              CHAR(2),
    comp_code             CHAR(1),      # empty if keyed to aux_stream
    eff_start             DATETIME,
    eff_end               DATETIME,
    last_modified         TIMESTAMP,
    PRIMARY KEY (net_code, net_start, sta_code, sta_start, str_code, str_start, loc_code, comp_code, eff_start, eff_end)
);

# QClog, keyed to station, logs all problems with data quality. The
# messages can be automatically generated ("RMS > 10000") or manually
# inserted.

CREATE TABLE qclog (
    net_code              CHAR(2),     # GE
    net_start             DATETIME,
    sta_code              CHAR(5),     # GSI
    sta_start             DATETIME,
    eff_start             DATETIME,    # 2005-08-22 00:00:00
    eff_end               DATETIME,    # 2005-10-04 00:00:00
    message               BLOB,        # "Seismometer defective, only Z component usable"
    last_modified         TIMESTAMP,
    INDEX (net_code, net_start, sta_code, sta_start, eff_start, eff_end)
);

# As suggested by Joachim, routing info is no longer part of station
# object. I propose a corresponding top-level element in XML schema called
# "routing", so the general XML structure would look like this:
#
# <inventory>
#   <network....>
#     <station....>
#       ...
#     </station>
#   </network>
#
#   <network....>
#     ...
#   </network>
#
#   <seismometer...>
#     ...
#   </seismometer>
#
#   <datalogger...>
#     ...
#   </datalogger>
#
#   <aux_device...>
#     ...
#   </aux_device>
#
#   <resp_fir...>
#     ...
#   </resp_fir>
#
#   <resp_paz...>
#     ...
#   </resp_paz>
#
#   <routing network="MN" station="IDI" last_modified="...">
#     <arclink start="..." end="..." address="..." priority="..."/>
#     <arclink start="..." end="..." address="..." priority="..."/>
#     <seedlink address="..." priority="..."/>
#     <seedlink address="..." priority="..."/>
#     <!-- possible to extend with other protocols -->
#   </routing>
#
#   <routing network="MN" station="AQU" last_modified="...">
#     <arclink start="..." end="..." address="..." priority="..."/>
#     <arclink start="..." end="..." address="..." priority="..."/>
#     <seedlink address="..." priority="..."/>
#     <seedlink address="..." priority="..."/>
#     <!-- possible to extend with other protocols -->
#   </routing>
#
#   <routing...>
#     ...
#   </routing>
# </inventory>

CREATE TABLE route_arclink (
    net_code              CHAR(2),     # GE
    sta_code              CHAR(5),     # empty if default route for network
    eff_start             DATETIME,
    eff_end               DATETIME,
    address               VARCHAR(50),
    priority              TINYINT UNSIGNED,
    last_modified         TIMESTAMP,
    PRIMARY KEY (net_code, sta_code, eff_start, eff_end, address, priority)
);

CREATE TABLE route_seedlink (
    net_code              CHAR(2),     # GE
    sta_code              CHAR(5),     # empty if default route for network
    address               VARCHAR(50),
    priority              TINYINT UNSIGNED,
    last_modified         TIMESTAMP,
    PRIMARY KEY (net_code, sta_code, address, priority)
);

# Access table is checked if 'restricted' attribute of a stream is TRUE.
# In case of some stations (eg., Spanish GEOFON stations), only certain
# streams with high sample rate are limited, so we need to define access
# on stream basis.
#
# Effective start and end time are for the case when older data of an
# experiment are free, but newer data is limited.
#
# The 'user' field may contain a complete e-mail address (arclink
# username) or hostname suffix. Eg., "gfz-potsdam.de" means anybody from
# GFZ can get the data. Theoretically, "de" means anybody from Germany can
# get the data. empty means no restriction (to free certain time period).
#
# str_code has 2 letters for seismic streams, 3 letters for aux
# streams.
#
# str_code and sta_code can be empty to define access for the whole
# station/network.
#
# This table is "private"--it does not exist in XML schema and cannot be
# transferred via ArcLink.

CREATE TABLE access (
    net_code              CHAR(2),     # GE
    sta_code              CHAR(5),     # MAHO
    str_code              CHAR(3),     # HH
    loc_code              CHAR(2),
    eff_start             DATETIME,
    eff_end               DATETIME,
    user                  VARCHAR(50), # roa.es
    last_modified         TIMESTAMP,
    PRIMARY KEY (net_code, sta_code, str_code, loc_code, eff_start, eff_end, user)
);


