<?xml version="1.0"?>
<mappings>
<!-- Prefix declarations. Each "value" is a prefix that appears in the xpath rules of this
     file (agave:, tigr:, bioseq:); the element text is presumably the namespace URI that the
     prefix is bound to when those XPath expressions are evaluated (confirm against the
     rule parser). -->
<prefix value="agave">http://www.bioxml.info/dtd/agave.dtd</prefix>
<prefix value="tigr">http://www.bioxml.info/dtd/tigrxml.dtd</prefix>
<prefix value="bioseq">http://www.bioxml.info/dtd/Bioseq.dtd</prefix>

<!-- A base object in the GO namespace, recognized in text: "GO:" or "go:" followed by
     seven digits. Capture group 1 (the seven digits, without the prefix) is the value
     placed in the GO namespace. Only the all-uppercase and all-lowercase prefixes are
     listed, so mixed case such as "Go:" is not matched. -->
<object>
  <regex>(?:GO|go):(\d{7})</regex>
  <namespace>
    <ns value="GO">$1</ns>
  </namespace>
</object>

<!-- A base object in the GO namespace, recognized in AGAVE XML: when the context node is an
     agave:gene element, selects the id attribute of each descendant agave:classification
     whose system attribute is 'GO'. The namespace value "." refers to the selected node
     itself; presumably the attribute's string value is used as the GO ID (confirm against
     the rule parser). -->
<object>
  <xpath>self::agave:gene//agave:classification[@system='GO']/@id</xpath>
  <namespace>
    <ns value="GO">.</ns>
  </namespace>
</object>

<!-- A base object in NCBI GenBank's GI namespace.
     regex: "GI" or "gi", then '|' or ':', then one or more digits; capture group 1 is the
            GI number.
     xpath: when the context node is a tigr:TU element, reads the DBXREF attribute of a
            descendant tigr:ACCESSION (under XPath 1.0 string conversion, the first one in
            document order) and returns the text between the first 'gi|' and the next '|'.
     NOTE(review): substring-before returns an empty string when no '|' follows the number,
     so a DBXREF that ends with the GI number yields nothing; confirm that is acceptable.
     NOTE(review): how the xpath result relates to the $1 reference in the namespace element
     is not visible in this file; verify against the rule parser. -->
<object>
  <regex>(?:GI|gi)[|:](\d+)</regex>
  <xpath>substring-before(substring-after(self::tigr:TU//tigr:ACCESSION/@DBXREF, 'gi|'), '|')</xpath>
  <namespace>
    <ns value="NCBI_gi">$1</ns>
  </namespace>
</object>

<!-- An Enzyme Commission (EC) number. -->
<object>
  <!-- EC numbers have four dot-separated fields, e.g. '6.1.99.-', where '-' is a wildcard.
       Field 1 is a single digit 1-6; field 2 is one or two digits or '-'; fields 3 and 4
       are each one to three digits or '-'. The separator dot sits outside the
       digits-or-wildcard alternation so that it is required before a '-' just as it is
       before digits. Capture group 1 is the whole EC number. -->
  <regex>([1-6]\.(?:[0-9]{1,2}|-)(?:\.(?:[0-9]{1,3}|-)){2})</regex>
  <namespace>
    <ns value="EC">$1</ns>
  </namespace>
</object>

<!-- NCBI Entrez Gene identifier.
     regex: the literal "GeneID:" followed by one or more digits; capture group 1 is the
            numeric gene ID. -->
<object>
  <regex>GeneID:(\d+)</regex>
  <!-- readseq XML: the context element (usually fitem) has a feature note (bioseq:fnote)
       whose bioseq:fval text starts with "GeneID:"; the expression returns the text that
       follows that prefix, or an empty string when no such fval exists.
       NOTE(review): how this result relates to the $1 reference in the namespace element
       is not visible in this file; verify against the rule parser. -->
  <xpath>substring-after(./bioseq:fnote/bioseq:fval[starts-with(., "GeneID:")], "GeneID:")</xpath>
  <namespace>
    <ns value="EntrezGene_EntrezGeneID">$1</ns>
  </namespace>
</object>

<!-- NCBI Reference Sequence accession: a two-letter prefix (one of N, X, Y, Z followed by
     one of C, M, P; e.g. NM, NP, XP), an underscore, and six or more digits. The digit run
     is open-ended because RefSeq also issues nine-digit accessions (e.g. NM_001234567);
     a fixed six-digit match would keep only their first six digits and yield a wrong ID.
     A trailing ".version" suffix is not part of capture group 1. -->
<object>
  <regex>([NXYZ][CMP]_\d{6,})</regex>
  <namespace>
    <ns value="RefSeq">$1</ns>
  </namespace>
</object>

<!-- NCBI taxonomy database identifier: the literal lowercase "taxon:" followed by one or
     more digits. Capture group 1 (the digits, without the prefix) is the value placed in
     the taxon namespace. -->
<object>
  <regex>taxon:(\d+)</regex>
  <namespace>
    <ns value="taxon">$1</ns>
  </namespace>
</object>

<!-- Swiss-Prot identifier: "SP" or "sp", then ':' or '|', then one or more uppercase
     letters followed by at least four digits. Capture group 1 (letters plus digits, without
     the prefix) is the value placed in the Swiss-Prot namespace.
     NOTE(review): identifiers that interleave letters and digits (of the form Q9H0H5) do
     not fit this pattern; confirm whether they should be recognized as well. -->
<object>
  <regex>(?:SP|sp)[:|]([A-Z]+\d{4,})</regex>
  <namespace>
    <ns value="Swiss-Prot">$1</ns>
  </namespace>
</object>

<!-- A FASTA (complex) DNA record; \N is Seahawk-specific regex shorthand for IUPAC nucleic
     acid sequences.
     Pattern: '>' and the rest of that header line, followed by one or more occurrences of
     a newline plus a run of \N characters.
     Group 1 is the whole matched record (header and sequence) and fills the "content"
     member; group 2 is the run of non-whitespace characters right after '>' (possibly
     empty) and is the value placed in the "unknown" namespace. The datatype is FASTA_NA.
     NOTE(review): only \n is accepted between lines; whether \N tolerates a carriage
     return from CRLF input is not visible here. -->
<object>
  <regex>(>(\S*)[^\n]*(?:\n\N+)+)</regex>
  <namespace>
    <ns value="unknown">$2</ns>
  </namespace>
  <datatype value="FASTA_NA"/>
  <member value="content">$1</member>
</object>

<!-- A FASTA (complex) amino acid record; \P is Seahawk-specific regex shorthand for IUPAC
     amino acid sequences.
     Pattern: '>' and the rest of that header line, followed by one or more occurrences of
     a newline plus a run of \P characters.
     Group 1 is the whole matched record (header and sequence) and fills the "content"
     member; group 2 is the run of non-whitespace characters right after '>' (possibly
     empty) and is the value placed in the "unknown" namespace. The datatype is FASTA_AA.
     NOTE(review): only \n is accepted between lines; whether \P tolerates a carriage
     return from CRLF input is not visible here. -->
<object>
  <regex>(>(\S*)[^\n]*(?:\n\P+)+)</regex>
  <namespace>
    <ns value="unknown">$2</ns>
  </namespace>
  <datatype value="FASTA_AA"/>
  <member value="content">$1</member>
</object>
</mappings>
