Skip to end of banner
Go to start of banner

Complex Objects Reference

Skip to end of metadata
Go to start of metadata

You are viewing an old version of this page. View the current version.

Compare with Current View Page History

« Previous Version 10 Next »

Disclaimer

Complex objects are an advanced form of ingest workflow in MediaHaven which is not available to all customers. 

Complete Example

KB_JB306_1915-02-19_01.xml
<?xml version="1.0" encoding="UTF-8"?>
<mets:mets xmlns:mets="http://www.loc.gov/METS/" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/METS/ mets.xsd">
   <!-- HEADER section -->
   <!-- Record header: creation and last-modification timestamps (xsd:dateTime, UTC) and the agents involved.
        The creation date must not be later than the last-modification date. -->
   <mets:metsHdr CREATEDATE="2010-02-16T10:15:19Z" LASTMODDATE="2012-02-16T10:15:19Z" RECORDSTATUS="MY IMPORT">
      <!-- At least one agent is required -->
      <mets:agent TYPE="ORGANIZATION" ROLE="CUSTODIAN">
         <mets:name>BELSPO</mets:name>
      </mets:agent>
      <mets:agent TYPE="ORGANIZATION" ROLE="IPOWNER">
         <mets:name>KBR</mets:name>
      </mets:agent>
   </mets:metsHdr>
   <!-- SIDECAR section -->
   <!-- Sidecar metadata titled for the archive. The sourceMD ID is the value that the ADMID attribute
        on mets:fileGrp (METADATA-SIP) points to, so the two must be spelled identically. -->
   <mets:amdSec ID="SECTION-METADATA-SIP">
      <mets:sourceMD ID="METADATA-SIP">
         <mets:mdWrap MDTYPE="OTHER" OTHERMDTYPE="MEDIAHAVEN-XML">
            <mets:xmlData>
               <MediaHAVEN_external_metadata>
                  <title>My Archive!</title>
                  <description>My description about the archive</description>
               </MediaHAVEN_external_metadata>
            </mets:xmlData>
         </mets:mdWrap>
      </mets:sourceMD>
   </mets:amdSec>
   <!-- Sidecar metadata titled for the PDF. The sourceMD ID is the value that the ADMID attribute
        on the PDF's mets:file entry (METADATA-PDF) points to, so the two must be spelled identically. -->
   <mets:amdSec ID="SECTION-METADATA-PDF">
      <mets:sourceMD ID="METADATA-PDF">
         <mets:mdWrap MDTYPE="OTHER" OTHERMDTYPE="MEDIAHAVEN-XML">
            <mets:xmlData>
               <MediaHAVEN_external_metadata>
                  <title>My PDF!</title>
                  <description>My description about the pdf</description>
               </MediaHAVEN_external_metadata>
            </mets:xmlData>
         </mets:mdWrap>
      </mets:sourceMD>
   </mets:amdSec>
   <!-- PREMIS section -->
   <!-- Event 01 -->
   <!-- One amdSec wraps one PREMIS event. The digiprovMD ID (PREMIS-EVENT-01) is the value
        listed in the ADMID attribute of mets:fileGrp in the file section. -->
   <mets:amdSec ID="SECTION-PREMIS-EVENT-01">
      <mets:digiprovMD ID="PREMIS-EVENT-01">
         <mets:mdWrap MDTYPE="PREMIS:EVENT">
            <mets:xmlData>
               <!-- The premis prefix is declared here, on the event element, not on the METS root -->
               <premis:event xmlns:premis="info:lc/xmlns/premis-v2">
                  <premis:eventIdentifier>
                     <premis:eventIdentifierType>KBR-Events</premis:eventIdentifierType>
                     <premis:eventIdentifierValue>8545875115</premis:eventIdentifierValue>
                  </premis:eventIdentifier>
                  <premis:eventType>OCR creation</premis:eventType>
                  <premis:eventDateTime>2014-10-21</premis:eventDateTime>
                  <premis:eventOutcomeInformation>
                     <premis:eventOutcome>y</premis:eventOutcome>
                     <premis:eventOutcomeDetail>
                        <!-- Left empty in this example -->
                        <premis:eventOutcomeDetailNote />
                     </premis:eventOutcomeDetail>
                  </premis:eventOutcomeInformation>
                  <premis:linkingAgentIdentifier>
                     <premis:linkingAgentIdentifierType>KBR-Employee</premis:linkingAgentIdentifierType>
                     <premis:linkingAgentIdentifierValue>John Doe</premis:linkingAgentIdentifierValue>
                  </premis:linkingAgentIdentifier>
                  <premis:linkingObjectIdentifier>
                     <premis:linkingObjectIdentifierType>KBR-ID</premis:linkingObjectIdentifierType>
                     <!-- Same string as the ID of the first mets:file entry in the file section -->
                     <premis:linkingObjectIdentifierValue>KB_JB306_1915-02-19_01-mets</premis:linkingObjectIdentifierValue>
                  </premis:linkingObjectIdentifier>
               </premis:event>
            </mets:xmlData>
         </mets:mdWrap>
      </mets:digiprovMD>
   </mets:amdSec>
   <!-- SIP/FILE section -->
   <!-- Lists every file of the archive inside a single mets:fileGrp.
        ADMID holds space-separated references that must match ID attributes of metadata
        elements (mets:sourceMD, mets:digiprovMD) elsewhere in this document.
        MIMETYPE values use the registered media type names
        (application/pdf, image/jpeg, image/tiff, text/xml).
        xlink:href paths are relative to the root of the archive. -->
   <mets:fileSec ID="SECTION-FILES-SIP">
      <mets:fileGrp ID="ARCHIVE_GRP" ADMID="METADATA-SIP PREMIS-EVENT-01">
         <mets:file ID="KB_JB306_1915-02-19_01-mets" CHECKSUMTYPE="MD5" CHECKSUM="f10d79fe597304761bf5476a03b77079" USE="PRESERVATION" MIMETYPE="text/xml">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/KB_JB306_1915-02-19_01-mets.xml" />
         </mets:file>
         <!-- This entry carries its own ADMID pointing at the PDF sidecar metadata -->
         <mets:file ID="KB_JB306_1915-02-19_01" ADMID="METADATA-PDF" CHECKSUMTYPE="MD5" CHECKSUM="54ba2c50f0b5bb544de55aebbb4fa6ab" USE="VIRTUAL" MIMETYPE="application/pdf">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/KB_JB306_1915-02-19_01.pdf" />
         </mets:file>
         <!-- ALTO XML files, one per page -->
         <mets:file ID="KB_JB306_1915-02-19_01-ALTO-00001" CHECKSUMTYPE="MD5" CHECKSUM="2c042250c118a03dd85c01c3e542ae5f" USE="FIXITY" MIMETYPE="text/xml">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/alto/KB_JB306_1915-02-19_01-00001.xml" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-ALTO-00002" CHECKSUMTYPE="MD5" CHECKSUM="20bb2927a704e7eddac48b2af39fbf3a" USE="FIXITY" MIMETYPE="text/xml">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/alto/KB_JB306_1915-02-19_01-00002.xml" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-ALTO-00003" CHECKSUMTYPE="MD5" CHECKSUM="003b149246c7146eaa3d5605f00f6bb7" USE="FIXITY" MIMETYPE="text/xml">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/alto/KB_JB306_1915-02-19_01-00003.xml" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-ALTO-00004" CHECKSUMTYPE="MD5" CHECKSUM="a24cd875a366529dd5a7a7f6bcec0e72" USE="FIXITY" MIMETYPE="text/xml">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/alto/KB_JB306_1915-02-19_01-00004.xml" />
         </mets:file>
         <!-- JPEG images, one per page -->
         <mets:file ID="KB_JB306_1915-02-19_01-JPG-00001" CHECKSUMTYPE="MD5" CHECKSUM="2aa75e684c85a0bda4f4155c614715f4" USE="FIXITY" MIMETYPE="image/jpeg">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/jpg/KB_JB306_1915-02-19_01-00001.jpg" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-JPG-00002" CHECKSUMTYPE="MD5" CHECKSUM="b203607a35502dd054f8bb93ce71e317" USE="FIXITY" MIMETYPE="image/jpeg">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/jpg/KB_JB306_1915-02-19_01-00002.jpg" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-JPG-00003" CHECKSUMTYPE="MD5" CHECKSUM="881f781c630b650cd8b99b56cfecc3b4" USE="FIXITY" MIMETYPE="image/jpeg">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/jpg/KB_JB306_1915-02-19_01-00003.jpg" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-JPG-00004" CHECKSUMTYPE="MD5" CHECKSUM="80d5c7b7e554b696b21f6bc4e9e3d441" USE="FIXITY" MIMETYPE="image/jpeg">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/jpg/KB_JB306_1915-02-19_01-00004.jpg" />
         </mets:file>
         <!-- PDF files, one per page -->
         <mets:file ID="KB_JB306_1915-02-19_01-PDF-00001" CHECKSUMTYPE="MD5" CHECKSUM="705e2cf3af94aea81da8afcef14ee6dd" USE="FIXITY" MIMETYPE="application/pdf">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/pdf/KB_JB306_1915-02-19_01-00001.pdf" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-PDF-00002" CHECKSUMTYPE="MD5" CHECKSUM="0fc07b6421debba28480fd9538203e16" USE="FIXITY" MIMETYPE="application/pdf">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/pdf/KB_JB306_1915-02-19_01-00002.pdf" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-PDF-00003" CHECKSUMTYPE="MD5" CHECKSUM="9f348fed21a6f90079d69288c5b636f7" USE="FIXITY" MIMETYPE="application/pdf">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/pdf/KB_JB306_1915-02-19_01-00003.pdf" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-PDF-00004" CHECKSUMTYPE="MD5" CHECKSUM="ea921eef6f10e99c646abb637c3bca94" USE="FIXITY" MIMETYPE="application/pdf">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/pdf/KB_JB306_1915-02-19_01-00004.pdf" />
         </mets:file>
         <!-- TIFF images, one per page -->
         <mets:file ID="KB_JB306_1915-02-19_01-TIF-00001" CHECKSUMTYPE="MD5" CHECKSUM="29338f3a00145cb14383a77bdd4d90e6" USE="FIXITY" MIMETYPE="image/tiff">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/tif/KB_JB306_1915-02-19_01-00001.tif" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-TIF-00002" CHECKSUMTYPE="MD5" CHECKSUM="1e49d3f4c31201312376ebaf7a74e530" USE="FIXITY" MIMETYPE="image/tiff">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/tif/KB_JB306_1915-02-19_01-00002.tif" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-TIF-00003" CHECKSUMTYPE="MD5" CHECKSUM="4ebfee64b198bc388c5fa7653a977dc6" USE="FIXITY" MIMETYPE="image/tiff">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/tif/KB_JB306_1915-02-19_01-00003.tif" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-TIF-00004" CHECKSUMTYPE="MD5" CHECKSUM="7fbb71c0b41e3c30b87eaed593b8546e" USE="FIXITY" MIMETYPE="image/tiff">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/tif/KB_JB306_1915-02-19_01-00004.tif" />
         </mets:file>
      </mets:fileGrp>
   </mets:fileSec>
   <!-- Mandatory placeholder: the METS only validates when a structMap with a
        (here empty, self-closed) mets:div is present. -->
   <mets:structMap>
      <mets:div />
   </mets:structMap>
</mets:mets>

Requirements

The Complex Ingest workflow imposes the following requirements, which go beyond well-formed XML and validation against the provided XSDs.

  1. Every file in the archive must be referenced by the METS. If files are not referenced or if a referenced file is missing, the entire archive is rejected.
  2. The MD5 checksums provided in the XML are compared against the calculated MD5 checksums. The entire archive is rejected if one check fails.
  3. The file paths used in the METS are paths relative to the root of the accompanying archive.

METS Validation

  1. Do not use a default namespace for the METS tags but use the explicit namespace prefix mets, e.g. mets:agent.
  2. Use exactly one tag mets:fileGrp under which all tags  mets:file are placed. 
  3. The value of the attribute ID is of the type xsd:ID, which means it must be unique across the XML document and be a valid NCName. This means that it must start with a letter or underscore, and can only contain letters, digits, underscores, hyphens, and periods. More info here.
  4. The tag <mets:structMap> must be provided with the self-closed tag <mets:div /> in order to have a valid METS XML.


TIP

It is important to validate the METS which you generate with the provided XSDs before uploading them to the platform. This will prevent the need for fixing common errors. You can validate the XML using online tools, for example: http://www.xmlvalidation.com/. It is important that you place the XSD xlink.xsd in the same folder as mets.xsd.

Common mistakes

The value of the attribute ID must be a valid XML tag name which has the following rules:

  • Must start with a letter or underscore
  • Cannot start with the letters xml (or XML, or Xml, etc)
  • Can contain letters, digits, hyphens, underscores, and periods
  • Cannot contain spaces

Because the value of the attribute xlink:href must be a valid URI, you will have to URI encode its value if the path to the file contains illegal URI symbols such as ? % etc.

Step-by-step building of the METS

The XML Header

XML Header
<?xml version="1.0" encoding="UTF-8"?>
<mets:mets xmlns:mets="http://www.loc.gov/METS/" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
     xsi:schemaLocation="http://www.loc.gov/METS/ mets.xsd">

The XML header declares the namespaces used in METS, namely mets and xlink. MediaHaven validates the METS with the XSDs provided on this page regardless of the URI provided in the optional attribute xsi:schemaLocation.

The METS header

METS Header
<!-- HEADER section -->
<!-- Record header: creation and last-modification timestamps (xsd:dateTime, UTC) and the agents involved.
     The creation date must not be later than the last-modification date. -->
<mets:metsHdr CREATEDATE="2010-02-16T10:15:19Z" LASTMODDATE="2012-02-16T10:15:19Z" RECORDSTATUS="MY IMPORT">
   <!-- At least one agent is required -->
   <mets:agent TYPE="ORGANIZATION" ROLE="CUSTODIAN">
      <mets:name>BELSPO</mets:name>
   </mets:agent>
   <mets:agent TYPE="ORGANIZATION" ROLE="IPOWNER">
      <mets:name>KBR</mets:name>
   </mets:agent>
</mets:metsHdr>

The METS header is mandatory and must provide at least one agent.

The File Section

This section describes each and every file in the archive, no exceptions.

File Section
<!-- SIP/FILE section -->
<!-- Skeleton of the file section: exactly one mets:fileGrp holding one mets:file per file in the archive -->
<mets:fileSec ID="SECTION-FILES-SIP">
   <mets:fileGrp ID="ARCHIVE_GRP">
      <mets:file>...</mets:file> <!-- File 1 -->
      <mets:file>...</mets:file> <!-- File 2 -->
      <mets:file>...</mets:file> <!-- File 3 -->
      <!-- repeat the tag mets:file for the other files -->
   </mets:fileGrp>
</mets:fileSec>

Single File Section

This section describes a single file through a number of attributes and the relative path in the ZIP archive. 

Single File Section
<!-- A single file entry: identifier, MD5 checksum, usage and MIME type,
     followed by the location of the file inside the archive. -->
<mets:file ID="KB_JB306_1915-02-19_01-mets"
           CHECKSUMTYPE="MD5"
           CHECKSUM="f10d79fe597304761bf5476a03b77079"
           USE="PRESERVATION"
           MIMETYPE="text/xml">
   <mets:FLocat LOCTYPE="OTHER"
                xlink:href="JB306/1915/02/19/01/KB_JB306_1915-02-19_01-mets.xml" />
</mets:file>

Metadata
<!-- SIDECAR section -->
<!-- Sidecar titled for the archive; its sourceMD ID is referenced by ADMID="METADATA-SIP" on mets:fileGrp -->
<mets:amdSec ...>
   <mets:sourceMD ID="METADATA-SIP">
      <mets:mdWrap MDTYPE="OTHER" OTHERMDTYPE="MEDIAHAVEN-XML">
         <mets:xmlData>
            <MediaHAVEN_external_metadata>
               <title>My Archive!</title>
               <description>My description about the archive</description>
            </MediaHAVEN_external_metadata>
         </mets:xmlData>
      </mets:mdWrap>
   </mets:sourceMD>
</mets:amdSec>
<!-- Sidecar titled for the PDF; its sourceMD ID is referenced by ADMID="METADATA-PDF" on the PDF's mets:file -->
<mets:amdSec ...>
   <mets:sourceMD ID="METADATA-PDF">
      <mets:mdWrap MDTYPE="OTHER" OTHERMDTYPE="MEDIAHAVEN-XML">
         <mets:xmlData>
            <MediaHAVEN_external_metadata>
               <title>My PDF!</title>
               <description>My description about the pdf</description>
            </MediaHAVEN_external_metadata>
         </mets:xmlData>
      </mets:mdWrap>
   </mets:sourceMD>
</mets:amdSec>
...
<!-- Excerpt: ADMID on mets:fileGrp and on a mets:file names the ID of the matching mets:sourceMD -->
<mets:fileSec ...>
   <mets:fileGrp ID="ARCHIVE_GRP" ADMID="METADATA-SIP">
      <mets:file ID="KB_JB306_1915-02-19_01-mets" ...>
         ...
      </mets:file>
      <mets:file ID="KB_JB306_1915-02-19_01-pdf" ADMID="METADATA-PDF" ...>
         ...
      </mets:file>
      ...

Events
<!-- PREMIS section -->
<!-- One amdSec wraps one PREMIS event; the digiprovMD ID is the value an ADMID attribute refers to -->
<mets:amdSec ID="SECTION-PREMIS-EVENT-01">
    <mets:digiprovMD ID="PREMIS-EVENT-01">
        <mets:mdWrap MDTYPE="PREMIS:EVENT">
            <mets:xmlData>
               <!-- The premis prefix is declared here, on the event element -->
               <premis:event xmlns:premis="info:lc/xmlns/premis-v2">
                  <premis:eventIdentifier>
                     <premis:eventIdentifierType>KBR-Events</premis:eventIdentifierType>
                     <premis:eventIdentifierValue>8545875115</premis:eventIdentifierValue>
                  </premis:eventIdentifier>
                  <premis:eventType>OCR creation</premis:eventType>
                  <premis:eventDateTime>2014-10-21</premis:eventDateTime>
                  <premis:eventOutcomeInformation>
                     <premis:eventOutcome>y</premis:eventOutcome>
                     <premis:eventOutcomeDetail>
                        <premis:eventOutcomeDetailNote />
                     </premis:eventOutcomeDetail>
                  </premis:eventOutcomeInformation>
                  <premis:linkingAgentIdentifier>
                     <premis:linkingAgentIdentifierType>KBR-Employee</premis:linkingAgentIdentifierType>
                     <premis:linkingAgentIdentifierValue>John Doe</premis:linkingAgentIdentifierValue>
                  </premis:linkingAgentIdentifier>
                  <premis:linkingObjectIdentifier>
                     <premis:linkingObjectIdentifierType>KBR-ID</premis:linkingObjectIdentifierType>
                     <premis:linkingObjectIdentifierValue>KB_JB306_1915-02-19_01-mets</premis:linkingObjectIdentifierValue>
                  </premis:linkingObjectIdentifier>
               </premis:event>
            </mets:xmlData>
        </mets:mdWrap>
    </mets:digiprovMD>
</mets:amdSec>
<!-- Second event, abbreviated: same wrapper structure, with the event content elided -->
<mets:amdSec ID="SECTION-PREMIS-EVENT-02">
    <mets:digiprovMD ID="PREMIS-EVENT-02">
        <mets:mdWrap MDTYPE="PREMIS:EVENT">
            <mets:xmlData>...</mets:xmlData>
        </mets:mdWrap>
    </mets:digiprovMD>
</mets:amdSec>
...
<!-- Excerpt: ADMID lists, separated by spaces, the IDs of the mets:digiprovMD elements to attach -->
<mets:fileSec ...>
   <mets:fileGrp ID="ARCHIVE_GRP" ADMID="PREMIS-EVENT-01 PREMIS-EVENT-02">
      <mets:file ID="KB_JB306_1915-02-19_01-mets" ...>
         ...
      </mets:file>
      ...

Because the value of the attribute xlink:href must be a valid URI, you will have to URI encode its value if the path to the file contains illegal URI symbols such as ? % etc.

  1. ID: The value is of the type xsd:ID, which means it must be unique across the XML document, must start with a letter or underscore, and can only contain letters, digits, underscores, hyphens, and periods. More info here.
  2. CHECKSUMTYPE: Fixed value "MD5"
  3. CHECKSUM: The MD5 checksum of the referenced file
  4. USE: PRESERVATION, FIXITY, VIRTUAL, see Complex Objects Reference for more information.
  5. MIMETYPE: Optional parameter
  6. xlink:href: The path provided here must be relative to the root of the archive. See the concrete example archive below to evaluate this requirement. Using other values for path will cause the METS to be rejected because the referenced files will not be found inside the archive.
  7. ADMID:
    1. If you provide as value the ID of a mets:sourceMD in the METS, the embedded XML under this tag will be used as sidecar metadata for the referenced file. In the example above the values METADATA-SIP and METADATA-PDF refer to matching embedded sidecars in the format MediaHAVEN_external_metadata.
    2. If you provide as value the ID of a mets:digiprovMD in the METS, the embedded Premis event under this tag will be recognized by MediaHaven and be visible in the future metadata exports for this file. In the above example the values PREMIS-EVENT-01 and PREMIS-EVENT-02 refer to matching embedded Premis events.
  • No labels