Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

...

...

...

...

...

...

...

Warninginfo
titleDisclaimer

Complex objects are an advanced form of ingest workflow in MediaHaven which is not available to all customers. 

Complete Example

View file
nameKB_JB306_1915-02-19_01.xml

...

KB_JB306_1915-02-19_01.xml
linenumberstrue
collapsetrue
Code Block
<?xml version="1.0" encoding="UTF-8"?>
<mets:mets xmlns:mets="http://www.loc.gov/METS/" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/METS/ mets.xsd">
   <!-- HEADER section: record dates, record status and the organisations involved.
        The last-modification date must not precede the creation date; both are
        given as UTC timestamps so they can be compared unambiguously. -->
   <mets:metsHdr LASTMODDATE="2012-02-16T10:15:19Z" CREATEDATE="2012-02-16T10:15:19Z" RECORDSTATUS="MY IMPORT">
      <!-- Organisation acting as custodian -->
      <mets:agent TYPE="ORGANIZATION" ROLE="CUSTODIAN">
         <mets:name>BELSPO</mets:name>
      </mets:agent>
      <!-- Organisation owning the intellectual property -->
      <mets:agent TYPE="ORGANIZATION" ROLE="IPOWNER">
         <mets:name>KBR</mets:name>
      </mets:agent>
   </mets:metsHdr>
   <!-- SIDECAR section -->
   <!-- Sidecar metadata for the archive itself. The sourceMD ID is METADATA-SIP
        because that is the value the ADMID attribute of mets:fileGrp refers to. -->
   <mets:amdSec ID="SECTION-METADATA-SIP">
      <mets:sourceMD ID="METADATA-SIP">
         <mets:mdWrap MDTYPE="OTHER" OTHERMDTYPE="MEDIAHAVEN-XML">
            <mets:xmlData>
               <MediaHAVEN_external_metadata>
                  <title>My Archive!</title>
                  <description>My description about the archive</description>
               </MediaHAVEN_external_metadata>
            </mets:xmlData>
         </mets:mdWrap>
      </mets:sourceMD>
   </mets:amdSec>
   <!-- Sidecar metadata for the PDF. The sourceMD ID is METADATA-PDF because
        that is the value the ADMID attribute of the PDF's mets:file refers to. -->
   <mets:amdSec ID="SECTION-METADATA-PDF">
      <mets:sourceMD ID="METADATA-PDF">
         <mets:mdWrap MDTYPE="OTHER" OTHERMDTYPE="MEDIAHAVEN-XML">
            <mets:xmlData>
               <MediaHAVEN_external_metadata>
                  <title>My PDF!</title>
                  <description>My description about the pdf</description>
               </MediaHAVEN_external_metadata>
            </mets:xmlData>
         </mets:mdWrap>
      </mets:sourceMD>
   </mets:amdSec>
   <!-- PREMIS section -->
   <!-- Event 01: the digiprovMD ID PREMIS-EVENT-01 is the value that ADMID
        attributes in the file section refer to. -->
   <mets:amdSec ID="SECTION-PREMIS-EVENT-01">
      <mets:digiprovMD ID="PREMIS-EVENT-01">
         <mets:mdWrap MDTYPE="PREMIS:EVENT">
            <mets:xmlData>
               <premis:event xmlns:premis="info:lc/xmlns/premis-v2">
                  <premis:eventIdentifier>
                     <premis:eventIdentifierType>KBR-Events</premis:eventIdentifierType>
                     <premis:eventIdentifierValue>8545875115</premis:eventIdentifierValue>
                  </premis:eventIdentifier>
                  <premis:eventType>OCR creation</premis:eventType>
                  <premis:eventDateTime>2014-10-21</premis:eventDateTime>
                  <premis:eventDetail>Detailed info</premis:eventDetail>
                  <premis:eventOutcomeInformation>
                     <premis:eventOutcome>y</premis:eventOutcome>
                  </premis:eventOutcomeInformation>
                  <!-- Agent that carried out the event -->
                  <premis:linkingAgentIdentifier>
                     <premis:linkingAgentIdentifierType>KBR-Employee</premis:linkingAgentIdentifierType>
                     <premis:linkingAgentIdentifierValue>John Doe</premis:linkingAgentIdentifierValue>
                  </premis:linkingAgentIdentifier>
                  <!-- Object the event applies to; the value matches a mets:file ID -->
                  <premis:linkingObjectIdentifier>
                     <premis:linkingObjectIdentifierType>KBR-ID</premis:linkingObjectIdentifierType>
                     <premis:linkingObjectIdentifierValue>KB_JB306_1915-02-19_01-mets</premis:linkingObjectIdentifierValue>
                  </premis:linkingObjectIdentifier>
               </premis:event>
            </mets:xmlData>
         </mets:mdWrap>
      </mets:digiprovMD>
   </mets:amdSec>
   <!-- SIP/FILE section: a single mets:fileGrp listing every file of the archive.
        Each mets:file carries an MD5 checksum, a USE value and a mets:FLocat whose
        xlink:href is the path of the file inside the archive. -->
   <!-- NOTE(review): the MIMETYPE values text/pdf, image/jpg and image/tif are not the
        IANA-registered names (application/pdf, image/jpeg, image/tiff); confirm which
        spelling the platform expects before changing them. -->
   <mets:fileSec ID="SECTION-FILES-SIP">
      <!-- ADMID on the group lists the metadata and event IDs attached at archive level -->
      <mets:fileGrp ID="ARCHIVE_GRP" ADMID="METADATA-SIP PREMIS-EVENT-01">
         <!-- The METS file (USE="PRESERVATION") -->
         <mets:file ID="KB_JB306_1915-02-19_01-mets" CHECKSUMTYPE="MD5" CHECKSUM="f10d79fe597304761bf5476a03b77079" USE="PRESERVATION" MIMETYPE="text/xml">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/KB_JB306_1915-02-19_01-mets.xml" />
         </mets:file>
         <!-- The top-level PDF (USE="VIRTUAL"), with its own sidecar reference in ADMID -->
         <mets:file ID="KB_JB306_1915-02-19_01" ADMID="METADATA-PDF" CHECKSUMTYPE="MD5" CHECKSUM="54ba2c50f0b5bb544de55aebbb4fa6ab" USE="VIRTUAL" MIMETYPE="text/pdf">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/KB_JB306_1915-02-19_01.pdf" />
         </mets:file>
         <!-- Files under alto/ (USE="FIXITY") -->
         <mets:file ID="KB_JB306_1915-02-19_01-ALTO-00001" CHECKSUMTYPE="MD5" CHECKSUM="2c042250c118a03dd85c01c3e542ae5f" USE="FIXITY" MIMETYPE="text/xml">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/alto/KB_JB306_1915-02-19_01-00001.xml" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-ALTO-00002" CHECKSUMTYPE="MD5" CHECKSUM="20bb2927a704e7eddac48b2af39fbf3a" USE="FIXITY" MIMETYPE="text/xml">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/alto/KB_JB306_1915-02-19_01-00002.xml" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-ALTO-00003" CHECKSUMTYPE="MD5" CHECKSUM="003b149246c7146eaa3d5605f00f6bb7" USE="FIXITY" MIMETYPE="text/xml">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/alto/KB_JB306_1915-02-19_01-00003.xml" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-ALTO-00004" CHECKSUMTYPE="MD5" CHECKSUM="a24cd875a366529dd5a7a7f6bcec0e72" USE="FIXITY" MIMETYPE="text/xml">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/alto/KB_JB306_1915-02-19_01-00004.xml" />
         </mets:file>
         <!-- Files under jpg/ (USE="FIXITY") -->
         <mets:file ID="KB_JB306_1915-02-19_01-JPG-00001" CHECKSUMTYPE="MD5" CHECKSUM="2aa75e684c85a0bda4f4155c614715f4" USE="FIXITY" MIMETYPE="image/jpg">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/jpg/KB_JB306_1915-02-19_01-00001.jpg" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-JPG-00002" CHECKSUMTYPE="MD5" CHECKSUM="b203607a35502dd054f8bb93ce71e317" USE="FIXITY" MIMETYPE="image/jpg">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/jpg/KB_JB306_1915-02-19_01-00002.jpg" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-JPG-00003" CHECKSUMTYPE="MD5" CHECKSUM="881f781c630b650cd8b99b56cfecc3b4" USE="FIXITY" MIMETYPE="image/jpg">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/jpg/KB_JB306_1915-02-19_01-00003.jpg" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-JPG-00004" CHECKSUMTYPE="MD5" CHECKSUM="80d5c7b7e554b696b21f6bc4e9e3d441" USE="FIXITY" MIMETYPE="image/jpg">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/jpg/KB_JB306_1915-02-19_01-00004.jpg" />
         </mets:file>
         <!-- Files under pdf/ (USE="FIXITY") -->
         <mets:file ID="KB_JB306_1915-02-19_01-PDF-00001" CHECKSUMTYPE="MD5" CHECKSUM="705e2cf3af94aea81da8afcef14ee6dd" USE="FIXITY" MIMETYPE="text/pdf">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/pdf/KB_JB306_1915-02-19_01-00001.pdf" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-PDF-00002" CHECKSUMTYPE="MD5" CHECKSUM="0fc07b6421debba28480fd9538203e16" USE="FIXITY" MIMETYPE="text/pdf">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/pdf/KB_JB306_1915-02-19_01-00002.pdf" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-PDF-00003" CHECKSUMTYPE="MD5" CHECKSUM="9f348fed21a6f90079d69288c5b636f7" USE="FIXITY" MIMETYPE="text/pdf">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/pdf/KB_JB306_1915-02-19_01-00003.pdf" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-PDF-00004" CHECKSUMTYPE="MD5" CHECKSUM="ea921eef6f10e99c646abb637c3bca94" USE="FIXITY" MIMETYPE="text/pdf">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/pdf/KB_JB306_1915-02-19_01-00004.pdf" />
         </mets:file>
         <!-- Files under tif/ (USE="FIXITY") -->
         <mets:file ID="KB_JB306_1915-02-19_01-TIF-00001" CHECKSUMTYPE="MD5" CHECKSUM="29338f3a00145cb14383a77bdd4d90e6" USE="FIXITY" MIMETYPE="image/tif">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/tif/KB_JB306_1915-02-19_01-00001.tif" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-TIF-00002" CHECKSUMTYPE="MD5" CHECKSUM="1e49d3f4c31201312376ebaf7a74e530" USE="FIXITY" MIMETYPE="image/tif">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/tif/KB_JB306_1915-02-19_01-00002.tif" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-TIF-00003" CHECKSUMTYPE="MD5" CHECKSUM="4ebfee64b198bc388c5fa7653a977dc6" USE="FIXITY" MIMETYPE="image/tif">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/tif/KB_JB306_1915-02-19_01-00003.tif" />
         </mets:file>
         <mets:file ID="KB_JB306_1915-02-19_01-TIF-00004" CHECKSUMTYPE="MD5" CHECKSUM="7fbb71c0b41e3c30b87eaed593b8546e" USE="FIXITY" MIMETYPE="image/tif">
            <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/tif/KB_JB306_1915-02-19_01-00004.tif" />
         </mets:file>
      </mets:fileGrp>
   </mets:fileSec>
   <!-- STRUCTMAP section: a structMap holding one empty mets:div; this page lists
        it as required for the METS to be valid. -->
   <mets:structMap>
      <mets:div />
   </mets:structMap>
</mets:mets>

...

The following requirements are imposed by the Complex Ingest workflow. They go beyond well-formed XML and validation against the provided XSDs.

  1. Every file in the archive must be referenced by the METS. If files are not referenced or if a referenced file is missing, the entire archive is rejected.

  2. The MD5 checksums provided in the XML are compared against the calculated MD5 checksums. The entire archive is rejected if one check fails.

  3. The file paths used in the METS are paths relative to the root of the accompanying archive.

METS Validation

View file
namemets.xsd

...

View file
namexlink.xsd

...

...

  1. Do not use a default namespace for the METS tags but use the explicit namespace prefix mets, e.g. mets:agent.

  2. Use exactly one tag mets:fileGrp under which all tags  mets:file are placed. 

  3. The value of the attribute ID is of the type xsd:ID, which means it must be unique across the XML document and be a valid NCName. This means that it must start with a letter or underscore, and can only contain letters, digits, underscores, hyphens, and periods. More info here.

  4. The tag <mets:structMap> must be provided with the self-closed tag <mets:div /> in order to have a valid METS XML.


TIP
Tip
title

TIP

It is important to validate the METS which you generate with the provided XSDs before uploading them to the platform. This will prevent the need for fixing common errors. You can validate the XML using online tools, for example: http://www.xmlvalidation.com/. It is important that you place the XSD xlink.xsd in the same folder as mets.xsd.


Common mistakes

Tip

The value of the attribute ID must be a valid XML tag name which has the following rules:

  • Must start with a letter or underscore

  • Cannot start with the letters xml (or XML, or Xml, etc)

  • Can contain letters, digits, hyphens, underscores, and periods

  • Cannot contain spaces


Tip

Because the value of the attribute xlink:href must be a valid URI, you will have to URI encode its value if the path to the file contains illegal URI symbols such as ? % etc.

Step-by-step building of the METS

The XML Header

...

XML Header
linenumberstrue
collapsetrue
Code Block
<?xml version="1.0" encoding="UTF-8"?>
<mets:mets xmlns:mets="http://www.loc.gov/METS/"
           xmlns:xlink="http://www.w3.org/1999/xlink"
           xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
           xsi:schemaLocation="http://www.loc.gov/METS/ mets.xsd">

The XML header declares the namespaces used in the METS, namely mets and xlink (plus xsi for the schema location hint). MediaHaven validates the METS with the XSDs provided on this page, regardless of the URI provided in the optional attribute xsi:schemaLocation.

The METS header

...

METS Header
true
linenumberstrue
collapse
Code Block
<!-- HEADER section: record dates, record status and the organisations involved.
     The last-modification date must not precede the creation date; both are
     given as UTC timestamps so they can be compared unambiguously. -->
<mets:metsHdr LASTMODDATE="2012-02-16T10:15:19Z" CREATEDATE="2012-02-16T10:15:19Z" RECORDSTATUS="MY IMPORT">
   <!-- Organisation acting as custodian -->
   <mets:agent TYPE="ORGANIZATION" ROLE="CUSTODIAN">
      <mets:name>BELSPO</mets:name>
   </mets:agent>
   <!-- Organisation owning the intellectual property -->
   <mets:agent TYPE="ORGANIZATION" ROLE="IPOWNER">
      <mets:name>KBR</mets:name>
   </mets:agent>
</mets:metsHdr>

...

This section describes each and every file in the archive, no exceptions.

...

File Section
linenumberstrue
collapsetrue
Code Block
<!-- SIP/FILE section: one mets:fileGrp containing one mets:file per file -->
<mets:fileSec ID="SECTION-FILES-SIP">
   <mets:fileGrp ID="ARCHIVE_GRP">
      <mets:file>...</mets:file> <!-- File 1 -->
      <mets:file>...</mets:file> <!-- File 2 -->
      <mets:file>...</mets:file> <!-- File 3 -->
      <!-- repeat the tag mets:file for the other files -->
   </mets:fileGrp>
</mets:fileSec>

...

This section describes a single file through a number of attributes and the relative path in the ZIP archive. 

...

Single File Section
linenumberstrue
collapsetrue
Code Block
<!-- One file entry: identifier, MD5 checksum, USE and MIME type, plus the
     location of the file given in xlink:href -->
<mets:file ID="KB_JB306_1915-02-19_01-mets"
           CHECKSUMTYPE="MD5"
           CHECKSUM="f10d79fe597304761bf5476a03b77079"
           USE="PRESERVATION"
           MIMETYPE="text/xml">
   <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/KB_JB306_1915-02-19_01-mets.xml" />
</mets:file>

Anchor
Metadata
Metadata

Metadata
Code Block
themeConfluence
titleMetadata
linenumberstrue
collapsetrue
<!-- SIDECAR section -->
<!-- Sidecar for the archive itself; its sourceMD ID is referenced by the ADMID
     attribute of mets:fileGrp further down. -->
<mets:amdSec ...>
   <mets:sourceMD ID="METADATA-SIP">
      <mets:mdWrap MDTYPE="OTHER" OTHERMDTYPE="MEDIAHAVEN-XML">
         <mets:xmlData>
            <MediaHAVEN_external_metadata>
               <title>My Archive!</title>
               <description>My description about the archive</description>
            </MediaHAVEN_external_metadata>
         </mets:xmlData>
      </mets:mdWrap>
   </mets:sourceMD>
</mets:amdSec>
<!-- Sidecar for the PDF; its sourceMD ID is referenced by the ADMID attribute
     of the matching mets:file further down. -->
<mets:amdSec ...>
   <mets:sourceMD ID="METADATA-PDF">
      <mets:mdWrap MDTYPE="OTHER" OTHERMDTYPE="MEDIAHAVEN-XML">
         <mets:xmlData>
            <MediaHAVEN_external_metadata>
               <title>My PDF!</title>
               <description>My description about the pdf</description>
            </MediaHAVEN_external_metadata>
         </mets:xmlData>
      </mets:mdWrap>
   </mets:sourceMD>
</mets:amdSec>
...
<mets:fileSec ...>
   <mets:fileGrp ID="ARCHIVE_GRP" ADMID="METADATA-SIP">
      <mets:file ID="KB_JB306_1915-02-19_01-mets" ...>
         ...
      </mets:file>
      <mets:file ID="KB_JB306_1915-02-19_01-pdf" ADMID="METADATA-PDF" ...>
         ...
      </mets:file>
      ...

Anchor
Premis
Premis

...

Events
true
linenumberstrue
collapse
Code Block
<!-- PREMIS section -->
<!-- Event 01, written out in full; its digiprovMD ID is referenced by the
     ADMID attribute of mets:fileGrp further down. -->
<mets:amdSec ID="SECTION-PREMIS-EVENT-01">
   <mets:digiprovMD ID="PREMIS-EVENT-01">
      <mets:mdWrap MDTYPE="PREMIS:EVENT">
         <mets:xmlData>
            <premis:event xmlns:premis="info:lc/xmlns/premis-v2">
               <premis:eventIdentifier>
                  <premis:eventIdentifierType>KBR-Events</premis:eventIdentifierType>
                  <premis:eventIdentifierValue>8545875115</premis:eventIdentifierValue>
               </premis:eventIdentifier>
               <premis:eventType>OCR creation</premis:eventType>
               <premis:eventDateTime>2014-10-21</premis:eventDateTime>
               <premis:eventDetail>Detailed info</premis:eventDetail>
               <premis:eventOutcomeInformation>
                  <premis:eventOutcome>y</premis:eventOutcome>
               </premis:eventOutcomeInformation>
               <!-- Agent that carried out the event -->
               <premis:linkingAgentIdentifier>
                  <premis:linkingAgentIdentifierType>KBR-Employee</premis:linkingAgentIdentifierType>
                  <premis:linkingAgentIdentifierValue>John Doe</premis:linkingAgentIdentifierValue>
               </premis:linkingAgentIdentifier>
               <!-- Object the event applies to; the value matches a mets:file ID -->
               <premis:linkingObjectIdentifier>
                  <premis:linkingObjectIdentifierType>KBR-ID</premis:linkingObjectIdentifierType>
                  <premis:linkingObjectIdentifierValue>KB_JB306_1915-02-19_01-mets</premis:linkingObjectIdentifierValue>
               </premis:linkingObjectIdentifier>
            </premis:event>
         </mets:xmlData>
      </mets:mdWrap>
   </mets:digiprovMD>
</mets:amdSec>
<!-- Event 02, content elided -->
<mets:amdSec ID="SECTION-PREMIS-EVENT-02">
   <mets:digiprovMD ID="PREMIS-EVENT-02">
      <mets:mdWrap MDTYPE="PREMIS:EVENT">
         <mets:xmlData>...</mets:xmlData>
      </mets:mdWrap>
   </mets:digiprovMD>
</mets:amdSec>
...
<mets:fileSec ...>
   <mets:fileGrp ID="ARCHIVE_GRP" ADMID="PREMIS-EVENT-01 PREMIS-EVENT-02">
      <mets:file ID="KB_JB306_1915-02-19_01-mets" ...>
         ...
      </mets:file>
      ...

...

Warning

Because the value of the attribute xlink:href must be a valid URI, you will have to URI encode its value if the path to the file contains illegal URI symbols such as ? % etc.

  1. ID: The value is of the type xsd:ID, which means it must be unique across the XML document, must start with a letter or underscore, and can only contain letters, digits, underscores, hyphens, and periods. More info here.

  2. CHECKSUMTYPE: Fixed value "MD5"

  3. CHECKSUM: The MD5 checksum of the referenced file

  4. USE: PRESERVATION, FIXITY, VIRTUAL, see Complex Objects Reference for more information.

  5. MIMETYPE: Optional parameter

  6. xlink:href: The path provided here must be relative to the root of the archive. See the concrete example archive below to evaluate this requirement. Using other values for path will cause the METS to be rejected because the referenced files will not be found inside the archive.

  7. ADMID:

    1. If you provide as value the ID of a mets:sourceMD in the METS, the embedded XML under this tag will be used as sidecar metadata for the referenced file. In the example above the values METADATA-SIP and METADATA-PDF refer to matching embedded sidecars in the format MediaHAVEN_external_metadata.

    2. If you provide as value the ID of a mets:digiprovMD in the METS, the embedded Premis event under this tag will be recognized by MediaHaven and be visible in the future metadata exports for this file. In the above example the values PREMIS-EVENT-01 and PREMIS-EVENT-02 refer to matching embedded Premis events.