You are viewing an old version of this page. View the current version.
Compare with Current
View Page History
« Previous
Version 2
Next »
Complete Example
<?xml version="1.0" encoding="UTF-8"?>
<mets:mets xmlns:mets="http://www.loc.gov/METS/" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/METS/ mets.xsd">
<!-- HEADER section -->
<mets:metsHdr CREATEDATE="2010-02-16T10:15:19Z"
              LASTMODDATE="2012-02-16T10:15:19Z"
              RECORDSTATUS="MY IMPORT">
  <!-- Both timestamps are xsd:dateTime values in UTC ("Z"); the creation date
       must not be later than the last-modification date. -->
  <!-- At least one agent has to be present in the header. -->
  <mets:agent TYPE="ORGANIZATION" ROLE="CUSTODIAN">
    <mets:name>BELSPO</mets:name>
  </mets:agent>
  <mets:agent TYPE="ORGANIZATION" ROLE="IPOWNER">
    <mets:name>KBR</mets:name>
  </mets:agent>
</mets:metsHdr>
<!-- SIDECAR section -->
<mets:amdSec ID="SECTION-METADATA-SIP">
  <!-- Sidecar metadata for the archive as a whole. The sourceMD ID must be the
       exact value used in the ADMID attribute of mets:fileGrp ("METADATA-SIP");
       ADMID is an IDREFS list, so every token has to match an existing ID. -->
  <mets:sourceMD ID="METADATA-SIP">
    <mets:mdWrap MDTYPE="OTHER" OTHERMDTYPE="MEDIAHAVEN-XML">
      <mets:xmlData>
        <MediaHAVEN_external_metadata>
          <title>My Archive!</title>
          <description>My description about the archive</description>
        </MediaHAVEN_external_metadata>
      </mets:xmlData>
    </mets:mdWrap>
  </mets:sourceMD>
</mets:amdSec>
<mets:amdSec ID="SECTION-METADATA-PDF">
  <!-- Sidecar metadata for the PDF file. The sourceMD ID must be the exact value
       used in the ADMID attribute of the PDF's mets:file ("METADATA-PDF");
       ADMID is an IDREFS list, so every token has to match an existing ID. -->
  <mets:sourceMD ID="METADATA-PDF">
    <mets:mdWrap MDTYPE="OTHER" OTHERMDTYPE="MEDIAHAVEN-XML">
      <mets:xmlData>
        <MediaHAVEN_external_metadata>
          <title>My PDF!</title>
          <description>My description about the pdf</description>
        </MediaHAVEN_external_metadata>
      </mets:xmlData>
    </mets:mdWrap>
  </mets:sourceMD>
</mets:amdSec>
<!-- PREMIS section -->
<!-- Event 01 -->
<mets:amdSec ID="SECTION-PREMIS-EVENT-01">
  <!-- A single PREMIS event wrapped as digital-provenance metadata. The
       digiprovMD ID is the value listed in the ADMID of mets:fileGrp. -->
  <mets:digiprovMD ID="PREMIS-EVENT-01">
    <mets:mdWrap MDTYPE="PREMIS:EVENT">
      <mets:xmlData>
        <premis:event xmlns:premis="info:lc/xmlns/premis-v2">
          <!-- Identifier of the event itself -->
          <premis:eventIdentifier>
            <premis:eventIdentifierType>KBR-Events</premis:eventIdentifierType>
            <premis:eventIdentifierValue>8545875115</premis:eventIdentifierValue>
          </premis:eventIdentifier>
          <premis:eventType>OCR creation</premis:eventType>
          <premis:eventDateTime>2014-10-21</premis:eventDateTime>
          <premis:eventOutcomeInformation>
            <premis:eventOutcome>y</premis:eventOutcome>
            <premis:eventOutcomeDetail>
              <premis:eventOutcomeDetailNote />
            </premis:eventOutcomeDetail>
          </premis:eventOutcomeInformation>
          <!-- Agent linked to the event -->
          <premis:linkingAgentIdentifier>
            <premis:linkingAgentIdentifierType>KBR-Employee</premis:linkingAgentIdentifierType>
            <premis:linkingAgentIdentifierValue>John Doe</premis:linkingAgentIdentifierValue>
          </premis:linkingAgentIdentifier>
          <!-- Object linked to the event -->
          <premis:linkingObjectIdentifier>
            <premis:linkingObjectIdentifierType>KBR-ID</premis:linkingObjectIdentifierType>
            <premis:linkingObjectIdentifierValue>KB_JB306_1915-02-19_01-mets</premis:linkingObjectIdentifierValue>
          </premis:linkingObjectIdentifier>
        </premis:event>
      </mets:xmlData>
    </mets:mdWrap>
  </mets:digiprovMD>
</mets:amdSec>
<!-- SIP/FILE section -->
<mets:fileSec ID="SECTION-FILES-SIP">
  <!-- Exactly one mets:fileGrp holds every mets:file of the archive. Each
       xlink:href is a path relative to the root of the archive and each
       CHECKSUM is the MD5 of the referenced file. MIMETYPE values use the
       IANA-registered media types (application/pdf, image/jpeg, image/tiff). -->
  <mets:fileGrp ID="ARCHIVE_GRP" ADMID="METADATA-SIP PREMIS-EVENT-01">
    <mets:file ID="KB_JB306_1915-02-19_01-mets" CHECKSUMTYPE="MD5" CHECKSUM="f10d79fe597304761bf5476a03b77079" USE="PRESERVATION" MIMETYPE="text/xml">
      <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/KB_JB306_1915-02-19_01-mets.xml" />
    </mets:file>
    <mets:file ID="KB_JB306_1915-02-19_01" ADMID="METADATA-PDF" CHECKSUMTYPE="MD5" CHECKSUM="54ba2c50f0b5bb544de55aebbb4fa6ab" USE="VIRTUAL" MIMETYPE="application/pdf">
      <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/KB_JB306_1915-02-19_01.pdf" />
    </mets:file>

    <!-- ALTO (OCR) files -->
    <mets:file ID="KB_JB306_1915-02-19_01-ALTO-00001" CHECKSUMTYPE="MD5" CHECKSUM="2c042250c118a03dd85c01c3e542ae5f" USE="FIXITY" MIMETYPE="text/xml">
      <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/alto/KB_JB306_1915-02-19_01-00001.xml" />
    </mets:file>
    <mets:file ID="KB_JB306_1915-02-19_01-ALTO-00002" CHECKSUMTYPE="MD5" CHECKSUM="20bb2927a704e7eddac48b2af39fbf3a" USE="FIXITY" MIMETYPE="text/xml">
      <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/alto/KB_JB306_1915-02-19_01-00002.xml" />
    </mets:file>
    <mets:file ID="KB_JB306_1915-02-19_01-ALTO-00003" CHECKSUMTYPE="MD5" CHECKSUM="003b149246c7146eaa3d5605f00f6bb7" USE="FIXITY" MIMETYPE="text/xml">
      <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/alto/KB_JB306_1915-02-19_01-00003.xml" />
    </mets:file>
    <mets:file ID="KB_JB306_1915-02-19_01-ALTO-00004" CHECKSUMTYPE="MD5" CHECKSUM="a24cd875a366529dd5a7a7f6bcec0e72" USE="FIXITY" MIMETYPE="text/xml">
      <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/alto/KB_JB306_1915-02-19_01-00004.xml" />
    </mets:file>

    <!-- JPEG images -->
    <mets:file ID="KB_JB306_1915-02-19_01-JPG-00001" CHECKSUMTYPE="MD5" CHECKSUM="2aa75e684c85a0bda4f4155c614715f4" USE="FIXITY" MIMETYPE="image/jpeg">
      <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/jpg/KB_JB306_1915-02-19_01-00001.jpg" />
    </mets:file>
    <mets:file ID="KB_JB306_1915-02-19_01-JPG-00002" CHECKSUMTYPE="MD5" CHECKSUM="b203607a35502dd054f8bb93ce71e317" USE="FIXITY" MIMETYPE="image/jpeg">
      <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/jpg/KB_JB306_1915-02-19_01-00002.jpg" />
    </mets:file>
    <mets:file ID="KB_JB306_1915-02-19_01-JPG-00003" CHECKSUMTYPE="MD5" CHECKSUM="881f781c630b650cd8b99b56cfecc3b4" USE="FIXITY" MIMETYPE="image/jpeg">
      <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/jpg/KB_JB306_1915-02-19_01-00003.jpg" />
    </mets:file>
    <mets:file ID="KB_JB306_1915-02-19_01-JPG-00004" CHECKSUMTYPE="MD5" CHECKSUM="80d5c7b7e554b696b21f6bc4e9e3d441" USE="FIXITY" MIMETYPE="image/jpeg">
      <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/jpg/KB_JB306_1915-02-19_01-00004.jpg" />
    </mets:file>

    <!-- Per-page PDF files -->
    <mets:file ID="KB_JB306_1915-02-19_01-PDF-00001" CHECKSUMTYPE="MD5" CHECKSUM="705e2cf3af94aea81da8afcef14ee6dd" USE="FIXITY" MIMETYPE="application/pdf">
      <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/pdf/KB_JB306_1915-02-19_01-00001.pdf" />
    </mets:file>
    <mets:file ID="KB_JB306_1915-02-19_01-PDF-00002" CHECKSUMTYPE="MD5" CHECKSUM="0fc07b6421debba28480fd9538203e16" USE="FIXITY" MIMETYPE="application/pdf">
      <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/pdf/KB_JB306_1915-02-19_01-00002.pdf" />
    </mets:file>
    <mets:file ID="KB_JB306_1915-02-19_01-PDF-00003" CHECKSUMTYPE="MD5" CHECKSUM="9f348fed21a6f90079d69288c5b636f7" USE="FIXITY" MIMETYPE="application/pdf">
      <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/pdf/KB_JB306_1915-02-19_01-00003.pdf" />
    </mets:file>
    <mets:file ID="KB_JB306_1915-02-19_01-PDF-00004" CHECKSUMTYPE="MD5" CHECKSUM="ea921eef6f10e99c646abb637c3bca94" USE="FIXITY" MIMETYPE="application/pdf">
      <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/pdf/KB_JB306_1915-02-19_01-00004.pdf" />
    </mets:file>

    <!-- TIFF images -->
    <mets:file ID="KB_JB306_1915-02-19_01-TIF-00001" CHECKSUMTYPE="MD5" CHECKSUM="29338f3a00145cb14383a77bdd4d90e6" USE="FIXITY" MIMETYPE="image/tiff">
      <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/tif/KB_JB306_1915-02-19_01-00001.tif" />
    </mets:file>
    <mets:file ID="KB_JB306_1915-02-19_01-TIF-00002" CHECKSUMTYPE="MD5" CHECKSUM="1e49d3f4c31201312376ebaf7a74e530" USE="FIXITY" MIMETYPE="image/tiff">
      <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/tif/KB_JB306_1915-02-19_01-00002.tif" />
    </mets:file>
    <mets:file ID="KB_JB306_1915-02-19_01-TIF-00003" CHECKSUMTYPE="MD5" CHECKSUM="4ebfee64b198bc388c5fa7653a977dc6" USE="FIXITY" MIMETYPE="image/tiff">
      <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/tif/KB_JB306_1915-02-19_01-00003.tif" />
    </mets:file>
    <mets:file ID="KB_JB306_1915-02-19_01-TIF-00004" CHECKSUMTYPE="MD5" CHECKSUM="7fbb71c0b41e3c30b87eaed593b8546e" USE="FIXITY" MIMETYPE="image/tiff">
      <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/tif/KB_JB306_1915-02-19_01-00004.tif" />
    </mets:file>
  </mets:fileGrp>
</mets:fileSec>
<mets:structMap>
  <!-- Minimal structural map: one empty mets:div. -->
  <mets:div/>
</mets:structMap>
</mets:mets>
Requirements
The following requirements are imposed by the Complex Ingest workflow; they go beyond well-formed XML and validation against the provided XSDs.
- Every file in the archive must be referenced by the METS. If files are not referenced or if a referenced file is missing, the entire archive is rejected.
- The MD5 checksums provided in the XML are compared against the calculated MD5 checksums. The entire archive is rejected if one check fails.
- The file paths used in the METS are paths relative to the root of the accompanying archive.
METS Validation
- Do not use a default namespace for the METS tags but use the explicit namespace prefix mets, e.g. mets:agent.
- Use exactly one tag mets:fileGrp under which all tags mets:file are placed.
- The value of the attribute ID is of the type xsd:ID, which means it must be unique across the XML document and be a valid NCName. This means that it must start with a letter or underscore, and can only contain letters, digits, underscores, hyphens, and periods. More info here.
The tag <mets:structMap> must be provided with the self-closed tag <mets:div /> in order to have a valid METS XML.
Common mistakes
Step-by-step building of the METS
<?xml version="1.0" encoding="UTF-8"?>
<mets:mets xmlns:mets="http://www.loc.gov/METS/" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.loc.gov/METS/ mets.xsd">
The XML header declares the namespaces used in METS, namely mets and xlink. MediaHaven validates the METS with the XSDs provided on page regardless of the URI provided in the optional attribute xsi:schemaLocation.
<!-- HEADER section -->
<mets:metsHdr CREATEDATE="2010-02-16T10:15:19Z"
              LASTMODDATE="2012-02-16T10:15:19Z"
              RECORDSTATUS="MY IMPORT">
  <!-- Both timestamps are xsd:dateTime values in UTC ("Z"); the creation date
       must not be later than the last-modification date. -->
  <mets:agent TYPE="ORGANIZATION" ROLE="CUSTODIAN">
    <mets:name>BELSPO</mets:name>
  </mets:agent>
  <mets:agent TYPE="ORGANIZATION" ROLE="IPOWNER">
    <mets:name>KBR</mets:name>
  </mets:agent>
</mets:metsHdr>
The METS header is mandatory and must provide at least one agent.
The File Section
This section describes each and every file in the archive, no exceptions.
<!-- SIP/FILE section -->
<mets:fileSec ID="SECTION-FILES-SIP">
  <!-- Skeleton only: a single mets:fileGrp containing one mets:file per file. -->
  <mets:fileGrp ID="ARCHIVE_GRP">
    <mets:file>...</mets:file> <!-- File 1 -->
    <mets:file>...</mets:file> <!-- File 2 -->
    <mets:file>...</mets:file> <!-- File 3 -->
    <!-- repeat the tag mets:file for the other files -->
  </mets:fileGrp>
</mets:fileSec>
Single File Section
This section describes a single file through a number of attributes and the relative path in the ZIP archive.
<mets:file ID="KB_JB306_1915-02-19_01-mets"
           CHECKSUMTYPE="MD5"
           CHECKSUM="f10d79fe597304761bf5476a03b77079"
           USE="PRESERVATION"
           MIMETYPE="text/xml">
  <!-- Location of the file inside the archive -->
  <mets:FLocat LOCTYPE="OTHER" xlink:href="JB306/1915/02/19/01/KB_JB306_1915-02-19_01-mets.xml" />
</mets:file>
<!-- SIDECAR section -->
<mets:amdSec ...>
  <!-- Embedded sidecar; its ID is what an ADMID attribute refers to. -->
  <mets:sourceMD ID="METADATA-SIP">
    <mets:mdWrap MDTYPE="OTHER" OTHERMDTYPE="MEDIAHAVEN-XML">
      <mets:xmlData>
        <MediaHAVEN_external_metadata>
          <title>My Archive!</title>
          <description>My description about the archive</description>
        </MediaHAVEN_external_metadata>
      </mets:xmlData>
    </mets:mdWrap>
  </mets:sourceMD>
</mets:amdSec>
<mets:amdSec ...>
  <!-- Embedded sidecar; its ID is what an ADMID attribute refers to. -->
  <mets:sourceMD ID="METADATA-PDF">
    <mets:mdWrap MDTYPE="OTHER" OTHERMDTYPE="MEDIAHAVEN-XML">
      <mets:xmlData>
        <MediaHAVEN_external_metadata>
          <title>My PDF!</title>
          <description>My description about the pdf</description>
        </MediaHAVEN_external_metadata>
      </mets:xmlData>
    </mets:mdWrap>
  </mets:sourceMD>
</mets:amdSec>
...
<mets:fileSec ...>
<mets:fileGrp ID="ARCHIVE_GRP" ADMID="METADATA-SIP">
<mets:file ID="KB_JB306_1915-02-19_01-mets" ...>
...
</mets:file>
<mets:file ID="KB_JB306_1915-02-19_01-pdf" ADMID="METADATA-PDF" ...>
...
</mets:file>
...
<!-- PREMIS section -->
<mets:amdSec ID="SECTION-PREMIS-EVENT-01">
  <!-- A single PREMIS event wrapped as digital-provenance metadata. -->
  <mets:digiprovMD ID="PREMIS-EVENT-01">
    <mets:mdWrap MDTYPE="PREMIS:EVENT">
      <mets:xmlData>
        <premis:event xmlns:premis="info:lc/xmlns/premis-v2">
          <!-- Identifier of the event itself -->
          <premis:eventIdentifier>
            <premis:eventIdentifierType>KBR-Events</premis:eventIdentifierType>
            <premis:eventIdentifierValue>8545875115</premis:eventIdentifierValue>
          </premis:eventIdentifier>
          <premis:eventType>OCR creation</premis:eventType>
          <premis:eventDateTime>2014-10-21</premis:eventDateTime>
          <premis:eventOutcomeInformation>
            <premis:eventOutcome>y</premis:eventOutcome>
            <premis:eventOutcomeDetail>
              <premis:eventOutcomeDetailNote />
            </premis:eventOutcomeDetail>
          </premis:eventOutcomeInformation>
          <!-- Agent linked to the event -->
          <premis:linkingAgentIdentifier>
            <premis:linkingAgentIdentifierType>KBR-Employee</premis:linkingAgentIdentifierType>
            <premis:linkingAgentIdentifierValue>John Doe</premis:linkingAgentIdentifierValue>
          </premis:linkingAgentIdentifier>
          <!-- Object linked to the event -->
          <premis:linkingObjectIdentifier>
            <premis:linkingObjectIdentifierType>KBR-ID</premis:linkingObjectIdentifierType>
            <premis:linkingObjectIdentifierValue>KB_JB306_1915-02-19_01-mets</premis:linkingObjectIdentifierValue>
          </premis:linkingObjectIdentifier>
        </premis:event>
      </mets:xmlData>
    </mets:mdWrap>
  </mets:digiprovMD>
</mets:amdSec>
<mets:amdSec ID="SECTION-PREMIS-EVENT-02">
  <!-- Second event; the PREMIS payload is elided in this example. -->
  <mets:digiprovMD ID="PREMIS-EVENT-02">
    <mets:mdWrap MDTYPE="PREMIS:EVENT">
      <mets:xmlData>...</mets:xmlData>
    </mets:mdWrap>
  </mets:digiprovMD>
</mets:amdSec>
...
<mets:fileSec ...>
<mets:fileGrp ID="ARCHIVE_GRP" ADMID="PREMIS-EVENT-01 PREMIS-EVENT-02">
<mets:file ID="KB_JB306_1915-02-19_01-mets" ...>
...
</mets:file>
...
- ID: The value is of the type xsd:ID, which means it must be unique across the XML document, must start with a letter or underscore, and can only contain letters, digits, underscores, hyphens, and periods. More info here.
- CHECKSUMTYPE: Fixed value "MD5"
- CHECKSUM: The MD5 checksum of the referenced file
- USE: PRESERVATION, FIXITY, VIRTUAL, see Complex Objects Reference for more information.
- MIMETYPE: Optional parameter
- xlink:href: The path provided here must be relative to the root of the archive. See the concrete example archive below to evaluate this requirement. Using other values for the path will cause the METS to be rejected because the referenced files will not be found inside the archive.
- ADMID:
  - If you provide as value the ID of a mets:sourceMD in the METS, the embedded XML under this tag will be used as sidecar metadata for the referenced file. In the example above the values METADATA-SIP and METADATA-PDF refer to matching embedded sidecars in the format MediaHAVEN_external_metadata.
  - If you provide as value the ID of a mets:digiprovMD in the METS, the embedded PREMIS event under this tag will be recognized by MediaHaven and be visible in future metadata exports for this file. In the above example the values PREMIS-EVENT-01 and PREMIS-EVENT-02 refer to matching embedded PREMIS events.