Validation Package invenio

Language: F#

ARC; data publication

v3.2.0 released on 04/24/2026

by Oliver Maus; Christopher Lux; Lukas Weil

7 Downloads

Validates if the ARC contains the necessary metadata to be publishable via Invenio.


Install with arc-validate

 arc-validate package install invenio --version 3.2.0

Include in a PLANTDataHUB CQC pipeline

validation_packages:
  - name: invenio
    version: 3.2.0

Description

Validates if the ARC contains the necessary metadata to be publishable via Invenio.

The following metadata is required:

- Investigation has title and description

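- All persons in Investigation Contacts must have a first name and a last name

- At least one person in Investigation Contacts must have an affiliation, an email and an ORCID

- Emails and ORCIDs that are provided must be valid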

License file is required.


Release notes

- Only one contact is required to have E-Mail, ORCID and affiliation, not all contacts. https://github.com/orgs/nfdi4plants/discussions/10


Browse code for v3.2.0 (fsharp)
let [<Literal>]PACKAGE_METADATA = """(*
---
Name: invenio
Summary: Validates if the ARC contains the necessary metadata to be publishable via Invenio.
Description: |
  Validates if the ARC contains the necessary metadata to be publishable via Invenio.
  The following metadata is required:
  - Investigation has title and description
  - All persons in Investigation Contacts must have a first name and a last name
  - At least one person in Investigation Contacts must have an affiliation, an email and an ORCID
  - Emails and ORCIDs that are provided must be valid
  License file is required.
MajorVersion: 3
MinorVersion: 2
PatchVersion: 0
Publish: true
Authors:
  - FullName: Oliver Maus
    Affiliation: DataPLANT
  - FullName: Christopher Lux
    Email: lux@csbiology.de
    Affiliation: RPTU Kaiserslautern
    AffiliationLink: http://rptu.de/startseite
  - FullName: Lukas Weil
    Email: weil@rptu.de
    Affiliation: DataPLANT
Tags:
  - Name: ARC
  - Name: data publication
ReleaseNotes: |
  - Only one contact is required to have E-Mail, ORCID and affiliation, not all contacts. https://github.com/orgs/nfdi4plants/discussions/10
---
*)"""
// The literal above is the package's YAML frontmatter; it is handed to Setup.Metadata in the
// execution step at the end of this script.
// NOTE(review): the binding is presumably located by its exact opening line, so keep it as the
// very first text of the file and do not put comments in front of it - confirm before moving it.

#r "nuget: ARCExpect, 5.0.1"

open ControlledVocabulary
open Expecto
open ARCExpect
open ARCTokenization
open ARCTokenization.StructuralOntology
open System.IO

// Input: the ARC to validate is the directory the script is started from
let arcDir = Directory.GetCurrentDirectory()

// Values: result of FileSystem.parseARCFileSystem for arcDir; it feeds both the
// investigation metadata lookup and the license check further down
let absoluteDirectoryPaths = arcDir |> FileSystem.parseARCFileSystem

// Investigation metadata params of all parsed sheets, flattened into a single list
let imd =
    List.concat (Investigation.parseMetadataSheetsFromTokens() arcDir absoluteDirectoryPaths)
// Bring every contact column to the same length so that the columns can be compared by index.
// Each column holds all params from imd whose cv name equals the respective contact term.
let firstNames =
    imd
    |> List.filter (fun param ->
        Param.getCvName param = INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person First Name``.Name)
let lastNames =
    imd
    |> List.filter (fun param ->
        Param.getCvName param = INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Last Name``.Name)
let affiliations =
    imd
    |> List.filter (fun param ->
        Param.getCvName param = INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Affiliation``.Name)
let emails =
    imd
    |> List.filter (fun param ->
        Param.getCvName param = INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Email``.Name)
let orcids =
    imd
    |> List.filter (fun param ->
        Param.getCvName param = INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Comment[ORCID]``.Name)

// Length of the longest contact column
let longestRow =
    [firstNames; lastNames; affiliations; emails; orcids]
    |> List.map List.length
    |> List.max

// Appends emptyValueCvp to cvpList until it holds l elements; a list that already has
// l elements is returned unchanged.
let addEmptyValues l emptyValueCvp (cvpList : IParam list) =
    match l - cvpList.Length with
    | 0 -> cvpList
    | missing -> cvpList @ List.replicate missing emptyValueCvp

let filledFirstNames =
    addEmptyValues
        longestRow
        (CvParam(INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person First Name``, Value ""))
        firstNames
let filledLastNames =
    addEmptyValues
        longestRow
        (CvParam(INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Last Name``, Value ""))
        lastNames
let filledAffiliations =
    addEmptyValues
        longestRow
        (CvParam(INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Affiliation``, Value ""))
        affiliations
let filledEmails =
    addEmptyValues
        longestRow
        (CvParam(INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Email``, Value ""))
        emails
let filledOrcids =
    addEmptyValues
        longestRow
        (CvParam(INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Comment[ORCID]``, Value ""))
        orcids

// Validation Cases:
/// All validation cases of this package, grouped into one test list named after the
/// INVESTIGATION section key:
/// - Investigation Title and Investigation Description exist and are not empty
/// - every contact has a non-empty first name and last name
/// - every email and ORCID that is filled in matches its validation pattern
/// - at least one contact has an email, an ORCID and an affiliation
/// - a file named "license" or "license.md" (any casing) is among the file system params
let cases = 
    testList INVMSO.``Investigation Metadata``.INVESTIGATION.key.Name [
        // Investigation has title
        ARCExpect.validationCase (TestID.Name $"{INVMSO.``Investigation Metadata``.INVESTIGATION.``Investigation Title``.Name} exists") {
            imd
            |> Validate.ParamCollection.ContainsNonKeyParamWithTerm
                INVMSO.``Investigation Metadata``.INVESTIGATION.``Investigation Title``
        }
        // Investigation title is not empty
        ARCExpect.validationCase (TestID.Name $"{INVMSO.``Investigation Metadata``.INVESTIGATION.``Investigation Title``.Name} is not empty") {
            imd
            |> Seq.filter (Param.getTerm >> (=) INVMSO.``Investigation Metadata``.INVESTIGATION.``Investigation Title``)
            |> Seq.iter Validate.Param.ValueIsNotEmpty
        }
        // Investigation has description
        ARCExpect.validationCase (TestID.Name $"{INVMSO.``Investigation Metadata``.INVESTIGATION.``Investigation Description``.Name} exists") {
            imd
            |> Validate.ParamCollection.ContainsNonKeyParamWithTerm
                INVMSO.``Investigation Metadata``.INVESTIGATION.``Investigation Description``
        }
        // Investigation description is not empty
        ARCExpect.validationCase (TestID.Name $"{INVMSO.``Investigation Metadata``.INVESTIGATION.``Investigation Description``.Name} is not empty") {
            imd
            |> Seq.filter (Param.getTerm >> (=) INVMSO.``Investigation Metadata``.INVESTIGATION.``Investigation Description``)
            |> Seq.iter Validate.Param.ValueIsNotEmpty
        }
        // Every contact needs a first name and a last name. The checks run on the padded
        // columns, so a contact that only shows up in another column fails here as well.
        // Investigation Person First Name
        ARCExpect.validationCase (TestID.Name $"{INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person First Name``.Name} exists") {
            filledFirstNames
            |> Validate.ParamCollection.ContainsNonKeyParamWithTerm INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person First Name``
        }
        ARCExpect.validationCase (TestID.Name $"{INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person First Name``.Name} is not empty") {
            filledFirstNames
            |> Seq.filter (Param.getTerm >> (=) INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person First Name``)
            |> Seq.iter Validate.Param.ValueIsNotEmpty
        }
        // Investigation Person Last Name
        ARCExpect.validationCase (TestID.Name $"{INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Last Name``.Name} exists") {
            filledLastNames
            |> Validate.ParamCollection.ContainsNonKeyParamWithTerm INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Last Name``
        }
        ARCExpect.validationCase (TestID.Name $"{INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Last Name``.Name} is not empty") {
            filledLastNames
            |> Seq.filter (Param.getTerm >> (=) INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Last Name``)
            |> Seq.iter Validate.Param.ValueIsNotEmpty
        }

        // Investigation Person Email: only filled-in values are checked; the key cell and
        // empty (padded) cells are skipped, so a missing email does not fail this case
        ARCExpect.validationCase (TestID.Name $"{INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Email``.Name} is valid") {
            filledEmails
            |> Seq.filter (Param.getTerm >> (=) INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Investigation Person Email``)
            |> Seq.filter (Param.getValueAsString >> (<>) "Metadata Section Key")
            |> Seq.filter (Param.getValueAsString >> (<>) "")
            |> Seq.iter (Validate.Param.ValueMatchesRegex StringValidationPattern.email)
        }
        // Investigation Person ORCID: same rule as for emails, only filled-in values are checked
        ARCExpect.validationCase (TestID.Name $"{INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Comment[ORCID]``.Name} is valid") {
            filledOrcids
            |> Seq.filter (Param.getTerm >> (=) INVMSO.``Investigation Metadata``.``INVESTIGATION CONTACTS``.``Comment[ORCID]``)
            |> Seq.filter (Param.getValueAsString >> (<>) "Metadata Section Key")
            |> Seq.filter (Param.getValueAsString >> (<>) "")
            |> Seq.iter (Validate.Param.ValueMatchesRegex StringValidationPattern.orcid)
        }
        ARCExpect.validationCase (TestID.Name "There is at least one contact which contains E-Mail, ORCID and affiliation") {
            // Index 0 of each column is treated as the row's key cell, so a length of 1 means
            // that no contact was entered; a length of 0 means the contact rows are missing
            // altogether, which is reported the same way.
            if filledFirstNames.Length <= 1 then
                failwith "There are zero contacts"
            else 
                let hasValue (column : IParam list) i =
                    column.[i] |> Param.getValueAsString |> (<>) ""
                // All filled columns share the same length, so every index is valid for each of them.
                let hasCompleteContact =
                    seq { 1 .. filledFirstNames.Length - 1 }
                    |> Seq.exists (fun i ->
                        hasValue filledOrcids i
                        && hasValue filledEmails i
                        && hasValue filledAffiliations i)
                if not hasCompleteContact then
                    failwith "There is no contact with E-Mail, ORCID and affiliation"
        }
        // License file exists
        ARCExpect.validationCase (TestID.Name "License exists") {
            // Ordinal, case-insensitive comparison: the match must not depend on the culture
            // of the machine running the validation (culture-sensitive lowercasing turns
            // "LICENSE" into a different string under e.g. a Turkish culture).
            let isLicenseFile (path : string) =
                System.String.Equals(path, "license", System.StringComparison.OrdinalIgnoreCase)
                || System.String.Equals(path, "license.md", System.StringComparison.OrdinalIgnoreCase)
            absoluteDirectoryPaths
            |> Validate.ParamCollection.SatisfiesPredicate (
                Seq.exists (Param.getValueAsString >> isLicenseFile)
            )
        }
    ]

// Execution:
// Bundle the frontmatter metadata and the validation cases into a package, registering
// every case as critical, then run that package with the ARC directory as base path.

let validationPackage =
    Setup.ValidationPackage(
        metadata = Setup.Metadata(PACKAGE_METADATA),
        CriticalValidationCases = [cases]
    )

validationPackage
|> Execute.ValidationPipeline(basePath = arcDir)

Available versions
Version Released on
3.2.0 04/24/2026
3.1.0 12/04/2025
3.0.2 12/04/2025
2.1.0 08/08/2024
2.0.1 06/21/2024
2.0.0 06/21/2024
1.0.0 06/21/2024