···11+(lang dune 3.20)
22+(name cff)
33+(generate_opam_files true)
44+(license ISC)
55+(authors "The ocaml-cff programmers")
66+(maintainers "anil@recoil.org")
77+(source (github avsm/ocaml-cff))
88+99+(package
1010+ (name cff)
1111+ (synopsis "Citation File Format (CFF) codec for OCaml")
1212+ (description
1313+ "A library for parsing and generating CITATION.cff files following the CFF 1.2.0 specification. Provides findlib subpackages: cff.unix for Unix file I/O and cff.eio for Eio-based I/O.")
1414+ (depends
1515+ (ocaml (>= 4.14.0))
1616+ ptime
1717+ ISO3166
1818+ spdx_licenses
1919+ jsont
2020+ yamlt
2121+ bytesrw
2222+ eio
2323+ bytesrw-eio))
+31
lib/cff.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Citation File Format (CFF) codec for OCaml. *)
(* Public namespace: every alias below re-exports one of the flat [Cff_*]
   modules (or one of their submodules) under a shorter name. *)

(* Core value modules *)
module Config = Cff_config
module Date = Cff_date
module Country = Cff_country
module License = Cff_license

(* Enumerations, all taken from [Cff_enums] *)
module Identifier_type = Cff_enums.Identifier_type
module Reference_type = Cff_enums.Reference_type
module Status = Cff_enums.Status
module Cff_type = Cff_enums.Cff_type

(* Address and contact records, both taken from [Cff_address] *)
module Address = Cff_address.Address
module Contact = Cff_address.Contact

(* Authors: the whole [Cff_author] module plus shortcuts to its submodules *)
module Author = Cff_author
module Name = Cff_author.Name
module Person = Cff_author.Person
module Entity = Cff_author.Entity

(* Identifiers and bibliographic references *)
module Identifier = Cff_identifier
module Reference = Cff_reference

(* Re-export everything defined in [Cff_root] at the top level of this
   module. *)
include Cff_root
+194
lib/cff.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Citation File Format (CFF) codec for OCaml.
77+88+ This library provides types and codecs for the
99+ {{:https://citation-file-format.github.io/}Citation File Format (CFF)}
1010+ version 1.2.0, a human- and machine-readable format for software and
1111+ dataset citation metadata.
1212+1313+ CFF files are plain text files named [CITATION.cff] written in
1414+ {{:https://yaml.org/}YAML 1.2}. They provide citation metadata for
1515+ software and datasets, enabling proper academic credit for research
1616+ software.
1717+1818+ {1 Overview}
1919+2020+ A minimal [CITATION.cff] file requires four fields:
2121+ - [cff-version]: The CFF schema version (currently ["1.2.0"])
2222+ - [message]: Instructions for citing the work
2323+ - [title]: The name of the software or dataset
2424+ - [authors]: A list of persons and/or entities
2525+2626+ {1 Quick Start}
2727+2828+ {2 Creating a CFF record}
2929+3030+ {[
3131+ let author = Cff.Author.Person
3232+ (Cff.Person.make ~family_names:"Smith" ~given_names:"Jane" ()) in
3333+ let cff = Cff.make_simple
3434+ ~title:"My Research Software"
3535+ ~authors:[author]
3636+ ~version:"1.0.0"
3737+ ~doi:"10.5281/zenodo.1234567"
3838+ ()
3939+ ]}
4040+4141+ {2 File I/O}
4242+4343+ For file operations, use the backend-specific subpackages:
4444+ - [cff.unix] - Unix file I/O using [In_channel]/[Out_channel]
4545+ - [cff.eio] - Eio-based I/O using [bytesrw-eio]
4646+4747+ Example with [cff.unix]:
4848+ {[
4949+ match Cff_unix.of_file "CITATION.cff" with
5050+ | Ok cff -> Printf.printf "Title: %s\n" (Cff.title cff)
5151+ | Error msg -> Printf.eprintf "Error: %s\n" msg
5252+ ]}
5353+5454+ {1 Module Structure}
5555+5656+ The library uses a flat internal structure ([Cff_author], [Cff_date], etc.)
5757+ but exposes a convenient nested API through module aliases:
5858+5959+ - {!module:Author} - Person and entity types for authorship
6060+ - {!module:Reference} - Bibliographic reference with 60+ fields
6161+ - {!module:Identifier} - DOI, URL, SWH, and other identifiers
6262+ - {!module:License} - SPDX license identifiers
6363+ - {!module:Date} - ISO 8601 date handling
6464+6565+ {1 CFF Specification}
6666+6767+ This implementation follows the
6868+ {{:https://github.com/citation-file-format/citation-file-format}CFF 1.2.0 specification}.
6969+ Key concepts:
7070+7171+ - {b Authors}: Can be persons (with family/given names) or entities
7272+ (organizations, identified by a [name] field)
7373+ - {b References}: Bibliography entries that the work cites or depends on
7474+ - {b Preferred citation}: An alternate work to cite instead of the
7575+ software itself (e.g., a journal article about the software)
7676+ - {b Identifiers}: Typed identifiers including DOIs, URLs, and
7777+ Software Heritage IDs (SWH)
7878+ - {b Licenses}: SPDX license identifiers; multiple licenses imply OR
7979+8080+ {1 Core Types} *)
8181+8282+(** Configuration for validation strictness. *)
8383+module Config = Cff_config
8484+8585+(** Date representation as [(year, month, day)] tuple.
8686+8787+ CFF uses ISO 8601 dates in [YYYY-MM-DD] format (e.g., ["2024-01-15"]). *)
8888+module Date = Cff_date
8989+9090+(** ISO 3166-1 alpha-2 country codes (e.g., ["US"], ["DE"], ["GB"]).
9191+9292+ Used for author and entity addresses. *)
9393+module Country = Cff_country
9494+9595+(** SPDX license identifiers.
9696+9797+ CFF uses {{:https://spdx.org/licenses/}SPDX license identifiers} for
9898+ the [license] field. Multiple licenses indicate an OR relationship
9999+ (the user may choose any of the listed licenses). *)
100100+module License = Cff_license
101101+102102+(** {1 Enumeration Types} *)
103103+104104+(** Identifier types for the [identifiers] field.
105105+106106+ - [`Doi] - Digital Object Identifier
107107+ - [`Url] - Web URL
108108+ - [`Swh] - Software Heritage identifier
109109+ - [`Other] - Other identifier type *)
110110+module Identifier_type = Cff_enums.Identifier_type
111111+112112+(** Reference types for bibliographic entries.
113113+114114+ CFF supports 40+ reference types including [`Article], [`Book],
115115+ [`Software], [`Conference_paper], [`Thesis], [`Dataset], and more.
116116+ See {!Cff_enums.Reference_type} for the complete list. *)
117117+module Reference_type = Cff_enums.Reference_type
118118+119119+(** Publication status for works in progress.
120120+121121+ - [`Preprint] - Available as preprint
122122+ - [`Submitted] - Submitted for publication
123123+ - [`In_press] - Accepted, awaiting publication
124124+ - [`Advance_online] - Published online ahead of print *)
125125+module Status = Cff_enums.Status
126126+127127+(** CFF file type: [`Software] (default) or [`Dataset]. *)
128128+module Cff_type = Cff_enums.Cff_type
129129+130130+(** {1 Address and Contact Information} *)
131131+132132+(** Physical address with street, city, region, postal code, and country. *)
133133+module Address = Cff_address.Address
134134+135135+(** Contact information: email, telephone, fax, website, and ORCID. *)
136136+module Contact = Cff_address.Contact
137137+138138+(** {1 Authors and Entities} *)
139139+140140+(** Authors as a discriminated union of {!Person} or {!Entity}.
141141+142142+ CFF distinguishes between:
143143+ - {b Persons}: Individual humans with family names, given names, etc.
144144+ - {b Entities}: Organizations, projects, or groups with a [name] field
145145+146146+ When parsing, the presence of a [name] field indicates an entity;
147147+ otherwise, the entry is treated as a person. *)
148148+module Author = Cff_author
149149+150150+(** Person name components: family names, given names, particle, suffix, alias. *)
151151+module Name = Cff_author.Name
152152+153153+(** A person (individual author or contributor). *)
154154+module Person = Cff_author.Person
155155+156156+(** An entity (organization, institution, project, conference). *)
157157+module Entity = Cff_author.Entity
158158+159159+(** {1 Identifiers and References} *)
160160+161161+(** Typed identifiers for DOI, URL, SWH, or other schemes.
162162+163163+ Each identifier has a type, value, and optional description. Example:
164164+ {[
165165+ let id = Cff.Identifier.make
166166+ ~type_:`Doi
167167+ ~value:"10.5281/zenodo.1234567"
168168+ ~description:"The concept DOI for all versions"
169169+ ()
170170+ ]} *)
171171+module Identifier = Cff_identifier
172172+173173+(** Bibliographic references with comprehensive metadata.
174174+175175+ References can represent any citable work: articles, books, software,
176176+ datasets, conference papers, theses, etc. The {!Reference} module
177177+ provides 60+ fields organized into logical sub-records:
178178+179179+ - {!Reference.Core} - Type, title, authors, abstract
180180+ - {!Reference.Publication} - Journal, volume, issue, pages
181181+ - {!Reference.Collection} - Proceedings, book series
182182+ - {!Reference.Dates} - Various date fields and year
183183+ - {!Reference.Identifiers} - DOI, URL, ISBN, ISSN, etc.
184184+ - {!Reference.Entities} - Editors, publisher, institution
185185+ - {!Reference.Metadata} - Keywords, license, notes
186186+ - {!Reference.Technical} - Commit, version, format *)
187187+module Reference = Cff_reference
188188+189189+(** {1 Root CFF Type}
190190+191191+ The main [t] type represents a complete [CITATION.cff] file. It includes
192192+ the {!module:Cff_root} interface with all required and optional fields. *)
193193+194194+include module type of Cff_root
+113
lib/cff_address.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Address and contact information for CFF. *)
(** Physical address record; every component is optional. *)
module Address = struct
  type t = {
    address : string option;
    city : string option;
    region : string option;
    post_code : string option;
    country : string option; (* ISO 3166-1 alpha-2 *)
  }

  (** Build an address from values that are already options. *)
  let of_options ~address ~city ~region ~post_code ~country =
    { address; city; region; post_code; country }

  (** Build an address; any component may simply be left out. *)
  let make ?address ?city ?region ?post_code ?country () =
    of_options ~address ~city ~region ~post_code ~country

  (** The address in which every component is [None]. *)
  let empty = make ()

  (* Accessors, one per record field. *)
  let address { address; _ } = address
  let city { city; _ } = city
  let region { region; _ } = region
  let post_code { post_code; _ } = post_code
  let country { country; _ } = country

  (** [true] exactly when no component is set. *)
  let is_empty = function
    | {
        address = None;
        city = None;
        region = None;
        post_code = None;
        country = None;
      } ->
        true
    | _ -> false

  (** Print the components that are present, in field order, separated by
      [", "]. Prints nothing for an empty address. *)
  let pp ppf { address; city; region; post_code; country } =
    [ address; city; region; post_code; country ]
    |> List.filter_map Fun.id
    |> String.concat ", "
    |> Format.pp_print_string ppf

  (** Append the five optional string members ("address", "city", "region",
      "post-code", "country") to the object map [obj], in that order.
      [get] extracts the address from the enclosing value when encoding. *)
  let jsont_fields ~get obj =
    (* One optional string member whose encoder reads through [get]. *)
    let field key project map =
      Jsont.Object.opt_mem key Jsont.string
        ~enc:(fun parent -> project (get parent))
        map
    in
    obj
    |> field "address" address
    |> field "city" city
    |> field "region" region
    |> field "post-code" post_code
    |> field "country" country
end
(** Contact information; every field is optional. *)
module Contact = struct
  type t = {
    email : string option;
    tel : string option;
    fax : string option;
    website : string option;
    orcid : string option;
  }

  (** Contact record with every field set to [None]. *)
  let empty = {
    email = None;
    tel = None;
    fax = None;
    website = None;
    orcid = None;
  }

  (** Build a contact record; any field may be left out. *)
  let make ?email ?tel ?fax ?website ?orcid () =
    { email; tel; fax; website; orcid }

  (** Build a contact record from values that are already options. *)
  let of_options ~email ~tel ~fax ~website ~orcid =
    { email; tel; fax; website; orcid }

  (* Accessors, one per record field. *)
  let email t = t.email
  let tel t = t.tel
  let fax t = t.fax
  let website t = t.website
  let orcid t = t.orcid

  (** [true] exactly when no field is set. *)
  let is_empty t =
    t.email = None && t.tel = None && t.fax = None &&
    t.website = None && t.orcid = None

  (** Print every field that is present as ["key: value"], in field order,
      separated by [", "]. Prints nothing for an empty record. *)
  let pp ppf t =
    let parts = List.filter_map (fun (k, v) ->
      Option.map (fun v -> k ^ ": " ^ v) v
    ) [
      ("email", t.email);
      ("tel", t.tel);
      (* [fax] is a regular field of the record and must be printed like
         the others; it used to be left out of this list. *)
      ("fax", t.fax);
      ("website", t.website);
      ("orcid", t.orcid);
    ] in
    Format.pp_print_string ppf (String.concat ", " parts)

  (** Append the five optional string members ("email", "tel", "fax",
      "website", "orcid") to the object map [obj], in that order.
      [get] extracts the contact record from the enclosing value when
      encoding. *)
  let jsont_fields ~get obj =
    obj
    |> Jsont.Object.opt_mem "email" Jsont.string ~enc:(fun x -> (get x).email)
    |> Jsont.Object.opt_mem "tel" Jsont.string ~enc:(fun x -> (get x).tel)
    |> Jsont.Object.opt_mem "fax" Jsont.string ~enc:(fun x -> (get x).fax)
    |> Jsont.Object.opt_mem "website" Jsont.string ~enc:(fun x -> (get x).website)
    |> Jsont.Object.opt_mem "orcid" Jsont.string ~enc:(fun x -> (get x).orcid)
end
+195
lib/cff_address.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Physical address and contact information for CFF.
77+88+ CFF includes address and contact fields for both persons and entities.
99+ This module provides types for these shared fields.
1010+1111+ {1 Address Fields}
1212+1313+ Physical address components appear on both persons and entities:
1414+1515+ - [address]: Street address (e.g., ["123 Main St"])
1616+ - [city]: City name (e.g., ["Cambridge"])
1717+ - [region]: State, province, or region (e.g., ["Massachusetts"])
1818+ - [post-code]: Postal/ZIP code (e.g., ["02139"])
1919+ - [country]: ISO 3166-1 alpha-2 country code (e.g., ["US"])
2020+2121+ {1 Contact Fields}
2222+2323+ Contact information available for persons and entities:
2424+2525+ - [email]: Email address
2626+ - [tel]: Telephone number
2727+ - [fax]: Fax number
2828+ - [website]: Website URL
2929+ - [orcid]: ORCID identifier URL (for researchers)
3030+3131+ {1 Example}
3232+3333+ {[
3434+ authors:
3535+ - family-names: Smith
3636+ given-names: Jane
3737+ affiliation: MIT
3838+ address: 77 Massachusetts Avenue
3939+ city: Cambridge
4040+ region: Massachusetts
4141+ post-code: "02139"
4242+ country: US
4343+ email: jsmith@mit.edu
4444+ orcid: https://orcid.org/0000-0001-2345-6789
4545+ ]} *)
4646+4747+(** Physical address information.
4848+4949+ All fields are optional; an empty address is valid. *)
5050+module Address : sig
5151+ type t
5252+ (** Physical address record. *)
5353+5454+ val empty : t
5555+ (** Empty address with all fields [None]. *)
5656+5757+ val make :
5858+ ?address:string ->
5959+ ?city:string ->
6060+ ?region:string ->
6161+ ?post_code:string ->
6262+ ?country:string ->
6363+ unit -> t
6464+ (** Create an address with optional fields.
6565+6666+ @param address Street address
6767+ @param city City name
6868+ @param region State, province, or administrative region
6969+ @param post_code Postal code, ZIP code, or postcode
7070+ @param country ISO 3166-1 alpha-2 country code *)
7171+7272+ val of_options :
7373+ address:string option ->
7474+ city:string option ->
7575+ region:string option ->
7676+ post_code:string option ->
7777+ country:string option ->
7878+ t
7979+ (** Create an address from option values directly.
8080+8181+ Used internally by jsont decoders where fields are decoded as options. *)
8282+8383+ val address : t -> string option
8484+ (** Street address (e.g., ["77 Massachusetts Avenue"]). *)
8585+8686+ val city : t -> string option
8787+ (** City name (e.g., ["Cambridge"], ["London"]). *)
8888+8989+ val region : t -> string option
9090+ (** State, province, or region (e.g., ["Massachusetts"], ["Bavaria"]). *)
9191+9292+ val post_code : t -> string option
9393+ (** Postal or ZIP code (e.g., ["02139"], ["W1A 1AA"]). *)
9494+9595+ val country : t -> string option
9696+ (** ISO 3166-1 alpha-2 country code (e.g., ["US"], ["DE"], ["GB"]). *)
9797+9898+ val is_empty : t -> bool
9999+ (** [true] if all fields are [None]. *)
100100+101101+ val pp : Format.formatter -> t -> unit
102102+ (** Pretty-print the address. *)
103103+104104+ val jsont_fields :
105105+ get:('a -> t) ->
106106+ ('a, string option -> string option -> string option ->
107107+ string option -> string option -> 'b) Jsont.Object.map ->
108108+ ('a, 'b) Jsont.Object.map
109109+ (** Add address fields to a jsont object builder.
110110+111111+ This adds the five address fields (address, city, region, post-code,
112112+ country) to an object codec. The decoder function must accept five
113113+ [string option] arguments in that order.
114114+115115+ @param get Extracts the address from the parent type for encoding *)
116116+end
117117+118118+(** Contact information.
119119+120120+ Electronic contact details for persons and entities. All fields
121121+ are optional. *)
122122+module Contact : sig
123123+ type t
124124+ (** Contact information record. *)
125125+126126+ val empty : t
127127+ (** Empty contact with all fields [None]. *)
128128+129129+ val make :
130130+ ?email:string ->
131131+ ?tel:string ->
132132+ ?fax:string ->
133133+ ?website:string ->
134134+ ?orcid:string ->
135135+ unit -> t
136136+ (** Create contact information with optional fields.
137137+138138+ @param email Email address
139139+ @param tel Telephone number (any format)
140140+ @param fax Fax number (any format)
141141+ @param website Website URL
142142+ @param orcid ORCID identifier URL *)
143143+144144+ val of_options :
145145+ email:string option ->
146146+ tel:string option ->
147147+ fax:string option ->
148148+ website:string option ->
149149+ orcid:string option ->
150150+ t
151151+ (** Create contact info from option values directly.
152152+153153+ Used internally by jsont decoders where fields are decoded as options. *)
154154+155155+ val email : t -> string option
156156+ (** Email address (e.g., ["jane.smith\@example.org"]). *)
157157+158158+ val tel : t -> string option
159159+ (** Telephone number. No specific format is required. *)
160160+161161+ val fax : t -> string option
162162+ (** Fax number. No specific format is required. *)
163163+164164+ val website : t -> string option
165165+ (** Website URL (e.g., ["https://example.org/~jsmith"]). *)
166166+167167+ val orcid : t -> string option
168168+ (** ORCID identifier as a URL.
169169+170170+ ORCID (Open Researcher and Contributor ID) provides persistent
171171+ digital identifiers for researchers.
172172+173173+ Format: ["https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
174174+175175+ Example: ["https://orcid.org/0000-0001-2345-6789"] *)
176176+177177+ val is_empty : t -> bool
178178+ (** [true] if all fields are [None]. *)
179179+180180+ val pp : Format.formatter -> t -> unit
181181+ (** Pretty-print the contact information. *)
182182+183183+ val jsont_fields :
184184+ get:('a -> t) ->
185185+ ('a, string option -> string option -> string option ->
186186+ string option -> string option -> 'b) Jsont.Object.map ->
187187+ ('a, 'b) Jsont.Object.map
188188+ (** Add contact fields to a jsont object builder.
189189+190190+ This adds the five contact fields (email, tel, fax, website, orcid)
191191+ to an object codec. The decoder function must accept five
192192+ [string option] arguments in that order.
193193+194194+ @param get Extracts the contact from the parent type for encoding *)
195195+end
+259
lib/cff_author.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Person, Entity, and Author types for CFF. *)
(** Person name components; every component is optional. *)
module Name = struct
  type t = {
    family_names : string option;
    given_names : string option;
    name_particle : string option;  (* e.g., "von" *)
    name_suffix : string option;    (* e.g., "Jr." *)
    alias : string option;
  }

  (** Name with every component set to [None]. *)
  let empty = {
    family_names = None;
    given_names = None;
    name_particle = None;
    name_suffix = None;
    alias = None;
  }

  (** Build a name; any component may be left out. *)
  let make ?family_names ?given_names ?name_particle ?name_suffix ?alias () =
    { family_names; given_names; name_particle; name_suffix; alias }

  (* Accessors, one per record field. *)
  let family_names t = t.family_names
  let given_names t = t.given_names
  let name_particle t = t.name_particle
  let name_suffix t = t.name_suffix
  let alias t = t.alias

  (** [full_name t] renders the name as "Given Particle Family, Suffix".

      Absent components are skipped. The alias is not part of the result.
      When only the suffix is present it is returned on its own, so the
      result never starts with a dangling [", "]. Returns [""] when no
      component used here is set. *)
  let full_name t =
    let base =
      [ t.given_names; t.name_particle; t.family_names ]
      |> List.filter_map Fun.id
      |> String.concat " "
    in
    match base, t.name_suffix with
    | _, None -> base
    | "", Some suffix -> suffix
    | _, Some suffix -> base ^ ", " ^ suffix

  (** Print the result of {!full_name}. *)
  let pp ppf t =
    Format.pp_print_string ppf (full_name t)
end
(** An individual author or contributor: a name, an optional affiliation,
    an address and contact details. *)
module Person = struct
  type t = {
    name : Name.t;
    affiliation : string option;
    address : Cff_address.Address.t;
    contact : Cff_address.Contact.t;
  }

  (** Build a person. The name components are forwarded to [Name.make];
      [address] and [contact] default to their empty records. *)
  let make ?family_names ?given_names ?name_particle ?name_suffix ?alias
      ?affiliation ?(address = Cff_address.Address.empty)
      ?(contact = Cff_address.Contact.empty) () =
    {
      name =
        Name.make ?family_names ?given_names ?name_particle ?name_suffix
          ?alias ();
      affiliation;
      address;
      contact;
    }

  (* Accessors, one per record field. *)
  let name { name; _ } = name
  let affiliation { affiliation; _ } = affiliation
  let address { address; _ } = address
  let contact { contact; _ } = contact

  (* Shortcuts into the nested name. *)
  let family_names { name; _ } = Name.family_names name
  let given_names { name; _ } = Name.given_names name
  let name_particle { name; _ } = Name.name_particle name
  let name_suffix { name; _ } = Name.name_suffix name
  let alias { name; _ } = Name.alias name
  let full_name { name; _ } = Name.full_name name

  (* Shortcuts into the nested contact record. *)
  let email { contact; _ } = Cff_address.Contact.email contact
  let orcid { contact; _ } = Cff_address.Contact.orcid contact
  let website { contact; _ } = Cff_address.Contact.website contact

  (** Print the full name, followed by the affiliation in parentheses when
      there is one. *)
  let pp ppf person =
    Format.pp_print_string ppf (full_name person);
    match person.affiliation with
    | Some org -> Format.fprintf ppf " (%s)" org
    | None -> ()

  (** Object codec for a person. All members are optional strings and
      unknown members are skipped. *)
  let jsont =
    (* Decoder: receives the members in the order they are declared in the
       pipeline below. *)
    let of_members family_names given_names name_particle name_suffix alias
        affiliation address city region post_code country email tel fax
        website orcid =
      {
        name =
          Name.make ?family_names ?given_names ?name_particle ?name_suffix
            ?alias ();
        affiliation;
        address =
          Cff_address.Address.of_options ~address ~city ~region ~post_code
            ~country;
        contact =
          Cff_address.Contact.of_options ~email ~tel ~fax ~website ~orcid;
      }
    in
    (* Optional string member whose encoder reads from the nested name. *)
    let name_mem key project =
      Jsont.Object.opt_mem key Jsont.string
        ~enc:(fun (p : t) -> project p.name)
    in
    Jsont.Object.map ~kind:"Person" of_members
    |> name_mem "family-names" Name.family_names
    |> name_mem "given-names" Name.given_names
    |> name_mem "name-particle" Name.name_particle
    |> name_mem "name-suffix" Name.name_suffix
    |> name_mem "alias" Name.alias
    |> Jsont.Object.opt_mem "affiliation" Jsont.string ~enc:affiliation
    |> Cff_address.Address.jsont_fields ~get:address
    |> Cff_address.Contact.jsont_fields ~get:contact
    |> Jsont.Object.skip_unknown
    |> Jsont.Object.finish
end
(** Optional start and end dates of an entity (e.g., a conference). *)
module Event_dates = struct
  type t = {
    date_start : Cff_date.t option;
    date_end : Cff_date.t option;
  }

  (** Build a date pair; either date may be left out. *)
  let make ?date_start ?date_end () = { date_start; date_end }

  (** The pair with neither date set. *)
  let empty = make ()

  (* Accessors. *)
  let date_start { date_start; _ } = date_start
  let date_end { date_end; _ } = date_end

  (** [true] exactly when neither date is set. *)
  let is_empty { date_start; date_end } =
    Option.is_none date_start && Option.is_none date_end

  (** Print ["START - END"], ["START -"] or ["- END"] depending on which
      dates are present; print nothing when both are absent. *)
  let pp ppf { date_start; date_end } =
    match date_start, date_end with
    | None, None -> ()
    | Some first, None -> Format.fprintf ppf "%a -" Cff_date.pp first
    | None, Some last -> Format.fprintf ppf "- %a" Cff_date.pp last
    | Some first, Some last ->
        Format.fprintf ppf "%a - %a" Cff_date.pp first Cff_date.pp last
end
(** An entity (organization, team, conference, etc.), identified by a
    mandatory [name]. *)
module Entity = struct
  type t = {
    name : string;
    alias : string option;
    address : Cff_address.Address.t;
    contact : Cff_address.Contact.t;
    event_dates : Event_dates.t;
    location : string option;
  }

  (** Build an entity. [address] and [contact] default to their empty
      records; the two dates are packed with [Event_dates.make]. *)
  let make ~name ?alias ?(address = Cff_address.Address.empty)
      ?(contact = Cff_address.Contact.empty) ?date_start ?date_end ?location
      () =
    {
      name;
      alias;
      address;
      contact;
      event_dates = Event_dates.make ?date_start ?date_end ();
      location;
    }

  (* Accessors, one per record field. *)
  let name { name; _ } = name
  let alias { alias; _ } = alias
  let address { address; _ } = address
  let contact { contact; _ } = contact
  let event_dates { event_dates; _ } = event_dates
  let location { location; _ } = location

  (* Shortcuts into the nested contact record. *)
  let email { contact; _ } = Cff_address.Contact.email contact
  let orcid { contact; _ } = Cff_address.Contact.orcid contact
  let website { contact; _ } = Cff_address.Contact.website contact

  (** Print the name, followed by the alias in parentheses when there is
      one. *)
  let pp ppf { name; alias; _ } =
    Format.pp_print_string ppf name;
    match alias with
    | Some aka -> Format.fprintf ppf " (%s)" aka
    | None -> ()

  (** Object codec for an entity. ["name"] is the only required member and
      unknown members are skipped. *)
  let jsont =
    (* Decoder: receives the members in the order they are declared in the
       pipeline below. *)
    let of_members name alias address city region post_code country email tel
        fax website orcid date_start date_end location =
      {
        name;
        alias;
        address =
          Cff_address.Address.of_options ~address ~city ~region ~post_code
            ~country;
        contact =
          Cff_address.Contact.of_options ~email ~tel ~fax ~website ~orcid;
        event_dates = Event_dates.make ?date_start ?date_end ();
        location;
      }
    in
    (* Optional date member whose encoder reads from the event dates. *)
    let date_mem key project =
      Jsont.Object.opt_mem key Cff_date.jsont
        ~enc:(fun (e : t) -> project e.event_dates)
    in
    Jsont.Object.map ~kind:"Entity" of_members
    |> Jsont.Object.mem "name" Jsont.string ~enc:name
    |> Jsont.Object.opt_mem "alias" Jsont.string ~enc:alias
    |> Cff_address.Address.jsont_fields ~get:address
    |> Cff_address.Contact.jsont_fields ~get:contact
    |> date_mem "date-start" Event_dates.date_start
    |> date_mem "date-end" Event_dates.date_end
    |> Jsont.Object.opt_mem "location" Jsont.string ~enc:location
    |> Jsont.Object.skip_unknown
    |> Jsont.Object.finish
end
(** An author: either an individual person or an entity. *)
type t =
  | Person of Person.t
  | Entity of Entity.t

(** Wrap a person as an author. *)
let person individual = Person individual

(** Wrap an entity as an author. *)
let entity organization = Entity organization

(** Display name: the full name of a person, or the name of an entity. *)
let name author =
  match author with
  | Person individual -> Person.full_name individual
  | Entity organization -> Entity.name organization

(** ORCID of the author, if any. *)
let orcid author =
  match author with
  | Person individual -> Person.orcid individual
  | Entity organization -> Entity.orcid organization

(** Email address of the author, if any. *)
let email author =
  match author with
  | Person individual -> Person.email individual
  | Entity organization -> Entity.email organization

(** Print the author with the printer of the wrapped person or entity. *)
let pp ppf author =
  match author with
  | Person individual -> Person.pp ppf individual
  | Entity organization -> Entity.pp ppf organization
(* Author codec built on top of the generic JSON codec.

   Decoding looks at the generic JSON value first: an object that has a
   "name" member is decoded as an Entity, anything else is decoded as a
   Person. Encoding delegates to the codec of the wrapped value. *)
let jsont =
  (* Does this JSON value look like an entity, i.e. is it an object with a
     "name" member? *)
  let is_entity_json json =
    match json with
    | Jsont.Object (members, _) -> (
        match Jsont.Json.find_mem "name" members with
        | Some _ -> true
        | None -> false)
    | _ -> false
  in
  let decode json =
    match is_entity_json json with
    | true -> (
        match Jsont.Json.decode' Entity.jsont json with
        | Ok organization -> Entity organization
        | Error problem ->
            Jsont.Error.msgf Jsont.Meta.none "Invalid entity: %s"
              (Jsont.Error.to_string problem))
    | false -> (
        match Jsont.Json.decode' Person.jsont json with
        | Ok individual -> Person individual
        | Error problem ->
            Jsont.Error.msgf Jsont.Meta.none "Invalid person: %s"
              (Jsont.Error.to_string problem))
  in
  let encode author =
    let encoded =
      match author with
      | Person individual -> Jsont.Json.encode' Person.jsont individual
      | Entity organization -> Jsont.Json.encode' Entity.jsont organization
    in
    (* An encoding failure is treated as impossible here. *)
    match encoded with
    | Ok json -> json
    | Error _ -> assert false
  in
  Jsont.map ~dec:decode ~enc:encode Jsont.json
+377
lib/cff_author.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Authors for CFF: persons and entities.
77+88+ CFF distinguishes between two types of authors:
99+1010+ - {b Persons}: Individual humans identified by name components
1111+ (family names, given names, etc.)
1212+ - {b Entities}: Organizations, institutions, teams, projects, or
1313+ conferences identified by a single [name] field
1414+1515+ When parsing YAML, the library discriminates based on the presence
1616+ of a [name] field: if present, the entry is an entity; otherwise,
1717+ it's a person.
1818+1919+ {1 Name Components}
2020+2121+ CFF follows academic citation conventions for person names:
2222+2323+ - {b family-names}: Last name/surname (e.g., ["Smith"], ["van Rossum"])
2424+ - {b given-names}: First name(s) (e.g., ["Jane"], ["Guido"])
2525+ - {b name-particle}: Connector before family name (e.g., ["von"], ["van"], ["de"])
2626+ - {b name-suffix}: Generational suffix (e.g., ["Jr."], ["III"])
2727+ - {b alias}: Nickname or pseudonym
2828+2929+ {1 Entity Types}
3030+3131+ Entities can represent various organizations:
3232+3333+ - Research institutions and universities
3434+ - Companies and corporations
3535+ - Government agencies
3636+ - Open source projects and communities
3737+ - Academic conferences (with date-start/date-end)
3838+ - Standards bodies
3939+4040+ {1 Example}
4141+4242+ {[
4343+ (* A person author with contact info *)
4444+ let contact = Cff.Address.Contact.make
4545+ ~orcid:"https://orcid.org/0000-0001-2345-6789" () in
4646+ let jane = Cff.Author.Person (Cff.Author.Person.make
4747+ ~family_names:"Smith"
4848+ ~given_names:"Jane A."
4949+ ~affiliation:"MIT"
5050+ ~contact
5151+ ())
5252+5353+ (* A person with name particle *)
5454+ let guido = Cff.Author.Person (Cff.Author.Person.make
5555+ ~family_names:"Rossum"
5656+ ~given_names:"Guido"
5757+ ~name_particle:"van"
5858+ ())
5959+6060+ (* An organization entity *)
6161+ let address = Cff.Address.Address.make
6262+ ~city:"San Francisco" ~country:"US" () in
6363+ let contact = Cff.Address.Contact.make
6464+ ~website:"https://mozilla.org" () in
6565+ let mozilla = Cff.Author.Entity (Cff.Author.Entity.make
6666+ ~name:"Mozilla Foundation"
6767+ ~address ~contact
6868+ ())
6969+7070+ (* A conference entity with dates *)
7171+ let conf = Cff.Author.Entity (Cff.Author.Entity.make
7272+ ~name:"ICSE 2024"
7373+ ~date_start:(Cff.Date.of_ymd ~year:2024 ~month:4 ~day:14)
7474+ ~date_end:(Cff.Date.of_ymd ~year:2024 ~month:4 ~day:20)
7575+ ~location:"Lisbon, Portugal"
7676+ ())
7777+ ]}

    {1 Names} *)
8080+8181+(** Name components for persons.
8282+8383+ CFF name handling follows scholarly citation conventions to properly
8484+ represent names from various cultures and naming traditions. *)
8585+module Name : sig
8686+ type t
 (** Abstract collection of name components. Values are built with {!make}
 (or taken from {!empty}) and read back through the accessors below. *)
8787+8888+ val empty : t
8989+ (** Empty name with all components as [None]. *)
9090+9191+ val make :
9292+ ?family_names:string ->
9393+ ?given_names:string ->
9494+ ?name_particle:string ->
9595+ ?name_suffix:string ->
9696+ ?alias:string ->
9797+ unit -> t
9898+ (** Create a name with optional components.
9999+100100+ @param family_names Last name/surname
101101+ @param given_names First name(s)
102102+ @param name_particle Connector like ["von"], ["van"], ["de"]
103103+ @param name_suffix Generational suffix like ["Jr."], ["III"]
104104+ @param alias Nickname or pseudonym *)
105105+106106+ val family_names : t -> string option
107107+ (** The person's family name (surname, last name). *)
108108+109109+ val given_names : t -> string option
110110+ (** The person's given name(s) (first name, forenames). *)
111111+112112+ val name_particle : t -> string option
113113+ (** Name connector appearing before family name.
114114+115115+ Examples: ["von"] in "John von Neumann",
116116+ ["van"] in "Vincent van Gogh". *)
117117+118118+ val name_suffix : t -> string option
119119+ (** Generational or honorary suffix.
120120+121121+ Examples: ["Jr."], ["Sr."], ["III"], ["PhD"]. *)
122122+123123+ val alias : t -> string option
124124+ (** Nickname, pseudonym, or alternative name.
125125+126126+ Example: ["Tim"] for "Timothy", ["DHH"] for "David Heinemeier Hansson". *)
127127+128128+ val full_name : t -> string
129129+ (** Format name as "Given Particle Family, Suffix".
130130+131131+ Examples:
132132+ - ["Jane Smith"]
133133+ - ["Guido van Rossum"]
134134+ - ["John Smith, Jr."] *)
135135+136136+ val pp : Format.formatter -> t -> unit
137137+ (** Pretty-print the full name. *)
138138+end
139139+140140+(** Individual person (author, contributor, editor, etc.).
141141+142142+ A person represents a human contributor with:
143143+ - Name components (required: at least family or given names)
144144+ - Optional affiliation (institution, company)
145145+ - Optional physical address
146146+ - Optional contact information (email, ORCID, website) *)
147147+module Person : sig
148148+ type t
 (** Abstract person record. Values are built with {!make} and read back
 through the accessors below. *)
149149+150150+ val make :
151151+ ?family_names:string ->
152152+ ?given_names:string ->
153153+ ?name_particle:string ->
154154+ ?name_suffix:string ->
155155+ ?alias:string ->
156156+ ?affiliation:string ->
157157+ ?address:Cff_address.Address.t ->
158158+ ?contact:Cff_address.Contact.t ->
159159+ unit -> t
160160+ (** Create a person with optional fields.
161161+162162+ At minimum, provide [family_names] or [given_names].
163163+164164+ @param family_names Last name/surname
165165+ @param given_names First name(s)
166166+ @param name_particle Connector before family name
167167+ @param name_suffix Generational suffix
168168+ @param alias Nickname or pseudonym
169169+ @param affiliation Institution or organization name
170170+ @param address Physical address
171171+ @param contact Contact information (email, ORCID, website, etc.) *)
172172+173173+ val name : t -> Name.t
174174+ (** The person's name components. *)
175175+176176+ val affiliation : t -> string option
177177+ (** The person's institutional affiliation.
178178+179179+ Example: ["Massachusetts Institute of Technology"]. *)
180180+181181+ val address : t -> Cff_address.Address.t
182182+ (** Physical address information.

 NOTE(review): the result is not an [option] even though {!make} takes
 [?address]; presumably an empty address stands in when none was
 supplied. Confirm against the implementation. *)
183183+184184+ val contact : t -> Cff_address.Contact.t
185185+ (** Contact information (email, phone, web, ORCID).

 NOTE(review): not an [option] even though {!make} takes [?contact];
 presumably an empty contact stands in when none was supplied. Confirm
 against the implementation. *)
186186+187187+ (** {2 Convenience Accessors for Name} *)
188188+189189+ val family_names : t -> string option
190190+ (** Shortcut for [Name.family_names (name t)]. *)
191191+192192+ val given_names : t -> string option
193193+ (** Shortcut for [Name.given_names (name t)]. *)
194194+195195+ val name_particle : t -> string option
196196+ (** Shortcut for [Name.name_particle (name t)]. *)
197197+198198+ val name_suffix : t -> string option
199199+ (** Shortcut for [Name.name_suffix (name t)]. *)
200200+201201+ val alias : t -> string option
202202+ (** Shortcut for [Name.alias (name t)]. *)
203203+204204+ val full_name : t -> string
205205+ (** Shortcut for [Name.full_name (name t)]. *)
206206+207207+ (** {2 Convenience Accessors for Contact} *)
208208+209209+ val email : t -> string option
210210+ (** The person's email address. *)
211211+212212+ val orcid : t -> string option
213213+ (** The person's ORCID identifier URL.
214214+215215+ ORCID (Open Researcher and Contributor ID) provides persistent
216216+ digital identifiers for researchers. Format: ["https://orcid.org/XXXX-XXXX-XXXX-XXXX"]. *)
217217+218218+ val website : t -> string option
219219+ (** The person's website URL. *)
220220+221221+ val pp : Format.formatter -> t -> unit
222222+ (** Pretty-print as "Full Name (affiliation)". *)
223223+224224+ val jsont : t Jsont.t
225225+ (** JSON/YAML codec for person records. *)
226226+end
227227+228228+(** Event date range for entities like conferences.
229229+230230+ Some entities (particularly conferences) have associated dates
231231+ when they take place. *)
232232+module Event_dates : sig
233233+ type t
 (** Abstract pair of optional start and end dates. Values are built with
 {!make} (or taken from {!empty}) and read back with {!date_start} and
 {!date_end}. *)
234234+235235+ val empty : t
236236+ (** Empty date range with both dates as [None]. *)
237237+238238+ val make :
239239+ ?date_start:Cff_date.t ->
240240+ ?date_end:Cff_date.t ->
241241+ unit -> t
242242+ (** Create an event date range.
243243+244244+ @param date_start When the event begins
245245+ @param date_end When the event ends *)
246246+247247+ val date_start : t -> Cff_date.t option
248248+ (** The start date of the event. *)
249249+250250+ val date_end : t -> Cff_date.t option
251251+ (** The end date of the event. *)
252252+253253+ val is_empty : t -> bool
254254+ (** [true] if both dates are [None]. *)
255255+256256+ val pp : Format.formatter -> t -> unit
257257+ (** Pretty-print as "YYYY-MM-DD - YYYY-MM-DD". *)
258258+end
259259+260260+(** Organization, institution, project, or conference.
261261+262262+ An entity represents a non-person author or contributor, such as:
263263+ - Research institutions (["MIT"], ["CERN"])
264264+ - Companies (["Google"], ["Mozilla Foundation"])
265265+ - Government agencies (["NASA"], ["NIH"])
266266+ - Open source projects (["The Rust Project"])
267267+ - Academic conferences (["ICSE 2024"])
268268+ - Standards bodies (["IEEE"], ["W3C"])
269269+270270+ Entities are distinguished from persons in YAML by the presence
271271+ of a required [name] field (persons have [family-names]/[given-names]
272272+ instead). *)
273273+module Entity : sig
274274+ type t
 (** Abstract entity record. Values are built with {!make} and read back
 through the accessors below. *)
275275+276276+ val make :
277277+ name:string ->
278278+ ?alias:string ->
279279+ ?address:Cff_address.Address.t ->
280280+ ?contact:Cff_address.Contact.t ->
281281+ ?date_start:Cff_date.t ->
282282+ ?date_end:Cff_date.t ->
283283+ ?location:string ->
284284+ unit -> t
285285+ (** Create an entity.
286286+287287+ @param name The entity's official name (required)
288288+ @param alias Short name or acronym
289289+ @param address Physical address
290290+ @param contact Contact information (email, website, etc.)
291291+ @param date_start Event start date (for conferences)
292292+ @param date_end Event end date (for conferences)
293293+ @param location Event location description *)
294294+295295+ val name : t -> string
296296+ (** The entity's official name. This field distinguishes entities
297297+ from persons in the YAML format. *)
298298+299299+ val alias : t -> string option
300300+ (** Short name, acronym, or alternative name.
301301+302302+ Example: ["MIT"] for "Massachusetts Institute of Technology". *)
303303+304304+ val address : t -> Cff_address.Address.t
305305+ (** Physical address information.

 NOTE(review): the result is not an [option] even though {!make} takes
 [?address]; presumably an empty address stands in when none was
 supplied. Confirm against the implementation. *)
306306+307307+ val contact : t -> Cff_address.Contact.t
308308+ (** Contact information.

 NOTE(review): not an [option] even though {!make} takes [?contact];
 presumably an empty contact stands in when none was supplied. Confirm
 against the implementation. *)
309309+310310+ val event_dates : t -> Event_dates.t
311311+ (** Event dates (for conferences). *)
312312+313313+ val location : t -> string option
314314+ (** Event location description (for conferences).
315315+316316+ Example: ["Lisbon, Portugal"]. *)
317317+318318+ (** {2 Convenience Accessors for Contact} *)
319319+320320+ val email : t -> string option
321321+ (** The entity's contact email. *)
322322+323323+ val orcid : t -> string option
324324+ (** The entity's ORCID (organizations can have ORCIDs). *)
325325+326326+ val website : t -> string option
327327+ (** The entity's official website URL. *)
328328+329329+ val pp : Format.formatter -> t -> unit
330330+ (** Pretty-print as "Name (alias)". *)
331331+332332+ val jsont : t Jsont.t
333333+ (** JSON/YAML codec for entity records. *)
334334+end
335335+336336+(** {1 Author Discriminated Union}
337337+338338+ The main author type is a sum type that can hold either a person
339339+ or an entity. This matches the CFF specification where authors
340340+ can be either individuals or organizations. *)
341341+342342+type t =
343343+ | Person of Person.t (** An individual person *)
344344+ | Entity of Entity.t (** An organization or entity *)
345345+(** An author: either a person or an entity. *)
346346+347347+val person : Person.t -> t
348348+(** Wrap a person as an author. *)
349349+350350+val entity : Entity.t -> t
351351+(** Wrap an entity as an author. *)
352352+353353+val name : t -> string
354354+(** Get the display name.
355355+356356+ For persons, returns the full formatted name.
357357+ For entities, returns the entity name. *)
358358+359359+val orcid : t -> string option
360360+(** Get the ORCID if present. Works for both persons and entities. *)
361361+362362+val email : t -> string option
363363+(** Get the email if present. Works for both persons and entities. *)
364364+365365+val pp : Format.formatter -> t -> unit
366366+(** Pretty-print the author. *)
367367+368368+val jsont : t Jsont.t
369369+(** JSON/YAML codec that discriminates based on [name] field presence.
370370+371371+ When decoding:
372372+ - If the object has a [name] field -> Entity
373373+ - Otherwise -> Person
374374+375375+ This matches the CFF specification where entities are distinguished
376376+ by having a [name] field while persons have [family-names] and
377377+ [given-names] fields. *)
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Configuration for CFF parsing and validation.
77+88+ CFF files in the wild may contain non-standard or deprecated values.
99+ This module provides configuration options to control validation
1010+ strictness during parsing.
1111+1212+ {1 Validation Modes}
1313+1414+ {2 Strict Mode}
1515+1616+ Validates all fields according to their specifications:
1717+1818+ - URLs must be well-formed
1919+ - Dates must be valid ISO 8601 dates
2020+ - DOIs must match the DOI pattern
2121+ - ORCIDs must be valid ORCID URLs
2222+ - License IDs must be valid SPDX identifiers
2323+2424+ Use strict mode for validating CFF files or when you control the input.
2525+2626+ {2 Lenient Mode}
2727+2828+ Accepts any string value without validation. Use lenient mode when:
2929+3030+ - Parsing CFF files from unknown sources
3131+ - Handling legacy files with deprecated license IDs
3232+ - Round-tripping files without data loss
3333+3434+ {2 Default Mode}
3535+3636+ A balanced approach that:
3737+ - Keeps unknown fields (for round-tripping)
3838+ - Uses lenient validation for most fields
3939+4040+ {1 Unknown Fields}
4141+4242+ The [keep_unknown] option controls handling of unrecognized fields:
4343+4444+ - [true]: Preserve unknown fields in the parsed structure
4545+ - [false]: Silently ignore unknown fields
4646+4747+ Keeping unknown fields allows round-tripping CFF files that contain
4848+ extensions or newer fields not yet supported by this library. *)
4949+5050+type t
5151+(** Configuration type. *)
5252+5353+val default : t
5454+(** Default configuration.
5555+5656+ Uses lenient validation and keeps unknown fields. Suitable for
5757+ general parsing where round-tripping is desired. *)
5858+5959+val strict : t
6060+(** Strict configuration.
6161+6262+ Validates all fields according to CFF 1.2.0 specification.
6363+ Fails on invalid URLs, dates, DOIs, ORCIDs, and license IDs.
6464+6565+ Keeps unknown fields for compatibility. *)
6666+6767+val lenient : t
6868+(** Fully lenient configuration.
6969+7070+ Accepts any string values without validation. Useful for parsing
7171+ malformed or non-standard CFF files. *)
7272+7373+val make :
7474+ ?strict_urls:bool ->
7575+ ?strict_dates:bool ->
7676+ ?strict_dois:bool ->
7777+ ?strict_orcids:bool ->
7878+ ?strict_licenses:bool ->
7979+ ?keep_unknown:bool ->
8080+ unit -> t
8181+(** Create a custom configuration.
8282+8383+ All strictness options default to [false] (lenient).
8484+ [keep_unknown] defaults to [true].
8585+8686+ @param strict_urls Validate URL format
8787+ @param strict_dates Validate date format and values
8888+ @param strict_dois Validate DOI pattern
8989+ @param strict_orcids Validate ORCID format
9090+ @param strict_licenses Validate SPDX license identifiers
9191+ @param keep_unknown Preserve unrecognized fields *)
9292+9393+val strict_urls : t -> bool
9494+(** Whether URL fields are validated. *)
9595+9696+val strict_dates : t -> bool
9797+(** Whether date fields are validated. *)
9898+9999+val strict_dois : t -> bool
100100+(** Whether DOI fields are validated. *)
101101+102102+val strict_orcids : t -> bool
103103+(** Whether ORCID fields are validated. *)
104104+105105+val strict_licenses : t -> bool
106106+(** Whether license identifiers are validated against SPDX. *)
107107+108108+val keep_unknown : t -> bool
109109+(** Whether unknown fields are preserved in the parsed structure. *)
+48
lib/cff_country.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
(** Country codes for CFF, checked against the ISO3166 library. *)

type t = string

(* The candidate is upper-cased before ISO3166 is asked whether it is a
   known alpha-2 code. Note that the error payload carries the upper-cased
   text, not the raw input. *)
let of_string raw =
  let code = String.uppercase_ascii raw in
  match ISO3166.alpha2_of_string code with
  | _ -> Ok code
  | exception Invalid_argument _ -> Error (`Invalid_country code)

let to_string code = code

(* Both ISO3166 calls are evaluated under the same handler, so an
   [Invalid_argument] raised by either one yields [None]. *)
let to_iso3166 code =
  match ISO3166.alpha2_to_country (ISO3166.alpha2_of_string code) with
  | country -> Some country
  | exception Invalid_argument _ -> None

let name code =
  match to_iso3166 code with
  | Some country -> Some (ISO3166.Country.name country)
  | None -> None

let equal = String.equal
let compare = String.compare

let pp ppf code = Format.pp_print_string ppf code

(* Validating codec: decoding goes through [of_string] and reports a Jsont
   error for rejected codes; encoding emits the stored string unchanged. *)
let jsont =
  let dec text =
    match of_string text with
    | Ok code -> code
    | Error (`Invalid_country bad) ->
      Jsont.Error.msgf Jsont.Meta.none
        "Invalid ISO 3166-1 alpha-2 country code: %s" bad
  in
  Jsont.map ~dec ~enc:to_string Jsont.string

(* Non-validating codec: any string is accepted as-is. *)
let jsont_lenient = Jsont.string
+85
lib/cff_country.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** ISO 3166-1 alpha-2 country codes for CFF.
77+88+ CFF uses {{:https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2}
99+ ISO 3166-1 alpha-2} two-letter country codes for the [country]
1010+ field on persons and entities.
1111+1212+ {1 Format}
1313+1414+ Country codes are exactly two uppercase letters:
1515+1616+ - ["US"] - United States
1717+ - ["GB"] - United Kingdom
1818+ - ["DE"] - Germany
1919+ - ["FR"] - France
2020+ - ["JP"] - Japan
2121+ - ["CN"] - China
2222+ - ["AU"] - Australia
2323+ - ["CA"] - Canada
2424+ - ["CH"] - Switzerland
2525+ - ["NL"] - Netherlands
2626+2727+ {1 Validation}
2828+2929+ This module validates country codes against the {!ISO3166} library,
3030+ which maintains the official list of assigned codes.
3131+3232+ {1 Example}
3333+3434+ {[
3535+ authors:
3636+ - family-names: Müller
3737+ given-names: Hans
3838+ city: Berlin
3939+ country: DE
4040+ ]} *)
4141+4242+type t = string
4343+(** An ISO 3166-1 alpha-2 country code (two uppercase letters). *)
4444+4545+val of_string : string -> (t, [> `Invalid_country of string]) result
4646+(** Parse and validate a country code.
4747+4848+ Case-insensitive: ["us"], ["US"], and ["Us"] all produce ["US"].
4949+ Returns [Error (`Invalid_country s)] for unknown codes. *)
5050+5151+val to_string : t -> string
5252+(** Return the uppercase country code. *)
5353+5454+val to_iso3166 : t -> ISO3166.Country.t option
5555+(** Look up the full country record from {!ISO3166}.
5656+5757+ Returns [None] if the code is not in the ISO 3166-1 list. *)
5858+5959+val name : t -> string option
6060+(** Get the country name if the code is valid.
6161+6262+ Examples:
6363+ - [name "US" = Some "United States of America"]
6464+ - [name "GB" = Some "United Kingdom of Great Britain and Northern Ireland"]
6565+ - [name "XX" = None] *)
6666+6767+val equal : t -> t -> bool
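6868+(** Exact, case-sensitive string equality. Codes returned by {!of_string} are already uppercase; a hand-written lowercase string is not equal to its uppercase form. *)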
6969+7070+val compare : t -> t -> int
7171+(** Alphabetical comparison of country codes. *)
7272+7373+val pp : Format.formatter -> t -> unit
7474+(** Pretty-print the country code. *)
7575+7676+val jsont : t Jsont.t
7777+(** JSON/YAML codec that validates country codes.
7878+7979+ Returns an error for invalid ISO 3166-1 alpha-2 codes. *)
8080+8181+val jsont_lenient : t Jsont.t
8282+(** JSON/YAML codec that accepts any string.
8383+8484+ Use this when parsing CFF files that may contain non-standard
8585+ country codes. *)
+49
lib/cff_date.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
(** Date handling for CFF using Ptime. *)

type t = Ptime.date

(* [true] when [s] is non-empty and consists only of ASCII digits. This keeps
   out the extra spellings [int_of_string_opt] understands (signs, [0x..],
   [0b..], underscores), none of which belong in a date. *)
let is_digits s =
  s <> "" && String.for_all (function '0' .. '9' -> true | _ -> false) s

(** [of_string s] parses a [YYYY-MM-DD] date.

    The three dash-separated fields must be plain decimal numbers. Field
    widths are not enforced, so ["2024-1-5"] is still accepted. The
    resulting triple must be a date [Ptime.of_date] accepts, which rejects
    impossible days such as 30 February.

    @return [Ok (year, month, day)], or [Error (`Invalid_date s)] carrying
    the original input. *)
let of_string s =
  let invalid = Error (`Invalid_date s) in
  match String.split_on_char '-' s with
  | [ y; m; d ] when is_digits y && is_digits m && is_digits d ->
    (* [int_of_string_opt] can still fail here on overflow. *)
    (match int_of_string_opt y, int_of_string_opt m, int_of_string_opt d with
     | Some year, Some month, Some day ->
       let date = (year, month, day) in
       (* Ptime performs the calendar check: month lengths and leap years. *)
       (match Ptime.of_date date with
        | Some _ -> Ok date
        | None -> invalid)
     | _ -> invalid)
  | _ -> invalid

(** Format as zero-padded [YYYY-MM-DD]. *)
let to_string (year, month, day) =
  Printf.sprintf "%04d-%02d-%02d" year month day

let year (y, _, _) = y
let month (_, m, _) = m
let day (_, _, d) = d

let equal a b = a = b

(* Lexicographic order on (year, month, day) is chronological order. *)
let compare = Stdlib.compare

let pp ppf date =
  Format.pp_print_string ppf (to_string date)

(* String codec: decoding goes through [of_string] and reports a Jsont error
   on failure; encoding uses [to_string]. *)
let jsont =
  let dec s =
    match of_string s with
    | Ok d -> d
    | Error (`Invalid_date s) ->
      Jsont.Error.msgf Jsont.Meta.none "Invalid date format: %s" s
  in
  let enc date = to_string date in
  Jsont.string
  |> Jsont.map ~dec ~enc
+87
lib/cff_date.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Date handling for CFF.
77+88+ CFF uses ISO 8601 date format ([YYYY-MM-DD]) for all date fields.
99+ This module wraps {!Ptime.date} for date representation and provides
1010+ parsing and formatting functions.
1111+1212+ {1 Date Fields in CFF}
1313+1414+ CFF has several date-related fields at different levels:
1515+1616+ {2 Root Level}
1717+1818+ - [date-released]: When the software/dataset was released
1919+2020+ {2 Reference Level}
2121+2222+ - [date-accessed]: When an online resource was accessed
2323+ - [date-downloaded]: When a resource was downloaded
2424+ - [date-published]: Formal publication date
2525+ - [date-released]: Release date (for software references)
2626+2727+ {2 Entity Level}
2828+2929+ - [date-start]: Event start date (for conferences)
3030+ - [date-end]: Event end date (for conferences)
3131+3232+ {1 Date Format}
3333+3434+ All dates use ISO 8601 format: [YYYY-MM-DD]
3535+3636+ {2 Examples}
3737+3838+ {[
3939+ date-released: 2024-01-15
4040+ date-accessed: 2024-06-30
4141+ ]}
4242+4343+ {1 Year-Only Dates}
4444+4545+ For historical works or when only the year is known, use the [year]
4646+ field (an integer) instead of a full date. *)
4747+4848+type t = Ptime.date
4949+(** A date as [(year, month, day)] tuple.
5050+5151+ The tuple contains:
5252+ - [year]: Four-digit year (e.g., [2024])
5353+ - [month]: Month number (1-12)
5454+ - [day]: Day of month (1-31) *)
5555+5656+val of_string : string -> (t, [> `Invalid_date of string]) result
5757+(** Parse a date from [YYYY-MM-DD] format.
5858+5959+ Returns [Error (`Invalid_date s)] if the string is not a valid date.
6060+ Validates that the date is a real calendar date (e.g., rejects Feb 30). *)
6161+6262+val to_string : t -> string
6363+(** Format a date as [YYYY-MM-DD]. *)
6464+6565+val year : t -> int
6666+(** Extract the year component. *)
6767+6868+val month : t -> int
6969+(** Extract the month component (1-12). *)
7070+7171+val day : t -> int
7272+(** Extract the day component (1-31). *)
7373+7474+val equal : t -> t -> bool
7575+(** Date equality. *)
7676+7777+val compare : t -> t -> int
7878+(** Date comparison (chronological order). *)
7979+8080+val pp : Format.formatter -> t -> unit
8181+(** Pretty-print a date in [YYYY-MM-DD] format. *)
8282+8383+val jsont : t Jsont.t
8484+(** JSON/YAML codec for dates.
8585+8686+ Parses strings in [YYYY-MM-DD] format and serializes back to the
8787+ same format. *)
+241
lib/cff_enums.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Enumeration types for CFF using polymorphic variants. *)
77+88+(** Input signature of the [Make_enum] functor: an enumeration type together
 with its string conversions and a human-readable name for the type. *)
99+module type STRING_ENUM = sig
1010+ type t
 (** The enumeration type. *)
1111+ val of_string : string -> t option
 (** Parse a string form; [None] when the string is not recognised. *)
1212+ val to_string : t -> string
 (** String form of a value. *)
1313+ val type_name : string
 (** Name of the enumeration, used by [Make_enum] in decode error messages. *)
1414+end
(** Extends a {!STRING_ENUM} with equality, ordering, a printer and a Jsont
    codec, all derived from its string conversions. *)
module Make_enum (E : STRING_ENUM) = struct
  include E

  let equal (a : t) (b : t) = a = b
  let compare = Stdlib.compare

  (* The printed form is exactly the [to_string] spelling. *)
  let pp ppf tag = Format.pp_print_string ppf (to_string tag)

  (* String codec: decoding reports a Jsont error naming [type_name] when
     [of_string] does not recognise the text; encoding is [to_string]. *)
  let jsont =
    let dec text =
      match of_string text with
      | Some tag -> tag
      | None ->
        Jsont.Error.msgf Jsont.Meta.none "Invalid %s: %s" type_name text
    in
    Jsont.map ~dec ~enc:to_string Jsont.string
end
(* Identifier schemes accepted in the [identifiers] list. *)
module Identifier_type = Make_enum (struct
  type t = [ `Doi | `Url | `Swh | `Other ]

  let type_name = "identifier type"

  let to_string (tag : t) =
    match tag with
    | `Doi -> "doi"
    | `Url -> "url"
    | `Swh -> "swh"
    | `Other -> "other"

  (* Parsing is derived from [to_string]: the input is compared against the
     spelling of every constructor listed in [all]. *)
  let of_string =
    let all : t list = [ `Doi; `Url; `Swh; `Other ] in
    fun text -> List.find_opt (fun tag -> String.equal (to_string tag) text) all
end)
(* Reference kinds for bibliographic entries. *)
module Reference_type = Make_enum (struct
  type t = [
    | `Art
    | `Article
    | `Audiovisual
    | `Bill
    | `Blog
    | `Book
    | `Catalogue
    | `Conference
    | `Conference_paper
    | `Data
    | `Database
    | `Dictionary
    | `Edited_work
    | `Encyclopedia
    | `Film_broadcast
    | `Generic
    | `Government_document
    | `Grant
    | `Hearing
    | `Historical_work
    | `Legal_case
    | `Legal_rule
    | `Magazine_article
    | `Manual
    | `Map
    | `Multimedia
    | `Music
    | `Newspaper_article
    | `Pamphlet
    | `Patent
    | `Personal_communication
    | `Proceedings
    | `Report
    | `Serial
    | `Slides
    | `Software
    | `Software_code
    | `Software_container
    | `Software_executable
    | `Software_virtual_machine
    | `Sound_recording
    | `Standard
    | `Statute
    | `Thesis
    | `Unpublished
    | `Video
    | `Website
  ]

  let type_name = "reference type"

  (* Single source of truth for the spellings: constructor underscores
     become dashes, everything is lowercase. *)
  let to_string (tag : t) =
    match tag with
    | `Art -> "art"
    | `Article -> "article"
    | `Audiovisual -> "audiovisual"
    | `Bill -> "bill"
    | `Blog -> "blog"
    | `Book -> "book"
    | `Catalogue -> "catalogue"
    | `Conference -> "conference"
    | `Conference_paper -> "conference-paper"
    | `Data -> "data"
    | `Database -> "database"
    | `Dictionary -> "dictionary"
    | `Edited_work -> "edited-work"
    | `Encyclopedia -> "encyclopedia"
    | `Film_broadcast -> "film-broadcast"
    | `Generic -> "generic"
    | `Government_document -> "government-document"
    | `Grant -> "grant"
    | `Hearing -> "hearing"
    | `Historical_work -> "historical-work"
    | `Legal_case -> "legal-case"
    | `Legal_rule -> "legal-rule"
    | `Magazine_article -> "magazine-article"
    | `Manual -> "manual"
    | `Map -> "map"
    | `Multimedia -> "multimedia"
    | `Music -> "music"
    | `Newspaper_article -> "newspaper-article"
    | `Pamphlet -> "pamphlet"
    | `Patent -> "patent"
    | `Personal_communication -> "personal-communication"
    | `Proceedings -> "proceedings"
    | `Report -> "report"
    | `Serial -> "serial"
    | `Slides -> "slides"
    | `Software -> "software"
    | `Software_code -> "software-code"
    | `Software_container -> "software-container"
    | `Software_executable -> "software-executable"
    | `Software_virtual_machine -> "software-virtual-machine"
    | `Sound_recording -> "sound-recording"
    | `Standard -> "standard"
    | `Statute -> "statute"
    | `Thesis -> "thesis"
    | `Unpublished -> "unpublished"
    | `Video -> "video"
    | `Website -> "website"

  (* Parsing is derived from [to_string]: the input is compared against the
     spelling of every constructor listed in [all]. A constructor added to
     [t] must also be added here to become parseable. *)
  let of_string =
    let all : t list = [
      `Art; `Article; `Audiovisual; `Bill; `Blog; `Book; `Catalogue;
      `Conference; `Conference_paper; `Data; `Database; `Dictionary;
      `Edited_work; `Encyclopedia; `Film_broadcast; `Generic;
      `Government_document; `Grant; `Hearing; `Historical_work;
      `Legal_case; `Legal_rule; `Magazine_article; `Manual; `Map;
      `Multimedia; `Music; `Newspaper_article; `Pamphlet; `Patent;
      `Personal_communication; `Proceedings; `Report; `Serial; `Slides;
      `Software; `Software_code; `Software_container; `Software_executable;
      `Software_virtual_machine; `Sound_recording; `Standard; `Statute;
      `Thesis; `Unpublished; `Video; `Website;
    ] in
    fun text -> List.find_opt (fun tag -> String.equal (to_string tag) text) all
end)
(* Publication status values. *)
module Status = Make_enum (struct
  type t = [
    | `Abstract
    | `Advance_online
    | `In_preparation
    | `In_press
    | `Preprint
    | `Submitted
  ]

  let type_name = "status"

  let to_string (tag : t) =
    match tag with
    | `Abstract -> "abstract"
    | `Advance_online -> "advance-online"
    | `In_preparation -> "in-preparation"
    | `In_press -> "in-press"
    | `Preprint -> "preprint"
    | `Submitted -> "submitted"

  (* Parsing is derived from [to_string]: the input is compared against the
     spelling of every constructor listed in [all]. *)
  let of_string =
    let all : t list = [
      `Abstract; `Advance_online; `In_preparation; `In_press; `Preprint;
      `Submitted;
    ] in
    fun text -> List.find_opt (fun tag -> String.equal (to_string tag) text) all
end)
(* Kind of work a CFF file describes. *)
module Cff_type = Make_enum (struct
  type t = [ `Software | `Dataset ]

  let type_name = "CFF type"

  let to_string (tag : t) =
    match tag with
    | `Software -> "software"
    | `Dataset -> "dataset"

  (* Parsing is derived from [to_string]: the input is compared against the
     spelling of both constructors. *)
  let of_string =
    let all : t list = [ `Software; `Dataset ] in
    fun text -> List.find_opt (fun tag -> String.equal (to_string tag) text) all
end)
+289
lib/cff_enums.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Enumeration types for CFF.
77+88+ CFF defines several enumerated types using fixed string values.
99+ This module represents them as polymorphic variants for type safety
1010+ while providing bidirectional conversion to/from strings.
1111+1212+ {1 Identifier Types}
1313+1414+ The [identifiers] field allows typed references to external resources.
1515+1616+ {1 Reference Types}
1717+1818+ CFF supports 40+ reference types for bibliographic entries, covering
1919+ academic publications, software, data, legal documents, and media.
2020+2121+ {1 Publication Status}
2222+2323+ Works in progress can have a status indicating their publication stage.
2424+2525+ {1 CFF Type}
2626+2727+ The top-level CFF file describes either software or a dataset. *)
2828+2929+(** Identifier type for the [identifiers] field.
3030+3131+ Each identifier in the [identifiers] list has a type indicating the
3232+ identifier scheme:
3333+3434+ - [`Doi] - Digital Object Identifier ({{:https://doi.org}doi.org})
3535+ - [`Url] - Web URL
3636+ - [`Swh] - Software Heritage identifier ({{:https://www.softwareheritage.org}softwareheritage.org})
3737+ - [`Other] - Any other identifier type
3838+3939+ {2 Examples}
4040+4141+ {[
4242+ type: doi
4343+ value: 10.5281/zenodo.1234567
4444+ description: The concept DOI for all versions
4545+4646+ type: swh
4747+ value: swh:1:dir:bc286860f423ea7ced246ba7458eef4b4541cf2d
4848+ description: Software Heritage archive
4949+ ]} *)
5050+module Identifier_type : sig
5151+ type t = [ `Doi | `Url | `Swh | `Other ]
5252+ (** Identifier types. *)
5353+5454+ val of_string : string -> t option
5555+ (** Parse from YAML string: ["doi"], ["url"], ["swh"], ["other"]. *)
5656+5757+ val to_string : t -> string
5858+ (** Convert to YAML string representation. *)
5959+6060+ val equal : t -> t -> bool
 (** Equality of identifier types. *)
6161+ val compare : t -> t -> int
 (** Total order on identifier types, consistent with {!equal}. *)
6262+ val pp : Format.formatter -> t -> unit
 (** Print the {!to_string} form. *)
6363+6464+ val jsont : t Jsont.t
6565+ (** JSON/YAML codec. *)
6666+end
6767+6868+(** Reference type for bibliographic entries.
6969+7070+ CFF 1.2.0 supports 40+ reference types covering virtually all forms
7171+ of citable content. The type determines which fields are relevant.
7272+7373+ {2 Academic/Research}
7474+7575+ - [`Article] - Journal article
7676+ - [`Book] - Complete book
7777+ - [`Conference] - Conference as an event
7878+ - [`Conference_paper] - Paper in conference proceedings
7979+ - [`Edited_work] - Edited collection
8080+ - [`Proceedings] - Conference proceedings volume
8181+ - [`Thesis] - Dissertation or thesis
8282+ - [`Report] - Technical report
8383+8484+ {2 Software}
8585+8686+ - [`Software] - General software (default for CFF files)
8787+ - [`Software_code] - Source code specifically
8888+ - [`Software_container] - Container image (Docker, etc.)
8989+ - [`Software_executable] - Binary/executable
9090+ - [`Software_virtual_machine] - VM image
9191+9292+ {2 Data}
9393+9494+ - [`Data] - General data
9595+ - [`Database] - Database
9696+ - [`Dictionary] - Dictionary or lexicon
9797+ - [`Encyclopedia] - Encyclopedia
9898+9999+ {2 Legal}
100100+101101+ - [`Patent] - Patent
102102+ - [`Legal_case] - Legal case
103103+ - [`Legal_rule] - Legal rule or regulation
104104+ - [`Statute] - Statute or law
105105+ - [`Bill] - Legislative bill
106106+ - [`Hearing] - Legislative hearing
107107+108108+ {2 Media}
109109+110110+ - [`Audiovisual] - Audio/video content
111111+ - [`Film_broadcast] - Film or broadcast
112112+ - [`Video] - Video
113113+ - [`Sound_recording] - Audio recording
114114+ - [`Music] - Musical work
115115+ - [`Art] - Artwork
116116+117117+ {2 Publications}
118118+119119+ - [`Magazine_article] - Magazine article
120120+ - [`Newspaper_article] - Newspaper article
121121+ - [`Blog] - Blog post
122122+ - [`Website] - Website
123123+ - [`Pamphlet] - Pamphlet or brochure
124124+ - [`Serial] - Serial publication
125125+ - [`Manual] - Manual or documentation
126126+ - [`Catalogue] - Catalogue
127127+128128+ {2 Other}
129129+130130+ - [`Generic] - Generic reference (fallback)
131131+ - [`Grant] - Research grant
132132+ - [`Government_document] - Government document
133133+ - [`Historical_work] - Historical work
134134+ - [`Map] - Map
135135+ - [`Multimedia] - Multimedia work
136136+ - [`Personal_communication] - Personal communication
137137+ - [`Slides] - Presentation slides
138138+ - [`Standard] - Technical standard
139139+ - [`Unpublished] - Unpublished work *)
140140+module Reference_type : sig
141141+ type t = [
142142+ | `Art
143143+ | `Article
144144+ | `Audiovisual
145145+ | `Bill
146146+ | `Blog
147147+ | `Book
148148+ | `Catalogue
149149+ | `Conference
150150+ | `Conference_paper
151151+ | `Data
152152+ | `Database
153153+ | `Dictionary
154154+ | `Edited_work
155155+ | `Encyclopedia
156156+ | `Film_broadcast
157157+ | `Generic
158158+ | `Government_document
159159+ | `Grant
160160+ | `Hearing
161161+ | `Historical_work
162162+ | `Legal_case
163163+ | `Legal_rule
164164+ | `Magazine_article
165165+ | `Manual
166166+ | `Map
167167+ | `Multimedia
168168+ | `Music
169169+ | `Newspaper_article
170170+ | `Pamphlet
171171+ | `Patent
172172+ | `Personal_communication
173173+ | `Proceedings
174174+ | `Report
175175+ | `Serial
176176+ | `Slides
177177+ | `Software
178178+ | `Software_code
179179+ | `Software_container
180180+ | `Software_executable
181181+ | `Software_virtual_machine
182182+ | `Sound_recording
183183+ | `Standard
184184+ | `Statute
185185+ | `Thesis
186186+ | `Unpublished
187187+ | `Video
188188+ | `Website
189189+ ]
190190+ (** All supported reference types. *)
191191+192192+ val of_string : string -> t option
193193+ (** Parse from YAML string. Hyphenated names like ["conference-paper"]
194194+ map to underscored variants like [`Conference_paper]. *)
195195+196196+ val to_string : t -> string
197197+ (** Convert to YAML string representation.
198198+ Underscored variants like [`Conference_paper] become ["conference-paper"]. *)
199199+200200+ val equal : t -> t -> bool
201201+ val compare : t -> t -> int
202202+ val pp : Format.formatter -> t -> unit
203203+204204+ val jsont : t Jsont.t
205205+ (** JSON/YAML codec. *)
206206+end
207207+208208+(** Publication status for works in progress.
209209+210210+ The [status] field indicates the publication stage of a work that
211211+ is not yet formally published:
212212+213213+ - [`Abstract] - Only an abstract is available
214214+ - [`Advance_online] - Published online ahead of print
215215+ - [`In_preparation] - Being written
216216+ - [`In_press] - Accepted, awaiting publication
217217+ - [`Preprint] - Available as preprint (arXiv, bioRxiv, etc.)
218218+ - [`Submitted] - Submitted for review
219219+220220+ {2 Example}
221221+222222+ {[
223223+ references:
224224+ - type: article
225225+ title: "Our Upcoming Paper"
226226+ authors:
227227+ - family-names: Smith
228228+ given-names: Jane
229229+ journal: "Nature"
230230+ status: submitted
231231+ ]} *)
232232+module Status : sig
233233+ type t = [
234234+ | `Abstract
235235+ | `Advance_online
236236+ | `In_preparation
237237+ | `In_press
238238+ | `Preprint
239239+ | `Submitted
240240+ ]
241241+ (** Publication status values. *)
242242+243243+ val of_string : string -> t option
244244+ (** Parse from YAML string: ["abstract"], ["advance-online"], etc. *)
245245+246246+ val to_string : t -> string
247247+ (** Convert to YAML string representation. *)
248248+249249+ val equal : t -> t -> bool
250250+ val compare : t -> t -> int
251251+ val pp : Format.formatter -> t -> unit
252252+253253+ val jsont : t Jsont.t
254254+ (** JSON/YAML codec. *)
255255+end
256256+257257+(** CFF file type: software or dataset.
258258+259259+ The [type] field at the root level indicates whether the CFF file
260260+ describes software or a dataset:
261261+262262+ - [`Software] - Software project (default if omitted)
263263+ - [`Dataset] - Dataset or data package
264264+265265+ {2 Example}
266266+267267+ {[
268268+ cff-version: "1.2.0"
269269+ type: dataset
270270+ title: "Climate Data 2020-2024"
271271+ # ...
272272+ ]} *)
273273+module Cff_type : sig
274274+ type t = [ `Software | `Dataset ]
275275+ (** CFF file types. *)
276276+277277+ val of_string : string -> t option
278278+ (** Parse from YAML string: ["software"] or ["dataset"]. *)
279279+280280+ val to_string : t -> string
281281+ (** Convert to YAML string representation. *)
282282+283283+ val equal : t -> t -> bool
284284+ val compare : t -> t -> int
285285+ val pp : Format.formatter -> t -> unit
286286+287287+ val jsont : t Jsont.t
288288+ (** JSON/YAML codec. *)
289289+end
+45
lib/cff_identifier.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Identifier type for CFF. *)
(** An identifier: the scheme it belongs to, its value, and an optional
    free-text description. *)
type t = {
  type_ : Cff_enums.Identifier_type.t;
  value : string;
  description : string option;
}

(** [make ~type_ ~value ?description ()] assembles an identifier from its
    parts; [description] is [None] when the argument is omitted. *)
let make ~type_ ~value ?description () : t = { type_; value; description }

(* Accessors: each one projects the field of the same name. *)
let type_ { type_; _ } = type_
let value { value; _ } = value
let description { description; _ } = description
(** [equal a b] is [true] iff [a] and [b] agree on every field: type, value
    and description. The interface documents equality as comparing all
    fields, so the optional description takes part as well. *)
let equal a b =
  Cff_enums.Identifier_type.equal a.type_ b.type_
  && String.equal a.value b.value
  && Option.equal String.equal a.description b.description

(** [compare a b] orders identifiers by type, then value, then description.
    The description is the final tie-breaker so that [compare a b = 0]
    exactly when [equal a b] holds. *)
let compare a b =
  match Cff_enums.Identifier_type.compare a.type_ b.type_ with
  | 0 ->
    (match String.compare a.value b.value with
     | 0 -> Option.compare String.compare a.description b.description
     | n -> n)
  | n -> n
(** [pp ppf t] prints [t] as ["type: value"], followed by [" (description)"]
    when a description is present. This matches the format documented in
    the interface; without a description the output is unchanged. *)
let pp ppf t =
  Format.fprintf ppf "%a: %s" Cff_enums.Identifier_type.pp t.type_ t.value;
  match t.description with
  | None -> ()
  | Some description -> Format.fprintf ppf " (%s)" description
(** Object codec for identifiers: members ["type"] and ["value"], plus an
    optional ["description"]; unknown members are skipped. *)
let jsont =
  (* Decoder-side constructor; arguments arrive in member declaration order. *)
  let build type_ value description = { type_; value; description } in
  Jsont.Object.map ~kind:"Identifier" build
  |> Jsont.Object.mem "type" Cff_enums.Identifier_type.jsont ~enc:type_
  |> Jsont.Object.mem "value" Jsont.string ~enc:value
  |> Jsont.Object.opt_mem "description" Jsont.string ~enc:description
  |> Jsont.Object.skip_unknown
  |> Jsont.Object.finish
+110
lib/cff_identifier.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Typed identifiers for CFF.
77+88+ The [identifiers] field in CFF allows listing multiple typed
99+ identifiers for a work. Each identifier has a type, value, and
1010+ optional description.
1111+1212+ {1 Identifier Types}
1313+1414+ CFF supports four identifier types:
1515+1616+ - {b DOI}: Digital Object Identifier
1717+ ({{:https://doi.org}doi.org})
1818+ - {b URL}: Web URL
1919+ - {b SWH}: Software Heritage identifier
2020+ ({{:https://www.softwareheritage.org}softwareheritage.org})
2121+ - {b Other}: Any other identifier scheme
2222+2323+ {1 Usage}
2424+2525+ The [identifiers] field is a list, allowing multiple identifiers:
2626+2727+ {[
2828+ identifiers:
2929+ - type: doi
3030+ value: 10.5281/zenodo.1234567
3131+ description: The concept DOI for all versions
3232+3333+ - type: doi
3434+ value: 10.5281/zenodo.1234568
3535+ description: The DOI for version 1.0.0
3636+3737+ - type: swh
3838+ value: swh:1:dir:bc286860f423ea7ced246ba7458eef4b4541cf2d
3939+ description: Software Heritage archive
4040+4141+ - type: url
4242+ value: https://github.com/user/project/releases/tag/v1.0.0
4343+ description: Release on GitHub
4444+ ]}
4545+4646+ {1 DOI vs doi Field}
4747+4848+ CFF provides two ways to specify DOIs:
4949+5050+ - The [doi] field at root level: A single, primary DOI
5151+ - The [identifiers] field with [type: doi]: Multiple DOIs with descriptions
5252+5353+ Both can be used together; [identifiers] provides more detail.
5454+5555+ {1 Software Heritage}
5656+5757+ Software Heritage (SWH) provides persistent identifiers for source
5858+ code. SWH identifiers follow the format:
5959+6060+ [swh:1:<object_type>:<hash>]
6161+6262+ Where object_type can be:
6363+ - [cnt]: Content (file)
6464+ - [dir]: Directory
6565+ - [rev]: Revision (commit)
6666+ - [rel]: Release
6767+ - [snp]: Snapshot *)
6868+6969+type t
7070+(** An identifier with type, value, and optional description. *)
7171+7272+val make :
7373+ type_:Cff_enums.Identifier_type.t ->
7474+ value:string ->
7575+ ?description:string ->
7676+ unit -> t
7777+(** Create an identifier.
7878+7979+ @param type_ The identifier type ([`Doi], [`Url], [`Swh], or [`Other])
8080+ @param value The identifier value (DOI, URL, SWH ID, etc.)
8181+ @param description Optional human-readable description *)
8282+8383+val type_ : t -> Cff_enums.Identifier_type.t
8484+(** The identifier type. *)
8585+8686+val value : t -> string
8787+(** The identifier value.
8888+8989+ For DOIs, this is just the DOI (e.g., ["10.5281/zenodo.1234567"]),
9090+ not the full URL. *)
9191+9292+val description : t -> string option
9393+(** Optional description explaining what this identifier refers to.
9494+9595+ Examples:
9696+ - ["The concept DOI for all versions"]
9797+ - ["Version 1.0.0 archive"]
9898+ - ["Release on GitHub"] *)
9999+100100+val equal : t -> t -> bool
101101+(** Identifier equality (compares all fields). *)
102102+103103+val compare : t -> t -> int
104104+(** Identifier comparison. *)
105105+106106+val pp : Format.formatter -> t -> unit
107107+(** Pretty-print as "[type]: value (description)". *)
108108+109109+val jsont : t Jsont.t
110110+(** JSON/YAML codec for identifiers. *)
+145
lib/cff_license.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** SPDX license handling for CFF. *)
module Id = struct
  type t = string

  (* Association list from the upper-cased spelling of every valid license ID
     to its canonical spelling; built once and used for case-insensitive
     lookup. *)
  let canonical_by_upper =
    Spdx_licenses.valid_license_ids
    |> List.map (fun canonical -> (String.uppercase_ascii canonical, canonical))

  (* [of_string s] yields the canonical spelling of the first valid ID that
     matches [s] ignoring ASCII case, or [`Invalid_license_id s] carrying the
     caller's original spelling when nothing matches. *)
  let of_string s =
    match List.assoc_opt (String.uppercase_ascii s) canonical_by_upper with
    | Some canonical -> Ok canonical
    | None -> Error (`Invalid_license_id s)

  let to_string (id : t) : string = id

  let equal (a : t) (b : t) = String.equal a b
  let compare (a : t) (b : t) = String.compare a b

  let pp ppf (id : t) = Format.pp_print_string ppf id
end
2828+2929+type t = Id.t list (* Non-empty list; multiple = OR relationship *)
(** [single id] is the license made of exactly [id]. *)
let single id = [ id ]

(** [multiple ids] is the license offering any one of [ids].

    @raise Invalid_argument if [ids] is empty. The interface documents this
    exception, and an empty license would otherwise reach the non-empty
    assertion in [to_spdx]. *)
let multiple = function
  | [] -> invalid_arg "Cff_license.multiple: empty license list"
  | ids -> ids
(** [ids license] is the list of IDs held by [license]. *)
let ids license = license

(** [is_single license] is [true] iff [license] holds exactly one ID. *)
let is_single license =
  match license with
  | [ _ ] -> true
  | [] | _ :: _ :: _ -> false
(** [of_string s] validates [s] as one license ID and wraps it as a
    single-ID license. *)
let of_string s = Id.of_string s |> Result.map single

(** [of_string_list names] validates every name in order and returns them as
    one license. The first invalid name aborts with its error; an empty
    [names] is rejected with [`Invalid_license_id "empty license list"]. *)
let of_string_list = function
  | [] -> Error (`Invalid_license_id "empty license list")
  | names ->
    (* [validated] accumulates in reverse; it is flipped once at the end. *)
    let rec collect validated = function
      | [] -> Ok (List.rev validated)
      | name :: remaining ->
        (match Id.of_string name with
         | Error e -> Error e
         | Ok id -> collect (id :: validated) remaining)
    in
    collect [] names
(** [to_string_list license] is the license's IDs as plain strings. *)
let to_string_list license = license

(** [equal t1 t2] holds when both licenses list the same IDs, in the same
    order and in the same number. *)
let equal t1 t2 = List.equal Id.equal t1 t2

(** [compare t1 t2] compares the two ID lists lexicographically using
    [Id.compare] on elements. *)
let compare t1 t2 = List.compare Id.compare t1 t2
(** [pp ppf license] prints a one-ID license as the bare ID, and any other
    license as its IDs separated by [", "] inside square brackets. *)
let pp ppf = function
  | [ id ] -> Id.pp ppf id
  | ids ->
    let comma ppf () = Format.pp_print_string ppf ", " in
    Format.fprintf ppf "[%a]" (Format.pp_print_list ~pp_sep:comma Id.pp) ids
(* Convert to Spdx_licenses.t: a single ID becomes a simple expression,
   several IDs become a right-nested chain of OR nodes. *)
let to_spdx t =
  let leaf id = Spdx_licenses.Simple (Spdx_licenses.LicenseID id) in
  (* Walk the IDs from last to first so that each step wraps the expression
     built so far on its right-hand side. *)
  match List.rev t with
  | [] -> assert false (* t is non-empty *)
  | last :: earlier_rev ->
    List.fold_left
      (fun right id -> Spdx_licenses.OR (leaf id, right))
      (leaf last) earlier_rev
(* Convert from Spdx_licenses.t. Only plain license IDs combined with OR are
   accepted; every other expression form yields [`Unsupported_expression]. *)
let of_spdx spdx =
  (* [seen] holds the IDs met so far, most recent first. *)
  let rec collect seen = function
    | Spdx_licenses.Simple (Spdx_licenses.LicenseID id) -> Ok (id :: seen)
    | Spdx_licenses.OR (lhs, rhs) ->
      (match collect seen lhs with
       | Ok seen -> collect seen rhs
       | Error _ as unsupported -> unsupported)
    | Spdx_licenses.Simple
        (Spdx_licenses.LicenseIDPlus _ | Spdx_licenses.LicenseRef _)
    | Spdx_licenses.WITH _
    | Spdx_licenses.AND _ -> Error `Unsupported_expression
  in
  collect [] spdx |> Result.map List.rev
(* Jsont codec - a one-ID license is a plain string, anything else is an
   array of strings. Every decoded ID is validated. *)
let jsont =
  (* Shared failure path for both shapes. *)
  let reject bad =
    Jsont.Error.msgf Jsont.Meta.none "Invalid SPDX license ID: %s" bad
  in
  let one_id =
    Jsont.map Jsont.string
      ~dec:(fun raw ->
        match Id.of_string raw with
        | Ok id -> [ id ]
        | Error (`Invalid_license_id bad) -> reject bad)
      ~enc:(function
        | [ id ] -> id
        | _ -> assert false (* only selected below for one-element lists *))
  in
  let many_ids =
    Jsont.map (Jsont.array Jsont.string)
      ~dec:(fun raws ->
        match of_string_list (Stdlib.Array.to_list raws) with
        | Ok license -> license
        | Error (`Invalid_license_id bad) -> reject bad)
      ~enc:Stdlib.Array.of_list
  in
  Jsont.any ~dec_string:one_id ~dec_array:many_ids
    ~enc:(fun license -> if is_single license then one_id else many_ids)
    ()
(* Lenient codec: same string-or-array shape as the validating codec, but
   the strings are taken as-is with no lookup against the SPDX list. *)
let jsont_lenient =
  let one_id =
    Jsont.map Jsont.string
      ~dec:(fun raw -> [ raw ])
      ~enc:(function
        | [ only ] -> only
        | _ -> assert false (* only selected below for one-element lists *))
  in
  let many_ids =
    Jsont.map (Jsont.array Jsont.string)
      ~dec:Stdlib.Array.to_list
      ~enc:Stdlib.Array.of_list
  in
  Jsont.any ~dec_string:one_id ~dec_array:many_ids
    ~enc:(fun license -> if is_single license then one_id else many_ids)
    ()
+159
lib/cff_license.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** SPDX license identifiers for CFF.
77+88+ CFF uses {{:https://spdx.org/licenses/}SPDX license identifiers}
99+ for the [license] field. SPDX provides a standardized list of
1010+ open source license identifiers.
1111+1212+ {1 License Field}
1313+1414+ The [license] field can be a single license identifier like ["MIT"],
1515+ or a list of licenses with OR relationship like ["GPL-3.0-only"] and
1616+ ["MIT"] together.
1717+1818+ When multiple licenses are listed, it means the user may choose
1919+ {b any one} of the listed licenses. This matches the SPDX OR
2020+ semantics.
2121+2222+ {1 Examples}
2323+2424+ {2 Single License}
2525+2626+ {[
2727+ cff-version: "1.2.0"
2828+ title: "My Project"
2929+ license: MIT
3030+ ]}
3131+3232+ {2 Multiple Licenses (OR)}
3333+3434+ {[
3535+ cff-version: "1.2.0"
3636+ title: "My Project"
3737+ license:
3838+ - Apache-2.0
3939+ - MIT
4040+ ]}
4141+4242+ This means the software is available under Apache-2.0 OR MIT.
4343+4444+ {1 Common License IDs}
4545+4646+ Some commonly used SPDX license identifiers:
4747+4848+ - [MIT] - MIT License
4949+ - [Apache-2.0] - Apache License 2.0
5050+ - [GPL-3.0-only] - GNU General Public License v3.0 only
5151+ - [GPL-3.0-or-later] - GNU GPL v3.0 or later
5252+ - [BSD-2-Clause] - BSD 2-Clause "Simplified" License
5353+ - [BSD-3-Clause] - BSD 3-Clause "New" License
5454+ - [ISC] - ISC License
5555+ - [MPL-2.0] - Mozilla Public License 2.0
5656+ - [LGPL-3.0-only] - GNU Lesser GPL v3.0
5757+ - [CC-BY-4.0] - Creative Commons Attribution 4.0
5858+5959+ {1 Deprecated IDs}
6060+6161+ Some older license identifiers are deprecated in SPDX:
6262+6363+ - [GPL-2.0] should use [GPL-2.0-only] or [GPL-2.0-or-later]
6464+ - [GPL-3.0] should use [GPL-3.0-only] or [GPL-3.0-or-later]
6565+ - [LGPL-2.1] should use [LGPL-2.1-only] or [LGPL-2.1-or-later]
6666+6767+ The {!jsont_lenient} codec accepts these deprecated IDs. *)
6868+6969+(** A validated SPDX license identifier. *)
7070+module Id : sig
7171+ type t
7272+ (** A single validated SPDX license ID. *)
7373+7474+ val of_string : string -> (t, [> `Invalid_license_id of string]) result
7575+ (** Parse and validate a license ID.
7676+7777+ The check is case-insensitive. Returns [Error] for unknown
7878+ license identifiers. *)
7979+8080+ val to_string : t -> string
8181+ (** Return the canonical (properly cased) license ID string. *)
8282+8383+ val equal : t -> t -> bool
8484+ val compare : t -> t -> int
8585+8686+ val pp : Format.formatter -> t -> unit
8787+ (** Pretty-print the license ID. *)
8888+end
8989+9090+type t
9191+(** A CFF license: one or more SPDX license IDs.
9292+9393+ Multiple IDs represent an OR relationship: the user may choose
9494+ any of the listed licenses. *)
9595+9696+val single : Id.t -> t
9797+(** Create a license from a single ID. *)
9898+9999+val multiple : Id.t list -> t
100100+(** Create a license from multiple IDs (OR relationship).
101101+102102+ Raises [Invalid_argument] if the list is empty. *)
103103+104104+val ids : t -> Id.t list
105105+(** Get the list of license IDs.
106106+107107+ For a single license, returns a one-element list. *)
108108+109109+val is_single : t -> bool
110110+(** [true] if this is a single license ID, [false] for multiple. *)
111111+112112+val of_string : string -> (t, [> `Invalid_license_id of string]) result
113113+(** Parse a single license ID string into a license.
114114+115115+ Equivalent to [Result.map single (Id.of_string s)]. *)
116116+117117+val of_string_list : string list -> (t, [> `Invalid_license_id of string]) result
(** Parse a list of license ID strings.

    All IDs must be valid; returns [Error] if any ID is invalid, or if
    the list itself is empty. *)
121121+122122+val to_string_list : t -> string list
123123+(** Return the list of license ID strings. *)
124124+125125+val equal : t -> t -> bool
126126+(** License equality. *)
127127+128128+val compare : t -> t -> int
129129+(** License comparison. *)
130130+131131+val pp : Format.formatter -> t -> unit
(** Pretty-print: a single ID is printed as-is; multiple IDs are printed as
    a comma-separated list in square brackets, e.g. [[Apache-2.0, MIT]]. *)
133133+134134+(** {1 SPDX Interop} *)
135135+136136+val to_spdx : t -> Spdx_licenses.t
137137+(** Convert to an SPDX license expression (OR combination). *)
138138+139139+val of_spdx : Spdx_licenses.t -> (t, [> `Unsupported_expression]) result
140140+(** Convert from an SPDX license expression.
141141+142142+ Only simple license IDs and OR combinations are supported.
143143+ Complex expressions using AND, WITH (exceptions), or license
144144+ references return [Error `Unsupported_expression]. *)
145145+146146+(** {1 Codecs} *)
147147+148148+val jsont : t Jsont.t
149149+(** JSON/YAML codec that validates license IDs.
150150+151151+ Handles both single string (["MIT"]) and array of strings.
152152+ Returns an error for invalid SPDX license identifiers. *)
153153+154154+val jsont_lenient : t Jsont.t
155155+(** JSON/YAML codec that accepts any string without validation.
156156+157157+ Use this codec when parsing CFF files that may contain deprecated
158158+ or non-standard license identifiers. Invalid IDs are preserved
159159+ as-is for round-tripping. *)
+595
lib/cff_reference.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Reference type for CFF with logical sub-records. *)
(** Core identity of a reference: its type, title and authors, plus an
    optional abstract and abbreviation. *)
module Core = struct
  type t = {
    type_ : Cff_enums.Reference_type.t;
    title : string;
    authors : Cff_author.t list;
    abstract : string option;
    abbreviation : string option;
  }

  (** Builds a core record; omitted optional arguments become [None]. *)
  let make ~type_ ~title ~authors ?abstract ?abbreviation () : t =
    { type_; title; authors; abstract; abbreviation }

  (* Accessors: each one projects the field of the same name. *)
  let type_ { type_; _ } = type_
  let title { title; _ } = title
  let authors { authors; _ } = authors
  let abstract { abstract; _ } = abstract
  let abbreviation { abbreviation; _ } = abbreviation

  (** Prints the title followed by the reference type in parentheses. *)
  let pp ppf { title; type_; _ } =
    Format.fprintf ppf "%s (%a)" title Cff_enums.Reference_type.pp type_
end
(** Publication information (journal, volume, pages, etc.). Every field is
    optional. *)
module Publication = struct
  type t = {
    journal : string option;
    volume : string option;
    issue : string option;
    pages : string option;
    start : string option;
    end_ : string option;
    edition : string option;
    section : string option;
    status : Cff_enums.Status.t option;
  }

  (** The record with every field set to [None]. *)
  let empty =
    {
      journal = None;
      volume = None;
      issue = None;
      pages = None;
      start = None;
      end_ = None;
      edition = None;
      section = None;
      status = None;
    }

  (** Builds a record; each omitted argument leaves its field at [None]. *)
  let make ?journal ?volume ?issue ?pages ?start ?end_ ?edition ?section
      ?status () : t =
    { journal; volume; issue; pages; start; end_; edition; section; status }

  (* Accessors: each one projects the field of the same name. *)
  let journal { journal; _ } = journal
  let volume { volume; _ } = volume
  let issue { issue; _ } = issue
  let pages { pages; _ } = pages
  let start { start; _ } = start
  let end_ { end_; _ } = end_
  let edition { edition; _ } = edition
  let section { section; _ } = section
  let status { status; _ } = status

  (** [true] iff every field is [None]. *)
  let is_empty = function
    | {
        journal = None;
        volume = None;
        issue = None;
        pages = None;
        start = None;
        end_ = None;
        edition = None;
        section = None;
        status = None;
      } -> true
    | _ -> false
end
(** Collection information (proceedings, book series, etc.). Every field is
    an optional string. *)
module Collection = struct
  type t = {
    collection_title : string option;
    collection_type : string option;
    collection_doi : string option;
    volume_title : string option;
    number_volumes : string option;
  }

  (** The record with every field set to [None]. *)
  let empty =
    {
      collection_title = None;
      collection_type = None;
      collection_doi = None;
      volume_title = None;
      number_volumes = None;
    }

  (** Builds a record; each omitted argument leaves its field at [None]. *)
  let make ?collection_title ?collection_type ?collection_doi ?volume_title
      ?number_volumes () : t =
    {
      collection_title;
      collection_type;
      collection_doi;
      volume_title;
      number_volumes;
    }

  (* Accessors: each one projects the field of the same name. *)
  let collection_title { collection_title; _ } = collection_title
  let collection_type { collection_type; _ } = collection_type
  let collection_doi { collection_doi; _ } = collection_doi
  let volume_title { volume_title; _ } = volume_title
  let number_volumes { number_volumes; _ } = number_volumes

  (** [true] iff every field is [None]. *)
  let is_empty = function
    | {
        collection_title = None;
        collection_type = None;
        collection_doi = None;
        volume_title = None;
        number_volumes = None;
      } -> true
    | _ -> false
end
(** Date information: full dates, plus separate year, month and issue-date
    fields. Every field is optional. *)
module Dates = struct
  type t = {
    date_accessed : Cff_date.t option;
    date_downloaded : Cff_date.t option;
    date_published : Cff_date.t option;
    date_released : Cff_date.t option;
    year : int option;
    year_original : int option;
    month : int option;
    issue_date : string option;
  }

  (** The record with every field set to [None]. *)
  let empty =
    {
      date_accessed = None;
      date_downloaded = None;
      date_published = None;
      date_released = None;
      year = None;
      year_original = None;
      month = None;
      issue_date = None;
    }

  (** Builds a record; each omitted argument leaves its field at [None]. *)
  let make ?date_accessed ?date_downloaded ?date_published ?date_released
      ?year ?year_original ?month ?issue_date () : t =
    {
      date_accessed;
      date_downloaded;
      date_published;
      date_released;
      year;
      year_original;
      month;
      issue_date;
    }

  (* Accessors: each one projects the field of the same name. *)
  let date_accessed { date_accessed; _ } = date_accessed
  let date_downloaded { date_downloaded; _ } = date_downloaded
  let date_published { date_published; _ } = date_published
  let date_released { date_released; _ } = date_released
  let year { year; _ } = year
  let year_original { year_original; _ } = year_original
  let month { month; _ } = month
  let issue_date { issue_date; _ } = issue_date

  (** [true] iff every field is [None]. *)
  let is_empty = function
    | {
        date_accessed = None;
        date_downloaded = None;
        date_published = None;
        date_released = None;
        year = None;
        year_original = None;
        month = None;
        issue_date = None;
      } -> true
    | _ -> false
end
(** Identifiers and links: DOI, URLs, repository locations, standard numbers
    and a list of typed identifiers. Every field is optional. *)
module Identifiers = struct
  type t = {
    doi : string option;
    url : string option;
    repository : string option;
    repository_code : string option;
    repository_artifact : string option;
    isbn : string option;
    issn : string option;
    pmcid : string option;
    nihmsid : string option;
    identifiers : Cff_identifier.t list option;
  }

  (** The record with every field set to [None]. *)
  let empty =
    {
      doi = None;
      url = None;
      repository = None;
      repository_code = None;
      repository_artifact = None;
      isbn = None;
      issn = None;
      pmcid = None;
      nihmsid = None;
      identifiers = None;
    }

  (** Builds a record; each omitted argument leaves its field at [None]. *)
  let make ?doi ?url ?repository ?repository_code ?repository_artifact ?isbn
      ?issn ?pmcid ?nihmsid ?identifiers () : t =
    {
      doi;
      url;
      repository;
      repository_code;
      repository_artifact;
      isbn;
      issn;
      pmcid;
      nihmsid;
      identifiers;
    }

  (* Accessors: each one projects the field of the same name. *)
  let doi { doi; _ } = doi
  let url { url; _ } = url
  let repository { repository; _ } = repository
  let repository_code { repository_code; _ } = repository_code
  let repository_artifact { repository_artifact; _ } = repository_artifact
  let isbn { isbn; _ } = isbn
  let issn { issn; _ } = issn
  let pmcid { pmcid; _ } = pmcid
  let nihmsid { nihmsid; _ } = nihmsid
  let identifiers { identifiers; _ } = identifiers

  (** [true] iff every field is [None]. *)
  let is_empty = function
    | {
        doi = None;
        url = None;
        repository = None;
        repository_code = None;
        repository_artifact = None;
        isbn = None;
        issn = None;
        pmcid = None;
        nihmsid = None;
        identifiers = None;
      } -> true
    | _ -> false
end
(** Related entities (editors, publisher, etc.). The list-valued roles hold
    authors; the single-valued roles hold one entity. Every field is
    optional. *)
module Entities = struct
  type t = {
    editors : Cff_author.t list option;
    editors_series : Cff_author.t list option;
    translators : Cff_author.t list option;
    recipients : Cff_author.t list option;
    senders : Cff_author.t list option;
    contact : Cff_author.t list option;
    publisher : Cff_author.Entity.t option;
    institution : Cff_author.Entity.t option;
    conference : Cff_author.Entity.t option;
    database_provider : Cff_author.Entity.t option;
    location : Cff_author.Entity.t option;
  }

  (** The record with every field set to [None]. *)
  let empty =
    {
      editors = None;
      editors_series = None;
      translators = None;
      recipients = None;
      senders = None;
      contact = None;
      publisher = None;
      institution = None;
      conference = None;
      database_provider = None;
      location = None;
    }

  (** Builds a record; each omitted argument leaves its field at [None]. *)
  let make ?editors ?editors_series ?translators ?recipients ?senders
      ?contact ?publisher ?institution ?conference ?database_provider
      ?location () : t =
    {
      editors;
      editors_series;
      translators;
      recipients;
      senders;
      contact;
      publisher;
      institution;
      conference;
      database_provider;
      location;
    }

  (* Accessors: each one projects the field of the same name. *)
  let editors { editors; _ } = editors
  let editors_series { editors_series; _ } = editors_series
  let translators { translators; _ } = translators
  let recipients { recipients; _ } = recipients
  let senders { senders; _ } = senders
  let contact { contact; _ } = contact
  let publisher { publisher; _ } = publisher
  let institution { institution; _ } = institution
  let conference { conference; _ } = conference
  let database_provider { database_provider; _ } = database_provider
  let location { location; _ } = location

  (** [true] iff every field is [None]. *)
  let is_empty = function
    | {
        editors = None;
        editors_series = None;
        translators = None;
        recipients = None;
        senders = None;
        contact = None;
        publisher = None;
        institution = None;
        conference = None;
        database_provider = None;
        location = None;
      } -> true
    | _ -> false
end
(** Metadata and description: keywords, languages, licensing, copyright,
    scope and notes. Every field is optional. *)
module Metadata = struct
  type t = {
    keywords : string list option;
    languages : string list option;
    license : Cff_license.t option;
    license_url : string option;
    copyright : string option;
    scope : string option;
    notes : string option;
  }

  (** The record with every field set to [None]. *)
  let empty =
    {
      keywords = None;
      languages = None;
      license = None;
      license_url = None;
      copyright = None;
      scope = None;
      notes = None;
    }

  (** Builds a record; each omitted argument leaves its field at [None]. *)
  let make ?keywords ?languages ?license ?license_url ?copyright ?scope
      ?notes () : t =
    { keywords; languages; license; license_url; copyright; scope; notes }

  (* Accessors: each one projects the field of the same name. *)
  let keywords { keywords; _ } = keywords
  let languages { languages; _ } = languages
  let license { license; _ } = license
  let license_url { license_url; _ } = license_url
  let copyright { copyright; _ } = copyright
  let scope { scope; _ } = scope
  let notes { notes; _ } = notes

  (** [true] iff every field is [None]. *)
  let is_empty = function
    | {
        keywords = None;
        languages = None;
        license = None;
        license_url = None;
        copyright = None;
        scope = None;
        notes = None;
      } -> true
    | _ -> false
end
(** Technical and domain-specific fields. Every field is optional; all are
    strings except [patent_states], which is a list of strings. *)
module Technical = struct
  type t = {
    commit : string option;
    version : string option;
    filename : string option;
    format : string option;
    medium : string option;
    data_type : string option;
    database : string option;
    number : string option;
    patent_states : string list option;
    thesis_type : string option;
    term : string option;
    entry : string option;
    department : string option;
    loc_start : string option;
    loc_end : string option;
  }

  (** The record with every field set to [None]. *)
  let empty =
    {
      commit = None;
      version = None;
      filename = None;
      format = None;
      medium = None;
      data_type = None;
      database = None;
      number = None;
      patent_states = None;
      thesis_type = None;
      term = None;
      entry = None;
      department = None;
      loc_start = None;
      loc_end = None;
    }

  (** Builds a record; each omitted argument leaves its field at [None]. *)
  let make ?commit ?version ?filename ?format ?medium ?data_type ?database
      ?number ?patent_states ?thesis_type ?term ?entry ?department
      ?loc_start ?loc_end () : t =
    {
      commit;
      version;
      filename;
      format;
      medium;
      data_type;
      database;
      number;
      patent_states;
      thesis_type;
      term;
      entry;
      department;
      loc_start;
      loc_end;
    }

  (* Accessors: each one projects the field of the same name. *)
  let commit { commit; _ } = commit
  let version { version; _ } = version
  let filename { filename; _ } = filename
  let format { format; _ } = format
  let medium { medium; _ } = medium
  let data_type { data_type; _ } = data_type
  let database { database; _ } = database
  let number { number; _ } = number
  let patent_states { patent_states; _ } = patent_states
  let thesis_type { thesis_type; _ } = thesis_type
  let term { term; _ } = term
  let entry { entry; _ } = entry
  let department { department; _ } = department
  let loc_start { loc_start; _ } = loc_start
  let loc_end { loc_end; _ } = loc_end

  (** [true] iff every field is [None]. *)
  let is_empty = function
    | {
        commit = None;
        version = None;
        filename = None;
        format = None;
        medium = None;
        data_type = None;
        database = None;
        number = None;
        patent_states = None;
        thesis_type = None;
        term = None;
        entry = None;
        department = None;
        loc_start = None;
        loc_end = None;
      } -> true
    | _ -> false
end
(** A complete reference: one sub-record per group of fields. *)
type t = {
  core : Core.t;  (** Type, title, authors, abstract, abbreviation. *)
  publication : Publication.t;  (** Journal, volume, issue, pages, status. *)
  collection : Collection.t;  (** Collection title/type/DOI, volumes. *)
  dates : Dates.t;  (** Accessed/downloaded/published/released dates, year. *)
  identifiers : Identifiers.t;  (** DOI, URL, repositories, ISBN, ISSN. *)
  entities : Entities.t;  (** Editors, translators, publisher, institution. *)
  metadata : Metadata.t;  (** Keywords, languages, license, notes. *)
  technical : Technical.t;  (** Commit, version, format, thesis and patent data. *)
}
(* Assemble a reference from its sub-records. Only [core] is mandatory; every
   group that is left out falls back to that group's [empty] value. *)
let make ~core ?publication ?collection ?dates ?identifiers ?entities
    ?metadata ?technical () =
  {
    core;
    publication = Option.value publication ~default:Publication.empty;
    collection = Option.value collection ~default:Collection.empty;
    dates = Option.value dates ~default:Dates.empty;
    identifiers = Option.value identifiers ~default:Identifiers.empty;
    entities = Option.value entities ~default:Entities.empty;
    metadata = Option.value metadata ~default:Metadata.empty;
    technical = Option.value technical ~default:Technical.empty;
  }
(* Shortcut constructor: the three required core fields plus an optional DOI,
   year and journal. The remaining groups (collection, entities, metadata,
   technical) take the defaults supplied by [make]. *)
let make_simple ~type_ ~title ~authors ?doi ?year ?journal () =
  make
    ~core:(Core.make ~type_ ~title ~authors ())
    ~publication:(Publication.make ?journal ())
    ~dates:(Dates.make ?year ())
    ~identifiers:(Identifiers.make ?doi ())
    ()
(* Projections onto the eight sub-records of a reference. *)
let core { core; _ } = core
let publication { publication; _ } = publication
let collection { collection; _ } = collection
let dates { dates; _ } = dates
let identifiers { identifiers; _ } = identifiers
let entities { entities; _ } = entities
let metadata { metadata; _ } = metadata
let technical { technical; _ } = technical
(* Shortcuts that reach through a sub-record for the most commonly used
   fields. *)
let type_ { core; _ } = Core.type_ core
let title { core; _ } = Core.title core
let authors { core; _ } = Core.authors core
let doi { identifiers; _ } = Identifiers.doi identifiers
let year { dates; _ } = Dates.year dates
(* Pretty-print a reference. Only the core sub-record is printed; the other
   groups are not rendered. *)
let pp ppf { core; _ } = Core.pp ppf core
(* Codec for fields that a document may spell either as a string or as a bare
   number (pages, volume, version, ...). The OCaml side is always a [string].

   Decoding
   - a JSON string is taken verbatim;
   - a JSON number is rendered as text: integral values without a fractional
     part (["12"]), any other value with the fewest digits that read back to
     the same float (["1.2"]). Non-integral numbers are no longer truncated.

   Encoding
   - a string is written as a number only when decoding that number would
     give back exactly the same string; every other string (["007"],
     ["1.0"], ["1e5"], ["0x10"], ["nan"], ["12-34"]) is written as a string.
     Encode followed by decode is therefore the identity on strings. *)
let string_or_int_jsont =
  (* Textual form of a decoded number. *)
  let string_of_number f =
    (* Below 2^53 every integral float is an exact integer, so "%.0f" prints
       it faithfully; adding [0.] turns [-0.] into [0.] so it prints as "0". *)
    if Float.is_integer f && Float.abs f < 9007199254740992. then
      Printf.sprintf "%.0f" (f +. 0.)
    else
      (* Prefer the short form; fall back to 17 significant digits, which is
         always enough to read back the same double. *)
      let short = Printf.sprintf "%.15g" f in
      if Float.equal (float_of_string short) f then short
      else Printf.sprintf "%.17g" f
  in
  let as_number =
    Jsont.map ~dec:string_of_number ~enc:float_of_string Jsont.number
  in
  (* [true] when [s] is exactly what [string_of_number] prints for some finite
     number. [float_of_string] is then safe to call in [as_number]'s encoder. *)
  let is_canonical_number s =
    match float_of_string_opt s with
    | Some f -> Float.is_finite f && String.equal (string_of_number f) s
    | None -> false
  in
  Jsont.any ~dec_number:as_number ~dec_string:Jsont.string
    ~enc:(fun s -> if is_canonical_number s then as_number else Jsont.string)
    ()
(* Lift an element codec to a codec for OCaml lists by going through Jsont's
   array codec and converting between arrays and lists at the boundary. *)
let list_jsont elt =
  Jsont.array elt
  |> Jsont.map ~dec:Stdlib.Array.to_list ~enc:Stdlib.Array.of_list
(* Codec for a whole reference. On the wire a reference is a single flat
   object; decoding sorts its members into the eight sub-records and encoding
   flattens them again. Only "type", "title" and "authors" are required
   members; members not listed here are handed to [Jsont.Object.skip_unknown]. *)
let jsont =
  let people = list_jsont Cff_author.jsont in
  let typed_ids = list_jsont Cff_identifier.jsont in
  let strings = list_jsont Jsont.string in
  let entity = Cff_author.Entity.jsont in
  let date = Cff_date.jsont in
  let text = Jsont.string in
  let flex = string_or_int_jsont in
  (* Required and optional member declarations; the encoder projection is
     passed as an ordinary positional argument. *)
  let req name codec get = Jsont.Object.mem name codec ~enc:get in
  let opt name codec get = Jsont.Object.opt_mem name codec ~enc:get in
  (* Decoder constructor. Its parameters must stay in exactly the order in
     which the members are declared in the pipeline below. *)
  let assemble
      (* Core *)
      type_ title authors abstract abbreviation
      (* Publication *)
      journal volume issue pages start end_ edition section status
      (* Collection *)
      collection_title collection_type collection_doi volume_title
      number_volumes
      (* Dates *)
      date_accessed date_downloaded date_published date_released year
      year_original month issue_date
      (* Identifiers *)
      doi url repository repository_code repository_artifact isbn issn pmcid
      nihmsid identifiers_list
      (* Entities *)
      editors editors_series translators recipients senders contact publisher
      institution conference database_provider location_entity
      (* Metadata *)
      keywords languages license license_url copyright scope notes
      (* Technical *)
      commit version filename format medium data_type database number
      patent_states thesis_type term entry department loc_start loc_end =
    {
      core = { Core.type_; title; authors; abstract; abbreviation };
      publication =
        { Publication.journal; volume; issue; pages; start; end_; edition;
          section; status };
      collection =
        { Collection.collection_title; collection_type; collection_doi;
          volume_title; number_volumes };
      dates =
        { Dates.date_accessed; date_downloaded; date_published; date_released;
          year; year_original; month; issue_date };
      identifiers =
        { Identifiers.doi; url; repository; repository_code;
          repository_artifact; isbn; issn; pmcid; nihmsid;
          identifiers = identifiers_list };
      entities =
        { Entities.editors; editors_series; translators; recipients; senders;
          contact; publisher; institution; conference; database_provider;
          location = location_entity };
      metadata =
        { Metadata.keywords; languages; license; license_url; copyright;
          scope; notes };
      technical =
        { Technical.commit; version; filename; format; medium; data_type;
          database; number; patent_states; thesis_type; term; entry;
          department; loc_start; loc_end };
    }
  in
  Jsont.Object.map ~kind:"Reference" assemble
  (* Core *)
  |> req "type" Cff_enums.Reference_type.jsont (fun r -> r.core.Core.type_)
  |> req "title" text (fun r -> r.core.Core.title)
  |> req "authors" people (fun r -> r.core.Core.authors)
  |> opt "abstract" text (fun r -> r.core.Core.abstract)
  |> opt "abbreviation" text (fun r -> r.core.Core.abbreviation)
  (* Publication *)
  |> opt "journal" text (fun r -> r.publication.Publication.journal)
  |> opt "volume" flex (fun r -> r.publication.Publication.volume)
  |> opt "issue" flex (fun r -> r.publication.Publication.issue)
  |> opt "pages" flex (fun r -> r.publication.Publication.pages)
  |> opt "start" flex (fun r -> r.publication.Publication.start)
  |> opt "end" flex (fun r -> r.publication.Publication.end_)
  |> opt "edition" text (fun r -> r.publication.Publication.edition)
  |> opt "section" flex (fun r -> r.publication.Publication.section)
  |> opt "status" Cff_enums.Status.jsont (fun r ->
         r.publication.Publication.status)
  (* Collection *)
  |> opt "collection-title" text (fun r ->
         r.collection.Collection.collection_title)
  |> opt "collection-type" text (fun r ->
         r.collection.Collection.collection_type)
  |> opt "collection-doi" text (fun r ->
         r.collection.Collection.collection_doi)
  |> opt "volume-title" text (fun r -> r.collection.Collection.volume_title)
  |> opt "number-volumes" flex (fun r ->
         r.collection.Collection.number_volumes)
  (* Dates *)
  |> opt "date-accessed" date (fun r -> r.dates.Dates.date_accessed)
  |> opt "date-downloaded" date (fun r -> r.dates.Dates.date_downloaded)
  |> opt "date-published" date (fun r -> r.dates.Dates.date_published)
  |> opt "date-released" date (fun r -> r.dates.Dates.date_released)
  |> opt "year" Jsont.int (fun r -> r.dates.Dates.year)
  |> opt "year-original" Jsont.int (fun r -> r.dates.Dates.year_original)
  |> opt "month" Jsont.int (fun r -> r.dates.Dates.month)
  |> opt "issue-date" text (fun r -> r.dates.Dates.issue_date)
  (* Identifiers *)
  |> opt "doi" text (fun r -> r.identifiers.Identifiers.doi)
  |> opt "url" text (fun r -> r.identifiers.Identifiers.url)
  |> opt "repository" text (fun r -> r.identifiers.Identifiers.repository)
  |> opt "repository-code" text (fun r ->
         r.identifiers.Identifiers.repository_code)
  |> opt "repository-artifact" text (fun r ->
         r.identifiers.Identifiers.repository_artifact)
  |> opt "isbn" text (fun r -> r.identifiers.Identifiers.isbn)
  |> opt "issn" flex (fun r -> r.identifiers.Identifiers.issn)
  |> opt "pmcid" text (fun r -> r.identifiers.Identifiers.pmcid)
  |> opt "nihmsid" text (fun r -> r.identifiers.Identifiers.nihmsid)
  |> opt "identifiers" typed_ids (fun r ->
         r.identifiers.Identifiers.identifiers)
  (* Entities *)
  |> opt "editors" people (fun r -> r.entities.Entities.editors)
  |> opt "editors-series" people (fun r -> r.entities.Entities.editors_series)
  |> opt "translators" people (fun r -> r.entities.Entities.translators)
  |> opt "recipients" people (fun r -> r.entities.Entities.recipients)
  |> opt "senders" people (fun r -> r.entities.Entities.senders)
  |> opt "contact" people (fun r -> r.entities.Entities.contact)
  |> opt "publisher" entity (fun r -> r.entities.Entities.publisher)
  |> opt "institution" entity (fun r -> r.entities.Entities.institution)
  |> opt "conference" entity (fun r -> r.entities.Entities.conference)
  |> opt "database-provider" entity (fun r ->
         r.entities.Entities.database_provider)
  |> opt "location" entity (fun r -> r.entities.Entities.location)
  (* Metadata *)
  |> opt "keywords" strings (fun r -> r.metadata.Metadata.keywords)
  |> opt "languages" strings (fun r -> r.metadata.Metadata.languages)
  |> opt "license" Cff_license.jsont_lenient (fun r ->
         r.metadata.Metadata.license)
  |> opt "license-url" text (fun r -> r.metadata.Metadata.license_url)
  |> opt "copyright" text (fun r -> r.metadata.Metadata.copyright)
  |> opt "scope" text (fun r -> r.metadata.Metadata.scope)
  |> opt "notes" text (fun r -> r.metadata.Metadata.notes)
  (* Technical *)
  |> opt "commit" text (fun r -> r.technical.Technical.commit)
  |> opt "version" flex (fun r -> r.technical.Technical.version)
  |> opt "filename" text (fun r -> r.technical.Technical.filename)
  |> opt "format" text (fun r -> r.technical.Technical.format)
  |> opt "medium" text (fun r -> r.technical.Technical.medium)
  |> opt "data-type" text (fun r -> r.technical.Technical.data_type)
  |> opt "database" text (fun r -> r.technical.Technical.database)
  |> opt "number" flex (fun r -> r.technical.Technical.number)
  |> opt "patent-states" strings (fun r ->
         r.technical.Technical.patent_states)
  |> opt "thesis-type" text (fun r -> r.technical.Technical.thesis_type)
  |> opt "term" text (fun r -> r.technical.Technical.term)
  |> opt "entry" text (fun r -> r.technical.Technical.entry)
  |> opt "department" text (fun r -> r.technical.Technical.department)
  |> opt "loc-start" flex (fun r -> r.technical.Technical.loc_start)
  |> opt "loc-end" flex (fun r -> r.technical.Technical.loc_end)
  |> Jsont.Object.skip_unknown
  |> Jsont.Object.finish
+578
lib/cff_reference.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Bibliographic reference type for CFF.
77+88+ References represent citable works in the [references] and
99+ [preferred-citation] fields of a CFF file. They can describe any
1010+ type of scholarly output: journal articles, books, conference papers,
1111+ software, datasets, theses, patents, and many more.
1212+1313+ {1 Structure}
1414+1515+ CFF references have 60+ possible fields. This module organizes them
1616+ into logical sub-records for easier manipulation:
1717+1818+ - {!Core} - Required fields: type, title, authors
1919+ - {!Publication} - Journal articles: journal, volume, issue, pages
2020+ - {!Collection} - Book chapters, proceedings: collection title, DOI
2121+ - {!Dates} - When the work was published, accessed, etc.
2222+ - {!Identifiers} - DOI, URL, ISBN, ISSN, repository links
2323+ - {!Entities} - Editors, publisher, institution, conference
2424+ - {!Metadata} - Keywords, license, languages, copyright
2525+ - {!Technical} - Software-specific: commit, version, format
2626+2727+ {1 Reference Types}
2828+2929+ The [type] field determines what kind of work is being referenced.
3030+ CFF 1.2.0 supports 40+ types including:
3131+3232+ - Academic: [`Article], [`Book], [`Conference_paper], [`Thesis]
3333+ - Software: [`Software], [`Software_code], [`Software_container]
3434+ - Data: [`Data], [`Database], [`Dataset]
3535+ - Legal: [`Patent], [`Legal_case], [`Statute]
3636+ - Media: [`Video], [`Sound_recording], [`Film_broadcast]
3737+3838+ {1 Example}
3939+4040+ {[
4141+ (* A journal article reference *)
4242+ let article = Cff_reference.make_simple
4343+ ~type_:`Article
4444+ ~title:"The Software Citation Principles"
4545+ ~authors:[
4646+ Cff_author.Person (Cff_author.Person.make
4747+ ~family_names:"Smith"
4848+ ~given_names:"Arfon M."
4949+ ());
5050+ ]
5151+ ~doi:"10.7717/peerj-cs.86"
5252+ ~year:2016
5353+ ~journal:"PeerJ Computer Science"
5454+ ()
5555+5656+ (* A software reference with more details *)
5757+ let core = Cff_reference.Core.make
5858+ ~type_:`Software
5959+ ~title:"NumPy"
6060+ ~authors:[...]
6161+ () in
6262+ let dates = Cff_reference.Dates.make ~year:2020 () in
6363+ let ids = Cff_reference.Identifiers.make
6464+ ~doi:"10.1038/s41586-020-2649-2"
6565+ ~url:"https://numpy.org"
6666+ () in
6767+ let software = Cff_reference.make ~core ~dates ~identifiers:ids ()
6868+ ]}
6969+7070+ {1 Sub-records} *)
(** The identity of a reference: its type, title and authors, plus an
    optional abstract and abbreviation.

    The type determines which of the other field groups are relevant. *)
module Core : sig
  type t

  val make :
    type_:Cff_enums.Reference_type.t ->
    title:string ->
    authors:Cff_author.t list ->
    ?abstract:string ->
    ?abbreviation:string ->
    unit ->
    t
  (** [make ~type_ ~title ~authors ()] builds a core record.

      @param type_ kind of work being referenced (article, book, software, ...)
      @param title title of the work
      @param authors persons and/or entities who created the work
      @param abstract description or abstract of the work
      @param abbreviation abbreviated form of the title *)

  val type_ : t -> Cff_enums.Reference_type.t
  (** Kind of work; it decides which other fields apply. *)

  val title : t -> string
  (** Title of the referenced work. *)

  val authors : t -> Cff_author.t list
  (** Authors or creators of the work. *)

  val abstract : t -> string option
  (** Description or abstract of the work, if any. *)

  val abbreviation : t -> string option
  (** Abbreviated title (e.g. for journal names), if any. *)

  val pp : Format.formatter -> t -> unit
  (** Pretty-printer for core records. *)
end
(** Publication details for journals, magazines and other serials.

    Pages are given either as one range ([pages]) or as a separate
    [start] / [end_] pair. *)
module Publication : sig
  type t

  val empty : t
  (** The record in which every field is [None]. *)

  val make :
    ?journal:string ->
    ?volume:string ->
    ?issue:string ->
    ?pages:string ->
    ?start:string ->
    ?end_:string ->
    ?edition:string ->
    ?section:string ->
    ?status:Cff_enums.Status.t ->
    unit ->
    t
  (** Build a record from whichever fields are supplied. *)

  val journal : t -> string option
  (** Name of the journal or magazine. *)

  val volume : t -> string option
  (** Volume number of the journal. *)

  val issue : t -> string option
  (** Issue number within the volume. *)

  val pages : t -> string option
  (** Page range such as ["123-145"]; the alternative to [start]/[end_]. *)

  val start : t -> string option
  (** First page. *)

  val end_ : t -> string option
  (** Last page. *)

  val edition : t -> string option
  (** Edition of the work, e.g. ["2nd edition"]. *)

  val section : t -> string option
  (** Section of the work, e.g. a newspaper section. *)

  val status : t -> Cff_enums.Status.t option
  (** Publication status (preprint, in press, submitted, ...). *)

  val is_empty : t -> bool
  (** [true] when every field is [None]. *)
end
(** Details of the collection a work appears in: book chapters, conference
    proceedings and other contributions to an edited volume. *)
module Collection : sig
  type t

  val empty : t
  (** The empty record; it is what {!val:make} at the top level uses when no
      collection is given. *)

  val make :
    ?collection_title:string ->
    ?collection_type:string ->
    ?collection_doi:string ->
    ?volume_title:string ->
    ?number_volumes:string ->
    unit ->
    t
  (** Build a record from whichever fields are supplied. *)

  val collection_title : t -> string option
  (** Title of the collection (proceedings, book series, ...). *)

  val collection_type : t -> string option
  (** Kind of collection, e.g. ["proceedings"] or ["book series"]. *)

  val collection_doi : t -> string option
  (** DOI of the collection as a whole, not of the individual work. *)

  val volume_title : t -> string option
  (** Title of the particular volume in a multi-volume collection. *)

  val number_volumes : t -> string option
  (** Total number of volumes in the collection. *)

  val is_empty : t -> bool
  (** Emptiness test; see {!empty}. *)
end
(** Dates attached to a reference.

    CFF keeps several kinds of date apart:
    - {b date-released}: release of the software or dataset
    - {b date-published}: formal publication of the work
    - {b date-accessed}: last access to an online resource
    - {b date-downloaded}: download of a resource

    When only the year is known (e.g. for older works), set [year] rather
    than a full date. *)
module Dates : sig
  type t

  val empty : t
  (** The empty record; it is what {!val:make} at the top level uses when no
      dates are given. *)

  val make :
    ?date_accessed:Cff_date.t ->
    ?date_downloaded:Cff_date.t ->
    ?date_published:Cff_date.t ->
    ?date_released:Cff_date.t ->
    ?year:int ->
    ?year_original:int ->
    ?month:int ->
    ?issue_date:string ->
    unit ->
    t
  (** Build a record from whichever fields are supplied. *)

  val date_accessed : t -> Cff_date.t option
  (** When an online resource was accessed for the citation. *)

  val date_downloaded : t -> Cff_date.t option
  (** When a resource was downloaded. *)

  val date_published : t -> Cff_date.t option
  (** Formal publication date. *)

  val date_released : t -> Cff_date.t option
  (** Release date, typically of software. *)

  val year : t -> int option
  (** Publication year, for when the full date is unknown. *)

  val year_original : t -> int option
  (** Year of the original publication (reprints, translations). *)

  val month : t -> int option
  (** Publication month (1-12), for when only month and year are known. *)

  val issue_date : t -> string option
  (** Issue date as free text, for periodicals dated by issue. *)

  val is_empty : t -> bool
  (** Emptiness test; see {!empty}. *)
end
(** Identifiers and repository links used to locate and cite a work:
    - DOI: Digital Object Identifier (preferred for academic works)
    - URL: web address
    - ISBN: International Standard Book Number
    - ISSN: International Standard Serial Number (journals)
    - PMCID: PubMed Central ID
    - NIHMSID: NIH Manuscript Submission ID *)
module Identifiers : sig
  type t

  val empty : t
  (** The empty record; it is what {!val:make} at the top level uses when no
      identifiers are given. *)

  val make :
    ?doi:string ->
    ?url:string ->
    ?repository:string ->
    ?repository_code:string ->
    ?repository_artifact:string ->
    ?isbn:string ->
    ?issn:string ->
    ?pmcid:string ->
    ?nihmsid:string ->
    ?identifiers:Cff_identifier.t list ->
    unit ->
    t
  (** Build a record from whichever fields are supplied. *)

  val doi : t -> string option
  (** Digital Object Identifier, e.g. ["10.1234/example"]. *)

  val url : t -> string option
  (** URL at which the work can be reached. *)

  val repository : t -> string option
  (** General repository URL. *)

  val repository_code : t -> string option
  (** Source-code repository (GitHub, GitLab, ...). *)

  val repository_artifact : t -> string option
  (** Repository of built artifacts (npm, PyPI, Docker Hub, ...). *)

  val isbn : t -> string option
  (** International Standard Book Number. *)

  val issn : t -> string option
  (** International Standard Serial Number, for journals. *)

  val pmcid : t -> string option
  (** PubMed Central identifier. *)

  val nihmsid : t -> string option
  (** NIH Manuscript Submission System identifier. *)

  val identifiers : t -> Cff_identifier.t list option
  (** Further typed identifiers (DOI, URL, SWH, other). *)

  val is_empty : t -> bool
  (** Emptiness test; see {!empty}. *)
end
(** People and organisations connected with a work besides its authors:
    - editors of collections or journals
    - publishers and their locations
    - academic institutions (theses, reports)
    - conferences (proceedings, presentations) *)
module Entities : sig
  type t

  val empty : t
  (** The empty record; it is what {!val:make} at the top level uses when no
      entities are given. *)

  val make :
    ?editors:Cff_author.t list ->
    ?editors_series:Cff_author.t list ->
    ?translators:Cff_author.t list ->
    ?recipients:Cff_author.t list ->
    ?senders:Cff_author.t list ->
    ?contact:Cff_author.t list ->
    ?publisher:Cff_author.Entity.t ->
    ?institution:Cff_author.Entity.t ->
    ?conference:Cff_author.Entity.t ->
    ?database_provider:Cff_author.Entity.t ->
    ?location:Cff_author.Entity.t ->
    unit ->
    t
  (** Build a record from whichever fields are supplied. *)

  val editors : t -> Cff_author.t list option
  (** Editors of the work (edited volumes). *)

  val editors_series : t -> Cff_author.t list option
  (** Editors of the series (book series). *)

  val translators : t -> Cff_author.t list option
  (** Translators of the work. *)

  val recipients : t -> Cff_author.t list option
  (** Recipients, for personal communications. *)

  val senders : t -> Cff_author.t list option
  (** Senders, for personal communications. *)

  val contact : t -> Cff_author.t list option
  (** Contact persons for the work. *)

  val publisher : t -> Cff_author.Entity.t option
  (** Publishing organisation. *)

  val institution : t -> Cff_author.Entity.t option
  (** Academic or research institution (theses, reports). *)

  val conference : t -> Cff_author.Entity.t option
  (** Conference at which the work was presented. *)

  val database_provider : t -> Cff_author.Entity.t option
  (** Provider of a database, for data references. *)

  val location : t -> Cff_author.Entity.t option
  (** Location entity (city, conference venue). *)

  val is_empty : t -> bool
  (** [true] when every field is [None]. *)
end
(** Descriptive metadata: keywords, languages, licensing and notes.

    Extra information that helps with discovery and states the rights
    attached to the work. *)
module Metadata : sig
  type t

  val empty : t
  (** The record in which every field is [None]. *)

  val make :
    ?keywords:string list ->
    ?languages:string list ->
    ?license:Cff_license.t ->
    ?license_url:string ->
    ?copyright:string ->
    ?scope:string ->
    ?notes:string ->
    unit ->
    t
  (** Build a record from whichever fields are supplied; the others stay
      [None]. *)

  val keywords : t -> string list option
  (** Keywords describing the work. *)

  val languages : t -> string list option
  (** Languages the work is available in (ISO 639 codes). *)

  val license : t -> Cff_license.t option
  (** SPDX license identifier(s). *)

  val license_url : t -> string option
  (** URL of the license text, for non-SPDX licenses. *)

  val copyright : t -> string option
  (** Copyright statement. *)

  val scope : t -> string option
  (** Scope of the reference, i.e. which aspect it covers. *)

  val notes : t -> string option
  (** Free-form notes or comments. *)

  val is_empty : t -> bool
  (** [true] when every field is [None]. *)
end
(** Technical and domain-specific fields, used by software, data and other
    specialised reference types:
    - software: commit hash, version, filename
    - theses: thesis type, department
    - data: data type, database, format
    - patents: patent states
    - dictionaries and encyclopedias: term, entry *)
module Technical : sig
  type t

  val empty : t
  (** The record in which every field is [None]. *)

  val make :
    ?commit:string ->
    ?version:string ->
    ?filename:string ->
    ?format:string ->
    ?medium:string ->
    ?data_type:string ->
    ?database:string ->
    ?number:string ->
    ?patent_states:string list ->
    ?thesis_type:string ->
    ?term:string ->
    ?entry:string ->
    ?department:string ->
    ?loc_start:string ->
    ?loc_end:string ->
    unit ->
    t
  (** Build a record from whichever fields are supplied; the others stay
      [None]. *)

  val commit : t -> string option
  (** Git commit hash or other VCS revision. *)

  val version : t -> string option
  (** Version string of the software or data. *)

  val filename : t -> string option
  (** Name of the referenced file. *)

  val format : t -> string option
  (** Format of the work, e.g. ["PDF"] or ["HTML"]. *)

  val medium : t -> string option
  (** Physical medium, e.g. ["CD-ROM"] or ["print"]. *)

  val data_type : t -> string option
  (** Type of data, for datasets. *)

  val database : t -> string option
  (** Name of the database. *)

  val number : t -> string option
  (** Report, patent or standard number. *)

  val patent_states : t -> string list option
  (** Countries in which a patent is held. *)

  val thesis_type : t -> string option
  (** Type of thesis (["PhD"], ["Master's"], ...). *)

  val term : t -> string option
  (** Dictionary or encyclopedia term being referenced. *)

  val entry : t -> string option
  (** Name of the encyclopedia entry. *)

  val department : t -> string option
  (** Academic department, for theses. *)

  val loc_start : t -> string option
  (** First line or location in the source code. *)

  val loc_end : t -> string option
  (** Last line or location in the source code. *)

  val is_empty : t -> bool
  (** [true] when every field is [None]. *)
end
494494+495495+(** {1 Reference Type} *)
(** A complete reference: one value of each sub-record. The representation is
    abstract; build values with {!make} or {!make_simple}. *)
type t
val make :
  core:Core.t ->
  ?publication:Publication.t ->
  ?collection:Collection.t ->
  ?dates:Dates.t ->
  ?identifiers:Identifiers.t ->
  ?entities:Entities.t ->
  ?metadata:Metadata.t ->
  ?technical:Technical.t ->
  unit ->
  t
(** [make ~core ()] builds a reference out of sub-records.

    [core] is the only mandatory argument; each sub-record that is left out
    defaults to the [empty] value of its module. *)
val make_simple :
  type_:Cff_enums.Reference_type.t ->
  title:string ->
  authors:Cff_author.t list ->
  ?doi:string ->
  ?year:int ->
  ?journal:string ->
  unit ->
  t
(** Shortcut constructor for simple references.

    Sets only the most common fields (type, title, authors and, optionally,
    DOI, year and journal), which is enough for a quick article or software
    reference. *)
526526+527527+(** {2 Sub-record Accessors} *)
528528+529529+val core : t -> Core.t
530530+(** The core identity fields. *)
531531+532532+val publication : t -> Publication.t
533533+(** Publication metadata (journal, volume, pages). *)
534534+535535+val collection : t -> Collection.t
536536+(** Collection metadata (proceedings, book series). *)
537537+538538+val dates : t -> Dates.t
539539+(** Date-related fields. *)
540540+541541+val identifiers : t -> Identifiers.t
542542+(** Identifiers and links. *)
543543+544544+val entities : t -> Entities.t
545545+(** Related entities (editors, publisher). *)
546546+547547+val metadata : t -> Metadata.t
548548+(** Descriptive metadata (keywords, license). *)
549549+550550+val technical : t -> Technical.t
551551+(** Technical fields (commit, version, format). *)
552552+553553+(** {2 Direct Accessors for Common Fields}
554554+555555+ Convenience accessors that delegate to sub-records. *)
556556+557557+val type_ : t -> Cff_enums.Reference_type.t
558558+(** Shortcut for [Core.type_ (core t)]. *)
559559+560560+val title : t -> string
561561+(** Shortcut for [Core.title (core t)]. *)
562562+563563+val authors : t -> Cff_author.t list
564564+(** Shortcut for [Core.authors (core t)]. *)
565565+566566+val doi : t -> string option
567567+(** Shortcut for [Identifiers.doi (identifiers t)]. *)
568568+569569+val year : t -> int option
570570+(** Shortcut for [Dates.year (dates t)]. *)
571571+572572+(** {1 Formatting and Codec} *)
573573+574574+val pp : Format.formatter -> t -> unit
575575+(** Pretty-print a reference in a human-readable format. *)
576576+577577+val jsont : t Jsont.t
578578+(** JSON/YAML codec for serialization. *)
+175
lib/cff_root.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Root CFF type. *)
(** In-memory representation of a complete CFF document.

    The first four fields are mandatory; every other field is optional and
    is [None] when the corresponding key is absent. *)
type t = {
  cff_version : string;  (** CFF schema version, e.g. ["1.2.0"]. *)
  message : string;  (** Message telling readers how to cite the work. *)
  title : string;  (** Name of the software or dataset. *)
  authors : Cff_author.t list;  (** Creators of the work. *)
  abstract : string option;  (** Description of the work. *)
  commit : string option;  (** Commit hash or revision number. *)
  contact : Cff_author.t list option;  (** Contact persons or entities. *)
  date_released : Cff_date.t option;  (** Release date. *)
  doi : string option;  (** Primary Digital Object Identifier. *)
  identifiers : Cff_identifier.t list option;  (** Additional identifiers. *)
  keywords : string list option;  (** Descriptive keywords. *)
  license : Cff_license.t option;  (** License identifier(s). *)
  license_url : string option;  (** URL of the license text. *)
  preferred_citation : Cff_reference.t option;
      (** Reference to cite instead of the work itself. *)
  references : Cff_reference.t list option;  (** Related or cited works. *)
  repository : string option;  (** Repository URL. *)
  repository_artifact : string option;  (** Built-artifact repository URL. *)
  repository_code : string option;  (** Source code repository URL. *)
  type_ : Cff_enums.Cff_type.t option;  (** Kind of work described. *)
  url : string option;  (** Homepage / landing page URL. *)
  version : string option;  (** Version string of the work. *)
}
(** [make ~cff_version ~message ~title ~authors ... ()] builds a CFF value.

    The four labelled arguments are mandatory. Every optional argument is
    stored as given, so an omitted argument yields [None] in the matching
    field. The trailing [unit] lets the optional arguments be erased. *)
let make ~cff_version ~message ~title ~authors ?abstract ?commit ?contact
    ?date_released ?doi ?identifiers ?keywords ?license ?license_url
    ?preferred_citation ?references ?repository ?repository_artifact
    ?repository_code ?type_ ?url ?version () =
  {
    (* Required fields. *)
    cff_version;
    message;
    title;
    authors;
    (* Optional fields, each already an [option] at this point. *)
    abstract;
    commit;
    contact;
    date_released;
    doi;
    identifiers;
    keywords;
    license;
    license_url;
    preferred_citation;
    references;
    repository;
    repository_artifact;
    repository_code;
    type_;
    url;
    version;
  }
(* Accessors for the required fields. Each one simply projects the field of
   the same name out of the record. *)
let cff_version { cff_version; _ } = cff_version
let message { message; _ } = message
let title { title; _ } = title
let authors { authors; _ } = authors

(* Accessors for the optional fields; each returns the stored [option]. *)
let abstract { abstract; _ } = abstract
let commit { commit; _ } = commit
let contact { contact; _ } = contact
let date_released { date_released; _ } = date_released
let doi { doi; _ } = doi
let identifiers { identifiers; _ } = identifiers
let keywords { keywords; _ } = keywords
let license { license; _ } = license
let license_url { license_url; _ } = license_url
let preferred_citation { preferred_citation; _ } = preferred_citation
let references { references; _ } = references
let repository { repository; _ } = repository
let repository_artifact { repository_artifact; _ } = repository_artifact
let repository_code { repository_code; _ } = repository_code
let type_ { type_; _ } = type_
let url { url; _ } = url
let version { version; _ } = version
(** [make_simple ~title ~authors ?version ?doi ?license ()] builds a minimal
    CFF value by delegating to {!make} with [cff_version] fixed to ["1.2.0"]
    and a stock citation message. All other optional fields are left unset. *)
let make_simple ~title ~authors ?version ?doi ?license () =
  make ~cff_version:"1.2.0"
    ~message:
      "If you use this software, please cite it using the metadata from this file."
    ~title ~authors ?version ?doi ?license ()
(** [pp ppf t] prints [t] on [ppf] as a sequence of [key: value] lines inside
    a vertical box.

    Required fields are always printed; optional fields are printed only when
    present. List-valued fields are printed as a header line followed by one
    [- item] line per element. Unlike the previous version, [license-url] and
    [repository-artifact] are now printed as well, so every field of the
    record is covered. *)
let pp ppf t =
  let str = Format.pp_print_string in
  (* Print one "label: value" line. *)
  let field label pp_v v = Format.fprintf ppf "%s: %a@," label pp_v v in
  (* Print a "label: value" line only when the value is present. *)
  let opt_field label pp_v = Option.iter (field label pp_v) in
  (* Print a "label:" header followed by one dash-prefixed line per item. *)
  let items label pp_v vs =
    Format.fprintf ppf "%s:@," label;
    List.iter (fun v -> Format.fprintf ppf " - %a@," pp_v v) vs
  in
  let opt_items label pp_v = Option.iter (items label pp_v) in
  Format.fprintf ppf "@[<v>";
  field "cff-version" str t.cff_version;
  field "title" str t.title;
  field "message" str t.message;
  items "authors" Cff_author.pp t.authors;
  opt_field "version" str t.version;
  opt_field "doi" str t.doi;
  opt_field "date-released" Cff_date.pp t.date_released;
  opt_field "license" Cff_license.pp t.license;
  opt_field "license-url" str t.license_url;
  opt_field "url" str t.url;
  opt_field "repository" str t.repository;
  opt_field "repository-artifact" str t.repository_artifact;
  opt_field "repository-code" str t.repository_code;
  opt_field "abstract" str t.abstract;
  opt_field "commit" str t.commit;
  opt_field "type" Cff_enums.Cff_type.pp t.type_;
  opt_items "keywords" str t.keywords;
  opt_items "identifiers" Cff_identifier.pp t.identifiers;
  opt_items "contact" Cff_author.pp t.contact;
  opt_items "references" Cff_reference.pp t.references;
  Option.iter
    (fun pc ->
      Format.fprintf ppf "preferred-citation:@, %a@," Cff_reference.pp pc)
    t.preferred_citation;
  Format.fprintf ppf "@]"
(** [list_jsont elt] is a codec for OCaml lists of [elt] values, obtained by
    mapping the [Jsont.array] codec through [Array.to_list] (decoding) and
    [Array.of_list] (encoding). *)
let list_jsont elt =
  Jsont.map ~dec:Stdlib.Array.to_list ~enc:Stdlib.Array.of_list
    (Jsont.array elt)
(** Object codec for the root CFF document.

    The four required keys are declared with [Object.mem]; every other key is
    declared with [Object.opt_mem]. The ["license"] member is decoded with
    [Cff_license.jsont_lenient]. *)
let jsont =
  let open Jsont in
  let author_list = list_jsont Cff_author.jsont in
  let identifier_list = list_jsont Cff_identifier.jsont in
  let reference_list = list_jsont Cff_reference.jsont in
  let keyword_list = list_jsont string in
  (* Decoder-side constructor. Its parameters must stay in exactly the same
     order as the member declarations below, since members are applied
     positionally. *)
  let of_members cff_version message title authors abstract commit contact
      date_released doi identifiers keywords license license_url
      preferred_citation references repository repository_artifact
      repository_code type_ url version =
    {
      cff_version;
      message;
      title;
      authors;
      abstract;
      commit;
      contact;
      date_released;
      doi;
      identifiers;
      keywords;
      license;
      license_url;
      preferred_citation;
      references;
      repository;
      repository_artifact;
      repository_code;
      type_;
      url;
      version;
    }
  in
  Object.map ~kind:"CFF" of_members
  (* Required members. *)
  |> Object.mem "cff-version" string ~enc:(fun cff -> cff.cff_version)
  |> Object.mem "message" string ~enc:(fun cff -> cff.message)
  |> Object.mem "title" string ~enc:(fun cff -> cff.title)
  |> Object.mem "authors" author_list ~enc:(fun cff -> cff.authors)
  (* Optional members. *)
  |> Object.opt_mem "abstract" string ~enc:(fun cff -> cff.abstract)
  |> Object.opt_mem "commit" string ~enc:(fun cff -> cff.commit)
  |> Object.opt_mem "contact" author_list ~enc:(fun cff -> cff.contact)
  |> Object.opt_mem "date-released" Cff_date.jsont
       ~enc:(fun cff -> cff.date_released)
  |> Object.opt_mem "doi" string ~enc:(fun cff -> cff.doi)
  |> Object.opt_mem "identifiers" identifier_list
       ~enc:(fun cff -> cff.identifiers)
  |> Object.opt_mem "keywords" keyword_list ~enc:(fun cff -> cff.keywords)
  |> Object.opt_mem "license" Cff_license.jsont_lenient
       ~enc:(fun cff -> cff.license)
  |> Object.opt_mem "license-url" string ~enc:(fun cff -> cff.license_url)
  |> Object.opt_mem "preferred-citation" Cff_reference.jsont
       ~enc:(fun cff -> cff.preferred_citation)
  |> Object.opt_mem "references" reference_list
       ~enc:(fun cff -> cff.references)
  |> Object.opt_mem "repository" string ~enc:(fun cff -> cff.repository)
  |> Object.opt_mem "repository-artifact" string
       ~enc:(fun cff -> cff.repository_artifact)
  |> Object.opt_mem "repository-code" string
       ~enc:(fun cff -> cff.repository_code)
  |> Object.opt_mem "type" Cff_enums.Cff_type.jsont
       ~enc:(fun cff -> cff.type_)
  |> Object.opt_mem "url" string ~enc:(fun cff -> cff.url)
  |> Object.opt_mem "version" string ~enc:(fun cff -> cff.version)
  |> Object.finish
+249
lib/cff_root.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Root CFF type representing a complete [CITATION.cff] file.
77+88+ A [CITATION.cff] file is the standard way to provide citation metadata
99+ for research software and datasets. This module defines the root type
1010+ containing all top-level fields from the CFF 1.2.0 specification.
1111+1212+ {2 Required Fields}
1313+1414+ Every valid CFF file must include:
1515+ - {!cff_version}: Schema version (["1.2.0"])
1616+ - {!message}: Instructions for citing the work
1717+ - {!title}: Name of the software or dataset
1818+ - {!authors}: List of persons and/or entities
1919+2020+ {2 Common Optional Fields}
2121+2222+ - {!version}: Software version string
2323+ - {!doi}: Digital Object Identifier
2424+ - {!date_released}: Publication/release date
2525+ - {!license}: SPDX license identifier(s)
2626+ - {!keywords}: Descriptive keywords
2727+ - {!abstract}: Description of the work
2828+2929+ {2 Citation Redirection}
3030+3131+ The {!preferred_citation} field allows redirecting citations to
3232+ a related work (e.g., a journal article describing the software).
3333+ The {!references} field lists works that the software cites or
3434+ depends upon.
3535+3636+ {2 Example}
3737+3838+ {[
3939+ let cff = Cff_root.make
4040+ ~cff_version:"1.2.0"
4141+ ~message:"If you use this software, please cite it as below."
4242+ ~title:"My Research Software"
4343+ ~authors:[Cff_author.Person (Cff_author.Person.make
4444+ ~family_names:"Smith"
4545+ ~given_names:"Jane"
4646+ ())]
4747+ ~version:"1.0.0"
4848+ ~doi:"10.5281/zenodo.1234567"
4949+ ~date_released:(2024, 1, 15)
5050+ ~license:(Cff_license.single "MIT")
5151+ ()
5252+ ]} *)
5353+5454+(** The abstract type representing a complete CFF document. *)
5555+type t
5656+5757+(** {1 Construction} *)
5858+5959+val make :
6060+ cff_version:string ->
6161+ message:string ->
6262+ title:string ->
6363+ authors:Cff_author.t list ->
6464+ ?abstract:string ->
6565+ ?commit:string ->
6666+ ?contact:Cff_author.t list ->
6767+ ?date_released:Cff_date.t ->
6868+ ?doi:string ->
6969+ ?identifiers:Cff_identifier.t list ->
7070+ ?keywords:string list ->
7171+ ?license:Cff_license.t ->
7272+ ?license_url:string ->
7373+ ?preferred_citation:Cff_reference.t ->
7474+ ?references:Cff_reference.t list ->
7575+ ?repository:string ->
7676+ ?repository_artifact:string ->
7777+ ?repository_code:string ->
7878+ ?type_:Cff_enums.Cff_type.t ->
7979+ ?url:string ->
8080+ ?version:string ->
8181+ unit -> t
8282+(** [make ~cff_version ~message ~title ~authors ...] constructs a CFF value.
8383+8484+ @param cff_version The CFF schema version, typically ["1.2.0"]
8585+ @param message Instructions for users on how to cite the work
8686+ @param title The name of the software or dataset
8787+ @param authors List of persons and/or entities who created the work *)
8888+8989+(** {1 Required Fields} *)
9090+9191+val cff_version : t -> string
9292+(** The CFF schema version that this file adheres to.
9393+9494+ For CFF 1.2.0 files, this should be ["1.2.0"]. The version determines
9595+ which keys are valid and how they should be interpreted. *)
9696+9797+val message : t -> string
9898+(** A message to readers explaining how to cite the work.
9999+100100+ Common examples:
101101+ - ["If you use this software, please cite it using the metadata from this file."]
102102+ - ["Please cite this software using the metadata from 'preferred-citation'."]
103103+104104+ The message should guide users toward the preferred citation method. *)
105105+106106+val title : t -> string
107107+(** The name of the software or dataset.
108108+109109+ This is the title that should appear in citations. For software, it's
110110+ typically the project name; for datasets, the dataset title. *)
111111+112112+val authors : t -> Cff_author.t list
113113+(** The creators of the software or dataset.
114114+115115+ Authors can be persons (individuals) or entities (organizations).
116116+ At least one author is required for a valid CFF file. The order
117117+ typically reflects contribution significance. *)
118118+119119+(** {1 Optional Fields} *)
120120+121121+val abstract : t -> string option
122122+(** A description of the software or dataset.
123123+124124+ Provides context about what the work does, its purpose, and scope. *)
125125+126126+val commit : t -> string option
127127+(** The commit hash or revision number of the software version.
128128+129129+ Useful for precise version identification beyond semantic versioning.
130130+ Example: ["1ff847d81f29c45a3a1a5ce73d38e45c2f319bba"] *)
131131+132132+val contact : t -> Cff_author.t list option
133133+(** Contact persons or entities for the software or dataset.
134134+135135+ May differ from authors; useful when the primary contact is a
136136+ project maintainer rather than the original author. *)
137137+138138+val date_released : t -> Cff_date.t option
139139+(** The date when the software or dataset was released.
140140+141141+ Format is [(year, month, day)], corresponding to ISO 8601 [YYYY-MM-DD]. *)
142142+143143+val doi : t -> string option
144144+(** The Digital Object Identifier for the software or dataset.
145145+146146+ DOIs provide persistent, citable identifiers. This is a shorthand
147147+ for a single DOI; use {!identifiers} for multiple DOIs or other
148148+ identifier types. Example: ["10.5281/zenodo.1234567"] *)
149149+150150+val identifiers : t -> Cff_identifier.t list option
151151+(** Additional identifiers beyond the primary DOI.
152152+153153+ Each identifier has a type (DOI, URL, SWH, other), value, and
154154+ optional description. Useful for versioned DOIs, Software Heritage
155155+ identifiers, or repository URLs. *)
156156+157157+val keywords : t -> string list option
158158+(** Descriptive keywords for the work.
159159+160160+ Help with discoverability and categorization. Example:
161161+ [["machine learning"; "image processing"; "python"]] *)
162162+163163+val license : t -> Cff_license.t option
164164+(** The SPDX license identifier(s) for the work.
165165+166166+ Uses {{:https://spdx.org/licenses/}SPDX identifiers}. Multiple
167167+ licenses imply an OR relationship (user may choose any).
168168+ Example: ["MIT"], ["Apache-2.0"], or [["GPL-3.0-only"; "MIT"]]. *)
169169+170170+val license_url : t -> string option
171171+(** URL to the license text for non-standard licenses.
172172+173173+ Only needed for licenses not in the SPDX list. Standard SPDX
174174+ licenses have well-known URLs. *)
175175+176176+val preferred_citation : t -> Cff_reference.t option
177177+(** A reference to cite instead of the software itself.
178178+179179+ Used for "credit redirection" when authors prefer citation of
180180+ a related publication (e.g., a methods paper) over the software.
181181+ Note: Software citation principles recommend citing software
182182+ directly; use this field judiciously. *)
183183+184184+val references : t -> Cff_reference.t list option
185185+(** Works that this software cites or depends upon.
186186+187187+ Functions like a bibliography, listing dependencies, foundational
188188+ works, or related publications. Each reference includes full
189189+ bibliographic metadata. *)
190190+191191+val repository : t -> string option
192192+(** URL to the repository where the software is developed.
193193+194194+ Typically a version control system URL. For source code repositories,
195195+ prefer {!repository_code}. *)
196196+197197+val repository_artifact : t -> string option
198198+(** URL to the built/compiled artifact repository.
199199+200200+ For binary distributions, package registries (npm, PyPI, CRAN),
201201+ or container registries. *)
202202+203203+val repository_code : t -> string option
204204+(** URL to the source code repository.
205205+206206+ Typically a GitHub, GitLab, or similar URL where the source
207207+ code is publicly accessible. *)
208208+209209+val type_ : t -> Cff_enums.Cff_type.t option
210210+(** The type of work: [`Software] (default) or [`Dataset].
211211+212212+ Most CFF files describe software; use [`Dataset] for data packages. *)
213213+214214+val url : t -> string option
215215+(** The URL of the software or dataset homepage.
216216+217217+ A general landing page, documentation site, or project website. *)
218218+219219+val version : t -> string option
220220+(** The version string of the software or dataset.
221221+222222+ Can be any version format: semantic versioning (["1.2.3"]),
223223+ date-based (["2024.01"]), or other schemes. *)
224224+225225+(** {1 Convenience Constructors} *)
226226+227227+val make_simple :
228228+ title:string ->
229229+ authors:Cff_author.t list ->
230230+ ?version:string ->
231231+ ?doi:string ->
232232+ ?license:Cff_license.t ->
233233+ unit -> t
234234+(** Create a minimal CFF with sensible defaults.
235235+236236+ Uses [cff_version = "1.2.0"] and the standard message:
237237+ ["If you use this software, please cite it using the metadata from this file."]
238238+239239+ This is the quickest way to create a valid CFF for simple projects. *)
240240+241241+(** {1 Formatting and Codec} *)
242242+243243+val pp : Format.formatter -> t -> unit
244244+(** Pretty-print a CFF value in a human-readable YAML-like format. *)
245245+246246+val jsont : t Jsont.t
247247+(** JSON/YAML codec for serialization and deserialization.
248248+249249+ Used internally by the YAML codec functions. *)
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Eio-based I/O for CFF. *)
(* Error case added to Eio's extensible error type; the payload is the
   message reported by the YAML codec. *)
type Eio.Exn.err += E of string

(* Register a printer for [E]. It returns [true] when it handled the error
   and [false] for any other constructor of the extensible type. *)
let () =
  let pp_cff_error ppf = function
    | E msg ->
      Format.fprintf ppf "Cff %s" msg;
      true
    | _ -> false
  in
  Eio.Exn.register_pp pp_cff_error

(* [err msg] wraps [msg] in [E] and turns it into an exception value via
   [Eio.Exn.create]; callers are expected to [raise] the result. *)
let err msg = E msg |> Eio.Exn.create
(* [of_yaml_string s] decodes [s] as YAML with [Cff.jsont]. A decode error is
   raised as the exception built by [err]. *)
let of_yaml_string s =
  let decoded =
    Yamlt.decode ~layout:true Cff.jsont (Bytesrw.Bytes.Reader.of_string s)
  in
  match decoded with
  | Error msg -> raise (err msg)
  | Ok cff -> cff
(* [to_yaml_string t] encodes [t] in block-style YAML into an in-memory
   buffer and returns its contents. An encode error is raised as the
   exception built by [err]. *)
let to_yaml_string t =
  let out = Buffer.create 1024 in
  let encoded =
    Yamlt.encode ~format:Yamlt.Block Cff.jsont t ~eod:true
      (Bytesrw.Bytes.Writer.of_buffer out)
  in
  match encoded with
  | Error msg -> raise (err msg)
  | Ok () -> Buffer.contents out
(* [of_yaml_flow flow] decodes a CFF document read from [flow] through a
   bytesrw reader. A decode error is raised as the exception built by
   [err]. *)
let of_yaml_flow flow =
  let decoded =
    Yamlt.decode ~layout:true Cff.jsont
      (Bytesrw_eio.bytes_reader_of_flow flow)
  in
  match decoded with
  | Error msg -> raise (err msg)
  | Ok cff -> cff
(* [to_yaml_flow flow t] encodes [t] in block-style YAML into [flow] through
   a bytesrw writer. An encode error is raised as the exception built by
   [err]. *)
let to_yaml_flow flow t =
  let encoded =
    Yamlt.encode ~format:Yamlt.Block Cff.jsont t ~eod:true
      (Bytesrw_eio.bytes_writer_of_flow flow)
  in
  match encoded with
  | Error msg -> raise (err msg)
  | Ok () -> ()
(* [of_file ~fs path] loads the file at [path] under [fs] and parses it.

   Loading happens outside the handler, so load failures propagate
   unchanged. An [Eio.Exn.Io] raised while parsing is re-raised with extra
   context naming the file. *)
let of_file ~fs path =
  let contents = Eio.Path.load Eio.Path.(fs / path) in
  match of_yaml_string contents with
  | cff -> cff
  | exception (Eio.Exn.Io _ as ex) ->
    (* Capture the backtrace first so the re-raise keeps the original one. *)
    let bt = Printexc.get_raw_backtrace () in
    Eio.Exn.reraise_with_context ex bt "parsing CFF file %S" path
(* [to_file ~fs path t] serializes [t] to YAML and saves it at [path] under
   [fs], creating the file with mode 0o644 or truncating an existing one. *)
let to_file ~fs path t =
  let yaml = to_yaml_string t in
  let target = Eio.Path.(fs / path) in
  Eio.Path.save ~create:(`Or_truncate 0o644) target yaml
+81
lib_eio/cff_eio.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Eio-based I/O for CFF.
77+88+ This module provides YAML parsing and serialization for CFF using
99+ {{:https://github.com/ocaml-multicore/eio}Eio} for effect-based I/O.
1010+1111+ All functions raise {!Eio.Exn.Io} on errors. Parse and encode failures carry
1212+ the {!E} error; {!of_file} additionally adds the file path as context. This
1313+ integrates naturally with Eio's error handling conventions.
1414+1515+ {1 Example}
1616+1717+ {[
1818+ Eio_main.run @@ fun env ->
1919+ let fs = Eio.Stdenv.fs env in
2020+ let cff = Cff_eio.of_file ~fs "CITATION.cff" in
2121+ Eio.traceln "Title: %s" (Cff.title cff);
2222+ Eio.traceln "Version: %s"
2323+ (Option.value ~default:"unspecified" (Cff.version cff))
2424+ ]}
2525+2626+ {1 Errors}
2727+2828+ Parsing and encoding errors are raised as {!Eio.Exn.Io} exceptions
2929+ with the error type {!E}. *)
3030+3131+type Eio.Exn.err += E of string
3232+(** CFF parsing or encoding error. The string contains the error message. *)
3333+3434+(** {1 String Functions} *)
3535+3636+val of_yaml_string : string -> Cff.t
3737+(** [of_yaml_string s] parses a CFF from YAML string [s].
3838+3939+ @raise Eio.Exn.Io on parse error. *)
4040+4141+val to_yaml_string : Cff.t -> string
4242+(** [to_yaml_string cff] serializes [cff] to a YAML string.
4343+4444+ The output uses YAML block style for readability.
4545+4646+ @raise Eio.Exn.Io on encoding error. *)
4747+4848+(** {1 Flow Functions} *)
4949+5050+val of_yaml_flow : _ Eio.Flow.source -> Cff.t
5151+(** [of_yaml_flow flow] parses a CFF from an Eio source flow.
5252+5353+ Reads directly from the flow using bytesrw-eio.
5454+5555+ @raise Eio.Exn.Io on parse error. *)
5656+5757+val to_yaml_flow : _ Eio.Flow.sink -> Cff.t -> unit
5858+(** [to_yaml_flow flow cff] serializes [cff] to an Eio sink flow.
5959+6060+ Writes directly to the flow using bytesrw-eio.
6161+6262+ @raise Eio.Exn.Io on encoding error. *)
6363+6464+(** {1 File Functions} *)
6565+6666+val of_file : fs:_ Eio.Path.t -> string -> Cff.t
6767+(** [of_file ~fs path] reads and parses a [CITATION.cff] file.
6868+6969+ @param fs The Eio filesystem (e.g., [Eio.Stdenv.fs env])
7070+ @param path Path to the CFF file
7171+ @raise Eio.Exn.Io if the file cannot be read or contains invalid CFF data.
7272+ The exception context includes the file path. *)
7373+7474+val to_file : fs:_ Eio.Path.t -> string -> Cff.t -> unit
7575+(** [to_file ~fs path cff] writes [cff] to a file at [path].
7676+7777+ Creates or overwrites the file.
7878+7979+ @param fs The Eio filesystem (e.g., [Eio.Stdenv.fs env])
8080+ @param path Path to write the CFF file
8181+ @raise Eio.Exn.Io on I/O or encoding failure. *)
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Unix file I/O for CFF. *)
(* [of_yaml_string s] decodes [s] as YAML with [Cff.jsont] and returns the
   codec's result unchanged. *)
let of_yaml_string s =
  Yamlt.decode ~layout:true Cff.jsont (Bytesrw.Bytes.Reader.of_string s)
(* [to_yaml_string t] encodes [t] in block-style YAML into an in-memory
   buffer. Returns [Ok contents] on success, or the encoder's [Error]
   unchanged. *)
let to_yaml_string t =
  let out = Buffer.create 1024 in
  Yamlt.encode ~format:Yamlt.Block Cff.jsont t ~eod:true
    (Bytesrw.Bytes.Writer.of_buffer out)
  |> Result.map (fun () -> Buffer.contents out)
(* [of_file path] reads the whole file at [path] and parses it.

   Only the read is guarded: a [Sys_error] raised while opening or reading
   becomes [Error msg]. The parse result is returned as is. *)
let of_file path =
  let contents =
    try Ok (In_channel.with_open_text path In_channel.input_all)
    with Sys_error reason -> Error reason
  in
  Result.bind contents of_yaml_string
(* [to_file path t] serializes [t] and writes the result to [path].

   An encoding error is returned before the file is opened. A [Sys_error]
   raised while opening or writing becomes [Error msg]. *)
let to_file path t =
  let write yaml =
    try
      Ok
        (Out_channel.with_open_text path (fun oc ->
             Out_channel.output_string oc yaml))
    with Sys_error reason -> Error reason
  in
  Result.bind (to_yaml_string t) write
+45
lib_unix/cff_unix.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 The ocaml-cff programmers. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Unix file I/O for CFF.
77+88+ This module provides YAML parsing and serialization for CFF using
99+ standard Unix file operations ({!In_channel}, {!Out_channel}).
1010+1111+ {1 Example}
1212+1313+ {[
1414+ match Cff_unix.of_file "CITATION.cff" with
1515+ | Ok cff ->
1616+ Printf.printf "Title: %s\n" (Cff.title cff);
1717+ Printf.printf "Version: %s\n"
1818+ (Option.value ~default:"unspecified" (Cff.version cff))
1919+ | Error msg ->
2020+ Printf.eprintf "Parse error: %s\n" msg
2121+ ]}
2222+2323+ {1 Functions} *)
2424+2525+val of_yaml_string : string -> (Cff.t, string) result
2626+(** [of_yaml_string s] parses a CFF from YAML string [s].
2727+2828+ Returns [Ok cff] on success or [Error msg] with a descriptive error
2929+ message on failure. *)
3030+3131+val to_yaml_string : Cff.t -> (string, string) result
3232+(** [to_yaml_string cff] serializes [cff] to a YAML string.
3333+3434+ The output uses YAML block style for readability. *)
3535+3636+val of_file : string -> (Cff.t, string) result
3737+(** [of_file path] reads and parses a [CITATION.cff] file.
3838+3939+ Returns [Ok cff] on success or [Error msg] if the file cannot be
4040+ read or contains invalid CFF data. *)
4141+4242+val to_file : string -> Cff.t -> (unit, string) result
4343+(** [to_file path cff] writes [cff] to a file at [path].
4545+ Creates or overwrites the file. Returns [Error msg] on encoding or I/O failure. *)