···176176 |> Object.opt_mem "doi" string ~enc:(fun t -> t.doi)
177177 |> Object.opt_mem "identifiers" identifiers_jsont ~enc:(fun t -> t.identifiers)
178178 |> Object.opt_mem "keywords" keywords_jsont ~enc:(fun t -> t.keywords)
179179- |> Object.opt_mem "license" Cff_license.jsont_lenient ~enc:(fun t -> t.license)
179179+ |> Object.opt_mem "license" Cff_license.jsont ~enc:(fun t -> t.license)
180180 |> Object.opt_mem "license-url" string ~enc:(fun t -> t.license_url)
181181 |> Object.opt_mem "preferred-citation" Cff_reference.jsont ~enc:(fun t -> t.preferred_citation)
182182 |> Object.opt_mem "references" references_jsont ~enc:(fun t -> t.references)
+48-115
lib/cff_license.ml
···5566(** SPDX license handling for CFF. *)
7788-module Id = struct
99- type t = string
88+type t = [ `Expr of Spdx_licenses.t | `Raw of string list ]
1091111- (* Case-insensitive lookup in valid license IDs *)
1212- let uppercased_valid_ids =
1313- List.map (fun x -> (x, String.uppercase_ascii x)) Spdx_licenses.valid_license_ids
1010+let of_spdx spdx = `Expr spdx
14111515- let of_string s =
1616- let s_upper = String.uppercase_ascii s in
1717- match List.find_opt (fun (_, up) -> String.equal s_upper up) uppercased_valid_ids with
1818- | Some (canonical, _) -> Ok canonical
1919- | None -> Error (`Invalid_license_id s)
2020-2121- let to_string t = t
2222-2323- let equal = String.equal
2424- let compare = String.compare
1212+let of_string s =
1313+ match Spdx_licenses.parse s with
1414+ | Ok spdx -> `Expr spdx
1515+ | Error _ -> `Raw [s]
25162626- let pp ppf t = Format.pp_print_string ppf t
2727-end
2828-2929-type t = Id.t list (* Non-empty list; multiple = OR relationship *)
3030-3131-let single id = [id]
3232-let multiple ids = ids
3333-3434-let ids t = t
3535-3636-let is_single = function
3737- | [_] -> true
3838- | _ -> false
3939-4040-let of_string s = Result.map single (Id.of_string s)
4141-4242-let of_string_list ss =
4343- let rec aux acc = function
4444- | [] -> Ok (List.rev acc)
4545- | s :: rest ->
4646- match Id.of_string s with
4747- | Ok id -> aux (id :: acc) rest
4848- | Error e -> Error e
1717+let of_strings ss =
1818+ (* Try to parse as OR combination, fall back to Raw *)
1919+ let try_parse_all () =
2020+ let rec build = function
2121+ | [] -> None
2222+ | [s] ->
2323+ (match Spdx_licenses.parse s with
2424+ | Ok spdx -> Some spdx
2525+ | Error _ -> None)
2626+ | s :: rest ->
2727+ (match Spdx_licenses.parse s, build rest with
2828+ | Ok spdx, Some rest_spdx -> Some (Spdx_licenses.OR (spdx, rest_spdx))
2929+ | _ -> None)
3030+ in
3131+ build ss
4932 in
5050- match ss with
5151- | [] -> Error (`Invalid_license_id "empty license list")
5252- | ss -> aux [] ss
3333+ match try_parse_all () with
3434+ | Some spdx -> `Expr spdx
3535+ | None -> `Raw ss
53365454-let to_string_list t = t
3737+let to_spdx = function
3838+ | `Expr spdx -> Some spdx
3939+ | `Raw _ -> None
55405656-let equal t1 t2 =
5757- List.length t1 = List.length t2 &&
5858- List.for_all2 Id.equal t1 t2
4141+let to_strings = function
4242+ | `Expr spdx -> [Spdx_licenses.to_string spdx]
4343+ | `Raw ss -> ss
59446060-let compare t1 t2 =
6161- List.compare Id.compare t1 t2
4545+let pp ppf = function
4646+ | `Expr spdx -> Format.pp_print_string ppf (Spdx_licenses.to_string spdx)
4747+ | `Raw ss ->
4848+ match ss with
4949+ | [s] -> Format.pp_print_string ppf s
5050+ | _ ->
5151+ Format.fprintf ppf "[%a]"
5252+ (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ")
5353+ Format.pp_print_string) ss
62546363-let pp ppf t =
6464- match t with
6565- | [id] -> Id.pp ppf id
6666- | ids ->
6767- Format.fprintf ppf "[%a]"
6868- (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") Id.pp)
6969- ids
7070-7171-(* Convert to Spdx_licenses.t (OR combination) *)
7272-let to_spdx t =
7373- let rec build = function
7474- | [] -> assert false (* t is non-empty *)
7575- | [id] -> Spdx_licenses.Simple (Spdx_licenses.LicenseID id)
7676- | id :: rest ->
7777- Spdx_licenses.OR (Spdx_licenses.Simple (Spdx_licenses.LicenseID id), build rest)
7878- in
7979- build t
8080-8181-(* Convert from Spdx_licenses.t (only simple IDs and OR combinations) *)
8282-let of_spdx spdx =
8383- let rec extract acc = function
8484- | Spdx_licenses.Simple (Spdx_licenses.LicenseID id) ->
8585- Ok (id :: acc)
8686- | Spdx_licenses.Simple (Spdx_licenses.LicenseIDPlus _) ->
8787- Error `Unsupported_expression
8888- | Spdx_licenses.Simple (Spdx_licenses.LicenseRef _) ->
8989- Error `Unsupported_expression
9090- | Spdx_licenses.WITH _ ->
9191- Error `Unsupported_expression
9292- | Spdx_licenses.AND _ ->
9393- Error `Unsupported_expression
9494- | Spdx_licenses.OR (left, right) ->
9595- Result.bind (extract acc left) (fun acc -> extract acc right)
9696- in
9797- Result.map List.rev (extract [] spdx)
9898-9999-(* Jsont codec - handles both single string and array of strings *)
5555+(* Jsont codec - lenient, accepts any string/array *)
10056let jsont =
10157 let string_codec =
10258 Jsont.string |> Jsont.map
103103- ~dec:(fun s ->
104104- match Id.of_string s with
105105- | Ok id -> [id]
106106- | Error (`Invalid_license_id s) ->
107107- Jsont.Error.msgf Jsont.Meta.none "Invalid SPDX license ID: %s" s)
5959+ ~dec:(fun s -> of_string s)
10860 ~enc:(function
109109- | [id] -> id
110110- | _ -> assert false) (* Only used for single-element lists *)
6161+ | `Expr spdx -> Spdx_licenses.to_string spdx
6262+ | `Raw [s] -> s
6363+ | `Raw _ -> assert false)
11164 in
11265 let array_codec =
11366 Jsont.(array string) |> Jsont.map
114114- ~dec:(fun ss ->
115115- match of_string_list (Stdlib.Array.to_list ss) with
116116- | Ok t -> t
117117- | Error (`Invalid_license_id s) ->
118118- Jsont.Error.msgf Jsont.Meta.none "Invalid SPDX license ID: %s" s)
119119- ~enc:(fun t -> Stdlib.Array.of_list t)
120120- in
121121- Jsont.any
122122- ~dec_string:string_codec
123123- ~dec_array:array_codec
124124- ~enc:(fun t ->
125125- match t with
126126- | [_] -> string_codec
127127- | _ -> array_codec)
128128- ()
129129-130130-(* Lenient codec that accepts any string/array without validation *)
131131-let jsont_lenient =
132132- let string_codec =
133133- Jsont.string |> Jsont.map ~dec:(fun s -> [s]) ~enc:(function [s] -> s | _ -> assert false)
134134- in
135135- let array_codec =
136136- Jsont.(array string) |> Jsont.map ~dec:(fun ss -> Stdlib.Array.to_list ss) ~enc:(fun t -> Stdlib.Array.of_list t)
6767+ ~dec:(fun ss -> of_strings (Array.to_list ss))
6868+ ~enc:(fun t -> Array.of_list (to_strings t))
13769 in
13870 Jsont.any
13971 ~dec_string:string_codec
14072 ~dec_array:array_codec
14173 ~enc:(fun t ->
14274 match t with
143143- | [_] -> string_codec
7575+ | `Expr (Spdx_licenses.Simple _) -> string_codec
7676+ | `Raw [_] -> string_codec
14477 | _ -> array_codec)
14578 ()
+41-120
lib/cff_license.mli
···33 SPDX-License-Identifier: ISC
44 ---------------------------------------------------------------------------*)
5566-(** SPDX license identifiers for CFF.
66+(** SPDX license expressions for CFF.
7788 CFF uses {{:https://spdx.org/licenses/}SPDX license identifiers}
99- for the [license] field. SPDX provides a standardized list of
1010- open source license identifiers.
99+ for the [license] field. This module wraps {!Spdx_licenses.t} with
1010+ support for invalid/unknown licenses to enable round-tripping.
11111212- {1 License Field}
1212+ {1 License Representation}
13131414- The [license] field can be a single license identifier like ["MIT"],
1515- or a list of licenses with OR relationship like ["GPL-3.0-only"] and
1616- ["MIT"] together.
1414+ Licenses are represented as either:
1515+ - [`Expr spdx] - A valid, parsed SPDX license expression
1616+ - [`Raw strings] - Unparsed strings for invalid/unknown licenses
17171818- When multiple licenses are listed, it means the user may choose
1919- {b any one} of the listed licenses. This matches the SPDX OR
2020- semantics.
1818+ The parser is lenient: it tries to parse as SPDX but preserves
1919+ invalid strings for round-tripping.
21202221 {1 Examples}
23222423 {2 Single License}
2424+ {[
2525+ license: MIT
2626+ ]}
25272828+ {2 SPDX Expression}
2629 {[
2727- cff-version: "1.2.0"
2828- title: "My Project"
2929- license: MIT
3030+ license: GPL-3.0-or-later WITH Classpath-exception-2.0
3031 ]}
31323233 {2 Multiple Licenses (OR)}
3333-3434 {[
3535- cff-version: "1.2.0"
3636- title: "My Project"
3735 license:
3836 - Apache-2.0
3937 - MIT
4038 ]}
3939+ This is parsed as [Apache-2.0 OR MIT]. *)
41404242- This means the software is available under Apache-2.0 OR MIT.
4141+type t = [ `Expr of Spdx_licenses.t | `Raw of string list ]
4242+(** The license type: either a valid SPDX expression or raw strings. *)
43434444- {1 Common License IDs}
4444+(** {1 Construction} *)
45454646- Some commonly used SPDX license identifiers:
4646+val of_spdx : Spdx_licenses.t -> t
4747+(** [of_spdx spdx] wraps a valid SPDX expression. *)
47484848- - [MIT] - MIT License
4949- - [Apache-2.0] - Apache License 2.0
5050- - [GPL-3.0-only] - GNU General Public License v3.0 only
5151- - [GPL-3.0-or-later] - GNU GPL v3.0 or later
5252- - [BSD-2-Clause] - BSD 2-Clause "Simplified" License
5353- - [BSD-3-Clause] - BSD 3-Clause "New" License
5454- - [ISC] - ISC License
5555- - [MPL-2.0] - Mozilla Public License 2.0
5656- - [LGPL-3.0-only] - GNU Lesser GPL v3.0
5757- - [CC-BY-4.0] - Creative Commons Attribution 4.0
5858-5959- {1 Deprecated IDs}
6060-6161- Some older license identifiers are deprecated in SPDX:
4949+val of_string : string -> t
5050+(** [of_string s] parses [s] as an SPDX expression.
5151+ Returns [`Expr] on success, [`Raw [s]] on parse failure. *)
62526363- - [GPL-2.0] should use [GPL-2.0-only] or [GPL-2.0-or-later]
6464- - [GPL-3.0] should use [GPL-3.0-only] or [GPL-3.0-or-later]
6565- - [LGPL-2.1] should use [LGPL-2.1-only] or [LGPL-2.1-or-later]
5353+val of_strings : string list -> t
5454+(** [of_strings ss] parses a list of license strings.
5555+ If all strings are valid license IDs, returns an [`Expr] with OR combination.
5656+ Otherwise returns [`Raw ss] to preserve the original strings. *)
66576767- The {!jsont_lenient} codec accepts these deprecated IDs. *)
5858+(** {1 Access} *)
68596969-(** A validated SPDX license identifier. *)
7070-module Id : sig
7171- type t
7272- (** A single validated SPDX license ID. *)
6060+val to_spdx : t -> Spdx_licenses.t option
6161+(** [to_spdx t] returns [Some spdx] if [t] is a valid expression,
6262+ [None] if it contains unparsed raw strings. *)
73637474- val of_string : string -> (t, [> `Invalid_license_id of string]) result
7575- (** Parse and validate a license ID.
6464+val to_strings : t -> string list
6565+(** [to_strings t] returns the license as a list of strings.
6666+ For [`Expr], returns the normalized SPDX string.
6767+ For [`Raw], returns the original strings. *)
76687777- The check is case-insensitive. Returns [Error] for unknown
7878- license identifiers. *)
7979-8080- val to_string : t -> string
8181- (** Return the canonical (properly cased) license ID string. *)
8282-8383- val equal : t -> t -> bool
8484- val compare : t -> t -> int
8585-8686- val pp : Format.formatter -> t -> unit
8787- (** Pretty-print the license ID. *)
8888-end
8989-9090-type t
9191-(** A CFF license: one or more SPDX license IDs.
9292-9393- Multiple IDs represent an OR relationship: the user may choose
9494- any of the listed licenses. *)
9595-9696-val single : Id.t -> t
9797-(** Create a license from a single ID. *)
9898-9999-val multiple : Id.t list -> t
100100-(** Create a license from multiple IDs (OR relationship).
101101-102102- Raises [Invalid_argument] if the list is empty. *)
103103-104104-val ids : t -> Id.t list
105105-(** Get the list of license IDs.
106106-107107- For a single license, returns a one-element list. *)
108108-109109-val is_single : t -> bool
110110-(** [true] if this is a single license ID, [false] for multiple. *)
111111-112112-val of_string : string -> (t, [> `Invalid_license_id of string]) result
113113-(** Parse a single license ID string into a license.
114114-115115- Equivalent to [Result.map single (Id.of_string s)]. *)
116116-117117-val of_string_list : string list -> (t, [> `Invalid_license_id of string]) result
118118-(** Parse a list of license ID strings.
119119-120120- All IDs must be valid; returns [Error] if any ID is invalid. *)
121121-122122-val to_string_list : t -> string list
123123-(** Return the list of license ID strings. *)
124124-125125-val equal : t -> t -> bool
126126-(** License equality. *)
127127-128128-val compare : t -> t -> int
129129-(** License comparison. *)
6969+(** {1 Formatting} *)
1307013171val pp : Format.formatter -> t -> unit
132132-(** Pretty-print: single ID or comma-separated list for multiple. *)
133133-134134-(** {1 SPDX Interop} *)
135135-136136-val to_spdx : t -> Spdx_licenses.t
137137-(** Convert to an SPDX license expression (OR combination). *)
138138-139139-val of_spdx : Spdx_licenses.t -> (t, [> `Unsupported_expression]) result
140140-(** Convert from an SPDX license expression.
141141-142142- Only simple license IDs and OR combinations are supported.
143143- Complex expressions using AND, WITH (exceptions), or license
144144- references return [Error `Unsupported_expression]. *)
7272+(** Pretty-print the license. *)
14573146146-(** {1 Codecs} *)
7474+(** {1 Codec} *)
1477514876val jsont : t Jsont.t
149149-(** JSON/YAML codec that validates license IDs.
150150-151151- Handles both single string (["MIT"]) and array of strings.
152152- Returns an error for invalid SPDX license identifiers. *)
153153-154154-val jsont_lenient : t Jsont.t
155155-(** JSON/YAML codec that accepts any string without validation.
7777+(** JSON/YAML codec for licenses.
15678157157- Use this codec when parsing CFF files that may contain deprecated
158158- or non-standard license identifiers. Invalid IDs are preserved
159159- as-is for round-tripping. *)
7979+ Handles both single string and array of strings.
8080+ Lenient: accepts any string without validation for round-tripping. *)
+1-1
lib/cff_reference.ml
···550550 ~enc:(fun r -> r.metadata.keywords)
551551 |> Jsont.Object.opt_mem "languages" string_list_jsont
552552 ~enc:(fun r -> r.metadata.languages)
553553- |> Jsont.Object.opt_mem "license" Cff_license.jsont_lenient
553553+ |> Jsont.Object.opt_mem "license" Cff_license.jsont
554554 ~enc:(fun r -> r.metadata.license)
555555 |> Jsont.Object.opt_mem "license-url" Jsont.string
556556 ~enc:(fun r -> r.metadata.license_url)
+133
test/test_cff.ml
···181181 in
182182 Alcotest.test_case test_name `Quick test
183183184184+(* License parsing tests *)
185185+186186+let cff_with_single_license = {|
187187+cff-version: 1.2.0
188188+message: Please cite
189189+title: Test
190190+authors:
191191+ - family-names: Test
192192+license: MIT
193193+|}
194194+195195+let cff_with_license_expression = {|
196196+cff-version: 1.2.0
197197+message: Please cite
198198+title: Test
199199+authors:
200200+ - family-names: Test
201201+license: GPL-3.0-or-later WITH Classpath-exception-2.0
202202+|}
203203+204204+let cff_with_license_array = {|
205205+cff-version: 1.2.0
206206+message: Please cite
207207+title: Test
208208+authors:
209209+ - family-names: Test
210210+license:
211211+ - Apache-2.0
212212+ - MIT
213213+|}
214214+215215+let cff_with_unknown_license = {|
216216+cff-version: 1.2.0
217217+message: Please cite
218218+title: Test
219219+authors:
220220+ - family-names: Test
221221+license: Some-Unknown-License-v1.0
222222+|}
223223+224224+let cff_with_unknown_license_array = {|
225225+cff-version: 1.2.0
226226+message: Please cite
227227+title: Test
228228+authors:
229229+ - family-names: Test
230230+license:
231231+ - MIT
232232+ - Not-A-Real-License
233233+|}
234234+235235+let test_license_single () =
236236+ match Cff_unix.of_yaml_string cff_with_single_license with
237237+ | Ok cff ->
238238+ (match Cff.license cff with
239239+ | Some (`Expr (Spdx_licenses.Simple (Spdx_licenses.LicenseID "MIT"))) -> ()
240240+ | Some (`Expr _) -> Alcotest.fail "Expected simple MIT license"
241241+ | Some (`Raw _) -> Alcotest.fail "License should have parsed as valid SPDX"
242242+ | None -> Alcotest.fail "Missing license")
243243+ | Error e ->
244244+ Alcotest.fail (Printf.sprintf "Failed to parse: %s" e)
245245+246246+let test_license_expression () =
247247+ match Cff_unix.of_yaml_string cff_with_license_expression with
248248+ | Ok cff ->
249249+ (match Cff.license cff with
250250+ | Some (`Expr (Spdx_licenses.WITH _)) -> ()
251251+ | Some (`Expr _) -> Alcotest.fail "Expected WITH expression"
252252+ | Some (`Raw _) -> Alcotest.fail "License should have parsed as valid SPDX"
253253+ | None -> Alcotest.fail "Missing license")
254254+ | Error e ->
255255+ Alcotest.fail (Printf.sprintf "Failed to parse: %s" e)
256256+257257+let test_license_array () =
258258+ match Cff_unix.of_yaml_string cff_with_license_array with
259259+ | Ok cff ->
260260+ (match Cff.license cff with
261261+ | Some (`Expr (Spdx_licenses.OR _)) -> ()
262262+ | Some (`Expr _) -> Alcotest.fail "Expected OR expression"
263263+ | Some (`Raw _) -> Alcotest.fail "License should have parsed as valid SPDX"
264264+ | None -> Alcotest.fail "Missing license")
265265+ | Error e ->
266266+ Alcotest.fail (Printf.sprintf "Failed to parse: %s" e)
267267+268268+let test_license_unknown () =
269269+ match Cff_unix.of_yaml_string cff_with_unknown_license with
270270+ | Ok cff ->
271271+ (match Cff.license cff with
272272+ | Some (`Raw ["Some-Unknown-License-v1.0"]) -> ()
273273+ | Some (`Raw ss) ->
274274+ Alcotest.fail (Printf.sprintf "Wrong raw value: [%s]" (String.concat "; " ss))
275275+ | Some (`Expr _) -> Alcotest.fail "Unknown license should be Raw, not Expr"
276276+ | None -> Alcotest.fail "Missing license")
277277+ | Error e ->
278278+ Alcotest.fail (Printf.sprintf "Failed to parse: %s" e)
279279+280280+let test_license_unknown_in_array () =
281281+ match Cff_unix.of_yaml_string cff_with_unknown_license_array with
282282+ | Ok cff ->
283283+ (match Cff.license cff with
284284+ | Some (`Raw ["MIT"; "Not-A-Real-License"]) -> ()
285285+ | Some (`Raw ss) ->
286286+ Alcotest.fail (Printf.sprintf "Wrong raw value: [%s]" (String.concat "; " ss))
287287+ | Some (`Expr _) -> Alcotest.fail "Array with unknown should be Raw"
288288+ | None -> Alcotest.fail "Missing license")
289289+ | Error e ->
290290+ Alcotest.fail (Printf.sprintf "Failed to parse: %s" e)
291291+292292+let test_license_unknown_roundtrip () =
293293+ match Cff_unix.of_yaml_string cff_with_unknown_license with
294294+ | Error e -> Alcotest.fail (Printf.sprintf "Failed to parse: %s" e)
295295+ | Ok cff1 ->
296296+ match Cff_unix.to_yaml_string cff1 with
297297+ | Error e -> Alcotest.fail (Printf.sprintf "Failed to encode: %s" e)
298298+ | Ok yaml ->
299299+ match Cff_unix.of_yaml_string yaml with
300300+ | Error e -> Alcotest.fail (Printf.sprintf "Failed to reparse: %s" e)
301301+ | Ok cff2 ->
302302+ (match Cff.license cff2 with
303303+ | Some (`Raw ["Some-Unknown-License-v1.0"]) -> ()
304304+ | Some (`Raw ss) ->
305305+ Alcotest.fail (Printf.sprintf "Roundtrip changed value: [%s]" (String.concat "; " ss))
306306+ | Some (`Expr _) -> Alcotest.fail "Roundtrip changed Raw to Expr"
307307+ | None -> Alcotest.fail "Roundtrip lost license")
308308+184309(* Test that we correctly reject or handle known-invalid files *)
185310let test_fail_invalid_date () =
186311 let path = "../vendor/git/citation-file-format/examples/1.2.0/fail/tue-excellent-buildings/bso-toolbox-invalid-date/CITATION.cff" in
···214339 ];
215340 "roundtrip", [
216341 Alcotest.test_case "simple roundtrip" `Quick test_roundtrip;
342342+ ];
343343+ "license", [
344344+ Alcotest.test_case "single license" `Quick test_license_single;
345345+ Alcotest.test_case "license expression" `Quick test_license_expression;
346346+ Alcotest.test_case "license array" `Quick test_license_array;
347347+ Alcotest.test_case "unknown license" `Quick test_license_unknown;
348348+ Alcotest.test_case "unknown in array" `Quick test_license_unknown_in_array;
349349+ Alcotest.test_case "unknown roundtrip" `Quick test_license_unknown_roundtrip;
217350 ];
218351 "1.2.0 fixtures", List.map make_fixture_test pass_fixtures_1_2_0;
219352 "fail fixtures", [