OCaml codecs for the Citation File Format (CFF)

spdx

+224 -237
+1 -1
lib/cff.ml
··· 176 176 |> Object.opt_mem "doi" string ~enc:(fun t -> t.doi) 177 177 |> Object.opt_mem "identifiers" identifiers_jsont ~enc:(fun t -> t.identifiers) 178 178 |> Object.opt_mem "keywords" keywords_jsont ~enc:(fun t -> t.keywords) 179 - |> Object.opt_mem "license" Cff_license.jsont_lenient ~enc:(fun t -> t.license) 179 + |> Object.opt_mem "license" Cff_license.jsont ~enc:(fun t -> t.license) 180 180 |> Object.opt_mem "license-url" string ~enc:(fun t -> t.license_url) 181 181 |> Object.opt_mem "preferred-citation" Cff_reference.jsont ~enc:(fun t -> t.preferred_citation) 182 182 |> Object.opt_mem "references" references_jsont ~enc:(fun t -> t.references)
+48 -115
lib/cff_license.ml
··· 5 5 6 6 (** SPDX license handling for CFF. *) 7 7 8 - module Id = struct 9 - type t = string 8 + type t = [ `Expr of Spdx_licenses.t | `Raw of string list ] 10 9 11 - (* Case-insensitive lookup in valid license IDs *) 12 - let uppercased_valid_ids = 13 - List.map (fun x -> (x, String.uppercase_ascii x)) Spdx_licenses.valid_license_ids 10 + let of_spdx spdx = `Expr spdx 14 11 15 - let of_string s = 16 - let s_upper = String.uppercase_ascii s in 17 - match List.find_opt (fun (_, up) -> String.equal s_upper up) uppercased_valid_ids with 18 - | Some (canonical, _) -> Ok canonical 19 - | None -> Error (`Invalid_license_id s) 20 - 21 - let to_string t = t 22 - 23 - let equal = String.equal 24 - let compare = String.compare 12 + let of_string s = 13 + match Spdx_licenses.parse s with 14 + | Ok spdx -> `Expr spdx 15 + | Error _ -> `Raw [s] 25 16 26 - let pp ppf t = Format.pp_print_string ppf t 27 - end 28 - 29 - type t = Id.t list (* Non-empty list; multiple = OR relationship *) 30 - 31 - let single id = [id] 32 - let multiple ids = ids 33 - 34 - let ids t = t 35 - 36 - let is_single = function 37 - | [_] -> true 38 - | _ -> false 39 - 40 - let of_string s = Result.map single (Id.of_string s) 41 - 42 - let of_string_list ss = 43 - let rec aux acc = function 44 - | [] -> Ok (List.rev acc) 45 - | s :: rest -> 46 - match Id.of_string s with 47 - | Ok id -> aux (id :: acc) rest 48 - | Error e -> Error e 17 + let of_strings ss = 18 + (* Try to parse as OR combination, fall back to Raw *) 19 + let try_parse_all () = 20 + let rec build = function 21 + | [] -> None 22 + | [s] -> 23 + (match Spdx_licenses.parse s with 24 + | Ok spdx -> Some spdx 25 + | Error _ -> None) 26 + | s :: rest -> 27 + (match Spdx_licenses.parse s, build rest with 28 + | Ok spdx, Some rest_spdx -> Some (Spdx_licenses.OR (spdx, rest_spdx)) 29 + | _ -> None) 30 + in 31 + build ss 49 32 in 50 - match ss with 51 - | [] -> Error (`Invalid_license_id "empty license list") 52 - | ss -> aux [] ss 33 + match try_parse_all () with 34 + | Some spdx -> `Expr spdx 35 + | None -> `Raw ss 53 36 54 - let to_string_list t = t 37 + let to_spdx = function 38 + | `Expr spdx -> Some spdx 39 + | `Raw _ -> None 55 40 56 - let equal t1 t2 = 57 - List.length t1 = List.length t2 && 58 - List.for_all2 Id.equal t1 t2 41 + let to_strings = function 42 + | `Expr spdx -> [Spdx_licenses.to_string spdx] 43 + | `Raw ss -> ss 59 44 60 - let compare t1 t2 = 61 - List.compare Id.compare t1 t2 45 + let pp ppf = function 46 + | `Expr spdx -> Format.pp_print_string ppf (Spdx_licenses.to_string spdx) 47 + | `Raw ss -> 48 + match ss with 49 + | [s] -> Format.pp_print_string ppf s 50 + | _ -> 51 + Format.fprintf ppf "[%a]" 52 + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") 53 + Format.pp_print_string) ss 62 54 63 - let pp ppf t = 64 - match t with 65 - | [id] -> Id.pp ppf id 66 - | ids -> 67 - Format.fprintf ppf "[%a]" 68 - (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") Id.pp) 69 - ids 70 - 71 - (* Convert to Spdx_licenses.t (OR combination) *) 72 - let to_spdx t = 73 - let rec build = function 74 - | [] -> assert false (* t is non-empty *) 75 - | [id] -> Spdx_licenses.Simple (Spdx_licenses.LicenseID id) 76 - | id :: rest -> 77 - Spdx_licenses.OR (Spdx_licenses.Simple (Spdx_licenses.LicenseID id), build rest) 78 - in 79 - build t 80 - 81 - (* Convert from Spdx_licenses.t (only simple IDs and OR combinations) *) 82 - let of_spdx spdx = 83 - let rec extract acc = function 84 - | Spdx_licenses.Simple (Spdx_licenses.LicenseID id) -> 85 - Ok (id :: acc) 86 - | Spdx_licenses.Simple (Spdx_licenses.LicenseIDPlus _) -> 87 - Error `Unsupported_expression 88 - | Spdx_licenses.Simple (Spdx_licenses.LicenseRef _) -> 89 - Error `Unsupported_expression 90 - | Spdx_licenses.WITH _ -> 91 - Error `Unsupported_expression 92 - | Spdx_licenses.AND _ -> 93 - Error `Unsupported_expression 94 - | Spdx_licenses.OR (left, right) -> 95 - Result.bind (extract acc left) (fun acc -> extract acc right) 96 - in 97 - Result.map List.rev (extract [] spdx) 98 - 99 - (* Jsont codec - handles both single string and array of strings *) 55 + (* Jsont codec - lenient, accepts any string/array *) 100 56 let jsont = 101 57 let string_codec = 102 58 Jsont.string |> Jsont.map 103 - ~dec:(fun s -> 104 - match Id.of_string s with 105 - | Ok id -> [id] 106 - | Error (`Invalid_license_id s) -> 107 - Jsont.Error.msgf Jsont.Meta.none "Invalid SPDX license ID: %s" s) 59 + ~dec:(fun s -> of_string s) 108 60 ~enc:(function 109 - | [id] -> id 110 - | _ -> assert false) (* Only used for single-element lists *) 61 + | `Expr spdx -> Spdx_licenses.to_string spdx 62 + | `Raw [s] -> s 63 + | `Raw _ -> assert false) 111 64 in 112 65 let array_codec = 113 66 Jsont.(array string) |> Jsont.map 114 - ~dec:(fun ss -> 115 - match of_string_list (Stdlib.Array.to_list ss) with 116 - | Ok t -> t 117 - | Error (`Invalid_license_id s) -> 118 - Jsont.Error.msgf Jsont.Meta.none "Invalid SPDX license ID: %s" s) 119 - ~enc:(fun t -> Stdlib.Array.of_list t) 120 - in 121 - Jsont.any 122 - ~dec_string:string_codec 123 - ~dec_array:array_codec 124 - ~enc:(fun t -> 125 - match t with 126 - | [_] -> string_codec 127 - | _ -> array_codec) 128 - () 129 - 130 - (* Lenient codec that accepts any string/array without validation *) 131 - let jsont_lenient = 132 - let string_codec = 133 - Jsont.string |> Jsont.map ~dec:(fun s -> [s]) ~enc:(function [s] -> s | _ -> assert false) 134 - in 135 - let array_codec = 136 - Jsont.(array string) |> Jsont.map ~dec:(fun ss -> Stdlib.Array.to_list ss) ~enc:(fun t -> Stdlib.Array.of_list t) 67 + ~dec:(fun ss -> of_strings (Array.to_list ss)) 68 + ~enc:(fun t -> Array.of_list (to_strings t)) 137 69 in 138 70 Jsont.any 139 71 ~dec_string:string_codec 140 72 ~dec_array:array_codec 141 73 ~enc:(fun t -> 142 74 match t with 143 - | [_] -> string_codec 75 + | `Expr (Spdx_licenses.Simple _) -> string_codec 76 + | `Raw [_] -> string_codec 144 77 | _ -> array_codec) 145 78 ()
+41 -120
lib/cff_license.mli
··· 3 3 SPDX-License-Identifier: ISC 4 4 ---------------------------------------------------------------------------*) 5 5 6 - (** SPDX license identifiers for CFF. 6 + (** SPDX license expressions for CFF. 7 7 8 8 CFF uses {{:https://spdx.org/licenses/}SPDX license identifiers} 9 - for the [license] field. SPDX provides a standardized list of 10 - open source license identifiers. 9 + for the [license] field. This module wraps {!Spdx_licenses.t} with 10 + support for invalid/unknown licenses to enable round-tripping. 11 11 12 - {1 License Field} 12 + {1 License Representation} 13 13 14 - The [license] field can be a single license identifier like ["MIT"], 15 - or a list of licenses with OR relationship like ["GPL-3.0-only"] and 16 - ["MIT"] together. 14 + Licenses are represented as either: 15 + - [`Expr spdx] - A valid, parsed SPDX license expression 16 + - [`Raw strings] - Unparsed strings for invalid/unknown licenses 17 17 18 - When multiple licenses are listed, it means the user may choose 19 - {b any one} of the listed licenses. This matches the SPDX OR 20 - semantics. 18 + The parser is lenient: it tries to parse as SPDX but preserves 19 + invalid strings for round-tripping. 21 20 22 21 {1 Examples} 23 22 24 23 {2 Single License} 24 + {[ 25 + license: MIT 26 + ]} 25 27 28 + {2 SPDX Expression} 26 29 {[ 27 - cff-version: "1.2.0" 28 - title: "My Project" 29 - license: MIT 30 + license: GPL-3.0-or-later WITH Classpath-exception-2.0 30 31 ]} 31 32 32 33 {2 Multiple Licenses (OR)} 33 - 34 34 {[ 35 - cff-version: "1.2.0" 36 - title: "My Project" 37 35 license: 38 36 - Apache-2.0 39 37 - MIT 40 38 ]} 39 + This is parsed as [Apache-2.0 OR MIT]. *) 41 40 42 - This means the software is available under Apache-2.0 OR MIT. 41 + type t = [ `Expr of Spdx_licenses.t | `Raw of string list ] 42 + (** The license type: either a valid SPDX expression or raw strings. *) 43 43 44 - {1 Common License IDs} 44 + (** {1 Construction} *) 45 45 46 - Some commonly used SPDX license identifiers: 46 + val of_spdx : Spdx_licenses.t -> t 47 + (** [of_spdx spdx] wraps a valid SPDX expression. *) 47 48 48 - - [MIT] - MIT License 49 - - [Apache-2.0] - Apache License 2.0 50 - - [GPL-3.0-only] - GNU General Public License v3.0 only 51 - - [GPL-3.0-or-later] - GNU GPL v3.0 or later 52 - - [BSD-2-Clause] - BSD 2-Clause "Simplified" License 53 - - [BSD-3-Clause] - BSD 3-Clause "New" License 54 - - [ISC] - ISC License 55 - - [MPL-2.0] - Mozilla Public License 2.0 56 - - [LGPL-3.0-only] - GNU Lesser GPL v3.0 57 - - [CC-BY-4.0] - Creative Commons Attribution 4.0 58 - 59 - {1 Deprecated IDs} 60 - 61 - Some older license identifiers are deprecated in SPDX: 49 + val of_string : string -> t 50 + (** [of_string s] parses [s] as an SPDX expression. 51 + Returns [`Expr] on success, [`Raw [s]] on parse failure. *) 62 52 63 - - [GPL-2.0] should use [GPL-2.0-only] or [GPL-2.0-or-later] 64 - - [GPL-3.0] should use [GPL-3.0-only] or [GPL-3.0-or-later] 65 - - [LGPL-2.1] should use [LGPL-2.1-only] or [LGPL-2.1-or-later] 53 + val of_strings : string list -> t 54 + (** [of_strings ss] parses a list of license strings. 55 + If all strings are valid license IDs, returns an [`Expr] with OR combination. 56 + Otherwise returns [`Raw ss] to preserve the original strings. *) 66 57 67 - The {!jsont_lenient} codec accepts these deprecated IDs. *) 58 + (** {1 Access} *) 68 59 69 - (** A validated SPDX license identifier. *) 70 - module Id : sig 71 - type t 72 - (** A single validated SPDX license ID. *) 60 + val to_spdx : t -> Spdx_licenses.t option 61 + (** [to_spdx t] returns [Some spdx] if [t] is a valid expression, 62 + [None] if it contains unparsed raw strings. *) 73 63 74 - val of_string : string -> (t, [> `Invalid_license_id of string]) result 75 - (** Parse and validate a license ID. 64 + val to_strings : t -> string list 65 + (** [to_strings t] returns the license as a list of strings. 66 + For [`Expr], returns the normalized SPDX string. 67 + For [`Raw], returns the original strings. *) 76 68 77 - The check is case-insensitive. Returns [Error] for unknown 78 - license identifiers. *) 79 - 80 - val to_string : t -> string 81 - (** Return the canonical (properly cased) license ID string. *) 82 - 83 - val equal : t -> t -> bool 84 - val compare : t -> t -> int 85 - 86 - val pp : Format.formatter -> t -> unit 87 - (** Pretty-print the license ID. *) 88 - end 89 - 90 - type t 91 - (** A CFF license: one or more SPDX license IDs. 92 - 93 - Multiple IDs represent an OR relationship: the user may choose 94 - any of the listed licenses. *) 95 - 96 - val single : Id.t -> t 97 - (** Create a license from a single ID. *) 98 - 99 - val multiple : Id.t list -> t 100 - (** Create a license from multiple IDs (OR relationship). 101 - 102 - Raises [Invalid_argument] if the list is empty. *) 103 - 104 - val ids : t -> Id.t list 105 - (** Get the list of license IDs. 106 - 107 - For a single license, returns a one-element list. *) 108 - 109 - val is_single : t -> bool 110 - (** [true] if this is a single license ID, [false] for multiple. *) 111 - 112 - val of_string : string -> (t, [> `Invalid_license_id of string]) result 113 - (** Parse a single license ID string into a license. 114 - 115 - Equivalent to [Result.map single (Id.of_string s)]. *) 116 - 117 - val of_string_list : string list -> (t, [> `Invalid_license_id of string]) result 118 - (** Parse a list of license ID strings. 119 - 120 - All IDs must be valid; returns [Error] if any ID is invalid. *) 121 - 122 - val to_string_list : t -> string list 123 - (** Return the list of license ID strings. *) 124 - 125 - val equal : t -> t -> bool 126 - (** License equality. *) 127 - 128 - val compare : t -> t -> int 129 - (** License comparison. *) 69 + (** {1 Formatting} *) 130 70 131 71 val pp : Format.formatter -> t -> unit 132 - (** Pretty-print: single ID or comma-separated list for multiple. *) 133 - 134 - (** {1 SPDX Interop} *) 135 - 136 - val to_spdx : t -> Spdx_licenses.t 137 - (** Convert to an SPDX license expression (OR combination). *) 138 - 139 - val of_spdx : Spdx_licenses.t -> (t, [> `Unsupported_expression]) result 140 - (** Convert from an SPDX license expression. 141 - 142 - Only simple license IDs and OR combinations are supported. 143 - Complex expressions using AND, WITH (exceptions), or license 144 - references return [Error `Unsupported_expression]. *) 72 + (** Pretty-print the license. *) 145 73 146 - (** {1 Codecs} *) 74 + (** {1 Codec} *) 147 75 148 76 val jsont : t Jsont.t 149 - (** JSON/YAML codec that validates license IDs. 150 - 151 - Handles both single string (["MIT"]) and array of strings. 152 - Returns an error for invalid SPDX license identifiers. *) 153 - 154 - val jsont_lenient : t Jsont.t 155 - (** JSON/YAML codec that accepts any string without validation. 77 + (** JSON/YAML codec for licenses. 156 78 157 - Use this codec when parsing CFF files that may contain deprecated 158 - or non-standard license identifiers. Invalid IDs are preserved 159 - as-is for round-tripping. *) 79 + Handles both single string and array of strings. 80 + Lenient: accepts any string without validation for round-tripping. *)
+1 -1
lib/cff_reference.ml
··· 550 550 ~enc:(fun r -> r.metadata.keywords) 551 551 |> Jsont.Object.opt_mem "languages" string_list_jsont 552 552 ~enc:(fun r -> r.metadata.languages) 553 - |> Jsont.Object.opt_mem "license" Cff_license.jsont_lenient 553 + |> Jsont.Object.opt_mem "license" Cff_license.jsont 554 554 ~enc:(fun r -> r.metadata.license) 555 555 |> Jsont.Object.opt_mem "license-url" Jsont.string 556 556 ~enc:(fun r -> r.metadata.license_url)
+133
test/test_cff.ml
··· 181 181 in 182 182 Alcotest.test_case test_name `Quick test 183 183 184 + (* License parsing tests *) 185 + 186 + let cff_with_single_license = {| 187 + cff-version: 1.2.0 188 + message: Please cite 189 + title: Test 190 + authors: 191 + - family-names: Test 192 + license: MIT 193 + |} 194 + 195 + let cff_with_license_expression = {| 196 + cff-version: 1.2.0 197 + message: Please cite 198 + title: Test 199 + authors: 200 + - family-names: Test 201 + license: GPL-3.0-or-later WITH Classpath-exception-2.0 202 + |} 203 + 204 + let cff_with_license_array = {| 205 + cff-version: 1.2.0 206 + message: Please cite 207 + title: Test 208 + authors: 209 + - family-names: Test 210 + license: 211 + - Apache-2.0 212 + - MIT 213 + |} 214 + 215 + let cff_with_unknown_license = {| 216 + cff-version: 1.2.0 217 + message: Please cite 218 + title: Test 219 + authors: 220 + - family-names: Test 221 + license: Some-Unknown-License-v1.0 222 + |} 223 + 224 + let cff_with_unknown_license_array = {| 225 + cff-version: 1.2.0 226 + message: Please cite 227 + title: Test 228 + authors: 229 + - family-names: Test 230 + license: 231 + - MIT 232 + - Not-A-Real-License 233 + |} 234 + 235 + let test_license_single () = 236 + match Cff_unix.of_yaml_string cff_with_single_license with 237 + | Ok cff -> 238 + (match Cff.license cff with 239 + | Some (`Expr (Spdx_licenses.Simple (Spdx_licenses.LicenseID "MIT"))) -> () 240 + | Some (`Expr _) -> Alcotest.fail "Expected simple MIT license" 241 + | Some (`Raw _) -> Alcotest.fail "License should have parsed as valid SPDX" 242 + | None -> Alcotest.fail "Missing license") 243 + | Error e -> 244 + Alcotest.fail (Printf.sprintf "Failed to parse: %s" e) 245 + 246 + let test_license_expression () = 247 + match Cff_unix.of_yaml_string cff_with_license_expression with 248 + | Ok cff -> 249 + (match Cff.license cff with 250 + | Some (`Expr (Spdx_licenses.WITH _)) -> () 251 + | Some (`Expr _) -> Alcotest.fail "Expected WITH expression" 252 + | Some (`Raw _) -> Alcotest.fail "License should have parsed as valid SPDX" 253 + | None -> Alcotest.fail "Missing license") 254 + | Error e -> 255 + Alcotest.fail (Printf.sprintf "Failed to parse: %s" e) 256 + 257 + let test_license_array () = 258 + match Cff_unix.of_yaml_string cff_with_license_array with 259 + | Ok cff -> 260 + (match Cff.license cff with 261 + | Some (`Expr (Spdx_licenses.OR _)) -> () 262 + | Some (`Expr _) -> Alcotest.fail "Expected OR expression" 263 + | Some (`Raw _) -> Alcotest.fail "License should have parsed as valid SPDX" 264 + | None -> Alcotest.fail "Missing license") 265 + | Error e -> 266 + Alcotest.fail (Printf.sprintf "Failed to parse: %s" e) 267 + 268 + let test_license_unknown () = 269 + match Cff_unix.of_yaml_string cff_with_unknown_license with 270 + | Ok cff -> 271 + (match Cff.license cff with 272 + | Some (`Raw ["Some-Unknown-License-v1.0"]) -> () 273 + | Some (`Raw ss) -> 274 + Alcotest.fail (Printf.sprintf "Wrong raw value: [%s]" (String.concat "; " ss)) 275 + | Some (`Expr _) -> Alcotest.fail "Unknown license should be Raw, not Expr" 276 + | None -> Alcotest.fail "Missing license") 277 + | Error e -> 278 + Alcotest.fail (Printf.sprintf "Failed to parse: %s" e) 279 + 280 + let test_license_unknown_in_array () = 281 + match Cff_unix.of_yaml_string cff_with_unknown_license_array with 282 + | Ok cff -> 283 + (match Cff.license cff with 284 + | Some (`Raw ["MIT"; "Not-A-Real-License"]) -> () 285 + | Some (`Raw ss) -> 286 + Alcotest.fail (Printf.sprintf "Wrong raw value: [%s]" (String.concat "; " ss)) 287 + | Some (`Expr _) -> Alcotest.fail "Array with unknown should be Raw" 288 + | None -> Alcotest.fail "Missing license") 289 + | Error e -> 290 + Alcotest.fail (Printf.sprintf "Failed to parse: %s" e) 291 + 292 + let test_license_unknown_roundtrip () = 293 + match Cff_unix.of_yaml_string cff_with_unknown_license with 294 + | Error e -> Alcotest.fail (Printf.sprintf "Failed to parse: %s" e) 295 + | Ok cff1 -> 296 + match Cff_unix.to_yaml_string cff1 with 297 + | Error e -> Alcotest.fail (Printf.sprintf "Failed to encode: %s" e) 298 + | Ok yaml -> 299 + match Cff_unix.of_yaml_string yaml with 300 + | Error e -> Alcotest.fail (Printf.sprintf "Failed to reparse: %s" e) 301 + | Ok cff2 -> 302 + (match Cff.license cff2 with 303 + | Some (`Raw ["Some-Unknown-License-v1.0"]) -> () 304 + | Some (`Raw ss) -> 305 + Alcotest.fail (Printf.sprintf "Roundtrip changed value: [%s]" (String.concat "; " ss)) 306 + | Some (`Expr _) -> Alcotest.fail "Roundtrip changed Raw to Expr" 307 + | None -> Alcotest.fail "Roundtrip lost license") 308 + 184 309 (* Test that we correctly reject or handle known-invalid files *) 185 310 let test_fail_invalid_date () = 186 311 let path = "../vendor/git/citation-file-format/examples/1.2.0/fail/tue-excellent-buildings/bso-toolbox-invalid-date/CITATION.cff" in ··· 214 339 ]; 215 340 "roundtrip", [ 216 341 Alcotest.test_case "simple roundtrip" `Quick test_roundtrip; 342 + ]; 343 + "license", [ 344 + Alcotest.test_case "single license" `Quick test_license_single; 345 + Alcotest.test_case "license expression" `Quick test_license_expression; 346 + Alcotest.test_case "license array" `Quick test_license_array; 347 + Alcotest.test_case "unknown license" `Quick test_license_unknown; 348 + Alcotest.test_case "unknown in array" `Quick test_license_unknown_in_array; 349 + Alcotest.test_case "unknown roundtrip" `Quick test_license_unknown_roundtrip; 217 350 ]; 218 351 "1.2.0 fixtures", List.map make_fixture_test pass_fixtures_1_2_0; 219 352 "fail fixtures", [