OCaml library for JSONfeed parsing and creation

Add test cases with a mix of valid and broken JSON feeds

+424
+30
test/data/complete_valid.json
··· 1 + { 2 + "version": "https://jsonfeed.org/version/1.1", 3 + "title": "Complete Feed", 4 + "home_page_url": "https://example.com", 5 + "feed_url": "https://example.com/feed.json", 6 + "description": "A complete test feed", 7 + "user_comment": "Test comment", 8 + "next_url": "https://example.com/feed2.json", 9 + "icon": "https://example.com/icon.png", 10 + "favicon": "https://example.com/favicon.ico", 11 + "authors": [ 12 + { 13 + "name": "Test Author", 14 + "url": "https://example.com/author", 15 + "avatar": "https://example.com/avatar.png" 16 + } 17 + ], 18 + "language": "en-US", 19 + "expired": false, 20 + "items": [ 21 + { 22 + "id": "https://example.com/item1", 23 + "content_html": "<p>Test content</p>", 24 + "title": "Test Item", 25 + "url": "https://example.com/item1.html", 26 + "date_published": "2024-01-01T12:00:00Z", 27 + "tags": ["test", "example"] 28 + } 29 + ] 30 + }
+5
test/data/extra_comma.json
··· 1 + { 2 + "version": "https://jsonfeed.org/version/1.1", 3 + "title": "Feed with trailing comma", 4 + "items": [], 5 + }
+8
test/data/invalid_author_type.json
··· 1 + { 2 + "version": "https://jsonfeed.org/version/1.1", 3 + "title": "Feed with invalid author", 4 + "authors": [ 5 + "Just a string instead of object" 6 + ], 7 + "items": [] 8 + }
+11
test/data/invalid_date_format.json
··· 1 + { 2 + "version": "https://jsonfeed.org/version/1.1", 3 + "title": "Feed with invalid date", 4 + "items": [ 5 + { 6 + "id": "https://example.com/item1", 7 + "content_html": "<p>Test</p>", 8 + "date_published": "not-a-valid-date" 9 + } 10 + ] 11 + }
+10
test/data/invalid_hub_type.json
··· 1 + { 2 + "version": "https://jsonfeed.org/version/1.1", 3 + "title": "Feed with invalid hub", 4 + "hubs": [ 5 + { 6 + "type": "WebSub" 7 + } 8 + ], 9 + "items": [] 10 + }
+16
test/data/invalid_nested_attachment.json
··· 1 + { 2 + "version": "https://jsonfeed.org/version/1.1", 3 + "title": "Feed with invalid attachment", 4 + "items": [ 5 + { 6 + "id": "https://example.com/item1", 7 + "content_html": "<p>Test</p>", 8 + "attachments": [ 9 + { 10 + "url": "https://example.com/file.mp3", 11 + "mime_type": 12345 12 + } 13 + ] 14 + } 15 + ] 16 + }
+5
test/data/malformed_json.json
··· 1 + { 2 + "version": "https://jsonfeed.org/version/1.1" 3 + "title": "Missing comma between fields", 4 + "items": [] 5 + }
+5
test/data/minimal_valid.json
··· 1 + { 2 + "version": "https://jsonfeed.org/version/1.1", 3 + "title": "Minimal Feed", 4 + "items": [] 5 + }
+10
test/data/missing_item_content.json
··· 1 + { 2 + "version": "https://jsonfeed.org/version/1.1", 3 + "title": "Feed with item missing content", 4 + "items": [ 5 + { 6 + "id": "https://example.com/nocontent", 7 + "title": "Item without content" 8 + } 9 + ] 10 + }
+9
test/data/missing_item_id.json
··· 1 + { 2 + "version": "https://jsonfeed.org/version/1.1", 3 + "title": "Feed with item missing ID", 4 + "items": [ 5 + { 6 + "content_html": "<p>Item without id</p>" 7 + } 8 + ] 9 + }
+4
test/data/missing_items.json
··· 1 + { 2 + "version": "https://jsonfeed.org/version/1.1", 3 + "title": "Feed without items" 4 + }
+4
test/data/missing_title.json
··· 1 + { 2 + "version": "https://jsonfeed.org/version/1.1", 3 + "items": [] 4 + }
+4
test/data/missing_version.json
··· 1 + { 2 + "title": "Feed without version", 3 + "items": [] 4 + }
+19
test/data/mixed_content.json
··· 1 + { 2 + "version": "https://jsonfeed.org/version/1.1", 3 + "title": "Mixed Content Feed", 4 + "items": [ 5 + { 6 + "id": "https://example.com/html", 7 + "content_html": "<p>HTML only</p>" 8 + }, 9 + { 10 + "id": "https://example.com/text", 11 + "content_text": "Text only" 12 + }, 13 + { 14 + "id": "https://example.com/both", 15 + "content_html": "<p>HTML version</p>", 16 + "content_text": "Text version" 17 + } 18 + ] 19 + }
+9
test/data/with_extensions.json
··· 1 + { 2 + "version": "https://jsonfeed.org/version/1.1", 3 + "title": "Feed with Extensions", 4 + "items": [], 5 + "_custom_field": "custom value", 6 + "_another_extension": { 7 + "nested": "data" 8 + } 9 + }
+6
test/data/wrong_type_expired.json
··· 1 + { 2 + "version": "https://jsonfeed.org/version/1.1", 3 + "title": "Feed with wrong type for expired", 4 + "expired": "yes", 5 + "items": [] 6 + }
+7
test/data/wrong_type_items.json
··· 1 + { 2 + "version": "https://jsonfeed.org/version/1.1", 3 + "title": "Feed with items as object", 4 + "items": { 5 + "item1": {} 6 + } 7 + }
+5
test/data/wrong_type_title.json
··· 1 + { 2 + "version": "https://jsonfeed.org/version/1.1", 3 + "title": true, 4 + "items": [] 5 + }
+5
test/data/wrong_type_version.json
··· 1 + { 2 + "version": 1.1, 3 + "title": "Feed with numeric version", 4 + "items": [] 5 + }
+9
test/dune
··· 6 6 (name test_serialization) 7 7 (modules test_serialization) 8 8 (libraries jsonfeed)) 9 + 10 + (executable 11 + (name test_location_errors) 12 + (modules test_location_errors) 13 + (libraries jsonfeed)) 14 + 15 + (cram 16 + (deps test_location_errors.exe 17 + (glob_files data/*.json)))
+116
test/test_location_errors.ml
(** Test executable for verifying jsont location tracking

    Usage: test_location_errors <file> [field]

    Parses JSON feed files and outputs JSON with either:
    - Success: {"status":"ok", "field":"<field>", "value":"<value>"}
    - Error: {"status":"error", "message":"...", "location":{...}, "context":"..."}

    The process exits with status 1 on a usage error, on a file read error
    and on a decode error. *)

open Jsonfeed

(** [format_context ctx] renders an error context as a path string rooted at
    [$]: object members are appended as [.name] and array indices as [[n]],
    in the order the entries appear in [ctx]. An empty context yields ["$"].
    Member names are appended verbatim (no quoting or escaping). *)
let format_context (ctx : Jsont.Error.Context.t) =
  let buf = Buffer.create 32 in
  Buffer.add_char buf '$';
  List.iter
    (fun ((_kinded_sort, _meta), idx) ->
      match idx with
      | Jsont.Path.Mem (name, _meta) ->
          Buffer.add_char buf '.';
          Buffer.add_string buf name
      | Jsont.Path.Nth (n, _meta) ->
          Buffer.add_char buf '[';
          Buffer.add_string buf (string_of_int n);
          Buffer.add_char buf ']')
    ctx;
  Buffer.contents buf

(** [extract_field field feed] returns the string value reported for [field]
    on a successfully parsed [feed].

    Recognised fields are [title], [version], [item_count] and
    [first_item_id]. A feed without items reports ["(no items)"] for
    [first_item_id]; any other field name reports ["(unknown field)"]. *)
let extract_field field feed =
  match field with
  | "title" -> Jsonfeed.title feed
  | "version" -> Jsonfeed.version feed
  | "item_count" -> string_of_int (List.length (Jsonfeed.items feed))
  | "first_item_id" ->
      (match Jsonfeed.items feed with
       | [] -> "(no items)"
       | item :: _ -> Item.id item)
  | _ -> "(unknown field)"

(** [escape_json_string s] escapes [s] for inclusion between double quotes in
    JSON output: the double quote, the backslash, newline, carriage return
    and tab get two-character escapes, any other byte below the space
    character is written as a [\uXXXX] escape, and every remaining byte is
    copied through unchanged. *)
let escape_json_string s =
  let buf = Buffer.create (String.length s) in
  String.iter (function
    | '"' -> Buffer.add_string buf "\\\""
    | '\\' -> Buffer.add_string buf "\\\\"
    | '\n' -> Buffer.add_string buf "\\n"
    | '\r' -> Buffer.add_string buf "\\r"
    | '\t' -> Buffer.add_string buf "\\t"
    | c when c < ' ' -> Printf.bprintf buf "\\u%04x" (Char.code c)
    | c -> Buffer.add_char buf c
  ) s;
  Buffer.contents buf

(** [output_success field value] prints the one-line success object on
    stdout, followed by a newline. *)
let output_success field value =
  Printf.printf {|{"status":"ok","field":"%s","value":"%s"}|}
    (escape_json_string field)
    (escape_json_string value);
  print_newline ()

(** [output_error (ctx, meta, kind)] prints the one-line error object on
    stdout, followed by a newline. The object carries the error message, the
    text location taken from [meta] (file, line, column, first and last
    byte) and the formatted context path. *)
let output_error (ctx, meta, kind) =
  let message = Jsont.Error.kind_to_string kind in
  let textloc = Jsont.Meta.textloc meta in
  let file = Jsont.Textloc.file textloc in
  let first_byte = Jsont.Textloc.first_byte textloc in
  let last_byte = Jsont.Textloc.last_byte textloc in
  let (line_num, line_start_byte) = Jsont.Textloc.first_line textloc in
  (* Column is 1-based: offset of the first byte from the start of its line.
     NOTE(review): for errors that carry no text location the expected
     outputs in test_locations.t show line -1 and byte offsets -1, and this
     formula then reports column 1. Kept as is because those expected
     outputs depend on it. *)
  let column = first_byte - line_start_byte + 1 in
  let context = format_context ctx in

  Printf.printf {|{"status":"error","message":"%s","location":{"file":"%s","line":%d,"column":%d,"byte_start":%d,"byte_end":%d},"context":"%s"}|}
    (escape_json_string message)
    (escape_json_string file)
    line_num
    column
    first_byte
    last_byte
    (escape_json_string context);
  print_newline ()

(** Entry point: reads the file named by the first command-line argument,
    decodes it as a JSON Feed with location tracking enabled and prints
    either the requested field (second argument, defaulting to [title]) or
    the decode error. *)
let main () =
  (* Disable ANSI styling in error messages for consistent output *)
  Jsont.Error.disable_ansi_styler ();

  if Array.length Sys.argv < 2 then (
    Printf.eprintf "Usage: %s <file> [field]\n" Sys.argv.(0);
    Printf.eprintf "Fields: title, version, item_count, first_item_id\n";
    exit 1
  );

  let file = Sys.argv.(1) in
  let field = if Array.length Sys.argv > 2 then Sys.argv.(2) else "title" in

  (* Read the file in binary mode so the decoder sees exactly the bytes that
     are on disk. Text mode may translate line endings on some platforms,
     which would make the reported byte offsets disagree with the file. *)
  let content =
    try
      In_channel.with_open_bin file In_channel.input_all
    with Sys_error msg ->
      Printf.printf {|{"status":"error","message":"File error: %s"}|}
        (escape_json_string msg);
      print_newline ();
      exit 1
  in

  (* Parse with location tracking *)
  match Jsonfeed.decode_string ~locs:true ~file content with
  | Ok feed ->
      let value = extract_field field feed in
      output_success field value
  | Error err ->
      output_error err;
      exit 1

let () = main ()
+127
test/test_locations.t
··· 1 + Location tracking tests for JSON Feed parser 2 + =========================================== 3 + 4 + This test suite verifies that jsont combinators correctly track location 5 + information for both valid and invalid JSON feeds. 6 + 7 + Valid Feeds 8 + ----------- 9 + 10 + Test minimal valid feed: 11 + $ ./test_location_errors.exe data/minimal_valid.json title 12 + {"status":"ok","field":"title","value":"Minimal Feed"} 13 + 14 + $ ./test_location_errors.exe data/minimal_valid.json version 15 + {"status":"ok","field":"version","value":"https://jsonfeed.org/version/1.1"} 16 + 17 + $ ./test_location_errors.exe data/minimal_valid.json item_count 18 + {"status":"ok","field":"item_count","value":"0"} 19 + 20 + Test complete feed with all fields: 21 + $ ./test_location_errors.exe data/complete_valid.json title 22 + {"status":"ok","field":"title","value":"Complete Feed"} 23 + 24 + $ ./test_location_errors.exe data/complete_valid.json item_count 25 + {"status":"ok","field":"item_count","value":"1"} 26 + 27 + $ ./test_location_errors.exe data/complete_valid.json first_item_id 28 + {"status":"ok","field":"first_item_id","value":"https://example.com/item1"} 29 + 30 + Test mixed content types: 31 + $ ./test_location_errors.exe data/mixed_content.json item_count 32 + {"status":"ok","field":"item_count","value":"3"} 33 + 34 + Test feed with extensions: 35 + $ ./test_location_errors.exe data/with_extensions.json title 36 + {"status":"ok","field":"title","value":"Feed with Extensions"} 37 + 38 + 39 + Missing Required Fields 40 + ------------------------ 41 + 42 + Test missing title field: 43 + $ ./test_location_errors.exe data/missing_title.json title 44 + {"status":"error","message":"Missing member title in JSON Feed object","location":{"file":"data/missing_title.json","line":1,"column":1,"byte_start":0,"byte_end":65},"context":"$"} 45 + [1] 46 + 47 + Test missing version field: 48 + $ ./test_location_errors.exe data/missing_version.json title 49 + 
{"status":"error","message":"Missing member version in JSON Feed object","location":{"file":"data/missing_version.json","line":1,"column":1,"byte_start":0,"byte_end":51},"context":"$"} 50 + [1] 51 + 52 + Test missing items field: 53 + $ ./test_location_errors.exe data/missing_items.json title 54 + {"status":"error","message":"Missing member items in JSON Feed object","location":{"file":"data/missing_items.json","line":1,"column":1,"byte_start":0,"byte_end":83},"context":"$"} 55 + [1] 56 + 57 + Test missing item id: 58 + $ ./test_location_errors.exe data/missing_item_id.json first_item_id 59 + {"status":"error","message":"Missing member id in Item object","location":{"file":"data/missing_item_id.json","line":5,"column":5,"byte_start":108,"byte_end":161},"context":"$.items[0]"} 60 + [1] 61 + 62 + Test missing item content: 63 + $ ./test_location_errors.exe data/missing_item_content.json first_item_id 64 + {"status":"error","message":"Item must have at least one of content_html or content_text","location":{"file":"-","line":-1,"column":1,"byte_start":-1,"byte_end":-1},"context":"$.items[0]"} 65 + [1] 66 + 67 + 68 + Type Errors 69 + ----------- 70 + 71 + Test wrong type for version (number instead of string): 72 + $ ./test_location_errors.exe data/wrong_type_version.json title 73 + {"status":"error","message":"Expected string but found number","location":{"file":"data/wrong_type_version.json","line":2,"column":14,"byte_start":15,"byte_end":15},"context":"$.version"} 74 + [1] 75 + 76 + Test wrong type for items (object instead of array): 77 + $ ./test_location_errors.exe data/wrong_type_items.json item_count 78 + {"status":"error","message":"Expected array<Item object> but found object","location":{"file":"data/wrong_type_items.json","line":4,"column":12,"byte_start":102,"byte_end":102},"context":"$.items"} 79 + [1] 80 + 81 + Test wrong type for title (boolean instead of string): 82 + $ ./test_location_errors.exe data/wrong_type_title.json title 83 + 
{"status":"error","message":"Expected string but found bool","location":{"file":"data/wrong_type_title.json","line":3,"column":12,"byte_start":62,"byte_end":62},"context":"$.title"} 84 + [1] 85 + 86 + Test wrong type for expired (string instead of boolean): 87 + $ ./test_location_errors.exe data/wrong_type_expired.json title 88 + {"status":"error","message":"Expected bool but found string","location":{"file":"data/wrong_type_expired.json","line":4,"column":14,"byte_start":111,"byte_end":111},"context":"$.expired"} 89 + [1] 90 + 91 + 92 + Nested Errors 93 + ------------- 94 + 95 + Test invalid date format in item: 96 + $ ./test_location_errors.exe data/invalid_date_format.json first_item_id 97 + {"status":"error","message":"RFC 3339 timestamp: invalid RFC 3339 timestamp: \"not-a-valid-date\"","location":{"file":"-","line":-1,"column":1,"byte_start":-1,"byte_end":-1},"context":"$.items[0].date_published"} 98 + [1] 99 + 100 + Test invalid author type (string instead of object): 101 + $ ./test_location_errors.exe data/invalid_author_type.json title 102 + {"status":"error","message":"Expected Author object but found string","location":{"file":"data/invalid_author_type.json","line":5,"column":5,"byte_start":109,"byte_end":109},"context":"$.authors[0]"} 103 + [1] 104 + 105 + Test invalid attachment field type (deeply nested): 106 + $ ./test_location_errors.exe data/invalid_nested_attachment.json first_item_id 107 + {"status":"error","message":"Expected string but found number","location":{"file":"data/invalid_nested_attachment.json","line":11,"column":24,"byte_start":296,"byte_end":296},"context":"$.items[0].attachments[0].mime_type"} 108 + [1] 109 + 110 + Test missing required field in hub: 111 + $ ./test_location_errors.exe data/invalid_hub_type.json title 112 + {"status":"error","message":"Missing member url in Hub object","location":{"file":"data/invalid_hub_type.json","line":5,"column":5,"byte_start":103,"byte_end":132},"context":"$.hubs[0]"} 113 + [1] 114 + 115 + 
116 + JSON Syntax Errors 117 + ------------------ 118 + 119 + Test trailing comma: 120 + $ ./test_location_errors.exe data/extra_comma.json title 121 + {"status":"error","message":"Expected object member but found }","location":{"file":"data/extra_comma.json","line":5,"column":1,"byte_start":105,"byte_end":105},"context":"$"} 122 + [1] 123 + 124 + Test malformed JSON (missing comma): 125 + $ ./test_location_errors.exe data/malformed_json.json title 126 + {"status":"error","message":"Expected , or } after object member but found: \"","location":{"file":"data/malformed_json.json","line":3,"column":3,"byte_start":52,"byte_end":52},"context":"$"} 127 + [1]