forked from simdjson/simdjson
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparse_many_test.cpp
More file actions
146 lines (132 loc) · 5.09 KB
/
parse_many_test.cpp
File metadata and controls
146 lines (132 loc) · 5.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#include <cstring>
#if (!(_MSC_VER) && !(__MINGW32__) && !(__MINGW64__))
#include <dirent.h>
#else
// Microsoft can't be bothered to provide standard utils.
#include <dirent_portable.h>
#endif
#include <unistd.h>
#include <cinttypes>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
#include "simdjson.h"
/**
 * Tells whether filename ends with the given extension.
 *
 * The extension is taken to be everything from the last '.' in filename
 * (inclusive) to the end of the string, so `extension` must include the
 * leading dot (e.g. ".json"). A filename without any '.' never matches.
 */
static bool has_extension(const char *filename, const char *extension) {
  const char *last_dot = strrchr(filename, '.');
  if (last_dot == nullptr) {
    return false;
  }
  return strcmp(last_dot, extension) == 0;
}
/**
 * Tells whether the string str begins with the prefix pre.
 *
 * An empty prefix matches every string; a prefix longer than str never
 * matches.
 */
bool starts_with(const char *pre, const char *str) {
  const size_t prefix_length = strlen(pre);
  if (strlen(str) < prefix_length) {
    return false;
  }
  return strncmp(pre, str, prefix_length) == 0;
}
/**
 * Tells whether the string str contains pre as a substring.
 *
 * Note the argument order: the text searched for comes first, the text
 * searched in comes second. An empty pre is found in every string.
 */
bool contains(const char *pre, const char *str) {
  const char *first_match = strstr(str, pre);
  return first_match != nullptr;
}
/**
 * Runs parse_many over every .ndjson, .jsonl and .json file found directly in
 * dirname and compares the outcome with the expectation encoded in the file
 * name:
 *   - names containing "EXCLUDE" are parsed but neither counted nor checked;
 *   - names starting with "pass" (and also fail10.json / fail70.json) must
 *     parse without error;
 *   - any other name starting with "fail" must produce an error.
 * Files matching none of these rules are parsed and counted but not checked.
 *
 * Progress and warnings go to stdout; the list of misbehaving files goes to
 * stderr.
 *
 * @param dirname directory to scan; a trailing '/' is optional.
 * @return true if every checked file behaved as expected. false if at least
 *         one did not, if the directory cannot be read, or if it has no
 *         entries.
 */
bool validate(const char *dirname) {
  const char *extension1 = ".ndjson";
  const char *extension2 = ".jsonl";
  const char *extension3 = ".json"; // bad json files should fail

  struct dirent **entry_list = nullptr;
  const int c = scandir(dirname, &entry_list, nullptr, alphasort);
  if (c < 0) {
    fprintf(stderr, "error accessing %s \n", dirname);
    return false;
  }
  if (c == 0) {
    printf("nothing in dir %s \n", dirname);
    // scandir may still have allocated the (empty) array; free(nullptr) is a no-op.
    free(entry_list);
    return false;
  }

  // Build the "<dirname>/" prefix once. A separator is needed whenever the
  // directory name does not already end with one, including one-character
  // names such as ".".
  const size_t dirlen = strlen(dirname);
  const bool needsep = (dirlen > 0) && (dirname[dirlen - 1] != '/');
  std::string prefix(dirname);
  if (needsep) {
    prefix += '/';
  }

  size_t how_many = 0;
  // Names of the files whose outcome contradicts their name, in scan order.
  std::vector<std::string> unexpected_files;

  /* For all files in the folder */
  for (int i = 0; i < c; i++) {
    const char *name = entry_list[i]->d_name;
    const bool is_candidate = has_extension(name, extension1) ||
                              has_extension(name, extension2) ||
                              has_extension(name, extension3);
    if (!is_candidate) {
      continue;
    }

    printf("validating: file %s ", name);
    fflush(nullptr);
    const std::string fullpath = prefix + name;
    const bool excluded = contains("EXCLUDE", name);

    /* The actual test */
    simdjson::padded_string json;
    auto error = simdjson::padded_string::load(fullpath).get(json);
    if (!error) {
      // Only files that were loaded and are not excluded count as checked;
      // counting here (rather than decrementing later) cannot underflow.
      if (!excluded) {
        ++how_many;
      }
      simdjson::dom::parser parser;
      simdjson::dom::document_stream docs;
      error = parser.parse_many(json).get(docs);
      // Do not iterate a stream that failed to set up, and keep the first
      // error instead of letting a later document overwrite it.
      if (!error) {
        for (auto doc : docs) {
          error = doc.error();
          if (error) {
            break;
          }
        }
      }
    }
    printf("%s\n", error ? "invalid" : "ok");

    /* Check if the file is supposed to pass or not. Print the results */
    if (excluded) {
      // skipping
    } else if (starts_with("pass", name) || starts_with("fail10.json", name) ||
               starts_with("fail70.json", name)) {
      if (error) {
        unexpected_files.emplace_back(name);
        printf("warning: file %s should pass but it fails. Error is: %s\n",
               name, error_message(error));
        printf("size of file in bytes: %zu \n", json.size());
      }
    } else if (starts_with("fail", name)) {
      if (!error) {
        unexpected_files.emplace_back(name);
        printf("warning: file %s should fail but it passes.\n", name);
        printf("size of file in bytes: %zu \n", json.size());
      }
    }
  }

  const bool everything_fine = unexpected_files.empty();
  printf("%zu files checked.\n", how_many);
  if (everything_fine) {
    printf("All ok!\n");
  } else {
    fprintf(stderr,
            "There were problems! Consider reviewing the following files:\n");
    for (const std::string &file : unexpected_files) {
      fprintf(stderr, "%s \n", file.c_str());
    }
  }
  printf("Note that json stream expects sequences of objects and arrays, so otherwise valid json files can fail by design.\n");

  // scandir allocates each entry and the array itself with malloc.
  for (int i = 0; i < c; ++i) {
    free(entry_list[i]);
  }
  free(entry_list);
  return everything_fine;
}
/**
 * Entry point: validates the directory given as the single command-line
 * argument.
 *
 * With any other number of arguments, a usage line is printed to stderr and
 * a default directory is validated instead: SIMDJSON_TEST_DATA_DIR when that
 * macro is defined, "jsonchecker/" otherwise.
 *
 * Exits with EXIT_SUCCESS when validate() reports success, EXIT_FAILURE
 * otherwise.
 */
int main(int argc, char *argv[]) {
  // Normal case: exactly one directory argument.
  if (argc == 2) {
    const bool ok = validate(argv[1]);
    return ok ? EXIT_SUCCESS : EXIT_FAILURE;
  }

  std::cerr << "Usage: " << argv[0] << " <directorywithjsonfiles>"
            << std::endl;
#ifdef SIMDJSON_TEST_DATA_DIR
  std::cout << "We are going to assume you mean to use the '"
            << SIMDJSON_TEST_DATA_DIR << "' directory." << std::endl;
  const bool fallback_ok = validate(SIMDJSON_TEST_DATA_DIR);
#else
  std::cout
      << "We are going to assume you mean to use the 'jsonchecker' directory."
      << std::endl;
  const bool fallback_ok = validate("jsonchecker/");
#endif
  return fallback_ok ? EXIT_SUCCESS : EXIT_FAILURE;
}