knowL: Knowledge Libraries
Loading...
Searching...
No Matches
BaseParser_p.h
1#include <QDebug>
2#include <QDir>
3#include <QFile>
4#include <QFileInfo>
5#include <QUrl>
6
7#include <knowCore/BigNumber.h>
8#include <knowCore/Messages.h>
9#include <knowCore/Uri.h>
10#include <knowCore/Uris/rdf.h>
11#include <knowCore/Uris/xsd.h>
12#include <knowCore/ValueHash.h>
13
14#include "../BlankNode.h"
15#include "../Object.h"
16#include "../Subject.h"
17#include "../Triple.h"
18
19namespace knowRDF
20{
21 namespace Turtle
22 {
28 template<typename _Base_, typename _Lexer_, typename _Token_, bool _has_variable_>
29 struct BaseParser : public _Base_
30 {
31 _Lexer_* lexer;
32 knowCore::Messages messages;
33 knowCore::UriManager urlManager;
34
36 knowCore::ValueHash bindings;
37
38 _Token_ currentToken;
39 QList<_Token_> backToken;
40
41 void reportError(const _Token_& _token, const QString& _errorMsg)
42 {
43 messages.reportError(_errorMsg, _token.line);
44 }
45
46 void reportUnexpected(const _Token_& _token)
47 {
48 reportError(_token, clog_qt::qformat("Unexpected '{}'.", _token.toString()));
49 }
50
51 void reportUnexpected(const _Token_& _token, typename _Token_::Type _expectedType)
52 {
53 reportError(_token,
54 clog_qt::qformat("Expected '{}' before '{}'.",
55 _Token_::typeToString(_expectedType), _token.toString()));
56 }
57
58 bool isOfType(const _Token_& _token, typename _Token_::Type _type)
59 {
60 if(_token.type == _type)
61 return true;
62 reportUnexpected(_token, _type);
63 return false;
64 }
65
66 const _Token_& getNextToken()
67 {
68 if(backToken.isEmpty())
69 {
70 return (currentToken = lexer->nextToken());
71 }
72 else
73 {
74 return (currentToken = backToken.takeLast());
75 }
76 }
77 void pushBackToken(const _Token_& _new_current)
78 {
79 backToken.append(currentToken);
80 currentToken = _new_current;
81 }
82
83 knowCore::Uri parseIri()
84 {
86 switch(currentToken.type)
87 {
88 case _Token_::CURIE_CONSTANT:
89 {
90 if(currentToken.curie.canResolve(&urlManager))
91 {
92 r = currentToken.curie.resolve(&urlManager);
93 }
94 else
95 {
96 reportError(currentToken,
97 clog_qt::qformat("Unknown curie prefix '{}'", currentToken.curie.prefix()));
98 }
99 getNextToken();
100 break;
101 }
102 case _Token_::URI_CONSTANT:
103 r = urlManager.base().resolved(currentToken.string);
104 getNextToken();
105 break;
106 default:
107 reportUnexpected(currentToken);
108 getNextToken();
109 }
110 return r;
111 }
112
113 BlankNode parseBlankNode()
114 {
115 if(isOfType(currentToken, _Token_::UNDERSCORECOLON))
116 {
117 getNextToken();
118
119 QString label;
120
121 switch(currentToken.type)
122 {
123 case _Token_::IDENTIFIER:
124 label = currentToken.string;
125 getNextToken();
126 break;
127 case _Token_::A:
128 label = "a";
129 getNextToken();
130 break;
131 case _Token_::INTEGER_CONSTANT:
132 case _Token_::FLOAT_CONSTANT:
133 label = currentToken.string;
134 getNextToken();
135 if(currentToken.type == _Token_::IDENTIFIER)
136 {
137 label += currentToken.string;
138 getNextToken();
139 }
140 break;
141 default:
142 reportUnexpected(currentToken);
143 return BlankNode();
144 }
145 if(blankNodes.contains(label))
146 {
147 return blankNodes.value(label);
148 }
149 else
150 {
151 BlankNode bn(label);
152 blankNodes[label] = bn;
153 return bn;
154 }
155 }
156 return BlankNode();
157 }
158
159 Subject parseSubject()
160 {
161 switch(currentToken.type)
162 {
163 case _Token_::STARTBOXBRACKET:
164 {
165 getNextToken();
166 isOfType(currentToken, _Token_::ENDBOXBRACKET);
167 getNextToken();
168 return Subject(BlankNode());
169 }
170 case _Token_::QUESTION:
171 if(_has_variable_)
172 {
173 getNextToken();
174 if(currentToken.type != _Token_::IDENTIFIER and currentToken.type != _Token_::A)
175 {
176 reportUnexpected(currentToken);
177 }
178 QString name = currentToken.type == _Token_::IDENTIFIER ? currentToken.string : "a";
179 getNextToken();
180 return Subject(name, Subject::Type::Variable);
181 }
182 else
183 {
184 return parseIri();
185 }
186 case _Token_::UNDERSCORECOLON:
187 return parseBlankNode();
188 default:
189 return parseIri();
190 }
191 }
192
193 knowCore::Uri parsePredicate()
194 {
195 switch(currentToken.type)
196 {
197 case _Token_::A:
198 getNextToken();
199 return knowCore::Uris::rdf::a;
200 default:
201 return parseIri();
202 }
203 }
204
205 void parseObject(const Subject& _subject, const knowCore::Uri& _predicate)
206 {
207 switch(currentToken.type)
208 {
209 case _Token_::QUESTION:
210 if(_has_variable_)
211 {
212 getNextToken();
213 if(currentToken.type != _Token_::IDENTIFIER and currentToken.type != _Token_::A)
214 {
215 reportUnexpected(currentToken);
216 }
217 QString name = currentToken.type == _Token_::IDENTIFIER ? currentToken.string : "a";
218 getNextToken();
219 appendTriple(_subject, _predicate, Object(name, Object::Type::Variable));
220 }
221 else
222 {
223 reportUnexpected(currentToken);
224 getNextToken();
225 }
226 break;
227 case _Token_::UNDERSCORECOLON:
228 {
229 appendTriple(_subject, _predicate, parseBlankNode());
230 break;
231 }
232 case _Token_::CURIE_CONSTANT:
233 case _Token_::URI_CONSTANT:
234 {
235 appendTriple(_subject, _predicate, parseIri());
236 break;
237 }
238 case _Token_::STRING_CONSTANT:
239 {
240 QString str = currentToken.string;
241 getNextToken();
242 knowCore::Uri literal_uri = knowCore::Uris::xsd::string;
243 if(currentToken.type == _Token_::CIRCUMFLEXCIRCUMFLEX)
244 {
245 getNextToken();
246 literal_uri = parseIri();
247 }
248 switch(currentToken.type)
249 {
250 case _Token_::LANG_TAG:
251 {
252 appendTriple(_subject, _predicate, literal_uri, str, currentToken.string);
253 getNextToken();
254 break;
255 }
256 default:
257 {
258 appendTriple(_subject, _predicate, literal_uri, str, QString());
259 break;
260 }
261 }
262 break;
263 }
264 case _Token_::TRUE:
265 appendTriple(_subject, _predicate, knowCore::Uris::xsd::boolean, true);
266 getNextToken();
267 break;
268 case _Token_::FALSE:
269 appendTriple(_subject, _predicate, knowCore::Uris::xsd::boolean, false);
270 getNextToken();
271 break;
272 case _Token_::INTEGER_CONSTANT:
273 appendTriple(_subject, _predicate, knowCore::Uris::xsd::integer,
274 knowCore::BigNumber::fromString(currentToken.string).expect_success());
275 getNextToken();
276 break;
277 case _Token_::FLOAT_CONSTANT:
278 appendTriple(_subject, _predicate,
279 knowCore::Uris::select(currentToken.string.indexOf('e') == -1,
280 knowCore::Uris::xsd::decimal,
281 knowCore::Uris::xsd::float64),
282 knowCore::BigNumber::fromString(currentToken.string).expect_success());
283 getNextToken();
284 break;
285 case _Token_::STARTBOXBRACKET:
286 {
287 getNextToken();
288
289 BlankNode bn;
290
291 if(currentToken.type != _Token_::ENDBOXBRACKET)
292 {
293 parseSingleSubject(bn, _Token_::ENDBOXBRACKET);
294 appendTriple(_subject, _predicate, bn);
295 }
296 else
297 {
298 appendTriple(_subject, _predicate, bn);
299 getNextToken();
300 }
301
302 break;
303 }
304 case _Token_::STARTBRACKET:
305 {
306 getNextToken();
307
308 if(currentToken.type == _Token_::ENDBRACKET)
309 {
310 appendTriple(_subject, _predicate, knowCore::Uris::rdf::nil);
311 }
312 else
313 {
314
315 BlankNode bn;
316
317 appendTriple(_subject, _predicate, bn);
318
319 while(currentToken.type != _Token_::END_OF_FILE)
320 {
321 parseObject(bn, knowCore::Uris::rdf::first);
322
323 if(currentToken.type == _Token_::ENDBRACKET)
324 {
325 appendTriple(bn, knowCore::Uris::rdf::rest, knowCore::Uris::rdf::nil);
326 break;
327 }
328 else
329 {
330 BlankNode nbn;
331 appendTriple(bn, knowCore::Uris::rdf::rest, nbn);
332 bn = nbn;
333 }
334 }
335 }
336 getNextToken();
337 break;
338 }
339 case _Token_::LOAD_FILE:
340 {
341 getNextToken();
342 if(not isOfType(currentToken, _Token_::STARTBRACKET))
343 return;
344 getNextToken(); // eats '('
345 if(not isOfType(currentToken, _Token_::STRING_CONSTANT))
346 return;
347 QString filename = currentToken.string;
348
349 QUrl base = QUrl(urlManager.base());
350 QString base_fn = base.scheme() == "qrc" ? ":" + base.path() : base.toLocalFile();
351 QString fn = QFileInfo(base_fn).dir().absolutePath() + "/" + filename;
352 QFile f(fn);
353 if(f.open(QIODevice::ReadOnly))
354 {
355 appendTriple<QString>(_subject, _predicate, knowCore::Uris::xsd::string,
356 QString::fromUtf8(f.readAll()), QString());
357 }
358 else
359 {
360 reportError(currentToken, clog_qt::qformat("Failed to open file '{}'", f.fileName()));
361 }
362
363 getNextToken(); // eats filename
364 if(not isOfType(currentToken, _Token_::ENDBRACKET))
365 return;
366 getNextToken(); // eats ')'
367 break;
368 }
369 default:
370 reportUnexpected(currentToken);
371 getNextToken();
372 }
373 }
374
375 void parseSingleSubject(const Subject& subject, const typename _Token_::Type& _endType)
376 {
377 while(currentToken.type != _Token_::END_OF_FILE)
378 {
379 knowCore::Uri predicate = parsePredicate();
380
381 while(currentToken.type != _Token_::END_OF_FILE)
382 {
383 parseObject(subject, predicate);
384
385 if(currentToken.type == _Token_::COMA)
386 {
387 getNextToken();
388 }
389 else
390 {
391 break;
392 }
393 }
394 if(currentToken.type == _endType)
395 {
396 getNextToken();
397 break;
398 }
399 else if(isOfType(currentToken, _Token_::SEMI))
400 {
401 getNextToken();
402 if(currentToken.type == _endType)
403 {
404 getNextToken();
405 break;
406 }
407 }
408 }
409 }
410 using _Base_::appendTriple;
411 template<typename _T_>
412 void appendTriple(const Subject& _subject, const knowCore::Uri& _predicate,
413 const knowCore::Uri& _dataTypeUri, const _T_& _value,
414 const QString& _lang = QString())
415 {
416 const auto [success, object, message] = Object::fromValue(_dataTypeUri, _value, _lang);
417 if(success)
418 {
419 this->appendTriple(Triple(_subject, _predicate, object.value()));
420 }
421 else
422 {
423 reportError(currentToken,
424 clog_qt::qformat("Failed to create object for value '{}' with error '{}'",
425 _value, message.value()));
426 }
427 }
428 void appendTriple(const Subject& _subject, const knowCore::Uri& _predicate,
429 const Object& _value)
430 {
431 this->appendTriple(Triple(_subject, _predicate, _value));
432 }
433 void appendTriple(const Subject& _subject, const knowCore::Uri& _predicate,
434 const knowCore::Uri& _dataTypeUri, const QString& _value,
435 const QString& _lang)
436 {
437 const auto [success, lit, message] = Literal::fromRdfLiteral(_dataTypeUri, _value, _lang);
438 if(success)
439 {
440 appendTriple(_subject, _predicate, lit.value());
441 }
442 else
443 {
444 reportError(
445 currentToken,
446 clog_qt::qformat("Failed to parse RDF literal '{}' of type '{}' with error '{}'",
447 _value, _dataTypeUri, message.value()));
448 }
449 }
450 };
451 } // namespace Turtle
452} // namespace knowRDF
Definition Forward.h:10
Definition Forward.h:14
Definition Messages.h:18
Definition UriManager.h:10
Definition Uri.h:15
Definition ValueHash.h:13
Definition BlankNode.h:25
static cres_qresult< Literal > fromRdfLiteral(const knowCore::Uri &_datatype, const QString &_value, const QString &_lang=QString())
Definition Literal.cpp:139
Definition Object.h:11
Definition Subject.h:21
Definition Triple.h:19
Definition BaseParser_p.h:30