knowL: Knowledge Libraries
Loading...
Searching...
No Matches
BaseParser_p.h
1#include <QDebug>
2#include <QDir>
3#include <QFile>
4#include <QFileInfo>
5#include <QUrl>
6
7#include <knowCore/BigNumber.h>
8#include <knowCore/Messages.h>
9#include <knowCore/Uri.h>
10#include <knowCore/ValueHash.h>
11#include <knowCore/Uris/rdf.h>
12#include <knowCore/Uris/xsd.h>
13
14#include "../BlankNode.h"
15#include "../Object.h"
16#include "../Subject.h"
17#include "../Triple.h"
18
19namespace knowRDF
20{
21 namespace Turtle
22 {
27 template<typename _Base_, typename _Lexer_, typename _Token_, bool _has_variable_>
28 struct BaseParser : public _Base_
29 {
30 _Lexer_* lexer;
31 knowCore::Messages messages;
32 knowCore::UriManager urlManager;
33
35 knowCore::ValueHash bindings;
36
37 _Token_ currentToken;
38 QList<_Token_> backToken;
39
40 void reportError(const _Token_& _token, const QString& _errorMsg)
41 {
42 messages.reportError(_errorMsg, _token.line);
43 }
44
45 void reportUnexpected(const _Token_& _token)
46 {
47 reportError(_token, clog_qt::qformat("Unexpected '{}'.", _token.toString()));
48 }
49
50 void reportUnexpected(const _Token_& _token, typename _Token_::Type _expectedType)
51 {
52 reportError(_token, clog_qt::qformat("Expected '{}' before '{}'.", _Token_::typeToString(_expectedType), _token.toString()));
53 }
54
55 bool isOfType(const _Token_& _token, typename _Token_::Type _type)
56 {
57 if(_token.type == _type) return true;
58 reportUnexpected(_token, _type);
59 return false;
60 }
61
62 const _Token_& getNextToken()
63 {
64 if(backToken.isEmpty())
65 {
66 return (currentToken = lexer->nextToken());
67 } else {
68 return (currentToken = backToken.takeLast());
69 }
70 }
71 void pushBackToken(const _Token_& _new_current)
72 {
73 backToken.append(currentToken);
74 currentToken = _new_current;
75 }
76
77
78 knowCore::Uri parseIri()
79 {
81 switch(currentToken.type)
82 {
83 case _Token_::CURIE_CONSTANT:
84 {
85 if(currentToken.curie.canResolve(&urlManager))
86 {
87 r = currentToken.curie.resolve(&urlManager);
88 }
89 else
90 {
91 reportError(currentToken, clog_qt::qformat("Unknown curie prefix '{}'", currentToken.curie.prefix()));
92 }
93 getNextToken();
94 break;
95 }
96 case _Token_::URI_CONSTANT:
97 r = urlManager.base().resolved(currentToken.string);
98 getNextToken();
99 break;
100 default:
101 reportUnexpected(currentToken);
102 getNextToken();
103 }
104 return r;
105 }
106
107 BlankNode parseBlankNode()
108 {
109 if(isOfType(currentToken, _Token_::UNDERSCORECOLON))
110 {
111 getNextToken();
112
113 QString label;
114
115 switch(currentToken.type)
116 {
117 case _Token_::IDENTIFIER:
118 label = currentToken.string;
119 getNextToken();
120 break;
121 case _Token_::A:
122 label = "a";
123 getNextToken();
124 break;
125 case _Token_::INTEGER_CONSTANT:
126 case _Token_::FLOAT_CONSTANT:
127 label = currentToken.string;
128 getNextToken();
129 if(currentToken.type == _Token_::IDENTIFIER)
130 {
131 label += currentToken.string;
132 getNextToken();
133 }
134 break;
135 default:
136 reportUnexpected(currentToken);
137 return BlankNode();
138 }
139 if(blankNodes.contains(label))
140 {
141 return blankNodes.value(label);
142 } else {
143 BlankNode bn(label);
144 blankNodes[label] = bn;
145 return bn;
146 }
147 }
148 return BlankNode();
149 }
150
151 Subject parseSubject()
152 {
153 switch(currentToken.type)
154 {
155 case _Token_::STARTBOXBRACKET:
156 {
157 getNextToken();
158 isOfType(currentToken, _Token_::ENDBOXBRACKET);
159 getNextToken();
160 return Subject(BlankNode());
161 }
162 case _Token_::QUESTION:
163 if(_has_variable_)
164 {
165 getNextToken();
166 if(currentToken.type != _Token_::IDENTIFIER and currentToken.type != _Token_::A)
167 {
168 reportUnexpected(currentToken);
169 }
170 QString name = currentToken.type == _Token_::IDENTIFIER ? currentToken.string : "a";
171 getNextToken();
172 return Subject(name, Subject::Type::Variable);
173 }
174 else
175 {
176 return parseIri();
177 }
178 case _Token_::UNDERSCORECOLON:
179 return parseBlankNode();
180 default:
181 return parseIri();
182 }
183 }
184
185 knowCore::Uri parsePredicate()
186 {
187 switch(currentToken.type)
188 {
189 case _Token_::A:
190 getNextToken();
191 return knowCore::Uris::rdf::a;
192 default:
193 return parseIri();
194 }
195 }
196
197 void parseObject(const Subject & _subject, const knowCore::Uri & _predicate)
198 {
199 switch(currentToken.type)
200 {
201 case _Token_::QUESTION:
202 if(_has_variable_)
203 {
204 getNextToken();
205 if(currentToken.type != _Token_::IDENTIFIER and currentToken.type != _Token_::A)
206 {
207 reportUnexpected(currentToken);
208 }
209 QString name = currentToken.type == _Token_::IDENTIFIER ? currentToken.string : "a";
210 getNextToken();
211 appendTriple(_subject, _predicate, Object(name, Object::Type::Variable));
212 }
213 else
214 {
215 reportUnexpected(currentToken);
216 getNextToken();
217 }
218 break;
219 case _Token_::UNDERSCORECOLON:
220 {
221 appendTriple(_subject, _predicate, parseBlankNode());
222 break;
223 }
224 case _Token_::CURIE_CONSTANT:
225 case _Token_::URI_CONSTANT:
226 {
227 appendTriple(_subject, _predicate, parseIri());
228 break;
229 }
230 case _Token_::STRING_CONSTANT:
231 {
232 QString str = currentToken.string;
233 getNextToken();
234 knowCore::Uri literal_uri = knowCore::Uris::xsd::string;
235 if(currentToken.type == _Token_::CIRCUMFLEXCIRCUMFLEX)
236 {
237 getNextToken();
238 literal_uri = parseIri();
239 }
240 switch(currentToken.type)
241 {
242 case _Token_::LANG_TAG:
243 {
244 appendTriple(_subject, _predicate, literal_uri, str, currentToken.string);
245 getNextToken();
246 break;
247 }
248 default:
249 {
250 appendTriple(_subject, _predicate, literal_uri, str, QString());
251 break;
252 }
253 }
254 break;
255 }
256 case _Token_::TRUE:
257 appendTriple(_subject, _predicate, knowCore::Uris::xsd::boolean, true);
258 getNextToken();
259 break;
260 case _Token_::FALSE:
261 appendTriple(_subject, _predicate, knowCore::Uris::xsd::boolean, false);
262 getNextToken();
263 break;
264 case _Token_::INTEGER_CONSTANT:
265 appendTriple(_subject, _predicate, knowCore::Uris::xsd::integer, knowCore::BigNumber::fromString(currentToken.string).expectSuccess());
266 getNextToken();
267 break;
268 case _Token_::FLOAT_CONSTANT:
269 appendTriple(_subject, _predicate, knowCore::Uris::select(currentToken.string.indexOf('e') == -1, knowCore::Uris::xsd::decimal, knowCore::Uris::xsd::float64), knowCore::BigNumber::fromString(currentToken.string).expectSuccess());
270 getNextToken();
271 break;
272 case _Token_::STARTBOXBRACKET:
273 {
274 getNextToken();
275
276 BlankNode bn;
277
278 if(currentToken.type != _Token_::ENDBOXBRACKET)
279 {
280 parseSingleSubject(bn, _Token_::ENDBOXBRACKET);
281 appendTriple(_subject, _predicate, bn);
282 }
283 else
284 {
285 appendTriple(_subject, _predicate, bn);
286 getNextToken();
287 }
288
289 break;
290 }
291 case _Token_::STARTBRACKET:
292 {
293 getNextToken();
294
295 if(currentToken.type == _Token_::ENDBRACKET)
296 {
297 appendTriple(_subject, _predicate, knowCore::Uris::rdf::nil);
298 }
299 else
300 {
301
302 BlankNode bn;
303
304 appendTriple(_subject, _predicate, bn);
305
306 while(currentToken.type != _Token_::END_OF_FILE)
307 {
308 parseObject(bn, knowCore::Uris::rdf::first);
309
310 if(currentToken.type == _Token_::ENDBRACKET)
311 {
312 appendTriple(bn, knowCore::Uris::rdf::rest, knowCore::Uris::rdf::nil);
313 break;
314 }
315 else
316 {
317 BlankNode nbn;
318 appendTriple(bn, knowCore::Uris::rdf::rest, nbn);
319 bn = nbn;
320 }
321 }
322 }
323 getNextToken();
324 break;
325 }
326 case _Token_::LOAD_FILE:
327 {
328 getNextToken();
329 if(not isOfType(currentToken, _Token_::STARTBRACKET)) return;
330 getNextToken(); // eats '('
331 if(not isOfType(currentToken, _Token_::STRING_CONSTANT)) return;
332 QString filename = currentToken.string;
333
334 QUrl base = QUrl(urlManager.base());
335 QString base_fn = base.scheme() == "qrc" ? ":" + base.path() : base.toLocalFile();
336 QString fn = QFileInfo(base_fn).dir().absolutePath() + "/" + filename;
337 QFile f(fn);
338 if(f.open(QIODevice::ReadOnly))
339 {
340 appendTriple<QString>(_subject, _predicate, knowCore::Uris::xsd::string, QString::fromUtf8(f.readAll()), QString());
341 } else {
342 reportError(currentToken, clog_qt::qformat("Failed to open file '{}'", f.fileName()));
343 }
344
345 getNextToken(); // eats filename
346 if(not isOfType(currentToken, _Token_::ENDBRACKET)) return;
347 getNextToken(); // eats ')'
348 break;
349 }
350 default:
351 reportUnexpected(currentToken);
352 getNextToken();
353 }
354 }
355
356 void parseSingleSubject(const Subject & subject, const typename _Token_::Type & _endType)
357 {
358 while(currentToken.type != _Token_::END_OF_FILE)
359 {
360 knowCore::Uri predicate = parsePredicate();
361
362 while(currentToken.type != _Token_::END_OF_FILE)
363 {
364 parseObject(subject, predicate);
365
366 if(currentToken.type == _Token_::COMA)
367 {
368 getNextToken();
369 }
370 else
371 {
372 break;
373 }
374 }
375 if(currentToken.type == _endType)
376 {
377 getNextToken();
378 break;
379 }
380 else if(isOfType(currentToken, _Token_::SEMI))
381 {
382 getNextToken();
383 if(currentToken.type == _endType)
384 {
385 getNextToken();
386 break;
387 }
388 }
389 }
390 }
391 using _Base_::appendTriple;
392 template<typename _T_>
393 void appendTriple(const Subject & _subject, const knowCore::Uri & _predicate, const knowCore::Uri& _dataTypeUri, const _T_ & _value, const QString& _lang = QString())
394 {
395 const auto [success, object, message] = Object::fromValue(_dataTypeUri, _value, _lang);
396 if(success)
397 {
398 this->appendTriple(Triple(_subject, _predicate, object));
399 } else {
400 reportError(currentToken, clog_qt::qformat("Failed to create object for value '{}' with error '{}'", _value, message));
401 }
402 }
403 void appendTriple(const Subject & _subject, const knowCore::Uri & _predicate, const Object & _value)
404 {
405 this->appendTriple(Triple(_subject, _predicate, _value));
406 }
407 void appendTriple(const Subject & _subject, const knowCore::Uri & _predicate, const knowCore::Uri& _dataTypeUri, const QString& _value, const QString& _lang)
408 {
409 const auto[success, lit, message] = Literal::fromRdfLiteral(_dataTypeUri, _value, _lang);
410 if(success)
411 {
412 appendTriple(_subject, _predicate, lit);
413 } else {
414 reportError(currentToken, clog_qt::qformat("Failed to parse RDF literal '{}' of type '{}' with error '{}'", _value, _dataTypeUri, message));
415 }
416 }
417 };
418 }
419}
420
Definition Forward.h:8
Definition Forward.h:12
Definition Messages.h:16
Definition UriManager.h:10
Definition Uri.h:14
Definition ValueHash.h:13
Definition BlankNode.h:23
static knowCore::ReturnValue< Literal > fromRdfLiteral(const knowCore::Uri &_datatype, const QString &_value, const QString &_lang=QString())
Definition Literal.cpp:127
Definition Object.h:11
Definition Subject.h:18
Definition Triple.h:17
Definition BaseParser_p.h:29