NAWA 0.8
Web Application Framework for C++
utils.cpp
Go to the documentation of this file.
1
6/*
7 * Copyright (C) 2019-2021 Tobias Flaig.
8 *
9 * This file is part of nawa.
10 *
11 * nawa is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License,
13 * version 3, as published by the Free Software Foundation.
14 *
15 * nawa is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with nawa. If not, see <https://www.gnu.org/licenses/>.
22 */
23
24#include <boost/algorithm/string.hpp>
25#include <fstream>
26#include <iomanip>
27#include <nawa/Exception.h>
28#include <nawa/util/encoding.h>
29#include <nawa/util/utils.h>
30#include <unordered_map>
31
32using namespace nawa;
33using namespace std;
34
35namespace {
/**
 * Lookup table mapping lowercase file extensions (without the leading dot) to content type strings.
 * The table is only ever read, so it is declared const to rule out accidental modification
 * of this file-wide state.
 */
const unordered_map<string, string> contentTypeMap = {
        {"aac", "audio/aac"},
        {"arc", "application/x-freearc"},
        {"avi", "video/x-msvideo"},
        {"azw", "application/vnd.amazon.ebook"},
        {"bmp", "image/bmp"},
        {"bz", "application/x-bzip"},
        {"bz2", "application/x-bzip2"},
        {"csh", "application/x-csh"},
        {"css", "text/css"},
        {"csv", "text/csv"},
        {"deb", "application/vnd.debian.binary-package"},
        {"doc", "application/msword"},
        {"dot", "application/msword"},
        {"docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"},
        {"dotx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"},
        {"eot", "application/vnd.ms-fontobject"},
        {"epub", "application/epub+zip"},
        {"flv", "video/x-flv"},
        {"f4v", "video/mp4"},
        {"f4a", "audio/mp4"},
        {"gif", "image/gif"},
        {"gz", "application/x-gzip"},
        {"htm", "text/html"},
        {"html", "text/html"},
        {"ico", "image/vnd.microsoft.icon"},
        {"ics", "text/calendar"},
        {"jar", "application/java-archive"},
        {"java", "text/plain"},
        {"jpg", "image/jpeg"},
        {"jpeg", "image/jpeg"},
        {"js", "text/javascript"},
        {"json", "application/json"},
        {"mid", "audio/x-midi"},
        {"midi", "audio/x-midi"},
        {"mjs", "application/javascript"},
        {"mp3", "audio/mpeg"},
        {"mpeg", "video/mpeg"},
        {"mp4", "application/mp4"},
        {"m4v", "video/mp4"},
        {"m4a", "audio/mp4"},
        {"mpkg", "application/vnd.apple.installer+xml"},
        {"odp", "application/vnd.oasis.opendocument.presentation"},
        {"otp", "application/vnd.oasis.opendocument.presentation"},
        {"ods", "application/vnd.oasis.opendocument.spreadsheet"},
        {"ots", "application/vnd.oasis.opendocument.spreadsheet"},
        {"odt", "application/vnd.oasis.opendocument.text"},
        {"ott", "application/vnd.oasis.opendocument.text"},
        {"ogg", "application/ogg"},
        {"ogx", "application/ogg"},
        {"oga", "audio/ogg"},
        {"ogv", "video/ogg"},
        {"otf", "font/otf"},
        {"png", "image/png"},
        {"pdf", "application/pdf"},
        {"ppt", "application/vnd.ms-powerpoint"},
        {"pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation"},
        {"rar", "application/x-rar-compressed"},
        {"rtf", "application/rtf"},
        {"sh", "application/x-sh"},
        {"svg", "image/svg+xml"},
        {"swf", "application/x-shockwave-flash"},
        {"tar", "application/x-tar"},
        {"tif", "image/tiff"},
        {"tiff", "image/tiff"},
        {"ttf", "font/ttf"},
        {"txt", "text/plain"},
        {"vsd", "application/vnd.visio"},
        {"wav", "audio/wav"},
        {"weba", "audio/webm"},
        {"webm", "video/webm"},
        {"webp", "image/webp"},
        {"woff", "font/woff"},
        {"woff2", "font/woff2"},
        {"xhtml", "application/xhtml+xml"},
        {"xls", "application/vnd.ms-excel"},
        {"xlt", "application/vnd.ms-excel"},
        {"xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"},
        {"xltx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"},
        {"xml", "application/xml"},
        {"xul", "application/vnd.mozilla.xul+xml"},
        {"xz", "application/x-xz"},
        {"zip", "application/zip"},
        {"3gp", "video/3gpp"},
        {"3g2", "video/3gpp2"},
        {"7z", "application/x-7z-compressed"}};
122
/**
 * Translate a day-of-week index (0 = Sunday ... 6 = Saturday, the numbering of tm::tm_wday)
 * into its English three-letter abbreviation.
 * @param dow Day-of-week index.
 * @return The abbreviation, or an empty string if dow is not in the range 0..6.
 */
inline string getDayOfWeek(int dow) {
    static char const* const dayNames[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
    if (dow < 0 || dow > 6) {
        // unknown index: callers get an empty abbreviation
        return {};
    }
    return dayNames[dow];
}
158
/**
 * Translate a month index (0 = January ... 11 = December, the numbering of tm::tm_mon)
 * into its English three-letter abbreviation.
 * @param mon Month index.
 * @return The abbreviation, or an empty string if mon is not in the range 0..11.
 */
inline string getMonth(int mon) {
    static char const* const monthNames[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
                                             "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
    if (mon < 0 || mon > 11) {
        // unknown index: callers get an empty abbreviation
        return {};
    }
    return monthNames[mon];
}
209
210}// namespace
211
212// doxygen bug, somehow doxygen does not like std::function
/**
 * Replace every match of a regular expression in a string by the result of a callback.
 * @param s String to process; it is overwritten with the result.
 * @param rgx Regular expression to search for.
 * @param fmt Callback invoked once per match, in order of appearance. It receives a vector holding
 * the full match at index 0 followed by one entry per capture group (empty for groups that did not
 * participate) and returns the replacement text for that match.
 */
void utils::regexReplaceCallback(std::string& s, std::regex const& rgx,
                                 std::function<std::string(std::vector<std::string> const&)> const& fmt) {
    string result;
    // start of the not yet copied remainder of s (everything after the last match seen so far)
    auto tail = s.cbegin();

    for (sregex_iterator match(s.cbegin(), s.cend(), rgx), noMatch; match != noMatch; ++match) {
        // copy the unmatched text between the previous match (or the start) and this match
        result.append(match->prefix().first, match->prefix().second);

        // collect the full match and all capture groups for the callback
        vector<string> groups;
        groups.reserve(match->size());
        for (size_t i = 0; i < match->size(); ++i) {
            groups.push_back((*match)[i].str());
        }
        result += fmt(groups);

        tail = (*match)[0].second;
    }

    // copy whatever follows the last match (the whole input if nothing matched)
    result.append(tail, s.cend());
    s = result;
}
246
/**
 * Create a lowercase hexadecimal representation of the bytes of a string.
 * @param in Input bytes.
 * @return String with exactly two hex digits per input byte, without separators.
 */
string utils::hexDump(string const& in) {
    static char const hexDigits[] = "0123456789abcdef";
    string dump;
    dump.reserve(in.size() * 2);
    for (unsigned char byte : in) {
        dump += hexDigits[byte >> 4];  // high nibble
        dump += hexDigits[byte & 0x0F];// low nibble
    }
    return dump;
}
255
/**
 * Convert all characters of a string to lowercase (according to the current C locale).
 * @param s Input string (taken by value and modified in place).
 * @return The lowercased string.
 */
string utils::toLowercase(string s) {
    // tolower() must only receive values representable as unsigned char (or EOF); passing a plain,
    // possibly negative char is undefined behavior, so every character is converted first
    transform(s.begin(), s.end(), s.begin(),
              [](unsigned char c) { return static_cast<char>(::tolower(c)); });
    return s;
}
260
/**
 * Convert all characters of a string to uppercase (according to the current C locale).
 * @param s Input string (taken by value and modified in place).
 * @return The uppercased string.
 */
string utils::toUppercase(string s) {
    // toupper() must only receive values representable as unsigned char (or EOF); passing a plain,
    // possibly negative char is undefined behavior, so every character is converted first
    transform(s.begin(), s.end(), s.begin(),
              [](unsigned char c) { return static_cast<char>(::toupper(c)); });
    return s;
}
265
/**
 * Generate a minimal HTML error page for an HTTP status code.
 * @param httpStatus HTTP status code. Codes without a dedicated entry are titled "Unknown Error".
 * @return HTML document containing the status code and reason phrase in the title, the reason phrase
 * as heading, and a short explanation paragraph (empty for codes that have no explanation text).
 */
string utils::generateErrorPage(unsigned int httpStatus) {
    string errorStr;
    string explanation;
    switch (httpStatus) {
        case 400:
            errorStr = "Bad Request";
            explanation = "The server cannot process your request.";
            break;
        case 401:
            errorStr = "Unauthorized";
            explanation = "The necessary credentials have not been provided.";
            break;
        case 403:
            errorStr = "Forbidden";
            explanation = "You do not have the necessary permissions to view this page.";
            break;
        case 404:
            errorStr = "Not Found";
            explanation = "The requested URL was not found on this server.";
            break;
        case 405:
            errorStr = "Method Not Allowed";
            explanation = "The used request method is not supported for the requested resource.";
            break;
        case 406:
            // the standard reason phrase for 406 is "Not Acceptable"
            errorStr = "Not Acceptable";
            explanation = "The requested function is unable to produce a resource that satisfies your browser's Accept header.";
            break;
        case 408:
            errorStr = "Request Timeout";
            explanation = "A timeout occurred while waiting for your request.";
            break;
        case 409:
            errorStr = "Conflict";
            explanation = "The request cannot be processed due to a conflict on the underlying resource.";
            break;
        case 410:
            errorStr = "Gone";
            explanation = "The requested resource is no longer available.";
            break;
        case 415:
            errorStr = "Unsupported Media Type";
            explanation = "Your browser has requested a media type that cannot be provided by this resource.";
            break;
        case 418:
            errorStr = "I'm a teapot";
            explanation = "I cannot brew coffee for you.";
            break;
        case 429:
            errorStr = "Too Many Requests";
            break;
        case 451:
            errorStr = "Unavailable For Legal Reasons";
            break;
        case 500:
            errorStr = "Internal Server Error";
            explanation = "The server encountered an internal error and is unable to fulfill your request.";
            break;
        case 501:
            errorStr = "Not Implemented";
            explanation = "The server is not able to fulfill your request.";
            break;
        case 503:
            errorStr = "Service Unavailable";
            explanation = "This service is currently unavailable. Please try again later.";
            break;
        default:
            errorStr = "Unknown Error";
            break;
    }

    stringstream ep;
    ep << "<!DOCTYPE html><html><head><title>" << httpStatus << ' ' << errorStr << "</title></head><body><h1>"
       << errorStr << "</h1><p>" << explanation << "</p></body></html>";

    return ep.str();
}
342
/**
 * Extract the file extension (the part after the last dot) from a filename.
 * @param filename Filename, optionally including a path with '/' separators.
 * @return The extension without the dot. An empty string is returned if the filename contains no dot,
 * ends with a dot, or if the last dot belongs to a directory component rather than the file itself.
 */
string utils::getFileExtension(string const& filename) {
    auto const dotPos = filename.find_last_of('.');
    if (dotPos == string::npos) {
        // no dot at all: without this check, npos + 1 would wrap to 0 and the whole name would be returned
        return {};
    }
    auto const slashPos = filename.find_last_of('/');
    if (slashPos != string::npos && slashPos > dotPos) {
        // the last dot is part of a directory name, e.g. "dir.d/file"
        return {};
    }
    return filename.substr(dotPos + 1);
}
350
351string utils::contentTypeByExtension(string extension) {
352 auto ext = toLowercase(move(extension));
353 if (contentTypeMap.count(ext) == 1) {
354 return contentTypeMap.at(ext);
355 }
356 return "application/octet-stream";
357}
358
359string utils::makeHttpTime(time_t time) {
360 stringstream httpTime;
361 tm gmt;
362 gmtime_r(&time, &gmt);
363 httpTime << getDayOfWeek(gmt.tm_wday) << put_time(&gmt, ", %d ") << getMonth(gmt.tm_mon);
364 httpTime << put_time(&gmt, " %Y %H:%M:%S GMT");
365
366 return httpTime.str();
367}
368
/**
 * Parse an HTTP date string (e.g. "Thu, 07 Nov 2019 16:29:50 GMT") into a UNIX time stamp.
 * @param httpTime HTTP date string.
 * @return UNIX time stamp. The result for strings that do not match the expected format is
 * deterministic, but not meaningful.
 */
time_t utils::readHttpTime(string const& httpTime) {
    istringstream timeStream(httpTime);
    // value-initialize: get_time only writes the fields it successfully parsed, all others
    // (and all fields on a parse failure) would otherwise be read uninitialized by timegm
    tm timeStruct{};
    timeStream >> get_time(&timeStruct, "%a, %d %b %Y %H:%M:%S GMT");

    // timegm will interpret the tm as UTC and convert it to a time_t
    return timegm(&timeStruct);
}
377
378string utils::makeSmtpTime(time_t time) {
379 stringstream smtpTime;
380 tm ltime;
381 localtime_r(&time, &ltime);
382 smtpTime << getDayOfWeek(ltime.tm_wday) << put_time(&ltime, ", %e ") << getMonth(ltime.tm_mon);
383 smtpTime << put_time(&ltime, " %Y %H:%M:%S %z");
384
385 return smtpTime.str();
386}
387
/**
 * Parse an SMTP date string (e.g. "Thu,  7 Nov 2019 16:29:50 +0100") into a UNIX time stamp.
 * The numeric time zone offset is honored if it is present at the expected position and well-formed;
 * otherwise the date is treated as UTC.
 * @param smtpTime SMTP date string.
 * @return UNIX time stamp. The result for strings that do not match the expected format is
 * deterministic, but not meaningful.
 */
time_t utils::readSmtpTime(string const& smtpTime) {
    string smtpTimeM = smtpTime;
    // value-initialize: get_time only writes the fields it successfully parsed, all others
    // (and all fields on a parse failure) would otherwise be read uninitialized by timegm
    tm timeStruct{};

    // there seems to be a bug in get_time, %e parsing with leading space does not work, so this fails for
    // days of month < 10:
    //timeStream >> get_time(&timeStruct, "%a, %e %b %Y %H:%M:%S %z");

    // dirty hack: turn the space-padded day of month into a zero-padded one, so that %d can be used
    if (smtpTimeM.length() > 5 && smtpTimeM[5] == ' ') {
        smtpTimeM[5] = '0';
    }
    istringstream timeStream(smtpTimeM);
    timeStream >> get_time(&timeStruct, "%a, %d %b %Y %H:%M:%S %z");

    // timegm will create a time_t, but does not honor the time zone, unfortunately (not part of tm)
    time_t unixTime = timegm(&timeStruct);

    // so we'll have to add/subtract the difference manually; the offset "+hhmm"/"-hhmm" is expected
    // at the fixed positions 26..30
    if (smtpTimeM.length() > 30) {
        auto const isDigit = [](char c) { return c >= '0' && c <= '9'; };
        // only use the offset if it really consists of four digits (instead of letting stoi throw
        // on malformed input)
        if (isDigit(smtpTimeM[27]) && isDigit(smtpTimeM[28]) && isDigit(smtpTimeM[29]) && isDigit(smtpTimeM[30])) {
            // a negative offset means local time is behind UTC, so the difference has to be added
            int tzAdjust = smtpTimeM[26] == '-' ? 1 : -1;
            int tzH = (smtpTimeM[27] - '0') * 10 + (smtpTimeM[28] - '0');
            int tzM = (smtpTimeM[29] - '0') * 10 + (smtpTimeM[30] - '0');
            unixTime += tzAdjust * (tzH * 3600 + tzM * 60);
        }
    }

    return unixTime;
}
417
/**
 * Split a string at every occurrence of a delimiter character.
 * @param str String to split.
 * @param delimiter Character at which the string is split; it is not part of the tokens.
 * @param ignoreEmpty If true, empty tokens (caused by leading or consecutive delimiters) are dropped.
 * @return The tokens in order of appearance. An empty input yields an empty vector, and a delimiter
 * at the very end of the input does not produce a trailing empty token.
 */
vector<string> utils::splitString(string str, char delimiter, bool ignoreEmpty) {
    vector<string> ret;
    // walk through the string by index instead of repeatedly copying the remainder
    size_t start = 0;
    while (start < str.length()) {
        size_t const pos = str.find(delimiter, start);
        string token = str.substr(start, pos == string::npos ? string::npos : pos - start);
        if (!ignoreEmpty || !token.empty()) {
            ret.push_back(move(token));
        }
        if (pos == string::npos) {
            // last token reached
            break;
        }
        start = pos + 1;
    }
    return ret;
}
434
/**
 * Build a path string from its elements.
 * @param path Path elements, without slashes.
 * @return Every element prefixed by '/' and concatenated, or just "/" if there are no elements.
 */
string utils::mergePath(vector<string> const& path) {
    string merged;
    for (auto const& element : path) {
        merged += '/';
        merged += element;
    }
    // an empty result is only possible if there were no elements at all
    if (merged.empty()) {
        merged = "/";
    }
    return merged;
}
445
446vector<string> utils::splitPath(string const& pathString) {
447 // remove query string
448 string rawPath = pathString.substr(0, pathString.find('?'));
449 return splitString(rawPath, '/', true);
450}
451
/**
 * Convert the line endings of a text to the given line ending.
 * @param in Input text.
 * @param ending String that replaces every line feed character.
 * @return The text with every '\n' replaced by ending and every '\r' removed.
 */
string utils::convertLineEndings(string const& in, string const& ending) {
    string converted;
    for (char const ch : in) {
        switch (ch) {
            case '\r':
                // carriage returns are dropped, the line feed alone marks the end of a line
                break;
            case '\n':
                converted += ending;
                break;
            default:
                converted += ch;
                break;
        }
    }
    return converted;
}
462
463string utils::getFileContents(string const& path) {
464 // open file as binary
465 ifstream f(path, ifstream::binary);
466
467 // throw exception if file cannot be opened
468 if (!f) {
469 throw Exception(__PRETTY_FUNCTION__, 1, "Cannot open file for reading");
470 }
471
472 // get file size
473 f.seekg(0, ios::end);
474 long fs = f.tellg();
475 f.seekg(0);
476
477 // load to string
478 string ret(static_cast<unsigned long>(fs), '\0');
479 f.read(&ret[0], fs);
480
481 return ret;
482}
483
/**
 * Replace characters in a string according to a map of patterns.
 * Every character of the input is looked up exactly once, so a replacement is never replaced again
 * by another pattern and the result does not depend on the iteration order of the map.
 * @param input Input string (taken by value and modified in place).
 * @param patterns Map of characters to be replaced (keys) and their replacements (values).
 * @return The string with all replacements applied.
 */
string utils::stringReplace(string input, unordered_map<char, char> const& patterns) {
    if (patterns.empty()) {
        return input;
    }
    for (char& c : input) {
        auto const pattern = patterns.find(c);
        if (pattern != patterns.end()) {
            c = pattern->second;
        }
    }
    return input;
}
490
/**
 * Replace substrings in a string according to a map of patterns.
 * The patterns are applied one after another in the iteration order of the map, so if the replacement
 * of one pattern contains the key of another one, the result depends on that (unspecified) order.
 * Within one pattern, text inserted as replacement is not searched again.
 * @param input Input string (taken by value and modified in place).
 * @param patterns Map of substrings to be replaced (keys) and their replacements (values).
 * Patterns with an empty key are ignored.
 * @return The string with all replacements applied.
 */
string utils::stringReplace(string input, unordered_map<string, string> const& patterns) {
    for (auto const& [key, val] : patterns) {
        // an empty key matches at every position and would never terminate
        if (key.empty()) {
            continue;
        }
        for (size_t pos = input.find(key); pos != string::npos;) {
            input.replace(pos, key.length(), val);
            // continue searching behind the replacement
            pos = input.find(key, pos + val.length());
        }
    }
    return input;
}
500
501unordered_multimap<string, string> utils::splitQueryString(string const& queryString) {
502 string qs;
503 size_t qmrkPos = queryString.find_first_of('?');
504 unordered_multimap<string, string> ret;
505 if (qmrkPos != string::npos && queryString.length() > qmrkPos) {
506 qs = queryString.substr(qmrkPos + 1);
507 } else if (qmrkPos == string::npos) {
508 qs = queryString;
509 }
510 auto pairs = splitString(qs, '&', true);
511 for (auto const& p : pairs) {
512 size_t eqPos = p.find_first_of('=');
513 string k = p.substr(0, eqPos);
514 string v = (eqPos < p.length() - 1) ? encoding::urlDecode(p.substr(eqPos + 1)) : "";
515 ret.insert({k, v});
516 }
517 return ret;
518}
519
520unordered_map<string, string> utils::parseHeaders(string rawHeaders) {
521 unordered_map<string, string> ret;
522 // filter out carriage returns
523 boost::erase_all(rawHeaders, "\r");
524 // split
525 auto lines = splitString(rawHeaders, '\n', true);
526 for (auto const& line : lines) {
527 auto colonPos = line.find_first_of(':');
528 if (line.length() < colonPos + 2) {
529 continue;
530 }
531 auto key = toLowercase(line.substr(0, colonPos));
532 auto val = line.substr(colonPos + 1);
533 boost::trim_left(val);
534 ret[key] = val;
535 }
536 return ret;
537}
538
539unordered_multimap<std::string, std::string> utils::parseCookies(string const& rawCookies) {
540 unordered_multimap<std::string, std::string> ret;
541 // split by ;
542 auto cookies = splitString(rawCookies, ';', true);
543 for (auto c : cookies) {
544 // remove whitespaces
545 boost::trim(c);
546 // key and value
547 auto eqPos = c.find_first_of('=');
548 if (c.length() < eqPos + 2) {
549 continue;
550 }
551 auto key = c.substr(0, eqPos);
552 auto val = c.substr(eqPos + 1);
553 ret.insert({key, val});
554 }
555 return ret;
556}
Exception class that can be used by apps to catch errors resulting from nawa function calls.
Namespace containing functions for text encoding and decoding.
std::string urlDecode(std::string input)
Definition: encoding.cpp:256
void regexReplaceCallback(std::string &s, std::regex const &rgx, std::function< std::string(std::vector< std::string > const &)> const &fmt)
std::string hexDump(std::string const &in)
Definition: utils.cpp:247
std::string getFileContents(std::string const &path)
Definition: utils.cpp:463
time_t readSmtpTime(std::string const &smtpTime)
Definition: utils.cpp:388
std::unordered_multimap< std::string, std::string > splitQueryString(std::string const &queryString)
Definition: utils.cpp:501
std::string convertLineEndings(std::string const &in, std::string const &ending)
Definition: utils.cpp:452
time_t readHttpTime(std::string const &httpTime)
Definition: utils.cpp:369
std::unordered_multimap< std::string, std::string > parseCookies(std::string const &rawCookies)
Definition: utils.cpp:539
std::string stringReplace(std::string input, std::unordered_map< char, char > const &patterns)
Definition: utils.cpp:484
std::string generateErrorPage(unsigned int httpStatus)
Definition: utils.cpp:266
std::vector< std::string > splitPath(std::string const &pathString)
Definition: utils.cpp:446
std::string toLowercase(std::string s)
Definition: utils.cpp:256
std::string contentTypeByExtension(std::string extension)
Definition: utils.cpp:351
std::string toUppercase(std::string s)
Definition: utils.cpp:261
std::string getFileExtension(std::string const &filename)
Definition: utils.cpp:343
std::string makeHttpTime(time_t time)
Definition: utils.cpp:359
std::vector< std::string > splitString(std::string str, char delimiter, bool ignoreEmpty=false)
Definition: utils.cpp:418
std::string makeSmtpTime(time_t time)
Definition: utils.cpp:378
std::string mergePath(std::vector< std::string > const &path)
Definition: utils.cpp:435
std::unordered_map< std::string, std::string > parseHeaders(std::string rawHeaders)
Definition: utils.cpp:520
Definition: AppInit.h:31
Contains useful functions that improve the readability and facilitate maintenance of the NAWA code.