/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/***************************************************************************
 *            mltokenizer.cc
 *
 *  Tue Nov 4 08:46:35 CET 2008
 *  Copyright 2008 Bent Bisballe Nyeng
 *  deva@aasimon.org
 ****************************************************************************/

/*
 *  This file is part of Pracro.
 *
 *  Pracro is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  Pracro is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with Pracro; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 */
#include "mltokenizer.h"

#include <stdio.h>

#include <string>
#include <vector>

/*
 * Placeholder bytes that stand in for escaped braces ("{{" and "}}") while
 * the input is being tokenized, so that escaped braces are never mistaken
 * for reference delimiters.
 * Limitation: input that already contains these bytes is indistinguishable
 * from escaped braces and comes out as '{' / '}'.
 */
static const char ESCAPED_OPEN = '\1';
static const char ESCAPED_CLOSE = '\2';

/**
 * Turn the internal placeholder bytes back into literal braces.
 * @param mlvalue Text that may contain ESCAPED_OPEN / ESCAPED_CLOSE bytes.
 * @return A copy of mlvalue with every ESCAPED_OPEN replaced by '{' and
 *         every ESCAPED_CLOSE replaced by '}'.
 */
static std::string rereplaceescaping(const std::string &mlvalue)
{
  std::string output;
  output.reserve(mlvalue.length());

  for(size_t pos = 0; pos < mlvalue.length(); ++pos) {
    switch(mlvalue[pos]) {
    case ESCAPED_OPEN:
      output += '{';
      break;
    case ESCAPED_CLOSE:
      output += '}';
      break;
    default:
      output += mlvalue[pos];
      break;
    }
  }

  return output;
}

/**
 * Replace the escape sequences "{{" and "}}" with single placeholder bytes.
 * Pairs are matched greedily from the left, so "{{{" becomes a placeholder
 * followed by a plain '{'.
 * @param mlvalue The raw input text.
 * @return A copy of mlvalue with "{{" replaced by ESCAPED_OPEN and "}}"
 *         replaced by ESCAPED_CLOSE.
 */
static std::string replaceescaping(const std::string &mlvalue)
{
  std::string output;
  output.reserve(mlvalue.length());

  size_t pos = 0;
  while(pos < mlvalue.length()) {
    const char c = mlvalue[pos];
    // 'pos + 1 < length' rather than 'pos < length - 1': cannot wrap around.
    const bool doubled = (c == '{' || c == '}') &&
      pos + 1 < mlvalue.length() && mlvalue[pos + 1] == c;

    if(doubled) {
      output += (c == '{') ? ESCAPED_OPEN : ESCAPED_CLOSE;
      pos += 2;
    } else {
      output += c;
      ++pos;
    }
  }

  return output;
}

/**
 * Extract the run of characters beginning at 'start' and ending just before
 * the first character that occurs in 'term' (or at the end of the input).
 * @param input The string to read from.
 * @param start Index of the first character of the token.
 * @param term  Set of terminator characters; none of them is included in
 *              the result.
 * @return The extracted token; empty if 'start' is at or past the end of
 *         the input, or if input[start] is itself a terminator.
 */
static std::string gettoken(const std::string &input, size_t start,
                            const std::string &term)
{
  if(start >= input.length()) return std::string();

  const size_t end = input.find_first_of(term, start);
  if(end == std::string::npos) return input.substr(start);

  return input.substr(start, end - start);
}

/*
 * States of the tokenizer:
 *  NAME      - just passed a '{'; reading the name up to '|'.
 *  VALUE     - just passed the '|'; reading the value up to '}' or newline.
 *  TEXT      - reading plain text.
 *  ENDOFITEM - positioned on a newline.
 *  UNDEFINED - between tokens; the next character decides the next state.
 */
typedef enum {
  NAME,
  VALUE,
  TEXT,
  ENDOFITEM,
  UNDEFINED
} tokenizerstate_t;

/**
 * Split a string into text, value and end-of-item tokens.
 *
 * As implemented here:
 *  - "${name|value}" yields one MLTT_VALUE token carrying name and value.
 *    A '{' found at a token boundary starts a name even without a
 *    preceding '$'.
 *  - A newline outside a value yields one MLTT_ENDOFITEM token whose value
 *    is "\n".
 *  - Everything else yields MLTT_TEXT tokens; adjacent text fragments are
 *    merged into a single token.
 *  - "{{" and "}}" are escapes for literal '{' and '}'.
 *
 * If the input ends in the middle of a "{name|value}" construct, a notice is
 * printed to stdout and the partial MLTT_VALUE token (missing parts left
 * empty) is appended.
 *
 * @param mlvalue The string to tokenize.
 * @return The tokens in input order.
 */
std::vector< mltoken_t > mltokenize(std::string mlvalue)
{
  std::vector< mltoken_t > tokens;

  mlvalue = replaceescaping(mlvalue);

  tokenizerstate_t state = UNDEFINED;

  mltoken_t token;
  token.type = MLTT_UNDEFINED; // never leave the type indeterminate

  size_t i = 0;
  while(i < mlvalue.length()) {
    switch(state) {
    case NAME:
      {
        const std::string rawname = gettoken(mlvalue, i, "|");
        i += rawname.length() + 1; // also step over the '|'
        // Unescape the name as well, so that placeholder bytes never leak
        // out to the caller.
        token.name = rereplaceescaping(rawname);
        token.type = MLTT_VALUE;
        token.value = "";
        state = VALUE;
      }
      break;

    case VALUE:
      {
        // NOTE(review): when the value is terminated by a newline instead
        // of '}', that newline is consumed here and no MLTT_ENDOFITEM token
        // is emitted for it - confirm that this is intended.
        const std::string rawvalue = gettoken(mlvalue, i, "}\n");
        i += rawvalue.length() + 1; // also step over the terminator
        token.value = rereplaceescaping(rawvalue);
        token.type = MLTT_VALUE;
        tokens.push_back(token);
        state = UNDEFINED;
      }
      break;

    case TEXT:
      {
        // A '$' that does not introduce "${" is plain text on its own;
        // otherwise read up to the next '$' or newline.
        const std::string rawtext =
          (mlvalue[i] == '$') ? std::string("$") : gettoken(mlvalue, i, "$\n");
        i += rawtext.length();
        token.value = rereplaceescaping(rawtext);
        token.type = MLTT_TEXT;
        token.name = "";
        if(!tokens.empty() && tokens.back().type == MLTT_TEXT) {
          // Merge with the preceding text fragment.
          tokens.back().value += token.value;
        } else {
          tokens.push_back(token);
        }
        state = UNDEFINED;
      }
      break;

    case ENDOFITEM:
      token.value = "\n";
      ++i;
      token.type = MLTT_ENDOFITEM;
      token.name = "";
      tokens.push_back(token);
      state = UNDEFINED;
      break;

    case UNDEFINED:
      switch(mlvalue[i]) {
      case '$':
        if(i + 1 < mlvalue.length() && mlvalue[i + 1] == '{') {
          ++i; // ignore the '$'; the '{' is handled on the next round
        } else {
          state = TEXT;
        }
        break;

      case '{':
        // Start from a clean token, so that input ending right here does
        // not make the final push below re-emit the previous token.
        token.name = "";
        token.value = "";
        token.type = MLTT_VALUE;
        state = NAME;
        ++i;
        break;

      case '\n':
        state = ENDOFITEM;
        break;

      default:
        state = TEXT;
        break;
      }
      break;
    }
  }

  // Only NAME or VALUE can be pending here: the input ended inside a
  // "{name|value}" construct.
  if(state != UNDEFINED) {
    printf("Oups... missed something in the end!\n");
    tokens.push_back(token);
  }

  return tokens;
}

#ifdef TEST_MLTOKENIZER
/* Manual test driver: tokenizes a sample string and dumps every token. */
int main()
{
  const std::string mlvalue = "$ab}}c\ndef ${na$me|${{va$lue}}}\n12${34}\n";

  std::vector< mltoken_t > tokens = mltokenize(mlvalue);

  for(std::vector< mltoken_t >::iterator it = tokens.begin();
      it != tokens.end(); ++it) {
    printf("Token:\n");
    printf("\tType: ");
    switch(it->type) {
    case MLTT_VALUE:
      printf("VALUE\n");
      break;
    case MLTT_TEXT:
      printf("TEXT\n");
      break;
    case MLTT_ENDOFITEM:
      printf("ENDOFITEM\n");
      break;
    case MLTT_UNDEFINED:
      printf("UNDEFINED\n");
      break;
    }
    printf("\tName: %s\n", it->name.c_str());
    printf("\tValue: %s\n", it->value.c_str());
    printf("\n");
  }

  return 0;
}
#endif