// Implementation of ApplesoftRetokenizer: splits a raw byte image of a
// tokenized Applesoft program into ApplesoftLine records and then rewrites each
// line's token stream into higher-level tokens (REM text, strings, DATA
// payloads, variables, numbers) plus optional formatting tokens.
//
// NOTE(review): this copy of the file appears to have lost its line breaks and
// every piece of text that was enclosed in angle brackets: two #include
// directives have no header name, the QVector/QList/QPair/iterator declarations
// have no template arguments, and retokenizeVariables()/retokenizeNumbers()
// break off at `"("< ` inside a commented-out qDebug() call. Restore the file
// from version control before building. Also note that, with the line breaks
// gone, each `//` comment in the original text now swallows the remainder of
// its physical line. The notes below describe only what is still visible.
//
// Definitions on the next physical line:
//
// ApplesoftRetokenizer()
//   Marks the object as not yet parsed.
//
// setData(QByteArray data)
//   Stores `data` in m_data, sets m_data_end to its length and resets
//   m_isParsed so that parse() will run again.
//
// parse(quint16 start_address)
//   Returns immediately (with a qWarning) if m_isParsed is already set.
//   Otherwise clears m_retokenized_lines and walks m_data from index 0. Each
//   record is read as: a 16-bit next_address (low byte first), a 16-bit
//   linenum (low byte first), then bytes wrapped one-per-ApplesoftToken up to
//   and including the first 0x00 byte. A next_address of 0 ends the walk.
//   The first line's address is start_address; each later line's address is the
//   previous line's next_address. Every line is passed through retokenizeLine()
//   before being stored. Afterwards m_data_end is set to the final index, any
//   leftover byte count is reported through qDebug(), and
//   retokenizeLinesForFormatting() is run before m_isParsed is set.
//   NOTE(review): the loop only checks `idx < m_data.length()`; the reads at
//   idx+1 (twice) and the do/while scan for the 0x00 terminator are not
//   bounds-checked, so truncated input indexes past the end of m_data.
//
// retokenizeLinesForFormatting()   (continues on the following physical line)
//   Works on a copy of every line in m_retokenized_lines, inserting formatting
//   tokens through a mutable iterator, and finally replaces
//   m_retokenized_lines with the modified copies. indentlevel starts at 1 for
//   each line. Insertions made:
//     - OptFmtLeadingSpaceTokenValue before the first token when that token's
//       printable string does not start with a space;
//     - OptFmtFlagFlowTargetNextTokenValue before the token that follows
//       GOTO/GOSUB, or that follows THEN when it is an IntegerTokenVal; the
//       pair (line.linenum, token.getWordValue()) is appended to m_flowTargets;
//     - OptFmtReturnLineBreakTokenValue after a RETURN token;
//     - after ':' : OptFmtIndentLineBreakTokenValue, `indentlevel` copies of
//       OptFmtIndentTabTokenValue, and OptFmtIndentSpaceTokenValue when the
//       next token's printable string does not start with a space;
//     - after THEN: indentlevel is incremented and, when the next token is not
//       an IntegerTokenVal, the same break/tabs/space sequence is inserted.
//   NOTE(review): peekNext() is called in the ':' and THEN branches without a
//   preceding hasNext() check.
//   NOTE(review): m_flowTargets is only appended to; nothing visible in this
//   file clears it, so a second setData()/parse() cycle would accumulate
//   entries - confirm whether that is intended.
#include "ApplesoftRetokenizer.h" #include "applesofttoken.h" #include #include ApplesoftRetokenizer::ApplesoftRetokenizer() { m_isParsed = false; } void ApplesoftRetokenizer::setData(QByteArray data) { m_data = data; m_data_end = data.length(); m_isParsed = false; } void ApplesoftRetokenizer::parse(quint16 start_address) { if (m_isParsed) { qWarning("File is already parsed. Not reparsing."); return; } //TODO: This could be changed to search for hidden space between applesoft lines int idx = 0; quint8 val = 0; m_retokenized_lines.clear(); quint16 current_address = start_address; while (idx < m_data.length()) { ApplesoftLine line; line.address = current_address; line.next_address = (quint8) m_data[idx] + (((quint8) m_data[idx+1]) *256); idx++; idx++; line.linenum = (quint8) m_data[idx] + (((quint8) m_data[idx+1])*256); idx++; idx++; if (line.next_address == 0x00) { break; } do { val = m_data[idx++]; ApplesoftToken token(val); line.tokens.append(token); } while (val != 0x00); retokenizeLine(line); current_address = line.next_address; m_retokenized_lines.append(line); } m_data_end = idx; if (idx < m_data.length()) { qDebug() << QString("%1 byte(s) unaccounted for.").arg(m_data.length() - idx); } retokenizeLinesForFormatting(); m_isParsed = true; } void ApplesoftRetokenizer::retokenizeLinesForFormatting() { QVector retLines; foreach(ApplesoftLine line, m_retokenized_lines) { int indentlevel = 1; // quint16 linenum = line.linenum; bool firstToken = true; ApplesoftToken previousToken; QMutableVectorIterator tokenIt(line.tokens); while (tokenIt.hasNext()) { ApplesoftToken token = tokenIt.next(); bool isFlowTarget = false; QString tokenstr = token.getRawPrintableString(); if (firstToken) { if (!tokenstr.startsWith(" ")) { ApplesoftToken tmptoken(ApplesoftToken::OptFmtLeadingSpaceTokenValue); tokenIt.remove(); tokenIt.insert(tmptoken); tokenIt.insert(token); } firstToken = false; } quint16 preTokenId = previousToken.getTokenId(); if (preTokenId == ApplesoftToken::ASGoto || 
// (The physical line below finishes retokenizeLinesForFormatting(), documented
// above, and then begins retokenizeLine().)
//
// retokenizeLine(ApplesoftLine &line)   (continues on the following physical line)
//   Runs the retokenizing passes over line.tokens in a fixed order, storing
//   each pass's result back into line.tokens: REMs, strings, DATA statements,
//   variables, numbers, negative numbers.
preTokenId == ApplesoftToken::ASGosub || preTokenId == ApplesoftToken::ASThen) { isFlowTarget = false; if (preTokenId == ApplesoftToken::ASGoto || preTokenId == ApplesoftToken::ASGosub) { isFlowTarget = true; } else if (preTokenId == ApplesoftToken::ASThen && token.getTokenId() == ApplesoftToken::IntegerTokenVal) { isFlowTarget = true; } if (isFlowTarget) { QPair pair; pair.first = line.linenum; pair.second = token.getWordValue(); m_flowTargets.append(pair); ApplesoftToken tmptoken(ApplesoftToken::OptFmtFlagFlowTargetNextTokenValue); tokenIt.remove(); tokenIt.insert(tmptoken); tokenIt.insert(token); } } if (token.getTokenId() == ApplesoftToken::ASReturn) { ApplesoftToken tmptoken(ApplesoftToken::OptFmtReturnLineBreakTokenValue); tokenIt.insert(tmptoken); } if (token.getTokenId() == ':') { ApplesoftToken tmptoken(ApplesoftToken::OptFmtIndentLineBreakTokenValue); tokenIt.insert(tmptoken); for (int ind = 0; ind < indentlevel; ind++) { ApplesoftToken tmptoken(ApplesoftToken::OptFmtIndentTabTokenValue); tokenIt.insert(tmptoken); } if (!tokenIt.peekNext().getRawPrintableString().startsWith(" ")) { ApplesoftToken tmptoken(ApplesoftToken::OptFmtIndentSpaceTokenValue); tokenIt.insert(tmptoken); } } if (token.getTokenId() == ApplesoftToken::ASThen) { indentlevel++; if (tokenIt.peekNext().getTokenId() != ApplesoftToken::IntegerTokenVal) { ApplesoftToken tmptoken(ApplesoftToken::OptFmtIndentLineBreakTokenValue); tokenIt.insert(tmptoken); for (int ind = 0; ind < indentlevel; ind++) { ApplesoftToken tmptoken(ApplesoftToken::OptFmtIndentTabTokenValue); tokenIt.insert(tmptoken); } if (!tokenIt.peekNext().getRawPrintableString().startsWith(" ")) { ApplesoftToken tmptoken(ApplesoftToken::OptFmtIndentSpaceTokenValue); tokenIt.insert(tmptoken); } } } previousToken = token; } retLines.append(line); } m_retokenized_lines = retLines; } void ApplesoftRetokenizer::retokenizeLine(ApplesoftLine &line) { line.tokens = retokenizeRems(line.tokens); line.tokens = retokenizeStrings(line.tokens); 
// (The physical line below finishes retokenizeLine(), documented above.)
//
// retokenizeRems(QVector& datatokens)
//   Works on a copy of `datatokens`. Tokens are copied through unchanged up to
//   and including the first one whose byte value is ASRem. The byte values of
//   all tokens after it are gathered into a QByteArray and appended as a single
//   RemStringTokenVal token. Returns the new vector.
//
// retokenizeStrings(QVector& datatokens)
//   Works on a copy of `datatokens`. Tokens with an id >= 0x80 are always
//   passed through as-is. A token whose word value is '"' opens a string, or
//   closes the one in progress; the opening quote, the enclosed word values and
//   the closing quote are collected into a QString that is emitted as one
//   StringTokenVal token when the closing quote is seen. Any other token is
//   passed through when no string is open. Returns the new vector.
//   NOTE(review): the buffer is not flushed after the loop, so the text of a
//   string with no closing quote is dropped from the result.
//
// retokenizeDataStatements(QVector& datatokens)   (continues on the following physical line)
//   Works on a copy of `datatokens`. Tokens are copied through up to and
//   including the first one whose id is ASData; all tokens after it are handed
//   to retokenizeDataPayload() and that result is appended. Returns the new
//   vector.
line.tokens = retokenizeDataStatements(line.tokens); line.tokens = retokenizeVariables(line.tokens); line.tokens = retokenizeNumbers(line.tokens); line.tokens = retokenizeNegativeNumbers(line.tokens); } QVector ApplesoftRetokenizer::retokenizeRems(QVector&datatokens) { // Handle REMs ApplesoftToken token; QVector replacements; QVector tmptokens = datatokens; QByteArray buffer; bool inRem = false; while (!tmptokens.isEmpty()) { token = tmptokens.takeFirst(); if (!inRem) { replacements.append(token); if (token.getByteValue() == ApplesoftToken::ASRem) { inRem = true; } } else { buffer.append(token.getByteValue()); } } if (inRem) { ApplesoftToken remstrtoken(ApplesoftToken::RemStringTokenVal, buffer); replacements.append(remstrtoken); buffer.clear(); inRem = false; } return replacements; } QVector ApplesoftRetokenizer::retokenizeStrings(QVector&datatokens) { // Handle Strings QVector replacements; QVector tmptokens = datatokens; QString buffer; ApplesoftToken token; bool inString = false; while (!tmptokens.isEmpty()) { token = tmptokens.takeFirst(); if (token.getTokenId() >= 0x80) { replacements.append(token); // continue; } else if (token.getWordValue() == '"') { if (!inString) { inString = true; buffer.append(token.getWordValue()); // continue; } else { buffer.append(token.getWordValue()); ApplesoftToken strtoken(ApplesoftToken::StringTokenVal, buffer); replacements.append(strtoken); buffer.clear(); inString = false; // continue; } } else if (inString) { buffer.append(token.getWordValue()); // continue; } else replacements.append(token); } return replacements; } QVector ApplesoftRetokenizer::retokenizeDataStatements(QVector&datatokens) { // Handle DATAs QVector tmptokens = datatokens; QVector replacements; ApplesoftToken token; QVector datatokenbuffer; bool inData = false; while (!tmptokens.isEmpty()) { token = tmptokens.takeFirst(); if (!inData) { replacements.append(token); if (token.getTokenId() == ApplesoftToken::ASData) { inData = true; } } else { 
// (The physical line below finishes retokenizeDataStatements(), documented
// above.)
//
// retokenizeDataPayload(QVector& datatokens)
//   Consumes `datatokens` front to back with takeFirst() on the reference
//   parameter itself, so the vector passed in by the caller is left empty.
//   StringTokenVal tokens are re-emitted as DataStringTokenVal tokens carrying
//   the same string value. A token whose word value is ',' first flushes any
//   pending text as a DataStringTokenVal token and is then appended itself.
//   The word value of every other token is appended to the pending text, which
//   is flushed once more after the loop. Returns the new vector.
//
// retokenizeVariables(QVector& datatokens)   (body is cut off in this copy)
//   Builds `parsestring` with one character per token: the token's word value
//   when its id is between 0x0000 and 0x0080 (exclusive), "_" otherwise. It
//   then runs the pattern [A-Za-z][A-Za-z0-9]*[$%]?\(? over that string and
//   pushes each match onto the front of `matchstack`, so the matches end up in
//   reverse order. NOTE(review): everything after that loop is missing here.
datatokenbuffer.append(token); } } if (inData) { QVector dataTokens; dataTokens = retokenizeDataPayload(datatokenbuffer); replacements.append(dataTokens); datatokenbuffer.clear(); inData = false; } return replacements; } QVector ApplesoftRetokenizer::retokenizeDataPayload(QVector& datatokens) { QVector retval; ApplesoftToken token; QString stringbuffer; while (!datatokens.isEmpty()) { token = datatokens.takeFirst(); if (token.getTokenId() == ApplesoftToken::StringTokenVal) { ApplesoftToken newToken(ApplesoftToken::DataStringTokenVal, token.getStringValue()); retval.append(newToken); continue; } if (token.getWordValue() == ',') { if (!stringbuffer.isEmpty()) { ApplesoftToken datastrtoken(ApplesoftToken::DataStringTokenVal, stringbuffer); retval.append(datastrtoken); stringbuffer.clear(); } retval.append(token); continue; } stringbuffer.append(token.getWordValue()); } if (!stringbuffer.isEmpty()) { ApplesoftToken datastrtoken(ApplesoftToken::DataStringTokenVal, stringbuffer); retval.append(datastrtoken); stringbuffer.clear(); } return retval; } QVector ApplesoftRetokenizer::retokenizeVariables(QVector&datatokens) { // Handle variable names QList tmptokens = QList::fromVector(datatokens); ApplesoftToken token; QRegularExpression varregexp("[A-Za-z][A-Za-z0-9]*[$%]?\\(?"); QString parsestring; // Parse the tokens to find assist for (int idx = 0; idx < tmptokens.count();idx++) { token = datatokens.at(idx); if (token.getTokenId() < 0x0080 && token.getTokenId() > 0x0000) { parsestring.append(QChar(token.getWordValue())); } else { parsestring.append("_"); } } QList matchstack; QRegularExpressionMatchIterator matches = varregexp.globalMatch(parsestring); // qDebug() << parsestring; while (matches.hasNext()) { QRegularExpressionMatch rematch = matches.next(); matchstack.push_front(rematch); // qDebug() << "Capture " << " = " << rematch.capturedTexts() << "From: " << rematch.capturedStart() // << "To: " << rematch.capturedEnd()-1 << "("< 
// retokenizeNumbers(QVector& datatokens)   (return type and body tail are missing in this copy)
//   Same visible structure as retokenizeVariables(): builds `parsestring` with
//   one character per token ("_" for ids outside 0x0000..0x0080, exclusive),
//   runs the pattern [0-9]+(\.[0-9]*)? over it and pushes each match onto the
//   front of `matchstack`. NOTE(review): everything after that loop is missing
//   here.
//
// retokenizeNegativeNumbers(QVector& datatokens)   (continues on the following physical line)
//   The original comment block lists the cases in which a '-' should or should
//   not be merged into the number that follows it. The implementation on the
//   next physical line walks a QList copy of the tokens while tracking
//   `lastWasInt`, which is set after IntegerTokenVal, FloatTokenVal,
//   IntVarTokenVal, FloatVarTokenVal and ')' tokens and cleared after anything
//   else. When an ASMINUS token is met while lastWasInt is false and the next
//   token is an IntegerTokenVal, both tokens are removed and the integer token
//   is re-inserted with its value set to getUnsignedIntegerValue() * -1;
//   lastWasInt is then set. In every other case the minus is left in place.
//   Returns the list converted back to a vector; `datatokens` is not modified.
ApplesoftRetokenizer::retokenizeNumbers(QVector&datatokens) { // Handle numbers QList tmptokens = QList::fromVector(datatokens); ApplesoftToken token; QRegularExpression varregexp("[0-9]+(\\.[0-9]*)?"); QString parsestring; // Parse the tokens to find assist for (int idx = 0; idx < tmptokens.count();idx++) { token = datatokens.at(idx); if (token.getTokenId() < 0x0080 && token.getTokenId() > 0x0000) { parsestring.append(QChar(token.getWordValue())); } else { parsestring.append("_"); } } QList matchstack; QRegularExpressionMatchIterator matches = varregexp.globalMatch(parsestring); // qDebug() << parsestring; while (matches.hasNext()) { QRegularExpressionMatch rematch = matches.next(); matchstack.push_front(rematch); // qDebug() << "Capture " << " = " << rematch.capturedTexts() << "From: " << rematch.capturedStart() // << "To: " << rematch.capturedEnd()-1 << "("< ApplesoftRetokenizer::retokenizeNegativeNumbers(QVector&datatokens) { //TODO: Code to make determination of negative numbers vs. unary minus/math formulas. // Prefixed '-' tokens for negative numbers should get merged with the integer value token. // So, need to determine when we're in an expression vs when we're starting a number. // // A = -1 should retokenize. // A = - 4 - 1 should retokenize -4 // A = - 4 - - 1 should retokenize -4 and -1 // A = 3 - 1 shoud not retokenize // A = A - 1 should not // A = PEEK(123) - 5 should not // A = 4 * - 1 should // A = (1 + 2) - 4 should not // A = (1 + 2) + - 4 should // A = (1 + 2) - - 4 should // POKE - 4, 1 should // PRINT + - 4 should // PRINT - 4 should // PRINT + + + - - - 4 should retokenize the last -4. // A = 1 - - 4 should // A = 1 - - - 4 should, probably, but it's errorprone to say the least, // as are any multiple arbitrary +/-'s. Have to hope for the best here. // Best bet would be to look at how AppleSoft handles these values. 
// (The physical line below is the body of retokenizeNegativeNumbers(),
// documented above.)
// A = - 0 is the same as 0 QList tmptokens = QList::fromVector(datatokens); ApplesoftToken token; QMutableListIteratorit(tmptokens); bool lastWasInt = false; while (it.hasNext()) { token = it.next(); if (token.getTokenId() == ApplesoftToken::IntegerTokenVal) lastWasInt = true; else if (token.getTokenId() == ApplesoftToken::FloatTokenVal) lastWasInt = true; else if (token.getTokenId() == ApplesoftToken::IntVarTokenVal) lastWasInt = true; else if (token.getTokenId() == ApplesoftToken::FloatVarTokenVal) lastWasInt = true; else if (token.getTokenId() == ')') lastWasInt = true; else if (token.getTokenId() == ApplesoftToken::ASMINUS) { if (!lastWasInt && it.hasNext() && it.peekNext().getTokenId() == ApplesoftToken::IntegerTokenVal) { it.remove(); token = it.next(); it.remove(); int val = token.getUnsignedIntegerValue() * -1; token.setValue(val); it.insert(token); lastWasInt = true; } else { lastWasInt = false; } } else { lastWasInt = false; } } return tmptokens.toVector(); }