Scala: Ugly hairy rewrite C++ code jig

I think this might be the first time I’ve gotten to use a real algorithm at work in several years. This is also my first attempt at Scala (pretty nice!), and one ugly jig. It had to be adjusted every fourth or fifth component I used it on because of strangeness in the way that component was written, or in the way Java handles regular expressions, or both.

 
import java.io.{File,FileReader,FileWriter}
import java.util.regex.{Pattern,Matcher}
import java.lang.Math.min
 
class Fixer(path : String) {
 
  /** Lists the entries of the directory named by `path`.
   *
   *  `File.listFiles` returns null (rather than throwing) when the path is
   *  not a directory or cannot be read; that case is reported here with a
   *  descriptive error instead of surfacing later as a NullPointerException.
   */
  def files = {
    val listing = new File(path).listFiles
    if (listing == null)
      throw new IllegalArgumentException(
        "Cannot list files in '" + path + "': not a directory, or not readable")
    listing
  }
 
  /** True when the file's name ends in ".cpp", compared case-insensitively. */
  def isCppFile (f : File) = {
    val lowered = f.getName().toLowerCase()
    lowered.endsWith(".cpp")
  }
  /** True for ".h" files (case-insensitive), except for the names that are
   *  explicitly skipped: "resource.h", "vxnapi3.h" and anything starting
   *  with "test_".
   */
  def isHFile (f: File) = {
    val lowered = f.getName().toLowerCase()
    val skipped =
      lowered == "resource.h" ||
      lowered == "vxnapi3.h" ||
      lowered.startsWith("test_")
    lowered.endsWith(".h") && !skipped
  }
 
  /** Picks the file whose lower-cased name (with its last extension removed)
   *  has the smallest edit distance to the lower-cased last component of
   *  `path`.  The first file wins on ties.  Returns null when `fs` is empty
   *  or no distance is below 9999.
   */
  def mostSimilarFile(fs : => Array[File]) = {
    lazy val target = new File(path).getName().toLowerCase
    def stem(candidate : File) =
      candidate.getName().replaceAll("\\.[^\\.]*$", "").toLowerCase

    val nothingYet : (File, Int) = (null, 9999)
    val best = fs.foldLeft(nothingYet) { (soFar, candidate) =>
      val dist = editDistance(stem(candidate), target)
      if (dist < soFar._2) (candidate, dist) else soFar
    }
    best._1
  }
 
  /** The .cpp file in the directory that best matches the directory name.
   *  Re-scans the directory and prints the chosen file on every call.
   */
  def cppFile = {
    val chosen = mostSimilarFile(files.filter(isCppFile))
    println(chosen)
    chosen
  }
  /** The header file in the directory that best matches the directory name.
   *  Re-scans the directory and prints the chosen file on every call.
   */
  def hFile = {
    val chosen = mostSimilarFile(files.filter(isHFile))
    println("hFile:"+chosen)
    chosen
  }
 
  /** Reads the whole of `f` into a string using the platform default
   *  character encoding (the behaviour of `FileReader`).
   *
   *  The reader is closed in a `finally` block so that a failed read does
   *  not leak the file handle.
   */
  def readFile (f: File) = {
    val reader = new FileReader(f)
    try {
      val chunk = new Array[Char](1024)
      val text = new java.lang.StringBuilder
      // read returns -1 at end of stream, which ends the loop.
      var count = reader.read(chunk)
      while (count > 0) {
        text.append(chunk, 0, count)
        count = reader.read(chunk)
      }
      text.toString
    } finally {
      reader.close()
    }
  }
 
  /** Overwrites `f` with `s` using the platform default character encoding
   *  (the behaviour of `FileWriter`).
   *
   *  The writer is closed in a `finally` block so that a failed write does
   *  not leak the file handle.
   */
  def writeFile (f: File, s : String) = {
    val writer = new FileWriter(f)
    try writer.write(s)
    finally writer.close()
  }
 
  // Working copy of the .cpp text; null until first requested.
  var cppText_ : String = null

  /** Current .cpp text, read from `cppFile` the first time it is needed. */
  def cppText = cppText_ match {
    case null =>
      cppText_ = readFile(cppFile)
      cppText_
    case loaded =>
      loaded
  }
 
  // Working copy of the header text; null until first requested.
  var hText_ : String = null

  /** Current header text, read from `hFile` the first time it is needed. */
  def hText = hText_ match {
    case null =>
      hText_ = readFile(hFile)
      hText_
    case loaded =>
      loaded
  }
 
  /** The strings gathered for one component, plus generators for the C++
   *  "traits" struct declaration and its static member definitions.
   *  All values are spliced verbatim into the generated C++ text.
   */
  class RegInfo(
      guid_ : String,
      compId_ : String,
      compType_ : String,
      compSubType_ : String,
      friendlyName_ : String,
      progId_ : String,
      className_ : String,
      baseClassName_ : String
    )
  {
    val guid = guid_
    val compId = compId_
    val compType = compType_
    val compSubType = compSubType_
    val friendlyName = friendlyName_
    val progId = progId_
    val className = className_
    val baseClassName = baseClassName_

    /** Name of the generated traits struct: the class name plus "Traits". */
    def traitClassName = className + "Traits"

    /** C++ declaration of the traits struct, with CRLF line endings. */
    def declareTraits =
      List(
        "struct " + traitClassName + " {\r\n",
        "    static const int COMPID;\r\n",
        "    static const int TYPE;\r\n",
        "    static const int SUBTYPE;\r\n",
        "    static const GUID IID;\r\n",
        "    static const char* PROG_ID;\r\n",
        "    static const char* FRIENDLY_NAME;\r\n",
        "};\r\n"
      ).mkString

    /** Pads `s` on the right with spaces up to length `l`; strings already
     *  that long (or longer) are returned unchanged.
     */
    def rightPad(s : String, l : Int) : String =
      if (s.length >= l) s else s + (" " * (l - s.length))

    /** One C++ definition line for a static traits member. */
    def implementItem(typ : String, name : String, value : String) = {
      val paddedType = rightPad(typ, 5)
      val paddedName = rightPad(name, 14)
      "const " + paddedType + " " + traitClassName + "::" + paddedName +
      "= " + value + ";\r\n"
    }

    /** Banner comment followed by the definitions of every traits member. */
    def implementTraits =
      List(
        "//////////////////////////////////////////////////////////////////////////////\r\n",
        "// " + traitClassName + "\r\n",
        "\r\n",
        implementItem("int", "COMPID", compId),
        implementItem("int", "TYPE", compType),
        implementItem("int", "SUBTYPE", compSubType),
        implementItem("GUID", "IID", guid),
        implementItem("char*", "PROG_ID", progId),
        implementItem("char*", "FRIENDLY_NAME", friendlyName),
        "\r\n"
      ).mkString
  }
 
  // Cached result of makeRegInfo; null until first requested.
  var regInfo_ : RegInfo = null

  /** The component's RegInfo, built by `makeRegInfo` on first use and then
   *  reused, so it reflects the text as it was when first computed.
   */
  def regInfo = regInfo_ match {
    case null =>
      regInfo_ = makeRegInfo
      regInfo_
    case cached =>
      cached
  }
 
  /** Searches the header text followed by the .cpp text for `re` (compiled
   *  with MULTILINE) and returns capture group `g` of the only match.
   *
   *  Returns null when there is no match; throws when there is more than one.
   */
  def extract(re : String, g : Int) = {
    val m = Pattern.compile(re, Pattern.MULTILINE).matcher(hText + cppText)
    if (m.find) {
      val captured = m.group(g)
      if (m.find) throw new Exception("Found more than one!! " + re)
      captured
    } else null
  }
 
  // One comma-terminated initializer value (captured), optionally followed
  // by a trailing // comment.
  val fdataTerm = "\\s*([^,]*),\\s*(?://[^\n]*)?"
  // Single-regex form of a g_FactoryDataArray definition holding exactly one
  // entry of six values.  NOTE(review): nothing else in the visible source
  // references this pattern; scanFactoryData scans line by line instead.
  val fdataPattern = Pattern.compile(
    "CFactoryData\\s+g_FactoryDataArray[^=]*=\\s*" +
    "\\{\\s*" +
    "\\{" +
      fdataTerm +
      fdataTerm +
      fdataTerm +
      fdataTerm +
      fdataTerm +
      fdataTerm +
    "\\s*\\}\\s*" +
    "\\}\\s*;"
    )
 
  /*
  ** Yet again, a regexp broken down into code because the Java regexp
  ** NFA runner takes exponential time for things that should have no
  ** backtracking whatsoever.  Argh!!!
  */
  /** Walks the .cpp text one line at a time looking for the
   *  g_FactoryDataArray definition, then counts the comma-terminated values
   *  that follow it.  The third value becomes the friendly name and the
   *  fourth the prog id.
   *
   *  Returns (friendlyName, progId); either is null if it was never reached.
   */
  def scanFactoryData = {
    val startPattern = Pattern.compile("^\\s*CFactoryData\\s+g_FactoryDataArray.*")
    val valuePattern = Pattern.compile("^\\s*([^,]*),\\s*(?://[^\n]*)?\n?")
    val text = cppText

    var friendlyName : String = null
    var progId : String = null
    // -1 until the array definition is seen; afterwards the number of
    // value lines consumed so far.
    var state = -1
    var pos = 0
    while (pos < text.length) {
      // Each line keeps its terminating '\n' (if it has one).
      val newline = text.indexOf('\n', pos)
      val next = if (newline < 0) text.length else newline + 1
      val line = text.substring(pos, next)
      pos = next

      if (state == -1) {
        if (startPattern.matcher(line).find) state = 0
      } else {
        val m = valuePattern.matcher(line)
        if (m.find) {
          if (state == 2) friendlyName = m.group(1)
          else if (state == 3) progId = m.group(1)
          state += 1
        }
      }
    }
    ( friendlyName, progId )
  }
 
  /** Levenshtein edit distance between `cname` and `fname`: the minimum
   *  number of single-character insertions, deletions and substitutions
   *  that turn one string into the other.
   *
   *  The table is sized from the inputs, so names of any length are handled.
   */
  def editDistance(cname : String, fname : String) = {
    // dp(i)(j) = distance between the first i chars of cname and the
    // first j chars of fname.  Array.ofDim zero-fills, so dp(0)(0) == 0.
    val dp = Array.ofDim[Int](cname.length + 1, fname.length + 1)
    for (i <- 1 to cname.length)
      dp(i)(0) = i
    for (j <- 1 to fname.length)
      dp(0)(j) = j
    for (i <- 1 to cname.length; j <- 1 to fname.length) {
      val substitution = dp(i-1)(j-1) + (if (cname(i-1) == fname(j-1)) 0 else 1)
      dp(i)(j) = min(min(1 + dp(i-1)(j), 1 + dp(i)(j-1)), substitution)
    }

    dp(cname.length)(fname.length)
  }
 
  /** Finds the class implemented in the .cpp file by looking for constructor
   *  definitions of the form `Name::Name(` at the start of a line.
   *
   *  There can be many constructors, so the one whose class name has the
   *  smallest Levenshtein edit distance to the .cpp file name (sans
   *  extension) is chosen.  Returns null when no constructor is found.
   */
  def findClassName = {
    val constructorMatcher = Pattern.compile(
      "^([A-Z][A-Z0-9a-z_]*)\\s*::\\s*([A-Z][A-Za-z0-9_]*)\\s*\\(",
      Pattern.MULTILINE
      ).matcher(cppText)

    // Same for every candidate, so resolve it at most once: cppFile
    // re-lists the directory and prints each time it is called.
    lazy val fname = cppFile.getName().replaceAll("\\.[^\\.]*$", "")

    var bestDistance = 9999
    var className : String = null
    while (constructorMatcher.find) {
      val cname = constructorMatcher.group(1)
      // Only `X::X(` is a constructor; `X::y(` is an ordinary method.
      if (cname == constructorMatcher.group(2)) {
        val dist = editDistance(cname, fname)
        if (dist < bestDistance) {
          bestDistance = dist
          className = cname
        }
      }
    }

    className
  }
 
  /** Finds the `DIID_...` identifier in the .cpp text whose suffix (after
   *  "DIID_") is closest, by edit distance, to the upper-cased .cpp file
   *  name without its extension.  Each candidate and each accepted
   *  improvement is printed.  Returns null if no identifier is present.
   */
  def findDIID = {
    val diidMatcher = Pattern.compile("DIID_[A-Z0-9a-z_]*").matcher(cppText)
    val target = cppFile.getName().replaceAll("\\.[^\\.]*$", "").toUpperCase

    // Lazily yields each matched identifier in file order.
    val candidates =
      Iterator.continually(diidMatcher).takeWhile(_.find).map(_.group(0))

    val nothingYet : (String, Int) = (null, 9999)
    val best = candidates.foldLeft(nothingYet) { (soFar, candidate) =>
      val dist = editDistance(candidate.replaceAll("^DIID_", ""), target)
      println("diid = " + candidate + "; dist = " + dist)
      if (dist < soFar._2) {
        println("accept")
        (candidate, dist)
      } else soFar
    }
    best._1
  }
 
  /** Gathers everything RegInfo needs from the header and .cpp text: the
   *  factory-data strings, the class name, its public base class as written
   *  in the header (null if not found), the DIID, and the values assigned
   *  to m_iCompId, m_iType and m_iSubType.
   */
  def makeRegInfo = {
    val ( friendlyName, progId ) = scanFactoryData
    val className = findClassName

    val derivation = Pattern.compile(
      "class\\s+" + className + "\\s*:\\s*public\\s+([A-Za-z0-9_]+(?:<[^>]*>)?)"
      ).matcher(hText)
    val baseClassName : String =
      if (derivation.find) derivation.group(1) else null

    val guid = findDIID
    val compId = extract("m_iCompId\\s*=\\s*([^;]*?)\\s*;", 1)
    val compType = extract("m_iType\\s*=\\s*([^;]*?)\\s*;", 1)
    val compSubType = extract("m_iSubType\\s*=\\s*([^;]*?)\\s*;", 1)

    new RegInfo(
      guid, compId, compType, compSubType,
      friendlyName, progId, className, baseClassName
    )
  }
 
  // Regex fragment: optional whitespace followed by any number of whole
  // "//" comment lines.  Prepended to patterns so that the comments sitting
  // directly above a removed construct are removed with it.
  val precedingComments = "\\s*(?://[^\n]*\n\\s*)*"
 
  /** Given `bp`, the start of a match in `text`, returns an earlier index
   *  from which to cut so that the whitespace before the match, and one
   *  C-style comment immediately preceding it (if any), are removed too.
   *
   *  Java's backtracking regex matcher runs out of stack when asked to match
   *  optional leading C-style comments, so this is a rough approximation
   *  done by scanning backwards.  The result is never negative, including
   *  when the match starts at index 0.
   */
  def roughPrecedingComment (text : String, bp : Int) = {
    // Same character set as the deprecated java.lang.Character.isSpace.
    def isSpace(c : Char) =
      c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r'

    // Step back over the character just before the match (the line break
    // that anchored it), unless the match is at the very start of the text.
    var b = if (bp > 0) bp - 1 else 0
    while (b > 0 && isSpace(text(b-1))) b -= 1
    // A "*/" right before this point means a block comment ends here.
    if (b > 4 && text(b-1) == '/' && text(b-2) == '*') {
      b -= 4
      // Walk back to the opening "/*" (or to the start of the text).
      while (b > 0 && (text(b) != '/' || text(b+1) != '*'))
        b -= 1
      while (b > 0 && isSpace(text(b-1)))
        b -= 1
    }
    b
  }
 
  /** Removes the first NondelegatingQueryInterface definition from the .cpp
   *  text, from its preceding whitespace/comment through the closing brace
   *  at the start of a line, leaving a single CRLF in its place.
   */
  def removeNDQI = {
    val definition = Pattern.compile(
      "^HRESULT\\s+[a-zA-Z0-9_\\s]*::NondelegatingQueryInterface\\s*\\(" +
        "[^)]*\\)\\s*\\{\\s*" +
        ".*?" +
        "^\\}[\t ]*\\n?",
        Pattern.DOTALL | Pattern.MULTILINE
      )
    val found = definition.matcher(cppText)

    if (found.find) {
      val cutFrom = roughPrecedingComment(cppText, found.start)
      val cutTo = found.end
      val before = cppText_.substring(0, cutFrom)
      val after = cppText_.substring(cutTo)
      cppText_ = before + "\r\n" + after
    }
  }
 
  /** Deletes every `EXTERN_C ... GUID ... = ...;` line from the .cpp text,
   *  together with the whitespace that precedes it.
   */
  def removeGUIDDecl = {
    val guidDecl =
      Pattern.compile("\\s+^EXTERN_C.*GUID.*=.*;[^\n]*\n", Pattern.MULTILINE)
    val stripped = guidDecl.matcher(cppText).replaceAll("")
    cppText_ = stripped
  }
 
  /** Replaces every match of `re` (compiled with MULTILINE) by `s` in both
   *  the .cpp text and the header text.  `s` is a regex replacement string,
   *  so `$n` refers to capture group n.
   */
  def replace(re : String, s : String) = {
    val compiled = Pattern.compile(re, Pattern.MULTILINE)
    def rewrite(text : String) = compiled.matcher(text).replaceAll(s)

    cppText_ = rewrite(cppText)
    hText_ = rewrite(hText)
  }
 
  /** Removes the g_FactoryDataArray definition and the g_cFactoryDataEntries
   *  definition, each with its preceding "//" comments, replacing each with
   *  a single CRLF.
   */
  def removeFactoryData = {
    val arrayDefinition =
      precedingComments +
      "CFactoryData\\s+g_FactoryDataArray[^=]*=\\s*\\{\\s*\\{[^}]*\\},?\\s*\\}\\s*;[^\\n]*\\n?"
    val entryCount =
      precedingComments +
      "int\\s+g_cFactoryDataEntries\\s*=[^;]*;[^\n]*\n"

    replace(arrayDefinition, "\r\n")
    replace(entryCount, "\r\n")
  }
 
  /** Removes the `STDAPI GetRegName(...) { ... }` function, along with its
   *  preceding "//" comments, replacing it with a single CRLF.
   */
  def removeGetRegName = {
    val getRegNameFunction =
      "STDAPI\\s+GetRegName\\([^\\)]*\\)\\s*\\{[^\\}]*\\}[^\n]*\n"
    replace(precedingComments + getRegNameFunction, "\r\n")
  }
 
  /** Removes declarations of the form `ClassName someName(NULL);` for the
   *  component class, along with their preceding "//" comments.
   */
  def removeSillyInstance = {
    val instanceDeclaration =
      regInfo.className + " [A-Za-z_0-9]+\\s*\\(\\s*NULL\\s*\\)\\s*;[^\n]*\n?"
    replace(precedingComments + instanceDeclaration, "")
  }
 
  /** Rewrites constructor code: drops the assignments to m_iCompId, m_iType
   *  and m_iSubType, and renames calls to the base class constructor (or to
   *  `CInterleaveCommTempl_`) to `ComponentImpl_`, keeping the argument list.
   *
   *  The base class name comes straight from the header and may contain
   *  template arguments, so it is quoted before being placed in the regex.
   *  When no base class was found it is simply left out of the alternation.
   */
  def fixConstructor = {
    replace(
      "^\\s*(m_iCompId|m_iType|m_iSubType)\\s*=\\s*[^;]*;[^\n]*\n",
      ""
      )

    val quotedBase = Option(regInfo.baseClassName).map(b => Pattern.quote(b)).toList
    val initializerNames = quotedBase ::: List("CInterleaveCommTempl_")
    replace(
      "(?:" + initializerNames.mkString("|") + ")\\s*(\\([^)]*\\))",
      "ComponentImpl_$1"
      )
  }
 
  /** Appends `REGISTER(ClassName);` to the .cpp text, preceded by a blank
   *  line; a CRLF is added first if the text does not already end with one.
   */
  def addRegisterMacro = {
    val lineEnd = if (cppText.endsWith("\r\n")) "" else "\r\n"
    cppText_ = cppText_ + lineEnd
    cppText_ = cppText_ + ("\r\nREGISTER(" + regInfo.className + ");\r\n")
  }
 
  /** Inserts the traits member definitions, followed by a banner comment
   *  naming the class, right after the last `#include` or `using namespace`
   *  line of the .cpp text (or at the very top if there are none).
   */
  def addTraitImpl = {
    val preamble = Pattern.compile(
      "(?:^\\s*#\\s*include[^\n]*\n|^\\s*using\\s+namespace\\s[^\n]*\n)",
      Pattern.MULTILINE
      ).matcher(cppText)

    // End offset of the last preamble line seen.
    var insertAt = 0
    while (preamble.find) insertAt = preamble.end

    val (head, tail) = cppText_.splitAt(insertAt)
    cppText_ = List(
      head,
      "\r\n",
      regInfo.implementTraits,
      "//////////////////////////////////////////////////////////////////////////////\r\n",
      "// " + regInfo.className + "\r\n",
      tail
    ).mkString
  }
 
  /** Inserts the traits struct declaration into the header text immediately
   *  before the declaration of the component class.
   *
   *  Throws AssertionError if the class declaration cannot be found.  The
   *  search is performed outside of `assert` so that it still happens when
   *  assertions are compiled out.
   */
  def addTraitDecl = {
    val info = regInfo
    val cmatcher = Pattern.compile(
      "^\\s*class\\s+" + info.className + "[^a-zA-Z0-9_]",
      Pattern.MULTILINE
      ).matcher(hText)

    if (!cmatcher.find)
      throw new AssertionError(
        "No declaration of class " + info.className + " found in the header text")
    val pos = cmatcher.start

    hText_ =
      hText_.substring(0, pos) +
      "\r\n" +
      info.declareTraits +
      hText_.substring(pos)
  }
 
  /** Removes the first NondelegatingQueryInterface declaration from the
   *  header text, together with its preceding whitespace/comment, leaving a
   *  single CRLF in its place.
   */
  def removeNDQIDecl = {
    val declaration = Pattern.compile(
      "(?m)^\\s*(?:virtual\\s+)?HRESULT\\s+_?_stdcall\\s+NondelegatingQueryInterface\\s*\\([^)]*\\)\\s*;[^\n]*\n"
      )
    val found = declaration.matcher(hText)
    if (found.find) {
      val cutFrom = roughPrecedingComment(hText, found.start)
      val cutTo = found.end
      val before = hText_.substring(0, cutFrom)
      val after = hText_.substring(cutTo)
      hText_ = before + "\r\n" + after
    }
  }
 
  /** Rewrites `: public Base {` into a derivation from
   *  `ComponentImpl<Class, Base, ClassTraits>`, laid out over several lines.
   *
   *  The base class name comes straight from the header and may contain
   *  template arguments, so it is quoted both in the regex and in the
   *  replacement string.  Nothing is done when no base class was found.
   */
  def fixDerivation = {
    val info = regInfo
    val base = info.baseClassName
    if (base != null) {
      replace(
        "\\s*:\\s*public\\s+" + Pattern.quote(base) + "\\s*\\{\\s*",
        "\r\n" +
        "    : public ComponentImpl<\r\n" +
        "          " + info.className + "\r\n" +
        "        , " + Matcher.quoteReplacement(base) + "\r\n" +
        "        , " + info.traitClassName + "\r\n" +
        "    >\r\n" +
        "{\r\n"
        )
    }
  }
 
  // Applies the .cpp rewrites in order.  Each step reassigns cppText_ (the
  // replace-based steps also reassign hText_), so later steps see the output
  // of earlier ones: removals first, then the additions.
  def fixCppFile = {
    removeNDQI
    removeGUIDDecl
    removeFactoryData
    removeGetRegName
    removeSillyInstance
    fixConstructor
    addRegisterMacro
    addTraitImpl
  }
 
  // Applies the header rewrites in order.  addTraitDecl and removeNDQIDecl
  // reassign hText_; fixDerivation goes through replace, which rewrites both
  // the header text and the .cpp text.
  def fixHFile = {
    addTraitDecl
    removeNDQIDecl
    fixDerivation
  }
 
  /** Runs the whole conversion: rewrites the header text, then the .cpp
   *  text, and writes both back over the original files.
   */
  def fix : Unit = {
    fixHFile
    fixCppFile

    writeFile(cppFile, cppText)
    writeFile(hFile, hText)
  }
}
 
/** Command-line entry point: runs the Fixer on the directory given as the
 *  first argument.
 */
object fixreg {
  def main(args : Array[String]) : Unit = {
    if (args.isEmpty) {
      // Without this check a missing argument surfaced as a bare
      // ArrayIndexOutOfBoundsException.
      System.err.println("usage: fixreg <directory>")
      System.exit(1)
    } else {
      new Fixer(args(0)).fix
    }
  }
}
 
// vi:set ft=scala sts=2 sw=2 ai et:
Posted by: Jason Felice on October 5, 2009 • Posted in: Uncategorized

Comments are closed for this entry.