Scala: Ugly hairy rewrite C++ code jig
I think this might be the first time I’ve gotten to use a real algorithm at work in several years. This is also my first attempt at Scala (pretty nice!), and one ugly jig. It had to be adjusted every fourth or fifth component I used it on because of strangeness in the way that component was written, or in the way Java handles regular expressions, or both.
import java.io.{File,FileReader,FileWriter}
import java.util.regex.{Pattern,Matcher}
import java.lang.Math.min

/**
 * One-shot source rewriter for a single C++ component directory.
 *
 * Picks the `.cpp` and `.h` file whose base name is closest (by edit distance)
 * to the directory name, scrapes registration details out of them, removes the
 * hand-written registration code and inserts a traits struct, a
 * `ComponentImpl<...>` derivation and a `REGISTER(...)` line instead.
 *
 * All text edits are applied to the in-memory copies `cppText_` / `hText_`;
 * nothing is written to disk until [[fix]] is called.
 *
 * @param path directory holding the component's source files
 */
class Fixer(path : String) {

  /** Strips the last `.ext` suffix from a file name. */
  private def stripExtension(name : String) : String =
    name.replaceAll("\\.[^\\.]*$", "")

  /** Turns a possibly-null string into literal replacement text for `Matcher.replaceAll`. */
  private def literalReplacement(s : String) : String =
    Matcher.quoteReplacement("" + s)

  /** Entries of `path`; an empty array when `listFiles` yields null (not a readable directory). */
  def files : Array[File] =
    Option(new File(path).listFiles).getOrElse(Array.empty[File])

  /** True when the file name ends in `.cpp`, ignoring case. */
  def isCppFile (f : File) : Boolean =
    f.getName().toLowerCase().endsWith(".cpp")

  /** True for `.h` files other than the excluded names (`resource.h`, `vxnapi3.h`, `test_*`). */
  def isHFile (f: File) : Boolean = {
    val name = f.getName().toLowerCase()
    name.endsWith(".h") &&
      name != "resource.h" &&
      !name.startsWith("test_") &&
      name != "vxnapi3.h"
  }

  /**
   * Returns the candidate whose lower-cased base name has the smallest edit
   * distance to the lower-cased name of `path`. The first candidate wins ties.
   *
   * @param fs candidate files (by-name; evaluated once)
   * @return the closest file, or null when there are no candidates
   */
  def mostSimilarFile(fs : => Array[File]) : File = {
    val target = new File(path).getName().toLowerCase
    var result : File = null
    var bestDistance = Int.MaxValue
    for (f <- fs) {
      val dist = editDistance(stripExtension(f.getName()).toLowerCase, target)
      if (dist < bestDistance) {
        bestDistance = dist
        result = f
      }
    }
    result
  }

  /** The component's `.cpp` file; re-scans the directory and prints the choice on every call. */
  def cppFile : File = {
    val result = mostSimilarFile(files.filter(isCppFile))
    println(result)
    result
  }

  /** The component's `.h` file; re-scans the directory and prints the choice on every call. */
  def hFile : File = {
    val result = mostSimilarFile(files.filter(isHFile))
    println("hFile:"+result)
    result
  }

  /**
   * Reads a whole file into a string using `FileReader`.
   * The reader is closed even when reading fails.
   */
  def readFile (f: File) : String = {
    val reader = new FileReader(f)
    try {
      val buffer = new Array[Char](1024)
      val text = new java.lang.StringBuilder
      var count = reader.read(buffer)
      while (count > 0) {
        text.append(buffer, 0, count)
        count = reader.read(buffer)
      }
      text.toString
    } finally reader.close()
  }

  /**
   * Overwrites `f` with `s` using `FileWriter`.
   * The writer is closed even when writing fails.
   */
  def writeFile (f: File, s : String) : Unit = {
    val writer = new FileWriter(f)
    try writer.write(s)
    finally writer.close()
  }

  /** Working copy of the `.cpp` text; null until first accessed through [[cppText]]. */
  var cppText_ : String = null

  /** Current `.cpp` text, loaded from disk on first use. */
  def cppText : String = {
    if (cppText_ == null) cppText_ = readFile(cppFile)
    cppText_
  }

  /** Working copy of the `.h` text; null until first accessed through [[hText]]. */
  var hText_ : String = null

  /** Current `.h` text, loaded from disk on first use. */
  def hText : String = {
    if (hText_ == null) hText_ = readFile(hFile)
    hText_
  }

  /**
   * Registration details scraped from one component, plus the C++ text of the
   * traits struct generated from them. All values are inserted verbatim into
   * the generated C++.
   */
  class RegInfo(
    guid_ : String,
    compId_ : String,
    compType_ : String,
    compSubType_ : String,
    friendlyName_ : String,
    progId_ : String,
    className_ : String,
    baseClassName_ : String
  ) {
    def guid = guid_
    def compId = compId_
    def compType = compType_
    def compSubType = compSubType_
    def friendlyName = friendlyName_
    def progId = progId_
    def className = className_
    def baseClassName = baseClassName_

    /** Name of the generated traits struct: the class name followed by `Traits`. */
    def traitClassName = className + "Traits"

    /** Declaration of the traits struct, for the header. */
    def declareTraits =
      "struct " + traitClassName + " {\r\n" +
      " static const int COMPID;\r\n" +
      " static const int TYPE;\r\n" +
      " static const int SUBTYPE;\r\n" +
      " static const GUID IID;\r\n" +
      " static const char* PROG_ID;\r\n" +
      " static const char* FRIENDLY_NAME;\r\n" +
      "};\r\n"

    /** Pads `s` with trailing spaces up to `l` characters; longer strings are returned unchanged. */
    def rightPad(s : String, l : Int) : String =
      if (s.length >= l) s else s + (" " * (l - s.length))

    /** One column-aligned `const <typ> <Traits>::<name> = <value>;` definition line. */
    def implementItem(typ : String, name : String, value : String) =
      "const " + rightPad(typ, 5) + " " + traitClassName + "::" +
      rightPad(name, 14) + "= " + value + ";\r\n"

    /** Banner comment plus the definitions of all six traits members, for the `.cpp` file. */
    def implementTraits =
      "//////////////////////////////////////////////////////////////////////////////\r\n" +
      "// " + traitClassName + "\r\n" +
      "\r\n" +
      implementItem("int", "COMPID", compId) +
      implementItem("int", "TYPE", compType) +
      implementItem("int", "SUBTYPE", compSubType) +
      implementItem("GUID", "IID", guid) +
      implementItem("char*", "PROG_ID", progId) +
      implementItem("char*", "FRIENDLY_NAME", friendlyName) +
      "\r\n"
  }

  /** Cached result of [[makeRegInfo]]; null until first accessed through [[regInfo]]. */
  var regInfo_ : RegInfo = null

  /** Registration details for this component, computed on first use. */
  def regInfo : RegInfo = {
    if (regInfo_ == null) regInfo_ = makeRegInfo
    regInfo_
  }

  /**
   * Finds the single match of `re` (MULTILINE) in the header text followed by
   * the `.cpp` text and returns capture group `g`.
   *
   * @return the captured text, or null when there is no match
   * @throws Exception when the pattern matches more than once
   */
  def extract(re : String, g : Int) : String = {
    val matcher = Pattern.compile(re, Pattern.MULTILINE).matcher(hText + cppText)
    if (!matcher.find) null
    else {
      val result = matcher.group(g)
      if (matcher.find) throw new Exception("Found more than one!! " + re)
      result
    }
  }

  /** Regex for one comma-terminated initializer value with an optional trailing `//` comment. */
  val fdataTerm = "\\s*([^,]*),\\s*(?://[^\n]*)?"

  /** Whole-declaration regex for a single-entry `g_FactoryDataArray` with six values. */
  val fdataPattern = Pattern.compile(
    "CFactoryData\\s+g_FactoryDataArray[^=]*=\\s*" +
    "\\{\\s*" +
    "\\{" +
    fdataTerm + fdataTerm + fdataTerm + fdataTerm + fdataTerm + fdataTerm +
    "\\s*\\}\\s*" +
    "\\}\\s*;"
  )

  /*
  ** Yet again, a regexp broken down into code because the Java regexp
  ** NFA runner takes exponential time for things that should have no
  ** backtracking whatsoever. Argh!!!
  */
  /**
   * Scans the `.cpp` text line by line. After the line that starts the
   * `g_FactoryDataArray` declaration, every line holding a comma-terminated
   * value advances a counter; the third such value is taken as the friendly
   * name and the fourth as the prog id.
   *
   * @return `(friendlyName, progId)`; either may be null when not found
   */
  def scanFactoryData : (String, String) = {
    val startPattern = Pattern.compile("^\\s*CFactoryData\\s+g_FactoryDataArray.*")
    val valuePattern = Pattern.compile("^\\s*([^,]*),\\s*(?://[^\n]*)?\n?")
    val text = cppText
    var friendlyName : String = null
    var progId : String = null
    var state = -1  // -1: declaration not seen yet; otherwise the number of values consumed
    var pos = 0
    while (pos < text.length) {
      // A line runs up to and including its '\n'; the last line may lack one.
      val newline = text.indexOf('\n', pos)
      val end = if (newline < 0) text.length else newline + 1
      val line = text.substring(pos, end)
      if (state == -1) {
        if (startPattern.matcher(line).find) state = 0
      } else {
        val m = valuePattern.matcher(line)
        if (m.find) {
          state match {
            case 2 => friendlyName = m.group(1)
            case 3 => progId = m.group(1)
            case _ => ()
          }
          state += 1
        }
      }
      pos = end
    }
    ( friendlyName, progId )
  }

  /**
   * Levenshtein edit distance between two strings (unit cost for insertion,
   * deletion and substitution). The table is sized from the inputs, so
   * strings of any length are accepted.
   */
  def editDistance(cname : String, fname : String) : Int = {
    val rows = cname.length
    val cols = fname.length
    val dp = Array.ofDim[Int](rows + 1, cols + 1)
    for (i <- 1 to rows) dp(i)(0) = i
    for (j <- 1 to cols) dp(0)(j) = j
    for (i <- 1 to rows; j <- 1 to cols) {
      val substitution =
        dp(i-1)(j-1) + (if (cname.charAt(i-1) == fname.charAt(j-1)) 0 else 1)
      dp(i)(j) = min(min(1 + dp(i-1)(j), 1 + dp(i)(j-1)), substitution)
    }
    dp(rows)(cols)
  }

  /**
   * Finds the component's class name from the constructor definitions
   * (`Name::Name(` at the start of a line) in the `.cpp` text.
   *
   * @return the class name, or null when no constructor definition is found
   */
  def findClassName : String = {
    val constructorMatcher = Pattern.compile(
      "^([A-Z][A-Z0-9a-z_]*)\\s*::\\s*([A-Z][A-Za-z0-9_]*)\\s*\\(",
      Pattern.MULTILINE
    ).matcher(cppText)
    // There can be many constructors - find the one with the closest Levenshtein
    // edit distance to the filename (sans extension).
    // Resolved at most once: cppFile lists the directory and prints on every call.
    lazy val fname = stripExtension(cppFile.getName())
    var bestDistance = Int.MaxValue
    var className : String = null
    while (constructorMatcher.find) {
      if (constructorMatcher.group(1) == constructorMatcher.group(2)) {
        val cname = constructorMatcher.group(1)
        val dist = editDistance(cname, fname)
        if (dist < bestDistance) {
          bestDistance = dist
          className = cname
        }
      }
    }
    className
  }

  /**
   * Finds the `DIID_*` identifier in the `.cpp` text whose suffix is closest
   * to the upper-cased `.cpp` base name, printing every candidate considered.
   *
   * @return the identifier, or null when the text contains none
   */
  def findDIID : String = {
    val matcher = Pattern.compile("DIID_[A-Z0-9a-z_]*").matcher(cppText)
    val fname = stripExtension(cppFile.getName()).toUpperCase
    var bestDistance = Int.MaxValue
    var diid : String = null
    while (matcher.find) {
      val candidate = matcher.group(0)
      val dist = editDistance(candidate.replaceAll("^DIID_", ""), fname)
      println("diid = " + candidate + "; dist = " + dist)
      if (dist < bestDistance) {
        println("accept")
        bestDistance = dist
        diid = candidate
      }
    }
    diid
  }

  /** Gathers every piece of registration data from the current header and `.cpp` text. */
  def makeRegInfo : RegInfo = {
    val ( friendlyName, progId ) = scanFactoryData
    val className = findClassName
    val bcMatcher = Pattern.compile(
      "class\\s+" + className + "\\s*:\\s*public\\s+([A-Za-z0-9_]+(?:<[^>]*>)?)"
    ).matcher(hText)
    val baseClassName : String = if (bcMatcher.find) bcMatcher.group(1) else null
    new RegInfo(
      findDIID,
      extract("m_iCompId\\s*=\\s*([^;]*?)\\s*;", 1),
      extract("m_iType\\s*=\\s*([^;]*?)\\s*;", 1),
      extract("m_iSubType\\s*=\\s*([^;]*?)\\s*;", 1),
      friendlyName,
      progId,
      className,
      baseClassName
    )
  }

  /** Regex prefix matching leading whitespace and any run of `//` comment lines. */
  val precedingComments = "\\s*(?://[^\n]*\n\\s*)*"

  /** Same character set as the deprecated `Character.isSpace`: space, tab, LF, FF, CR. */
  private def isAsciiSpace(c : Char) : Boolean =
    c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r'

  /**
   * Backs up from `bp` over preceding whitespace and, when the text just
   * before it ends with a C-style comment terminator, over that comment and
   * the whitespace before it.
   *
   * @param text text being edited
   * @param bp   offset where the construct to remove begins
   * @return offset at which the cut should begin; never negative
   */
  def roughPrecedingComment (text : String, bp : Int) : Int = {
    // Why, oh why, does Java use the NFA matcher for regular expressions?
    // There isn't enough stack space to properly match optional leading
    // C-style comments, so here is a rough approximation by backward scanning
    // Clamped so that a construct at offset 0 yields 0 rather than -1.
    var b = math.max(bp - 1, 0)
    while (b > 0 && isAsciiSpace(text.charAt(b-1))) b -= 1
    if (b > 4 && text.charAt(b-1) == '/' && text.charAt(b-2) == '*') {
      b -= 4
      while (b > 0 && (text.charAt(b) != '/' || text.charAt(b+1) != '*')) b -= 1
      while (b > 0 && isAsciiSpace(text.charAt(b-1))) b -= 1
    }
    b
  }

  /**
   * Replaces the first match of `pattern`, together with whatever
   * [[roughPrecedingComment]] finds before it, by a single CRLF.
   * Returns `text` unchanged when there is no match.
   */
  private def cutFirstWithComment(text : String, pattern : Pattern) : String = {
    val m = pattern.matcher(text)
    if (m.find)
      text.substring(0, roughPrecedingComment(text, m.start)) + "\r\n" + text.substring(m.end)
    else text
  }

  /** Removes the first `NondelegatingQueryInterface` definition from the `.cpp` text. */
  def removeNDQI : Unit = {
    val pattern = Pattern.compile(
      "^HRESULT\\s+[a-zA-Z0-9_\\s]*::NondelegatingQueryInterface\\s*\\(" +
      "[^)]*\\)\\s*\\{\\s*" +
      ".*?" +
      "^\\}[\t ]*\\n?",
      Pattern.DOTALL | Pattern.MULTILINE
    )
    cppText_ = cutFirstWithComment(cppText, pattern)
  }

  /** Removes every `EXTERN_C ... GUID ... = ...;` line from the `.cpp` text. */
  def removeGUIDDecl : Unit = {
    val p = Pattern.compile("\\s+^EXTERN_C.*GUID.*=.*;[^\n]*\n", Pattern.MULTILINE)
    cppText_ = p.matcher(cppText).replaceAll("")
  }

  /**
   * Applies a MULTILINE regex replacement to both the `.cpp` and the header text.
   *
   * @param re regular expression
   * @param s  replacement in `Matcher.replaceAll` syntax (`$n` refers to group n)
   */
  def replace(re : String, s : String) : Unit = {
    val p = Pattern.compile(re, Pattern.MULTILINE)
    cppText_ = p.matcher(cppText).replaceAll(s)
    hText_ = p.matcher(hText).replaceAll(s)
  }

  /** Removes the `g_FactoryDataArray` and `g_cFactoryDataEntries` definitions. */
  def removeFactoryData : Unit = {
    replace(
      precedingComments +
      "CFactoryData\\s+g_FactoryDataArray[^=]*=\\s*\\{\\s*\\{[^}]*\\},?\\s*\\}\\s*;[^\\n]*\\n?",
      "\r\n"
    )
    replace(
      precedingComments + "int\\s+g_cFactoryDataEntries\\s*=[^;]*;[^\n]*\n",
      "\r\n"
    )
  }

  /** Removes the `STDAPI GetRegName(...) { ... }` definition. */
  def removeGetRegName : Unit = {
    replace(
      precedingComments + "STDAPI\\s+GetRegName\\([^\\)]*\\)\\s*\\{[^\\}]*\\}[^\n]*\n",
      "\r\n"
    )
  }

  /** Removes a `ClassName name(NULL);` instance definition of the component class. */
  def removeSillyInstance : Unit = {
    replace(
      precedingComments + regInfo.className +
      " [A-Za-z_0-9]+\\s*\\(\\s*NULL\\s*\\)\\s*;[^\n]*\n?",
      ""
    )
  }

  /**
   * Drops the `m_iCompId` / `m_iType` / `m_iSubType` assignments and renames
   * base-class constructor calls to `ComponentImpl_`.
   */
  def fixConstructor : Unit = {
    replace(
      "^\\s*(m_iCompId|m_iType|m_iSubType)\\s*=\\s*[^;]*;[^\n]*\n",
      ""
    )
    // The base class name may carry template arguments, so match it literally.
    replace(
      "(?:" + Pattern.quote("" + regInfo.baseClassName) +
      "|CInterleaveCommTempl_)\\s*(\\([^)]*\\))",
      "ComponentImpl_$1"
    )
  }

  /** Appends `REGISTER(<class>);` to the `.cpp` text, ensuring a CRLF precedes it. */
  def addRegisterMacro : Unit = {
    if (!cppText.endsWith("\r\n")) cppText_ += "\r\n"
    cppText_ += "\r\nREGISTER(" + regInfo.className + ");\r\n"
  }

  /**
   * Inserts the traits definitions and a banner for the class after the last
   * `#include` / `using namespace` line of the `.cpp` text (at the very top
   * when there is none).
   */
  def addTraitImpl : Unit = {
    val matcher = Pattern.compile(
      "(?:^\\s*#\\s*include[^\n]*\n|^\\s*using\\s+namespace\\s[^\n]*\n)",
      Pattern.MULTILINE
    ).matcher(cppText)
    var last = 0
    while (matcher.find) last = matcher.end
    cppText_ =
      cppText_.substring(0, last) + "\r\n" +
      regInfo.implementTraits +
      "//////////////////////////////////////////////////////////////////////////////\r\n" +
      "// " + regInfo.className + "\r\n" +
      cppText_.substring(last)
  }

  /**
   * Inserts the traits struct declaration just before the class declaration
   * in the header text.
   *
   * @throws AssertionError when the header holds no declaration of the class
   */
  def addTraitDecl : Unit = {
    val cmatcher = Pattern.compile(
      "^\\s*class\\s+" + regInfo.className + "[^a-zA-Z0-9_]",
      Pattern.MULTILINE
    ).matcher(hText)
    // Advance the matcher outside of assert(): an elided assertion would skip the find.
    if (!cmatcher.find)
      throw new AssertionError(
        "assertion failed: no declaration of class " + regInfo.className + " in header")
    val pos = cmatcher.start
    hText_ = hText_.substring(0, pos) + "\r\n" + regInfo.declareTraits + hText_.substring(pos)
  }

  /** Removes the first `NondelegatingQueryInterface` declaration from the header text. */
  def removeNDQIDecl : Unit = {
    val pattern = Pattern.compile(
      "(?m)^\\s*(?:virtual\\s+)?HRESULT\\s+_?_stdcall\\s+NondelegatingQueryInterface\\s*\\([^)]*\\)\\s*;[^\n]*\n"
    )
    hText_ = cutFirstWithComment(hText, pattern)
  }

  /** Rewrites `: public <Base> {` into a `ComponentImpl<Class, Base, Traits>` derivation. */
  def fixDerivation : Unit = {
    // The base class name may carry template arguments: match it literally and
    // insert all names as literal replacement text.
    val base = "" + regInfo.baseClassName
    replace(
      "\\s*:\\s*public\\s+" + Pattern.quote(base) + "\\s*\\{\\s*",
      "\r\n" +
      " : public ComponentImpl<\r\n" +
      " " + literalReplacement(regInfo.className) + "\r\n" +
      " , " + literalReplacement(base) + "\r\n" +
      " , " + literalReplacement(regInfo.traitClassName) + "\r\n" +
      " >\r\n" +
      "{\r\n"
    )
  }

  /** Applies every `.cpp` rewrite, in order. */
  def fixCppFile : Unit = {
    removeNDQI
    removeGUIDDecl
    removeFactoryData
    removeGetRegName
    removeSillyInstance
    fixConstructor
    addRegisterMacro
    addTraitImpl
  }

  /** Applies every header rewrite, in order. */
  def fixHFile : Unit = {
    addTraitDecl
    removeNDQIDecl
    fixDerivation
  }

  /** Rewrites the header and the `.cpp` text, then writes both files back to disk. */
  def fix : Unit = {
    fixHFile
    fixCppFile
    writeFile(cppFile, cppText)
    writeFile(hFile, hText)
  }
}

/** Command-line entry point: `fixreg <component-directory>`. */
object fixreg {
  def main(args : Array[String]) : Unit =
    args.headOption match {
      case Some(dir) => new Fixer(dir).fix
      case None =>
        Console.err.println("usage: fixreg <component-directory>")
        sys.exit(1)
    }
}
// vi:set ft=scala sts=2 sw=2 ai et:


Comments are closed for this entry.