Haskell: Filter C++ Classes
I’ve needed a jig to separate a C++ class’s declaration and implementation for quite some time. Compile with “ghc -O2 –make classfilt.hs” and invoke with “classfilt –impl” or “classfilt –decl”, piping in the class source with inline methods. It certainly doesn’t handle all the corners of the C++ language, but it does most of the job.
import Char (isAlpha, isDigit, isSpace) import Data.Maybe import System.Environment (getArgs) isCSymF c = isAlpha c || c == '_' isCSym c = isCSymF c || isDigit c spanEscapedQuote q (c:cs) | c == q = ([c], cs) spanEscapedQuote q ('\\':c:cs) = case spanEscapedQuote q cs of (t,r) -> ('\\':c:t, r) spanEscapedQuote q (c:cs) = case spanEscapedQuote q cs of (t,r) -> (c:t, r) isNonNLSpace c = isSpace c && c /= '\n' maybeLBrace [] = Nothing maybeLBrace ('{':cs) = Just ("{", cs) maybeLBrace (c:cs) | isSpace c = case maybeLBrace cs of Just (t,r) -> Just (c:t,r) Nothing -> Nothing | True = Nothing hasLBrace s = isJust $ maybeLBrace s -- Primitive C++ lexical scanner. It knows enough not to get confused by -- things in comments and quotation marks, and things on preprocessor directive -- lines. It returns a list of pairs, and one invariant is that collecting and -- concatenating all of the 'snd' elements of the pair will return the original -- file. data Token = Comment | Identifier | LBrace | RBrace | LParen | Semi | OtherStuff deriving (Show, Eq) tokens _ [] = [] tokens _ ('/':'*':cs) = case finishCStyleComment cs of (t,r) -> (Comment, "/*" ++ t) : tokens False r where finishCStyleComment ('*':'/':cs) = ("*/", cs) finishCStyleComment (c:cs) = case finishCStyleComment cs of (s,r) -> (c:s,r) tokens _ ('/':'/':cs) = case span (/= '\n') cs of (t,'\n':r) -> (Comment, "//" ++ t ++ "\n") : tokens True r (t,r) -> (Comment, "//" ++ t) : tokens False r tokens _ ('"':cs) = case spanEscapedQuote '"' cs of (t,r) -> (OtherStuff, '"' : t) : tokens False r -- " tokens _ ('\'':cs) = case spanEscapedQuote '\'' cs of (t,r) -> (OtherStuff, '\'' : t) : tokens False r tokens _ ('{':cs) = (LBrace, "{") : tokens False cs tokens _ s | hasLBrace s = case maybeLBrace s of Just (t,r) -> (LBrace,t) : tokens False r tokens _ ('}':cs) = (RBrace, "}") : tokens False cs tokens _ (';':cs) = (Semi, ";") : tokens False cs tokens _ ('(':cs) = (LParen, "(") : tokens False cs tokens _ ('\n':cs) = (OtherStuff, "\n") : tokens True cs tokens True ('#':cs) = case span (/= '\n') cs of (t,'\n':r) -> (OtherStuff, "#" ++ t ++ "\n") : tokens True r (t,r) -> (OtherStuff, "#" ++ t) : tokens False r tokens _ (c:cs) | isCSymF c = case span isCSym cs of (t,r) -> (Identifier, c:t) : tokens False r | isNonNLSpace c = case span isNonNLSpace cs of (t,r) -> (OtherStuff, c:t) : tokens False r tokens s (c:cs) = (OtherStuff, [c]) : tokens s cs -- filter method bodies from class declarations and replace with ';' rewriteAsClassDeclaration s = rTopLevel $ tokens True s where rTopLevel [] = "" rTopLevel ((Identifier,"class"):ts) = "class" ++ rClassDecl ts rTopLevel ((_,s):ts) = s ++ rTopLevel ts rClassDecl ((Semi,s):ts) = s ++ rTopLevel ts rClassDecl ((LBrace,s):ts) = s ++ rClassBody ts rClassDecl ((_,s):ts) = s ++ rClassDecl ts rClassBody ((RBrace,s):ts) = s ++ rTopLevel ts rClassBody ((LBrace,_):ts) = ";" ++ rMethodImpl 0 ts rClassBody ((_,s):ts) = s ++ rClassBody ts rMethodImpl 0 ((RBrace,s):ts) = rClassBody ts rMethodImpl n ((RBrace,_):ts) = rMethodImpl (n+1) ts rMethodImpl n ((LBrace,_):ts) = rMethodImpl (n-1) ts rMethodImpl n ((_,_):ts) = rMethodImpl n ts -- filter everything except method implementations and add class -- name prefix. minimumIndentation s = minimum $ filter (/= 0) $ map findIndent $ lines s where findIndent (' ':cs) = 1 + findIndent cs findIndent ('\t':cs) = 4 + findIndent cs findIndent _ = 0 unindent n s = init $ unlines $ map (unindentLine n) $ lines s where unindentLine n s | n <= 0 = s unindentLine n (' ':cs) = unindentLine (n-1) cs unindentLine n ('\t':cs) = unindentLine (n-4) cs unindentLine n s = s reformatMethod name s = unindent (minimumIndentation s) s qualifyMethodName cname s = leader ++ cname ++ "::" ++ methodName where methodName = reverse $ takeWhile isCSym $ dropWhile isSpace $ reverse s leader = reverse $ dropWhile isCSym $ dropWhile isSpace $ reverse s rewriteAsClassImplementation s = rTopLevel $ tokens True s where rTopLevel [] = "" rTopLevel ((Identifier,"class"):ts) = rClassDecl "class" Nothing ts rTopLevel ((_,s):ts) = s ++ rTopLevel ts rClassDecl s _ [] = s rClassDecl s _ ((Semi,rs):ts) = s ++ rs ++ rTopLevel ts rClassDecl s (Nothing) ((Identifier,name):ts) = rClassDecl (s ++ name) (Just name) ts rClassDecl s (Just name) ((LBrace,_):ts) = rClassBodyStmt name False "" ts rClassDecl s maybeName ((_,rs):ts) = rClassDecl (s ++ rs) maybeName ts rClassBodyStmt cname _ _ ((Semi,_):ts) = rClassBodyStmt cname False "" ts rClassBodyStmt cname _ _ ((RBrace,_):ts) = rClassBodySemi ts rClassBodyStmt cname _ s ((LBrace,rs):ts) = rMethodBody cname (s ++ rs) 0 ts rClassBodyStmt cname False s ((LParen,rs):ts) = rClassBodyStmt cname True (qualifyMethodName cname s ++ rs) ts rClassBodyStmt cname m s ((Identifier,"virtual"):ts) = rClassBodyStmt cname m s ts rClassBodyStmt cname m s ((_,rs):ts) = rClassBodyStmt cname m (s ++ rs) ts rMethodBody name s 0 ((RBrace,rs):ts) = (reformatMethod name $ s ++ rs) ++ rClassBodyStmt name False "" ts rMethodBody name s n ((RBrace,rs):ts) = rMethodBody name (s ++ rs) (n-1) ts rMethodBody name s n ((LBrace,rs):ts) = rMethodBody name (s ++ rs) (n+1) ts rMethodBody name s n ((_,rs):ts) = rMethodBody name (s ++ rs) n ts rClassBodySemi ((Semi,_):ts) = rTopLevel ts rClassBodySemi ((_,_):ts) = rClassBodySemi ts classfilt ["--decl"] s = rewriteAsClassDeclaration s classfilt ["--impl"] s = rewriteAsClassImplementation s main = do args <- getArgs interact (classfilt args) -- vi:set sts=4 sw=4 ai et:
Posted on August 18, 2009 at 9:59 pm by Jason Felice · Permalink
In: Jigs · Tagged with: c++, haskell
In: Jigs · Tagged with: c++, haskell

