Haskell: Filter C++ Classes

I’ve needed a jig to separate a C++ class’s declaration and implementation for quite some time. Compile with “ghc -O2 –make classfilt.hs” and invoke with “classfilt –impl” or “classfilt –decl”, piping in the class source with inline methods. It certainly doesn’t handle all the corners of the C++ language, but it does most of the job.

import Char (isAlpha, isDigit, isSpace)
import Data.Maybe
import System.Environment (getArgs)
 
isCSymF c = isAlpha c || c == '_'
isCSym c  = isCSymF c || isDigit c
 
spanEscapedQuote q (c:cs) | c == q = ([c], cs)
spanEscapedQuote q ('\\':c:cs)     = case spanEscapedQuote q cs of
                                       (t,r) -> ('\\':c:t, r)
spanEscapedQuote q (c:cs)          = case spanEscapedQuote q cs of
                                       (t,r) -> (c:t, r)
 
isNonNLSpace c = isSpace c && c /= '\n'
 
maybeLBrace []       = Nothing
maybeLBrace ('{':cs) = Just ("{", cs)
maybeLBrace (c:cs)
    | isSpace c         = case maybeLBrace cs of
                            Just (t,r) -> Just (c:t,r)
                            Nothing    -> Nothing
    | True              = Nothing
 
hasLBrace s = isJust $ maybeLBrace s
 
-- Primitive C++ lexical scanner.  It knows enough not to get confused by
-- things in comments and quotation marks, and things on preprocessor directive
-- lines.  It returns a list of pairs, and one invariant is that collecting and
-- concatenating all of the 'snd' elements of the pair will return the original
-- file.
 
data Token = Comment
             | Identifier
             | LBrace
             | RBrace
             | LParen
             | Semi
             | OtherStuff
             deriving (Show, Eq)
 
tokens _ []           = []
tokens _ ('/':'*':cs) = case finishCStyleComment cs of
                          (t,r) -> (Comment, "/*" ++ t) : tokens False r
                        where
                          finishCStyleComment ('*':'/':cs) = ("*/", cs)
                          finishCStyleComment (c:cs) = case finishCStyleComment cs of
                                                         (s,r) -> (c:s,r)
tokens _ ('/':'/':cs) = case span (/= '\n') cs of
                          (t,'\n':r) -> (Comment, "//" ++ t ++ "\n") : tokens True r
                          (t,r)      -> (Comment, "//" ++ t) : tokens False r
tokens _ ('"':cs)     = case spanEscapedQuote '"' cs of
                          (t,r)      -> (OtherStuff, '"' : t) : tokens False r -- "
tokens _ ('\'':cs)    = case spanEscapedQuote '\'' cs of
                          (t,r)      -> (OtherStuff, '\'' : t) : tokens False r
tokens _ ('{':cs)     = (LBrace, "{") : tokens False cs
tokens _ s
    | hasLBrace s     = case maybeLBrace s of
                          Just (t,r) -> (LBrace,t) : tokens False r
 
tokens _ ('}':cs)     = (RBrace, "}") : tokens False cs
tokens _ (';':cs)     = (Semi, ";") : tokens False cs
tokens _ ('(':cs)     = (LParen, "(") : tokens False cs
 
tokens _ ('\n':cs)    = (OtherStuff, "\n") : tokens True cs
tokens True ('#':cs)  = case span (/= '\n') cs of
                          (t,'\n':r) -> (OtherStuff, "#" ++ t ++ "\n") : tokens True r
                          (t,r)      -> (OtherStuff, "#" ++ t) : tokens False r
tokens _ (c:cs)
    | isCSymF c       = case span isCSym cs of
                          (t,r)      -> (Identifier, c:t) : tokens False r
    | isNonNLSpace c  = case span isNonNLSpace cs of
                          (t,r)      -> (OtherStuff, c:t) : tokens False r
tokens s (c:cs)       = (OtherStuff, [c]) : tokens s cs
 
 
-- filter method bodies from class declarations and replace with ';'
rewriteAsClassDeclaration s =
        rTopLevel $ tokens True s
    where
        rTopLevel []                         = ""
        rTopLevel ((Identifier,"class"):ts)  = "class" ++ rClassDecl ts
        rTopLevel ((_,s):ts)                 = s ++ rTopLevel ts
 
        rClassDecl ((Semi,s):ts)             = s ++ rTopLevel ts
        rClassDecl ((LBrace,s):ts)           = s ++ rClassBody ts
        rClassDecl ((_,s):ts)                = s ++ rClassDecl ts
 
        rClassBody ((RBrace,s):ts)           = s ++ rTopLevel ts
        rClassBody ((LBrace,_):ts)           = ";" ++ rMethodImpl 0 ts
        rClassBody ((_,s):ts)                = s ++ rClassBody ts
 
        rMethodImpl 0 ((RBrace,s):ts)        = rClassBody ts
        rMethodImpl n ((RBrace,_):ts)        = rMethodImpl (n+1) ts
        rMethodImpl n ((LBrace,_):ts)        = rMethodImpl (n-1) ts
        rMethodImpl n ((_,_):ts)             = rMethodImpl n ts
 
-- filter everything except method implementations and add class
-- name prefix.
 
minimumIndentation s =
        minimum $ filter (/= 0) $ map findIndent $ lines s
    where
        findIndent (' ':cs)  = 1 + findIndent cs
        findIndent ('\t':cs) = 4 + findIndent cs
        findIndent _         = 0
 
unindent n s =
        init $ unlines $ map (unindentLine n) $ lines s
    where
        unindentLine n s | n <= 0 = s
        unindentLine n (' ':cs)   = unindentLine (n-1) cs
        unindentLine n ('\t':cs)  = unindentLine (n-4) cs
        unindentLine n s          = s
 
reformatMethod name s = unindent (minimumIndentation s) s
 
qualifyMethodName cname s =
        leader ++ cname ++ "::" ++ methodName
    where
        methodName = reverse $ takeWhile isCSym $ dropWhile isSpace $ reverse s
        leader = reverse $ dropWhile isCSym $ dropWhile isSpace $ reverse s
 
rewriteAsClassImplementation s =
        rTopLevel $ tokens True s
    where
        rTopLevel []                                  = ""
        rTopLevel ((Identifier,"class"):ts)           = rClassDecl "class" Nothing ts
        rTopLevel ((_,s):ts)                          = s ++ rTopLevel ts
 
        rClassDecl s _ []                             = s
        rClassDecl s _ ((Semi,rs):ts)                 = s ++ rs ++ rTopLevel ts
        rClassDecl s (Nothing) ((Identifier,name):ts) = rClassDecl (s ++ name) (Just name) ts
        rClassDecl s (Just name) ((LBrace,_):ts)      = rClassBodyStmt name False "" ts
        rClassDecl s maybeName ((_,rs):ts)            = rClassDecl (s ++ rs) maybeName ts
 
        rClassBodyStmt cname _ _ ((Semi,_):ts)        = rClassBodyStmt cname False "" ts
        rClassBodyStmt cname _ _ ((RBrace,_):ts)      = rClassBodySemi ts
        rClassBodyStmt cname _ s ((LBrace,rs):ts)     = rMethodBody cname (s ++ rs) 0 ts
        rClassBodyStmt cname False s ((LParen,rs):ts) = rClassBodyStmt cname True (qualifyMethodName cname s ++ rs) ts
        rClassBodyStmt cname m s ((Identifier,"virtual"):ts) = rClassBodyStmt cname m s ts
        rClassBodyStmt cname m s ((_,rs):ts)          = rClassBodyStmt cname m (s ++ rs) ts
 
        rMethodBody name s 0 ((RBrace,rs):ts)         = (reformatMethod name $ s ++ rs) ++ rClassBodyStmt name False "" ts
        rMethodBody name s n ((RBrace,rs):ts)         = rMethodBody name (s ++ rs) (n-1) ts
        rMethodBody name s n ((LBrace,rs):ts)         = rMethodBody name (s ++ rs) (n+1) ts
        rMethodBody name s n ((_,rs):ts)              = rMethodBody name (s ++ rs) n ts
 
        rClassBodySemi ((Semi,_):ts)                  = rTopLevel ts
        rClassBodySemi ((_,_):ts)                     = rClassBodySemi ts
 
classfilt ["--decl"] s = rewriteAsClassDeclaration s
classfilt ["--impl"] s = rewriteAsClassImplementation s
 
main = do args <- getArgs
          interact (classfilt args)
 
-- vi:set sts=4 sw=4 ai et:
Posted by: Jason Felice on August 18, 2009 • Tags: , • Posted in: Jigs

Comments are closed for this entry.