diff --git a/.gitignore b/.gitignore index 7849625..93ed529 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,5 @@ /lhs2TeX /Lhs2TeX /dist +/dist-newstyle +.ghc.environment.* diff --git a/src/HsLexer.lhs b/src/HsLexer.lhs index ad90035..e049752 100644 --- a/src/HsLexer.lhs +++ b/src/HsLexer.lhs @@ -9,8 +9,8 @@ > where > import Data.Char ( isSpace, isUpper, isLower, isDigit, isAlphaNum, isPunctuation, toLower ) > import qualified Data.Char ( isSymbol ) -> import Control.Applicative > import Control.Monad +> import Control.Monad.Error.Class () > import Document > import Auxiliaries > import TeXCommands ( Lang(..) ) @@ -27,6 +27,7 @@ A Haskell lexer, based on the Prelude function \hs{lex}. > | Char String > | String String > | Special Char +> | SpecialS String > | Comment String > | Nested String > | Pragma String @@ -62,6 +63,7 @@ hierarchical modules. Also added Pragma. > string (Char s) = s > string (String s) = s > string (Special c) = [c] +> string (SpecialS s) = s > string (Comment s) = "--" ++ s > string (Nested s) = "{-" ++ s ++ "-}" > string (Pragma s) = "{-#" ++ s ++ "#-}" @@ -92,23 +94,33 @@ ks, 28.08.2008: New: Agda and Haskell modes. > lexify :: Lang -> [Char] -> Either Exc [Token] > lexify _lang [] = return [] -> lexify lang s@(_ : _) = case lex' lang s of +> lexify lang s@(_ : _) = case lex' lang s of > Nothing -> Left ("lexical error", s) > Just (t, s') -> do ts <- lexify lang s'; return (t : ts) > > lex' :: Lang -> String -> Maybe (Token, String) > lex' _lang "" = Nothing -> lex' _lang ('\'' : s) = do let (t, u) = lexLitChar s -> v <- match "\'" u -> return (Char ("'" ++ t ++ "'"), v) +> lex' _lang ('\'' : '[' : s) = Just (SpecialS "\'[", s) +> lex' lang ('\'' : s0) = do let (t, u) = lexLitChar s0 +> case match "\'" u of +> Just v -> return (Char ("'" ++ t ++ "'"), v) +> Nothing -> do +> (t', u') <- lex' lang s0 +> case t' of +> Conid s -> return (Conid ('\'' : s), u') +> Consym s -> return (Consym ('\'' : s), u') +> Varsym s -> return (Varsym ('\'' : s), u') +> Special c -> return (Consym (['\'', c]), u') +> _ -> Nothing +> > lex' _lang ('"' : s) = do let (t, u) = lexLitStr s > v <- match "\"" u > return (String ("\"" ++ t ++ "\""), v) -> lex' lang ('-' : '-' : s) +> lex' lang ('-' : '-' : s) > | not (null s') && isSymbol lang (head s') > = case s' of > (c : s'') -> return (varsymid lang ("--" ++ d ++ [c]), s'') -> [] -> impossible "lex'" +> _ -> fail "lex' --" > | otherwise = return (Comment t, u) > where (d, s') = span (== '-') s > (t, u) = break (== '\n') s' @@ -143,27 +155,27 @@ ks, 28.08.2008: New: Agda and Haskell modes. > where > numeral Agda = Varid > numeral Haskell = Numeral -> classify s' -> | s' `elem` keywords lang -> = Keyword s' -> | otherwise = Varid s' +> classify s0 +> | s0 `elem` keywords lang +> = Keyword s0 +> | otherwise = Varid s0 > > > lexFracExp :: String -> Maybe (String, String) -> lexFracExp s = do t <- match "." s -> (ds, u) <- lexDigits' t -> (e, v) <- lexExp u -> return ('.' : ds ++ e, v) -> <|> lexExp s +> lexFracExp s = do t <- match "." s +> (ds, u) <- lexDigits' t +> (e, v) <- lexExp u +> return ('.' : ds ++ e, v) +> `mplus` lexExp s > > lexExp :: String -> Maybe (String, String) > lexExp (e:s) -> | e `elem` "eE" = do (c : t) <- Just s -> unless (c `elem` "+-") Nothing -> (ds, u) <- lexDigits' t -> return (e : c : ds, u) -> <|> do (ds, t) <- lexDigits' s -> return (e : ds, t) +> | e `elem` "eE" = do (c : t) <- Just s +> unless (c `elem` "+-") Nothing +> (ds, u) <- lexDigits' t +> return (e : c : ds, u) +> `mplus` do (ds, t) <- lexDigits' s +> return (e : ds, t) > lexExp s = Just ("", s) > > lexDigits' :: String -> Maybe (String, String) @@ -203,12 +215,19 @@ incorrectly reject programs that contain comments like the following one: {- start normal, but close as pragma #-} ... I don't expect this to be a problem, though. -> lexLitChar, lexLitStr :: String -> (String, String) +> lexLitChar, lexLitChar' :: String -> (String, String) > lexLitChar [] = ([], []) -> lexLitChar ('\'' : s) = ([], '\'' : s) -> lexLitChar ('\\' : c : s) = '\\' <| c <| lexLitChar s -> lexLitChar (c : s) = c <| lexLitChar s +> lexLitChar ('\\' : c : s) = '\\' <| c <| lexLitChar' s +> lexLitChar (c : '\'' : s) = c <| ("", '\'' : s ) +> lexLitChar _ = ([], []) > +> lexLitChar' [] = ([], []) +> lexLitChar' ('\'' : s) = ([], '\'' : s) +> lexLitChar' (c : s) = c <| lexLitChar' s + +05.08.2016, DataKinds need treating of apostroph to be more strict. |lexLitChar'| lexes the char expression after the backslash, i.e. until another apostroph + +> lexLitStr :: String -> (String, String) > lexLitStr [] = ([], []) > lexLitStr ('"' : s) = ([], '"' : s) > lexLitStr ('\\' : c : s) = '\\' <| c <| lexLitStr s @@ -243,7 +262,7 @@ Keywords > "import", "in", "infix", "infixl", > "infixr", "instance", "let", "module", > "newtype", "of", "then", "type", -> "where" ] +> "where", "family" ] > keywords Agda = [ "let", "in", "where", "field", "with", > "postulate", "primitive", "open", "import", > "module", "data", "codata", "record", "infix", @@ -338,7 +357,7 @@ non-separators. > data CatCode = White > | Sep -> | Del Char +> | Del String > | NoSep > deriving (Eq) @@ -369,8 +388,9 @@ an improvement. > catCode (Char _) = NoSep > catCode (String _) = NoSep > catCode (Special c) -> | c `elem` "([{}])" = Del c +> | c `elem` "([{}])" = Del [c] > | otherwise = Sep +> catCode (SpecialS s) = Del s \NB Only @([])@ are classified as delimiters; @{}@ are separators since they do not bracket expressions. diff --git a/src/Math.lhs b/src/Math.lhs index 3d68fea..19ff1bc 100644 --- a/src/Math.lhs +++ b/src/Math.lhs @@ -10,16 +10,18 @@ > import Prelude hiding ( lines ) > import Data.List ( partition ) > import Numeric ( showFFloat ) -> import Control.Applicative +> import Control.Applicative ( many ) > import Control.Arrow ( (>>>) ) -> import Control.Monad ( (>=>) ) +> import Control.Monad ( MonadPlus(..), (>=>) ) > > import Verbatim ( expand, trim ) +> import Typewriter ( latex ) > import MathCommon > import Document > import Directives > import HsLexer > import Parser +> import qualified FiniteMap as FM > import Auxiliaries > import TeXCommands ( Lang(..) ) @@ -74,7 +76,7 @@ This variant cannot handle unbalanced parentheses. > > chunk :: (CToken tok) => Parser (Pos tok) (Chunk (Pos tok)) > chunk = do a <- many atom -> as <- many (do s <- sep; a' <- many atom; return (Delim s : offside a')) +> as <- many (do s <- sep; a <- many atom; return (Delim s : offside a)) > return (offside a ++ concat as) > where offside [] = [] > -- old: |opt a = [Apply a]| @@ -84,22 +86,20 @@ This variant cannot handle unbalanced parentheses. > col' (Paren a _ _) = poscol a > > atom :: (CToken tok) => Parser (Pos tok) (Atom (Pos tok)) -> atom = fmap Atom noSep -> <|> do l <- left -> e <- chunk -> r <- right l -> return (Paren l e r) +> atom = fmap Atom noSep +> `mplus` do l <- left +> e <- chunk +> r <- right l +> return (Paren l e r) Primitive parser. > sep, noSep, left :: (CToken tok) => Parser tok tok > sep = satisfy (\t -> catCode t == Sep) > noSep = satisfy (\t -> catCode t == NoSep) -> left = satisfy (\t -> case catCode t of Del c -> c `elem` "(["; _-> False) -> -> right :: (CToken tok) => tok -> Parser tok tok -> right l = satisfy (\c' -> case (catCode l, catCode c') of -> (Del o, Del c) -> (o,c) `elem` zip "([" ")]" +> left = satisfy (\t -> case catCode t of Del c -> c `elem` ["(", "[", "'["]; _-> False) +> right l = satisfy (\c -> case (catCode l, catCode c) of +> (Del o, Del c) -> (o,c) `elem` zip ["(","["] [")","]"] > _ -> False) % - - - - - - - - - - - - - - - = - - - - - - - - - - - - - - - - - - - - - - - @@ -135,9 +135,9 @@ Position von |=| oder |::| heranzuziehen ist gef"ahrlich; wenn z.B. > _ -> False > > instance Functor Line where -> fmap _f Blank = Blank -> fmap f (Three l c r) = Three (f l) (f c) (f r) -> fmap f (Multi a) = Multi (f a) +> fmap f Blank = Blank +> fmap f (Three l c r) = Three (f l) (f c) (f r) +> fmap f (Multi a) = Multi (f a) % - - - - - - - - - - - - - - - = - - - - - - - - - - - - - - - - - - - - - - - \subsubsection{Adding spaces} @@ -149,16 +149,16 @@ indicates whether to insert a space or not; |after| means immediately after a keyword (hence |before b| really means not immediately after). > addSpaces :: (CToken tok) => [tok] -> [tok] -> addSpaces ts0 = before False ts0 +> addSpaces ts = before False ts > where -> before _b [] = [] -> before b (t : ts) = case token t of +> before b [] = [] +> before b (t : ts) = case token t of > u | selfSpacing u -> t : before False ts > Special c > | c `elem` ",;([{" -> t : before False ts > Keyword _ -> [ fromToken (TeX False sub'space) | b ] ++ t : after ts > _ -> t : before True ts -> +> > after [] = [] > after (t : ts) = case token t of > u | selfSpacing u -> t : before False ts @@ -187,14 +187,8 @@ Auch wenn |auto = False| wird der Stack auf dem laufenden gehalten. > type Stack = [(Col, Doc, [Pos Token])] > -> leftIndent :: -> Formats -> -> Bool -> -> (Stack, Stack) -> -> [Line [Pos Token]] -> -> (Doc, (Stack, Stack)) -> leftIndent dict auto (lst0, rst0) -> = loop lst0 rst0 +> leftIndent dict auto (lst, rst) +> = loop lst rst > where > copy d | auto = d > | otherwise = Empty @@ -203,21 +197,21 @@ Die Funktion |isInternal| pr"uft, ob |v| ein spezielles Symbol wie @::@, @=@ etc~oder ein Operator wie @++@ ist. > loop lst rst [] = (Empty, (lst, rst)) -> loop lst rst (l' : ls) = case l' of +> loop lst rst (l : ls) = case l of > Blank -> loop lst rst ls > Three l c r -> (sub'column3 (copy lskip <<>> latexs dict l) > (latexs dict c) -> (copy rskip <<>> latexs dict r) <<>> sep' ls <<>> rest, st') +> (copy rskip <<>> latexs dict r) <<>> sep ls <<>> rest, st') > where (lskip, lst') = indent l lst > (rskip, rst') = indent r rst > (rest, st') = loop lst' rst' ls -- does not work: |if null l && null c then rst' else []| -> Multi m -> (sub'column1 (copy lskip <<>> latexs dict m) <<>> sep' ls <<>> rest, st') +> Multi m -> (sub'column1 (copy lskip <<>> latexs dict m) <<>> sep ls <<>> rest, st') > where (lskip, lst') = indent m lst > (rest, st') = loop lst' [] ls > -> sep' [] = Empty -> sep' (Blank : _ ) = sub'blankline -> sep' (_ : _) = sub'nl +> sep [] = Empty +> sep (Blank : _ ) = sub'blankline +> sep (_ : _) = sub'nl > > indent :: [Pos Token] -> Stack -> (Doc, Stack) > indent [] stack = (Empty, stack) @@ -229,7 +223,7 @@ Die Funktion |isInternal| pr"uft, ob |v| ein spezielles Symbol wie > GT -> (skip', (poscol t, skip', ts) : top : stack) > where > skip' = case span (\u -> poscol u < poscol t) line of -> (us, v : _vs) | poscol v == poscol t +> (us, v : vs) | poscol v == poscol t > -> skip <<>> sub'phantom (latexs dict us) > -- does not work: |(us, _) -> skip ++ [Phantom (fmap token us), Skip (col t - last (c : fmap col us))]| > _ -> skip <<>> sub'hskip (Text em) diff --git a/src/MathCommon.lhs b/src/MathCommon.lhs index 8336c72..4f865f3 100644 --- a/src/MathCommon.lhs +++ b/src/MathCommon.lhs @@ -164,7 +164,7 @@ inherits the position of the original token. > | isOptional = (Mandatory, set l s ++ args es) > | otherwise = (Optional False, [l] ++ s ++ [r] ++ args es) > where (flag, s) = eval e -> isOptional = catCode l == Del '(' && not (mandatory e) +> isOptional = catCode l == Del "(" && not (mandatory e) > && case flag of Mandatory -> False; Optional f -> opt || f \NB It is not a good idea to remove parentheses around atoms, because diff --git a/src/MathPoly.lhs b/src/MathPoly.lhs index 54e66cb..ed3c0a7 100644 --- a/src/MathPoly.lhs +++ b/src/MathPoly.lhs @@ -17,7 +17,7 @@ are subtle differences, and they will grow over time \dots > import Data.List ( partition, nub, sort, transpose ) > import Control.Applicative > import Control.Arrow ( (>>>) ) -> import Control.Monad ( (>=>) ) +> import Control.Monad ( (>=>), mplus ) > > import Verbatim ( expand, trim ) > import MathCommon @@ -96,7 +96,7 @@ This variant can handle unbalanced parentheses in some cases (see below). > > chunk :: (CToken tok) => Int -> Parser (Pos tok) (Chunk (Pos tok)) > chunk d = do a <- many (atom d) -> as <- many (do s <- csep; a' <- many (atom d); return (Delim s : offside a')) +> as <- many (do s <- sep; a' <- many (atom d); return (Delim s : offside a')) > return (offside a ++ concat as) > where offside [] = [] > -- old: |opt a = [Apply a]| @@ -106,7 +106,7 @@ This variant can handle unbalanced parentheses in some cases (see below). > col' (Paren a _ _) = poscol a > > atom :: (CToken tok) => Int -> Parser (Pos tok) (Atom (Pos tok)) -> atom d = fmap Atom cnoSep +> atom d = fmap Atom noSep > <|> do l <- left > e <- chunk (d+1) > r <- right l @@ -123,18 +123,16 @@ parsed as an arbitrary amount of right parentheses. Primitive parser. -> csep, cnoSep, left, anyright :: (CToken tok) => Parser tok tok -> csep = satisfy (\t -> catCode t == Sep) -> cnoSep = satisfy (\t -> catCode t == NoSep) -> left = satisfy (\t -> case catCode t of Del c -> c `elem` "([{"; _ -> False) -> anyright = satisfy (\t -> case catCode t of Del c -> c `elem` ")]}"; _ -> False) -> -> right :: (CToken tok) => tok -> Parser tok tok -> right l = satisfy (\c' -> case (catCode l, catCode c') of -> (Del o, Del c) -> (o,c) `elem` zip "([{" ")]}" -> _ -> False) -> <|> do eof -> return (fromToken $ TeX False Empty) +> sep, noSep, left, anyright :: (CToken tok) => Parser tok tok +> sep = satisfy (\t -> catCode t == Sep) +> noSep = satisfy (\t -> catCode t == NoSep) +> left = satisfy (\t -> case catCode t of Del c -> c `elem` ["(","[","{","'["]; _ -> False) +> anyright = satisfy (\t -> case catCode t of Del c -> c `elem` [")","]","}"]; _ -> False) +> right l = satisfy (\c -> case (catCode l, catCode c) of +> (Del o, Del c) -> (o,c) `elem` zip ["(","[","{","'["] [")","]","}","]"] +> _ -> False) +> `mplus` do eof +> return (fromToken $ TeX False Empty) ks, 06.09.2003: Modified the |right| parser to accept the end of file, to allow for unbalanced parentheses. This behaviour is not (yet) backported diff --git a/src/Typewriter.lhs b/src/Typewriter.lhs index 2ee9c3d..8651885 100644 --- a/src/Typewriter.lhs +++ b/src/Typewriter.lhs @@ -54,6 +54,7 @@ > tex _ (Char s) = sub'char (catenate (map conv' (init $ tail s))) -- NEW: remove quotes > tex _ (String s) = sub'string (catenate (map conv' (init $ tail s))) -- NEW: remove quotes > tex _ (Special c) = sub'special (replace Empty [c] (conv False c)) +> tex _ (SpecialS s) = sub'special (replace Empty s (convert False s)) > tex _ (Comment s) = sub'comment (Embedded s) > tex _ (Nested s) = sub'nested (Embedded s) > tex _ (Pragma s) = sub'pragma (Embedded s)