Compare commits

..

18 Commits

Author SHA1 Message Date
Greg Shuflin 57a0d0a603 Add parser module 2024-04-23 18:03:27 -07:00
Greg Shuflin b44fda3283 Fix markdown 2024-04-23 02:47:16 -07:00
Greg Shuflin 1be26eb453 Fix readme for real 2024-04-23 02:46:43 -07:00
Greg Shuflin 7067edc86f Fix readme 2024-04-23 02:46:26 -07:00
Greg Shuflin dc771fc7ad Working simple tree-sitter grammar 2024-04-23 02:37:01 -07:00
Greg Shuflin 45c4d08fb9 Add justfile 2024-04-23 02:15:26 -07:00
Greg Shuflin 77257d0eb7 Messing with treesitter grammar
doesn't work yet
2024-04-23 02:13:44 -07:00
Greg Shuflin f33195ab28 Trying out a thing 2024-04-21 03:08:05 -07:00
Greg Shuflin 8cde20641b working on new grammar 2024-04-21 03:01:13 -07:00
Greg Shuflin ba4ccfe6bf More tree-sitter testing stuff 2024-04-21 02:34:39 -07:00
Greg Shuflin 7bc92aef97 treesitter test 2024-04-21 02:26:53 -07:00
Greg Shuflin 95e22567e7 Add experiments crate 2024-04-20 01:39:11 -07:00
Greg Shuflin dc09d804ef Split into workspace 2024-04-20 01:37:57 -07:00
Greg Shuflin 49e6e3a71d Update readme 2024-04-20 01:29:10 -07:00
Greg Shuflin cf7a2ff9ba Add logo
Logo originally from 2018 Nov 11
2023-03-24 03:30:48 -07:00
Greg Shuflin aff809e4ce Merge commit '18b4ac0d4b79377428a0a32c16712057cc0a9a61' as 'subtrees/parser-combinator' 2023-03-09 17:30:07 -08:00
Greg Shuflin 18b4ac0d4b Squashed 'subtrees/parser-combinator/' content from commit 5526ce7
git-subtree-dir: subtrees/parser-combinator
git-subtree-split: 5526ce7bd17beda52047fbc3442e23e0174b79a7
2023-03-09 17:30:07 -08:00
Greg Shuflin ab53cfdb7d Rust preliminaries 2023-01-14 02:00:26 -08:00
89 changed files with 1996 additions and 11049 deletions

5
.gitignore vendored
View File

@ -1,4 +1,3 @@
Cargo.lock
target
.schala_repl
.schala_history
node_modules/
experiments/tree-sitter-test/src

15
Cargo.lock generated Normal file
View File

@ -0,0 +1,15 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "experiments"
version = "0.1.0"
[[package]]
name = "schala-main"
version = "0.1.0"
[[package]]
name = "schala-parser"
version = "0.1.0"

View File

@ -1,18 +1,7 @@
[package]
name = "schala"
version = "0.1.0"
authors = ["greg <greg.shuflin@protonmail.com>"]
[dependencies]
schala-repl = { path = "schala-repl" }
schala-codegen = { path = "schala-codegen" }
maaru-lang = { path = "maaru" }
rukka-lang = { path = "rukka" }
robo-lang = { path = "robo" }
schala-lang = { path = "schala-lang" }
[build-dependencies]
includedir_codegen = "0.2.0"
[workspace]
members = [
"schala-main",
"schala-parser",
"experiments",
]
resolver = "2"

31
Grammar
View File

@ -1,31 +0,0 @@
<program> := <statements> EOF
<statements> := <statement>
| <statement> SEP <statements>
<statement> := let <id> = <expr>
| <expr>
| <fn_block>
<fn_block> := fn <id> ( <arg_list> ) <statements> end
<arg_list> := e
| <id>
| <id> , <arg_list>
<expr> := if <expr> then <statements> end
| if <expr> then <statements> else <statements> end
| while <expr> SEP <statements> end
| ( <expr> )
| <binop>
<binop> := <simple_expr>
| <simple_expr> <id> <binop>
<simple_expr> := <id>
| <number>
| <string>

View File

@ -1,920 +0,0 @@
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE OverloadedStrings #-}
-- | This module is an extensively documented walkthrough for typechecking a
-- basic functional language using the Hindley-Damas-Milner algorithm.
--
-- In the end, we'll be able to infer the type of expressions like
--
-- @
-- find (λx. (>) x 0)
-- :: [Integer] -> Either () Integer
-- @
--
-- It can be used in multiple different forms:
--
-- * The source is written in literate programming style, so you can almost
-- read it from top to bottom, minus some few references to later topics.
-- * /Loads/ of doctests (runnable and verified code examples) are included
-- * The code is runnable in GHCi, all definitions are exposed.
-- * A small main module that gives many examples of what you might try out in
-- GHCi is also included.
-- * The Haddock output yields a nice overview over the definitions given, with
-- a nice rendering of a truckload of Haddock comments.
module HindleyMilner where
import Control.Monad.Trans
import Control.Monad.Trans.Except
import Control.Monad.Trans.State
import Data.Map (Map)
import qualified Data.Map as M
import Data.Monoid
import Data.Set (Set)
import qualified Data.Set as S
import Data.String
import Data.Text (Text)
import qualified Data.Text as T
-- $setup
--
-- For running doctests:
--
-- >>> :set -XOverloadedStrings
-- >>> :set -XOverloadedLists
-- >>> :set -XLambdaCase
-- >>> import qualified Data.Text.IO as T
-- >>> let putPprLn = T.putStrLn . ppr
-- #############################################################################
-- #############################################################################
-- * Preliminaries
-- #############################################################################
-- #############################################################################
-- #############################################################################
-- ** Prettyprinting
-- #############################################################################
-- | A prettyprinter class. Similar to 'Show', but with a focus on having
-- human-readable output as opposed to being valid Haskell.
class Pretty a where
-- The class's only method: render a value as 'Text' intended for human readers.
ppr :: a -> Text
-- #############################################################################
-- ** Names
-- #############################################################################
-- | A 'name' is an identifier in the language we're going to typecheck.
-- Variables on both the term and type level have 'Name's, for example.
newtype Name = Name Text
-- 'Eq' and 'Ord' are derived, which is what lets a 'Name' be used as a 'Map'
-- key and as a 'Set' element elsewhere in this module.
deriving (Eq, Ord, Show)
-- | >>> "lorem" :: Name
-- Name "lorem"
instance IsString Name where
    -- Pack the 'String' literal into 'Text', then wrap it.
    fromString str = Name (T.pack str)
-- | >>> putPprLn (Name "var")
-- var
instance Pretty Name where
    -- A name prints as its bare text, without the constructor.
    ppr name = case name of
        Name txt -> txt
-- #############################################################################
-- ** Monotypes
-- #############################################################################
-- | A monotype is an unquantified/unparametric type, in other words it contains
-- no @forall@s. Monotypes are the inner building blocks of all types. Examples
-- of monotypes are @Int@, @a@, @a -> b@.
--
-- In formal notation, 'MType's are often called τ (tau) types.
data MType = TVar Name -- ^ @a@
| TFun MType MType -- ^ @a -> b@
| TConst Name -- ^ @Int@, @()@, …
-- Since we can't declare our own types in our simple type system
-- here, we'll hard-code certain basic ones so we can typecheck some
-- familiar functions that use them later.
| TList MType -- ^ @[a]@
| TEither MType MType -- ^ @Either a b@
| TTuple MType MType -- ^ @(a,b)@
-- Only 'Show' is derived; 'MType' has no 'Eq' instance.
deriving Show
-- | >>> putPprLn (TFun (TEither (TVar "a") (TVar "b")) (TFun (TVar "c") (TVar "d")))
-- Either a b → c → d
--
-- Using the 'IsString' instance:
--
-- >>> putPprLn (TFun (TEither "a" "b") (TFun "c" "d"))
-- Either a b → c → d
instance Pretty MType where
    ppr = go False
      where
        -- The 'Bool' argument says whether a function type at this position
        -- has to be wrapped in parentheses. That is the case exactly when it
        -- is the left operand of another arrow, since @→@ associates to the
        -- right: @(a → b) → c@ needs the parentheses, @a → (b → c)@ does not.
        go _ (TVar name)   = ppr name
        go _ (TList a)     = "[" <> ppr a <> "]"
        go _ (TEither l r) = "Either " <> ppr l <> " " <> ppr r
        go _ (TTuple a b)  = "(" <> ppr a <> ", " <> ppr b <> ")"
        go _ (TConst name) = ppr name
        go parenthesize (TFun a b)
            | parenthesize = "(" <> arrow <> ")"
            | otherwise    = arrow
          where
            -- The separator must be the arrow itself; with an empty string
            -- @TFun "a" "b"@ would render as @ab@ instead of @a → b@.
            arrow = go True a <> " → " <> go False b
-- | >>> "var" :: MType
-- TVar (Name "var")
instance IsString MType where
    -- A string literal used as an 'MType' denotes the type variable of that name.
    fromString str = TVar (fromString str)
-- | The free variables of an 'MType'. This is simply the collection of all the
-- individual type variables occurring inside of it.
--
-- __Example:__ The free variables of @a -> b@ are @a@ and @b@.
freeMType :: MType -> Set Name
-- A variable is free in itself; a constant contains no variables; every other
-- constructor contributes the union of what its components contain.
freeMType (TVar a)      = S.singleton a
freeMType (TFun a b)    = freeMType a `S.union` freeMType b
freeMType (TList a)     = freeMType a
freeMType (TEither l r) = freeMType l `S.union` freeMType r
freeMType (TTuple a b)  = freeMType a `S.union` freeMType b
freeMType (TConst _)    = S.empty
-- | Substitute all the contained type variables mentioned in the substitution,
-- and leave everything else alone.
instance Substitutable MType where
    applySubst subst@(Subst mapping) mType = case mType of
        -- A variable is replaced if the substitution mentions it, and kept
        -- unchanged otherwise.
        TVar a      -> M.findWithDefault mType a mapping
        TFun f x    -> TFun (recurse f) (recurse x)
        TList a     -> TList (recurse a)
        TEither l r -> TEither (recurse l) (recurse r)
        TTuple a b  -> TTuple (recurse a) (recurse b)
        -- Constants contain no variables, so there is nothing to substitute.
        TConst {}   -> mType
      where
        recurse = applySubst subst
-- #############################################################################
-- ** Polytypes
-- #############################################################################
-- | A polytype is a monotype universally quantified over a number of type
-- variables. In Haskell, all definitions have polytypes, but since the @forall@
-- is implicit they look a bit like monotypes, maybe confusingly so. For
-- example, the type of @1 :: Int@ is actually @forall <nothing>. Int@, and the
-- type of @id@ is @forall a. a -> a@, although GHC displays it as @a -> a@.
--
-- A polytype claims to work "for all imaginable type parameters", very similar
-- to how a lambda claims to work "for all imaginable value parameters". We can
-- insert a value into a lambda's parameter to evaluate it to a new value, and
-- similarly we'll later insert types into a polytype's quantified variables to
-- gain new types.
--
-- __Example:__ in a definition @id :: forall a. a -> a@, the @a@ after the
-- ∀ ("forall") is the collection of type variables, and @a -> a@ is the 'MType'
-- quantified over. When we have such an @id@, we also have its specialized
-- version @Int -> Int@ available. This process will be the topic of the type
-- inference/unification algorithms.
--
-- In formal notation, 'PType's are often called σ (sigma) types.
--
-- The purpose of having monotypes and polytypes is that we'd like to only have
-- universal quantification at the top level, restricting our language to rank-1
-- polymorphism, where type inference is total (all types can be inferred) and
-- simple (only a handful of typing rules). Weakening this constraint would be
-- easy: if we allowed universal quantification within function types we would
-- get rank-N polymorphism. Taking it even further to allow it anywhere,
-- effectively replacing all occurrences of 'MType' with 'PType', yields
-- impredicative types. Both these extensions make the type system
-- *significantly* more complex though.
data PType = Forall (Set Name) MType -- ^ ∀{α}. τ
-- The 'Set' holds the universally quantified names, the 'MType' is the type
-- being quantified over.
-- | >>> putPprLn (Forall ["a"] (TFun "a" "a"))
-- ∀a. a → a
instance Pretty PType where
    -- Renders as @∀<quantified names>. <type>@, e.g. @∀a. a → a@.
    ppr (Forall qs mType) = "∀" <> pprUniversals <> ". " <> ppr mType
      where
        -- With nothing quantified, the empty set symbol is printed so the
        -- output reads @∀∅. τ@; otherwise the names are separated by spaces.
        pprUniversals
            | S.null qs = "∅"
            | otherwise = (T.intercalate " " . map ppr . S.toList) qs
-- | The free variables of a 'PType' are the free variables of the contained
-- 'MType', except those universally quantified.
--
-- >>> let sigma = Forall ["a"] (TFun "a" (TFun (TTuple "b" "a") "c"))
-- >>> putPprLn sigma
-- ∀a. a → (b, a) → c
-- >>> let display = T.putStrLn . T.intercalate ", " . foldMap (\x -> [ppr x])
-- >>> display (freePType sigma)
-- b, c
freePType :: PType -> Set Name
-- Quantified names are bound, so they are removed from the body's variables.
freePType (Forall bound body) = S.difference (freeMType body) bound
-- | Substitute all the free type variables.
instance Substitutable PType where
    applySubst (Subst subst) (Forall qs mType) = Forall qs (applySubst restricted mType)
      where
        -- Drop every binding whose key is one of the quantified names, so
        -- that only free variables of the body get replaced.
        restricted = Subst (M.filterWithKey (\name _ -> name `S.notMember` qs) subst)
-- #############################################################################
-- ** The environment
-- #############################################################################
-- | The environment consists of all the values available in scope, and their
-- associated polytypes. Other common names for it include "(typing) context",
-- and because of the commonly used symbol for it sometimes directly
-- \"Gamma"/@"Γ"@.
--
-- There are two kinds of membership in an environment,
--
-- - @∈@: an environment @Γ@ can be viewed as a set of @(value, type)@ pairs,
-- and we can test whether something is /literally contained/ by it via
-- x:σ ∈ Γ
-- - @⊢@, pronounced /entails/, describes all the things that are well-typed,
-- given an environment @Γ@. @Γ ⊢ x:τ@ can thus be seen as a judgement that
-- @x:τ@ is /figuratively contained/ in @Γ@.
--
-- For example, the environment @{x:Int}@ literally contains @x@, but given
-- this, it also entails @λy. x@, @λy z. x@, @let id = λy. y in id x@ and so on.
--
-- In Haskell terms, the environment consists of all the things you currently
-- have available, or that can be built by combining them. If you import the
-- Prelude, your environment entails
--
-- @
-- id → ∀a. a→a
-- map → ∀a b. (a→b) → [a] → [b]
-- putStrLn → ∀∅. String → IO ()
-- …
-- id map → ∀a b. (a→b) → [a] → [b]
-- map putStrLn → ∀∅. [String] -> [IO ()]
-- …
-- @
newtype Env = Env (Map Name PType)
-- Each key is a value-level 'Name'; the associated 'PType' is its polytype.
-- | >>> :{
-- putPprLn (Env
-- [ ("id", Forall ["a"] (TFun "a" "a"))
-- , ("const", Forall ["a", "b"] (TFun "a" (TFun "b" "a"))) ])
-- :}
-- Γ = { const : ∀a b. a → b → a
-- , id : ∀a. a → a }
instance Pretty Env where
    ppr (Env env) = "Γ = { " <> T.intercalate "\n , " rendered <> " }"
      where
        -- One @name : type@ entry per binding, in the key order of the map.
        rendered = [ ppr name <> " : " <> ppr pType | (name, pType) <- M.assocs env ]
-- | The free variables of an 'Env'ironment are all the free variables of the
-- 'PType's it contains.
freeEnv :: Env -> Set Name
-- Union of the free variables of every polytype stored in the environment.
freeEnv (Env env) = S.unions [ freePType sigma | sigma <- M.elems env ]
-- | Performing a 'Subst'itution in an 'Env'ironment means performing that
-- substitution on all the contained 'PType's.
instance Substitutable Env where
    -- Keys are left alone; only the stored polytypes are rewritten.
    applySubst s (Env bindings) = Env (fmap (applySubst s) bindings)
-- #############################################################################
-- ** Substitutions
-- #############################################################################
-- | A substitution is a mapping from type variables to 'MType's. Applying a
-- substitution means applying those replacements. For example, the substitution
-- @a -> Int@ applied to @a -> a@ yields the result @Int -> Int@.
--
-- A key concept behind Hindley-Milner is that once we dive deeper into an
-- expression, we learn more about our type variables. We might learn that @a@
-- has to be specialized to @b -> b@, and then later on that @b@ is actually
-- @Int@. Substitutions are an organized way of carrying this information along.
newtype Subst = Subst (Map Name MType)
-- Keys are the type variable names to replace; values are their replacements.
-- | We're going to apply substitutions to a variety of other values that
-- somehow contain type variables, so we overload this application operation in
-- a class here.
--
-- Laws:
--
-- @
-- 'applySubst' 'mempty' ≡ 'id'
-- 'applySubst' (s1 '<>' s2) ≡ 'applySubst' s1 . 'applySubst' s2
-- @
class Substitutable a where
-- Apply the given substitution to a value, returning a value of the same type.
applySubst :: Subst -> a -> a
instance (Substitutable a, Substitutable b) => Substitutable (a,b) where
    -- Pairs are substituted componentwise with the same substitution.
    applySubst subst (first, second) =
        ( applySubst subst first
        , applySubst subst second )
-- | @'applySubst' s1 s2@ applies one substitution to another, replacing all the
-- bindings in the second argument @s2@ with their values mentioned in the first
-- one (@s1@).
instance Substitutable Subst where
    -- Only the right-hand sides of the target's bindings are rewritten; its
    -- set of keys is unchanged.
    applySubst s (Subst target) = Subst (M.map (applySubst s) target)
-- | >>> :{
-- putPprLn (Subst
-- [ ("a", TFun "b" "b")
-- , ("b", TEither "c" "d") ])
-- :}
-- { a > b → b
-- , b > Either c d }
instance Pretty Subst where
    ppr (Subst s) = "{ " <> T.intercalate "\n, " entries <> " }"
      where
        -- One rendered line per binding, in the key order of the map.
        entries = map entry (M.toList s)
        entry (k, v) = ppr k <> " > " <> ppr v
-- | Combine two substitutions by applying all substitutions mentioned in the
-- first argument to the type variables contained in the second.
instance Monoid Subst where
-- NOTE(review): this instance defines 'mappend' directly and the visible part
-- of the file contains no 'Semigroup Subst' instance. Compilers where
-- 'Semigroup' is a superclass of 'Monoid' would presumably reject it - confirm
-- against the GHC/base version this module targets.
--
-- Considering that all we can really do with a substitution is apply it, we
-- can use one of 'Substitutable's laws to show that substitutions
-- combine associatively,
--
-- @
-- applySubst (compose s1 (compose s2 s3))
-- = applySubst s1 . applySubst (compose s2 s3)
-- = applySubst s1 . applySubst s2 . applySubst s3
-- = applySubst (compose s1 s2) . applySubst s3
-- = applySubst (compose (compose s1 s2) s3)
-- @
--
-- 'M.union' is left-biased, so where both maps bind the same variable the
-- binding from @subst1@ wins.
mappend subst1 subst2 = Subst (s1 `M.union` s2)
where
Subst s1 = subst1
-- The second substitution is first rewritten by the first one.
Subst s2 = applySubst subst1 subst2
mempty = Subst M.empty
-- #############################################################################
-- #############################################################################
-- * Typechecking
-- #############################################################################
-- #############################################################################
-- $ Typechecking does two things:
--
-- 1. If two types are not immediately identical, attempt to 'unify' them
-- to get a type compatible with both of them
-- 2. 'infer' the most general type of a value by comparing the values in its
-- definition with the 'Env'ironment
-- #############################################################################
-- ** Inference context
-- #############################################################################
-- | The inference type holds a supply of unique names, and can fail with a
-- descriptive error if something goes wrong.
--
-- /Invariant:/ the supply must be infinite, or we might run out of names to
-- give to things.
newtype Infer a = Infer (ExceptT InferError (State [Name]) a)
-- The 'State' layer carries the list of names not handed out yet; 'ExceptT'
-- adds the ability to abort with an 'InferError'.
deriving (Functor, Applicative, Monad)
-- | Errors that can happen during the type inference process.
data InferError =
-- | Two types that don't match were attempted to be unified.
--
-- For example, @a -> a@ and @Int@ do not unify.
--
-- >>> putPprLn (CannotUnify (TFun "a" "a") (TConst "Int"))
-- Cannot unify a → a with Int
CannotUnify MType MType
-- | A 'TVar' is bound to an 'MType' that already contains it.
--
-- The canonical example of this is @λx. x x@, where the first @x@
-- in the body has to have type @a -> b@, and the second one @a@. Since
-- they're both the same @x@, this requires unification of @a@ with
-- @a -> b@, which only works if @a = a -> b = (a -> b) -> b = …@, yielding
-- an infinite type.
--
-- >>> putPprLn (OccursCheckFailed "a" (TFun "a" "a"))
-- Occurs check failed: a already appears in a → a
| OccursCheckFailed Name MType
-- | The value of an unknown identifier was read.
--
-- >>> putPprLn (UnknownIdentifier "a")
-- Unknown identifier: a
| UnknownIdentifier Name
-- 'Show' gives the constructor-level representation; the human-readable
-- messages shown in the examples above come from 'ppr'.
deriving Show
-- | >>> putPprLn (CannotUnify (TEither "a" "b") (TTuple "a" "b"))
-- Cannot unify Either a b with (a, b)
instance Pretty InferError where
    -- One equation per error constructor, each producing a one-line message.
    ppr (CannotUnify t1 t2) =
        "Cannot unify " <> ppr t1 <> " with " <> ppr t2
    ppr (OccursCheckFailed name ty) =
        "Occurs check failed: " <> ppr name <> " already appears in " <> ppr ty
    ppr (UnknownIdentifier name) =
        "Unknown identifier: " <> ppr name
-- | Evaluate a value in an 'Infer'ence context.
--
-- >>> let expr = EAbs "f" (EAbs "g" (EAbs "x" (EApp (EApp "f" "x") (EApp "g" "x"))))
-- >>> putPprLn expr
-- λf g x. f x (g x)
-- >>> let inferred = runInfer (infer (Env []) expr)
-- >>> let demonstrate = \case Right (_, ty) -> T.putStrLn (":: " <> ppr ty)
-- >>> demonstrate inferred
-- :: (c → e → f) → (c → e) → c → f
runInfer :: Infer a -- ^ Inference data
         -> Either InferError a
runInfer (Infer inf) = evalState (runExceptT inf) nameSupply
  where
    -- a, b, …, z, a1, b1, …, z1, a2, b2, … (never ends)
    nameSupply = map Name (letters <> concatMap numbered [1 :: Integer ..])
    letters = map T.singleton ['a'..'z']
    -- Every letter with the given number appended.
    numbered n = [ letter <> T.pack (show n) | letter <- letters ]
-- | Throw an 'InferError' in an 'Infer'ence context.
--
-- >>> case runInfer (throw (UnknownIdentifier "var")) of Left err -> putPprLn err
-- Unknown identifier: var
throw :: InferError -> Infer a
-- Raise the error in the 'ExceptT' layer and rewrap the action as 'Infer'.
throw err = Infer (throwE err)
-- #############################################################################
-- ** Unification
-- #############################################################################
-- $ Unification describes the process of making two different types compatible
-- by specializing them where needed. A desirable property to have here is being
-- able to find the most general unifier. Luckily, we'll be able to do that in
-- our type system.
-- | The unification of two 'MType's is the most general substitution that can be
-- applied to both of them in order to yield the same result.
--
-- >>> let m1 = TFun "a" "b"
-- >>> putPprLn m1
-- a → b
-- >>> let m2 = TFun "c" (TEither "d" "e")
-- >>> putPprLn m2
-- c → Either d e
-- >>> let inferSubst = unify (m1, m2)
-- >>> case runInfer inferSubst of Right subst -> putPprLn subst
-- { a > c
-- , b > Either d e }
unify :: (MType, MType) -> Infer Subst
unify types = case types of
    -- The alternatives are tried top to bottom, so two function types are
    -- matched structurally before the variable cases are considered.
    (TFun a b, TFun x y)          -> unifyBinary (a, b) (x, y)
    (TVar v, x)                   -> v `bindVariableTo` x
    (x, TVar v)                   -> v `bindVariableTo` x
    (TConst a, TConst b) | a == b -> pure mempty
    (TList a, TList b)            -> unify (a, b)
    (TEither a b, TEither x y)    -> unifyBinary (a, b) (x, y)
    (TTuple a b, TTuple x y)      -> unifyBinary (a, b) (x, y)
    -- Everything else, including two different constants, is a mismatch.
    (a, b)                        -> throw (CannotUnify a b)
  where
    -- Two-argument type constructors: unify the first operands, apply what
    -- was learned to both second operands, unify those, and combine.
    unifyBinary :: (MType, MType) -> (MType, MType) -> Infer Subst
    unifyBinary (a, b) (x, y) = do
        sFirst  <- unify (a, x)
        sSecond <- unify (applySubst sFirst b, applySubst sFirst y)
        pure (sFirst <> sSecond)
-- | Build a 'Subst'itution that binds a 'Name' of a 'TVar' to an 'MType'. The
-- resulting substitution should be idempotent, i.e. applying it more than once
-- to something should not be any different from applying it only once.
--
-- - In the simplest case, this just means building a substitution that just
-- does that.
-- - Substituting a 'Name' with a 'TVar' with the same name unifies a type
-- variable with itself, and the resulting substitution does nothing new.
-- - If the 'Name' we're trying to bind to an 'MType' already occurs in that
-- 'MType', the resulting substitution would not be idempotent: the 'MType'
-- would be replaced again, yielding a different result. This is known as the
-- Occurs Check.
bindVariableTo :: Name -> MType -> Infer Subst
bindVariableTo name mType
    -- Binding a variable to itself teaches us nothing new.
    | isSameVariable mType            = pure mempty
    -- Occurs check: the variable must not appear inside its own replacement.
    | name `S.member` freeMType mType = throw (OccursCheckFailed name mType)
    | otherwise                       = pure (Subst (M.singleton name mType))
  where
    isSameVariable (TVar v) = name == v
    isSameVariable _        = False
-- #############################################################################
-- ** Type inference
-- #############################################################################
-- $ Type inference is the act of finding out a value's type by looking at the
-- environment it is in, in order to make it compatible with it.
--
-- In literature, the Hindley-Damas-Milner inference algorithm ("Algorithm W")
-- is often presented in the style of logical formulas, and below you'll find
-- that version along with code that actually does what they say.
--
-- These formulas look a bit like fractions, where the "numerator" is a
-- collection of premises, and the denominator is the consequence if all of them
-- hold.
--
-- __Example:__
--
-- @
-- Γ ⊢ even : Int → Bool Γ ⊢ 1 : Int
-- ──────────────────────────────────────
-- Γ ⊢ even 1 : Bool
-- @
--
-- means that if we have a value of type @Int -> Bool@ called "even" and a value
-- of type @Int@ called @1@, then we also have a value of type @Bool@ via
-- @even 1@ available to us.
--
-- The actual inference rules are polymorphic versions of this example, and
-- the code comments will explain each step in detail.
-- -----------------------------------------------------------------------------
-- *** The language: typed lambda calculus
-- -----------------------------------------------------------------------------
-- | The syntax tree of the language we'd like to typecheck. You can view it as
-- a close relative to simply typed lambda calculus, having only the most
-- necessary syntax elements.
--
-- Since 'ELet' is non-recursive, the usual fixed-point function
-- @fix : (a → a) → a@ can be introduced to allow recursive definitions.
data Exp = ELit Lit -- ^ True, 1
| EVar Name -- ^ @x@
| EApp Exp Exp -- ^ @f x@
| EAbs Name Exp -- ^ @λx. e@
| ELet Name Exp Exp -- ^ @let x = e in e'@ (non-recursive)
-- Only 'Show' is derived for the syntax tree.
deriving Show
-- | Literals we'd like to support. Since we can't define new data types in our
-- simple type system, we'll have to hard-code the possible ones here.
-- Two kinds of literal exist: booleans and arbitrary-precision integers.
data Lit = LBool Bool
| LInteger Integer
deriving Show
-- | >>> putPprLn (EAbs "f" (EAbs "g" (EAbs "x" (EApp (EApp "f" "x") (EApp "g" "x")))))
-- λf g x. f x (g x)
instance Pretty Exp where
    ppr expr = case expr of
        ELit lit             -> ppr lit
        EVar name            -> ppr name
        EApp f x             -> function f <> " " <> argument x
        EAbs name body       -> "λ" <> ppr name <> binders body
        ELet name value body ->
            "let " <> ppr name <> " = " <> ppr value <> " in " <> ppr body
      where
        -- In function position, lets and lambdas are wrapped in parentheses.
        function e = case e of
            ELet{} -> parens e
            EAbs{} -> parens e
            _      -> ppr e
        -- In argument position an application is wrapped as well.
        argument e = case e of
            EApp{} -> parens e
            _      -> function e
        parens e = "(" <> ppr e <> ")"
        -- Directly nested lambdas share one λ: each further binder is
        -- appended after a space, and the dot introduces the body.
        binders (EAbs name body) = " " <> ppr name <> binders body
        binders body             = ". " <> ppr body
-- | >>> putPprLn (LBool True)
-- True
--
-- >>> putPprLn (LInteger 127)
-- 127
instance Pretty Lit where
    -- Both kinds of literal print exactly like their 'Show' output.
    ppr (LBool b)    = T.pack (show b)
    ppr (LInteger i) = T.pack (show i)
-- | >>> "var" :: Exp
-- EVar (Name "var")
instance IsString Exp where
    -- A string literal used as an expression denotes the variable of that name.
    fromString str = EVar (fromString str)
-- -----------------------------------------------------------------------------
-- *** Some useful definitions
-- -----------------------------------------------------------------------------
-- | Generate a fresh 'Name' in a type 'Infer'ence context. An example use case
-- of this is η expansion, which transforms @f@ into @λx. f x@, where "x" is a
-- new name, i.e. unbound in the current context.
fresh :: Infer MType
fresh = Infer $ do
    -- Pop the next unused name off the supply kept in the 'State' layer.
    supply <- lift get
    case supply of
        name : rest -> do
            lift (put rest)
            pure (TVar name)
        -- The supply is required to be infinite (see the invariant on
        -- 'Infer'), so an empty list here is a programming error rather than
        -- an inference failure. Matching with an explicit @case@ instead of a
        -- refutable @s:upply <- …@ bind avoids relying on a 'fail'
        -- implementation for the underlying monad stack.
        [] -> error "fresh: name supply exhausted"
-- | Add a new binding to the environment.
--
-- The Haskell equivalent would be defining a new value, for example in module
-- scope or in a @let@ block. This corresponds to the "comma" operation used in
-- formal notation,
--
-- @
-- Γ, x:σ ≡ extendEnv Γ (x,σ)
-- @
extendEnv :: Env -> (Name, PType) -> Env
-- 'M.insert' replaces an existing entry, so a new binding for a name that is
-- already present takes its place.
extendEnv (Env bindings) (name, sigma) = Env $ M.insert name sigma bindings
-- -----------------------------------------------------------------------------
-- *** Inferring the types of all language constructs
-- -----------------------------------------------------------------------------
-- | Infer the type of an 'Exp'ression in an 'Env'ironment, resulting in the
-- 'Exp's 'MType' along with a substitution that has to be done in order to reach
-- this goal.
--
-- This is widely known as /Algorithm W/.
infer :: Env -> Exp -> Infer (Subst, MType)
-- Pure dispatch: each syntactic form has its own inference function.
infer _   (ELit lit)    = inferLit lit
infer env (EVar name)   = inferVar env name
infer env (EApp f x)    = inferApp env f x
infer env (EAbs x e)    = inferAbs env x e
infer env (ELet x e e') = inferLet env x e e'
-- | Literals such as 'True' and '1' have their types hard-coded.
inferLit :: Lit -> Infer (Subst, MType)
-- Literals need no substitution; their type is a fixed constant.
inferLit lit = pure (mempty, TConst (typeName lit))
  where
    typeName :: Lit -> Name
    typeName LBool {}    = "Bool"
    typeName LInteger {} = "Integer"
-- | Inferring the type of a variable is done via
--
-- @
-- x:σ ∈ Γ τ = instantiate(σ)
-- ─────────────────────────────── [Var]
-- Γ ⊢ x:τ
-- @
--
-- This means that if @Γ@ /literally contains/ (@∈@) a value, then it also
-- /entails it/ (@⊢@) in all its instantiations.
inferVar :: Env -> Name -> Infer (Subst, MType)
inferVar env name = do
    -- Look the name up (x:σ ∈ Γ), then instantiate its polytype with fresh
    -- variables (τ = instantiate(σ)). No substitution is produced.
    tau <- instantiate =<< lookupEnv env name
    pure (mempty, tau)
-- | Look up the 'PType' of a 'Name' in the 'Env'ironment.
--
-- This checks whether @x:σ@ is /literally contained/ in @Γ@. For more details
-- about this, see the documentation of 'Env'.
--
-- To give a Haskell analogon, looking up @id@ when @Prelude@ is loaded, the
-- resulting 'PType' would be @id@'s type, namely @forall a. a -> a@.
lookupEnv :: Env -> Name -> Infer PType
-- A missing name is reported as 'UnknownIdentifier'.
lookupEnv (Env env) name =
    maybe (throw (UnknownIdentifier name)) pure (M.lookup name env)
-- | Bind all quantified variables of a 'PType' to 'fresh' type variables.
--
-- __Example:__ instantiating @forall a. a -> b -> a@ results in the 'MType'
-- @c -> b -> c@, where @c@ is a fresh name (to avoid shadowing issues).
--
-- You can picture the 'PType' to be the prototype converted to an instantiated
-- 'MType', which can now be used in the unification process.
--
-- Another way of looking at it is by simply forgetting which variables were
-- quantified, carefully avoiding name clashes when doing so.
--
-- 'instantiate' can also be seen as the opposite of 'generalize', which we'll
-- need later to convert an 'MType' to a 'PType'.
instantiate :: PType -> Infer MType
instantiate (Forall qs t) = do
    -- Build a map with one entry per quantified name and fill each entry
    -- with its own fresh type variable.
    freshVars <- traverse (const fresh) (M.fromSet (const ()) qs)
    -- Replacing the quantified names in the body gives the instance.
    pure (applySubst (Subst freshVars) t)
-- | Function application captures the fact that if we have a function and an
-- argument we can give to that function, we also have the result value of the
-- result type available to us.
--
-- @
-- Γ ⊢ f : fτ Γ ⊢ x : xτ fxτ = fresh unify(fτ, xτ → fxτ)
-- ─────────────────────────────────────────────────────────────── [App]
-- Γ ⊢ f x : fxτ
-- @
--
-- This rule says that given a function and a value with a type, the function
-- type has to unify with a function type that allows the value type to be its
-- argument.
inferApp
    :: Env
    -> Exp -- ^ __f__ x
    -> Exp -- ^ f __x__
    -> Infer (Subst, MType)
inferApp env f x = do
    -- Infer the function first, then the argument in the refined environment.
    (sFun, funTy) <- infer env f
    (sArg, argTy) <- infer (applySubst sFun env) x
    -- The result type is unknown so far and gets a fresh variable.
    resultTy <- fresh
    -- The function's type must be an arrow from the argument's type to it.
    sUnify <- unify (applySubst sArg funTy, TFun argTy resultTy)
    pure (sUnify <> sArg <> sFun, applySubst sUnify resultTy)
-- | Lambda abstraction is based on the fact that when we introduce a new
-- variable, the resulting lambda maps from that variable's type to the type of
-- the body.
--
-- @
-- τ = fresh σ = ∀∅. τ Γ, x:σ ⊢ e:τ'
-- ───────────────────────────────────────── [Abs]
-- Γ ⊢ λx.e : τ→τ'
-- @
--
-- Here, @Γ, x:τ@ is @Γ@ extended by one additional mapping, namely @x:τ@.
--
-- Abstraction is typed by extending the environment by a new 'MType', and if
-- under this assumption we can construct a function mapping to a value of that
-- type, we can say that the lambda takes a value and maps to it.
inferAbs
    :: Env
    -> Name -- ^ λ__x__. e
    -> Exp -- ^ λx. __e__
    -> Infer (Subst, MType)
inferAbs env x e = do
    -- The parameter gets a fresh type, added to the environment as a
    -- polytype that quantifies over nothing.
    paramTy <- fresh
    let bodyEnv = extendEnv env (x, Forall S.empty paramTy)
    (s, bodyTy) <- infer bodyEnv e
    -- Inferring the body may have refined the parameter's type.
    pure (s, TFun (applySubst s paramTy) bodyTy)
-- | A let binding allows extending the environment with new bindings in a
-- principled manner. To do this, we first have to typecheck the expression to
-- be introduced. The result of this is then generalized to a 'PType', since let
-- bindings introduce new polymorphic values, which are then added to the
-- environment. Now we can finally typecheck the body of the "in" part of the
-- let binding.
--
-- Note that in our simple language, let is non-recursive, but recursion can be
-- introduced as usual by adding a primitive @fix : (a → a) → a@ if desired.
--
-- @
-- Γ ⊢ e:τ σ = gen(Γ,τ) Γ, x:σ ⊢ e':τ'
-- ─────────────────────────────────────────────── [Let]
-- Γ ⊢ let x = e in e' : τ'
-- @
inferLet
    :: Env
    -> Name -- ^ let __x__ = e in e'
    -> Exp -- ^ let x = __e__ in e'
    -> Exp -- ^ let x = e in __e'__
    -> Infer (Subst, MType)
inferLet env x e e' = do
    -- Type the bound expression in the original environment.
    (sBound, boundTy) <- infer env e
    let refinedEnv = applySubst sBound env
        -- Generalize against the refined environment to get a polytype.
        scheme = generalize refinedEnv boundTy
    -- Type the body with the new binding in scope.
    (sBody, bodyTy) <- infer (extendEnv refinedEnv (x, scheme)) e'
    pure (sBody <> sBound, bodyTy)
-- | Turn an 'MType' into a 'PType' by universally quantifying every type
-- variable that occurs in it, except for the ones that are already free in
-- the environment.
--
-- >>> let tau = TFun "a" (TFun "b" "a")
-- >>> putPprLn tau
-- a → b → a
-- >>> putPprLn (generalize (Env [("x", Forall [] "b")]) tau)
-- ∀a. a → b → a
--
-- Formally,
--
-- @
-- gen(Γ,τ) = ∀{α}. τ
--     where {α} = free(τ) – free(Γ)
-- @
--
-- 'generalize' is the counterpart of 'instantiate', which goes the other way
-- and converts a 'PType' to an 'MType'.
generalize :: Env -> MType -> PType
generalize env mType = Forall quantified mType
  where
    inType     = freeMType mType
    inEnv      = freeEnv env
    quantified = S.difference inType inEnv

185
Main.hs
View File

@ -1,185 +0,0 @@
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE OverloadedStrings #-}
module Main where
import qualified Data.Map as M
import Data.Monoid
import Data.Text (Text)
import qualified Data.Text.IO as T
import HindleyMilner
-- #############################################################################
-- #############################################################################
-- * Testing
-- #############################################################################
-- #############################################################################
-- #############################################################################
-- ** A small custom Prelude
-- #############################################################################
-- | A small environment of primitive operations and data constructors used by
-- the examples in 'main'.
--
-- Every entry is a closed type scheme: all type variables mentioned in the
-- body are listed in the quantifier. In particular @Cont/>>=@ quantifies over
-- @a@, @b@ and @r@; leaving @b@ and @r@ unquantified would make them free
-- variables of the environment, which 'generalize' refuses to quantify over.
prelude :: Env
prelude = Env (M.fromList
    [ ("(*)",        Forall []              (tInteger ~> tInteger ~> tInteger))
    , ("(+)",        Forall []              (tInteger ~> tInteger ~> tInteger))
    , ("(,)",        Forall ["a","b"]       ("a" ~> "b" ~> TTuple "a" "b"))
    , ("(-)",        Forall []              (tInteger ~> tInteger ~> tInteger))
    , ("(.)",        Forall ["a", "b", "c"] (("b" ~> "c") ~> ("a" ~> "b") ~> "a" ~> "c"))
    , ("(<)",        Forall []              (tInteger ~> tInteger ~> tBool))
    , ("(<=)",       Forall []              (tInteger ~> tInteger ~> tBool))
    , ("(>)",        Forall []              (tInteger ~> tInteger ~> tBool))
    , ("(>=)",       Forall []              (tInteger ~> tInteger ~> tBool))
    , ("const",      Forall ["a","b"]       ("a" ~> "b" ~> "a"))
    , ("Cont/>>=",   Forall ["a","b","r"]   ((("a" ~> "r") ~> "r") ~> ("a" ~> (("b" ~> "r") ~> "r")) ~> (("b" ~> "r") ~> "r")))
    , ("find",       Forall ["a","b"]       (("a" ~> tBool) ~> TList "a" ~> tMaybe "a"))
    , ("fix",        Forall ["a"]           (("a" ~> "a") ~> "a"))
    , ("foldr",      Forall ["a","b"]       (("a" ~> "b" ~> "b") ~> "b" ~> TList "a" ~> "b"))
    , ("id",         Forall ["a"]           ("a" ~> "a"))
    , ("ifThenElse", Forall ["a"]           (tBool ~> "a" ~> "a" ~> "a"))
    , ("Left",       Forall ["a","b"]       ("a" ~> TEither "a" "b"))
    , ("length",     Forall ["a"]           (TList "a" ~> tInteger))
    , ("map",        Forall ["a","b"]       (("a" ~> "b") ~> TList "a" ~> TList "b"))
    , ("reverse",    Forall ["a"]           (TList "a" ~> TList "a"))
    , ("Right",      Forall ["a","b"]       ("b" ~> TEither "a" "b"))
    , ("[]",         Forall ["a"]           (TList "a"))
    , ("(:)",        Forall ["a"]           ("a" ~> TList "a" ~> TList "a"))
    ])
  where
    tBool = TConst "Bool"
    tInteger = TConst "Integer"
    -- Maybe is encoded as Either with a unit-typed left side.
    tMaybe = TEither (TConst "()")
-- | Infix alias for 'TFun', so that function types read like arrows.
--
-- With it,
--
-- @
-- Forall ["a","b"] ("a" ~> "b" ~> "a")
-- @
--
-- is shorthand for
--
-- @
-- Forall ["a","b"] (TFun "a" (TFun "b" "a"))
-- @
(~>) :: MType -> MType -> MType
argType ~> resultType = TFun argType resultType
infixr 9 ~>
-- #############################################################################
-- ** Run it!
-- #############################################################################
-- | Run type inference on a couple of values and print the results.
--
-- The first group of expressions is expected to typecheck against 'prelude';
-- the second group is expected to produce inference errors, which 'showType'
-- renders as text instead of failing.
main :: IO ()
main = do
    -- Infer against the prelude, indent the rendered result, and print it.
    let inferAndPrint = T.putStrLn . (" " <>) . showType prelude
    T.putStrLn "Well-typed:"
    do
        inferAndPrint (lambda ["x"] "x")
        inferAndPrint (lambda ["f","g","x"] (apply "f" ["x", apply "g" ["x"]]))
        inferAndPrint (lambda ["f","g","x"] (apply "f" [apply "g" ["x"]]))
        inferAndPrint (lambda ["m", "k", "c"] (apply "m" [lambda ["x"] (apply "k" ["x", "c"])])) -- >>= for Cont
        inferAndPrint (lambda ["f"] (apply "(.)" ["reverse", apply "map" ["f"]]))
        inferAndPrint (apply "find" [lambda ["x"] (apply "(>)" ["x", int 0])])
        inferAndPrint (apply "map" [apply "map" ["map"]])
        inferAndPrint (apply "(*)" [int 1, int 2])
        inferAndPrint (apply "foldr" ["(+)", int 0])
        inferAndPrint (apply "map" ["length"])
        inferAndPrint (apply "map" ["map"])
        inferAndPrint (lambda ["x"] (apply "ifThenElse" [apply "(<)" ["x", int 0], int 0, "x"]))
        inferAndPrint (lambda ["x"] (apply "fix" [lambda ["xs"] (apply "(:)" ["x", "xs"])]))
    T.putStrLn "Ill-typed:"
    do
        inferAndPrint (apply "(*)" [int 1, bool True])
        inferAndPrint (apply "foldr" [int 1])
        inferAndPrint (lambda ["x"] (apply "x" ["x"]))
        inferAndPrint (lambda ["x"] (ELet "xs" (apply "(:)" ["x", "xs"]) "xs"))
-- | Wrap an expression in one lambda per name, outermost name first.
--
-- @
-- lambda ["f", "x"] (EApp "f" "x")
-- @
--
-- builds the same expression as
--
-- @
-- EAbs "f" (EAbs "x" (EApp "f" "x"))
-- @
--
-- i.e.
--
-- @
-- λf x. f x
-- @
lambda :: [Name] -> Exp -> Exp
lambda = flip (foldr EAbs)
-- | Apply a function expression to a list of arguments, left to right.
--
-- @
-- apply "f" ["x", "y", "z"]
-- @
--
-- builds the same expression as
--
-- @
-- EApp (EApp (EApp "f" "x") "y") "z"
-- @
--
-- i.e.
--
-- @
-- f x y z
-- @
apply :: Exp -> [Exp] -> Exp
apply function arguments = foldl EApp function arguments
-- | Wrap an 'Integer' as a literal expression.
int :: Integer -> Exp
int n = ELit (LInteger n)
-- | Wrap a 'Bool' as a literal expression.
bool :: Bool -> Exp
bool b = ELit (LBool b)
-- | Run the inference algorithm on an expression and render the outcome.
--
-- On success the inferred type is closed over with an empty environment and
-- rendered as @expr :: type@; on failure the error is rendered instead.
showType :: Env  -- ^ Starting environment, e.g. 'prelude'.
         -> Exp  -- ^ Expression to typecheck
         -> Text -- ^ Text representation of the result. Contains an error
                 --   message on failure.
showType env expr = either failure success (runInfer inference)
  where
    -- Apply the final substitution to the inferred type, then quantify over
    -- everything that is left.
    close (subst, mType) = generalize (Env mempty) (applySubst subst mType)
    inference   = fmap close (infer env expr)
    failure err = "Error inferring type of " <> ppr expr <>": " <> ppr err
    success ty  = ppr expr <> " :: " <> ppr ty

View File

@ -1,78 +1,4 @@
# Schala - A Programming Language Implementation
# Schala - a programming language meta-interpreter
Schala is a Rust framework written to make it easy to
create and experiment with toy programming languages. It provides
a common REPL, and a trait `ProgrammingLanguage` with provisions
for tokenizing text, parsing tokens, evaluating an abstract syntax tree,
and other tasks that are common to all programming languages.
Schala is implemented as a Rust library `schala_lib`, which provides a
`schala_main` function. This function serves as the main loop of the REPL, if run
interactively, or otherwise reads and interprets programming language source
files. It expects as input a vector of `PLIGenerator`, which is a type representing
a closure that returns a boxed trait object that implements the `ProgrammingLanguage` trait,
and stores any persistent state relevant to that programming language. The ability
to share state between different programming languages is in the works.
## About
Schala started out life as an experiment in writing a Javascript-like
programming language that would never encounter any kind of runtime value
error, but rather always return `null` under any kind of error condition. I had
seen one too many Javascript `Uncaught TypeError: Cannot read property ___ of
undefined` messages, and I was a bit frustrated. Plus I had always wanted to
write a programming language from scratch, and Rust is a fun language to
program in. Over time I became interested in playing around with other sorts
of programming languages as well, and wanted to make the process as general as
possible.
The name of the project comes from Schala the Princess of Zeal from the 1995
SNES RPG *Chrono Trigger*. I like classic JRPGs and enjoyed the thought of
creating a language name confusingly close to Scala. The naming scheme for
languages implemented with the Schala meta-interpreter is Chrono Trigger
characters.
Schala is incomplete alpha software and is not ready for public release.
## Languages implemented using the meta-interpreter
* The eponymous *Schala* language is an interpreted/compiled scripting language,
designed to be relatively simple, but with a reasonably sophisticated type
system.
* *Maaru* was the original Schala (since renamed to free up the name *Schala*
for the above language), a very simple dynamically-typed scripting language
such that all possible runtime errors result in null rather than program
failure.
* *Robo* is an experiment in creating a lazy, functional, strongly-typed language
much like Haskell
* *Rukka* is a straightforward LISP implementation
## Reference works
Here's a partial list of resources I've made use of in the process
of learning how to write a programming language.
### Type-checking
https://skillsmatter.com/skillscasts/10868-inside-the-rust-compiler
### Evaluation
*Understanding Computation*, Tom Stuart, O'Reilly 2013
*Basics of Compiler Design*, Torben Mogensen
### Parsing
http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
https://soc.github.io/languages/unified-condition-syntax
[Crafting Interpreters](http://www.craftinginterpreters.com/)
### LLVM
http://blog.ulysse.io/2016/07/03/llvm-getting-started.html
### Rust resources
https://thefullsnack.com/en/rust-for-the-web.html
https://rocket.rs/guide/getting-started/
`schala` is an implementation of a yet-unnamed quasi-functional programming
language.

114
TODO.md
View File

@ -1,114 +0,0 @@
# TODO Items
- https://nshipster.com/never/
-https://cranelift.readthedocs.io/en/latest/?badge=latest
-consult http://gluon-lang.org/book/embedding-api.html
- if/match playground
simple if
`if x == 1.0 { "a" } else { "b" }`
one comparison multiple targets:
`if x == { 1.0 -> "a", 2.0 -> "b", else -> "c" }`
different comparison operators/ method calls:
`if x { == 1.0 -> "a", eq NaN -> "n", .hella() -> "h", else -> "z" }`
pattern matching/introducing bindings:
`if alice { .age < 18 -> "18", is Person("Alice", age) -> "${age}", else -> "none" }`
pattern matching w/ if-let:
`if person is Person("Alice", age) { "${age}" } else { "nope" }`
-https://soc.github.io/languages/unified-condition-syntax syntax:
`if <cond-expr> then <then-expr> else <else-expr>`
`if <half-expr> \n <rest-expr1> then <result1-expr> \n <rest-expr2> then <result-expr2> else <result3-expr>`
-and rest-exprs (or "targets") can have 'is' for pattern-matching, actually so can a full cond-expr
UNIFIED IF EXPRESSIONS FINAL WORK:
basic syntax:
`if_expr := if discriminator '{' (guard_expr)* '}'`
`guard_expr := pattern 'then' block_or_expr`
`pattern := rhs | is_pattern`
`is_pattern := 'is' ???`
`rhs := expression | ???`
if the only two guard patterns are true and false, then the abbreviated syntax:
`'if' discriminator 'then' block_or_expr 'else' block_or_expr`
can replace `'if' discriminator '{' 'true' 'then' block_or_expr; 'false' 'then' block_or_expr '}'`
- Next priorities: - get ADTs working, get matches working
- inclusive/exclusive range syntax like .. vs ..=
- sketch of an idea for the REPL:
-each compiler pass should be a (procedural?) macro like
compiler_pass!("parse", dataproducts: ["ast", "parse_tree"], {
match parsing::parse(INPUT) {
Ok(
PASS.add_artifact(
}
-should have an Idris-like `cast To From` function
- REPL:
- want to be able to do things like `:doc Identifier`, and have the language load up these definitions to the REPL
* change 'trait' to 'interface'
-think about idris-related ideas of multiple implementations of a type for an interface (+ vs * impl for monoids, for preorder/inorder/postorder for Foldable)
* Share state between programming languages
* idea for Schala - scoped types - be able to define a quick enum type scoped to a function or something, that only is meant to be used as a quick bespoke interface between two other things
* another idea, allow:
type enum {
type enum MySubVariant {
SubVariant1, SubVariant2, etc.
}
Variant1(MySubVariant),
Variant2(...),
}
* idea for Schala: both currying *and* default arguments!
ex. fn a(b: Int, c:Int, d:Int = 1) -> Int
a(1,2) : Int
a(1,2,d=2): Int
a(_,1,3) : Int -> Int
a(1,2, c=_): Int -> Int
a(_,_,_) : Int -> Int -> Int -> Int
- AST : maybe replace the Expression type with "Ascription(TypeName, Box<Expression>) nodes??
- parser: add a "debug" field to the Parser struct for all debug-related things
-scala-style html"dfasfsadf${}" string interpolations!
*Compiler passes architecture
-ProgrammingLanguageInterface defines evaluate_in_repl() and evaluate_no_repl() functions
-these take in a vec of CompilerPasses
struct CompilerPass {
name: String,
run: fn(PrevPass) -> NextPass
}
-change "Type...." names in parser.rs to "Anno..." for non-collision with names in typechecking.rs
-get rid of code pertaining to compilation specifically, have a more general notion of "execution type"

8
experiments/Cargo.toml Normal file
View File

@ -0,0 +1,8 @@
[package]
name = "experiments"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

3
experiments/src/main.rs Normal file
View File

@ -0,0 +1,3 @@
/// Entry point of the `experiments` crate; currently only prints a greeting.
fn main() {
    println!("Hello, world!");
}

View File

@ -0,0 +1,48 @@
module.exports = grammar({
name: "TestLang",
rules: {
source_file: $ => repeat($._definition),
_definition: $ => choice(
$.function_definition
//TODO others
),
function_definition: $ => seq(
'fn',
$.identifier,
$.parameter_list,
field("return_type", optional($._type)),
$.block,
),
parameter_list: $ => seq("(", /* TODO */ ")"),
block: $ => seq(
"{",
choice(
repeat($._statement),
"",
),
"}"
),
_statement: $ => choice(
$._return_statement
),
_return_statement: $ => seq("return", $._expression, ";"),
_expression: $ => choice($.identifier, $.unary, $.binary),
unary: $ => prec(4, choice(seq("-", $._expression), seq("!", $._expression))),
binary: $ => choice(prec.left(2, seq($._expression, "*", $._expression)), prec.left(1, seq($._expression, "+", $._expression))),
_type: $ => "bool",
_type: $ => choice(
$.primitive_type,
),
primitive_type: $ => choice("bool", "int"),
identifier: $ => /[a-z]+/,
}
});

View File

@ -0,0 +1,8 @@
_default:
just --list
# Test out the grammar: regenerate the parser, then run the corpus tests.
# `set -euo pipefail` makes the script stop if generation fails; without it
# the tests would still run against a stale parser.
test-grammar:
    #!/usr/bin/env bash
    set -euo pipefail
    tree-sitter generate
    tree-sitter test

View File

@ -0,0 +1,380 @@
{
"name": "tree-sitter-test",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "tree-sitter-test",
"version": "1.0.0",
"hasInstallScript": true,
"license": "ISC",
"dependencies": {
"node-addon-api": "^7.1.0",
"node-gyp-build": "^4.8.0"
},
"devDependencies": {
"prebuildify": "^6.0.0",
"tree-sitter-cli": "^0.22.5"
},
"peerDependencies": {
"tree-sitter": "^0.21.0"
},
"peerDependenciesMeta": {
"tree_sitter": {
"optional": true
}
}
},
"node_modules/base64-js": {
"version": "1.5.1",
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
"integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
"dev": true,
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
]
},
"node_modules/bl": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz",
"integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==",
"dev": true,
"dependencies": {
"buffer": "^5.5.0",
"inherits": "^2.0.4",
"readable-stream": "^3.4.0"
}
},
"node_modules/buffer": {
"version": "5.7.1",
"resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz",
"integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==",
"dev": true,
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
],
"dependencies": {
"base64-js": "^1.3.1",
"ieee754": "^1.1.13"
}
},
"node_modules/chownr": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz",
"integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==",
"dev": true
},
"node_modules/end-of-stream": {
"version": "1.4.4",
"resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz",
"integrity": "sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==",
"dev": true,
"dependencies": {
"once": "^1.4.0"
}
},
"node_modules/fs-constants": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz",
"integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==",
"dev": true
},
"node_modules/ieee754": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
"integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==",
"dev": true,
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
]
},
"node_modules/inherits": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
"dev": true
},
"node_modules/lru-cache": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz",
"integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==",
"dev": true,
"dependencies": {
"yallist": "^4.0.0"
},
"engines": {
"node": ">=10"
}
},
"node_modules/minimist": {
"version": "1.2.8",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
"integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==",
"dev": true,
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/mkdirp-classic": {
"version": "0.5.3",
"resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz",
"integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==",
"dev": true
},
"node_modules/node-abi": {
"version": "3.60.0",
"resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.60.0.tgz",
"integrity": "sha512-zcGgwoXbzw9NczqbGzAWL/ToDYAxv1V8gL1D67ClbdkIfeeDBbY0GelZtC25ayLvVjr2q2cloHeQV1R0QAWqRQ==",
"dev": true,
"dependencies": {
"semver": "^7.3.5"
},
"engines": {
"node": ">=10"
}
},
"node_modules/node-addon-api": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-7.1.0.tgz",
"integrity": "sha512-mNcltoe1R8o7STTegSOHdnJNN7s5EUvhoS7ShnTHDyOSd+8H+UdWODq6qSv67PjC8Zc5JRT8+oLAMCr0SIXw7g==",
"engines": {
"node": "^16 || ^18 || >= 20"
}
},
"node_modules/node-gyp-build": {
"version": "4.8.0",
"resolved": "https://registry.npmjs.org/node-gyp-build/-/node-gyp-build-4.8.0.tgz",
"integrity": "sha512-u6fs2AEUljNho3EYTJNBfImO5QTo/J/1Etd+NVdCj7qWKUSN/bSLkZwhDv7I+w/MSC6qJ4cknepkAYykDdK8og==",
"bin": {
"node-gyp-build": "bin.js",
"node-gyp-build-optional": "optional.js",
"node-gyp-build-test": "build-test.js"
}
},
"node_modules/npm-run-path": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/npm-run-path/-/npm-run-path-3.1.0.tgz",
"integrity": "sha512-Dbl4A/VfiVGLgQv29URL9xshU8XDY1GeLy+fsaZ1AA8JDSfjvr5P5+pzRbWqRSBxk6/DW7MIh8lTM/PaGnP2kg==",
"dev": true,
"dependencies": {
"path-key": "^3.0.0"
},
"engines": {
"node": ">=8"
}
},
"node_modules/once": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
"integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
"dev": true,
"dependencies": {
"wrappy": "1"
}
},
"node_modules/path-key": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz",
"integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==",
"dev": true,
"engines": {
"node": ">=8"
}
},
"node_modules/prebuildify": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/prebuildify/-/prebuildify-6.0.1.tgz",
"integrity": "sha512-8Y2oOOateom/s8dNBsGIcnm6AxPmLH4/nanQzL5lQMU+sC0CMhzARZHizwr36pUPLdvBnOkCNQzxg4djuFSgIw==",
"dev": true,
"dependencies": {
"minimist": "^1.2.5",
"mkdirp-classic": "^0.5.3",
"node-abi": "^3.3.0",
"npm-run-path": "^3.1.0",
"pump": "^3.0.0",
"tar-fs": "^2.1.0"
},
"bin": {
"prebuildify": "bin.js"
}
},
"node_modules/pump": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz",
"integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==",
"dev": true,
"dependencies": {
"end-of-stream": "^1.1.0",
"once": "^1.3.1"
}
},
"node_modules/readable-stream": {
"version": "3.6.2",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
"integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
"dev": true,
"dependencies": {
"inherits": "^2.0.3",
"string_decoder": "^1.1.1",
"util-deprecate": "^1.0.1"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/safe-buffer": {
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
"integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
"dev": true,
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
]
},
"node_modules/semver": {
"version": "7.6.0",
"resolved": "https://registry.npmjs.org/semver/-/semver-7.6.0.tgz",
"integrity": "sha512-EnwXhrlwXMk9gKu5/flx5sv/an57AkRplG3hTK68W7FRDN+k+OWBj65M7719OkA82XLBxrcX0KSHj+X5COhOVg==",
"dev": true,
"dependencies": {
"lru-cache": "^6.0.0"
},
"bin": {
"semver": "bin/semver.js"
},
"engines": {
"node": ">=10"
}
},
"node_modules/string_decoder": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
"integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==",
"dev": true,
"dependencies": {
"safe-buffer": "~5.2.0"
}
},
"node_modules/tar-fs": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.1.tgz",
"integrity": "sha512-V0r2Y9scmbDRLCNex/+hYzvp/zyYjvFbHPNgVTKfQvVrb6guiE/fxP+XblDNR011utopbkex2nM4dHNV6GDsng==",
"dev": true,
"dependencies": {
"chownr": "^1.1.1",
"mkdirp-classic": "^0.5.2",
"pump": "^3.0.0",
"tar-stream": "^2.1.4"
}
},
"node_modules/tar-stream": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz",
"integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==",
"dev": true,
"dependencies": {
"bl": "^4.0.3",
"end-of-stream": "^1.4.1",
"fs-constants": "^1.0.0",
"inherits": "^2.0.3",
"readable-stream": "^3.1.1"
},
"engines": {
"node": ">=6"
}
},
"node_modules/tree-sitter": {
"version": "0.21.1",
"resolved": "https://registry.npmjs.org/tree-sitter/-/tree-sitter-0.21.1.tgz",
"integrity": "sha512-7dxoA6kYvtgWw80265MyqJlkRl4yawIjO7S5MigytjELkX43fV2WsAXzsNfO7sBpPPCF5Gp0+XzHk0DwLCq3xQ==",
"hasInstallScript": true,
"peer": true,
"dependencies": {
"node-addon-api": "^8.0.0",
"node-gyp-build": "^4.8.0"
}
},
"node_modules/tree-sitter-cli": {
"version": "0.22.5",
"resolved": "https://registry.npmjs.org/tree-sitter-cli/-/tree-sitter-cli-0.22.5.tgz",
"integrity": "sha512-c3VT46Bc3a6pEd0JAwufbqEw9Q2FRLDp5E230hGvnr+Hivw+Y6jyeP+3T89KDptvn48MOPVmbgaLm69xYgLVTw==",
"dev": true,
"hasInstallScript": true,
"bin": {
"tree-sitter": "cli.js"
}
},
"node_modules/tree-sitter/node_modules/node-addon-api": {
"version": "8.0.0",
"resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-8.0.0.tgz",
"integrity": "sha512-ipO7rsHEBqa9STO5C5T10fj732ml+5kLN1cAG8/jdHd56ldQeGj3Q7+scUS+VHK/qy1zLEwC4wMK5+yM0btPvw==",
"peer": true,
"engines": {
"node": "^18 || ^20 || >= 21"
}
},
"node_modules/util-deprecate": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
"integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==",
"dev": true
},
"node_modules/wrappy": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
"integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
"dev": true
},
"node_modules/yallist": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz",
"integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==",
"dev": true
}
}
}

View File

@ -0,0 +1,38 @@
{
"name": "tree-sitter-test",
"version": "1.0.0",
"main": "index.js",
"types": "bindings/node",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"install": "node-gyp-build",
"prebuildify": "prebuildify --napi --strip"
},
"author": "",
"license": "ISC",
"description": "",
"dependencies": {
"node-addon-api": "^7.1.0",
"node-gyp-build": "^4.8.0"
},
"peerDependencies": {
"tree-sitter": "^0.21.0"
},
"peerDependenciesMeta": {
"tree_sitter": {
"optional": true
}
},
"devDependencies": {
"prebuildify": "^6.0.0",
"tree-sitter-cli": "^0.22.5"
},
"files": [
"grammar.js",
"binding.gyp",
"prebuilds/**",
"bindings/node/*",
"queries/*",
"src/**"
]
}

View File

@ -0,0 +1,26 @@
=============
Initial test
=============
fn main() {
}
----
(source_file
(function_definition
(identifier)
(parameter_list)
(block)
)
)
====
Another test
====
fn yolo() bool { }
----
(source_file
(function_definition
(identifier) (parameter_list) (primitive_type) (block)))

View File

@ -1,11 +0,0 @@
[package]
name = "maaru-lang"
version = "0.1.0"
authors = ["greg <greg.shuflin@protonmail.com>"]
[dependencies]
itertools = "0.5.8"
take_mut = "0.1.3"
llvm-sys = "*"
schala-repl = { path = "../schala-repl" }

View File

@ -1,279 +0,0 @@
extern crate llvm_sys;
use std::collections::HashMap;
use self::llvm_sys::prelude::*;
use self::llvm_sys::{LLVMIntPredicate};
use parser::{AST, Statement, Function, Prototype, Expression, BinOp};
use schala_repl::LLVMCodeString;
use schala_repl::llvm_wrap as LLVMWrap;
type VariableMap = HashMap<String, LLVMValueRef>;
/// Mutable state threaded through every `codegen` call while one AST is compiled.
struct CompilationData {
    /// LLVM context used to create types and basic blocks.
    context: LLVMContextRef,
    /// Module that generated functions are added to.
    module: LLVMModuleRef,
    /// Instruction builder; its insertion point moves as code is emitted.
    builder: LLVMBuilderRef,
    /// Maps a variable name to the LLVM value currently bound to it.
    /// Populated by assignments and by function parameters.
    variables: VariableMap,
    /// The synthetic `main` function that top-level code is emitted into.
    main_function: LLVMValueRef,
    /// The function whose body is being generated, or `None` otherwise.
    current_function: Option<LLVMValueRef>,
}
/// Compiles `ast` into an LLVM module and returns its textual IR.
///
/// A `main` function returning a 64-bit integer is created, the whole AST is
/// generated into it, and the value of the last statement becomes `main`'s
/// return value. The builder, module and context are disposed before returning.
pub fn compile_ast(ast: AST) -> LLVMCodeString {
    println!("Compiling!");
    let names: VariableMap = HashMap::new();

    let context = LLVMWrap::create_context();
    let module = LLVMWrap::module_create_with_name("example module");
    let builder = LLVMWrap::CreateBuilderInContext(context);

    // `main` takes no arguments and returns an i64.
    let program_return_type = LLVMWrap::Int64TypeInContext(context);
    let main_function_type = LLVMWrap::FunctionType(program_return_type, Vec::new(), false);
    let main_function: LLVMValueRef = LLVMWrap::AddFunction(module, "main", main_function_type);

    let mut data = CompilationData {
        context: context,
        builder: builder,
        module: module,
        variables: names,
        main_function: main_function,
        current_function: None,
    };

    // Top-level code is emitted into main's entry block.
    let bb = LLVMWrap::AppendBasicBlockInContext(data.context, data.main_function, "entry");
    LLVMWrap::PositionBuilderAtEnd(builder, bb);

    let value = ast.codegen(&mut data);

    LLVMWrap::BuildRet(builder, value);

    // The IR is rendered to a string before anything is disposed.
    let ret = LLVMWrap::PrintModuleToString(module);

    // Clean up. Values created in the context mostly get cleaned up there.
    LLVMWrap::DisposeBuilder(builder);
    LLVMWrap::DisposeModule(module);
    LLVMWrap::ContextDispose(context);
    LLVMCodeString(ret)
}
trait CodeGen {
fn codegen(&self, &mut CompilationData) -> LLVMValueRef;
}
impl CodeGen for AST {
    /// Generates every statement in order and returns the value of the last
    /// one, or a constant zero when there are no statements.
    fn codegen(&self, data: &mut CompilationData) -> LLVMValueRef {
        let int_type = LLVMWrap::Int64TypeInContext(data.context);
        let zero = LLVMWrap::ConstInt(int_type, 0, false);
        // Each statement's value replaces the previous one; only the last survives.
        IntoIterator::into_iter(self).fold(zero, |_, statement| statement.codegen(data))
    }
}
impl CodeGen for Statement {
    /// Delegates to the wrapped expression or function definition.
    fn codegen(&self, data: &mut CompilationData) -> LLVMValueRef {
        use self::Statement::*;
        match *self {
            ExprNode(ref expression) => expression.codegen(data),
            FuncDefNode(ref function) => function.codegen(data),
        }
    }
}
impl CodeGen for Function {
    /// Emits the function's prototype and body, then resumes code generation
    /// where the caller left off.
    ///
    /// Returns the value of the last expression in the body (a constant zero
    /// for an empty body), which is also what the function returns.
    fn codegen(&self, data: &mut CompilationData) -> LLVMValueRef {
        /* should have a check here for function already being defined */

        // Remember where the caller was emitting code. Resuming at main's
        // first block would be wrong once main has branched into later blocks.
        let caller_block = LLVMWrap::GetInsertBlock(data.builder);
        let enclosing_function = data.current_function;

        let function = self.prototype.codegen(data);

        data.current_function = Some(function);

        let return_type = LLVMWrap::Int64TypeInContext(data.context);
        let mut ret = LLVMWrap::ConstInt(return_type, 0, false);

        let block = LLVMWrap::AppendBasicBlockInContext(data.context, function, "entry");
        LLVMWrap::PositionBuilderAtEnd(data.builder, block);

        // insert function params into variables
        // NOTE(review): these bindings are never removed afterwards, so a
        // parameter can shadow a same-named variable of the caller — confirm
        // whether scoping should be added.
        for value in LLVMWrap::GetParams(function) {
            let name = LLVMWrap::GetValueName(value);
            data.variables.insert(name, value);
        }

        for expr in &self.body {
            ret = expr.codegen(data);
        }

        LLVMWrap::BuildRet(data.builder, ret);

        // Resume emission exactly where the caller was before this definition.
        LLVMWrap::PositionBuilderAtEnd(data.builder, caller_block);
        data.current_function = enclosing_function;

        ret
    }
}
impl CodeGen for Prototype {
    /// Declares the function in the module and names its LLVM parameters
    /// after the prototype's parameters. Every parameter and the return value
    /// are 64-bit integers.
    fn codegen(&self, data: &mut CompilationData) -> LLVMValueRef {
        let return_type = LLVMWrap::Int64TypeInContext(data.context);
        let arguments: Vec<LLVMTypeRef> = (0..self.parameters.len())
            .map(|_| LLVMWrap::Int64TypeInContext(data.context))
            .collect();

        let function_type = LLVMWrap::FunctionType(return_type, arguments, false);
        let function = LLVMWrap::AddFunction(data.module, &*self.name, function_type);

        for (index, param) in LLVMWrap::GetParams(function).iter().enumerate() {
            // The function was declared with one argument per parameter, so a
            // missing name here means an internal inconsistency.
            let name = match self.parameters.get(index) {
                Some(name) => name,
                None => panic!("Failed this check at index {}", index),
            };
            LLVMWrap::SetValueName(*param, name);
        }

        function
    }
}
impl CodeGen for Expression {
    /// Emits IR for one expression and returns the value it evaluates to.
    ///
    /// Handles variables, assignment, binary operators, numbers, conditionals
    /// and blocks. Any other expression kind prints a message and hits
    /// `unimplemented!()`.
    ///
    /// # Panics
    ///
    /// Panics on a reference to an unknown variable, on an assignment whose
    /// left-hand side is not a variable, and on unimplemented expression kinds.
    fn codegen(&self, data: &mut CompilationData) -> LLVMValueRef {
        use self::BinOp::*;
        use self::Expression::*;

        let int_type = LLVMWrap::Int64TypeInContext(data.context);
        let zero = LLVMWrap::ConstInt(int_type, 0, false);

        match *self {
            Variable(ref name) => *data.variables.get(&**name).expect(&format!("Can't find variable {}", name)),
            // Assignment rebinds the name to the new value and evaluates to it.
            // This arm must come before the generic BinExp arm below.
            BinExp(Assign, ref left, ref right) => {
                if let Variable(ref name) = **left {
                    let new_value = right.codegen(data);
                    data.variables.insert((**name).clone(), new_value);
                    new_value
                } else {
                    panic!("Bad variable assignment")
                }
            }
            // Operands are generated left to right, then combined by the operator.
            BinExp(ref op, ref left, ref right) => {
                let lhs = left.codegen(data);
                let rhs = right.codegen(data);
                op.codegen_with_ops(data, lhs, rhs)
            }
            Number(ref n) => {
                // NOTE(review): `as u64` truncates or wraps depending on the
                // type of `n`, which is not visible here — confirm.
                let native_val = *n as u64;
                let int_value: LLVMValueRef = LLVMWrap::ConstInt(int_type, native_val, false);
                int_value
            }
            Conditional(ref test, ref then_expr, ref else_expr) => {
                // The condition is true when it is non-zero.
                let condition_value = test.codegen(data);
                let is_nonzero =
                    LLVMWrap::BuildICmp(data.builder,
                                        LLVMIntPredicate::LLVMIntNE,
                                        condition_value,
                                        zero,
                                        "ifcond");

                // The new blocks are appended to the function currently being emitted into.
                let func = LLVMWrap::GetBasicBlockParent(LLVMWrap::GetInsertBlock(data.builder));

                let mut then_block =
                    LLVMWrap::AppendBasicBlockInContext(data.context, func, "then_block");
                let mut else_block =
                    LLVMWrap::AppendBasicBlockInContext(data.context, func, "else_block");
                let merge_block =
                    LLVMWrap::AppendBasicBlockInContext(data.context, func, "ifcont");

                // add conditional branch to ifcond block
                LLVMWrap::BuildCondBr(data.builder, is_nonzero, then_block, else_block);

                // start inserting into then block
                LLVMWrap::PositionBuilderAtEnd(data.builder, then_block);

                // then-block codegen
                let then_return = then_expr.codegen(data);
                LLVMWrap::BuildBr(data.builder, merge_block);

                // update then block b/c recursive codegen() call may have changed the notion of
                // the current block
                then_block = LLVMWrap::GetInsertBlock(data.builder);

                // then do the same stuff again for the else branch
                //
                LLVMWrap::PositionBuilderAtEnd(data.builder, else_block);
                // A missing else branch evaluates to zero.
                let else_return = match *else_expr {
                    Some(ref e) => e.codegen(data),
                    None => zero,
                };
                LLVMWrap::BuildBr(data.builder, merge_block);
                else_block = LLVMWrap::GetInsertBlock(data.builder);

                // The phi node in the merge block selects the value of
                // whichever branch was taken.
                LLVMWrap::PositionBuilderAtEnd(data.builder, merge_block);
                let phi = LLVMWrap::BuildPhi(data.builder, int_type, "phinode");
                let values = vec![then_return, else_return];
                let blocks = vec![then_block, else_block];
                LLVMWrap::AddIncoming(phi, values, blocks);
                phi
            }
            // A block evaluates to its last expression, or zero when empty.
            Block(ref exprs) => {
                let mut ret = zero;
                for e in exprs.iter() {
                    ret = e.codegen(data);
                }
                ret
            }
            ref e => {
                println!("Unimplemented {:?}", e);
                unimplemented!()
            }
        }
    }
}
impl BinOp {
    /// Emits the instruction for this operator applied to the already generated operands
    /// `lhs` and `rhs`, returning the resulting value.
    ///
    /// The arithmetic operators map onto a single builder call each. `Less` and `Greater`
    /// build an integer comparison and zero-extend its result to the integer type used for
    /// every value. Any other operator panics with "Bad operator ...".
    ///
    /// NOTE(review): division uses `BuildUDiv` and the comparisons use the `U*` predicates,
    /// while the remainder uses `BuildSRem` - confirm whether mixing the two is intended.
    fn codegen_with_ops(&self, data: &CompilationData, lhs: LLVMValueRef, rhs: LLVMValueRef) -> LLVMValueRef {
        use self::BinOp::*;
        let int_type = LLVMWrap::Int64TypeInContext(data.context);
        // Comparison helper: build the comparison, then widen it back to `int_type`.
        let compare = |predicate: LLVMIntPredicate| -> LLVMValueRef {
            let flag: LLVMValueRef = LLVMWrap::BuildICmp(data.builder, predicate, lhs, rhs, "tmp");
            LLVMWrap::BuildZExt(data.builder, flag, int_type, "temp")
        };
        match *self {
            Add => LLVMWrap::BuildAdd(data.builder, lhs, rhs, "addtemp"),
            Sub => LLVMWrap::BuildSub(data.builder, lhs, rhs, "subtemp"),
            Mul => LLVMWrap::BuildMul(data.builder, lhs, rhs, "multemp"),
            Div => LLVMWrap::BuildUDiv(data.builder, lhs, rhs, "divtemp"),
            Mod => LLVMWrap::BuildSRem(data.builder, lhs, rhs, "remtemp"),
            Less => compare(LLVMIntPredicate::LLVMIntULT),
            Greater => compare(LLVMIntPredicate::LLVMIntUGT),
            ref unknown => panic!("Bad operator {:?}", unknown),
        }
    }
}

View File

@ -1,481 +0,0 @@
extern crate take_mut;
use std::collections::HashMap;
use std::collections::VecDeque;
use parser::{AST, Statement, Expression, Function, Callable, BinOp};
use std::rc::Rc;
use std::io::{Write, Stdout, BufWriter};
use std::convert::From;
use parser::Expression::*;
use parser::Statement::*;
/// Result of one reduction step: the rewritten node plus the side effect, if any, that
/// the step produced.
type Reduction<T> = (T, Option<SideEffect>);
/// A value that can be bound to a variable in an `Evaluator`. Each variant mirrors the
/// `Expression` variant of the same name; there is no counterpart for `Expression::Null`.
#[derive(Debug, Clone)]
enum ReducedValue {
/// A string value.
StringLiteral(Rc<String>),
/// A list of element expressions.
ListLiteral(VecDeque<Expression>),
/// A struct: (field name, field expression) pairs.
StructLiteral(VecDeque<(Rc<String>, Expression)>),
/// A numeric value.
Number(f64),
/// A function value.
Lambda(Function),
}
impl From<ReducedValue> for Expression {
fn from(rv: ReducedValue) -> Expression {
match rv {
ReducedValue::Number(n) => Expression::Number(n),
ReducedValue::StringLiteral(n) => Expression::StringLiteral(n),
ReducedValue::Lambda(f) => Expression::Lambda(f),
ReducedValue::ListLiteral(items) => Expression::ListLiteral(items),
ReducedValue::StructLiteral(items) => Expression::StructLiteral(items),
}
}
}
impl From<Expression> for ReducedValue {
    /// Converts an expression into a storable value.
    ///
    /// Only string, list, struct, number and lambda expressions can be stored. Every other
    /// variant - including `Expression::Null` - panics.
    fn from(rv: Expression) -> ReducedValue {
        match rv {
            Expression::StringLiteral(text) => ReducedValue::StringLiteral(text),
            Expression::ListLiteral(elements) => ReducedValue::ListLiteral(elements),
            Expression::StructLiteral(fields) => ReducedValue::StructLiteral(fields),
            Expression::Number(value) => ReducedValue::Number(value),
            Expression::Lambda(function) => ReducedValue::Lambda(function),
            _ => panic!("trying to store a non-fully-reduced variable"),
        }
    }
}
/// Converts a number into a list index.
///
/// Returns `Some(index)` only when `f` has no fractional part and is not negative;
/// otherwise returns `None`.
fn get_indexer(f: f64) -> Option<usize> {
    let whole = f.trunc();
    let is_integral = f.fract() == 0.0;
    if is_integral && whole >= 0.0 {
        Some(whole as usize)
    } else {
        None
    }
}
/// An effect produced by a reduction step, to be carried out by the evaluator.
#[derive(Debug)]
enum SideEffect {
/// Text to print.
Print(String),
/// Bind the given value to the given variable name.
AddBinding(Rc<String>, ReducedValue),
}
/// Evaluation environment: a variable table plus an optional link to an enclosing
/// environment.
pub struct Evaluator<'a> {
// Enclosing environment, consulted when a name is not found in `variables`.
parent: Option<&'a Evaluator<'a>>,
// Bindings owned by this environment.
variables: HashMap<String, ReducedValue>,
// Buffered handle on standard output.
stdout: BufWriter<Stdout>,
/// When set, every reduction step is printed.
pub trace_evaluation: bool,
}
impl<'a> Evaluator<'a> {
    /// Creates an evaluator with no bindings of its own.
    ///
    /// `parent`, when given, is the enclosing environment; the new evaluator inherits its
    /// `trace_evaluation` setting. Without a parent, tracing starts disabled.
    pub fn new(parent: Option<&'a Evaluator>) -> Evaluator<'a> {
        let inherited_trace = match parent {
            Some(outer) => outer.trace_evaluation,
            None => false,
        };
        Evaluator {
            parent: parent,
            variables: HashMap::new(),
            stdout: BufWriter::new(::std::io::stdout()),
            trace_evaluation: inherited_trace,
        }
    }

    /// Fully reduces every statement of `ast` in order and returns each result rendered
    /// with its `Display` implementation.
    pub fn run(&mut self, ast: AST) -> Vec<String> {
        let mut rendered = Vec::new();
        for statement in ast {
            let reduced = self.reduction_loop(statement);
            rendered.push(reduced.to_string());
        }
        rendered
    }

    /// Binds `value` to `var` in this environment, replacing any previous binding.
    fn add_binding(&mut self, var: String, value: ReducedValue) {
        self.variables.insert(var, value);
    }

    /// Looks `var` up in this environment, then in the chain of parent environments.
    /// Returns a clone of the bound value, or `None` when the name is unbound everywhere.
    fn lookup_binding(&self, var: &str) -> Option<ReducedValue> {
        if let Some(value) = self.variables.get(var) {
            return Some(value.clone());
        }
        self.parent.and_then(|outer| outer.lookup_binding(var))
    }
}
/// Implemented by AST nodes that the evaluator reduces step by step.
trait Evaluable {
/// Returns `true` while the node can still be reduced further.
fn is_reducible(&self) -> bool;
}
impl Evaluable for Statement {
    /// A function definition is always reducible; an expression statement is reducible
    /// exactly when its expression is.
    fn is_reducible(&self) -> bool {
        match *self {
            FuncDefNode(_) => true,
            ExprNode(ref inner) => inner.is_reducible(),
        }
    }
}
impl Evaluable for Expression {
fn is_reducible(&self) -> bool {
match *self {
Null => false,
StringLiteral(_) => false,
Lambda(_) => false,
Number(_) => false,
ListLiteral(ref items) => {
items.iter().any(|x| x.is_reducible())
}
StructLiteral(ref items) => {
items.iter().any(|pair| pair.1.is_reducible())
}
_ => true,
}
}
}
impl Expression {
    /// Truthiness used by conditionals: `Null`, the empty string and the number zero are
    /// false; every other expression is true.
    fn is_truthy(&self) -> bool {
        match *self {
            Null => false,
            StringLiteral(ref text) => !text.is_empty(),
            Number(value) => value != 0.0,
            _ => true,
        }
    }
}
/// Returns `true` for plain assignment and for the compound assignment operators
/// (`+=`, `-=`, `*=`, `/=`).
fn is_assignment(op: &BinOp) -> bool {
    use self::BinOp::*;
    match *op {
        Assign => true,
        AddAssign => true,
        SubAssign => true,
        MulAssign => true,
        DivAssign => true,
        _ => false,
    }
}
impl<'a> Evaluator<'a> {
/// Applies `step` to `node` until the result is no longer reducible and returns that
/// result. At least one step is always performed.
fn reduction_loop(&mut self, mut node: Statement) -> Statement {
    loop {
        node = self.step(node);
        if !node.is_reducible() {
            return node;
        }
    }
}
/// Performs one reduction step on `node`, carries out the side effect the step produced
/// (if any) and returns the reduced node.
///
/// With `trace_evaluation` enabled, one line describing the node before and after the
/// step, plus the side effect, is printed after the side effect has been performed.
fn step(&mut self, node: Statement) -> Statement {
    let tracing = self.trace_evaluation;
    // The node is moved into the reduction below, so it has to be rendered first.
    let mut log = if tracing {
        format!("Step: {:?}", node)
    } else {
        String::new()
    };
    let (reduced, effect) = self.reduce_astnode(node);
    if tracing {
        log.push_str(&format!("{:?}", reduced));
    }
    match effect {
        Some(effect) => {
            if tracing {
                log.push_str(&format!(" | side-effect: {:?}", effect));
            }
            self.perform_side_effect(effect);
        }
        None => {}
    }
    if tracing {
        println!("{}", log);
    }
    reduced
}
fn perform_side_effect(&mut self, side_effect: SideEffect) {
use self::SideEffect::*;
match side_effect {
Print(s) => {
write!(self.stdout, "{}\n", s).unwrap();
match self.stdout.flush() {
Ok(_) => (),
Err(_) => println!("Could not flush stdout"),
};
}
AddBinding(var, value) => {
self.add_binding((*var).clone(), value);
},
}
}
/// Reduces a statement by one step.
///
/// A function definition becomes a lambda expression and yields an `AddBinding` side
/// effect that binds the function under its own name. An expression statement is reduced
/// by one step when it is reducible and returned unchanged otherwise.
fn reduce_astnode(&mut self, node: Statement) -> Reduction<Statement> {
    match node {
        FuncDefNode(func) => {
            let binding = SideEffect::AddBinding(
                func.prototype.name.clone(),
                ReducedValue::Lambda(func.clone()),
            );
            (ExprNode(Expression::Lambda(func)), Some(binding))
        }
        ExprNode(expr) => {
            if !expr.is_reducible() {
                return (ExprNode(expr), None);
            }
            let (reduced, effect) = self.reduce_expr(expr);
            (ExprNode(reduced), effect)
        }
    }
}
//TODO I probably want another Expression variant that holds a ReducedValue
/// Performs a single small-step reduction of `expression`.
///
/// Returns the rewritten expression together with the side effect, if any, that the step
/// produced. Expressions that are already fully reduced are returned unchanged. At most
/// one sub-expression is reduced per call, so the caller keeps calling until the result
/// is no longer reducible.
fn reduce_expr(&mut self, expression: Expression) -> Reduction<Expression> {
    match expression {
        Null => (Null, None),
        e @ StringLiteral(_) => (e, None),
        e @ Number(_) => (e, None),
        e @ Lambda(_) => (e, None),
        // A variable is replaced by its bound value; an unbound variable becomes Null.
        Variable(ref var) => {
            match self.lookup_binding(var).map(|x| x.into()) {
                None => (Null, None),
                Some(expr) => (expr, None),
            }
        }
        BinExp(op, mut left, mut right) => {
            // The right operand is reduced first, one step at a time.
            if right.is_reducible() {
                let mut side_effect = None;
                take_mut::take(right.as_mut(), |expr| {
                    let (a, b) = self.reduce_expr(expr);
                    side_effect = b;
                    a
                });
                return (BinExp(op, left, right), side_effect);
            }
            // Plain assignment: the (fully reduced) right side is bound through a side
            // effect; the left side is deliberately never reduced.
            if let BinOp::Assign = op {
                return match *left {
                    Variable(var) => {
                        let reduced_value: ReducedValue = ReducedValue::from(*right);
                        let binding = SideEffect::AddBinding(var, reduced_value);
                        (Null, Some(binding))
                    },
                    _ => (Null, None)
                };
            }
            // Compound assignment `a op= b` is rewritten to `a = a op b`.
            if is_assignment(&op) {
                use self::BinOp::*;
                let new_op = match op {
                    AddAssign => Add,
                    SubAssign => Sub,
                    MulAssign => Mul,
                    DivAssign => Div,
                    _ => unreachable!(),
                };
                let reduction =
                    BinExp(BinOp::Assign,
                           Box::new(*left.clone()),
                           Box::new(BinExp(new_op, left, right))
                    );
                return (reduction, None);
            }
            if left.is_reducible() {
                let mut side_effect = None;
                take_mut::take(left.as_mut(), |expr| {
                    let (a, b) = self.reduce_expr(expr);
                    side_effect = b;
                    a
                });
                (BinExp(op, left, right), side_effect)
            } else {
                (self.reduce_binop(op, *left, *right), None) //can assume both arguments are maximally reduced
            }
        }
        Call(callable, mut args) => {
            // Reduce the first reducible argument by one step. The side effect of that
            // step (a print, a binding, ...) has to be handed back to the caller just as
            // it is for list and struct literals; dropping it would silently lose it.
            let mut side_effect = None;
            let mut all_reduced = true;
            for arg in args.iter_mut() {
                if arg.is_reducible() {
                    take_mut::take(arg, |arg| {
                        let (a, b) = self.reduce_expr(arg);
                        side_effect = b;
                        a
                    });
                    all_reduced = false;
                    break;
                }
            }
            if all_reduced {
                self.reduce_call(callable, args)
            } else {
                (Call(callable, args), side_effect)
            }
        }
        // `while t { body }` unrolls to `if t { body; while t { body } }`.
        While(test, body) => {
            let mut block = VecDeque::from(body.clone());
            block.push_back(While(test.clone(), body.clone()));
            let reduction = Conditional(test, Box::new(Block(block)), None);
            (reduction, None)
        }
        Conditional(box test, then_block, else_block) => {
            if test.is_reducible() {
                let (new_test, new_effect) = self.reduce_expr(test);
                (Conditional(Box::new(new_test), then_block, else_block), new_effect)
            } else {
                if test.is_truthy() {
                    (*then_block, None)
                } else {
                    match else_block {
                        Some(box expr) => (expr, None),
                        None => (Null, None),
                    }
                }
            }
        }
        Block(mut exprs) => {
            let first = exprs.pop_front();
            match first {
                None => (Null, None),
                Some(expr) => {
                    if exprs.len() == 0 {
                        // The last expression is the value of the block.
                        (expr, None)
                    } else {
                        if expr.is_reducible() {
                            let (new, side_effect) = self.reduce_expr(expr);
                            exprs.push_front(new);
                            (Block(exprs), side_effect)
                        } else {
                            // A fully reduced non-final expression is discarded.
                            (Block(exprs), None)
                        }
                    }
                }
            }
        }
        Index(mut expr, mut index_expr) => {
            // The index is reduced before the indexed expression.
            if index_expr.is_reducible() {
                let mut side_effect = None;
                take_mut::take(index_expr.as_mut(), |expr| {
                    let (a, b) = self.reduce_expr(expr);
                    side_effect = b;
                    a
                });
                return (Index(expr, index_expr), side_effect)
            }
            if expr.is_reducible() {
                let mut side_effect = None;
                take_mut::take(expr.as_mut(), |expr| {
                    let (a, b) = self.reduce_expr(expr);
                    side_effect = b;
                    a
                });
                return (Index(expr, index_expr), side_effect);
            }
            // Invalid indices, missing fields and unsupported combinations yield Null.
            match (*expr, *index_expr) {
                (ListLiteral(list_items), Number(n)) => {
                    let indexed_expr = get_indexer(n).and_then(|i| list_items.get(i));
                    if let Some(e) = indexed_expr {
                        (e.clone(), None)
                    } else {
                        (Null, None)
                    }
                }
                (StructLiteral(items), StringLiteral(s)) => {
                    // `items` is owned here, so the matching field is moved out
                    // instead of being cloned.
                    for item in items {
                        if s == item.0 {
                            return (item.1, None);
                        }
                    }
                    (Null, None)
                },
                _ => (Null, None)
            }
        }
        // List and struct literals reduce their first reducible element by one step.
        ListLiteral(mut exprs) => {
            let mut side_effect = None;
            for expr in exprs.iter_mut() {
                if expr.is_reducible() {
                    take_mut::take(expr, |expr| {
                        let (a, b) = self.reduce_expr(expr);
                        side_effect = b;
                        a
                    });
                    break;
                }
            }
            (ListLiteral(exprs), side_effect)
        },
        StructLiteral(mut items) => {
            let mut side_effect = None;
            for pair in items.iter_mut() {
                if pair.1.is_reducible() {
                    take_mut::take(pair, |pair| {
                        let (name, expr) = pair;
                        let (a, b) = self.reduce_expr(expr);
                        side_effect = b;
                        (name, a)
                    });
                    break;
                }
            }
            (StructLiteral(items), side_effect)
        }
    }
}
/// Evaluates a binary operator on two fully reduced operands.
///
/// Numbers support arithmetic and comparison; `+` also concatenates strings and
/// string/number pairs. Comparisons yield `Number(1.0)` for true and `Null` for false.
/// Every unsupported combination - including division by zero - yields `Null`.
fn reduce_binop(&mut self, op: BinOp, left: Expression, right: Expression) -> Expression {
    use self::BinOp::*;
    // Booleans are represented as Number(1.0) / Null.
    let from_bool = |flag: bool| {
        if flag {
            Number(1.0)
        } else {
            Null
        }
    };
    match (op, left, right) {
        (Add, Number(a), Number(b)) => Number(a + b),
        (Add, StringLiteral(a), StringLiteral(b)) => StringLiteral(Rc::new(format!("{}{}", *a, *b))),
        (Add, StringLiteral(a), Number(b)) => StringLiteral(Rc::new(format!("{}{}", *a, b))),
        (Add, Number(a), StringLiteral(b)) => StringLiteral(Rc::new(format!("{}{}", a, *b))),
        (Sub, Number(a), Number(b)) => Number(a - b),
        (Mul, Number(a), Number(b)) => Number(a * b),
        (Div, Number(a), Number(b)) if b != 0.0 => Number(a / b),
        (Mod, Number(a), Number(b)) => Number(a % b),
        (Less, Number(a), Number(b)) => from_bool(a < b),
        (LessEq, Number(a), Number(b)) => from_bool(a <= b),
        (Greater, Number(a), Number(b)) => from_bool(a > b),
        (GreaterEq, Number(a), Number(b)) => from_bool(a >= b),
        (Equal, Number(a), Number(b)) => from_bool(a == b),
        (Equal, Null, Null) => from_bool(true),
        (Equal, StringLiteral(a), StringLiteral(b)) => from_bool(a == b),
        _ => Null,
    }
}
fn reduce_call(&mut self, callable: Callable, arguments: Vec<Expression>) -> Reduction<Expression> {
if let Some(res) = handle_builtin(&callable, &arguments) {
return res;
}
let function = match callable {
Callable::Lambda(func) => func.clone(),
Callable::NamedFunction(name) => {
match self.lookup_binding(&*name) {
Some(ReducedValue::Lambda(func)) => func,
_ => return (Null, None),
}
}
};
if function.prototype.parameters.len() != arguments.len() {
return (Null, None);
}
let mut evaluator = Evaluator::new(Some(self));
for (binding, expr) in function.prototype.parameters.iter().zip(arguments.iter()) {
evaluator.add_binding((**binding).clone(), expr.clone().into());
}
let nodes = function.body.iter().map(|node| node.clone());
let mut retval = ExprNode(Null);
for n in nodes {
retval = evaluator.reduction_loop(n);
}
match retval {
ExprNode(expr) => (expr, None),
FuncDefNode(_) => panic!("This should never happen! A maximally-reduced node\
should never be a function definition!")
}
}
}
/// Handles calls to builtin functions.
///
/// Returns `None` when `callable` is not a named function or names no builtin. The only
/// builtin is `print`, which evaluates to `Null` and yields a `Print` side effect whose
/// text is every argument's `Display` form, each followed by a single space.
fn handle_builtin(callable: &Callable, arguments: &Vec<Expression>) -> Option<Reduction<Expression>> {
    let name: &str = if let Callable::NamedFunction(ref name) = *callable {
        name.as_str()
    } else {
        return None;
    };
    if name != "print" {
        return None;
    }
    let line: String = arguments.iter().map(|arg| format!("{} ", arg)).collect();
    Some((Null, Some(SideEffect::Print(line))))
}

View File

@ -1,103 +0,0 @@
#![feature(box_patterns)]
extern crate schala_repl;
mod tokenizer;
mod parser;
mod eval;
mod compilation;
use schala_repl::{ProgrammingLanguageInterface, EvalOptions, UnfinishedComputation, FinishedComputation, TraceArtifact};
/// Error produced by the tokenizer.
#[derive(Debug)]
pub struct TokenError {
    /// Human-readable description of what went wrong.
    pub msg: String,
}

impl TokenError {
    /// Builds an error that carries a copy of `msg`.
    pub fn new(msg: &str) -> TokenError {
        TokenError { msg: String::from(msg) }
    }
}
pub use self::eval::Evaluator as MaaruEvaluator;
/// The Maaru language front end: wraps the evaluator that executes programs.
pub struct Maaru<'a> {
// Top-level evaluator; it is kept across `execute_pipeline` calls.
evaluator: MaaruEvaluator<'a>
}
impl<'a> Maaru<'a> {
/// Creates a language instance whose evaluator has no parent environment.
pub fn new() -> Maaru<'a> {
Maaru {
evaluator: MaaruEvaluator::new(None),
}
}
}
impl<'a> ProgrammingLanguageInterface for Maaru<'a> {
    /// Name of the language.
    fn get_language_name(&self) -> String {
        String::from("Maaru")
    }

    /// File suffix of Maaru source files.
    fn get_source_file_suffix(&self) -> String {
        String::from("maaru")
    }

    /// Tokenizes, parses and evaluates `input`.
    ///
    /// A tokenization or parse error ends the pipeline with an `Err` describing it. When
    /// `options.debug_passes` contains "tokens" or "ast", the corresponding intermediate
    /// result is attached as a trace artifact. On success the result is the concatenation
    /// of every evaluated statement's output.
    fn execute_pipeline(&mut self, input: &str, options: &EvalOptions) -> FinishedComputation {
        let mut output = UnfinishedComputation::default();

        let tokens = match tokenizer::tokenize(input) {
            Err(err) => {
                return output.finish(Err(format!("Tokenization error: {:?}\n", err.msg)))
            }
            Ok(tokens) => tokens,
        };
        if options.debug_passes.get("tokens").is_some() {
            output.add_artifact(TraceArtifact::new("tokens", format!("{:?}", tokens)));
        }

        let ast = match parser::parse(&tokens, &[]) {
            Err(err) => {
                return output.finish(Err(format!("Parse error: {:?}\n", err.msg)))
            }
            Ok(ast) => ast,
        };
        if options.debug_passes.get("ast").is_some() {
            output.add_artifact(TraceArtifact::new("ast", format!("{:?}", ast)));
        }

        let evaluation_output: String = self.evaluator.run(ast).concat();
        output.finish(Ok(evaluation_output))
    }
    /* TODO make this work with new framework */
    /*
    fn can_compile(&self) -> bool {
    true
    }
    fn compile(&mut self, input: &str) -> LLVMCodeString {
    let tokens = match tokenizer::tokenize(input) {
    Ok(tokens) => tokens,
    Err(err) => {
    let msg = format!("Tokenization error: {:?}\n", err.msg);
    panic!("{}", msg);
    }
    };
    let ast = match parser::parse(&tokens, &[]) {
    Ok(ast) => ast,
    Err(err) => {
    let msg = format!("Parse error: {:?}\n", err.msg);
    panic!("{}", msg);
    }
    };
    compilation::compile_ast(ast)
    }
    */
}

View File

@ -1,755 +0,0 @@
use tokenizer::{Token, Kw, OpTok};
use tokenizer::Token::*;
use std::fmt;
use std::collections::VecDeque;
use std::rc::Rc;
use std::convert::From;
// Grammar
// program := (statement delimiter ?)*
// delimiter := Newline | Semicolon
// statement := declaration | expression
// declaration := FN prototype LCurlyBrace (statement)* RCurlyBrace
// prototype := identifier LParen identlist RParen
// identlist := Ident (Comma Ident)* | ε
// exprlist := Expression (Comma Expression)* | ε
// itemlist := Ident COLON Expression (Comma Ident COLON Expression)* | ε
//
// expression := postop_expression (op postop_expression)*
// postop_expression := primary_expression postop
// primary_expression := number_expr | String | identifier_expr | paren_expr | conditional_expr | while_expr | lambda_expr | list_expr | struct_expr
// number_expr := (PLUS | MINUS ) number_expr | Number
// identifier_expr := call_expression | Variable
// list_expr := LSquareBracket exprlist RSquareBracket
// struct_expr := LCurlyBrace itemlist RCurlyBrace
// call_expression := Identifier LParen exprlist RParen
// while_expr := WHILE expression LCurlyBrace (expression delimiter)* RCurlyBrace
// paren_expr := LParen expression RParen
// conditional_expr := IF expression LCurlyBrace (expression delimiter)* RCurlyBrace (ELSE LCurlyBrace (expression delimiter)* RCurlyBrace)?
// lambda_expr := FN LParen identlist RParen LCurlyBrace (expression delimiter)* RCurlyBrace
// lambda_call := | LParen exprlist RParen
// postop := ε | LParen exprlist RParen | LBracket expression RBracket
// op := '+', '-', etc.
//
/// A parsed program: its top-level statements in source order.
pub type AST = Vec<Statement>;
/// A statement: either an expression or a named function definition.
#[derive(Debug, Clone)]
pub enum Statement {
/// An expression used as a statement.
ExprNode(Expression),
/// A function definition (`fn name(params) { body }`).
FuncDefNode(Function),
}
impl fmt::Display for Statement {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::Statement::*;
match *self {
ExprNode(ref expr) => write!(f, "{}", expr),
FuncDefNode(_) => write!(f, "UNIMPLEMENTED"),
}
}
}
/// A function: its prototype plus the statements of its body.
#[derive(Debug, Clone)]
pub struct Function {
/// Name and parameter names.
pub prototype: Prototype,
/// Body statements, in source order.
pub body: Vec<Statement>,
}
/// The signature of a function.
#[derive(Debug, Clone, PartialEq)]
pub struct Prototype {
/// Function name.
pub name: Rc<String>,
/// Parameter names, in declaration order.
pub parameters: Vec<Rc<String>>,
}
/// An expression of the language.
#[derive(Debug, Clone)]
pub enum Expression {
/// The null value.
Null,
/// A string literal.
StringLiteral(Rc<String>),
/// A numeric literal.
Number(f64),
/// A reference to a variable by name.
Variable(Rc<String>),
/// A binary operation: operator, left operand, right operand.
BinExp(BinOp, Box<Expression>, Box<Expression>),
/// A call: the callee and its argument expressions.
Call(Callable, Vec<Expression>),
/// A conditional: test, then-branch and optional else-branch.
Conditional(Box<Expression>, Box<Expression>, Option<Box<Expression>>),
/// A function value.
Lambda(Function),
/// A sequence of expressions.
Block(VecDeque<Expression>),
/// A while loop: test and body expressions.
While(Box<Expression>, Vec<Expression>),
/// An indexing operation: indexed expression, then index expression.
Index(Box<Expression>, Box<Expression>),
/// A list literal.
ListLiteral(VecDeque<Expression>),
/// A struct literal: (field name, field expression) pairs.
StructLiteral(VecDeque<(Rc<String>, Expression)>),
}
/// The callee of a call expression.
#[derive(Clone, Debug)]
pub enum Callable {
/// A function referred to by name.
NamedFunction(Rc<String>),
/// A function value called directly.
Lambda(Function),
}
//TODO this ought to be ReducedExpression
impl fmt::Display for Expression {
    /// Renders a value for display.
    ///
    /// Null, strings, numbers, lambdas, lists (`[ a, b ]`) and structs (`{ x: a, y: b }`)
    /// have a rendering; every other expression prints `UNIMPLEMENTED`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use self::Expression::*;
        match *self {
            Null => write!(f, "null"),
            StringLiteral(ref s) => write!(f, "\"{}\"", s),
            Number(n) => write!(f, "{}", n),
            Lambda(Function { prototype: Prototype { ref name, ref parameters, .. }, .. }) => {
                write!(f, "«function: {}, {} arg(s)»", name, parameters.len())
            }
            ListLiteral(ref items) => {
                write!(f, "[ ")?;
                for (index, item) in items.iter().enumerate() {
                    if index > 0 {
                        write!(f, ", ")?;
                    }
                    write!(f, "{}", item)?;
                }
                write!(f, " ]")
            }
            StructLiteral(ref items) => {
                write!(f, "{{ ")?;
                for (index, pair) in items.iter().enumerate() {
                    if index > 0 {
                        write!(f, ", ")?;
                    }
                    write!(f, "{}: {}", pair.0, pair.1)?;
                }
                // The closing brace mirrors the list's " ]": a space before the brace
                // and nothing after it.
                write!(f, " }}")
            }
            _ => write!(f, "UNIMPLEMENTED"),
        }
    }
}
/// Binary operators. The comment on each variant gives its source spelling.
#[derive(Debug, Clone)]
pub enum BinOp {
// `+`
Add,
// `+=`
AddAssign,
// `-`
Sub,
// `-=`
SubAssign,
// `*`
Mul,
// `*=`
MulAssign,
// `/`
Div,
// `/=`
DivAssign,
// `%`
Mod,
// `<`
Less,
// `<=`
LessEq,
// `>`
Greater,
// `>=`
GreaterEq,
// `==`
Equal,
// `=`
Assign,
/// Any other operator, kept by its spelling.
Custom(String),
}
impl From<OpTok> for BinOp {
    /// Maps an operator token onto its `BinOp`. Spellings without a dedicated variant
    /// become `Custom`.
    fn from(token: OpTok) -> BinOp {
        use self::BinOp::*;
        match token.0.as_str() {
            // arithmetic
            "+" => Add,
            "-" => Sub,
            "*" => Mul,
            "/" => Div,
            "%" => Mod,
            // comparison
            "<" => Less,
            "<=" => LessEq,
            ">" => Greater,
            ">=" => GreaterEq,
            "==" => Equal,
            // assignment
            "=" => Assign,
            "+=" => AddAssign,
            "-=" => SubAssign,
            "*=" => MulAssign,
            "/=" => DivAssign,
            other => Custom(other.to_string()),
        }
    }
}
/// Binding strength of a binary operator; a higher value binds tighter.
type Precedence = u8;
// TODO make this support incomplete parses
/// Result type returned by every parsing routine.
pub type ParseResult<T> = Result<T, ParseError>;
/// Error produced by the parser.
#[derive(Debug)]
pub struct ParseError {
    /// Human-readable description of the failure.
    pub msg: String,
    /// Tokens that had not been consumed when the failure occurred.
    pub remaining_tokens: Vec<Token>,
}

impl ParseError {
    /// Builds an `Err` that carries `msg` and an empty list of remaining tokens.
    fn result_from_str<T>(msg: &str) -> ParseResult<T> {
        let error = ParseError {
            msg: String::from(msg),
            remaining_tokens: Vec::new(),
        };
        Err(error)
    }
}
/// Parser state: the tokens that have not been consumed yet.
struct Parser {
    // Stored in reverse order, so the next token is the last element.
    tokens: Vec<Token>,
}

impl Parser {
    /// Creates a parser over a copy of `tokens`.
    fn initialize(tokens: &[Token]) -> Parser {
        let reversed: Vec<Token> = tokens.iter().rev().cloned().collect();
        Parser { tokens: reversed }
    }

    /// Returns a copy of the next token without consuming it.
    fn peek(&self) -> Option<Token> {
        self.tokens.last().cloned()
    }

    /// Consumes and returns the next token.
    fn next(&mut self) -> Option<Token> {
        self.tokens.pop()
    }

    /// Returns the precedence of a binary operator; unknown operators bind tightest.
    fn get_precedence(&self, op: &OpTok) -> Precedence {
        match op.0.as_str() {
            "=" | "+=" | "-=" | "*=" | "/=" => 1,
            "+" | "-" => 10,
            "*" | "/" | "%" => 20,
            ">" | ">=" | "<" | "<=" => 30,
            "==" => 40,
            _ => 255,
        }
    }
}
// Consumes the next token of the parser `$self_` when it matches the pattern `$token`.
// On a mismatch or at end of input the macro makes the *enclosing function* return a
// `ParseError`, so it can only be used inside functions that return `ParseResult`.
macro_rules! expect {
($self_:expr, $token:pat) => {
match $self_.peek() {
Some($token) => {$self_.next();},
Some(x) => {
let err = format!("Expected `{:?}` but got `{:?}`", stringify!($token), x);
return ParseError::result_from_str(&err)
},
None => {
let err = format!("Expected `{:?}` but got end of input", stringify!($token));
return ParseError::result_from_str(&err) //TODO make this not require 2 stringifications
}
}
}
}
// Consumes the next token when it is an identifier and evaluates to its name. Otherwise
// the macro makes the *enclosing function* return a `ParseError`.
macro_rules! expect_identifier {
($self_:expr) => {
match $self_.peek() {
Some(Identifier(s)) => {$self_.next(); s},
Some(x) => return ParseError::result_from_str(&format!("Expected identifier, but got {:?}", x)),
None => return ParseError::result_from_str("Expected identifier, but got end of input"),
}
}
}
// Consumes every delimiter token (newline or semicolon) at the front of the input and
// stops at the first other token or at end of input.
macro_rules! skip_whitespace {
($_self: expr) => {
loop {
match $_self.peek() {
Some(ref t) if is_delimiter(t) => {
$_self.next();
continue;
}
_ => break,
}
}
}
}
// Parses a delimiter-separated sequence of items and evaluates to a `Vec` of them.
// `$try_parse` names the parser method called for each item; parsing stops, without
// consuming anything, at end of input or when the next token matches one of the
// `$break_pattern`s. Delimiters between items are skipped. A failing item makes the
// *enclosing function* return that error (through `try!`).
macro_rules! delimiter_block {
($_self: expr, $try_parse: ident, $($break_pattern: pat)|+) => {
{
let mut acc = Vec::new();
loop {
match $_self.peek() {
None => break,
Some(ref t) if is_delimiter(t) => { $_self.next(); continue; },
$($break_pattern)|+ => break,
_ => {
let a = try!($_self.$try_parse());
acc.push(a);
}
}
}
acc
}
}
}
fn is_delimiter(token: &Token) -> bool {
match *token {
Newline | Semicolon => true,
_ => false,
}
}
impl Parser {
/// Parses a whole program: statements separated by optional delimiters.
///
/// On failure the returned error also carries the tokens that were still unconsumed,
/// in source order.
fn program(&mut self) -> ParseResult<AST> {
    let mut ast = Vec::new(); //TODO have this come from previously-parsed tree
    while let Some(token) = self.peek() {
        if is_delimiter(&token) {
            self.next();
            continue;
        }
        match self.statement() {
            Ok(node) => ast.push(node),
            Err(mut err) => {
                // The parser keeps its tokens reversed; flip them back for the report.
                let mut remaining = self.tokens.clone();
                remaining.reverse();
                err.remaining_tokens = remaining;
                return Err(err);
            }
        }
    }
    Ok(ast)
}
/// Parses one statement: a declaration when the next token is `fn`, an expression
/// otherwise. Panics when called at end of input.
fn statement(&mut self) -> ParseResult<Statement> {
    match self.peek() {
        Some(Keyword(Kw::Fn)) => self.declaration(),
        Some(_) => self.expression().map(Statement::ExprNode),
        None => panic!("Unexpected end of tokens"),
    }
}
/// Parses a function declaration: `fn` prototype `{` body `}`.
fn declaration(&mut self) -> ParseResult<Statement> {
    expect!(self, Keyword(Kw::Fn));
    let signature = self.prototype()?;
    expect!(self, LCurlyBrace);
    let statements = self.body()?;
    expect!(self, RCurlyBrace);
    let function = Function {
        prototype: signature,
        body: statements,
    };
    Ok(Statement::FuncDefNode(function))
}
/// Parses a function prototype: identifier `(` identlist `)`.
fn prototype(&mut self) -> ParseResult<Prototype> {
    let function_name = expect_identifier!(self);
    expect!(self, LParen);
    let parameter_names = self.identlist()?;
    expect!(self, RParen);
    let prototype = Prototype {
        name: function_name,
        parameters: parameter_names,
    };
    Ok(prototype)
}
/// Parses a possibly empty, comma-separated list of identifiers and returns their names.
/// Parsing stops at the first token that is not an identifier or that does not follow
/// a comma.
fn identlist(&mut self) -> ParseResult<Vec<Rc<String>>> {
    let mut names = Vec::new();
    loop {
        match self.peek() {
            Some(Identifier(name)) => {
                names.push(name);
                self.next();
            }
            _ => break,
        }
        match self.peek() {
            Some(Comma) => {
                self.next();
            }
            _ => break,
        }
    }
    Ok(names)
}
fn exprlist(&mut self) -> ParseResult<Vec<Expression>> {
let mut exprs = Vec::new();
loop {
if let Some(RParen) = self.peek() {
break;
}
let exp = self.expression()?;
exprs.push(exp);
match self.peek() {
Some(Comma) => {self.next();},
_ => break,
}
}
Ok(exprs)
}
/// Parses the possibly empty, comma-separated `name: expression` items of a struct
/// literal. The closing `}` ends the list and is not consumed.
fn itemlist(&mut self) -> ParseResult<VecDeque<(Rc<String>, Expression)>> {
    let mut items = VecDeque::new();
    loop {
        match self.peek() {
            Some(RCurlyBrace) => break,
            _ => {}
        }
        let field_name = expect_identifier!(self);
        expect!(self, Colon);
        let field_value = self.expression()?;
        items.push_back((field_name, field_value));
        if let Some(Comma) = self.peek() {
            self.next();
        } else {
            break;
        }
    }
    Ok(items)
}
/// Parses the statements of a function body up to, but not including, the closing `}`.
fn body(&mut self) -> ParseResult<Vec<Statement>> {
    Ok(delimiter_block!(self, statement, Some(RCurlyBrace)))
}
/// Parses a full expression: a first operand followed by any number of binary operators
/// and operands, combined according to operator precedence.
fn expression(&mut self) -> ParseResult<Expression> {
    self.postop_expression()
        .and_then(|first_operand| self.precedence_expr(first_operand, 0))
}
/// Precedence-climbing loop for binary operators.
///
/// `lhs` is the operand parsed so far. Operators are consumed while their precedence is
/// at least `min_precedence`; the combined expression is returned.
fn precedence_expr(&mut self,
mut lhs: Expression,
min_precedence: u8)
-> ParseResult<Expression> {
while let Some(Operator(op)) = self.peek() {
let precedence = self.get_precedence(&op);
// An operator that binds weaker than the caller's minimum belongs to an outer level.
if precedence < min_precedence {
break;
}
self.next();
let mut rhs = self.postop_expression()?;
// While the following operator binds tighter than the current one, it takes `rhs`
// as its own left operand.
while let Some(Operator(ref op)) = self.peek() {
if self.get_precedence(op) > precedence {
let new_prec = self.get_precedence(op);
rhs = self.precedence_expr(rhs, new_prec)?;
} else {
break;
}
}
// Operators of equal precedence are combined left to right.
lhs = Expression::BinExp(op.into(), Box::new(lhs), Box::new(rhs));
}
Ok(lhs)
}
/// Parses a primary expression followed by an optional postfix operation: a call
/// `(args)`, which is only valid directly after a lambda, or an index `[expr]`.
fn postop_expression(&mut self) -> ParseResult<Expression> {
    use self::Expression::*;
    let primary = self.primary_expression()?;
    match self.peek() {
        Some(LParen) => {
            let args = self.call_expression()?;
            match primary {
                Lambda(f) => Ok(Call(Callable::Lambda(f), args)),
                e => {
                    let err = format!("Expected lambda expression before a call, got {:?}", e);
                    ParseError::result_from_str(&err)
                }
            }
        }
        Some(LSquareBracket) => {
            expect!(self, LSquareBracket);
            let index_expr = self.expression()?;
            expect!(self, RSquareBracket);
            Ok(Index(Box::new(primary), Box::new(index_expr)))
        }
        _ => Ok(primary),
    }
}
/// Parses a primary expression, choosing the sub-parser from the next token.
/// Any token that cannot start a primary expression, or end of input, is an error.
fn primary_expression(&mut self) -> ParseResult<Expression> {
    match self.peek() {
        Some(Keyword(Kw::Null)) => {
            self.next();
            Ok(Expression::Null)
        }
        Some(NumLiteral(_)) => self.number_expression(),
        // A leading sign starts a (signed) number.
        Some(Operator(OpTok(ref a))) if **a == "+" || **a == "-" => self.number_expression(),
        Some(StrLiteral(s)) => {
            self.next();
            Ok(Expression::StringLiteral(s))
        }
        Some(Keyword(Kw::If)) => self.conditional_expr(),
        Some(Keyword(Kw::While)) => self.while_expr(),
        Some(Identifier(_)) => self.identifier_expr(),
        Some(Token::LParen) => self.paren_expr(),
        Some(Keyword(Kw::Fn)) => self.lambda_expr(),
        Some(Token::LSquareBracket) => self.list_expr(),
        Some(Token::LCurlyBrace) => self.struct_expr(),
        Some(e) => {
            ParseError::result_from_str(&format!("Expected primary expression, got {:?}", e))
        }
        None => ParseError::result_from_str("Expected primary expression received EoI"),
    }
}
fn list_expr(&mut self) -> ParseResult<Expression> {
expect!(self, LSquareBracket);
let exprlist: Vec<Expression> = self.exprlist()?;
expect!(self, RSquareBracket);
Ok(Expression::ListLiteral(VecDeque::from(exprlist)))
}
/// Parses a struct literal: `{` itemlist `}`.
fn struct_expr(&mut self) -> ParseResult<Expression> {
    expect!(self, LCurlyBrace);
    let fields = self.itemlist()?;
    expect!(self, RCurlyBrace);
    Ok(Expression::StructLiteral(fields))
}
/// Parses a number literal preceded by any number of `+` and `-` signs. Every `-` flips
/// the sign of the result; `+` has no effect.
fn number_expression(&mut self) -> ParseResult<Expression> {
    let mut sign: f64 = 1.0;
    loop {
        match self.peek() {
            Some(NumLiteral(n)) => {
                self.next();
                return Ok(Expression::Number(n * sign));
            }
            Some(Operator(OpTok(ref a))) if **a == "+" => {
                self.next();
            }
            Some(Operator(OpTok(ref a))) if **a == "-" => {
                sign = -sign;
                self.next();
            }
            Some(e) => {
                let err = format!("Expected +, - or number, got {:?}", e);
                return ParseError::result_from_str(&err);
            }
            None => {
                return ParseError::result_from_str("Expected +, - or number, got EoI");
            }
        }
    }
}
fn lambda_expr(&mut self) -> ParseResult<Expression> {
use self::Expression::*;
expect!(self, Keyword(Kw::Fn));
skip_whitespace!(self);
expect!(self, LParen);
let parameters = self.identlist()?;
expect!(self, RParen);
skip_whitespace!(self);
expect!(self, LCurlyBrace);
let body = self.body()?;
expect!(self, RCurlyBrace);
let prototype = Prototype {
name: Rc::new("a lambda yo!".to_string()),
parameters: parameters,
};
let function = Function {
prototype: prototype,
body: body,
};
Ok(Lambda(function))
}
fn while_expr(&mut self) -> ParseResult<Expression> {
use self::Expression::*;
expect!(self, Keyword(Kw::While));
let test = self.expression()?;
expect!(self, LCurlyBrace);
let body = delimiter_block!(
self,
expression,
Some(RCurlyBrace)
);
expect!(self, RCurlyBrace);
Ok(While(Box::new(test), body))
}
/// Parses a conditional: `if` expression `{` expressions `}` optionally followed by
/// `else {` expressions `}`.
///
/// Delimiters are allowed before the opening brace and between the then-block and
/// `else`. Each block's closing brace is consumed together with that block, so an `if`
/// without an `else` ends at the then-block's `}` and never takes a brace that belongs
/// to an enclosing construct.
fn conditional_expr(&mut self) -> ParseResult<Expression> {
    use self::Expression::*;
    expect!(self, Keyword(Kw::If));
    let test = self.expression()?;
    skip_whitespace!(self);
    expect!(self, LCurlyBrace);
    skip_whitespace!(self);
    let then_block = delimiter_block!(self, expression, Some(RCurlyBrace));
    expect!(self, RCurlyBrace);
    // Allows `else` to start on the line after the closing brace.
    skip_whitespace!(self);
    let else_block = if let Some(Keyword(Kw::Else)) = self.peek() {
        self.next();
        skip_whitespace!(self);
        expect!(self, LCurlyBrace);
        let else_exprs = delimiter_block!(self, expression, Some(RCurlyBrace));
        // The else-block's closing brace is only required when there is an else-block.
        expect!(self, RCurlyBrace);
        Some(else_exprs)
    } else {
        None
    };
    Ok(Conditional(Box::new(test),
                   Box::new(Block(VecDeque::from(then_block))),
                   else_block.map(|list| Box::new(Block(VecDeque::from(list))))))
}
/// Parses an identifier. When it is directly followed by `(`, the result is a call of
/// the named function; otherwise it is a variable reference.
fn identifier_expr(&mut self) -> ParseResult<Expression> {
    let name = expect_identifier!(self);
    if let Some(LParen) = self.peek() {
        let args = self.call_expression()?;
        return Ok(Expression::Call(Callable::NamedFunction(name), args));
    }
    Ok(Expression::Variable(name))
}
/// Parses the parenthesized argument list of a call and returns the argument expressions.
fn call_expression(&mut self) -> ParseResult<Vec<Expression>> {
    expect!(self, LParen);
    let arguments = self.exprlist()?;
    expect!(self, RParen);
    Ok(arguments)
}
/// Parses a parenthesized expression and returns the inner expression.
fn paren_expr(&mut self) -> ParseResult<Expression> {
    expect!(self, Token::LParen);
    let inner = self.expression()?;
    expect!(self, Token::RParen);
    Ok(inner)
}
}
/// Parses `tokens` into an AST. `_parsed_tree` is currently unused.
pub fn parse(tokens: &[Token], _parsed_tree: &[Statement]) -> ParseResult<AST> {
    Parser::initialize(tokens).program()
}
/*
#[cfg(test)]
mod tests {
use schala_lang::tokenizer;
use super::*;
use super::Statement::*;
use super::Expression::*;
macro_rules! parsetest {
($input:expr, $output:pat, $ifexpr:expr) => {
{
let tokens = tokenizer::tokenize($input).unwrap();
let ast = parse(&tokens, &[]).unwrap();
match &ast[..] {
$output if $ifexpr => (),
x => panic!("Error in parse test, got {:?} instead", x)
}
}
}
}
#[test]
fn function_parse_test() {
use super::Function;
parsetest!(
"fn a() { 1 + 2 }",
&[FuncDefNode(Function {prototype: Prototype { ref name, ref parameters }, ref body})],
match &body[..] { &[ExprNode(BinExp(_, box Number(1.0), box Number(2.0)))] => true, _ => false }
&& **name == "a" && match &parameters[..] { &[] => true, _ => false }
);
parsetest!(
"fn a(x,y){ 1 + 2 }",
&[FuncDefNode(Function {prototype: Prototype { ref name, ref parameters }, ref body})],
match &body[..] { &[ExprNode(BinExp(_, box Number(1.0), box Number(2.0)))] => true, _ => false }
&& **name == "a" && *parameters[0] == "x" && *parameters[1] == "y" && parameters.len() == 2
);
let t3 = "fn (x) { x + 2 }";
let tokens3 = tokenizer::tokenize(t3).unwrap();
assert!(parse(&tokens3, &[]).is_err());
}
#[test]
fn expression_parse_test() {
parsetest!("a", &[ExprNode(Variable(ref s))], **s == "a");
parsetest!("a + b",
&[ExprNode(BinExp(BinOp::Add, box Variable(ref a), box Variable(ref b)))],
**a == "a" && **b == "b");
parsetest!("a + b * c",
&[ExprNode(BinExp(BinOp::Add, box Variable(ref a), box BinExp(BinOp::Mul, box Variable(ref b), box Variable(ref c))))],
**a == "a" && **b == "b" && **c == "c");
parsetest!("a * b + c",
&[ExprNode(BinExp(BinOp::Add, box BinExp(BinOp::Mul, box Variable(ref a), box Variable(ref b)), box Variable(ref c)))],
**a == "a" && **b == "b" && **c == "c");
parsetest!("(a + b) * c",
&[ExprNode(BinExp(BinOp::Mul, box BinExp(BinOp::Add, box Variable(ref a), box Variable(ref b)), box Variable(ref c)))],
**a == "a" && **b == "b" && **c == "c");
}
#[test]
fn lambda_parse_test() {
use schala_lang::tokenizer;
let t1 = "(fn(x) { x + 2 })";
let tokens1 = tokenizer::tokenize(t1).unwrap();
match parse(&tokens1, &[]).unwrap()[..] {
_ => (),
}
let t2 = "fn(x) { x + 2 }";
let tokens2 = tokenizer::tokenize(t2).unwrap();
assert!(parse(&tokens2, &[]).is_err());
let t3 = "(fn(x) { x + 10 })(20)";
let tokens3 = tokenizer::tokenize(t3).unwrap();
match parse(&tokens3, &[]).unwrap() {
_ => (),
};
}
#[test]
fn conditional_parse_test() {
use schala_lang::tokenizer;
let t1 = "if null { 20 } else { 40 }";
let tokens = tokenizer::tokenize(t1).unwrap();
match parse(&tokens, &[]).unwrap()[..] {
[ExprNode(Conditional(box Null, box Block(_), Some(box Block(_))))] => (),
_ => panic!(),
}
let t2 = r"
if null {
20
} else {
40
}
";
let tokens2 = tokenizer::tokenize(t2).unwrap();
match parse(&tokens2, &[]).unwrap()[..] {
[ExprNode(Conditional(box Null, box Block(_), Some(box Block(_))))] => (),
_ => panic!(),
}
let t2 = r"
if null {
20 } else
{
40
}
";
let tokens3 = tokenizer::tokenize(t2).unwrap();
match parse(&tokens3, &[]).unwrap()[..] {
[ExprNode(Conditional(box Null, box Block(_), Some(box Block(_))))] => (),
_ => panic!(),
}
}
}
*/

View File

@ -1,208 +0,0 @@
extern crate itertools;
use std::iter::Peekable;
use std::str::Chars;
use self::itertools::Itertools;
use std::rc::Rc;
use TokenError;
/// A single lexical token produced by `tokenize`.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// A line break (`'\n'`).
    Newline,
    /// `;`
    Semicolon,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LSquareBracket,
    /// `]`
    RSquareBracket,
    /// `{`
    LCurlyBrace,
    /// `}`
    RCurlyBrace,
    /// `,`
    Comma,
    /// `.`
    Period,
    /// `:`
    Colon,
    /// A numeric literal, parsed as `f64`.
    NumLiteral(f64),
    /// The contents of a double-quoted string, without the quotes.
    StrLiteral(Rc<String>),
    /// Any word that is not one of the keywords in `Kw`.
    Identifier(Rc<String>),
    /// A run of operator characters.
    Operator(OpTok),
    /// A reserved word.
    Keyword(Kw),
}
/// The text of an operator token, held behind a shared `Rc`.
#[derive(Debug, Clone, PartialEq)]
pub struct OpTok(
    /// The operator's characters exactly as they appeared in the input.
    pub Rc<String>,
);
/// Reserved words recognised by `tokenize_identifier`.
#[derive(Debug, Clone, PartialEq)]
pub enum Kw {
    /// `if`
    If,
    /// `else`
    Else,
    /// `while`
    While,
    /// `let`
    Let,
    /// `fn`
    Fn,
    /// `null`
    Null,
}
/// Result of tokenizing a whole input: the token list on success, or a `TokenError`.
pub type TokenizeResult = Result<Vec<Token>, TokenError>;
/// Returns `true` when `c` is one of the decimal digits `0` through `9`.
///
/// Takes `&char` so it can be handed directly to iterator adaptors that
/// yield references.
fn is_digit(c: &char) -> bool {
    c.is_ascii_digit()
}
/// Splits `input` into a flat list of tokens.
///
/// `#` starts a comment that runs to the end of the line; the terminating
/// newline is consumed together with the comment, so no `Newline` token is
/// emitted for it. Whitespace other than `'\n'` is skipped.
///
/// # Errors
///
/// Returns the first `TokenError` reported by one of the `tokenize_*` helpers
/// (for example an unterminated string literal or a malformed number).
pub fn tokenize(input: &str) -> TokenizeResult {
    use self::Token::*;

    let mut tokens = Vec::new();
    let mut iter: Peekable<Chars> = input.chars().peekable();

    while let Some(c) = iter.next() {
        if c == '#' {
            // Discard the rest of the comment line, including its newline.
            while let Some(c) = iter.next() {
                if c == '\n' {
                    break;
                }
            }
            continue;
        }

        let cur_tok = match c {
            '\n' => Newline,
            c if c.is_whitespace() => continue,
            ';' => Semicolon,
            '(' => LParen,
            ')' => RParen,
            ':' => Colon,
            ',' => Comma,
            '{' => LCurlyBrace,
            '}' => RCurlyBrace,
            '[' => LSquareBracket,
            ']' => RSquareBracket,
            '"' => tokenize_str(&mut iter)?,
            // This arm has to come before the generic operator arm: '.' is not
            // alphanumeric, so the operator arm would otherwise claim it and
            // neither `Token::Period` nor a leading-dot number such as `.5`
            // could ever be produced.
            c if c == '.' || is_digit(&c) => tokenize_number_or_period(c, &mut iter)?,
            c if !c.is_alphanumeric() => tokenize_operator(c, &mut iter)?,
            c => tokenize_identifier(c, &mut iter)?,
        };
        tokens.push(cur_tok);
    }

    Ok(tokens)
}
/// Reads the remainder of a string literal; the opening `"` has already been
/// consumed by the caller.
///
/// Characters are copied verbatim up to (not including) the closing `"`.
///
/// # Errors
///
/// Returns `TokenError("Unclosed quote")` if the input ends before a closing
/// quote is found.
fn tokenize_str(iter: &mut Peekable<Chars>) -> Result<Token, TokenError> {
    // TODO handle string escapes, interpolation
    let mut contents = String::new();
    while let Some(ch) = iter.next() {
        if ch == '"' {
            return Ok(Token::StrLiteral(Rc::new(contents)));
        }
        contents.push(ch);
    }
    Err(TokenError::new("Unclosed quote"))
}
/// Builds an operator token starting with `c`.
///
/// Every following character that is neither alphanumeric nor whitespace is
/// appended to the operator, so adjacent punctuation is absorbed into it.
/// This function never returns `Err`.
fn tokenize_operator(c: char, iter: &mut Peekable<Chars>) -> Result<Token, TokenError> {
    let mut sigil = c.to_string();
    let continuation =
        iter.peeking_take_while(|next| !(next.is_alphanumeric() || next.is_whitespace()));
    sigil.extend(continuation);
    Ok(Token::Operator(OpTok(Rc::new(sigil))))
}
/// Lexes either a lone `.` or a numeric literal that starts with `c`.
///
/// A `.` that is not immediately followed by a digit becomes `Token::Period`.
/// Otherwise `c` plus every following digit or `.` is collected and parsed as
/// an `f64`.
///
/// # Errors
///
/// Returns `TokenError("Failed to parse digit")` when the collected text is
/// not a valid `f64` (for example `2.4.5`).
fn tokenize_number_or_period(c: char, iter: &mut Peekable<Chars>) -> Result<Token, TokenError> {
    if c == '.' {
        let digit_follows = match iter.peek() {
            Some(next) => is_digit(next),
            None => false,
        };
        if !digit_follows {
            return Ok(Token::Period);
        }
    }

    let mut literal = c.to_string();
    literal.extend(iter.peeking_take_while(|next| *next == '.' || is_digit(next)));

    literal
        .parse::<f64>()
        .map(Token::NumLiteral)
        .map_err(|_| TokenError::new("Failed to parse digit"))
}
/// Lexes a keyword or identifier that starts with `c`.
///
/// Characters are accumulated until whitespace, a digit, or one of
/// `; ( ) , . : [ ]` is seen. Anything else, including operator characters
/// and curly braces, is kept as part of the word. The word is then mapped to a
/// `Kw` keyword if it matches one, and to `Identifier` otherwise. This
/// function never returns `Err`.
fn tokenize_identifier(c: char, iter: &mut Peekable<Chars>) -> Result<Token, TokenError> {
    use self::Token::*;

    // True for a character that terminates the current word.
    fn ends_identifier(c: &char) -> bool {
        match *c {
            ';' | '(' | ')' | ',' | '.' | ':' | '[' | ']' => true,
            other => other.is_whitespace() || is_digit(&other),
        }
    }

    let mut word = c.to_string();
    word.extend(iter.peeking_take_while(|next| !ends_identifier(next)));

    let token = match word.as_str() {
        "if" => Keyword(Kw::If),
        "else" => Keyword(Kw::Else),
        "while" => Keyword(Kw::While),
        "let" => Keyword(Kw::Let),
        "fn" => Keyword(Kw::Fn),
        "null" => Keyword(Kw::Null),
        other => Identifier(Rc::new(other.to_string())),
    };
    Ok(token)
}
/*
#[cfg(test)]
mod tests {
use super::*;
use super::Token::*;
macro_rules! token_test {
($input: expr, $output: pat, $ifexpr: expr) => {
let tokens = tokenize($input).unwrap();
match tokens[..] {
$output if $ifexpr => (),
_ => panic!("Actual output: {:?}", tokens),
}
}
}
#[test]
fn basic_tokeniziation_tests() {
token_test!("let a = 3\n",
[Keyword(Kw::Let), Identifier(ref a), Operator(OpTok(ref b)), NumLiteral(3.0), Newline],
**a == "a" && **b == "=");
token_test!("2+1",
[NumLiteral(2.0), Operator(OpTok(ref a)), NumLiteral(1.0)],
**a == "+");
token_test!("2 + 1",
[NumLiteral(2.0), Operator(OpTok(ref a)), NumLiteral(1.0)],
**a == "+");
token_test!("2.3*49.2",
[NumLiteral(2.3), Operator(OpTok(ref a)), NumLiteral(49.2)],
**a == "*");
token_test!("a+3",
[Identifier(ref a), NumLiteral(3.0)],
**a == "a+");
assert!(tokenize("2.4.5").is_err());
token_test!("fn my_func(a) { a ? 3[1] }",
[Keyword(Kw::Fn), Identifier(ref a), LParen, Identifier(ref b), RParen, LCurlyBrace, Identifier(ref c),
Operator(OpTok(ref d)), NumLiteral(3.0), LSquareBracket, NumLiteral(1.0), RSquareBracket, RCurlyBrace],
**a == "my_func" && **b == "a" && **c == "a" && **d == "?");
}
#[test]
fn string_test() {
token_test!("null + \"a string\"",
[Keyword(Kw::Null), Operator(OpTok(ref a)), StrLiteral(ref b)],
**a == "+" && **b == "a string");
token_test!("\"{?'q@?\"",
[StrLiteral(ref a)],
**a == "{?'q@?");
}
#[test]
fn operator_test() {
token_test!("a *> b",
[Identifier(ref a), Operator(OpTok(ref b)), Identifier(ref c)],
**a == "a" && **b == "*>" && **c == "b");
}
}
*/

View File

@ -1,11 +0,0 @@
[package]
name = "robo-lang"
version = "0.1.0"
authors = ["greg <greg.shuflin@protonmail.com>"]
[dependencies]
itertools = "0.5.8"
take_mut = "0.1.3"
llvm-sys = "*"
schala-repl = { path = "../schala-repl" }

View File

@ -1,170 +0,0 @@
#![feature(box_patterns)]
extern crate itertools;
extern crate schala_repl;
use itertools::Itertools;
use schala_repl::{ProgrammingLanguageInterface, EvalOptions, FinishedComputation, UnfinishedComputation};
/// Handle for the Robo language; it carries no state.
pub struct Robo {}

impl Robo {
    /// Creates a new `Robo` instance.
    pub fn new() -> Robo {
        Robo {}
    }
}
/// Error produced while tokenizing input.
#[derive(Debug)]
pub struct TokenError {
    /// Human-readable description of what went wrong.
    pub msg: String,
}

impl TokenError {
    /// Builds a `TokenError` that owns a copy of `msg`.
    pub fn new(msg: &str) -> TokenError {
        let msg = String::from(msg);
        TokenError { msg }
    }
}
/// A lexical token of the Robo language.
#[allow(dead_code)]
#[derive(Debug)]
pub enum Token {
    /// Contents of a double-quoted string, without the quotes.
    StrLiteral(String),
    /// `` ` ``
    Backtick,
    /// A line break (`'\n'`).
    Newline,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `.`
    Period,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// `;`
    Semicolon,
    /// `'`
    SingleQuote,
    /// A run of alphanumeric characters that does not start with a digit.
    Identifier(String),
    /// Any other run of non-whitespace characters.
    Operator(String),
    /// A numeric literal, kept as source text.
    NumLiteral(Number),
}
/// Source text of a numeric literal, tagged by whether it contained a `.`.
#[allow(dead_code)]
#[derive(Debug)]
pub enum Number {
    /// Digits only.
    IntegerRep(String),
    /// Digits with a decimal point.
    FloatRep(String),
}
/// A Robo program represented as a list of `ASTNode`s.
#[allow(dead_code)]
pub type AST = Vec<ASTNode>;
/// A node of the Robo syntax tree.
// NOTE(review): nothing in this file constructs these variants yet.
#[allow(dead_code)]
#[derive(Debug)]
pub enum ASTNode {
    /// A `String` paired with an `Expression`; presumably the function's name and body.
    FunctionDefinition(String, Expression),
    /// A single `String`; presumably the imported item's name.
    ImportStatement(String),
}
/// Placeholder for Robo expressions; it has no variants, so no value of this
/// type can be constructed.
#[allow(dead_code)]
#[derive(Debug)]
pub enum Expression {}
/// Splits Robo source text into tokens.
///
/// `;` starts a comment that runs to the end of the line. The comment's
/// newline is consumed with it, and as a result `Token::Semicolon` is never
/// produced by this function. Whitespace other than `'\n'` is skipped.
///
/// # Errors
///
/// Returns `TokenError("Unclosed quote")` if a string literal is not
/// terminated before the end of input.
fn tokenize(input: &str) -> Result<Vec<Token>, TokenError> {
    use self::Token::*;

    let mut tokens = Vec::new();
    let mut iter = input.chars().peekable();

    while let Some(c) = iter.next() {
        let cur_tok = match c {
            ';' => {
                // Comment: drop everything through the end of the line.
                while let Some(skipped) = iter.next() {
                    if skipped == '\n' {
                        break;
                    }
                }
                continue;
            }
            '\n' => Newline,
            c if c.is_whitespace() => continue,
            '(' => LParen,
            ')' => RParen,
            '[' => LBracket,
            ']' => RBracket,
            '{' => LBrace,
            '}' => RBrace,
            ',' => Comma,
            ':' => Colon,
            '.' => Period,
            '`' => Backtick,
            '\'' => SingleQuote,
            '"' => {
                let mut text = String::new();
                loop {
                    match iter.next() {
                        Some('"') => break,
                        Some(ch) => text.push(ch),
                        None => return Err(TokenError::new("Unclosed quote")),
                    }
                }
                StrLiteral(text)
            }
            c if c.is_digit(10) => {
                // Integer part first, then an optional '.' and fraction digits.
                let mut digits = c.to_string();
                digits.extend(iter.peeking_take_while(|d| d.is_digit(10)));
                let has_point = iter.peek() == Some(&'.');
                if has_point {
                    iter.next();
                    digits.push('.');
                    digits.extend(iter.peeking_take_while(|d| d.is_digit(10)));
                    NumLiteral(Number::FloatRep(digits))
                } else {
                    NumLiteral(Number::IntegerRep(digits))
                }
            }
            c if c.is_alphanumeric() => {
                let mut word = c.to_string();
                word.extend(iter.peeking_take_while(|ch| ch.is_alphanumeric()));
                Identifier(word)
            }
            c => {
                // Everything up to the next whitespace belongs to the operator.
                let mut sigil = c.to_string();
                sigil.extend(iter.peeking_take_while(|ch| !ch.is_whitespace()));
                Operator(sigil)
            }
        };
        tokens.push(cur_tok);
    }

    Ok(tokens)
}
/// REPL integration for Robo. The pipeline currently stops after tokenizing
/// and reports the token list.
impl ProgrammingLanguageInterface for Robo {
    /// Display name of the language.
    fn get_language_name(&self) -> String {
        String::from("Robo")
    }

    /// File suffix used for Robo source files.
    fn get_source_file_suffix(&self) -> String {
        String::from("robo")
    }

    /// Tokenizes `input` and returns the `Debug` rendering of the tokens, or a
    /// "Tokenize error" message when tokenizing fails.
    fn execute_pipeline(&mut self, input: &str, _eval_options: &EvalOptions) -> FinishedComputation {
        let output = UnfinishedComputation::default();
        let outcome = match tokenize(input) {
            Ok(tokens) => Ok(format!("{:?}", tokens)),
            Err(e) => Err(format!("Tokenize error: {:?}", e)),
        };
        output.finish(outcome)
    }
}

View File

@ -1,11 +0,0 @@
[package]
name = "rukka-lang"
version = "0.1.0"
authors = ["greg <greg.shuflin@protonmail.com>"]
[dependencies]
itertools = "0.5.8"
take_mut = "0.1.3"
llvm-sys = "*"
schala-repl = { path = "../schala-repl" }

View File

@ -1,435 +0,0 @@
#![feature(box_patterns)]
extern crate itertools;
extern crate schala_repl;
use itertools::Itertools;
use schala_repl::{ProgrammingLanguageInterface, EvalOptions, UnfinishedComputation, FinishedComputation};
use std::iter::Peekable;
use std::vec::IntoIter;
use std::str::Chars;
use std::collections::HashMap;
/// Interpreter state: a stack of variable scopes.
pub struct EvaluatorState {
    /// Scopes from outermost (index 0) to innermost (last element).
    binding_stack: Vec<HashMap<String, Sexp>>,
}
impl EvaluatorState {
    /// Creates a state whose single scope binds the built-in primitives.
    fn new() -> EvaluatorState {
        use self::Sexp::Primitive;
        use self::PrimitiveFn::*;

        let builtins = vec![
            ("+", Plus),
            ("-", Minus),
            ("*", Mult),
            ("/", Div),
            ("%", Mod),
            (">", Greater),
            ("<", Less),
            ("<=", LessThanOrEqual),
            (">=", GreaterThanOrEqual),
            ("display", Display),
        ];
        let default_map: HashMap<String, Sexp> = builtins
            .into_iter()
            .map(|(name, prim)| (name.to_string(), Primitive(prim)))
            .collect();

        EvaluatorState {
            binding_stack: vec![default_map],
        }
    }

    /// Binds `var` to `value` in the innermost scope.
    ///
    /// Panics if the scope stack is empty.
    fn set_var(&mut self, var: String, value: Sexp) {
        self.binding_stack.last_mut().unwrap().insert(var, value);
    }

    /// Looks `var` up, searching from the innermost scope outwards.
    fn get_var(&self, var: &str) -> Option<&Sexp> {
        self.binding_stack
            .iter()
            .rev()
            .filter_map(|scope| scope.get(var))
            .next()
    }

    /// Opens a new, empty innermost scope.
    fn push_env(&mut self) {
        self.binding_stack.push(HashMap::new());
    }

    /// Discards the innermost scope, if there is one.
    fn pop_env(&mut self) {
        self.binding_stack.pop();
    }
}
/// The Rukka language: owns the evaluator state used across inputs.
pub struct Rukka {
    state: EvaluatorState,
}

impl Rukka {
    /// Creates an interpreter with a fresh `EvaluatorState`.
    pub fn new() -> Rukka {
        let state = EvaluatorState::new();
        Rukka { state }
    }
}
/// REPL integration for Rukka: read the input into s-expressions, evaluate
/// each one, and report one line per expression.
impl ProgrammingLanguageInterface for Rukka {
    /// Display name of the language.
    fn get_language_name(&self) -> String {
        String::from("Rukka")
    }

    /// File suffix used for Rukka source files.
    fn get_source_file_suffix(&self) -> String {
        String::from("rukka")
    }

    /// Reads and evaluates `input`.
    ///
    /// A read failure finishes the computation with `Error: …`. Otherwise each
    /// expression yields `<index>: <value>` or `<index> Error: <message>`, and
    /// the lines are joined with `\n`.
    fn execute_pipeline(&mut self, input: &str, _eval_options: &EvalOptions) -> FinishedComputation {
        let output = UnfinishedComputation::default();

        let sexps = match read(input) {
            Ok(sexps) => sexps,
            Err(err) => return output.finish(Err(format!("Error: {}", err))),
        };

        let mut lines: Vec<String> = Vec::new();
        for (i, sexp) in sexps.into_iter().enumerate() {
            let line = match self.state.eval(sexp) {
                Ok(result) => format!("{}: {}", i, result.print()),
                Err(err) => format!("{} Error: {}", i, err),
            };
            lines.push(line);
        }

        output.finish(Ok(lines.join("\n")))
    }
}
impl EvaluatorState {
    /// Evaluates a single s-expression.
    ///
    /// Symbols are looked up in the scope stack; literals, lambdas and
    /// primitives evaluate to themselves. A `Cons` whose head is the name of a
    /// special form is handed to `eval_special_form`; for any other `Cons` the
    /// head is evaluated and applied to the (unevaluated) tail via `apply`.
    ///
    /// # Errors
    ///
    /// Returns a message for unbound variables and for any error raised while
    /// evaluating a special form or an application.
    fn eval(&mut self, expr: Sexp) -> Result<Sexp, String> {
        use self::Sexp::*;

        // Names that are dispatched to `eval_special_form` instead of `apply`.
        // NOTE(review): "cond" is listed here but has no arm in
        // `eval_special_form`, so using it reports "Non-existent special form".
        fn is_special_form(name: &str) -> bool {
            match name {
                "quote" | "eq?" | "cons" | "car" | "cdr" | "atom?" | "define" | "lambda"
                | "if" | "cond" => true,
                _ => false,
            }
        }

        Ok(match expr {
            SymbolAtom(ref sym) => match self.get_var(sym) {
                // `get_var` already yields `&Sexp`; binding it without `ref`
                // makes `clone` produce an owned `Sexp` rather than a copy of
                // the reference.
                Some(sexp) => sexp.clone(), //TODO make this not involve a clone
                None => return Err(format!("Variable {} not bound", sym)),
            },
            expr @ Primitive(_) => expr,
            expr @ FnLiteral { .. } => expr,
            expr @ StringAtom(_) => expr,
            expr @ NumberAtom(_) => expr,
            expr @ BoolAtom(_) => expr,
            Cons(box operator, box operands) => match operator {
                SymbolAtom(ref sym) if is_special_form(sym) => {
                    self.eval_special_form(sym, operands)?
                }
                _ => {
                    let evaled = self.eval(operator)?;
                    self.apply(evaled, operands)?
                }
            },
            Nil => Nil,
        })
    }

    /// Evaluates the special form named `form` applied to `operands` (the
    /// unevaluated argument list).
    ///
    /// NOTE(review): apart from `cons` and `define`, the forms below operate
    /// on their operands without evaluating them first (for example `if`
    /// tests the truthiness of the unevaluated test expression and returns the
    /// chosen branch unevaluated). Confirm whether that is intended.
    ///
    /// # Errors
    ///
    /// Returns a message when the operands do not have the shape the form
    /// requires, or when `form` is not a known special form.
    fn eval_special_form(&mut self, form: &str, operands: Sexp) -> Result<Sexp, String> {
        use self::Sexp::*;
        Ok(match form {
            "quote" => match operands {
                Cons(box quoted, box Nil) => quoted,
                _ => return Err("Bad syntax in quote".to_string()),
            },
            "eq?" => match operands {
                //TODO make correct
                Cons(box lhs, box Cons(box rhs, _)) => BoolAtom(lhs == rhs),
                _ => BoolAtom(true),
            },
            "cons" => match operands {
                Cons(box cadr, box Cons(box caddr, box Nil)) => {
                    let newl = self.eval(cadr)?;
                    let newr = self.eval(caddr)?;
                    Cons(Box::new(newl), Box::new(newr))
                }
                _ => return Err("Bad arguments for cons".to_string()),
            },
            "car" => match operands {
                Cons(box car, _) => car,
                _ => return Err("called car with a non-pair argument".to_string()),
            },
            "cdr" => match operands {
                Cons(_, box cdr) => cdr,
                _ => return Err("called cdr with a non-pair argument".to_string()),
            },
            "atom?" => match operands {
                Cons(_, _) => BoolAtom(false),
                _ => BoolAtom(true),
            },
            "define" => match operands {
                Cons(box SymbolAtom(sym), box Cons(box expr, box Nil)) => {
                    let evaluated = self.eval(expr)?;
                    self.set_var(sym, evaluated);
                    Nil
                }
                _ => return Err("Bad assignment".to_string()),
            },
            "lambda" => match operands {
                Cons(box paramlist, box Cons(box formalexp, box Nil)) => {
                    // Walk the parameter list, which must consist of symbols.
                    let mut formal_params = vec![];
                    let mut ptr = &paramlist;
                    while let Cons(ref arg, ref rest) = *ptr {
                        match **arg {
                            SymbolAtom(ref sym) => formal_params.push(sym.clone()),
                            _ => return Err("Bad lambda format".to_string()),
                        }
                        ptr = rest;
                    }
                    FnLiteral {
                        formal_params,
                        body: Box::new(formalexp),
                    }
                }
                _ => return Err("Bad lambda expression".to_string()),
            },
            "if" => match operands {
                Cons(box test, box body) => {
                    let truth_value = test.truthy();
                    match (truth_value, body) {
                        (true, Cons(box consequent, _)) => consequent,
                        (false, Cons(_, box Cons(box alternative, _))) => alternative,
                        _ => return Err("Bad if expression".to_string()),
                    }
                }
                _ => return Err("Bad if expression".to_string()),
            },
            s => {
                return Err(format!(
                    "Non-existent special form {}; this should never happen",
                    s
                ))
            }
        })
    }

    /// Applies `function` to the argument list `operands`.
    ///
    /// For a lambda, a new scope is pushed, the parameters are bound, the body
    /// is evaluated, and the scope is popped again on success and on error
    /// alike. For a primitive, every operand is evaluated and the results are
    /// passed to `PrimitiveFn::apply`.
    ///
    /// # Errors
    ///
    /// Returns a message when `function` is not callable, when too few
    /// arguments are supplied to a lambda, when the operand list is not a
    /// proper list, or when evaluation of an operand or the body fails.
    fn apply(&mut self, function: Sexp, operands: Sexp) -> Result<Sexp, String> {
        use self::Sexp::*;
        match function {
            FnLiteral { formal_params, body } => {
                self.push_env();
                // Binding may fail; the scope pushed above must be popped in
                // that case too, otherwise every failed call leaves a frame
                // behind on the binding stack.
                let result = match self.bind_params(formal_params, operands) {
                    Ok(()) => self.eval(*body),
                    Err(err) => Err(err),
                };
                self.pop_env();
                result
            }
            Primitive(prim) => {
                let mut evaled_operands = Vec::new();
                let mut cur_operand = operands;
                loop {
                    match cur_operand {
                        Nil => break,
                        Cons(box l, box rest) => {
                            evaled_operands.push(self.eval(l)?);
                            cur_operand = rest;
                        }
                        _ => return Err("Bad operands list".to_string()),
                    }
                }
                prim.apply(evaled_operands)
            }
            _ => Err("Bad type to apply".to_string()),
        }
    }

    /// Binds each formal parameter to the corresponding operand in the
    /// innermost scope. Surplus operands are ignored.
    ///
    /// NOTE(review): operands are bound as given, without being evaluated.
    fn bind_params(&mut self, formal_params: Vec<String>, operands: Sexp) -> Result<(), String> {
        use self::Sexp::*;
        let mut cur = operands;
        for param in formal_params {
            match cur {
                Cons(box arg, box rest) => {
                    cur = rest;
                    self.set_var(param, arg);
                }
                _ => return Err("Bad argument for function application".to_string()),
            }
        }
        Ok(())
    }
}
/// Tokenizes `input` and parses it into a sequence of s-expressions.
///
/// # Errors
///
/// Returns the first parse error encountered.
fn read(input: &str) -> Result<Vec<Sexp>, String> {
    let mut chars: Peekable<Chars> = input.chars().peekable();
    let mut tokens = tokenize(&mut chars).into_iter().peekable();

    let mut sexps = Vec::new();
    while tokens.peek().is_some() {
        let sexp = parse(&mut tokens)?;
        sexps.push(sexp);
    }
    Ok(sexps)
}
/// A lexical token of Rukka source text.
#[derive(Debug)]
enum Token {
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `'`
    Quote,
    /// Any other run of characters up to whitespace or a parenthesis.
    Word(String),
    /// Contents of a double-quoted string, with backslash escapes resolved.
    StringLiteral(String),
    /// An unsigned integer literal.
    NumLiteral(u64),
}
/// An s-expression: both the syntax tree and the runtime value representation.
//TODO make this notion of Eq more sophisticated
#[derive(Debug, PartialEq, Clone)]
enum Sexp {
    /// A symbol (variable or special-form name).
    SymbolAtom(String),
    /// A string value.
    StringAtom(String),
    /// An unsigned integer value.
    NumberAtom(u64),
    /// `#t` or `#f`.
    BoolAtom(bool),
    /// A pair: head and tail.
    Cons(Box<Sexp>, Box<Sexp>),
    /// The empty list.
    Nil,
    /// A lambda: parameter names plus the body expression.
    FnLiteral {
        formal_params: Vec<String>,
        body: Box<Sexp>,
    },
    /// A built-in function.
    Primitive(PrimitiveFn),
}
/// The built-in functions bound in the initial scope.
#[derive(Debug, PartialEq, Clone)]
enum PrimitiveFn {
    Plus,
    Minus,
    Mult,
    Div,
    Mod,
    Greater,
    Less,
    GreaterThanOrEqual,
    LessThanOrEqual,
    Display,
}
impl PrimitiveFn {
    /// Applies this primitive to already-evaluated operands.
    ///
    /// * `Display` prints each operand on its own line and returns `Nil`.
    /// * `Plus` / `Mult` fold the operands (identity `0` / `1` when there are
    ///   none) and return a `NumberAtom`.
    ///
    /// # Errors
    ///
    /// Returns a message when an arithmetic operand is not a `NumberAtom`,
    /// when the result does not fit in a `u64`, or when the primitive is not
    /// implemented yet.
    fn apply(&self, evaled_operands: Vec<Sexp>) -> Result<Sexp, String> {
        use self::Sexp::*;
        use self::PrimitiveFn::*;

        match *self {
            Display => {
                for arg in evaled_operands {
                    println!("{}", arg.print());
                }
                Ok(Nil)
            }
            Plus | Mult => {
                let is_sum = *self == Plus;
                let mut result: u64 = if is_sum { 0 } else { 1 };
                for arg in evaled_operands {
                    let n = match arg {
                        NumberAtom(n) => n,
                        other => return Err(format!("Bad operand: {:?}", other)),
                    };
                    // Checked arithmetic: plain `+=` / `*=` would panic in
                    // debug builds and silently wrap in release builds.
                    let next = if is_sum {
                        result.checked_add(n)
                    } else {
                        result.checked_mul(n)
                    };
                    result = match next {
                        Some(value) => value,
                        None => {
                            return Err(format!("Integer overflow in primitive op {:?}", self))
                        }
                    };
                }
                Ok(NumberAtom(result))
            }
            ref op => Err(format!("Primitive op {:?} not implemented", op)),
        }
    }
}
impl Sexp {
    /// Renders this value as text for the REPL.
    ///
    /// Pairs are always printed in dotted form, `(car . cdr)`.
    fn print(&self) -> String {
        use self::Sexp::*;
        match *self {
            BoolAtom(flag) => String::from(if flag { "#t" } else { "#f" }),
            SymbolAtom(ref sym) => sym.clone(),
            StringAtom(ref s) => format!("\"{}\"", s),
            NumberAtom(n) => n.to_string(),
            Cons(ref car, ref cdr) => format!("({} . {})", car.print(), cdr.print()),
            Nil => String::from("()"),
            FnLiteral { ref formal_params, .. } => format!("<lambda {:?}>", formal_params),
            Primitive(ref sym) => format!("<primitive \"{:?}\">", sym),
        }
    }

    /// Everything except `#f` counts as true.
    fn truthy(&self) -> bool {
        use self::Sexp::*;
        if let BoolAtom(false) = *self {
            false
        } else {
            true
        }
    }
}
/// Consumes `input` and returns the tokens found in it.
///
/// Whitespace separates tokens and is otherwise ignored. Inside a string
/// literal a backslash makes the next character literal; an unterminated
/// string simply ends at end of input.
///
/// NOTE(review): a numeric literal is parsed with `unwrap`, so input that
/// `char::is_numeric` accepts but `u64::from_str` rejects (a value too large
/// for `u64`, or non-ASCII numeric characters) panics here. Fixing that
/// properly needs a fallible return type.
fn tokenize(input: &mut Peekable<Chars>) -> Vec<Token> {
    use self::Token::*;

    let mut tokens = Vec::new();
    while let Some(c) = input.next() {
        let token = match c {
            '(' => LParen,
            ')' => RParen,
            '\'' => Quote,
            c if c.is_whitespace() => continue,
            c if c.is_numeric() => {
                let mut digits = c.to_string();
                digits.extend(input.peeking_take_while(|next| next.is_numeric()));
                NumLiteral(digits.parse::<u64>().unwrap())
            }
            '"' => {
                let mut text = String::new();
                let mut escaped = false;
                while let Some(ch) = input.next() {
                    if escaped {
                        // The character after a backslash is taken verbatim.
                        text.push(ch);
                        escaped = false;
                    } else if ch == '"' {
                        break;
                    } else if ch == '\\' {
                        escaped = true;
                    } else {
                        text.push(ch);
                    }
                }
                StringLiteral(text)
            }
            c => {
                let mut word = c.to_string();
                word.extend(input.peeking_take_while(|next| {
                    !(*next == '(' || *next == ')' || next.is_whitespace())
                }));
                Word(word)
            }
        };
        tokens.push(token);
    }
    tokens
}
/// Parses one s-expression from the front of `tokens`.
///
/// `'x` is expanded to `(quote x)`; the words `#t` and `#f` become booleans.
///
/// # Errors
///
/// Returns a message on a stray `)` or when the tokens run out.
fn parse(tokens: &mut Peekable<IntoIter<Token>>) -> Result<Sexp, String> {
    use self::Token::*;
    use self::Sexp::*;

    let token = match tokens.next() {
        Some(token) => token,
        None => return Err("Unexpected end of input".to_string()),
    };

    match token {
        Word(s) => Ok(if s == "#f" {
            BoolAtom(false)
        } else if s == "#t" {
            BoolAtom(true)
        } else {
            SymbolAtom(s)
        }),
        StringLiteral(s) => Ok(StringAtom(s)),
        NumLiteral(n) => Ok(NumberAtom(n)),
        LParen => parse_sexp(tokens),
        RParen => Err("Unexpected ')'".to_string()),
        Quote => {
            let quoted = parse(tokens)?;
            let tail = Cons(Box::new(quoted), Box::new(Nil));
            let head = SymbolAtom("quote".to_string());
            Ok(Cons(Box::new(head), Box::new(tail)))
        }
    }
}
/// Parses the elements of a list up to and including the closing `)`; the
/// opening `(` has already been consumed.
///
/// The result is a proper list: nested `Cons` cells terminated by `Nil`
/// (just `Nil` for `()`).
///
/// # Errors
///
/// Returns a message if the tokens run out before the closing `)`, or if an
/// element fails to parse.
fn parse_sexp(tokens: &mut Peekable<IntoIter<Token>>) -> Result<Sexp, String> {
    use self::Token::*;
    use self::Sexp::*;

    // Collect the elements first, then build the cons chain back to front.
    let mut items = Vec::new();
    loop {
        match tokens.peek() {
            None => return Err("Unexpected end of input".to_string()),
            Some(&RParen) => {
                tokens.next();
                break;
            }
            _ => {}
        }
        items.push(parse(tokens)?);
    }

    let list = items
        .into_iter()
        .rev()
        .fold(Nil, |tail, head| Cons(Box::new(head), Box::new(tail)));
    Ok(list)
}

View File

@ -1,12 +0,0 @@
[package]
name = "schala-codegen"
version = "0.1.0"
authors = ["greg <greg.shuflin@protonmail.com>"]
[dependencies]
syn = { version = "0.13.1", features = ["full", "extra-traits"] }
quote = "0.5"
schala-repl = { path = "../schala-repl" }
[lib]
proc-macro = true

View File

@ -1,103 +0,0 @@
#![feature(trace_macros)]
#![feature(proc_macro)]
extern crate proc_macro;
#[macro_use]
extern crate quote;
extern crate syn;
extern crate schala_repl;
use proc_macro::TokenStream;
use syn::{Ident, Attribute, DeriveInput};
/// Returns the string value of the attribute written as `#[name = "value"]`.
///
/// The first attribute whose meta is a name-value pair named `name` is
/// selected. `None` is returned when there is no such attribute, or when the
/// selected attribute's literal is not a string (later attributes with the
/// same name are not consulted).
fn extract_attribute_arg_by_name(name: &str, attrs: &Vec<Attribute>) -> Option<String> {
use syn::{Meta, Lit, MetaNameValue};
attrs.iter().map(|attr| attr.interpret_meta()).find(|meta| {
match meta {
&Some(Meta::NameValue(MetaNameValue { ident, .. })) if ident.as_ref() == name => true,
_ => false
}
}).and_then(|meta| {
match meta {
Some(Meta::NameValue(MetaNameValue { lit: Lit::Str(litstr), .. })) => Some(litstr.value()),
_ => None,
}
})
}
/// Returns the entries of the list attribute `#[name(a, b(x, y), …)]`.
///
/// The first attribute whose leading path segment equals `name` is used. Each
/// entry is returned as `(ident, None)` for a bare word, or
/// `(ident, Some(inner_idents))` for a nested list of bare words. `None` is
/// returned when no attribute named `name` exists.
///
/// # Panics
///
/// Panics when the attribute is not a parenthesised list, or when an entry
/// (or an entry of a nested list) has any other shape.
fn extract_attribute_list(name: &str, attrs: &Vec<Attribute>) -> Option<Vec<(Ident, Option<Vec<Ident>>)>> {
use syn::{Meta, MetaList, NestedMeta};
attrs.iter().find(|attr| {
match attr.path.segments.iter().nth(0) {
Some(segment) if segment.ident.as_ref() == name => true,
_ => false
}
}).and_then(|attr| {
match attr.interpret_meta() {
Some(Meta::List(MetaList { nested, .. })) => {
Some(nested.iter().map(|nested_meta| match nested_meta {
&NestedMeta::Meta(Meta::Word(ident)) => (ident, None),
&NestedMeta::Meta(Meta::List(MetaList { ident, nested: ref nested2, .. })) => {
let own_args = nested2.iter().map(|nested_meta2| match nested_meta2 {
&NestedMeta::Meta(Meta::Word(ident)) => ident,
_ => panic!("Bad format for doubly-nested attribute list")
}).collect();
(ident, Some(own_args))
},
_ => panic!("Bad format for nested list")
}).collect())
},
_ => panic!("{} must be a comma-delimited list surrounded by parens", name)
}
})
}
/// Derives `ProgrammingLanguageInterface` for the annotated type.
///
/// The generated impl is driven by three attributes on the type:
/// `LanguageName = "…"`, `SourceFileExtension = "…"` and
/// `PipelineSteps(step, step(option, …), …)`. The step identifiers are passed
/// to `pass_chain!` in the generated `execute_pipeline`, and each step becomes
/// a `PassDescriptor` (name plus debug option names) in `get_passes`.
///
/// The generated code refers to `ProgrammingLanguageInterface`, `EvalOptions`,
/// `FinishedComputation` and `pass_chain!` without importing them, so they
/// must be in scope where the derive is used.
///
/// # Panics
///
/// Panics at macro-expansion time when the input cannot be parsed or when any
/// of the three attributes is missing or malformed.
#[proc_macro_derive(ProgrammingLanguageInterface, attributes(LanguageName, SourceFileExtension, PipelineSteps))]
pub fn derive_programming_language_interface(input: TokenStream) -> TokenStream {
use schala_repl::PassDescriptor;
let ast: DeriveInput = syn::parse(input).unwrap();
let name = &ast.ident;
let attrs = &ast.attrs;
let language_name: String = extract_attribute_arg_by_name("LanguageName", attrs).expect("LanguageName is required");
let file_ext = extract_attribute_arg_by_name("SourceFileExtension", attrs).expect("SourceFileExtension is required");
let passes = extract_attribute_list("PipelineSteps", attrs).expect("PipelineSteps are required");
// Identifiers of the pipeline steps, in declaration order.
let pass_idents = passes.iter().map(|x| x.0);
//let pass_names: Vec<String> = passes.iter().map(|pass| pass.0.to_string()).collect();
// One `PassDescriptor { .. }` expression per step, for `get_passes`.
let pass_descriptors = passes.iter().map(|pass| {
let name = pass.0.to_string();
let opts: Vec<String> = match &pass.1 {
None => vec![],
Some(opts) => opts.iter().map(|o| o.to_string()).collect(),
};
quote! {
PassDescriptor {
name: #name.to_string(),
debug_options: vec![#(format!(#opts)),*]
}
}
});
let tokens = quote! {
use schala_repl::PassDescriptor;
impl ProgrammingLanguageInterface for #name {
fn get_language_name(&self) -> String {
#language_name.to_string()
}
fn get_source_file_suffix(&self) -> String {
#file_ext.to_string()
}
fn execute_pipeline(&mut self, input: &str, options: &EvalOptions) -> FinishedComputation {
let mut chain = pass_chain![self, options; #(#pass_idents),* ];
chain(input)
}
fn get_passes(&self) -> Vec<PassDescriptor> {
vec![ #(#pass_descriptors),* ]
//vec![ #(PassDescriptor { name: #pass_names.to_string(), debug_options: vec![] }),* ]
}
}
};
tokens.into()
}

View File

@ -1,13 +0,0 @@
[package]
name = "schala-lang"
version = "0.1.0"
authors = ["greg <greg.shuflin@protonmail.com>"]
[dependencies]
itertools = "0.5.8"
take_mut = "0.1.3"
maplit = "*"
lazy_static = "0.2.8"
schala-repl = { path = "../schala-repl" }
schala-codegen = { path = "../schala-codegen" }

View File

@ -1,176 +0,0 @@
use std::rc::Rc;
use builtin::{BinOp, PrefixOp};
/// Newtype around the list of `Statement`s that forms a syntax tree.
#[derive(Debug, PartialEq)]
pub struct AST(
    /// The statements, in order.
    pub Vec<Statement>,
);
/// A statement is either an expression or a declaration.
#[derive(Debug, PartialEq, Clone)]
pub enum Statement {
    /// An expression used as a statement.
    ExpressionStatement(Expression),
    /// A declaration.
    Declaration(Declaration),
}
/// A sequence of statements.
pub type Block = Vec<Statement>;
/// Name of a formal parameter.
pub type ParamName = Rc<String>;
/// Name of an interface.
pub type InterfaceName = Rc<String>; //should be a singleton I think??
/// A formal parameter: its name plus an optional type annotation.
pub type FormalParam = (ParamName, Option<TypeName>);
/// The kinds of declaration that can appear as a statement.
#[derive(Debug, PartialEq, Clone)]
pub enum Declaration {
    /// A function signature without a body.
    FuncSig(Signature),
    /// A function signature together with its body.
    FuncDecl(Signature, Block),
    /// A type declaration.
    TypeDecl {
        name: TypeSingletonName,
        body: TypeBody,
        mutable: bool,
    },
    /// A type alias, as a pair of names.
    //should have TypeSingletonName in it, or maybe just String, not sure
    TypeAlias(Rc<String>, Rc<String>),
    /// A binding of `name` to `expr`; `constant` records the binding's mutability flag.
    Binding {
        name: Rc<String>,
        constant: bool,
        expr: Expression,
    },
    /// An `impl` block for `type_name`, optionally for a named interface.
    Impl {
        type_name: TypeName,
        interface_name: Option<InterfaceName>,
        block: Vec<Declaration>,
    },
    /// An interface: a name plus the signatures it contains.
    Interface {
        name: Rc<String>,
        signatures: Vec<Signature>,
    },
}
/// A function signature.
#[derive(Debug, PartialEq, Clone)]
pub struct Signature {
    /// The function's name.
    pub name: Rc<String>,
    /// Formal parameters, in order.
    pub params: Vec<FormalParam>,
    /// Optional type annotation on the signature.
    pub type_anno: Option<TypeName>,
}
/// Body of a type declaration: its list of variants.
#[derive(Debug, PartialEq, Clone)]
pub struct TypeBody(
    /// The variants, in declaration order.
    pub Vec<Variant>,
);
/// One variant of a declared type.
#[derive(Debug, PartialEq, Clone)]
pub enum Variant {
    /// A name with no fields.
    UnitStruct(Rc<String>),
    /// A name with positional field types.
    TupleStruct(Rc<String>, Vec<TypeName>),
    /// A name with named, typed fields.
    Record(Rc<String>, Vec<(Rc<String>, TypeName)>),
}
/// An expression together with an optional type annotation.
#[derive(Debug, PartialEq, Clone)]
pub struct Expression(
    /// The expression itself.
    pub ExpressionType,
    /// The type annotation, if any.
    pub Option<TypeName>,
);
/// A type as written in source: a tuple of types or a single named type.
#[derive(Debug, PartialEq, Clone)]
pub enum TypeName {
    /// A tuple of types.
    Tuple(Vec<TypeName>),
    /// A single named type, possibly with parameters.
    Singleton(TypeSingletonName),
}

/// A named type and its type parameters.
#[derive(Debug, PartialEq, Clone)]
pub struct TypeSingletonName {
    /// The type's name.
    pub name: Rc<String>,
    /// Type parameters; empty when there are none.
    pub params: Vec<TypeName>,
}
/// The different forms an expression can take.
#[derive(Debug, PartialEq, Clone)]
pub enum ExpressionType {
    /// An unsigned integer literal.
    NatLiteral(u64),
    /// A floating-point literal.
    FloatLiteral(f64),
    /// A string literal.
    StringLiteral(Rc<String>),
    /// A boolean literal.
    BoolLiteral(bool),
    /// A binary operator applied to two operands.
    BinExp(BinOp, Box<Expression>, Box<Expression>),
    /// A prefix operator applied to one operand.
    PrefixExp(PrefixOp, Box<Expression>),
    /// A tuple of expressions.
    TupleLiteral(Vec<Expression>),
    /// A reference to a name.
    Value(Rc<String>),
    /// A named struct with `(field name, value)` pairs.
    NamedStruct {
        name: Rc<String>,
        fields: Vec<(Rc<String>, Expression)>,
    },
    /// A call of `f` with `arguments`.
    Call {
        f: Box<Expression>,
        arguments: Vec<Expression>,
    },
    /// Indexing of `indexee` by `indexers`.
    Index {
        indexee: Box<Expression>,
        indexers: Vec<Expression>,
    },
    /// An `if` expression.
    IfExpression {
        discriminator: Box<Discriminator>,
        body: Box<IfExpressionBody>,
    },
    /// A `while` expression; the condition is optional.
    WhileExpression {
        condition: Option<Box<Expression>>,
        body: Block,
    },
    /// A `for` expression.
    ForExpression {
        enumerators: Vec<Enumerator>,
        body: Box<ForBody>,
    },
    /// An anonymous function.
    Lambda {
        params: Vec<FormalParam>,
        body: Block,
    },
    /// A list of expressions.
    ListLiteral(Vec<Expression>),
}
/// The part of an `if` expression that is tested.
#[derive(Debug, PartialEq, Clone)]
pub enum Discriminator {
    /// A plain expression.
    Simple(Expression),
    /// An expression followed by a binary operator.
    BinOp(Expression, BinOp),
}
/// The body of an `if` expression.
#[derive(Debug, PartialEq, Clone)]
pub enum IfExpressionBody {
    /// A block plus an optional second block.
    SimpleConditional(Block, Option<Block>),
    /// A pattern, a block, and an optional second block.
    SimplePatternMatch(Pattern, Block, Option<Block>),
    /// A list of guarded arms.
    GuardList(Vec<GuardArm>),
}
/// One arm of a guard list: a guard and the block it selects.
#[derive(Debug, PartialEq, Clone)]
pub struct GuardArm {
    /// The guard for this arm.
    pub guard: Guard,
    /// The block belonging to this arm.
    pub body: Block,
}
/// The guard of a `GuardArm`.
#[derive(Debug, PartialEq, Clone)]
pub enum Guard {
    /// A pattern.
    Pat(Pattern),
    /// A `HalfExpr`.
    HalfExpr(HalfExpr),
}
/// An optional binary operator paired with an expression.
#[derive(Debug, PartialEq, Clone)]
pub struct HalfExpr {
    /// The operator, if one was given.
    pub op: Option<BinOp>,
    /// The expression.
    pub expr: ExpressionType,
}
/// A pattern.
#[derive(Debug, PartialEq, Clone)]
pub enum Pattern {
    /// A pattern with no payload.
    Ignored,
    /// A tuple of sub-patterns.
    TuplePattern(Vec<Pattern>),
    /// A literal or variable pattern.
    Literal(PatternLiteral),
    /// A name with positional sub-patterns.
    TupleStruct(Rc<String>, Vec<Pattern>),
    /// A name with `(field name, sub-pattern)` pairs.
    Record(Rc<String>, Vec<(Rc<String>, Pattern)>),
}
/// The leaf forms of a pattern.
#[derive(Debug, PartialEq, Clone)]
pub enum PatternLiteral {
    /// A numeric pattern, stored as an expression.
    NumPattern(ExpressionType),
    /// A string pattern.
    StringPattern(Rc<String>),
    /// A boolean pattern.
    BoolPattern(bool),
    /// A variable name.
    VarPattern(Rc<String>),
}
/// One enumerator of a `for` expression: a name and its generator expression.
#[derive(Debug, PartialEq, Clone)]
pub struct Enumerator {
    /// The enumerator's name.
    pub id: Rc<String>,
    /// The generator expression.
    pub generator: Expression,
}
/// The body of a `for` expression.
#[derive(Debug, PartialEq, Clone)]
pub enum ForBody {
    /// A single expression.
    MonadicReturn(Expression),
    /// A block of statements.
    StatementBlock(Block),
}

View File

@ -1,100 +0,0 @@
use std::rc::Rc;
use std::collections::HashMap;
use std::fmt;
use self::Type::*; use self::TConstOld::*;
//TODO get rid of these types and replace them with the right MonoType or whatever ones later
/// Type of a built-in operator: a constant type or a one-argument function.
#[derive(Debug, PartialEq, Clone)]
pub enum Type {
    /// A base type.
    Const(TConstOld),
    /// A function from the first type to the second.
    Func(Box<Type>, Box<Type>),
}
/// The base types used in built-in operator signatures.
#[derive(Debug, PartialEq, Clone)]
pub enum TConstOld {
    Nat,
    Int,
    Float,
    StringT,
    Bool,
}
/// `Display` for `Type` reuses the `Debug` rendering.
impl fmt::Display for Type {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_fmt(format_args!("{:?}", self))
    }
}
/// A binary operator, identified by its sigil.
#[derive(Debug, PartialEq, Clone)]
pub struct BinOp {
    /// The operator's text, e.g. `+`.
    sigil: Rc<String>,
}
impl BinOp {
    /// Creates a `BinOp` for the given sigil; the sigil is not validated.
    pub fn from_sigil(sigil: &str) -> BinOp {
        let sigil = Rc::new(sigil.to_string());
        BinOp { sigil }
    }

    /// The operator's sigil.
    pub fn sigil(&self) -> &Rc<String> {
        &self.sigil
    }

    /// Looks up the operator's type in the `BINOPS` table.
    ///
    /// Returns `Err("Binop … not found")` for a sigil that is not in the table.
    pub fn get_type(&self) -> Result<Type, String> {
        let s = self.sigil.as_str();
        match BINOPS.get(s) {
            Some(entry) => Ok(entry.0.clone()),
            None => Err(format!("Binop {} not found", s)),
        }
    }

    /// The lowest possible precedence value.
    pub fn min_precedence() -> i32 {
        i32::min_value()
    }

    /// Precedence of `op` from the `BINOPS` table; operators that are not in
    /// the table get `10_000_000`.
    pub fn get_precedence(op: &str) -> i32 {
        match BINOPS.get(op) {
            Some(entry) => entry.2,
            None => 10_000_000,
        }
    }
}
/// A prefix operator, identified by its sigil.
#[derive(Debug, PartialEq, Clone)]
pub struct PrefixOp {
    /// The operator's text, e.g. `!`.
    sigil: Rc<String>,
}
impl PrefixOp {
    /// Creates a `PrefixOp` for the given sigil; the sigil is not validated.
    pub fn from_sigil(sigil: &str) -> PrefixOp {
        let sigil = Rc::new(sigil.to_string());
        PrefixOp { sigil }
    }

    /// The operator's sigil.
    pub fn sigil(&self) -> &Rc<String> {
        &self.sigil
    }

    /// Whether `op` is a key of the `PREFIX_OPS` table.
    pub fn is_prefix(op: &str) -> bool {
        PREFIX_OPS.contains_key(op)
    }

    /// Looks up the operator's type in the `PREFIX_OPS` table.
    ///
    /// Returns `Err("Prefix op … not found")` for a sigil that is not in the table.
    pub fn get_type(&self) -> Result<Type, String> {
        let s = self.sigil.as_str();
        match PREFIX_OPS.get(s) {
            Some(entry) => Ok(entry.0.clone()),
            None => Err(format!("Prefix op {} not found", s)),
        }
    }
}
// Table of prefix operators, keyed by sigil. Each entry is (type, ()); the
// unit member carries no information.
lazy_static! {
    static ref PREFIX_OPS: HashMap<&'static str, (Type, ())> =
        hashmap! {
            "+" => (Func(bx!(Const(Int)), bx!(Const(Int))), ()),
            "-" => (Func(bx!(Const(Int)), bx!(Const(Int))), ()),
            "!" => (Func(bx!(Const(Bool)), bx!(Const(Bool))), ()),
        };
}
/* the second tuple member is a placeholder for when I want to make evaluation rules tied to the
 * binop definition */
// Table of binary operators, keyed by sigil. Each entry is
// (curried type, placeholder, precedence).
lazy_static! {
    static ref BINOPS: HashMap<&'static str, (Type, (), i32)> =
        hashmap! {
            "+" => (Func(bx!(Const(Nat)), bx!(Func(bx!(Const(Nat)), bx!(Const(Nat))))), (), 10),
            "-" => (Func(bx!(Const(Nat)), bx!(Func(bx!(Const(Nat)), bx!(Const(Nat))))), (), 10),
            "*" => (Func(bx!(Const(Nat)), bx!(Func(bx!(Const(Nat)), bx!(Const(Nat))))), (), 20),
            "/" => (Func(bx!(Const(Nat)), bx!(Func(bx!(Const(Nat)), bx!(Const(Float))))), (), 20),
            "//" => (Func(bx!(Const(Nat)), bx!(Func(bx!(Const(Nat)), bx!(Const(Nat))))), (), 20), //TODO change this to `quot`
            "%" => (Func(bx!(Const(Nat)), bx!(Func(bx!(Const(Nat)), bx!(Const(Nat))))), (), 20),
            "++" => (Func(bx!(Const(StringT)), bx!(Func(bx!(Const(StringT)), bx!(Const(StringT))))), (), 30),
            "^" => (Func(bx!(Const(Nat)), bx!(Func(bx!(Const(Nat)), bx!(Const(Nat))))), (), 20),
            "&" => (Func(bx!(Const(Nat)), bx!(Func(bx!(Const(Nat)), bx!(Const(Nat))))), (), 20),
            "|" => (Func(bx!(Const(Nat)), bx!(Func(bx!(Const(Nat)), bx!(Const(Nat))))), (), 20),
        };
}

View File

@ -1,375 +0,0 @@
use std::cell::RefCell;
use std::rc::Rc;
use std::fmt::Write;
use std::io;
use itertools::Itertools;
use util::StateStack;
use reduced_ast::{ReducedAST, Stmt, Expr, Lit, Func};
use symbol_table::{SymbolSpec, Symbol, SymbolTable};
/// Evaluator state: the value bindings plus a shared handle to the symbol table.
pub struct State<'a> {
    /// Bindings from names to their entries.
    values: StateStack<'a, Rc<String>, ValueEntry>,
    /// Shared, mutable handle to the symbol table.
    symbol_table_handle: Rc<RefCell<SymbolTable>>,
}
// Inserts a constant binding named `$name` into `$values` whose value is the
// built-in function of the same name.
macro_rules! builtin_binding {
    ($name:expr, $values:expr) => {
        $values.insert(
            Rc::new(format!($name)),
            ValueEntry::Binding {
                constant: true,
                val: Expr::Func(Func::BuiltIn(Rc::new(format!($name)))),
            },
        );
    }
}
impl<'a> State<'a> {
    /// Creates a state whose "global" scope binds the built-in functions
    /// `print`, `println` and `getline`.
    pub fn new(symbol_table_handle: Rc<RefCell<SymbolTable>>) -> State<'a> {
        let mut values = StateStack::new(Some("global".to_string()));
        builtin_binding!("print", values);
        builtin_binding!("println", values);
        builtin_binding!("getline", values);
        State {
            values,
            symbol_table_handle,
        }
    }

    /// Returns the `Debug` rendering of the current bindings.
    pub fn debug_print(&self) -> String {
        format!("Values: {:?}", self.values)
    }
}
/// What a name is bound to in the evaluator state.
#[derive(Debug)]
enum ValueEntry {
    /// A value binding.
    Binding {
        /// The `constant` flag recorded for this binding.
        constant: bool,
        /// The bound value.
        val: /*FullyEvaluatedExpr*/ Expr,
    },
}
/// Result of an evaluation step; the error is a human-readable message.
type EvalResult<T> = Result<T, String>;
impl Expr {
fn to_repl(&self) -> String {
use self::Lit::*;
use self::Func::*;
fn paren_wrapped_vec(exprs: &Vec<Expr>) -> String {
let mut buf = String::new();
write!(buf, "(").unwrap();
for term in exprs.iter().map(|e| Some(e)).intersperse(None) {
match term {
Some(e) => write!(buf, "{}", e.to_repl()).unwrap(),
None => write!(buf, ", ").unwrap(),
};
}
write!(buf, ")").unwrap();
buf
}
match self {
Expr::Lit(ref l) => match l {
Nat(n) => format!("{}", n),
Int(i) => format!("{}", i),
Float(f) => format!("{}", f),
Bool(b) => format!("{}", b),
StringLit(s) => format!("\"{}\"", s),
Custom(name, args) if args.len() == 0 => format!("{}", name),
Custom(name, args) => format!("{}{}", name, paren_wrapped_vec(args)),
},
Expr::Func(f) => match f {
BuiltIn(name) => format!("<built-in function {}>", name),
UserDefined { name: None, .. } => format!("<function>"),
UserDefined { name: Some(name), .. } => format!("<function {}>", name),
},
Expr::Constructor { name } => format!("<constructor {}>", name),
Expr::Tuple(exprs) => paren_wrapped_vec(exprs),
_ => format!("{:?}", self),
}
}
}
impl<'a> State<'a> {
pub fn evaluate(&mut self, ast: ReducedAST, repl: bool) -> Vec<Result<String, String>> {
let mut acc = vec![];
// handle prebindings
for statement in ast.0.iter() {
self.prebinding(statement);
}
for statement in ast.0 {
match self.statement(statement) {
Ok(Some(ref output)) if repl => acc.push(Ok(output.to_repl())),
Ok(_) => (),
Err(error) => {
acc.push(Err(format!("Runtime error: {}", error)));
return acc;
},
}
}
acc
}
/// Installs a function pre-binding as a constant entry in the value table;
/// every other kind of statement is ignored.
fn prebinding(&mut self, stmt: &Stmt) {
    //TODO have this support things like nested function defs (inside Stmt::Expr)
    if let Stmt::PreBinding { name, func } = stmt {
        let function_value = Expr::Func(func.clone());
        self.values.insert(name.clone(), ValueEntry::Binding { constant: true, val: function_value });
    }
}
/// Executes one statement.
///
/// Returns `Some(value)` for expression statements and `None` for bindings,
/// pre-bindings and no-ops.
fn statement(&mut self, stmt: Stmt) -> EvalResult<Option<Expr>> {
    let outcome = match stmt {
        Stmt::PreBinding {..} | Stmt::Noop => None,
        Stmt::Expr(expr) => {
            let value = self.expression(expr)?;
            Some(value)
        },
        Stmt::Binding { name, constant, expr } => {
            let val = self.expression(expr)?;
            let entry = ValueEntry::Binding { constant, val };
            self.values.insert(name.clone(), entry);
            None
        },
    };
    Ok(outcome)
}
/// Runs a sequence of statements and yields the value of the last one,
/// or `Expr::Unit` when the block is empty or its last statement has no value.
fn block(&mut self, stmts: Vec<Stmt>) -> EvalResult<Expr> {
    let mut result = Expr::Unit;
    for stmt in stmts {
        result = match self.statement(stmt)? {
            Some(value) => value,
            None => Expr::Unit,
        };
    }
    Ok(result)
}
/// Evaluates a reduced expression to a value.
///
/// Literals, constructors and functions evaluate to themselves; calls,
/// value lookups, tuples, conditionals and assignments are dispatched to the
/// matching helper. Any other expression kind yields an "eval not
/// implemented" error.
fn expression(&mut self, expr: Expr) -> EvalResult<Expr> {
use self::Expr::*;
match expr {
literal @ Lit(_) => Ok(literal),
Call { box f, args } => {
// Evaluate the callee first; only constructors and functions are callable.
match self.expression(f)? {
Constructor {name} => self.apply_data_constructor(name, args),
Func(f) => self.apply_function(f, args),
other => return Err(format!("Tried to call {:?} which is not a function or data constructor", other)),
}
},
Val(v) => self.value(v),
constr @ Constructor { .. } => Ok(constr),
func @ Func(_) => Ok(func),
// Tuple elements are evaluated left to right; the first error aborts.
Tuple(exprs) => Ok(Tuple(exprs.into_iter().map(|expr| self.expression(expr)).collect::<Result<Vec<Expr>,_>>()?)),
Conditional { box cond, then_clause, else_clause } => self.conditional(cond, then_clause, else_clause),
Assign { box val, box expr } => {
// The assignment target must be a plain name.
let name = match val {
Expr::Val(name) => name,
_ => return Err(format!("Trying to assign to a non-value")),
};
// The name must already be bound, and the binding must be mutable.
let constant = match self.values.lookup(&name) {
None => return Err(format!("{} is undefined", name)),
Some(ValueEntry::Binding { constant, .. }) => constant.clone(),
};
if constant {
return Err(format!("trying to update {}, a non-mutable binding", name));
}
// The right-hand side is evaluated only after the target has been validated.
let val = self.expression(expr)?;
self.values.insert(name.clone(), ValueEntry::Binding { constant: false, val });
Ok(Expr::Unit)
},
e => Err(format!("Expr {:?} eval not implemented", e))
}
}
/// Applies a data constructor to arguments, producing a `Lit::Custom` value.
///
/// Fails when `name` is not a data constructor in the symbol table or when
/// the number of arguments does not match the constructor's declared arity.
/// The arity check happens before any argument is evaluated.
fn apply_data_constructor(&mut self, name: Rc<String>, args: Vec<Expr>) -> EvalResult<Expr> {
    {
        // Scope the symbol table borrow so it is released before arguments are evaluated.
        let table = self.symbol_table_handle.borrow();
        let (symbol_name, arity) = match table.values.get(&name) {
            Some(Symbol { spec: SymbolSpec::DataConstructor { type_args, .. }, name: symbol_name }) => (symbol_name, type_args.len()),
            _ => return Err(format!("Bad symbol {}", name)),
        };
        if args.len() != arity {
            return Err(format!("Data constructor {} requires {} args", symbol_name, arity));
        }
    }

    let mut evaled_args = Vec::new();
    for arg in args {
        evaled_args.push(self.expression(arg)?);
    }
    Ok(Expr::Lit(self::Lit::Custom(name.clone(), evaled_args)))
}
/// Calls a function value with the given (unevaluated) argument expressions.
///
/// Built-ins are forwarded to `apply_builtin`. For user-defined functions the
/// argument count is checked, every argument is evaluated in the *caller's*
/// scope, and only then is a fresh frame created in which the parameters are
/// bound as constants and the body is run.
///
/// Arguments must be evaluated before the callee frame exists: evaluating
/// them inside the new frame would let an already-bound parameter shadow a
/// caller variable of the same name (e.g. `f(2, a)` where `f`'s first
/// parameter is also called `a`).
fn apply_function(&mut self, f: Func, args: Vec<Expr>) -> EvalResult<Expr> {
    match f {
        Func::BuiltIn(sigil) => self.apply_builtin(sigil, args),
        Func::UserDefined { params, body, name } => {
            if params.len() != args.len() {
                return Err(format!("calling a {}-argument function with {} args", params.len(), args.len()))
            }
            // Evaluate in the caller's environment, left to right, stopping at the first error.
            let evaled_args = args
                .into_iter()
                .map(|arg| self.expression(arg))
                .collect::<Result<Vec<Expr>, _>>()?;

            let mut func_state = State {
                values: self.values.new_frame(name.map(|n| format!("{}", n))),
                symbol_table_handle: self.symbol_table_handle.clone(),
            };
            for (param, val) in params.into_iter().zip(evaled_args.into_iter()) {
                func_state.values.insert(param, ValueEntry::Binding { constant: true, val });
            }
            // TODO figure out function return semantics
            func_state.block(body)
        }
    }
}
/// Applies a built-in operator or function to its arguments.
///
/// All arguments are evaluated first (left to right); the built-in is then
/// selected by name and by the shape of the evaluated arguments. Unknown
/// combinations produce a "bad or unimplemented builtin" error.
///
/// Natural-number arithmetic never panics: overflow, underflow and a zero
/// divisor for `//` and `%` are reported as runtime errors. `getline`
/// reports I/O failures as runtime errors as well.
fn apply_builtin(&mut self, name: Rc<String>, args: Vec<Expr>) -> EvalResult<Expr> {
    use self::Expr::*;
    use self::Lit::*;
    let evaled_args: Result<Vec<Expr>, String> = args.into_iter().map(|arg| self.expression(arg)).collect();
    let evaled_args = evaled_args?;
    Ok(match (name.as_str(), evaled_args.as_slice()) {
        /* binops */
        ("+", &[Lit(Nat(l)), Lit(Nat(r))]) => match l.checked_add(r) {
            Some(sum) => Lit(Nat(sum)),
            None => return Err(format!("arithmetic overflow in {} + {}", l, r)),
        },
        ("++", &[Lit(StringLit(ref s1)), Lit(StringLit(ref s2))]) => Lit(StringLit(Rc::new(format!("{}{}", s1, s2)))),
        // Nat is unsigned, so a larger right operand cannot be represented.
        ("-", &[Lit(Nat(l)), Lit(Nat(r))]) => match l.checked_sub(r) {
            Some(difference) => Lit(Nat(difference)),
            None => return Err(format!("arithmetic underflow in {} - {}", l, r)),
        },
        ("*", &[Lit(Nat(l)), Lit(Nat(r))]) => match l.checked_mul(r) {
            Some(product) => Lit(Nat(product)),
            None => return Err(format!("arithmetic overflow in {} * {}", l, r)),
        },
        // Floating-point division; a zero divisor follows IEEE rules (inf / NaN).
        ("/", &[Lit(Nat(l)), Lit(Nat(r))]) => Lit(Float((l as f64)/ (r as f64))),
        ("//", &[Lit(Nat(l)), Lit(Nat(r))]) => if r == 0 {
            return Err(format!("divide by zero"));
        } else {
            Lit(Nat(l / r))
        },
        // Integer remainder panics on a zero divisor, so guard it like `//`.
        ("%", &[Lit(Nat(l)), Lit(Nat(r))]) => if r == 0 {
            return Err(format!("divide by zero"));
        } else {
            Lit(Nat(l % r))
        },
        // Bitwise XOR, AND and OR.
        ("^", &[Lit(Nat(l)), Lit(Nat(r))]) => Lit(Nat(l ^ r)),
        ("&", &[Lit(Nat(l)), Lit(Nat(r))]) => Lit(Nat(l & r)),
        ("|", &[Lit(Nat(l)), Lit(Nat(r))]) => Lit(Nat(l | r)),
        ("==", &[Lit(Nat(l)), Lit(Nat(r))]) => Lit(Bool(l == r)),
        ("==", &[Lit(Int(l)), Lit(Int(r))]) => Lit(Bool(l == r)),
        ("==", &[Lit(Float(l)), Lit(Float(r))]) => Lit(Bool(l == r)),
        ("==", &[Lit(Bool(l)), Lit(Bool(r))]) => Lit(Bool(l == r)),
        ("==", &[Lit(StringLit(ref l)), Lit(StringLit(ref r))]) => Lit(Bool(l == r)),
        /* prefix ops */
        ("!", &[Lit(Bool(true))]) => Lit(Bool(false)),
        ("!", &[Lit(Bool(false))]) => Lit(Bool(true)),
        ("-", &[Lit(Nat(n))]) => Lit(Int(-1*(n as i64))),
        ("-", &[Lit(Int(n))]) => Lit(Int(-1*(n as i64))),
        ("+", &[Lit(Int(n))]) => Lit(Int(n)),
        ("+", &[Lit(Nat(n))]) => Lit(Nat(n)),
        /* builtin functions */
        ("print", &[ref anything]) => {
            print!("{}", anything.to_repl());
            Expr::Unit
        },
        ("println", &[ref anything]) => {
            println!("{}", anything.to_repl());
            Expr::Unit
        },
        ("getline", &[]) => {
            let mut buf = String::new();
            // Reading user input can fail; surface that as a runtime error instead of panicking.
            io::stdin()
                .read_line(&mut buf)
                .map_err(|err| format!("Error reading line in 'getline': {}", err))?;
            Lit(StringLit(Rc::new(buf.trim().to_string())))
        },
        (x, args) => return Err(format!("bad or unimplemented builtin {:?} | {:?}", x, args)),
    })
}
/// Evaluates the condition and then exactly one of the two clauses.
/// A condition that does not evaluate to a boolean literal is an error.
fn conditional(&mut self, cond: Expr, then_clause: Vec<Stmt>, else_clause: Vec<Stmt>) -> EvalResult<Expr> {
    let chosen_clause = match self.expression(cond)? {
        Expr::Lit(Lit::Bool(flag)) => if flag { then_clause } else { else_clause },
        _ => return Err(String::from("Conditional with non-boolean condition")),
    };
    self.block(chosen_clause)
}
/// Resolves a name to a value.
///
/// The symbol table is consulted first: a zero-argument data constructor
/// becomes a `Lit::Custom` value and a function symbol is fetched from the
/// value table. Names that are not in the symbol table are looked up as
/// ordinary variables in the value table.
fn value(&mut self, name: Rc<String>) -> EvalResult<Expr> {
use self::ValueEntry::*;
use self::Func::*;
//TODO add a layer of indirection here to talk to the symbol table first, and only then look up
//in the values table
let symbol_table = self.symbol_table_handle.borrow();
let value = symbol_table.values.get(&name);
Ok(match value {
// From here on `name` is the symbol's own name field, shadowing the parameter.
Some(Symbol { name, spec }) => match spec {
SymbolSpec::DataConstructor { type_name, type_args } => {
if type_args.len() == 0 {
Expr::Lit(Lit::Custom(name.clone(), vec![]))
} else {
return Err(format!("This data constructor thing not done"))
}
},
SymbolSpec::Func(_) => match self.values.lookup(&name) {
Some(Binding { val: Expr::Func(UserDefined { name, params, body }), .. }) => {
Expr::Func(UserDefined { name: name.clone(), params: params.clone(), body: body.clone() })
},
// NOTE(review): panics if a function symbol has no user-defined binding in
// the value table — presumably prebinding guarantees one; confirm.
_ => unreachable!(),
},
},
/* see if it's an ordinary variable TODO make variables go in symbol table */
None => match self.values.lookup(&name) {
Some(Binding { val, .. }) => val.clone(),
None => return Err(format!("Couldn't find value {}", name)),
}
})
}
}
// End-to-end evaluator tests: each case tokenizes, parses, reduces and
// evaluates a source snippet in a fresh environment.
#[cfg(test)]
mod eval_tests {
use std::cell::RefCell;
use std::rc::Rc;
use symbol_table::SymbolTable;
use tokenizing::tokenize;
use parsing::parse;
use eval::State;
// Runs `$string` through the whole pipeline with a new symbol table and
// state, then asserts that the last REPL output equals `Ok($correct)`.
macro_rules! fresh_env {
($string:expr, $correct:expr) => {
let symbol_table = Rc::new(RefCell::new(SymbolTable::new()));
let mut state = State::new(symbol_table);
let ast = parse(tokenize($string)).0.unwrap();
state.symbol_table_handle.borrow_mut().add_top_level_symbols(&ast);
let reduced = ast.reduce(&state.symbol_table_handle.borrow());
let all_output = state.evaluate(reduced, true);
let ref output = all_output.last().unwrap();
assert_eq!(**output, Ok($correct.to_string()));
}
}
// Arithmetic, mutable assignment and tuple rendering.
#[test]
fn test_basic_eval() {
fresh_env!("1 + 2", "3");
fresh_env!("let mut a = 1; a = 2", "Unit");
fresh_env!("let mut a = 1; a = 2; a", "2");
fresh_env!(r#"("a", 1 + 2)"#, r#"("a", 3)"#);
}
// Calling a user-defined function with literal and computed arguments.
#[test]
fn function_eval() {
fresh_env!("fn oi(x) { x + 1 }; oi(4)", "5");
fresh_env!("fn oi(x) { x + 1 }; oi(1+2)", "4");
}
// A binding inside a function body must not leak into the enclosing scope.
#[test]
fn scopes() {
let scope_ok = r#"
let a = 20
fn haha() {
let a = 10
a
}
haha()
"#;
fresh_env!(scope_ok, "10");
let scope_ok = r#"
let a = 20
fn haha() {
let a = 10
a
}
a
"#;
fresh_env!(scope_ok, "20");
}
}

View File

@ -1,147 +0,0 @@
#![feature(trace_macros)]
#![feature(slice_patterns, box_patterns, box_syntax)]
#![feature(proc_macro)]
extern crate itertools;
#[macro_use]
extern crate lazy_static;
#[macro_use]
extern crate maplit;
#[macro_use]
extern crate schala_repl;
#[macro_use]
extern crate schala_codegen;
use std::cell::RefCell;
use std::rc::Rc;
use itertools::Itertools;
use schala_repl::{ProgrammingLanguageInterface, EvalOptions, TraceArtifact, UnfinishedComputation, FinishedComputation};
// Shorthand for heap-allocating an expression: `bx!(e)` expands to `Box::new(e)`.
macro_rules! bx {
    ($e:expr) => {
        ::std::boxed::Box::new($e)
    };
}
mod util;
mod builtin;
mod tokenizing;
mod ast;
mod parsing;
mod symbol_table;
mod typechecking;
mod reduced_ast;
mod eval;
//trace_macros!(true);
// The Schala language front end. The derive attributes below declare the
// language name, its source file extension and the ordered pipeline stages;
// each stage name matches a free function defined in this file.
#[derive(ProgrammingLanguageInterface)]
#[LanguageName = "Schala"]
#[SourceFileExtension = "schala"]
#[PipelineSteps(tokenizing, parsing(compact,expanded,trace), symbol_table, typechecking, ast_reducing, eval)]
pub struct Schala {
// Evaluator state (value bindings).
state: eval::State<'static>,
// Symbol table handle; `new_blank_env` hands clones of the same handle to the type context and the evaluator.
symbol_table: Rc<RefCell<symbol_table::SymbolTable>>,
// Type-checking context.
type_context: typechecking::TypeContext<'static>,
}
impl Schala {
    /// Builds an interpreter with an empty symbol table shared between the
    /// type context and the evaluator; no prelude is loaded.
    fn new_blank_env() -> Schala {
        let symbols = Rc::new(RefCell::new(symbol_table::SymbolTable::new()));
        let symbol_table = symbols.clone();
        let type_context = typechecking::TypeContext::new(symbols.clone());
        let state = eval::State::new(symbols);
        Schala { symbol_table, type_context, state }
    }

    /// Builds an interpreter and runs the prelude (which declares `Option`)
    /// through the full pipeline with default options.
    pub fn new() -> Schala {
        let prelude = r#"
type Option<T> = Some(T) | None
"#;
        let mut schala = Schala::new_blank_env();
        schala.execute_pipeline(prelude, &EvalOptions::default());
        schala
    }
}
/// Pipeline stage: turns source text into tokens.
///
/// When a computation is supplied, a "tokens" trace artifact listing each
/// token with its line/column offset is attached. If any token is an error
/// token, the stage fails with the `Debug` rendering of all error messages.
fn tokenizing(_handle: &mut Schala, input: &str, comp: Option<&mut UnfinishedComputation>) -> Result<Vec<tokenizing::Token>, String> {
    let tokens = tokenizing::tokenize(input);

    if let Some(comp) = comp {
        let token_string = tokens
            .iter()
            .map(|t| format!("{:?}<L:{},C:{}>", t.token_type, t.offset.0, t.offset.1))
            .join(", ");
        comp.add_artifact(TraceArtifact::new("tokens", token_string));
    }

    let errors: Vec<String> = tokens.iter().filter_map(|t| t.get_error()).collect();
    if !errors.is_empty() {
        return Err(format!("{:?}", errors));
    }
    Ok(tokens)
}
/// Pipeline stage: parses tokens into an AST.
///
/// When a computation is supplied, its first debug option selects the trace
/// artifact: none or "compact" gives the one-line `Debug` AST, "expanded"
/// the pretty-printed AST, "trace" the parse trace. Any other option prints
/// a diagnostic and attaches nothing. Parse errors are reported by message.
fn parsing(_handle: &mut Schala, input: Vec<tokenizing::Token>, comp: Option<&mut UnfinishedComputation>) -> Result<ast::AST, String> {
    let (ast, trace) = parsing::parse(input);

    if let Some(comp) = comp {
        //TODO need to control which of these debug stages get added
        let requested = comp.cur_debug_options.get(0).map(|s| s.clone());
        let artifact = match requested {
            Some(ref s) if s == "expanded" => Some(TraceArtifact::new("ast", format!("{:#?}", ast))),
            Some(ref s) if s == "trace" => Some(TraceArtifact::new_parse_trace(trace)),
            Some(ref s) if s != "compact" => {
                println!("Bad parsing debug option: {}", s);
                None
            },
            // No option at all, or "compact".
            _ => Some(TraceArtifact::new("ast", format!("{:?}", ast))),
        };
        if let Some(artifact) = artifact {
            comp.add_artifact(artifact);
        }
    }

    match ast {
        Ok(tree) => Ok(tree),
        Err(err) => Err(err.msg),
    }
}
/// Pipeline stage: records the AST's top-level symbols in the shared symbol
/// table and passes the AST through unchanged. On success a "symbol_table"
/// trace artifact is attached when a computation is supplied.
fn symbol_table(handle: &mut Schala, input: ast::AST, comp: Option<&mut UnfinishedComputation>) -> Result<ast::AST, String> {
    // The mutable borrow ends with this statement, before the table is read back below.
    handle.symbol_table.borrow_mut().add_top_level_symbols(&input)?;

    let dump = handle.symbol_table.borrow().debug_symbol_table();
    let artifact = TraceArtifact::new("symbol_table", dump);
    if let Some(comp) = comp {
        comp.add_artifact(artifact);
    }
    Ok(input)
}
/// Pipeline stage: type-checks the AST.
///
/// The outcome is only reported through "type_table" and "type_check" trace
/// artifacts; a type error does not stop the pipeline, and the AST is always
/// passed through unchanged.
fn typechecking(handle: &mut Schala, input: ast::AST, comp: Option<&mut UnfinishedComputation>) -> Result<ast::AST, String> {
    let outcome = handle.type_context.type_check_ast(&input);

    if let Some(comp) = comp {
        let type_table = format!("{}", handle.type_context.debug_types());
        comp.add_artifact(TraceArtifact::new("type_table", type_table));

        let report = match outcome {
            Ok(ty) => format!("{:?}", ty),
            Err(msg) => format!("Type error: {:?}", msg),
        };
        comp.add_artifact(TraceArtifact::new("type_check", report));
    }

    Ok(input)
}
/// Pipeline stage: lowers the AST to the reduced AST using the current
/// symbol table, attaching an "ast_reducing" trace artifact when a
/// computation is supplied.
fn ast_reducing(handle: &mut Schala, input: ast::AST, comp: Option<&mut UnfinishedComputation>) -> Result<reduced_ast::ReducedAST, String> {
    let table = handle.symbol_table.borrow();
    let output = input.reduce(&table);
    if let Some(comp) = comp {
        let rendered = format!("{:?}", output);
        comp.add_artifact(TraceArtifact::new("ast_reducing", rendered));
    }
    Ok(output)
}
/// Pipeline stage: evaluates the reduced AST in REPL mode.
///
/// A "value_state" artifact describing the state *before* evaluation is
/// attached when a computation is supplied. The per-statement outputs are
/// joined with newlines; the first evaluation error becomes the result.
fn eval(handle: &mut Schala, input: reduced_ast::ReducedAST, comp: Option<&mut UnfinishedComputation>) -> Result<String, String> {
    if let Some(comp) = comp {
        comp.add_artifact(TraceArtifact::new("value_state", handle.state.debug_print()));
    }

    let mut lines: Vec<String> = Vec::new();
    for output in handle.state.evaluate(input, true) {
        lines.push(output?);
    }
    Ok(lines.join("\n"))
}

File diff suppressed because it is too large Load Diff

View File

@ -1,217 +0,0 @@
use std::rc::Rc;
use ast::{AST, Statement, Expression, Declaration, Discriminator, IfExpressionBody, Pattern};
use symbol_table::{Symbol, SymbolSpec, SymbolTable};
use builtin::{BinOp, PrefixOp};
/// A program lowered from the surface AST: a flat list of reduced statements.
#[derive(Debug)]
pub struct ReducedAST(pub Vec<Stmt>);
/// A reduced statement.
#[derive(Debug, Clone)]
pub enum Stmt {
/// A function declaration, carrying the function's name and definition.
PreBinding {
name: Rc<String>,
func: Func,
},
/// A variable binding; `constant` comes from the surface declaration.
Binding {
name: Rc<String>,
constant: bool,
expr: Expr,
},
/// An expression used as a statement.
Expr(Expr),
/// A declaration with no runtime effect (type declarations, aliases, interfaces).
Noop,
}
/// A reduced expression.
#[derive(Debug, Clone)]
pub enum Expr {
/// A literal value.
Lit(Lit),
/// A function value (built-in or user-defined).
Func(Func),
/// A tuple of expressions.
Tuple(Vec<Expr>),
/// A data constructor.
Constructor {
variant: usize,
expr: Box<Expr>,
},
/// A reference to a named value.
Val(Rc<String>),
/// Application of `f` to `args`.
Call {
f: Box<Expr>,
args: Vec<Expr>,
},
/// Assignment of `expr` to the target `val`.
Assign {
val: Box<Expr>,
expr: Box<Expr>,
},
/// A two-way conditional; an absent else-branch is reduced to an empty list.
Conditional {
cond: Box<Expr>,
then_clause: Vec<Stmt>,
else_clause: Vec<Stmt>,
},
/// A pattern match: the scrutinee plus (pattern, body) arms.
Match {
cond: Box<Expr>,
arms: Vec<(Pattern, Vec<Stmt>)>
},
/// Placeholder produced for surface constructs that reduction does not handle yet.
UnimplementedSigilValue
}
/// A reduced pattern. Only the ignore pattern exists so far.
pub enum Pat {
/// Matches anything.
Ignored
}
/// A literal value.
#[derive(Debug, Clone)]
pub enum Lit {
/// Unsigned integer.
Nat(u64),
/// Signed integer.
Int(i64),
/// Floating-point number.
Float(f64),
/// Boolean.
Bool(bool),
/// String.
StringLit(Rc<String>),
/// A user-defined data value: constructor name plus its arguments.
Custom(Rc<String>, Vec<Expr>),
}
/// A function value.
#[derive(Debug, Clone)]
pub enum Func {
/// A built-in identified by its name or operator sigil.
BuiltIn(Rc<String>),
/// A function defined in source: optional name, parameter names and body.
UserDefined {
name: Option<Rc<String>>,
params: Vec<Rc<String>>,
body: Vec<Stmt>,
}
}
impl AST {
    /// Lowers every top-level statement, in order, into a `ReducedAST`.
    pub fn reduce(&self, symbol_table: &SymbolTable) -> ReducedAST {
        let reduced = self.0
            .iter()
            .map(|statement| statement.reduce(symbol_table))
            .collect();
        ReducedAST(reduced)
    }
}
impl Statement {
fn reduce(&self, symbol_table: &SymbolTable) -> Stmt {
use ast::Statement::*;
match self {
ExpressionStatement(expr) => Stmt::Expr(expr.reduce(symbol_table)),
Declaration(decl) => decl.reduce(symbol_table),
}
}
}
impl Expression {
/// Lowers a surface expression into a reduced `Expr`.
///
/// Literals map to `Expr::Lit`, operators are delegated to the operator's
/// own `reduce`, names become either a constructor or a plain value
/// reference depending on the symbol table, and calls, tuples and `if`
/// expressions are lowered recursively. Anything else becomes
/// `Expr::UnimplementedSigilValue`.
fn reduce(&self, symbol_table: &SymbolTable) -> Expr {
use ast::ExpressionType::*;
let ref input = self.0;
match input {
NatLiteral(n) => Expr::Lit(Lit::Nat(*n)),
FloatLiteral(f) => Expr::Lit(Lit::Float(*f)),
StringLiteral(s) => Expr::Lit(Lit::StringLit(s.clone())),
BoolLiteral(b) => Expr::Lit(Lit::Bool(*b)),
BinExp(binop, lhs, rhs) => binop.reduce(symbol_table, lhs, rhs),
PrefixExp(op, arg) => op.reduce(symbol_table, arg),
//remember Some(5) is a CallExpr
// => ast: Ok(AST([ExpressionStatement(Expression(Call { f: Expression(Value("Some"), None), arguments: [Expression(NatLiteral(5), None)] }, None))]))
Value(name) => {
// A name registered as a data constructor is lowered to a constructor expression.
match symbol_table.values.get(name) {
Some(Symbol { spec: SymbolSpec::DataConstructor { type_args, .. }, .. }) => {
// NOTE(review): `Expr::Constructor` is declared in this file with fields
// `variant` and `expr`, not `type_name` — this literal does not match
// that declaration; confirm which shape is intended.
Expr::Constructor { type_name: name.clone() }
},
_ => Expr::Val(name.clone()),
}
},
Call { f, arguments } => Expr::Call {
f: Box::new(f.reduce(symbol_table)),
args: arguments.iter().map(|arg| arg.reduce(symbol_table)).collect(),
},
TupleLiteral(exprs) => Expr::Tuple(exprs.iter().map(|e| e.reduce(symbol_table)).collect()),
IfExpression { discriminator, body } => reduce_if_expression(discriminator, body, symbol_table),
_ => Expr::UnimplementedSigilValue,
}
}
}
fn reduce_if_expression(discriminator: &Discriminator, body: &IfExpressionBody, symbol_table: &SymbolTable) -> Expr {
let cond = Box::new(match *discriminator {
Discriminator::Simple(ref expr) => expr.reduce(symbol_table),
_ => panic!(),
});
match *body {
IfExpressionBody::SimpleConditional(ref then_clause, ref else_clause) => {
let then_clause = then_clause.iter().map(|expr| expr.reduce(symbol_table)).collect();
let else_clause = match else_clause {
None => vec![],
Some(stmts) => stmts.iter().map(|expr| expr.reduce(symbol_table)).collect(),
};
Expr::Conditional { cond, then_clause, else_clause }
},
IfExpressionBody::SimplePatternMatch(ref pat, ref then_clause, ref else_clause) => {
let then_clause = then_clause.iter().map(|expr| expr.reduce(symbol_table)).collect();
let else_clause = match else_clause {
None => vec![],
Some(stmts) => stmts.iter().map(|expr| expr.reduce(symbol_table)).collect(),
};
Expr::Match {
cond,
arms: vec![
(pat.clone(), then_clause),
(Pattern::Ignored, else_clause)
],
}
},
IfExpressionBody::GuardList(ref _guard_arms) => panic!(),
}
}
impl Pattern {
    /// Lowers a surface pattern. Only the ignore pattern is supported.
    ///
    /// # Panics
    /// Panics for tuple, literal, tuple-struct and record patterns.
    fn reduce(&self, symbol_table: &SymbolTable) -> Pat {
        match self {
            Pattern::TuplePattern(_)
            | Pattern::Literal(_)
            | Pattern::TupleStruct(_, _)
            | Pattern::Record(_, _) => panic!(),
            Pattern::Ignored => Pat::Ignored,
        }
    }
}
impl Declaration {
    /// Lowers a declaration.
    ///
    /// Bindings become `Stmt::Binding`, function declarations become
    /// `Stmt::PreBinding` with a user-defined function, and type
    /// declarations, aliases and interfaces become `Stmt::Noop`. Every other
    /// declaration (including `impl` blocks) is lowered to the
    /// unimplemented placeholder expression.
    fn reduce(&self, symbol_table: &SymbolTable) -> Stmt {
        use self::Declaration::*;
        use ::ast::Signature;
        match self {
            TypeDecl { .. } | TypeAlias(_, _) | Interface { .. } => Stmt::Noop,
            Binding { name, constant, expr } => {
                let expr = expr.reduce(symbol_table);
                Stmt::Binding { name: name.clone(), constant: *constant, expr }
            },
            FuncDecl(Signature { name, params, .. }, statements) => {
                let func = Func::UserDefined {
                    name: Some(name.clone()),
                    params: params.iter().map(|param| param.0.clone()).collect(),
                    body: statements.iter().map(|stmt| stmt.reduce(symbol_table)).collect(),
                };
                Stmt::PreBinding { name: name.clone(), func }
            },
            _ => Stmt::Expr(Expr::UnimplementedSigilValue),
        }
    }
}
impl BinOp {
    /// Lowers a binary operation. `=` becomes `Expr::Assign`; every other
    /// operator becomes a call to the built-in named by the operator's sigil,
    /// with the reduced operands as arguments.
    fn reduce(&self, symbol_table: &SymbolTable, lhs: &Box<Expression>, rhs: &Box<Expression>) -> Expr {
        let sigil = self.sigil();
        let left = lhs.reduce(symbol_table);
        let right = rhs.reduce(symbol_table);

        if sigil.as_str() == "=" {
            return Expr::Assign { val: Box::new(left), expr: Box::new(right) };
        }
        Expr::Call {
            f: Box::new(Expr::Func(Func::BuiltIn(sigil.clone()))),
            args: vec![left, right],
        }
    }
}
impl PrefixOp {
    /// Lowers a prefix operation into a call to the built-in named by the
    /// operator's sigil, with the reduced operand as its only argument.
    fn reduce(&self, symbol_table: &SymbolTable, arg: &Box<Expression>) -> Expr {
        let builtin = Func::BuiltIn(self.sigil().clone());
        let operand = arg.reduce(symbol_table);
        Expr::Call {
            f: Box::new(Expr::Func(builtin)),
            args: vec![operand],
        }
    }
}

View File

@ -1,121 +0,0 @@
use std::collections::HashMap;
use std::rc::Rc;
use std::fmt;
use std::fmt::Write;
use ast;
use typechecking::TypeName;
//cf. p. 150 or so of Language Implementation Patterns
/// Maps names to the symbols they denote.
pub struct SymbolTable {
    pub values: HashMap<Rc<String>, Symbol> //TODO this will eventually have real type information
}

impl SymbolTable {
    /// Creates an empty symbol table.
    pub fn new() -> SymbolTable {
        let values = HashMap::new();
        SymbolTable { values }
    }
}
/// A named entry of the symbol table together with what the name denotes.
#[derive(Debug)]
pub struct Symbol {
    pub name: Rc<String>,
    pub spec: SymbolSpec,
}

impl fmt::Display for Symbol {
    /// Formats as `<Name: NAME, Spec: SPEC>`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Symbol { name, spec } = self;
        write!(f, "<Name: {}, Spec: {}>", name, spec)
    }
}
/// What a symbol denotes.
#[derive(Debug)]
pub enum SymbolSpec {
    /// A function, with one type name per parameter.
    Func(Vec<TypeName>),
    /// A data constructor of type `type_name` taking `type_args`.
    DataConstructor {
        type_name: Rc<String>,
        type_args: Vec<Rc<String>>,
    },
}

impl fmt::Display for SymbolSpec {
    /// Formats as `Func([..])` or `DataConstructor([..] -> TYPE)`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            SymbolSpec::DataConstructor { type_name, type_args } => {
                write!(f, "DataConstructor({:?} -> {})", type_args, type_name)
            },
            SymbolSpec::Func(type_names) => write!(f, "Func({:?})", type_names),
        }
    }
}
impl SymbolTable {
/* note: this adds names for *forward reference* but doesn't actually create any types. solve that problem
 * later */
/// Registers every top-level function and data constructor of `ast`.
///
/// Functions get one placeholder type name per parameter ("a", "b", ...),
/// whether or not the parameter is annotated. Each variant of a type
/// declaration is registered as a data constructor of that type. Variant
/// kinds other than unit and tuple structs are rejected with an error.
pub fn add_top_level_symbols(&mut self, ast: &ast::AST) -> Result<(), String> {
    use self::ast::{Statement, TypeName, Variant, TypeSingletonName, TypeBody};
    use self::ast::Declaration::*;

    for statement in ast.0.iter() {
        let decl = match statement {
            Statement::Declaration(decl) => decl,
            _ => continue,
        };
        match decl {
            FuncSig(signature) | FuncDecl(signature, _) => {
                //TODO eventually handle annotated parameters differently
                let types = signature.params.iter()
                    .scan('a', |label, _param| {
                        let current = Rc::new(format!("{}", label));
                        *label = ((*label as u8) + 1) as char;
                        Some(current)
                    })
                    .collect();
                let symbol = Symbol { name: signature.name.clone(), spec: SymbolSpec::Func(types) };
                self.values.insert(signature.name.clone(), symbol);
            },
            TypeDecl { name: TypeSingletonName { name, .. }, body: TypeBody(variants), .. } => {
                for var in variants {
                    let (variant_name, type_args): (_, Vec<Rc<String>>) = match var {
                        Variant::UnitStruct(variant_name) => (variant_name, vec![]),
                        Variant::TupleStruct(variant_name, tuple_members) => {
                            let member_names = tuple_members.iter().map(|type_name| match type_name {
                                TypeName::Singleton(TypeSingletonName { name, ..}) => name.clone(),
                                TypeName::Tuple(_) => unimplemented!(),
                            }).collect();
                            (variant_name, member_names)
                        },
                        e => return Err(format!("{:?} not supported in typing yet", e)),
                    };
                    let spec = SymbolSpec::DataConstructor { type_name: name.clone(), type_args };
                    self.values.insert(variant_name.clone(), Symbol { name: variant_name.clone(), spec });
                }
            },
            _ => ()
        }
    }
    Ok(())
}
pub fn debug_symbol_table(&self) -> String {
let mut output = format!("Symbol table\n");
for (name, sym) in &self.values {
write!(output, "{} -> {}\n", name, sym).unwrap();
}
output
}
}

View File

@ -1,316 +0,0 @@
use itertools::Itertools;
use std::collections::HashMap;
use std::rc::Rc;
use std::iter::{Iterator, Peekable};
use std::fmt;
/// The kinds of token the tokenizer produces.
#[derive(Debug, PartialEq, Clone)]
pub enum TokenType {
Newline, Semicolon,
LParen, RParen,
LSquareBracket, RSquareBracket,
LAngleBracket, RAngleBracket,
LCurlyBrace, RCurlyBrace,
Pipe,
Comma, Period, Colon, Underscore,
Slash,
/// An operator made of operator characters, or a backtick-quoted name.
Operator(Rc<String>),
/// `DigitGroup` is a run of decimal digits, `HexLiteral` the digits after `0x`, `BinNumberSigil` the `0b` prefix.
DigitGroup(Rc<String>), HexLiteral(Rc<String>), BinNumberSigil,
/// A string literal with escapes already resolved.
StrLiteral(Rc<String>),
Identifier(Rc<String>),
Keyword(Kw),
EOF,
/// A tokenization error with its message.
Error(String),
}
use self::TokenType::*;
impl fmt::Display for TokenType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
&Operator(ref s) => write!(f, "Operator({})", **s),
&DigitGroup(ref s) => write!(f, "DigitGroup({})", s),
&HexLiteral(ref s) => write!(f, "HexLiteral({})", s),
&StrLiteral(ref s) => write!(f, "StrLiteral({})", s),
&Identifier(ref s) => write!(f, "Identifier({})", s),
&Error(ref s) => write!(f, "Error({})", s),
other => write!(f, "{:?}", other),
}
}
}
/// Reserved words of the language.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Kw {
If, Then, Else,
Is,
Func,
For, While,
Match,
Const, Let, In,
Mut,
Return,
Alias, Type, SelfType, SelfIdent,
Interface, Impl,
True, False,
Module
}
// Lookup table from keyword spelling to `Kw`, built once on first use.
// NOTE(review): `Kw::Match` has no spelling in this table, so no source text
// tokenizes to it — confirm whether a "match" entry is intended.
lazy_static! {
static ref KEYWORDS: HashMap<&'static str, Kw> =
hashmap! {
"if" => Kw::If,
"then" => Kw::Then,
"else" => Kw::Else,
"is" => Kw::Is,
"fn" => Kw::Func,
"for" => Kw::For,
"while" => Kw::While,
"const" => Kw::Const,
"let" => Kw::Let,
"in" => Kw::In,
"mut" => Kw::Mut,
"return" => Kw::Return,
"alias" => Kw::Alias,
"type" => Kw::Type,
"Self" => Kw::SelfType,
"self" => Kw::SelfIdent,
"interface" => Kw::Interface,
"impl" => Kw::Impl,
"true" => Kw::True,
"false" => Kw::False,
"module" => Kw::Module,
};
}
#[derive(Debug, Clone)]
pub struct Token {
pub token_type: TokenType,
pub offset: (usize, usize),
}
impl Token {
pub fn get_error(&self) -> Option<String> {
match self.token_type {
TokenType::Error(ref s) => Some(s.clone()),
_ => None,
}
}
pub fn to_string_with_metadata(&self) -> String {
format!("{}(L:{},c:{})", self.token_type, self.offset.0, self.offset.1)
}
}
/// Characters that may appear in an operator token.
const OPERATOR_CHARS: [char; 18] = ['!', '$', '%', '&', '*', '+', '-', '.', ':', '<', '>', '=', '?', '@', '^', '|', '~', '`'];

/// Returns true when `c` is one of the operator characters.
fn is_operator(c: &char) -> bool {
    OPERATOR_CHARS.contains(c)
}
/// A source character tagged with its position: (line index, column index, character).
type CharData = (usize, usize, char);
/// Splits source text into tokens.
///
/// Comments (`//` to end of line and nestable `/* ... */`) and non-newline
/// whitespace produce no tokens. Unrecognised characters become
/// `TokenType::Error` tokens rather than aborting tokenization.
pub fn tokenize(input: &str) -> Vec<Token> {
let mut tokens: Vec<Token> = Vec::new();
// Flatten the text into (line, column, char) triples. The "\n" separators are
// interspersed with line index 0, so Newline tokens always report line 0.
let mut input = input.lines().enumerate()
.intersperse((0, "\n"))
.flat_map(|(line_idx, ref line)| {
line.chars().enumerate().map(move |(ch_idx, ch)| (line_idx, ch_idx, ch))
})
.peekable();
while let Some((line_idx, ch_idx, c)) = input.next() {
let cur_tok_type = match c {
'/' => match input.peek().map(|t| t.2) {
Some('/') => {
// Line comment: skip to the end of the line. The terminating '\n' is
// consumed too, so no Newline token is emitted for a commented line.
while let Some((_, _, c)) = input.next() {
if c == '\n' {
break;
}
}
continue;
},
Some('*') => {
// Block comment: track nesting depth until the outermost "*/".
input.next();
let mut comment_level = 1;
while let Some((_, _, c)) = input.next() {
if c == '*' && input.peek().map(|t| t.2) == Some('/') {
input.next();
comment_level -= 1;
} else if c == '/' && input.peek().map(|t| t.2) == Some('*') {
input.next();
comment_level += 1;
}
if comment_level == 0 {
break;
}
}
continue;
},
_ => Slash
},
c if c.is_whitespace() && c != '\n' => continue,
'\n' => Newline, ';' => Semicolon,
':' => Colon, ',' => Comma,
'(' => LParen, ')' => RParen,
'{' => LCurlyBrace, '}' => RCurlyBrace,
'[' => LSquareBracket, ']' => RSquareBracket,
'"' => handle_quote(&mut input),
c if c.is_digit(10) => handle_digit(c, &mut input),
c if c.is_alphabetic() || c == '_' => handle_alphabetic(c, &mut input), //TODO I'll probably have to rewrite this if I care about types being uppercase, also type parameterization
c if is_operator(&c) => handle_operator(c, &mut input),
unknown => Error(format!("Unexpected character: {}", unknown)),
};
// The recorded offset is the position of the token's first character.
tokens.push(Token { token_type: cur_tok_type, offset: (line_idx, ch_idx) });
}
tokens
}
/// Tokenizes a number whose first digit `c` has already been consumed.
///
/// `0x` starts a hex literal (hex digits and underscores are collected),
/// `0b` yields only the binary sigil, and anything else collects a run of
/// decimal digits.
fn handle_digit(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>) -> TokenType {
    let following = input.peek().map(|&(_, _, ch)| ch);
    match (c, following) {
        ('0', Some('x')) => {
            input.next();
            let hex_digits: String = input
                .peeking_take_while(|&(_, _, ref ch)| ch.is_digit(16) || *ch == '_')
                .map(|(_, _, ch)| ch)
                .collect();
            HexLiteral(Rc::new(hex_digits))
        },
        ('0', Some('b')) => {
            input.next();
            BinNumberSigil
        },
        _ => {
            let mut digits = c.to_string();
            digits.extend(input.peeking_take_while(|&(_, _, ref ch)| ch.is_digit(10)).map(|(_, _, ch)| ch));
            DigitGroup(Rc::new(digits))
        },
    }
}
/// Tokenizes a string literal whose opening quote has already been consumed.
///
/// Recognised escapes are `\n`, `\t`, `\"` and `\\`. A backslash followed by
/// any other character is dropped and that character is then read normally.
/// Reaching the end of input before the closing quote yields an
/// "Unclosed string" error token.
fn handle_quote(input: &mut Peekable<impl Iterator<Item=CharData>>) -> TokenType {
    let mut buf = String::new();
    loop {
        match input.next().map(|(_, _, c)| { c }) {
            Some('"') => break,
            Some('\\') => {
                let escaped = match input.peek().map(|&(_, _, c)| { c }) {
                    Some('n') => Some('\n'),
                    Some('t') => Some('\t'),
                    Some('"') => Some('"'),
                    // Without this case a literal backslash could not be written, and
                    // the second backslash of `\\` would start another escape.
                    Some('\\') => Some('\\'),
                    _ => None,
                };
                if let Some(ch) = escaped {
                    input.next();
                    buf.push(ch);
                }
            },
            Some(c) => buf.push(c),
            None => return TokenType::Error(format!("Unclosed string")),
        }
    }
    TokenType::StrLiteral(Rc::new(buf))
}
/// Tokenizes an identifier, keyword or lone underscore whose first character
/// `c` has already been consumed.
///
/// A `_` that is not followed by an alphabetic character is the `Underscore`
/// token. Otherwise alphanumeric characters and underscores are collected,
/// so names such as `foo_bar` form a single identifier. The collected word
/// is a keyword when it appears in `KEYWORDS` and an identifier otherwise.
fn handle_alphabetic(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>) -> TokenType {
    let mut buf = String::new();
    buf.push(c);
    if c == '_' && input.peek().map(|&(_, _, c)| { !c.is_alphabetic() }).unwrap_or(true) {
        return TokenType::Underscore
    }
    loop {
        match input.peek().map(|&(_, _, c)| { c }) {
            // Underscores are valid inside a name; without this a name would be
            // cut at its first underscore and continue as a second identifier.
            Some(c) if c.is_alphanumeric() || c == '_' => {
                input.next();
                buf.push(c);
            },
            _ => break,
        }
    }
    match KEYWORDS.get(buf.as_str()) {
        Some(kw) => TokenType::Keyword(*kw),
        None => TokenType::Identifier(Rc::new(buf)),
    }
}
/// Tokenizes an operator whose first character `c` has already been consumed.
///
/// `<`, `>`, `|` and `.` standing alone (not followed by another operator
/// character) are punctuation tokens. A backtick starts a named operator
/// whose name runs to the closing backtick. Anything else is a maximal run
/// of operator characters.
fn handle_operator(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>) -> TokenType {
match c {
'<' | '>' | '|' | '.' => {
let ref next = input.peek().map(|&(_, _, c)| { c });
if !next.map(|n| { is_operator(&n) }).unwrap_or(false) {
return match c {
'<' => LAngleBracket,
'>' => RAngleBracket,
'|' => Pipe,
'.' => Period,
_ => unreachable!(),
}
}
},
_ => (),
};
let mut buf = String::new();
if c == '`' {
// Backtick operator: collect letters and underscores. The backticks themselves
// are not part of the name; any other character ends the name without being consumed.
loop {
match input.peek().map(|&(_, _, c)| { c }) {
Some(c) if c.is_alphabetic() || c == '_' => {
input.next();
buf.push(c);
},
Some('`') => {
input.next();
break;
},
_ => break
}
}
} else {
// Symbolic operator: greedily take every following operator character.
buf.push(c);
loop {
match input.peek().map(|&(_, _, c)| { c }) {
Some(c) if is_operator(&c) => {
input.next();
buf.push(c);
},
_ => break
}
}
}
TokenType::Operator(Rc::new(buf))
}
// Tokenizer unit tests; each compares the produced token types with an expected list.
#[cfg(test)]
mod schala_tokenizer_tests {
use super::*;
use super::Kw::*;
// Shorthands for building expected DigitGroup / Identifier / Operator tokens.
macro_rules! digit { ($ident:expr) => { DigitGroup(Rc::new($ident.to_string())) } }
macro_rules! ident { ($ident:expr) => { Identifier(Rc::new($ident.to_string())) } }
macro_rules! op { ($ident:expr) => { Operator(Rc::new($ident.to_string())) } }
// Lone `<` and `>` are angle brackets, while `=` and `++` are operators.
#[test]
fn tokens() {
let a = tokenize("let a: A<B> = c ++ d");
let token_types: Vec<TokenType> = a.into_iter().map(move |t| t.token_type).collect();
assert_eq!(token_types, vec![Keyword(Let), ident!("a"), Colon, ident!("A"),
LAngleBracket, ident!("B"), RAngleBracket, op!("="), ident!("c"), op!("++"), ident!("d")]);
}
// An underscore between digits is its own token.
#[test]
fn underscores() {
let token_types: Vec<TokenType> = tokenize("4_8").into_iter().map(move |t| t.token_type).collect();
assert_eq!(token_types, vec![digit!("4"), Underscore, digit!("8")]);
}
// Nested block comments are skipped entirely.
#[test]
fn comments() {
let token_types: Vec<TokenType> = tokenize("1 + /* hella /* bro */ */ 2").into_iter().map(move |t| t.token_type).collect();
assert_eq!(token_types, vec![digit!("1"), op!("+"), digit!("2")]);
}
// A backtick-quoted name becomes an operator token without the backticks.
#[test]
fn backtick_operators() {
let token_types: Vec<TokenType> = tokenize("1 `plus` 2").into_iter().map(move |t| t.token_type).collect();
assert_eq!(token_types, vec![digit!("1"), op!("plus"), digit!("2")]);
}
}

View File

@ -1,445 +0,0 @@
use std::collections::HashMap;
use std::rc::Rc;
use parsing::{AST, Statement, Declaration, Signature, Expression, ExpressionType, Operation, Variant, TypeName, TypeSingletonName};
// from Niko's talk
/* fn type_check(expression, expected_ty) -> Ty {
let ty = bare_type_check(expression, expected_type);
if ty incompatible with expected_ty {
try_coerce(expression, ty, expected_ty)
} else {
ty
}
}
fn bare_type_check(expression, expected_type) -> Ty { ... }
*/
/* H-M ALGO NOTES
from https://www.youtube.com/watch?v=il3gD7XMdmA
(also check out http://dev.stephendiehl.com/fun/006_hindley_milner.html)
typeInfer :: Expr a -> Matching (Type a)
unify :: Type a -> Type b -> Matching (Type c)
(Matching a) is a monad in which unification is done
ex:
typeInfer (If e1 e2 e3) = do
t1 <- typeInfer e1
t2 <- typeInfer e2
t3 <- typeInfer e3
_ <- unify t1 BoolType
unify t2 t3 -- b/c t2 and t3 have to be the same type
typeInfer (Const (ConstInt _)) = IntType -- same for other literals
--function application
typeInfer (Apply f x) = do
tf <- typeInfer f
tx <- typeInfer x
case tf of
FunctionType t1 t2 -> do
_ <- unify t1 tx
return t2
_ -> fail "Not a function"
--type annotation
typeInfer (Typed x t) = do
tx <- typeInfer x
unify tx t
--variable and let expressions - need to pass around a map of variable names to types here
typeInfer :: [ (Var, Type Var) ] -> Expr Var -> Matching (Type Var)
typeInfer ctx (Var x) = case (lookup x ctx) of
Just t -> return t
Nothing -> fail "Unknown variable"
--let x = e1 in e2
typeInfer ctx (Let x e1 e2) = do
t1 <- typeInfer ctx e1
typeInfer ((x, t1) :: ctx) e2
--lambdas are complicated (this represents ʎx.e)
typeInfer ctx (Lambda x e) = do
t1 <- allocExistentialVariable
t2 <- typeInfer ((x, t1) :: ctx) e
return $ FunctionType t1 t2 -- ie. t1 -> t2
--to solve the problem of map :: (a -> b) -> [a] -> [b]
when we use a variable whose type has universal tvars, convert those universal
tvars to existential ones
-and each distinct universal tvar needs to map to the same existential type
-so we change typeinfer:
typeInfer ctx (Var x) = do
case (lookup x ctx) of
Nothing -> ...
Just t -> do
let uvars = nub (toList t) -- nub removes duplicates, so this gets unique universally quantified variables
evars <- mapM (const allocExistentialVariable) uvars
let varMap = zip uvars evars
let fixVar varMap v = fromJust $ lookup v varMap
return (fmap (fixVar varMap) t)
--how do we define unify??
-recall, type signature is:
unify :: Type a -> Type b -> Matching (Type c)
unify BoolType BoolType = BoolType --easy, same for all constants
unify (FunctionType t1 t2) (FunctionType t3 t4) = do
t5 <- unify t1 t3
t6 <- unify t2 t4
return $ FunctionType t5 t6
unify (TVar a) (TVar b) = if a == b then TVar a else fail
--existential types can be assigned another type at most once
--some complicated stuff about handling existential types
--everything else is a type error
unify a b = fail
SKOLEMIZATION - how you prevent an unassigned existential type variable from leaking!
-before a type gets to global scope, replace all unassigned existential vars w/ new unique universal
type variables
*/
/// A type as seen by the type checker.
#[derive(Debug, PartialEq, Clone)]
pub enum Type {
/// A type variable (universal or existential, see `TypeVar`).
TVar(TypeVar),
/// A concrete type constant such as `Integer` or a user-declared type.
TConst(TypeConst),
/// A one-argument function type: argument type first, result type second.
/// Multi-argument functions are represented by nesting (currying).
TFunc(Box<Type>, Box<Type>),
}
/// A type variable.
#[derive(Debug, PartialEq, Clone)]
pub enum TypeVar {
    /// A universal (named) type variable.
    Univ(Rc<String>),
    /// An existential type variable, identified by a numeric label.
    Exist(u64),
}

impl TypeVar {
    /// Builds a universal type variable whose name is `label`.
    fn univ(label: &str) -> TypeVar {
        let name = Rc::new(String::from(label));
        TypeVar::Univ(name)
    }
}
/// The concrete (non-variable, non-function) types known to the checker.
#[derive(Debug, PartialEq, Clone)]
pub enum TypeConst {
/// A user-declared type, identified by its name.
UserT(Rc<String>),
Integer,
Float,
StringT,
Boolean,
Unit,
/// Fallback produced for constructs the checker does not handle yet
/// (unknown operators, unsupported expressions, most tuple annotations).
Bottom,
}
/// Result of a type-checking step: the inferred type, or a human-readable error message.
type TypeCheckResult = Result<Type, String>;
/// Key of the symbol table: wraps the name of a binding, function or data constructor.
#[derive(Debug, PartialEq, Eq, Hash)]
struct PathSpecifier(Rc<String>);
/// What the symbol table records about one name.
#[derive(Debug, PartialEq, Clone)]
struct TypeContextEntry {
// The type recorded for the name.
ty: Type,
// `true` for functions and data constructors; for bindings, copied from the declaration.
constant: bool
}
/// State of the type checker.
pub struct TypeContext {
// Types of all names registered by `add_symbols`.
symbol_table: HashMap<PathSpecifier, TypeContextEntry>,
// Types that existential variables (keyed by their numeric label) were bound to by `unify`.
evar_table: HashMap<u64, Type>,
// Label that the next freshly allocated existential variable will receive.
existential_type_label_count: u64
}
impl TypeContext {
/// Creates a context with empty symbol and existential-variable tables;
/// existential labels start at 0.
pub fn new() -> TypeContext {
    let symbol_table = HashMap::new();
    let evar_table = HashMap::new();
    TypeContext {
        symbol_table,
        evar_table,
        existential_type_label_count: 0,
    }
}
/// Registers the type of every top-level declaration of `ast` in the symbol table.
///
/// * Type declarations register one entry per data constructor.
/// * Bindings use their annotation, or a fresh existential type when unannotated.
/// * Function declarations use the type derived from their signature.
///
/// Expression statements, bare signatures, impls and type aliases are ignored.
///
/// # Panics
/// Panics on record variants (`unimplemented!`) and on tuple variants without arguments.
pub fn add_symbols(&mut self, ast: &AST) {
    use self::Declaration::*;
    use self::Type::*;
    use self::TypeConst::*;

    for statement in ast.0.iter() {
        let decl = match *statement {
            Statement::Declaration(ref decl) => decl,
            Statement::ExpressionStatement(_) => continue,
        };

        match *decl {
            FuncSig(_) | Impl { .. } | TypeAlias { .. } => (),
            TypeDecl(ref type_constructor, ref body) => {
                for variant in body.0.iter() {
                    // Every data constructor ultimately produces the declared type.
                    let declared = TConst(UserT(type_constructor.name.clone()));
                    let (constructor_name, ty) = match *variant {
                        Variant::UnitStruct(ref data_constructor) => {
                            (data_constructor.clone(), declared)
                        },
                        Variant::TupleStruct(ref data_constructor, ref args) => {
                            //TODO fix: only the first argument is taken into account
                            let first_arg = args.get(0).unwrap();
                            let arg_type = self.from_anno(first_arg);
                            (data_constructor.clone(), TFunc(Box::new(arg_type), Box::new(declared)))
                        },
                        Variant::Record(_, _) => unimplemented!(),
                    };
                    self.symbol_table.insert(
                        PathSpecifier(constructor_name),
                        TypeContextEntry { ty, constant: true },
                    );
                }
            },
            Binding { ref name, ref constant, ref expr } => {
                // Allocating an existential here is OK b/c a binding only ever has one
                // type, so if the annotation is absent it's fine to make one de novo.
                let ty = match expr.1 {
                    Some(ref anno) => self.from_anno(anno),
                    None => self.alloc_existential_type(),
                };
                self.symbol_table.insert(
                    PathSpecifier(name.clone()),
                    TypeContextEntry { ty, constant: *constant },
                );
            },
            FuncDecl(ref signature, _) => {
                let ty = self.from_signature(signature);
                self.symbol_table.insert(
                    PathSpecifier(signature.name.clone()),
                    TypeContextEntry { ty, constant: true },
                );
            },
        }
    }
}
/// Returns a copy of the symbol-table entry registered under `binding`, if any.
fn lookup(&mut self, binding: &Rc<String>) -> Option<TypeContextEntry> {
    let spec = PathSpecifier(binding.clone());
    self.symbol_table.get(&spec).cloned()
}
/// Renders the symbol table and the existential-variable table with their `Debug` output.
pub fn debug_symbol_table(&self) -> String {
    let symbols = &self.symbol_table;
    let evars = &self.evar_table;
    format!("Symbol table:\n {:?}\nEvar table:\n{:?}", symbols, evars)
}
/// Hands out a fresh existential type variable and advances the label counter.
fn alloc_existential_type(&mut self) -> Type {
    let label = self.existential_type_label_count;
    self.existential_type_label_count = label + 1;
    Type::TVar(TypeVar::Exist(label))
}
fn from_anno(&mut self, anno: &TypeName) -> Type {
use self::Type::*;
use self::TypeConst::*;
match anno {
&TypeName::Singleton(TypeSingletonName { ref name, .. }) => {
match name.as_ref().as_ref() {
"Int" => TConst(Integer),
"Float" => TConst(Float),
"Bool" => TConst(Boolean),
"String" => TConst(StringT),
s => TVar(TypeVar::Univ(Rc::new(format!("{}",s)))),
}
},
&TypeName::Tuple(ref items) => {
if items.len() == 1 {
TConst(Unit)
} else {
TConst(Bottom)
}
}
}
}
/// Builds the (curried) function type described by a function signature.
///
/// A function without parameters gets the type `Unit -> R`. A function with parameters
/// `p1, p2, ..., pn` gets `p1 -> (p2 -> (... -> (pn -> R)))`, so the first declared
/// parameter is the outermost argument - the one a call site unifies its first
/// argument against.
///
/// Missing annotations (return type first, then parameters in declaration order) are
/// replaced by universal type variables named `a`..`f`; once those are used up the
/// names continue as `t6`, `t7`, ... instead of panicking.
fn from_signature(&mut self, sig: &Signature) -> Type {
    use self::Type::*;
    use self::TypeConst::*;

    //TODO this won't work properly until you make sure that all (universal) type vars in the function have the same existential type var
    // actually this should never even put existential types into the symbol table at all

    let names = ["a", "b", "c", "d", "e", "f"];
    let mut idx = 0;
    let mut get_type = || {
        let label = names
            .get(idx)
            .map(|name| name.to_string())
            .unwrap_or_else(|| format!("t{}", idx));
        idx += 1;
        TVar(TypeVar::Univ(Rc::new(label)))
    };

    let return_type = match sig.type_anno.as_ref() {
        Some(anno) => self.from_anno(anno),
        None => get_type(),
    };

    if sig.params.is_empty() {
        return TFunc(Box::new(TConst(Unit)), Box::new(return_type));
    }

    // Resolve parameter types in declaration order so placeholder names are handed
    // out left to right...
    let param_types: Vec<Type> = sig
        .params
        .iter()
        .map(|p| match p.1.as_ref() {
            Some(anno) => self.from_anno(anno),
            None => get_type(),
        })
        .collect();

    // ...then wrap the result type from the LAST parameter inwards, which leaves the
    // first parameter outermost.
    param_types
        .into_iter()
        .rev()
        .fold(return_type, |result, param| TFunc(Box::new(param), Box::new(result)))
}
/// Type-checks every expression statement of `ast` in order.
///
/// Returns the type of the last expression statement (`Unit` when there is none), or
/// the first inference error. Declarations are skipped here.
pub fn type_check(&mut self, ast: &AST) -> TypeCheckResult {
    use self::Type::*;
    use self::TypeConst::*;

    let mut result = TConst(Unit);
    for statement in ast.0.iter() {
        // Declarations are deliberately ignored, not rejected:
        //return Err(format!("Declarations not supported"));
        if let Statement::ExpressionStatement(ref expr) = *statement {
            result = self.infer(expr)?;
        }
    }
    Ok(result)
}
/// Infers the type of an expression; when the expression carries a type annotation
/// the inferred type is additionally unified with the annotated one.
fn infer(&mut self, expr: &Expression) -> TypeCheckResult {
    let inferred = self.infer_no_anno(&expr.0)?;
    match expr.1 {
        Some(ref anno) => {
            let annotated = self.from_anno(anno);
            self.unify(inferred, annotated)
        },
        None => Ok(inferred),
    }
}
fn infer_no_anno(&mut self, ex: &ExpressionType) -> TypeCheckResult {
use self::ExpressionType::*;
use self::Type::*;
use self::TypeConst::*;
Ok(match ex {
&IntLiteral(_) => TConst(Integer),
&FloatLiteral(_) => TConst(Float),
&StringLiteral(_) => TConst(StringT),
&BoolLiteral(_) => TConst(Boolean),
&Value(ref name, _) => {
self.lookup(name)
.map(|entry| entry.ty)
.ok_or(format!("Couldn't find {}", name))?
},
&BinExp(ref op, ref lhs, ref rhs) => {
let t_lhs = self.infer(lhs)?;
match self.infer_op(op)? {
TFunc(t1, t2) => {
let _ = self.unify(t_lhs, *t1)?;
let t_rhs = self.infer(rhs)?;
let x = *t2;
match x {
TFunc(t3, t4) => {
let _ = self.unify(t_rhs, *t3)?;
*t4
},
_ => return Err(format!("Not a function type either")),
}
},
_ => return Err(format!("Op {:?} is not a function type", op)),
}
},
&Call { ref f, ref arguments } => {
let tf = self.infer(f)?;
let targ = self.infer(arguments.get(0).unwrap())?;
match tf {
TFunc(box t1, box t2) => {
let _ = self.unify(t1, targ)?;
t2
},
_ => return Err(format!("Not a function!")),
}
},
_ => TConst(Bottom),
})
}
/// Returns the curried type `lhs -> rhs -> result` of a binary operator.
///
/// The arithmetic operators `+ - * / %` work on integers and `++` on strings; any
/// other operator yields `Bottom` (which callers reject as "not a function type").
fn infer_op(&mut self, op: &Operation) -> TypeCheckResult {
    use self::Type::*;
    use self::TypeConst::*;

    // Builds `lhs -> (rhs -> out)` from three constants.
    let binop = |lhs: TypeConst, rhs: TypeConst, out: TypeConst| {
        let partially_applied = TFunc(Box::new(TConst(rhs)), Box::new(TConst(out)));
        TFunc(Box::new(TConst(lhs)), Box::new(partially_applied))
    };

    let ty = match op.0.as_str() {
        "+" | "-" | "*" | "/" | "%" => binop(Integer, Integer, Integer),
        "++" => binop(StringT, StringT, StringT),
        _ => TConst(Bottom),
    };
    Ok(ty)
}
fn unify(&mut self, t1: Type, t2: Type) -> TypeCheckResult {
use self::Type::*;
use self::TypeVar::*;
println!("Calling unify with `{:?}` and `{:?}`", t1, t2);
match (&t1, &t2) {
(&TConst(ref c1), &TConst(ref c2)) if c1 == c2 => Ok(TConst(c1.clone())),
(&TFunc(ref t1, ref t2), &TFunc(ref t3, ref t4)) => {
let t5 = self.unify(*t1.clone().clone(), *t3.clone().clone())?;
let t6 = self.unify(*t2.clone().clone(), *t4.clone().clone())?;
Ok(TFunc(Box::new(t5), Box::new(t6)))
},
(&TVar(Univ(ref a)), &TVar(Univ(ref b))) => {
if a == b {
Ok(TVar(Univ(a.clone())))
} else {
Err(format!("Couldn't unify universal types {} and {}", a, b))
}
},
//the interesting case!!
(&TVar(Exist(ref a)), ref t2) => {
let x = self.evar_table.get(a).map(|x| x.clone());
match x {
Some(ref t1) => self.unify(t1.clone().clone(), t2.clone().clone()),
None => {
self.evar_table.insert(*a, t2.clone().clone());
Ok(t2.clone().clone())
}
}
},
(ref t1, &TVar(Exist(ref a))) => {
let x = self.evar_table.get(a).map(|x| x.clone());
match x {
Some(ref t2) => self.unify(t2.clone().clone(), t1.clone().clone()),
None => {
self.evar_table.insert(*a, t1.clone().clone());
Ok(t1.clone().clone())
}
}
},
_ => Err(format!("Types {:?} and {:?} don't unify", t1, t2))
}
}
}
// End-to-end tests: source text is tokenized and parsed by `schala_lang`, its
// declarations are registered, and the type of the last expression is checked.
#[cfg(test)]
mod tests {
use super::{Type, TypeVar, TypeConst, TypeContext};
use super::Type::*;
use super::TypeConst::*;
use schala_lang::parsing::{parse, tokenize};
// type_test!(source, expected_type): asserts that type-checking `source` in a fresh
// TypeContext yields `expected_type`. Panics if parsing or type-checking fails.
macro_rules! type_test {
($input:expr, $correct:expr) => {
{
let mut tc = TypeContext::new();
let ast = parse(tokenize($input)).0.unwrap() ;
tc.add_symbols(&ast);
assert_eq!($correct, tc.type_check(&ast).unwrap())
}
}
}
#[test]
fn basic_inference() {
// A literal, and a call to a declared single-argument function.
type_test!("30", TConst(Integer));
type_test!("fn x(a: Int): Bool {}; x(1)", TConst(Boolean));
}
}

View File

@ -1,493 +0,0 @@
use std::cell::RefCell;
use std::rc::Rc;
use std::collections::HashMap;
use std::fmt;
use std::fmt::Write;
/*
use std::collections::hash_set::Union;
use std::iter::Iterator;
use itertools::Itertools;
*/
use ast;
use util::StateStack;
use symbol_table::{SymbolSpec, SymbolTable};
pub type TypeName = Rc<String>;
type TypeResult<T> = Result<T, String>;
/// A monomorphic type in this version of the type checker.
#[derive(Debug, PartialEq, Clone)]
enum Type {
/// A concrete type constant.
Const(TConst),
/// A type variable, identified by name.
Var(TypeName),
/// A function type stored as a flat list of types.
// NOTE(review): presumably parameters followed by the return type - confirm.
Func(Vec<Type>),
}
/// Concrete type constants.
#[derive(Debug, PartialEq, Clone)]
enum TConst {
Unit,
Nat,
StringT,
/// A type identified only by its name.
Custom(String)
}
/// A type scheme: a type together with a list of type-variable names.
/// Its `Display` output renders the names after a `∀`.
#[derive(Debug, PartialEq, Clone)]
struct Scheme {
// Type-variable names attached to the scheme.
names: Vec<TypeName>,
// The underlying type.
ty: Type,
}
impl fmt::Display for Scheme {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "∀{:?} . {:?}", self.names, self.ty)
}
}
/// A mapping from type-variable names to types.
#[derive(Debug, PartialEq, Clone)]
struct Substitution(HashMap<TypeName, Type>);

impl Substitution {
    /// Creates a substitution with no entries.
    fn empty() -> Substitution {
        let bindings = HashMap::new();
        Substitution(bindings)
    }
}
/// The typing environment: maps names to their type schemes.
#[derive(Debug, PartialEq, Clone)]
struct TypeEnv(HashMap<TypeName, Scheme>);

impl TypeEnv {
    /// Creates an empty environment.
    fn default() -> TypeEnv {
        TypeEnv(HashMap::new())
    }

    /// Adds a scheme for every function symbol in `symbol_table`.
    ///
    /// Each entry of the function's type-name list is replaced by a fresh
    /// single-letter type variable (`a`, `b`, `c`, ...); the scheme carries those
    /// variables as its names and a `Func` of them as its type. Non-function symbols
    /// are skipped.
    fn populate_from_symbols(&mut self, symbol_table: &SymbolTable) {
        for (name, symbol) in symbol_table.values.iter() {
            let type_names = match symbol.spec {
                SymbolSpec::Func(ref type_names) => type_names,
                _ => continue,
            };

            // One consecutive letter per declared type name, starting at 'a'.
            let mut letter: u8 = b'a';
            let mut names = Vec::new();
            for _ in type_names.iter() {
                names.push(Rc::new((letter as char).to_string()));
                letter += 1;
            }

            let ty = Type::Func(names.iter().cloned().map(Type::Var).collect());
            self.0.insert(name.clone(), Scheme { names, ty });
        }
    }
}
/// Entry point of the type checker: owns the global type environment and a handle to
/// the shared symbol table.
pub struct TypeContext<'a> {
    values: StateStack<'a, TypeName, Type>,
    symbol_table_handle: Rc<RefCell<SymbolTable>>,
    global_env: TypeEnv
}

impl<'a> TypeContext<'a> {
    /// Creates a context with an empty environment that reads symbols from
    /// `symbol_table_handle`.
    pub fn new(symbol_table_handle: Rc<RefCell<SymbolTable>>) -> TypeContext<'static> {
        let global_env = TypeEnv::default();
        let values = StateStack::new(None);
        TypeContext { values, global_env, symbol_table_handle }
    }

    /// Lists every name in the global environment with its scheme, one per line,
    /// below a header line.
    pub fn debug_types(&self) -> String {
        let mut output = String::from("Type environment\n");
        for (name, scheme) in self.global_env.0.iter() {
            writeln!(output, "{} -> {}", name, scheme).unwrap();
        }
        output
    }

    /// Loads function symbols into the global environment, infers the type of the
    /// program and returns its `Debug` rendering, or the inference error.
    pub fn type_check_ast(&mut self, input: &ast::AST) -> Result<String, String> {
        let symbol_table = self.symbol_table_handle.borrow();
        self.global_env.populate_from_symbols(&symbol_table);
        self.global_env
            .infer_block(&input.0)
            .map(|ty| format!("{:?}", ty))
    }
}
impl TypeEnv {
    /// Turns a scheme into a type. Currently this just returns the scheme's type and
    /// ignores its variable names.
    fn instantiate(&mut self, sigma: Scheme) -> Type {
        sigma.ty
    }

    /// Wraps a type in a scheme without any variable names.
    fn generate(&mut self, ty: Type) -> Scheme {
        //TODO incomplete
        Scheme { names: Vec::new(), ty }
    }

    /// Infers each statement in order; yields the type of the last one (`Unit` for an
    /// empty block) or the first error.
    fn infer_block(&mut self, block: &Vec<ast::Statement>) -> TypeResult<Type> {
        let mut last = Type::Const(TConst::Unit);
        for statement in block.iter() {
            last = self.infer_statement(statement)?;
        }
        Ok(last)
    }

    /// Dispatches a statement to expression or declaration inference.
    fn infer_statement(&mut self, statement: &ast::Statement) -> TypeResult<Type> {
        use ast::Statement::*;
        match statement {
            Declaration(decl) => self.infer_decl(decl),
            ExpressionStatement(expr) => self.infer_expr(expr),
        }
    }

    /// Records the scheme of a binding in the environment; other declarations are
    /// ignored. A declaration itself always has type `Unit`.
    fn infer_decl(&mut self, decl: &ast::Declaration) -> TypeResult<Type> {
        use ast::Declaration::*;
        if let Binding { name, expr, .. } = decl {
            let ty = self.infer_expr(expr)?;
            let sigma = self.generate(ty);
            self.0.insert(name.clone(), sigma);
        }
        Ok(Type::Const(TConst::Unit))
    }

    /// Infers the type of an expression. A type annotation, if present, is not
    /// consulted.
    fn infer_expr(&mut self, expr: &ast::Expression) -> TypeResult<Type> {
        let ast::Expression(inner, _annotation) = expr;
        self.infer_exprtype(inner)
    }

    /// Infers the type of an expression body.
    ///
    /// Natural-number and string literals map to their constants, names are looked up
    /// in the environment, binary expressions and calls are not implemented yet (they
    /// return an error), and everything else is typed as `Unit`.
    fn infer_exprtype(&mut self, expr: &ast::ExpressionType) -> TypeResult<Type> {
        use self::TConst::*;
        use ast::ExpressionType::*;
        Ok(match expr {
            NatLiteral(_) => Type::Const(Nat),
            StringLiteral(_) => Type::Const(StringT),
            BinExp(..) | Call { .. } => return Err("NOTDONE".to_string()),
            Value(name) => {
                let sigma = self
                    .0
                    .get(name)
                    .cloned()
                    .ok_or_else(|| format!("Unknown variable: {}", name))?;
                self.instantiate(sigma)
            },
            _ => Type::Const(Unit),
        })
    }
}
/* GIANT TODO - use the rust im crate, unless I make this code way less haskell-ish after it's done
*/
/*
pub type TypeResult<T> = Result<T, String>;
*/
/* TODO this should just check the name against a map, and that map should be pre-populated with
* types */
/*
impl parsing::TypeName {
fn to_type(&self) -> TypeResult<Type> {
use self::parsing::TypeSingletonName;
use self::parsing::TypeName::*;
use self::Type::*; use self::TConstOld::*;
Ok(match self {
Tuple(_) => return Err(format!("Tuples not yet implemented")),
Singleton(name) => match name {
TypeSingletonName { name, .. } => match &name[..] {
/*
"Nat" => Const(Nat),
"Int" => Const(Int),
"Float" => Const(Float),
"Bool" => Const(Bool),
"String" => Const(StringT),
*/
n => Const(Custom(n.to_string()))
}
}
})
}
}
*/
/*
impl TypeContext {
pub fn type_check_ast(&mut self, ast: &parsing::AST) -> TypeResult<String> {
let ref block = ast.0;
let mut infer = Infer::default();
let env = TypeEnvironment::default();
let output = infer.infer_block(block, &env);
match output {
Ok(s) => Ok(format!("{:?}", s)),
Err(s) => Err(format!("Error: {:?}", s))
}
}
}
// this is the equivalent of the Haskell Infer monad
#[derive(Debug, Default)]
struct Infer {
_idents: u32,
}
#[derive(Debug)]
enum InferError {
CannotUnify(MonoType, MonoType),
OccursCheckFailed(Rc<String>, MonoType),
UnknownIdentifier(Rc<String>),
Custom(String),
}
type InferResult<T> = Result<T, InferError>;
impl Infer {
fn fresh(&mut self) -> MonoType {
let i = self._idents;
self._idents += 1;
let name = Rc::new(format!("{}", ('a' as u8 + 1) as char));
MonoType::Var(name)
}
fn unify(&mut self, a: MonoType, b: MonoType) -> InferResult<Substitution> {
use self::InferError::*; use self::MonoType::*;
Ok(match (a, b) {
(Const(ref a), Const(ref b)) if a == b => Substitution::new(),
(Var(ref name), ref var) => Substitution::bind_variable(name, var),
(ref var, Var(ref name)) => Substitution::bind_variable(name, var),
(Function(box a1, box b1), Function(box a2, box b2)) => {
let s1 = self.unify(a1, a2)?;
let s2 = self.unify(b1.apply_substitution(&s1), b2.apply_substitution(&s1))?;
s1.merge(s2)
},
(a, b) => return Err(CannotUnify(a, b))
})
}
fn infer_block(&mut self, block: &Vec<parsing::Statement>, env: &TypeEnvironment) -> InferResult<MonoType> {
use self::parsing::Statement;
let mut ret = MonoType::Const(TypeConst::Unit);
for statement in block.iter() {
ret = match statement {
Statement::ExpressionStatement(expr) => {
let (sub, ty) = self.infer_expr(expr, env)?;
//TODO handle substitution monadically
ty
}
Statement::Declaration(decl) => MonoType::Const(TypeConst::Unit),
}
}
Ok(ret)
}
fn infer_expr(&mut self, expr: &parsing::Expression, env: &TypeEnvironment) -> InferResult<(Substitution, MonoType)> {
use self::parsing::Expression;
match expr {
Expression(e, Some(anno)) => self.infer_annotated_expr(e, anno, env),
/*
let anno_ty = anno.to_type()?;
let ty = self.infer_exprtype(&e)?;
self.unify(ty, anno_ty)
},
*/
Expression(e, None) => self.infer_exprtype(e, env)
}
}
fn infer_annotated_expr(&mut self, expr: &parsing::ExpressionType, anno: &parsing::TypeName, env: &TypeEnvironment) -> InferResult<(Substitution, MonoType)> {
Err(InferError::Custom(format!("exprtype not done: {:?}", expr)))
}
fn infer_exprtype(&mut self, expr: &parsing::ExpressionType, env: &TypeEnvironment) -> InferResult<(Substitution, MonoType)> {
use self::parsing::ExpressionType::*;
use self::TypeConst::*;
Ok(match expr {
NatLiteral(_) => (Substitution::new(), MonoType::Const(Nat)),
FloatLiteral(_) => (Substitution::new(), MonoType::Const(Float)),
StringLiteral(_) => (Substitution::new(), MonoType::Const(StringT)),
BoolLiteral(_) => (Substitution::new(), MonoType::Const(Bool)),
Value(name) => match env.lookup(name) {
Some(sigma) => {
let tau = self.instantiate(&sigma);
(Substitution::new(), tau)
},
None => return Err(InferError::UnknownIdentifier(name.clone())),
},
e => return Err(InferError::Custom(format!("Type inference for {:?} not done", e)))
})
}
fn instantiate(&mut self, sigma: &PolyType) -> MonoType {
let ref ty: MonoType = sigma.1;
let mut subst = Substitution::new();
for name in sigma.0.iter() {
let fresh_mvar = self.fresh();
let new = Substitution::bind_variable(name, &fresh_mvar);
subst = subst.merge(new);
}
ty.apply_substitution(&subst)
}
}
*/
/* OLD STUFF DOWN HERE */
/*
impl TypeContext {
fn infer_block(&mut self, statements: &Vec<parsing::Statement>) -> TypeResult<Type> {
let mut ret_type = Type::Const(TConst::Unit);
for statement in statements {
ret_type = self.infer_statement(statement)?;
}
Ok(ret_type)
}
fn infer_statement(&mut self, statement: &parsing::Statement) -> TypeResult<Type> {
use self::parsing::Statement::*;
match statement {
ExpressionStatement(expr) => self.infer(expr),
Declaration(decl) => self.add_declaration(decl),
}
}
fn add_declaration(&mut self, decl: &parsing::Declaration) -> TypeResult<Type> {
use self::parsing::Declaration::*;
use self::Type::*;
match decl {
Binding { name, expr, .. } => {
let ty = self.infer(expr)?;
self.bindings.insert(name.clone(), ty);
},
_ => return Err(format!("other formats not done"))
}
Ok(Void)
}
fn infer(&mut self, expr: &parsing::Expression) -> TypeResult<Type> {
use self::parsing::Expression;
match expr {
Expression(e, Some(anno)) => {
let anno_ty = anno.to_type()?;
let ty = self.infer_exprtype(&e)?;
self.unify(ty, anno_ty)
},
Expression(e, None) => self.infer_exprtype(e)
}
}
fn infer_exprtype(&mut self, expr: &parsing::ExpressionType) -> TypeResult<Type> {
use self::parsing::ExpressionType::*;
use self::Type::*; use self::TConst::*;
match expr {
NatLiteral(_) => Ok(Const(Nat)),
FloatLiteral(_) => Ok(Const(Float)),
StringLiteral(_) => Ok(Const(StringT)),
BoolLiteral(_) => Ok(Const(Bool)),
BinExp(op, lhs, rhs) => { /* remember there are both the haskell convention talk and the write you a haskell ways to do this! */
match op.get_type()? {
Func(box t1, box Func(box t2, box t3)) => {
let lhs_ty = self.infer(lhs)?;
let rhs_ty = self.infer(rhs)?;
self.unify(t1, lhs_ty)?;
self.unify(t2, rhs_ty)?;
Ok(t3)
},
other => Err(format!("{:?} is not a binary function type", other))
}
},
PrefixExp(op, expr) => match op.get_type()? {
Func(box t1, box t2) => {
let expr_ty = self.infer(expr)?;
self.unify(t1, expr_ty)?;
Ok(t2)
},
other => Err(format!("{:?} is not a prefix op function type", other))
},
Value(name) => {
match self.bindings.get(name) {
Some(ty) => Ok(ty.clone()),
None => Err(format!("No binding found for variable: {}", name)),
}
},
Call { f, arguments } => {
let mut tf = self.infer(f)?;
for arg in arguments.iter() {
match tf {
Func(box t, box rest) => {
let t_arg = self.infer(arg)?;
self.unify(t, t_arg)?;
tf = rest;
},
other => return Err(format!("Function call failed to unify; last type: {:?}", other)),
}
}
Ok(tf)
},
TupleLiteral(expressions) => {
let mut types = vec![];
for expr in expressions {
types.push(self.infer(expr)?);
}
Ok(Sum(types))
},
_ => Err(format!("Type not yet implemented"))
}
}
fn unify(&mut self, t1: Type, t2: Type) -> TypeResult<Type> {
use self::Type::*;// use self::TConst::*;
match (t1, t2) {
(Const(ref a), Const(ref b)) if a == b => Ok(Const(a.clone())),
(a, b) => Err(format!("Types {:?} and {:?} don't unify", a, b))
}
}
}
*/
// Test module for the type checker. Its whole body is currently commented out, so it
// contributes no tests.
#[cfg(test)]
mod tests {
/*
use super::{Type, TConst, TypeContext};
use super::Type::*;
use super::TConst::*;
use std::rc::Rc;
use std::cell::RefCell;
macro_rules! type_test {
($input:expr, $correct:expr) => {
{
let symbol_table = Rc::new(RefCell::new(SymbolTable::new()));
let mut tc = TypeContext::new(symbol_table);
let ast = ::ast::parse(::tokenizing::tokenize($input)).0.unwrap() ;
//tc.add_symbols(&ast);
assert_eq!($correct, tc.infer_block(&ast.0).unwrap())
}
}
}
#[test]
fn basic_inference() {
type_test!("30", Const(Nat));
//type_test!("fn x(a: Int): Bool {}; x(1)", TConst(Boolean));
}
*/
}

View File

@ -1,43 +0,0 @@
use std::collections::HashMap;
use std::hash::Hash;
use std::cmp::Eq;
//TODO rename this ScopeStack
/// A stack of key/value scopes.
///
/// Each frame owns its own map and optionally borrows a parent frame; lookups that
/// miss in a frame continue in its parent.
#[derive(Default, Debug)]
pub struct StateStack<'a, T: 'a, V: 'a> where T: Hash + Eq {
    parent: Option<&'a StateStack<'a, T, V>>,
    values: HashMap<T, V>,
    scope_name: Option<String>
}

impl<'a, T, V> StateStack<'a, T, V> where T: Hash + Eq {
    /// Creates a root frame (no parent) with an optional name.
    pub fn new(name: Option<String>) -> StateStack<'a, T, V> {
        StateStack {
            scope_name: name,
            values: HashMap::new(),
            parent: None,
        }
    }

    /// Stores `value` under `key` in this frame, replacing any previous value there.
    pub fn insert(&mut self, key: T, value: V) {
        self.values.insert(key, value);
    }

    /// Finds the value for `key`, preferring this frame and then walking up through
    /// the parent frames. Returns `None` when no frame holds the key.
    pub fn lookup(&self, key: &T) -> Option<&V> {
        if let Some(value) = self.values.get(key) {
            return Some(value);
        }
        match self.parent {
            Some(parent) => parent.lookup(key),
            None => None,
        }
    }

    //TODO rename new_scope
    /// Creates an empty child frame whose parent is this frame.
    pub fn new_frame(&'a self, name: Option<String>) -> StateStack<'a, T, V> {
        StateStack {
            scope_name: name,
            values: HashMap::new(),
            parent: Some(self),
        }
    }

    /// Returns the name given to this frame, if any.
    pub fn get_name(&self) -> Option<&String> {
        match self.scope_name {
            Some(ref name) => Some(name),
            None => None,
        }
    }
}

BIN
schala-logo.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.8 KiB

8
schala-main/Cargo.toml Normal file
View File

@ -0,0 +1,8 @@
[package]
name = "schala-main"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

3
schala-main/src/main.rs Normal file
View File

@ -0,0 +1,3 @@
// Program entry point: prints "Schala" on standard output.
fn main() {
println!("Schala");
}

8
schala-parser/Cargo.toml Normal file
View File

@ -0,0 +1,8 @@
[package]
name = "schala-parser"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

View File

@ -0,0 +1,3 @@
// Program entry point: prints a greeting on standard output.
fn main() {
println!("Hello, world!");
}

View File

@ -1,26 +0,0 @@
[package]
name = "schala-repl"
version = "0.1.0"
authors = ["greg <greg.shuflin@protonmail.com>"]
[dependencies]
llvm-sys = "*"
take_mut = "0.1.3"
itertools = "0.5.8"
getopts = "*"
lazy_static = "0.2.8"
maplit = "*"
colored = "1.5"
serde = "1.0.15"
serde_derive = "1.0.15"
serde_json = "1.0.3"
rocket = "0.3.13"
rocket_codegen = "0.3.13"
rocket_contrib = "0.3.13"
phf = "0.7.12"
includedir = "0.2.0"
linefeed = "0.5.0"
regex = "0.2"
[build-dependencies]
includedir_codegen = "0.2.0"

View File

@ -1,10 +0,0 @@
extern crate includedir_codegen;
use includedir_codegen::Compression;
// Build-script entry point: has `includedir_codegen` generate `static.rs`, declaring
// an entry named WEBFILES for the `../static` directory with gzip compression.
// Panics (via `unwrap`) if generation fails.
fn main() {
includedir_codegen::start("WEBFILES")
.dir("../static", Compression::Gzip)
.build("static.rs")
.unwrap();
}

View File

@ -1,196 +0,0 @@
use std::collections::HashMap;
use colored::*;
use std::fmt::Write;
/// Newtype wrapper around a `String`.
// NOTE(review): presumably holds textual LLVM code - confirm against callers.
pub struct LLVMCodeString(pub String);
/// Options controlling one evaluation of a piece of source code.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct EvalOptions {
/// Whether to compile or interpret.
pub execution_method: ExecutionMethod,
/// Debug options per pass, keyed by pass name; a pass with an entry here receives a
/// debug handle from `pass_chain_helper!`.
pub debug_passes: HashMap<String, PassDebugOptionsDescriptor>,
}
/// Describes one pass of a language's pipeline, as reported by
/// `ProgrammingLanguageInterface::get_passes`.
#[derive(Debug, Hash, PartialEq)]
pub struct PassDescriptor {
/// Name of the pass.
pub name: String,
/// Debug options associated with the pass.
pub debug_options: Vec<String>
}
/// Debug options requested for a single pass.
#[derive(Debug, Serialize, Deserialize)]
pub struct PassDebugOptionsDescriptor {
/// The option strings; `pass_chain_helper!` copies them into the running
/// computation's `cur_debug_options` before invoking the pass.
pub opts: Vec<String>,
}
/// How source code is to be executed.
#[derive(Debug, Serialize, Deserialize)]
pub enum ExecutionMethod {
    Compile,
    Interpret,
}

impl Default for ExecutionMethod {
    /// Interpretation is the default execution method.
    fn default() -> Self {
        ExecutionMethod::Interpret
    }
}
/// A computation that is still running through its passes.
#[derive(Debug, Default)]
pub struct UnfinishedComputation {
// Trace artifacts collected so far, each paired with its stage name.
artifacts: Vec<(String, TraceArtifact)>,
/// Debug options of the pass currently being run (set by `pass_chain_helper!`).
pub cur_debug_options: Vec<String>,
}
/// The outcome of a computation: collected trace artifacts plus the final text output
/// or error message.
#[derive(Debug)]
pub struct FinishedComputation {
// Trace artifacts, each paired with its stage name.
artifacts: Vec<(String, TraceArtifact)>,
// Final output on success, error message on failure.
text_output: Result<String, String>,
}
impl UnfinishedComputation {
pub fn add_artifact(&mut self, artifact: TraceArtifact) {
self.artifacts.push((artifact.stage_name.clone(), artifact));
}
pub fn finish(self, text_output: Result<String, String>) -> FinishedComputation {
FinishedComputation {
artifacts: self.artifacts,
text_output
}
}
pub fn output(self, output: Result<String, String>) -> FinishedComputation {
FinishedComputation {
artifacts: self.artifacts,
text_output: output
}
}
}
impl FinishedComputation {
pub fn to_repl(&self) -> String {
let mut buf = String::new();
for (stage, artifact) in self.artifacts.iter() {
let color = artifact.text_color;
let stage = stage.color(color).bold();
let output = artifact.debug_output.color(color);
write!(&mut buf, "{}: {}\n", stage, output).unwrap();
}
match self.text_output {
Ok(ref output) => write!(&mut buf, "{}", output).unwrap(),
Err(ref err) => write!(&mut buf, "{} {}", "Error: ".red().bold(), err).unwrap(),
}
buf
}
pub fn to_noninteractive(&self) -> Option<String> {
match self.text_output {
Ok(_) => {
let mut buf = String::new();
for (stage, artifact) in self.artifacts.iter() {
let color = artifact.text_color;
let stage = stage.color(color).bold();
let output = artifact.debug_output.color(color);
write!(&mut buf, "{}: {}\n", stage, output).unwrap();
}
if buf == "" { None } else { Some(buf) }
},
Err(ref s) => Some(format!("{} {}", "Error: ".red().bold(), s))
}
}
}
/// Debug output produced by one stage of a pipeline, with the color it is shown in.
#[derive(Debug)]
pub struct TraceArtifact {
    stage_name: String,
    debug_output: String,
    text_color: &'static str,
}

impl TraceArtifact {
    /// Creates an artifact for `stage`; the display color is chosen from the stage
    /// name (blue for unknown stages).
    pub fn new(stage: &str, debug: String) -> TraceArtifact {
        let text_color = match stage {
            "parse_trace" | "ast" | "ast_reducing" => "red",
            "tokens" => "green",
            "type_check" => "magenta",
            _ => "blue",
        };
        TraceArtifact {
            stage_name: stage.to_owned(),
            debug_output: debug,
            text_color,
        }
    }

    /// Creates the `parse_trace` artifact from the trace lines, each terminated by a
    /// newline.
    pub fn new_parse_trace(trace: Vec<String>) -> TraceArtifact {
        let mut debug_output = String::new();
        for line in trace.iter() {
            debug_output.push_str(line);
            debug_output.push('\n');
        }
        TraceArtifact::new("parse_trace", debug_output)
    }
}
/// Interface a language implementation exposes to the REPL.
pub trait ProgrammingLanguageInterface {
    /// Runs the language's pipeline on `_input`. The default implementation reports
    /// that no pipeline exists.
    fn execute_pipeline(&mut self, _input: &str, _eval_options: &EvalOptions) -> FinishedComputation {
        let text_output = Err(String::from("Execution pipeline not done"));
        FinishedComputation { artifacts: Vec::new(), text_output }
    }

    /// Name of the language.
    fn get_language_name(&self) -> String;

    /// Source-file extension (compared against the extension without a leading dot).
    fn get_source_file_suffix(&self) -> String;

    /// The passes of the pipeline; none by default.
    fn get_passes(&self) -> Vec<PassDescriptor> {
        Vec::new()
    }

    /// Handles language-specific interpreter directives; the default handles none.
    fn handle_custom_interpreter_directives(&mut self, _commands: &Vec<&str>) -> Option<String> {
        None
    }

    /// Help text for the language-specific interpreter directives.
    fn custom_interpreter_directives_help(&self) -> String {
        ">> No custom interpreter directives specified <<".to_string()
    }
}
/* a pass_chain function signature looks like:
* fn(&mut ProgrammingLanguageInterface, A, Option<&mut DebugHandler>) -> Result<B, String>
*
* TODO use some kind of failure-handling library to make this better
*/
/// Builds a closure that runs a chain of passes over a text input.
///
/// `pass_chain![state, options; pass1, pass2, ...]` expands to `|text_input| { ... }`.
/// The closure creates a fresh `UnfinishedComputation` and hands the state, the
/// computation, the options and the pass list to `pass_chain_helper!`, whose result
/// (a `FinishedComputation`) it returns.
#[macro_export]
macro_rules! pass_chain {
($state:expr, $options:expr; $($pass:path), *) => {
|text_input| {
let mut comp = UnfinishedComputation::default();
pass_chain_helper! { ($state, comp, $options); text_input $(, $pass)* }
}
};
}
/// Recursive worker behind `pass_chain!`.
///
/// With at least one pass left, it runs the first pass on the current input and
/// recurses on the remaining passes with that pass's output. With no pass left, it
/// finishes the computation successfully with the final value.
#[macro_export]
macro_rules! pass_chain_helper {
// Recursive case: run `$pass`, then continue with `$rest`.
(($state:expr, $comp:expr, $options:expr); $input:expr, $pass:path $(, $rest:path)*) => {
{
use schala_repl::PassDebugOptionsDescriptor;
// The pass is identified by the literal path text written at the call site.
let pass_name = stringify!($pass);
let output = {
let ref debug_map = $options.debug_passes;
// A pass only gets a debug handle (the computation itself) when debug options
// were registered under its name; those options are made current first.
let debug_handle = match debug_map.get(pass_name) {
Some(PassDebugOptionsDescriptor { opts }) => {
let ptr = &mut $comp;
ptr.cur_debug_options = opts.clone();
Some(ptr)
}
_ => None
};
$pass($state, $input, debug_handle)
};
match output {
Ok(result) => pass_chain_helper! { ($state, $comp, $options); result $(, $rest)* },
// The first failing pass ends the chain with an error naming that pass.
Err(err) => {
$comp.output(Err(format!("Pass {} failed with {:?}", pass_name, err)))
}
}
}
};
// Done
(($state:expr, $comp:expr, $options:expr); $final_output:expr) => {
{
let final_output: FinishedComputation = $comp.finish(Ok($final_output));
final_output
}
};
}

View File

@ -1,561 +0,0 @@
#![feature(link_args)]
#![feature(slice_patterns, box_patterns, box_syntax)]
#![feature(plugin)]
#![plugin(rocket_codegen)]
extern crate getopts;
extern crate linefeed;
extern crate itertools;
extern crate colored;
#[macro_use]
extern crate serde_derive;
extern crate serde_json;
extern crate rocket;
extern crate rocket_contrib;
extern crate includedir;
extern crate phf;
use std::path::Path;
use std::fs::File;
use std::io::{Read, Write};
use std::process::exit;
use std::default::Default;
use std::fmt::Write as FmtWrite;
use colored::*;
use itertools::Itertools;
mod language;
mod webapp;
pub mod llvm_wrap;
const VERSION_STRING: &'static str = "0.1.0";
include!(concat!(env!("OUT_DIR"), "/static.rs"));
pub use language::{LLVMCodeString, ProgrammingLanguageInterface, EvalOptions,
ExecutionMethod, TraceArtifact, FinishedComputation, UnfinishedComputation, PassDebugOptionsDescriptor, PassDescriptor};
pub type PLIGenerator = Box<Fn() -> Box<ProgrammingLanguageInterface> + Send + Sync>;
/// Entry point of the metainterpreter.
///
/// Instantiates one language per generator, parses the command line, and then:
/// * `list-languages`: prints the language names and exits;
/// * `help`: prints usage and exits with status 0;
/// * `webapp`: starts the web application and exits with status 0;
/// * otherwise starts the interactive REPL when no file argument is given, or runs the
///   first file argument non-interactively.
///
/// The `lang` option selects the initial REPL language by case-insensitive name
/// (first language when absent or unknown); `eval-style` set to `compile` selects
/// compilation, anything else interpretation; `debug` is a comma-separated list of
/// pass names handed to the non-interactive run.
///
/// Exits with status 1 when the command line cannot be parsed.
// NOTE(review): `list-languages` also exits with status 1 although nothing failed,
// unlike `help` - confirm whether that is intended.
pub fn repl_main(generators: Vec<PLIGenerator>) {
let languages: Vec<Box<ProgrammingLanguageInterface>> = generators.iter().map(|x| x()).collect();
let option_matches = program_options().parse(std::env::args()).unwrap_or_else(|e| {
println!("{:?}", e);
exit(1);
});
if option_matches.opt_present("list-languages") {
for lang in languages {
println!("{}", lang.get_language_name());
}
exit(1);
}
if option_matches.opt_present("help") {
println!("{}", program_options().usage("Schala metainterpreter"));
exit(0);
}
if option_matches.opt_present("webapp") {
webapp::web_main(generators);
exit(0);
}
let mut options = EvalOptions::default();
let debug_passes = if let Some(opts) = option_matches.opt_str("debug") {
let output: Vec<String> = opts.split_terminator(",").map(|s| s.to_string()).collect();
output
} else {
vec![]
};
let language_names: Vec<String> = languages.iter().map(|lang| {lang.get_language_name()}).collect();
let initial_index: usize =
option_matches.opt_str("lang")
.and_then(|lang| { language_names.iter().position(|x| { x.to_lowercase() == lang.to_lowercase() }) })
.unwrap_or(0);
options.execution_method = match option_matches.opt_str("eval-style") {
Some(ref s) if s == "compile" => ExecutionMethod::Compile,
_ => ExecutionMethod::Interpret,
};
// free[0] is the program name; a second free argument is the source file to run.
match option_matches.free[..] {
[] | [_] => {
let mut repl = Repl::new(languages, initial_index);
repl.run();
}
[_, ref filename, _..] => {
run_noninteractive(filename, languages, options, debug_passes);
}
};
}
/// Runs the source file `filename` with the language whose source-file suffix matches
/// the file's extension.
///
/// Only those `debug_passes` that name an actual pass of the chosen language are
/// enabled in `options`. In interpret mode the pipeline's non-interactive output, if
/// any, is printed. Compile mode is not implemented and panics.
///
/// Prints a message and exits with status 1 when the file has no extension, when no
/// language handles the extension, or when the file cannot be opened or read.
fn run_noninteractive(filename: &str, languages: Vec<Box<ProgrammingLanguageInterface>>, mut options: EvalOptions, debug_passes: Vec<String>) {
    let path = Path::new(filename);
    let ext = path.extension().and_then(|e| e.to_str()).unwrap_or_else(|| {
        println!("Source file lacks extension");
        exit(1);
    });

    // `find` already yields the boxed language; no further boxing needed.
    let mut language = languages.into_iter().find(|lang| lang.get_source_file_suffix() == ext)
        .unwrap_or_else(|| {
            println!("Extension .{} not recognized", ext);
            exit(1);
        });

    // A missing or unreadable file is an ordinary user error: report it the same way
    // as the other failure modes instead of panicking.
    let mut buffer = String::new();
    let read_result = File::open(path)
        .and_then(|mut source_file| source_file.read_to_string(&mut buffer));
    if let Err(err) = read_result {
        println!("Could not read source file {}: {}", filename, err);
        exit(1);
    }

    // The pass list does not change while the requested names are filtered.
    let passes = language.get_passes();
    for pass in debug_passes {
        if passes.iter().any(|desc| desc.name == pass) {
            options.debug_passes.insert(pass, PassDebugOptionsDescriptor { opts: vec![] });
        }
    }

    match options.execution_method {
        ExecutionMethod::Compile => {
            /*
            let llvm_bytecode = language.compile(&buffer);
            compilation_sequence(llvm_bytecode, filename);
            */
            panic!("Not ready to go yet");
        },
        ExecutionMethod::Interpret => {
            let output = language.execute_pipeline(&buffer, &options);
            if let Some(text) = output.to_noninteractive() {
                println!("{}", text);
            }
        }
    }
}
/// A tree of REPL directive names used for tab completion and help output.
#[derive(Clone)]
enum CommandTree {
    /// A leaf command: its name and optional help text.
    Terminal(String, Option<String>),
    /// A command with sub-commands: name, children and optional help text.
    NonTerminal(String, Vec<CommandTree>, Option<String>),
    /// The anonymous root holding all top-level commands.
    Top(Vec<CommandTree>),
}

impl CommandTree {
    /// Builds a leaf command named `s` with optional help text.
    fn term(s: &str, help: Option<&str>) -> CommandTree {
        CommandTree::Terminal(String::from(s), help.map(String::from))
    }

    /// Name of this node; the root has an empty name.
    fn get_cmd(&self) -> String {
        match self {
            CommandTree::Terminal(name, _) | CommandTree::NonTerminal(name, _, _) => name.clone(),
            CommandTree::Top(_) => String::new(),
        }
    }

    /// Help text of this node, or an empty string when none was given.
    fn get_help(&self) -> String {
        match self {
            CommandTree::Terminal(_, help) | CommandTree::NonTerminal(_, _, help) => {
                help.clone().unwrap_or_default()
            }
            CommandTree::Top(_) => String::new(),
        }
    }

    /// Names of the direct children; leaves have none.
    fn get_children(&self) -> Vec<String> {
        let kids = match self {
            CommandTree::Terminal(_, _) => return Vec::new(),
            CommandTree::NonTerminal(_, kids, _) | CommandTree::Top(kids) => kids,
        };
        kids.iter().map(CommandTree::get_cmd).collect()
    }
}
/// Tab-completion state for REPL directives.
struct TabCompleteHandler {
// Character that introduces an interpreter directive on the input line.
sigil: char,
// Tree of directive names that completions are drawn from.
top_level_commands: CommandTree,
}
use linefeed::complete::{Completion, Completer};
use linefeed::terminal::Terminal;
impl TabCompleteHandler {
    /// Creates a completer for directives introduced by `sigil`, drawing
    /// candidates from `top_level_commands`.
    fn new(sigil: char, top_level_commands: CommandTree) -> TabCompleteHandler {
        TabCompleteHandler { sigil, top_level_commands }
    }
}
impl<T: Terminal> Completer<T> for TabCompleteHandler {
    /// Completes interpreter directives.
    ///
    /// Returns `None` when the line does not start with the directive sigil.
    /// Otherwise the already-typed words between the sigil and `start` are
    /// used to walk down the command tree, and the children of the node
    /// reached are offered as completions for `word`.
    fn complete(&self, word: &str, prompter: &linefeed::prompter::Prompter<T>, start: usize, _end: usize) -> Option<Vec<Completion>> {
        let line = prompter.buffer();
        if !line.starts_with(self.sigil) {
            return None;
        }

        // Slice by the sigil's real byte length instead of assuming one byte.
        let sigil_len = self.sigil.len_utf8();
        let words_end = if start < sigil_len { sigil_len } else { start };

        // Walk the tree along the words already typed; an unknown word or a
        // leaf leaves `None`, which yields no completions below.
        let mut command_tree: Option<&CommandTree> = Some(&self.top_level_commands);
        for s in line[sigil_len..words_end].split_whitespace() {
            command_tree = command_tree.and_then(|cm| match cm {
                CommandTree::Top(children) | CommandTree::NonTerminal(_, children, _) => {
                    children.iter().find(|c| c.get_cmd() == s)
                }
                CommandTree::Terminal(_, _) => None,
            });
        }

        let top = match command_tree {
            Some(CommandTree::Top(_)) => true,
            _ => false,
        };

        // At the top level the word normally still carries the sigil; strip it
        // for matching and put it back in the completion. When the word does
        // not carry it (e.g. `": "` followed by Tab gives an empty word),
        // match the word as-is rather than panicking on an out-of-range slice.
        let has_sigil = top && word.starts_with(self.sigil);
        let stem = if has_sigil { &word[sigil_len..] } else { word };
        let prefix = if has_sigil { self.sigil.to_string() } else { String::new() };

        let completions = command_tree
            .map(|x| x.get_children())
            .unwrap_or_else(Vec::new)
            .into_iter()
            .filter(|cmd| cmd.starts_with(stem))
            .map(|cmd| Completion {
                completion: format!("{}{}", prefix, cmd),
                display: Some(cmd),
                suffix: linefeed::complete::Suffix::Some(' '),
            })
            .collect();
        Some(completions)
    }
}
/// State of the interactive read-eval-print loop.
struct Repl {
// Evaluation options; loaded from and saved to `.schala_repl`.
options: EvalOptions,
// All available language implementations.
languages: Vec<Box<ProgrammingLanguageInterface>>,
// Index into `languages` of the language currently receiving input.
current_language_index: usize,
// Leading character that marks a line as an interpreter directive.
interpreter_directive_sigil: char,
// Line editor used for prompting, history and tab completion.
line_reader: linefeed::interface::Interface<linefeed::terminal::DefaultTerminal>,
}
impl Repl {
/// Creates a REPL over `languages`, starting at `initial_index`
/// (or at the first language when the index is out of range).
fn new(languages: Vec<Box<ProgrammingLanguageInterface>>, initial_index: usize) -> Repl {
    use linefeed::Interface;

    let current_language_index = if initial_index >= languages.len() { 0 } else { initial_index };
    let line_reader = Interface::new("schala-repl").unwrap();
    let options = Repl::get_options();

    Repl {
        options,
        languages,
        current_language_index,
        interpreter_directive_sigil: ':',
        line_reader,
    }
}
/// Borrows the language currently selected in the REPL.
fn get_cur_language(&self) -> &ProgrammingLanguageInterface {
    let index = self.current_language_index;
    &*self.languages[index]
}
/// Loads saved options from `.schala_repl` (JSON); any I/O or parse failure
/// silently yields the default options.
fn get_options() -> EvalOptions {
    let loaded = File::open(".schala_repl").and_then(|mut file| {
        let mut contents = String::new();
        file.read_to_string(&mut contents)?;
        let options: EvalOptions = serde_json::from_str(&contents)?;
        Ok(options)
    });

    match loaded {
        Ok(options) => options,
        Err(_) => EvalOptions::default(),
    }
}
/// Serializes the current options as JSON into `.schala_repl`, printing a
/// message if the file cannot be created or written.
fn save_options(&self) {
    let outcome = File::create(".schala_repl").and_then(|mut file| {
        let serialized = serde_json::to_string(&self.options).unwrap();
        file.write_all(serialized.as_bytes())
    });

    if let Err(err) = outcome {
        println!("Error saving .schala_repl file {}", err);
    }
}
/// Runs the interactive loop until end-of-input or a signal is read, then
/// saves the history and the options.
fn run(&mut self) {
use linefeed::ReadResult;
println!("Schala MetaInterpreter version {}", VERSION_STRING);
println!("Type {}help for help with the REPL", self.interpreter_directive_sigil);
// A missing or unreadable history file is not an error.
self.line_reader.load_history(".schala_history").unwrap_or(());
loop {
let language_name = self.languages[self.current_language_index].get_language_name();
// The completer and prompt are rebuilt on every iteration because both
// depend on the currently selected language.
let directives = self.get_directives();
let tab_complete_handler = TabCompleteHandler::new(self.interpreter_directive_sigil, directives);
self.line_reader.set_completer(std::sync::Arc::new(tab_complete_handler));
let prompt_str = format!("{} >> ", language_name);
self.line_reader.set_prompt(&prompt_str);
match self.line_reader.read_line() {
Err(e) => {
// A read error is reported and the loop continues.
println!("Terminal read error: {}", e);
},
Ok(ReadResult::Eof) => break,
Ok(ReadResult::Signal(_)) => break,
Ok(ReadResult::Input(ref input)) => {
self.line_reader.add_history_unique(input.to_string());
// Lines starting with the sigil are REPL directives; everything else
// is source code for the current language.
let output = match input.chars().nth(0) {
Some(ch) if ch == self.interpreter_directive_sigil => self.handle_interpreter_directive(input),
_ => Some(self.input_handler(input)),
};
if let Some(o) = output {
println!("=> {}", o);
}
}
}
}
// Failure to save the history is ignored.
self.line_reader.save_history(".schala_history").unwrap_or(());
self.save_options();
println!("Exiting...");
}
/// Evaluates `input` with the current language and returns the REPL-formatted result.
fn input_handler(&mut self, input: &str) -> String {
    let index = self.current_language_index;
    self.languages[index]
        .execute_pipeline(input, &self.options)
        .to_repl()
}
/// Builds the tree of interpreter directives for the current language.
///
/// The tree drives both tab completion and the `help` listing. The
/// `debug show` / `debug hide` branches list the current language's passes
/// (with their debug options as children, when a pass has any).
fn get_directives(&self) -> CommandTree {
    let passes = self.get_cur_language().get_passes();
    let passes_directives: Vec<CommandTree> = passes.iter()
        .map(|pass_descriptor| {
            let name = &pass_descriptor.name;
            if pass_descriptor.debug_options.is_empty() {
                CommandTree::term(name, None)
            } else {
                let sub_opts: Vec<CommandTree> = pass_descriptor.debug_options.iter()
                    .map(|o| CommandTree::term(o, None)).collect();
                CommandTree::NonTerminal(name.clone(), sub_opts, None)
            }
        }).collect();

    CommandTree::Top(vec![
        CommandTree::term("exit", Some("exit the REPL")),
        CommandTree::term("quit", Some("exit the REPL")),
        CommandTree::term("help", Some("Print this help message")),
        CommandTree::NonTerminal(format!("debug"), vec![
            CommandTree::term("passes", None),
            CommandTree::NonTerminal(format!("show"), passes_directives.clone(), None),
            // Last use of the list, so it can be moved instead of cloned.
            CommandTree::NonTerminal(format!("hide"), passes_directives, None),
        ], Some(format!("show or hide pass info for a given pass, or display the names of all passes"))),
        CommandTree::NonTerminal(format!("lang"), vec![
            // `show` is accepted by the `lang` directive handler, so offer it here too.
            CommandTree::term("show", None),
            CommandTree::term("next", None),
            CommandTree::term("prev", None),
            CommandTree::NonTerminal(format!("go"), vec![], None)//TODO
        ], Some(format!("switch between languages, or go directly to a language by name"))),
    ])
}
/// Executes a REPL directive line (a line beginning with the sigil).
///
/// The first character of `input` is dropped and the rest is split on
/// whitespace into a command and its arguments. Returns the text to show
/// the user, or `None` when the line holds nothing but the sigil.
/// Commands that are not built in are offered to the current language's
/// custom directive handler.
fn handle_interpreter_directive(&mut self, input: &str) -> Option<String> {
    // Skip the sigil character.
    let mut iter = input.chars();
    iter.next();
    let commands: Vec<&str> = iter
        .as_str()
        .split_whitespace()
        .collect();

    let cmd: &str = match commands.get(0) {
        None => return None,
        Some(s) => s
    };

    match cmd {
        "exit" | "quit" => {
            self.save_options();
            exit(0)
        },
        "lang" | "language" => match commands.get(1) {
            Some(&"show") => {
                // One language per line; the current one is marked with `* `.
                let mut buf = String::new();
                for (i, lang) in self.languages.iter().enumerate() {
                    write!(buf, "{}{}\n", if i == self.current_language_index { "* "} else { "" }, lang.get_language_name()).unwrap();
                }
                Some(buf)
            },
            Some(&"go") => match commands.get(2) {
                None => Some(format!("Must specify a language name")),
                Some(&desired_name) => {
                    // Case-insensitive match; the first matching language wins.
                    let wanted = desired_name.to_lowercase();
                    let found = self.languages.iter()
                        .position(|lang| lang.get_language_name().to_lowercase() == wanted);
                    match found {
                        Some(i) => {
                            self.current_language_index = i;
                            Some(format!("Switching to {}", self.languages[i].get_language_name()))
                        }
                        None => Some(format!("Language {} not found", desired_name)),
                    }
                }
            },
            Some(&"next") | Some(&"n") => {
                self.current_language_index = (self.current_language_index + 1) % self.languages.len();
                Some(format!("Switching to {}", self.languages[self.current_language_index].get_language_name()))
            },
            Some(&"previous") | Some(&"p") | Some(&"prev") => {
                // Wrap around to the last language when stepping back from the first.
                self.current_language_index = if self.current_language_index == 0 { self.languages.len() - 1 } else { self.current_language_index - 1 };
                Some(format!("Switching to {}", self.languages[self.current_language_index].get_language_name()))
            },
            Some(e) => Some(format!("Bad `lang(uage)` argument: {}", e)),
            // The short alias for `previous` is `p` (not `n`), and `go` is also accepted.
            None => Some(format!("Valid arguments for `lang(uage)` are `show`, `go <name>`, `next`|`n`, `previous`|`prev`|`p`"))
        },
        "help" => {
            let mut buf = String::new();
            let ref lang = self.languages[self.current_language_index];
            let directives = match self.get_directives() {
                CommandTree::Top(children) => children,
                _ => panic!("Top-level CommandTree not Top")
            };

            writeln!(buf, "MetaInterpreter options").unwrap();
            writeln!(buf, "-----------------------").unwrap();

            for directive in directives {
                let trailer = " ";
                writeln!(buf, "{}{}- {}", directive.get_cmd(), trailer, directive.get_help()).unwrap();
            }

            writeln!(buf, "").unwrap();
            writeln!(buf, "Language-specific help for {}", lang.get_language_name()).unwrap();
            writeln!(buf, "-----------------------").unwrap();
            writeln!(buf, "{}", lang.custom_interpreter_directives_help()).unwrap();
            Some(buf)
        },
        "debug" => self.handle_debug(commands),
        e => self.languages[self.current_language_index]
            .handle_custom_interpreter_directives(&commands)
            .or(Some(format!("Unknown command: {}", e)))
    }
}
/// Handles the `debug` directive.
///
/// * `debug passes` lists the current language's passes joined by ` -> `,
///   marking passes with debugging enabled by a leading `*` and green colour.
/// * `debug show <pass> [opt]` / `debug hide <pass>` turn debug output for a
///   pass on or off.
fn handle_debug(&mut self, commands: Vec<&str>) -> Option<String> {
    let passes = self.get_cur_language().get_passes();
    match commands.get(1) {
        Some(&"passes") => {
            let rendered: String = passes.into_iter()
                .map(|desc| {
                    if self.options.debug_passes.contains_key(&desc.name) {
                        let color = "green";
                        format!("*{}", desc.name.color(color))
                    } else {
                        desc.name
                    }
                })
                .intersperse(format!(" -> "))
                .collect();
            Some(rendered)
        }
        Some(&action) if action == "show" || action == "hide" => {
            let show = action == "show";

            let debug_pass: String = match commands.get(2) {
                Some(s) => s.to_string(),
                None => return Some(format!("Must specify a stage to debug")),
            };

            let desc = match passes.iter().find(|desc| desc.name == debug_pass) {
                Some(desc) => desc,
                None => return Some(format!("Couldn't find stage: {}", debug_pass)),
            };

            // At most one extra option is taken from the command line.
            let opts: Vec<String> = commands.get(3).into_iter().map(|opt| opt.to_string()).collect();

            let msg = format!("{} debug for pass {}", if show { "Enabling" } else { "Disabling" }, debug_pass);
            if show {
                self.options.debug_passes.insert(desc.name.clone(), PassDebugOptionsDescriptor { opts });
            } else {
                self.options.debug_passes.remove(&desc.name);
            }
            Some(msg)
        }
        _ => Some(format!("Unknown debug command")),
    }
}
}
/*
pub fn compilation_sequence(llvm_code: LLVMCodeString, sourcefile: &str) {
use std::process::Command;
let ll_filename = "out.ll";
let obj_filename = "out.o";
let q: Vec<&str> = sourcefile.split('.').collect();
let bin_filename = match &q[..] {
&[name, "maaru"] => name,
_ => panic!("Bad filename {}", sourcefile),
};
let LLVMCodeString(llvm_str) = llvm_code;
println!("Compilation process finished for {}", ll_filename);
File::create(ll_filename)
.and_then(|mut f| f.write_all(llvm_str.as_bytes()))
.expect("Error writing file");
let llc_output = Command::new("llc")
.args(&["-filetype=obj", ll_filename, "-o", obj_filename])
.output()
.expect("Failed to run llc");
if !llc_output.status.success() {
println!("{}", String::from_utf8_lossy(&llc_output.stderr));
}
let gcc_output = Command::new("gcc")
.args(&["-o", bin_filename, &obj_filename])
.output()
.expect("failed to run gcc");
if !gcc_output.status.success() {
println!("{}", String::from_utf8_lossy(&gcc_output.stdout));
println!("{}", String::from_utf8_lossy(&gcc_output.stderr));
}
for filename in [obj_filename].iter() {
Command::new("rm")
.arg(filename)
.output()
.expect(&format!("failed to run rm {}", filename));
}
}
*/
/// Declares the command-line options understood by the metainterpreter.
// NOTE(review): the `--debug` help text describes single-letter stage codes,
// while `repl_main` splits the value on commas and matches pass names —
// confirm which is intended.
fn program_options() -> getopts::Options {
    let mut options = getopts::Options::new();
    options
        .optopt(
            "s",
            "eval-style",
            "Specify whether to compile (if supported) or interpret the language. If not specified, the default is language-specific",
            "[compile|interpret]",
        )
        .optflag("", "list-languages", "Show a list of all supported languages")
        .optopt("l", "lang", "Start up REPL in a language", "LANGUAGE")
        .optflag("h", "help", "Show help text")
        .optflag("w", "webapp", "Start up web interpreter")
        .optopt(
            "d",
            "debug",
            "Debug a stage (l = tokenizer, a = AST, r = parse trace, s = symbol table)",
            "[l|a|r|s]",
        );
    options
}

View File

@ -1,279 +0,0 @@
#![allow(non_snake_case)]
#![allow(dead_code)]
extern crate llvm_sys;
use self::llvm_sys::{LLVMIntPredicate, LLVMRealPredicate};
use self::llvm_sys::prelude::*;
use self::llvm_sys::core;
use std::ptr;
use std::ffi::{CString, CStr};
use std::os::raw::c_char;
/// Creates a new LLVM context via `LLVMContextCreate`.
pub fn create_context() -> LLVMContextRef {
unsafe { core::LLVMContextCreate() }
}
pub fn module_create_with_name(name: &str) -> LLVMModuleRef {
unsafe {
let n = name.as_ptr() as *const _;
core::LLVMModuleCreateWithName(n)
}
}
/// Creates an instruction builder in `context` via `LLVMCreateBuilderInContext`.
pub fn CreateBuilderInContext(context: LLVMContextRef) -> LLVMBuilderRef {
unsafe { core::LLVMCreateBuilderInContext(context) }
}
/// Appends a basic block called `name` to `function` via
/// `LLVMAppendBasicBlockInContext`. Panics if `name` contains a NUL byte.
pub fn AppendBasicBlockInContext(context: LLVMContextRef,
function: LLVMValueRef,
name: &str)
-> LLVMBasicBlockRef {
let c_name = CString::new(name).unwrap();
unsafe { core::LLVMAppendBasicBlockInContext(context, function, c_name.as_ptr()) }
}
/// Adds a function called `name` with type `function_type` to `module` via
/// `LLVMAddFunction`. Panics if `name` contains a NUL byte.
pub fn AddFunction(module: LLVMModuleRef, name: &str, function_type: LLVMTypeRef) -> LLVMValueRef {
let c_name = CString::new(name).unwrap();
unsafe { core::LLVMAddFunction(module, c_name.as_ptr(), function_type) }
}
/// Builds a function type from `return_type` and `param_types` via
/// `LLVMFunctionType`; `is_var_rag` selects a variadic signature.
pub fn FunctionType(return_type: LLVMTypeRef,
mut param_types: Vec<LLVMTypeRef>,
is_var_rag: bool)
-> LLVMTypeRef {
let len = param_types.len();
unsafe {
let pointer = param_types.as_mut_ptr();
core::LLVMFunctionType(return_type,
pointer,
// NOTE(review): `as` silently truncates if more than u32::MAX parameters are passed.
len as u32,
if is_var_rag { 1 } else { 0 })
}
}
/// Looks up the function called `name` in `module` via `LLVMGetNamedFunction`.
///
/// Returns `None` when LLVM returns a null pointer.
/// Panics if `name` contains a NUL byte.
pub fn GetNamedFunction(module: LLVMModuleRef,
                        name: &str) -> Option<LLVMValueRef> {
    let c_name = CString::new(name).unwrap();
    let found = unsafe { core::LLVMGetNamedFunction(module, c_name.as_ptr()) };
    match found.is_null() {
        true => None,
        false => Some(found),
    }
}
/// Returns the `void` type of `context` (`LLVMVoidTypeInContext`).
pub fn VoidTypeInContext(context: LLVMContextRef) -> LLVMTypeRef {
unsafe { core::LLVMVoidTypeInContext(context) }
}
/// Disposes of `builder` (`LLVMDisposeBuilder`).
pub fn DisposeBuilder(builder: LLVMBuilderRef) {
unsafe { core::LLVMDisposeBuilder(builder) }
}
/// Disposes of `module` (`LLVMDisposeModule`).
pub fn DisposeModule(module: LLVMModuleRef) {
unsafe { core::LLVMDisposeModule(module) }
}
/// Disposes of `context` (`LLVMContextDispose`).
pub fn ContextDispose(context: LLVMContextRef) {
unsafe { core::LLVMContextDispose(context) }
}
/// Positions `builder` at the end of `basic_block` (`LLVMPositionBuilderAtEnd`).
pub fn PositionBuilderAtEnd(builder: LLVMBuilderRef, basic_block: LLVMBasicBlockRef) {
unsafe { core::LLVMPositionBuilderAtEnd(builder, basic_block) }
}
/// Emits a `ret` of `val` (`LLVMBuildRet`).
pub fn BuildRet(builder: LLVMBuilderRef, val: LLVMValueRef) -> LLVMValueRef {
unsafe { core::LLVMBuildRet(builder, val) }
}
/// Emits a `ret void` (`LLVMBuildRetVoid`).
pub fn BuildRetVoid(builder: LLVMBuilderRef) -> LLVMValueRef {
unsafe { core::LLVMBuildRetVoid(builder) }
}
/// Dumps `module` using `LLVMDumpModule`.
pub fn DumpModule(module: LLVMModuleRef) {
unsafe { core::LLVMDumpModule(module) }
}
/// Returns the 64-bit integer type of `context` (`LLVMInt64TypeInContext`).
pub fn Int64TypeInContext(context: LLVMContextRef) -> LLVMTypeRef {
unsafe { core::LLVMInt64TypeInContext(context) }
}
/// Creates an integer constant of `int_type` with value `n` (`LLVMConstInt`).
pub fn ConstInt(int_type: LLVMTypeRef, n: u64, sign_extend: bool) -> LLVMValueRef {
unsafe { core::LLVMConstInt(int_type, n, if sign_extend { 1 } else { 0 }) }
}
// The arithmetic builders below share one shape: `reg_name` names the result
// value, and each panics if `reg_name` contains a NUL byte.

/// Emits an `add` of `lhs` and `rhs` (`LLVMBuildAdd`).
pub fn BuildAdd(builder: LLVMBuilderRef,
lhs: LLVMValueRef,
rhs: LLVMValueRef,
reg_name: &str)
-> LLVMValueRef {
let name = CString::new(reg_name).unwrap();
unsafe { core::LLVMBuildAdd(builder, lhs, rhs, name.as_ptr()) }
}
/// Emits a `sub` of `lhs` and `rhs` (`LLVMBuildSub`).
pub fn BuildSub(builder: LLVMBuilderRef,
lhs: LLVMValueRef,
rhs: LLVMValueRef,
reg_name: &str)
-> LLVMValueRef {
let name = CString::new(reg_name).unwrap();
unsafe { core::LLVMBuildSub(builder, lhs, rhs, name.as_ptr()) }
}
/// Emits a `mul` of `lhs` and `rhs` (`LLVMBuildMul`).
pub fn BuildMul(builder: LLVMBuilderRef,
lhs: LLVMValueRef,
rhs: LLVMValueRef,
reg_name: &str)
-> LLVMValueRef {
let name = CString::new(reg_name).unwrap();
unsafe { core::LLVMBuildMul(builder, lhs, rhs, name.as_ptr()) }
}
/// Emits an unsigned division of `lhs` by `rhs` (`LLVMBuildUDiv`).
pub fn BuildUDiv(builder: LLVMBuilderRef,
lhs: LLVMValueRef,
rhs: LLVMValueRef,
reg_name: &str)
-> LLVMValueRef {
let name = CString::new(reg_name).unwrap();
unsafe { core::LLVMBuildUDiv(builder, lhs, rhs, name.as_ptr()) }
}
/// Emits a signed remainder of `lhs` by `rhs` (`LLVMBuildSRem`).
pub fn BuildSRem(builder: LLVMBuilderRef,
lhs: LLVMValueRef,
rhs: LLVMValueRef,
reg_name: &str)
-> LLVMValueRef {
let name = CString::new(reg_name).unwrap();
unsafe { core::LLVMBuildSRem(builder, lhs, rhs, name.as_ptr()) }
}
/// Emits a conditional branch on `if_expr` to `then_expr` or `else_expr`
/// (`LLVMBuildCondBr`).
pub fn BuildCondBr(builder: LLVMBuilderRef,
if_expr: LLVMValueRef,
then_expr: LLVMBasicBlockRef,
else_expr: LLVMBasicBlockRef) -> LLVMValueRef {
unsafe { core::LLVMBuildCondBr(builder, if_expr, then_expr, else_expr) }
}
/// Emits an unconditional branch to `dest` (`LLVMBuildBr`).
pub fn BuildBr(builder: LLVMBuilderRef,
dest: LLVMBasicBlockRef) -> LLVMValueRef {
unsafe { core::LLVMBuildBr(builder, dest) }
}
/// Returns the block `builder` is currently inserting into (`LLVMGetInsertBlock`).
pub fn GetInsertBlock(builder: LLVMBuilderRef) -> LLVMBasicBlockRef {
unsafe { core::LLVMGetInsertBlock(builder) }
}
/// Emits a phi node of type `ty` called `name` (`LLVMBuildPhi`).
/// Panics if `name` contains a NUL byte.
pub fn BuildPhi(builder: LLVMBuilderRef, ty: LLVMTypeRef, name: &str) -> LLVMValueRef {
let name = CString::new(name).unwrap();
unsafe { core::LLVMBuildPhi(builder, ty, name.as_ptr()) }
}
/// Sets the name of `value` (`LLVMSetValueName`).
/// Panics if `name` contains a NUL byte.
pub fn SetValueName(value: LLVMValueRef, name: &str) {
let name = CString::new(name).unwrap();
unsafe {
core::LLVMSetValueName(value, name.as_ptr())
}
}
/// Returns the name of `value` (`LLVMGetValueName`) as an owned `String`,
/// replacing invalid UTF-8 lossily.
pub fn GetValueName(value: LLVMValueRef) -> String {
unsafe {
let name_ptr: *const c_char = core::LLVMGetValueName(value);
// NOTE(review): assumes LLVM never returns a null pointer here — confirm.
CStr::from_ptr(name_ptr).to_string_lossy().into_owned()
}
}
/// Returns the parameters of `function` (`LLVMGetParams`).
pub fn GetParams(function: LLVMValueRef) -> Vec<LLVMValueRef> {
    let size = CountParams(function);
    let mut container: Vec<LLVMValueRef> = Vec::with_capacity(size);
    // SAFETY: the buffer has capacity for `size` elements and, per the LLVM-C
    // documentation, `LLVMGetParams` fills exactly `LLVMCountParams` entries.
    // The length is only raised after those elements have been written, so the
    // vector never exposes uninitialised memory.
    unsafe {
        core::LLVMGetParams(function, container.as_mut_ptr());
        container.set_len(size);
    }
    container
}
/// Returns the number of parameters of `function` (`LLVMCountParams`).
pub fn CountParams(function: LLVMValueRef) -> usize {
unsafe { core::LLVMCountParams(function) as usize }
}
// The builders below name their result `name` and panic if it contains a NUL byte.

/// Emits a floating-point comparison `op` of `lhs` and `rhs` (`LLVMBuildFCmp`).
pub fn BuildFCmp(builder: LLVMBuilderRef,
op: LLVMRealPredicate,
lhs: LLVMValueRef,
rhs: LLVMValueRef,
name: &str) -> LLVMValueRef {
let name = CString::new(name).unwrap();
unsafe { core::LLVMBuildFCmp(builder, op, lhs, rhs, name.as_ptr()) }
}
/// Emits a zero-extension of `val` to `dest_type` (`LLVMBuildZExt`).
pub fn BuildZExt(builder: LLVMBuilderRef,
val: LLVMValueRef,
dest_type: LLVMTypeRef,
name: &str) -> LLVMValueRef {
let name = CString::new(name).unwrap();
unsafe { core::LLVMBuildZExt(builder, val, dest_type, name.as_ptr()) }
}
/// Emits an unsigned-integer-to-float conversion of `val` to `dest_type`
/// (`LLVMBuildUIToFP`).
pub fn BuildUIToFP(builder: LLVMBuilderRef,
val: LLVMValueRef,
dest_type: LLVMTypeRef,
name: &str) -> LLVMValueRef {
let name = CString::new(name).unwrap();
unsafe { core::LLVMBuildUIToFP(builder, val, dest_type, name.as_ptr()) }
}
/// Emits an integer comparison `op` of `lhs` and `rhs` (`LLVMBuildICmp`).
pub fn BuildICmp(builder: LLVMBuilderRef,
op: LLVMIntPredicate,
lhs: LLVMValueRef,
rhs: LLVMValueRef,
name: &str) -> LLVMValueRef {
let name = CString::new(name).unwrap();
unsafe { core::LLVMBuildICmp(builder, op, lhs, rhs, name.as_ptr()) }
}
/// Returns the function that contains `block` (`LLVMGetBasicBlockParent`).
pub fn GetBasicBlockParent(block: LLVMBasicBlockRef) -> LLVMValueRef {
unsafe { core::LLVMGetBasicBlockParent(block) }
}
/// Returns the basic blocks of `function` (`LLVMGetBasicBlocks`).
pub fn GetBasicBlocks(function: LLVMValueRef) -> Vec<LLVMBasicBlockRef> {
    let size = CountBasicBlocks(function);
    let mut container: Vec<LLVMBasicBlockRef> = Vec::with_capacity(size);
    // SAFETY: the buffer has capacity for `size` elements and, per the LLVM-C
    // documentation, `LLVMGetBasicBlocks` fills exactly `LLVMCountBasicBlocks`
    // entries. The length is only raised after those elements have been
    // written, so the vector never exposes uninitialised memory.
    unsafe {
        core::LLVMGetBasicBlocks(function, container.as_mut_ptr());
        container.set_len(size);
    }
    container
}
/// Returns the number of basic blocks in `function` (`LLVMCountBasicBlocks`).
pub fn CountBasicBlocks(function: LLVMValueRef) -> usize {
unsafe { core::LLVMCountBasicBlocks(function) as usize }
}
pub fn PrintModuleToString(module: LLVMModuleRef) -> String {
unsafe {
let str_ptr: *const c_char = core::LLVMPrintModuleToString(module);
CStr::from_ptr(str_ptr).to_string_lossy().into_owned()
}
}
/// Adds incoming (value, block) pairs to `phi_node` (`LLVMAddIncoming`).
///
/// Panics when `incoming_values` and `incoming_blocks` differ in length.
pub fn AddIncoming(phi_node: LLVMValueRef, mut incoming_values: Vec<LLVMValueRef>,
mut incoming_blocks: Vec<LLVMBasicBlockRef>) {
let count = incoming_blocks.len() as u32;
if incoming_values.len() as u32 != count {
panic!("Bad invocation of AddIncoming");
}
unsafe {
let vals = incoming_values.as_mut_ptr();
let blocks = incoming_blocks.as_mut_ptr();
core::LLVMAddIncoming(phi_node, vals, blocks, count)
}
}
/// Writes `module` to the file `filename` (`LLVMPrintModuleToFile`) and
/// returns LLVM's status value. A null pointer is passed for the error
/// message, so no error text is retrieved.
/// Panics if `filename` contains a NUL byte.
pub fn PrintModuleToFile(module: LLVMModuleRef, filename: &str) -> LLVMBool {
let out_file = CString::new(filename).unwrap();
unsafe { core::LLVMPrintModuleToFile(module, out_file.as_ptr(), ptr::null_mut()) }
}

View File

@ -1,44 +0,0 @@
use rocket;
use rocket::State;
use rocket::response::Content;
use rocket::http::ContentType;
use rocket_contrib::Json;
use language::{ProgrammingLanguageInterface, EvalOptions};
use WEBFILES;
use ::PLIGenerator;
/// Serves the embedded `static/index.html` page.
#[get("/")]
fn index() -> Content<String> {
    let raw = WEBFILES.get("static/index.html").unwrap().into_owned();
    let page = String::from_utf8(raw).unwrap();
    Content(ContentType::HTML, page)
}
/// Serves the embedded `static/bundle.js` script.
#[get("/bundle.js")]
fn js_bundle() -> Content<String> {
    let raw = WEBFILES.get("static/bundle.js").unwrap().into_owned();
    let script = String::from_utf8(raw).unwrap();
    Content(ContentType::JavaScript, script)
}
/// JSON request body accepted by the `/input` route.
#[derive(Debug, Serialize, Deserialize)]
struct Input {
// Source code to evaluate.
source: String,
}
/// JSON response body returned by the `/input` route.
#[derive(Serialize, Deserialize)]
struct Output {
// REPL-formatted evaluation result.
text: String,
}
/// Evaluates the posted source with a fresh instance of the first registered
/// language, using default options, and returns the REPL-formatted output.
#[post("/input", format = "application/json", data = "<input>")]
fn interpreter_input(input: Json<Input>, generators: State<Vec<PLIGenerator>>) -> Json<Output> {
    let make_language = generators.get(0).unwrap();
    let mut language: Box<ProgrammingLanguageInterface> = make_language();
    let options = EvalOptions::default();
    let text = language.execute_pipeline(&input.source, &options).to_repl();
    Json(Output { text })
}
/// Starts the Rocket web server with the language generators as managed
/// state and the three routes mounted at `/`.
pub fn web_main(language_generators: Vec<PLIGenerator>) {
    let server = rocket::ignite()
        .manage(language_generators)
        .mount("/", routes![index, js_bundle, interpreter_input]);
    server.launch();
}

View File

@ -1,11 +0,0 @@
fn outer() {
fn inner(a) {
a + 10
}
inner(20) + 8.3
}
outer()

View File

@ -1,21 +0,0 @@
fn hella(a, b) {
a + b
}
fn paha(x, y, z) {
x * y * z
}
a = 1
c = if a {
10
} else {
20
}
q = 4
q = q + 2
q + 1 + c

View File

@ -1,8 +0,0 @@
if 20 {
a = 20
b = 30
c = 40
a + b + c
} else {
Null
}

View File

@ -1,5 +0,0 @@
(fn(q) { q * 2 }(25))
a = fn(x) { x + 5 }
a(2)

View File

@ -1,17 +0,0 @@
fn add(a, b) {
a + b
}
fn subtract(a, b) {
a - b
}
fn main() {
first_value = add(20, 20)
second_value = subtract(700, 650)
first_value + second_value
}
main()

View File

@ -1,24 +0,0 @@
fn hella(x) {
print("hey")
if x == 3 {
Null
} else {
hella(x + 1)
}
}
hella(0)
fn fib(x) {
if x < 3 {
1
} else {
fib(x - 1) + fib(x - 2)
}
}
fib(10)

View File

@ -1,12 +0,0 @@
// Binds two constants and evaluates to their sum.
fn main() {
const a = 10
const b = 20
a + b
}
//foo
print(main())

View File

@ -1,12 +0,0 @@
for n <- 1..=100 {
if n % 15 == 0 {
print("FizzBuzz")
} else if n % 5 == 0 {
print("Buzz")
} else if n % 3 == 0 {
print("Fizz")
} else {
print(n.to_string())
}
}

View File

@ -1,114 +0,0 @@
fn main() {
//comments are C-style
/* nested comments /* are cool */ */
}
@annotations are with @-
// variable expressions
var a: I32 = 20
const b: String = 20
there(); can(); be(); multiple(); statements(); per_line();
//string interpolation
const yolo = "I have ${a + b} people in my house"
// let expressions ??? not sure if I want this
let a = 10, b = 20, c = 30 in a + b + c
//list literal
const q = [1,2,3,4]
//lambda literal
q.map({|item| item * 100 })
fn yolo(a: MyType, b: YourType): ReturnType<Param1, Param2> {
if a == 20 {
return "early"
}
var sex = 20
sex
}
/* for/while loop topics */
//infinite loop
while {
if x() { break }
...
}
//conditional loop
while conditionHolds() {
...
}
//iteration over a variable
for i <- [1..1000] {
} //return type is return type of block
//monadic decomposition
for {
a <- maybeInt();
s <- foo()
} return {
a + s
} //return type is Monad<return type of block>
/* end of for loops */
/* conditionals/pattern matching */
// "is" operator for "does this pattern match"
x is Some(t) // type bool
if x {
is Some(t) => {
},
is None => {
}
}
//syntax is, I guess, for <expr> <brace-block>, where <expr> is a bool, or a <arrow-expr>
// type-level aliases
typealias <name> = <other type> #maybe this should be 'alias'?
/*
what if type A = B meant that you had to create A's with A(B), but when you used A's the interface was exactly like B's?
maybe introduce a 'newtype' keyword for this
*/
//declaring types of all stripes
type MyData = { a: i32, b: String }
type MyType = MyType
type Option<a> = None | Some(a)
type Signal = Absence | SimplePresence(i32) | ComplexPresence {a: i32, b: MyCustomData}
//traits
trait Bashable { }
trait Luggable {
fn lug(self, a: Option<Self>)
}
}
// lambdas
// ruby-style not rust-style
const a: X -> Y -> Z = {|x,y| }

View File

@ -1,17 +0,0 @@
println(sua(4))
// Returns its argument plus ten.
fn sua(x): Int {
x + 10
}
//const a = getline()
/*
if a == "true" {
println("You typed true")
} else {
println("You typed something else")
}
*/

View File

@ -1,12 +0,0 @@
const c = 10
fn add(a, b) {
const c = a + b
c
}
var b = 20
println(add(1,2))
println(c + b)

View File

@ -1,12 +0,0 @@
fn a(x) {
x + 20
}
fn x(x) {
x + a(9384)
}
a(0)
x(1)

View File

@ -1,3 +0,0 @@
(display (+ 1 2))
(display "Hello")

View File

@ -1,8 +0,0 @@
fn めんどくさい(a) {
a + 20
}
print(めんどくさい(394))

View File

@ -1,7 +0,0 @@
a = 0
while a < 100000
print("hello", a)
a = a + 1
end

View File

@ -1,20 +0,0 @@
extern crate schala_repl;
extern crate maaru_lang;
extern crate rukka_lang;
extern crate robo_lang;
extern crate schala_lang;
use schala_repl::{PLIGenerator, repl_main};
extern { }
/// Registers a generator for each language crate and hands the list to the
/// shared REPL entry point. The first entry (Schala) is the default language.
fn main() {
let generators: Vec<PLIGenerator> = vec![
Box::new(|| { Box::new(schala_lang::Schala::new())}),
Box::new(|| { Box::new(maaru_lang::Maaru::new())}),
Box::new(|| { Box::new(robo_lang::Robo::new())}),
Box::new(|| { Box::new(rukka_lang::Rukka::new())}),
];
repl_main(generators);
}

View File

@ -1,17 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>Schala Metainterpreter Web Evaluator</title>
<style>
.CodeArea {
display: flex;
flex-direction: row;
}
</style>
</head>
<body>
<div id="main">
</div>
<script src="bundle.js"></script>
</body>
</html>

View File

@ -1,64 +0,0 @@
const React = require("react");
const ReactDOM = require("react-dom");
const superagent = require("superagent");
const serverAddress = "http://localhost:8000";
/**
 * Source editor with a "Run!" button; posts the source to the server's
 * `/input` endpoint and shows the returned text in a read-only textarea.
 */
class CodeArea extends React.Component {
constructor(props) {
super(props);
// `value` is the editor content; `lastOutput` is the latest server response text.
this.state = {value: "", lastOutput: null};
// Bind handlers once so they can be passed directly as event callbacks.
this.handleChange = this.handleChange.bind(this);
this.submit = this.submit.bind(this);
}
/** Mirrors the textarea content into component state. */
handleChange(event) {
this.setState({value: event.target.value});
}
/** Sends the current source to the server and stores the response text. */
submit(event) {
console.log("Event", this.state.value);
const source = this.state.value;
superagent.post(`${serverAddress}/input`)
.send({ source })
.set("accept", "json")
.end((error, response) => {
if (response) {
console.log("Resp", response);
this.setState({lastOutput: response.body.text})
} else {
console.error("Error: ", error);
}
});
}
/** Renders the output textarea, or nothing while `lastOutput` is falsy (including an empty string). */
renderOutput() {
if (!this.state.lastOutput) {
return null;
}
return <textarea readOnly value={ this.state.lastOutput } />;
}
render() {
return (<div className="CodeArea">
<div className="input">
<textarea value={ this.state.value } onChange={this.handleChange}>
</textarea>
<button onClick={ this.submit }>Run!</button>
</div>
<div className="output">
{ this.renderOutput() }
</div>
</div>);
}
}
const main = (<div>
<h1>Schala web input</h1>
<p>Write your source code here</p>
<CodeArea/>
</div>);
const rootDom = document.getElementById("main");
ReactDOM.render(main, rootDom);

View File

@ -1,27 +0,0 @@
{
"name": "static",
"version": "1.0.0",
"main": "index.js",
"license": "MIT",
"dependencies": {
"babel": "^6.23.0",
"babel-preset-es2015": "^6.24.1",
"babel-preset-react": "^6.24.1",
"babelify": "^7.3.0",
"browserify": "^14.4.0",
"react": "^15.6.1",
"react-dom": "^15.6.1",
"superagent": "^3.6.3",
"uglify-js": "^3.1.1"
},
"babel": {
"presets": [
"babel-preset-react",
"babel-preset-es2015"
]
},
"scripts": {
"build": "browserify main.jsx -t babelify -o bundle.js",
"build-minify": "browserify main.jsx -t babelify | uglifyjs > bundle.js"
}
}

File diff suppressed because it is too large Load Diff

2
subtrees/parser-combinator/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/target
/Cargo.lock

View File

@ -0,0 +1,13 @@
[package]
name = "parser-combinator"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
arbitrary = "1.2.0"
proptest = "1.0.0"
[dev-dependencies]
rstest = "0.16.0"

View File

@ -0,0 +1,10 @@
# Rust Parser Combinator
This is a super-basic Rust parser combinator library I wrote mostly
as an exercise for myself. It is inspired by [nom](https://github.com/rust-bakery/nom)
and [chumsky](https://github.com/zesterer/chumsky).
## Ideas for future work
* See if some of the ideas in [Efficient Parsing with Parser Combinators](https://research.rug.nl/en/publications/efficient-parsing-with-parser-combinators)
can be incorporated here.

View File

@ -0,0 +1,198 @@
use crate::parser::{ParseResult, Parser, ParserInput, Representation};
/// Two-alternative convenience wrapper around [`choice`].
pub fn choice2<P1, P2, I, O, E>(parser1: P1, parser2: P2) -> impl Parser<I, O, E>
where
P1: Parser<I, O, E>,
P2: Parser<I, O, E>,
I: ParserInput + Clone,
{
choice((parser1, parser2))
}
/// Builds a parser that tries each alternative in `choices` in order and
/// yields the first success.
pub fn choice<C, I, O, E>(choices: C) -> impl Parser<I, O, E>
where
C: Choice<I, O, E>,
I: ParserInput + Clone,
{
// The representation is captured before `choices` moves into the closure.
let rep = choices.representation();
(move |input| choices.parse(input), rep)
}
/// A fixed collection of alternative parsers sharing the same input, output
/// and error types. Implemented in this file for tuples of two to six parsers.
pub trait Choice<I: Clone, O, E> {
/// Runs the alternatives against `input`.
fn parse(&self, input: I) -> ParseResult<I, O, E>;
/// Combined representation of all alternatives.
fn representation(&self) -> Representation;
}
impl<I, O, E, P1, P2> Choice<I, O, E> for (P1, P2)
where
P1: Parser<I, O, E>,
P2: Parser<I, O, E>,
I: ParserInput + Clone,
{
fn parse(&self, input: I) -> ParseResult<I, O, E> {
let parsers = vec![&self.0 as &dyn Parser<I, O, E>, &self.1];
choice_loop(input, parsers)
}
fn representation(&self) -> Representation {
let parsers = vec![&self.0 as &dyn Parser<I, O, E>, &self.1];
repr_loop(parsers)
}
}
impl<I, O, E, P1, P2, P3> Choice<I, O, E> for (P1, P2, P3)
where
P1: Parser<I, O, E>,
P2: Parser<I, O, E>,
P3: Parser<I, O, E>,
I: ParserInput + Clone,
{
fn parse(&self, input: I) -> ParseResult<I, O, E> {
let parsers = vec![&self.0 as &dyn Parser<I, O, E>, &self.1, &self.2];
choice_loop(input, parsers)
}
fn representation(&self) -> Representation {
let parsers = vec![&self.0 as &dyn Parser<I, O, E>, &self.1, &self.2];
repr_loop(parsers)
}
}
impl<I, O, E, P1, P2, P3, P4> Choice<I, O, E> for (P1, P2, P3, P4)
where
P1: Parser<I, O, E>,
P2: Parser<I, O, E>,
P3: Parser<I, O, E>,
P4: Parser<I, O, E>,
I: ParserInput + Clone,
{
fn parse(&self, input: I) -> ParseResult<I, O, E> {
let parsers = vec![&self.0 as &dyn Parser<I, O, E>, &self.1, &self.2, &self.3];
choice_loop(input, parsers)
}
fn representation(&self) -> Representation {
let parsers = vec![&self.0 as &dyn Parser<I, O, E>, &self.1, &self.2, &self.3];
repr_loop(parsers)
}
}
impl<I, O, E, P1, P2, P3, P4, P5> Choice<I, O, E> for (P1, P2, P3, P4, P5)
where
P1: Parser<I, O, E>,
P2: Parser<I, O, E>,
P3: Parser<I, O, E>,
P4: Parser<I, O, E>,
P5: Parser<I, O, E>,
I: ParserInput + Clone,
{
fn parse(&self, input: I) -> ParseResult<I, O, E> {
let parsers = vec![
&self.0 as &dyn Parser<I, O, E>,
&self.1,
&self.2,
&self.3,
&self.4,
];
choice_loop(input, parsers)
}
fn representation(&self) -> Representation {
let parsers = vec![
&self.0 as &dyn Parser<I, O, E>,
&self.1,
&self.2,
&self.3,
&self.4,
];
repr_loop(parsers)
}
}
/// Ordered choice over six parsers that share input, output and error types.
///
/// Both methods erase the element types to `&dyn Parser` and hand the list to
/// the shared `choice_loop` / `repr_loop` helpers.
impl<I, O, E, P1, P2, P3, P4, P5, P6> Choice<I, O, E> for (P1, P2, P3, P4, P5, P6)
where
    P1: Parser<I, O, E>,
    P2: Parser<I, O, E>,
    P3: Parser<I, O, E>,
    P4: Parser<I, O, E>,
    P5: Parser<I, O, E>,
    P6: Parser<I, O, E>,
    I: ParserInput + Clone,
{
    fn parse(&self, input: I) -> ParseResult<I, O, E> {
        let (first, second, third, fourth, fifth, sixth) = self;
        choice_loop(
            input,
            vec![
                first as &dyn Parser<I, O, E>,
                second,
                third,
                fourth,
                fifth,
                sixth,
            ],
        )
    }

    fn representation(&self) -> Representation {
        let (first, second, third, fourth, fifth, sixth) = self;
        repr_loop(vec![
            first as &dyn Parser<I, O, E>,
            second,
            third,
            fourth,
            fifth,
            sixth,
        ])
    }
}
/// Tries each parser in `parsers`, in order, against a fresh clone of `input`.
///
/// Returns the first successful result. If every parser fails, the error
/// produced by the last parser in the list is returned.
///
/// # Panics
///
/// Panics if `parsers` is empty: with nothing to try there is no error value
/// to report.
fn choice_loop<I, O, E>(input: I, parsers: Vec<&dyn Parser<I, O, E>>) -> ParseResult<I, O, E>
where
    I: ParserInput + Clone,
{
    //TODO need a more principled way to return an error when no choices work
    let mut last_err = None;
    for parser in &parsers {
        match parser.parse(input.clone()) {
            Ok(success) => return Ok(success),
            // Remember only the most recent failure; earlier ones are discarded.
            Err(e) => last_err = Some(e),
        }
    }
    Err(last_err.expect("choice_loop called with no parsers to try"))
}
/// Builds the combined representation of a list of alternative parsers by
/// collecting each parser's own representation and joining them with
/// `Representation::from_choice`.
fn repr_loop<I, O, E>(parsers: Vec<&dyn Parser<I, O, E>>) -> Representation
where
    I: ParserInput + Clone,
{
    Representation::from_choice(&mut parsers.iter().map(|parser| parser.representation()))
}
// Unit tests for the choice combinators.
#[cfg(test)]
mod tests {
use super::*;
use crate::combinators::repeated;
use crate::primitives::literal;
// Two alternatives: the first one matches, so its mapped value (1) is returned
// together with the unconsumed remainder of the input.
#[test]
fn test_choice() {
let p = choice2(
literal("gnostika").to(1),
repeated(literal(" ")).at_least(1).to(2),
);
assert_eq!(p.parse("gnostika twentynine"), Ok((1, " twentynine")));
}
// Four alternatives passed as a tuple: the first fails on "q drugs" and the
// second ("q") succeeds, so its value (10) is the result.
#[test]
fn test_several_choices() {
let p = choice((
literal("a").to(1),
literal("q").to(10),
repeated(literal("chutney")).to(200),
literal("banana").to(10000),
));
assert_eq!(p.parse("q drugs").unwrap(), (10, " drugs"));
}
}

View File

@ -0,0 +1,16 @@
use crate::parser::{Parser, ParserInput};
/// Wraps `parser` so that its successful output is transformed by `map_fn`.
///
/// The remaining input and any error are passed through untouched, and the
/// returned parser reports the same representation as `parser`.
pub fn map<P, F, I, O1, O2, E>(parser: P, map_fn: F) -> impl Parser<I, O2, E>
where
    I: ParserInput,
    P: Parser<I, O1, E>,
    F: Fn(O1) -> O2,
{
    // Captured before `parser` is moved into the closure below.
    let representation = parser.representation();
    let mapped = move |input: I| match parser.parse(input) {
        Ok((output, remaining)) => Ok((map_fn(output), remaining)),
        Err(err) => Err(err),
    };
    (mapped, representation)
}

View File

@ -0,0 +1,66 @@
mod map;
mod optional;
mod repeated;
mod separated_by;
pub use map::map;
pub use optional::optional;
pub use repeated::repeated;
// Unit tests for the `map`, `repeated` and `separated_by` combinators.
#[cfg(test)]
mod tests {
use super::*;
use crate::parser::Parser;
use crate::primitives::literal;
// `map` transforms the matched output and leaves the rest of the input alone.
#[test]
fn test_map() {
let lit_a = literal("a");
let output = lit_a.map(|s| s.to_uppercase()).parse("a yolo");
assert_eq!(output.unwrap(), ("A".to_string(), " yolo"));
}
// `repeated(..).at_least(1)` collects every consecutive match and stops at the
// first input position where the inner parser fails.
#[test]
fn test_one_or_more() {
let p = repeated(literal("bongo ")).at_least(1);
let input = "bongo bongo bongo bongo bongo ";
let (output, rest) = p.parse(input).unwrap();
assert_eq!(rest, "");
assert_eq!(output.len(), 5);
let (output, rest) = p.parse("bongo ecks").unwrap();
assert_eq!(output.len(), 1);
assert_eq!(rest, "ecks");
}
// Covers both values of the `allow_trailing` flag of `separated_by`.
#[test]
fn test_separated_by() {
// Trailing delimiter not allowed: a dangling separator is an error.
let p = repeated(literal("garb").to(20))
.separated_by(repeated(literal(" ")).at_least(1), false);
assert_eq!(
p.parse("garb garb garb garb").unwrap(),
(vec![20, 20, 20, 20], "")
);
assert!(p.parse("garb garb garb garb ").is_err());
// Trailing delimiter allowed: the dangling separator is consumed.
let p =
repeated(literal("garb").to(20)).separated_by(repeated(literal(" ")).at_least(1), true);
assert_eq!(
p.parse("garb garb garb garb").unwrap(),
(vec![20, 20, 20, 20], "")
);
assert_eq!(
p.parse("garb garb garb garb ").unwrap(),
(vec![20, 20, 20, 20], "")
);
assert_eq!(
p.parse("garb garb garb garb q").unwrap(),
(vec![20, 20, 20, 20], "q")
);
}
}

View File

@ -0,0 +1,17 @@
use crate::parser::{Parser, ParserInput, Representation};
/// Makes `parser` optional.
///
/// On success the output is wrapped in `Some`. On failure the error is
/// discarded and `None` is returned together with the original, unconsumed
/// input, so the resulting parser never fails. Its representation is the
/// choice between `parser`'s representation and `ε`.
pub fn optional<P, I, O, E>(parser: P) -> impl Parser<I, Option<O>, E>
where
    P: Parser<I, O, E>,
    I: ParserInput + Clone,
{
    let mut alternatives = [parser.representation(), Representation::new("ε")].into_iter();
    let representation = Representation::from_choice(&mut alternatives);
    let attempt = move |input: I| {
        // The input is cloned so it can be handed back untouched on failure.
        if let Ok((output, rest)) = parser.parse(input.clone()) {
            Ok((Some(output), rest))
        } else {
            Ok((None, input))
        }
    };
    (attempt, representation)
}

View File

@ -0,0 +1,94 @@
use crate::combinators::separated_by::SeparatedBy;
use crate::parser::{BoxedParser, ParseResult, Parser, ParserInput, Representation};
/// Starts building a repetition of `parser` with no lower or upper bound set.
///
/// The inner parser must use its input type as its error type. Bounds can be
/// added afterwards with `Repeated::at_least` and `Repeated::at_most`.
pub fn repeated<'a, P, I, O>(parser: P) -> Repeated<'a, I, O>
where
    P: Parser<I, O, I> + 'a,
    I: ParserInput + Clone + 'a,
{
    let inner_parser = BoxedParser::new(parser);
    Repeated {
        inner_parser,
        at_least: None,
        at_most: None,
    }
}
/// A parser that applies an inner parser repeatedly and collects the outputs.
///
/// Created by `repeated`; the fields are visible to the parent module so that
/// `SeparatedBy` can reuse them.
pub struct Repeated<'a, I, O>
where
I: ParserInput + Clone,
{
// The parser applied on every iteration (boxed, so its concrete type is erased).
pub(super) inner_parser: BoxedParser<'a, I, O, I>,
// Minimum number of matches required; `None` is treated as 0 when parsing.
pub(super) at_least: Option<u16>,
// Maximum number of matches taken; `None` is treated as `u16::MAX` when parsing.
pub(super) at_most: Option<u16>,
}
/// Builder-style configuration for a repetition.
impl<'a, I, O> Repeated<'a, I, O>
where
    I: ParserInput + Clone,
{
    /// Requires at least `n` matches for the repetition to succeed.
    pub fn at_least(mut self, n: u16) -> Self {
        self.at_least = Some(n);
        self
    }

    /// Stops collecting after `n` matches.
    pub fn at_most(mut self, n: u16) -> Self {
        self.at_most = Some(n);
        self
    }

    /// Turns the repetition into a delimiter-separated list.
    ///
    /// The delimiter's own output is discarded (mapped to `()`).
    /// `allow_trailing` controls whether a delimiter that is not followed by
    /// another item is accepted.
    pub fn separated_by<D, O2>(self, delimiter: D, allow_trailing: bool) -> SeparatedBy<'a, I, O>
    where
        D: Parser<I, O2, I> + 'a,
        O2: 'a,
        I: 'a,
    {
        let unit_delimiter = delimiter.to(());
        SeparatedBy {
            inner_repeated: self,
            delimiter: BoxedParser::new(unit_delimiter),
            allow_trailing,
        }
    }
}
/// Runs the inner parser repeatedly, collecting outputs into a `Vec`.
impl<'a, I, O> Parser<I, Vec<O>, I> for Repeated<'a, I, O>
where
    I: ParserInput + Clone + 'a,
{
    /// Collects matches until the inner parser fails or the upper bound is hit.
    ///
    /// Fails with the original input when fewer than `at_least` items matched.
    /// An upper bound of zero succeeds immediately with an empty vector, before
    /// the lower bound is consulted.
    fn parse(&self, input: I) -> ParseResult<I, Vec<O>, I> {
        let min = self.at_least.unwrap_or(0);
        let max = self.at_most.unwrap_or(u16::MAX);
        if max == 0 {
            return Ok((Vec::new(), input));
        }

        let mut items = Vec::new();
        let mut remaining = input.clone();
        let mut matched: u16 = 0;
        // `max` is at least 1 here, so the inner parser is always tried once.
        while matched < max {
            match self.inner_parser.parse(remaining.clone()) {
                Ok((item, rest)) => {
                    items.push(item);
                    remaining = rest;
                    matched += 1;
                }
                Err(_) => break,
            }
        }

        if matched < min {
            Err(input)
        } else {
            Ok((items, remaining))
        }
    }

    fn representation(&self) -> Representation {
        let min = self.at_least.unwrap_or(0);
        let max = self.at_most.unwrap_or(u16::MAX);
        Representation::repeated(self.inner_parser.representation(), min, max)
    }
}

View File

@ -0,0 +1,84 @@
use crate::combinators::repeated::Repeated;
use crate::parser::{BoxedParser, ParseResult, Parser, ParserInput, Representation};
/// A delimiter-separated list parser, built by `Repeated::separated_by`.
pub struct SeparatedBy<'a, I, O>
where
I: ParserInput + Clone,
{
// Supplies the item parser and the `at_least` / `at_most` bounds.
pub(super) inner_repeated: Repeated<'a, I, O>,
// Parser for the separator between items; its output is `()`.
pub(super) delimiter: BoxedParser<'a, I, (), I>,
// Whether a delimiter that is not followed by another item is accepted.
pub(super) allow_trailing: bool,
}
/// Parses `item (delimiter item)*`, honouring the bounds of the wrapped
/// `Repeated` and the `allow_trailing` flag.
impl<'a, I, O> Parser<I, Vec<O>, I> for SeparatedBy<'a, I, O>
where
    I: ParserInput + Clone + 'a,
{
    /// Placeholder representation; it does not describe the item or delimiter.
    fn representation(&self) -> Representation {
        Representation::new("sepby")
    }

    /// Parses a delimiter-separated list of items.
    ///
    /// * An upper bound of zero succeeds immediately with an empty list.
    /// * If the first item fails, the result is an empty list when no minimum
    ///   is required, otherwise an error carrying the original input.
    /// * After a delimiter, a failing item is an error unless `allow_trailing`
    ///   is set, in which case the delimiter stays consumed and parsing stops.
    /// * Fewer than `at_least` items is an error carrying the original input.
    fn parse(&self, input: I) -> ParseResult<I, Vec<O>, I> {
        let at_least = self.inner_repeated.at_least.unwrap_or(0);
        let at_most = self.inner_repeated.at_most.unwrap_or(u16::MAX);
        let parser = &self.inner_repeated.inner_parser;
        let delimiter = &self.delimiter;

        if at_most == 0 {
            return Ok((vec![], input));
        }

        let mut results = Vec::new();
        let mut further_input = match parser.parse(input.clone()) {
            Ok((item, rest)) => {
                results.push(item);
                rest
            }
            Err(_e) => {
                return if at_least > 0 {
                    Err(input)
                } else {
                    Ok((results, input))
                };
            }
        };
        // The first item counts toward both bounds. Starting the counter at
        // zero made `at_least(1)` reject a one-item list and let `at_most(n)`
        // accept n + 1 items.
        let mut count: u16 = 1;

        // `count` never exceeds `at_most`, so the increment cannot overflow.
        while count < at_most {
            match delimiter.parse(further_input.clone()) {
                Ok(((), rest)) => further_input = rest,
                Err(_e) => break,
            }
            match parser.parse(further_input.clone()) {
                Ok((item, rest)) => {
                    results.push(item);
                    further_input = rest;
                    count += 1;
                }
                Err(_e) if self.allow_trailing => break,
                Err(e) => return Err(e),
            }
        }

        if count < at_least {
            return Err(input);
        }
        Ok((results, further_input))
    }
}

View File

@ -0,0 +1,7 @@
pub mod choice;
pub mod combinators;
mod parser;
pub mod primitives;
pub mod sequence;
pub use parser::{ParseResult, Parser, ParserInput, Representation};

View File

@ -0,0 +1,38 @@
use crate::parser::{ParseResult, Parser, ParserInput, Representation};
/// A type-erased parser: owns any `Parser<I, O, E>` behind a `Box<dyn ..>`.
pub struct BoxedParser<'a, I, O, E>
where
I: ParserInput,
{
// The wrapped parser; `'a` bounds whatever it borrows.
inner: Box<dyn Parser<I, O, E> + 'a>,
}
impl<'a, I, O, E> BoxedParser<'a, I, O, E>
where
    I: ParserInput,
{
    /// Moves `inner` onto the heap and erases its concrete type.
    pub(crate) fn new<P>(inner: P) -> Self
    where
        P: Parser<I, O, E> + 'a,
    {
        let inner: Box<dyn Parser<I, O, E> + 'a> = Box::new(inner);
        Self { inner }
    }
}
/// A boxed parser delegates everything to the parser it wraps.
impl<'a, I, O, E> Parser<I, O, E> for BoxedParser<'a, I, O, E>
where
    I: ParserInput,
{
    fn representation(&self) -> Representation {
        (*self.inner).representation()
    }

    fn parse(&self, input: I) -> ParseResult<I, O, E> {
        (*self.inner).parse(input)
    }

    /// Already boxed, so this returns `self` instead of adding another box.
    fn boxed<'b>(self) -> BoxedParser<'b, I, O, E>
    where
        Self: Sized + 'b,
    {
        self
    }
}

View File

@ -0,0 +1,179 @@
mod boxed_parser;
mod named_parser;
mod parser_input;
mod representation;
use std::rc::Rc;
pub use boxed_parser::BoxedParser;
pub use named_parser::NamedParser;
pub use parser_input::ParserInput;
pub use representation::Representation;
/// Result of running a parser: on success the output `O` paired with the
/// remaining input `I`, on failure an error of type `E`.
pub type ParseResult<I, O, E> = Result<(O, I), E>;
/// Core trait of the library: something that consumes input of type `I` and
/// produces an output `O` plus the remaining input, or fails with `E`.
///
/// Only `parse` and `representation` are required; the remaining methods are
/// combinators with default implementations that return boxed parsers.
pub trait Parser<I, O, E>
where
I: ParserInput,
{
/// Runs the parser on `input`.
fn parse(&self, input: I) -> ParseResult<I, O, E>;
/// Returns a textual description of what this parser accepts.
fn representation(&self) -> Representation;
/// Erases the concrete parser type by wrapping it in a `BoxedParser`.
fn boxed<'a>(self) -> BoxedParser<'a, I, O, E>
where
Self: Sized + 'a,
{
BoxedParser::new(self)
}
/// Transforms the successful output with `map_fn` (see `combinators::map`).
fn map<'a, F, O2>(self, map_fn: F) -> BoxedParser<'a, I, O2, E>
where
Self: Sized + 'a,
I: 'a,
E: 'a,
O: 'a,
O2: 'a,
F: Fn(O) -> O2 + 'a,
{
crate::combinators::map(self, map_fn).boxed()
}
/// Replaces the successful output with a clone of `item`.
fn to<'a, O2>(self, item: O2) -> BoxedParser<'a, I, O2, E>
where
Self: Sized + 'a,
I: 'a,
O: 'a,
O2: Clone + 'a,
E: 'a,
{
self.map(move |_| item.clone())
}
/// Runs this parser and then `next_parser`, yielding both outputs as a pair.
fn then<'a, P, O2>(self, next_parser: P) -> BoxedParser<'a, I, (O, O2), E>
where
Self: Sized + 'a,
I: 'a,
O: 'a,
O2: 'a,
E: 'a,
P: Parser<I, O2, E> + 'a,
{
crate::sequence::tuple2(self, next_parser).boxed()
}
/// Runs this parser and then `next_parser`, keeping only the second output.
fn ignore_then<'a, P, O2>(self, next_parser: P) -> BoxedParser<'a, I, O2, E>
where
Self: Sized + 'a,
I: 'a,
O: 'a,
O2: 'a,
E: 'a,
P: Parser<I, O2, E> + 'a,
{
crate::sequence::tuple2(self, next_parser).map(|(_, next_output)| next_output)
}
/// Runs this parser and then `next_parser`, keeping only the first output.
fn then_ignore<'a, P, O2>(self, next_parser: P) -> BoxedParser<'a, I, O, E>
where
Self: Sized + 'a,
I: 'a,
O: 'a,
O2: 'a,
E: 'a,
P: Parser<I, O2, E> + 'a,
{
crate::sequence::tuple2(self, next_parser).map(|(this_output, _)| this_output)
}
/// Parses `left`, then this parser, then `right`, keeping only this
/// parser's output.
fn delimited<'a, P1, O1, P2, O2>(self, left: P1, right: P2) -> BoxedParser<'a, I, O, E>
where
Self: Sized + 'a,
I: 'a,
O1: 'a,
O2: 'a,
O: 'a,
E: 'a,
P1: Parser<I, O1, E> + 'a,
P2: Parser<I, O2, E> + 'a,
{
crate::sequence::seq((left, self, right)).map(|(_, output, _)| output)
}
/// Parses `surrounding`, then this parser, then `surrounding` again, keeping
/// only this parser's output.
fn surrounded_by<'a, P, O1>(self, surrounding: P) -> BoxedParser<'a, I, O, E>
where
Self: Sized + 'a,
I: 'a,
O1: 'a,
O: 'a,
E: 'a,
P: Parser<I, O1, E> + 'a,
{
// `surrounding` is needed on both sides but is only owned once, so the
// three-part sequence is rebuilt from borrowing closures on every parse call.
BoxedParser::new(move |input| {
let p1 = |i| surrounding.parse(i);
let p2 = |i| surrounding.parse(i);
let main = |i| self.parse(i);
crate::sequence::seq((p1, main, p2))
.map(|(_, output, _)| output)
.parse(input)
})
}
/// Makes this parser optional (see `combinators::optional`).
fn optional<'a>(self) -> BoxedParser<'a, I, Option<O>, E>
where
I: Clone + 'a,
O: 'a,
E: 'a,
Self: Sized + 'a,
{
crate::combinators::optional(self).boxed()
}
/// Boxes this parser and attaches `parser_name` to it as a `NamedParser`.
fn named<'a>(self, parser_name: &str) -> NamedParser<'a, I, O, E>
where
Self: Sized + 'a,
I: 'a,
{
NamedParser::new(self.boxed(), parser_name.to_string())
}
}
/// Any function or closure of the shape `Fn(I) -> ParseResult<I, O, E>` is a
/// parser. It carries no description, so a placeholder representation is used.
impl<I, O, E, F> Parser<I, O, E> for F
where
    I: ParserInput,
    F: Fn(I) -> ParseResult<I, O, E>,
{
    fn representation(&self) -> Representation {
        Representation::new("NOT IMPL'D")
    }

    fn parse(&self, input: I) -> ParseResult<I, O, E> {
        (self)(input)
    }
}
/// A parsing function paired with an explicit `Representation`: the function
/// does the parsing and the second tuple element is reported as the description.
impl<I, O, E, F> Parser<I, O, E> for (F, Representation)
where
    I: ParserInput,
    F: Fn(I) -> ParseResult<I, O, E>,
{
    fn representation(&self) -> Representation {
        let (_, description) = self;
        description.clone()
    }

    fn parse(&self, input: I) -> ParseResult<I, O, E> {
        let (run, _) = self;
        run(input)
    }
}
/// A reference-counted parser behaves exactly like the parser it points to.
impl<I, O, E, T> Parser<I, O, E> for Rc<T>
where
    I: ParserInput,
    T: Parser<I, O, E>,
{
    fn representation(&self) -> Representation {
        (**self).representation()
    }

    fn parse(&self, input: I) -> ParseResult<I, O, E> {
        (**self).parse(input)
    }
}

View File

@ -0,0 +1,36 @@
use super::boxed_parser::BoxedParser;
use crate::parser::{ParseResult, Parser, ParserInput, Representation};
/// A boxed parser with a name attached to it.
pub struct NamedParser<'a, I, O, E>
where
I: ParserInput,
{
// The parser that does the actual work.
inner_parser: BoxedParser<'a, I, O, E>,
// The name given when the parser was created.
name: String,
}
impl<'a, I, O, E> NamedParser<'a, I, O, E>
where
    I: ParserInput,
{
    /// Attaches `name` to an already boxed parser.
    pub(super) fn new(inner_parser: BoxedParser<'a, I, O, E>, name: String) -> Self
    where
        I: 'a,
    {
        NamedParser { inner_parser, name }
    }

    /// Returns the name this parser was created with.
    ///
    /// The borrow is tied only to `&self`. It previously used `&'a self`, which
    /// needlessly forced the borrow to last as long as the parser's own
    /// lifetime parameter.
    pub fn get_name(&self) -> &str {
        &self.name
    }
}
impl<'a, I: ParserInput, O, E> Parser<I, O, E> for NamedParser<'a, I, O, E> {
fn representation(&self) -> Representation {
self.inner_parser.representation()
}
fn parse(&self, input: I) -> ParseResult<I, O, E> {
self.inner_parser.parse(input)
}
}

View File

@ -0,0 +1,11 @@
/// Marker for types that can be fed to a parser. Every input must be `Debug`.
pub trait ParserInput: std::fmt::Debug {
    /// Type produced by `next_token`.
    type Output;

    /// Produces a value of the associated `Output` type. It takes no receiver,
    /// so it cannot inspect any particular input value.
    fn next_token() -> Self::Output;
}

/// String slices are parser input; their token type is the unit type.
impl ParserInput for &str {
    type Output = ();

    fn next_token() -> Self::Output {}
}

View File

@ -0,0 +1,66 @@
/// Textual description of what a parser accepts, in a grammar-like notation.
#[derive(Debug, Clone, PartialEq)]
pub struct Representation {
    val: String,
}

impl Representation {
    /// Creates a representation holding a copy of `from`.
    pub fn new(from: &str) -> Self {
        Self {
            val: from.to_owned(),
        }
    }

    /// Joins the representations of alternative parsers with `" | "`.
    pub(crate) fn from_choice(
        choice_parser_reps: &mut impl Iterator<Item = Representation>,
    ) -> Self {
        Self::joined(choice_parser_reps, " | ")
    }

    /// Joins the representations of sequenced parsers with a single space.
    pub(crate) fn from_sequence(
        sequence_representations: &mut impl Iterator<Item = Representation>,
    ) -> Self {
        Self::joined(sequence_representations, " ")
    }

    // TODO use at_least, at_most
    /// Wraps `underlying` in parentheses followed by `*` when no match is
    /// required (`at_least == 0`) and `+` otherwise. The upper bound is ignored.
    pub(crate) fn repeated(underlying: Representation, at_least: u16, _at_most: u16) -> Self {
        let sigil = match at_least {
            0 => "*",
            _ => "+",
        };
        Self {
            val: format!("({}){}", underlying.val, sigil),
        }
    }

    /// Concatenates all parts, placing `separator` between neighbours only.
    fn joined(parts: &mut impl Iterator<Item = Representation>, separator: &str) -> Self {
        let mut val = String::new();
        for (index, part) in parts.enumerate() {
            if index > 0 {
                val.push_str(separator);
            }
            val.push_str(&part.val);
        }
        Self { val }
    }
}

View File

@ -0,0 +1,108 @@
use crate::parser::{ParseResult, Parser, ParserInput, Representation};
/// Returns a parser that accepts exactly the character `expected` at the start
/// of the input.
///
/// On success it yields `expected` and the input after that character; on
/// failure it returns the unchanged input as the error.
pub fn literal_char(expected: char) -> impl Fn(&str) -> ParseResult<&str, char, &str> {
    move |input| match input.strip_prefix(expected) {
        Some(rest) => Ok((expected, rest)),
        None => Err(input),
    }
}
/// Returns a parser that accepts exactly the string `expected` at the start of
/// the input.
///
/// On success it yields `expected` and the input that follows it; on failure
/// it returns the unchanged input as the error. The parser's representation is
/// `expected` itself.
///
/// Constructing the parser no longer prints to stdout; the leftover debug
/// `println!` fired on every call.
pub fn literal<'a>(expected: &'static str) -> impl Parser<&'a str, &'a str, &'a str> {
    let rep = Representation::new(expected);
    let p = move |input: &'a str| match input.strip_prefix(expected) {
        Some(rest) => Ok((expected, rest)),
        None => Err(input),
    };
    (p, rep)
}
/// Consumes a single character of any kind.
///
/// Fails only on empty input, returning that input as the error.
pub fn any_char(input: &str) -> ParseResult<&str, char, &str> {
    let mut chars = input.chars();
    // After `next()` the iterator's remaining string is exactly the rest of the input.
    chars.next().map(|ch| (ch, chars.as_str())).ok_or(input)
}
/// Returns a parser that accepts one character, provided it occurs in `items`.
///
/// On success it yields the matched character as a one-character string slice
/// together with the rest of the input; otherwise it returns the unchanged
/// input as the error.
///
/// The split is made at the matched character's UTF-8 length. The previous
/// fixed `split_at(1)` panicked whenever the matched character was multi-byte.
pub fn one_of<'a>(items: &'static str) -> impl Parser<&'a str, &'a str, &'a str> {
    let p = move |input: &'a str| match input.chars().next() {
        Some(ch) if items.contains(ch) => Ok(input.split_at(ch.len_utf8())),
        _ => Err(input),
    };

    // Every character is followed by " | ", including the last one. The
    // trailing separator is kept as-is because the representation string is
    // compared verbatim by the existing representation test.
    let mut s = String::new();
    for ch in items.chars() {
        s.push(ch);
        s.push_str(" | ");
    }
    let rep = Representation::new(&s);
    (p, rep)
}
/// Wraps `parser` so that it only succeeds when `pred_fn` accepts its output.
///
/// Errors from `parser` are passed through unchanged.
// NOTE(review): when the predicate rejects the output, the error carries the
// input remaining *after* `parser` ran, not the input this parser was called with.
pub fn pred<P, F, I, O>(parser: P, pred_fn: F) -> impl Parser<I, O, I>
where
    I: ParserInput,
    P: Parser<I, O, I>,
    F: Fn(&O) -> bool,
{
    // Computed up front because `parser` is moved into the closure below.
    let representation =
        Representation::new(&format!("{:?} if <PREDICATE>", parser.representation()));
    let filtered = move |input: I| match parser.parse(input) {
        Ok((result, rest)) if pred_fn(&result) => Ok((result, rest)),
        Ok((_, rest)) => Err(rest),
        Err(err) => Err(err),
    };
    (filtered, representation)
}
/// Parses a standard identifier in a programming language
///
/// An identifier is one alphabetic character followed by any number of
/// alphanumeric characters. The identifier is returned as an owned `String`
/// with the rest of the input; if the input does not start with an alphabetic
/// character, the unchanged input is returned as the error.
pub fn identifier(input: &str) -> ParseResult<&str, String, &str> {
    if !matches!(input.chars().next(), Some(ch) if ch.is_alphabetic()) {
        return Err(input);
    }
    // Byte offset of the first character (after the leading one) that cannot
    // belong to the identifier, or the end of the input if there is none.
    let end = input
        .char_indices()
        .skip(1)
        .find(|(_, ch)| !ch.is_alphanumeric())
        .map_or(input.len(), |(offset, _)| offset);
    let (name, rest) = input.split_at(end);
    Ok((name.to_string(), rest))
}
// Unit tests for the primitive parsers.
#[cfg(test)]
mod tests {
use super::*;
// An identifier may contain digits but must not start with one.
#[test]
fn test_identifier() {
assert_eq!(
identifier("bongo1beans").unwrap(),
(("bongo1beans".to_string(), ""))
);
assert_eq!(identifier("2bongo1beans"), Err("2bongo1beans"));
}
// `pred` lets the wrapped parser's output through when the predicate holds.
#[test]
fn test_pred() {
let p = pred(any_char, |c| *c == 'f');
assert_eq!(p.parse("frog"), Ok(('f', "rog")));
}
}

View File

@ -0,0 +1,195 @@
use crate::parser::{ParseResult, Parser, ParserInput, Representation};
/// Runs `parser1` and then `parser2` on what is left, yielding both outputs as
/// a pair. This is `seq` specialised to two parsers.
pub fn tuple2<P1, P2, I, O1, O2, E>(parser1: P1, parser2: P2) -> impl Parser<I, (O1, O2), E>
where
    I: ParserInput,
    P1: Parser<I, O1, E>,
    P2: Parser<I, O2, E>,
{
    let pair = (parser1, parser2);
    seq(pair)
}
/// Turns a tuple of parsers (anything implementing `Sequence`) into a single
/// parser that runs them one after another and yields a tuple of their outputs.
pub fn seq<T, I, O, E>(sequence: T) -> impl Parser<I, O, E>
where
    I: ParserInput,
    T: Sequence<I, O, E>,
{
    // Taken before `sequence` is moved into the closure.
    let representation = sequence.representation();
    (move |input: I| sequence.parse(input), representation)
}
/* TODO - eventually rewrite this parser combinator in Schala. Seeing what this
* code that makes heavy use of type variables and abstraction over types looks like
* in Schala's type system should be educational
*/
/// A group of parsers that are run one after another on the same input stream.
///
/// Implemented in this file for tuples of two to five parsers; `O` is the
/// tuple of the individual outputs.
pub trait Sequence<I, O, E> {
/// Runs every parser in order, each on the input left over by the previous one.
fn parse(&self, input: I) -> ParseResult<I, O, E>;
/// Returns the combined representation of the sequence.
fn representation(&self) -> Representation;
}
/// Sequence of two parsers: the second runs on the input left by the first.
/// The first error encountered aborts the sequence.
impl<I, O1, O2, E, P1, P2> Sequence<I, (O1, O2), E> for (P1, P2)
where
    I: ParserInput,
    P1: Parser<I, O1, E>,
    P2: Parser<I, O2, E>,
{
    fn parse(&self, input: I) -> ParseResult<I, (O1, O2), E> {
        let (first, second) = self;
        let (out1, rest) = first.parse(input)?;
        let (out2, rest) = second.parse(rest)?;
        Ok(((out1, out2), rest))
    }

    fn representation(&self) -> Representation {
        let (first, second) = self;
        Representation::from_sequence(
            &mut [first.representation(), second.representation()].into_iter(),
        )
    }
}
/// Sequence of three parsers, each running on the input left by the previous
/// one. The first error encountered aborts the sequence.
impl<I, O1, O2, O3, E, P1, P2, P3> Sequence<I, (O1, O2, O3), E> for (P1, P2, P3)
where
    I: ParserInput,
    P1: Parser<I, O1, E>,
    P2: Parser<I, O2, E>,
    P3: Parser<I, O3, E>,
{
    fn parse(&self, input: I) -> ParseResult<I, (O1, O2, O3), E> {
        let (first, second, third) = self;
        let (out1, rest) = first.parse(input)?;
        let (out2, rest) = second.parse(rest)?;
        let (out3, rest) = third.parse(rest)?;
        Ok(((out1, out2, out3), rest))
    }

    fn representation(&self) -> Representation {
        let (first, second, third) = self;
        Representation::from_sequence(
            &mut [
                first.representation(),
                second.representation(),
                third.representation(),
            ]
            .into_iter(),
        )
    }
}
/// Sequence of four parsers, each running on the input left by the previous
/// one. The first error encountered aborts the sequence.
impl<I, O1, O2, O3, O4, E, P1, P2, P3, P4> Sequence<I, (O1, O2, O3, O4), E> for (P1, P2, P3, P4)
where
    I: ParserInput,
    P1: Parser<I, O1, E>,
    P2: Parser<I, O2, E>,
    P3: Parser<I, O3, E>,
    P4: Parser<I, O4, E>,
{
    fn parse(&self, input: I) -> ParseResult<I, (O1, O2, O3, O4), E> {
        let (first, second, third, fourth) = self;
        let (out1, rest) = first.parse(input)?;
        let (out2, rest) = second.parse(rest)?;
        let (out3, rest) = third.parse(rest)?;
        let (out4, rest) = fourth.parse(rest)?;
        Ok(((out1, out2, out3, out4), rest))
    }

    fn representation(&self) -> Representation {
        let (first, second, third, fourth) = self;
        Representation::from_sequence(
            &mut [
                first.representation(),
                second.representation(),
                third.representation(),
                fourth.representation(),
            ]
            .into_iter(),
        )
    }
}
/// Sequence of five parsers, each running on the input left by the previous
/// one. The first error encountered aborts the sequence.
impl<I, O1, O2, O3, O4, O5, E, P1, P2, P3, P4, P5> Sequence<I, (O1, O2, O3, O4, O5), E>
    for (P1, P2, P3, P4, P5)
where
    I: ParserInput,
    P1: Parser<I, O1, E>,
    P2: Parser<I, O2, E>,
    P3: Parser<I, O3, E>,
    P4: Parser<I, O4, E>,
    P5: Parser<I, O5, E>,
{
    fn parse(&self, input: I) -> ParseResult<I, (O1, O2, O3, O4, O5), E> {
        let (first, second, third, fourth, fifth) = self;
        let (out1, rest) = first.parse(input)?;
        let (out2, rest) = second.parse(rest)?;
        let (out3, rest) = third.parse(rest)?;
        let (out4, rest) = fourth.parse(rest)?;
        let (out5, rest) = fifth.parse(rest)?;
        Ok(((out1, out2, out3, out4, out5), rest))
    }

    fn representation(&self) -> Representation {
        let (first, second, third, fourth, fifth) = self;
        Representation::from_sequence(
            &mut [
                first.representation(),
                second.representation(),
                third.representation(),
                fourth.representation(),
                fifth.representation(),
            ]
            .into_iter(),
        )
    }
}
// Unit tests for the sequencing combinators.
#[cfg(test)]
mod test {
use super::*;
use crate::combinators::repeated;
use crate::primitives::{identifier, literal};
// The same three-part input parsed with nested `tuple2` calls and with chained
// `.then`; the two forms nest their output pairs differently.
#[test]
fn test_tuple2() {
let p = tuple2(identifier, tuple2(literal(" "), literal("ruts")));
let (output, _rest) = p.parse("fort1 ruts").unwrap();
assert_eq!(output, ("fort1".into(), (" ", "ruts")));
let p = identifier.then(literal(" ")).then(literal("ruts"));
let (output, _rest) = p.parse("fort1 ruts").unwrap();
assert_eq!(output, (("fort1".into(), " "), "ruts"));
}
// `seq` over a three-tuple and a five-tuple of parsers with mixed output types.
#[test]
fn test_seq() {
let p = seq((
literal("bong").to(10),
repeated(literal(" ")).to(()),
literal("hits").to(20),
));
assert_eq!(p.parse("bong hits").unwrap(), ((10, (), 20), ""));
let p = seq((
literal("alpha").to(10),
repeated(literal(" ")).to(()),
repeated(literal("-")).to(()),
repeated(literal(" ")),
literal("beta"),
));
assert_eq!(
p.parse("alpha ------ beta gamma").unwrap(),
((10, (), (), vec![" ", " ", " "], "beta"), " gamma")
);
}
}

View File

@ -0,0 +1,49 @@
{
"$schema": "https://joplinapp.org/schema/settings.json",
"locale": "en_GB",
"sync.target": 6,
"markdown.plugin.softbreaks": false,
"markdown.plugin.typographer": false,
"spellChecker.language": "en-US",
"ui.layout": {
"key": "root",
"children": [
{
"key": "sideBar",
"width": 250,
"visible": true
},
{
"key": "noteList",
"width": 250,
"visible": true
},
{
"key": "editor",
"visible": true,
"width": 1493
},
{
"key": "plugin-view-joplin.plugin.note.tabs-note.tabs.panel",
"context": {
"pluginId": "joplin.plugin.note.tabs"
},
"visible": true
}
],
"visible": true
},
"noteVisiblePanes": [
"editor",
"viewer"
],
"theme": 4,
"sync.6.username": "webdav",
"net.ignoreTlsErrors": true,
"style.editor.contentMaxWidth": 600,
"editor.codeView": true,
"markdown.plugin.sub": true,
"markdown.plugin.sup": true,
"markdown.plugin.multitable": true
}

View File

@ -0,0 +1,248 @@
use parser_combinator::choice::choice;
use parser_combinator::combinators::repeated;
use parser_combinator::primitives::{any_char, literal, literal_char, one_of, pred};
use parser_combinator::sequence::seq;
use parser_combinator::Parser;
use parser_combinator::Representation;
use proptest::prelude::*;
use rstest::*;
// Property-based tests for the JSON parsers defined in this file.
proptest! {
// Arbitrary generated input must never make the object parser panic; the
// parse result itself is ignored.
#[test]
fn doesnt_crash(s in "\\PC*") {
let _output = json_object().parse(&s);
}
// Any non-empty string without double quotes, once wrapped in quotes, must
// parse to a `JsonValue::Str` with the same content and no leftover input.
#[test]
fn parse_string(s in r#"[^"]+"#) {
let input = format!("\"{}\"", s);
let output = json_string().parse(&input).unwrap();
match output {
(JsonValue::Str(output_s), "") if output_s == s => (),
_ => panic!(),
}
}
}
// Smoke test: a literal parser yields the matched text and the remainder.
#[test]
fn test_parsing() {
    let parser = literal("a");
    let (matched, rest) = parser.parse("a yolo").unwrap();
    assert_eq!((matched, rest), ("a", " yolo"));
}
/*
* JSON BNF
* <JSON> ::= <value>
<value> ::= <object> | <array> | <boolean> | <string> | <number> | <null>
<array> ::= "[" [<value>] {"," <value>}* "]"
<object> ::= "{" [<property>] {"," <property>}* "}"
<property> ::= <string> ":" <value>
*/
/// In-memory form of a parsed JSON value, used by the tests in this file.
#[derive(Debug, Clone, PartialEq)]
enum JsonValue {
Null,
Bool(bool),
Str(String),
Num(f64),
Array(Vec<JsonValue>),
// Key/value pairs are kept in a `Vec`, in the order they were parsed.
Object(Vec<(String, JsonValue)>),
}
/// Shorthand for a parser over `&'a str` input that yields `T` and uses
/// `&'a str` as its error type.
trait JsonParser<'a, T>: Parser<&'a str, T, &'a str> {}
// Blanket impl: every parser of that shape is automatically a `JsonParser`.
impl<'a, T, P> JsonParser<'a, T> for P where P: Parser<&'a str, T, &'a str> {}
/// Parses the literal `null` into `JsonValue::Null`.
fn json_null<'a>() -> impl JsonParser<'a, JsonValue> {
    let null_literal = literal("null");
    null_literal.map(|_| JsonValue::Null)
}
/// Parses the literal `true` or `false` into the matching `JsonValue::Bool`.
fn json_bool<'a>() -> impl JsonParser<'a, JsonValue> {
    let truthy = literal("true").to(JsonValue::Bool(true));
    let falsy = literal("false").to(JsonValue::Bool(false));
    choice((truthy, falsy))
}
/// Parses a decimal number with an optional leading `-` into `JsonValue::Num`.
///
/// Accepted shapes are `digits`, `digits.digits` and `.digits`.
fn json_number<'a>() -> impl JsonParser<'a, JsonValue> {
// A single decimal digit, returned as a one-character slice.
fn digit<'a>() -> impl JsonParser<'a, &'a str> {
one_of("1234567890")
}
// One or more decimal digits.
fn digits<'a>() -> impl JsonParser<'a, Vec<&'a str>> {
repeated(digit()).at_least(1)
}
// Builds the textual form of the unsigned number, then converts it to f64.
let json_number_inner = choice((
// `digits` optionally followed by `.digits`
seq((digits(), literal(".").ignore_then(digits()).optional())).map(
|(mut digits, maybe_decimal)| {
if let Some(decimal_digits) = maybe_decimal {
digits.push(".");
digits.extend(decimal_digits.into_iter());
}
digits.into_iter().collect::<String>()
},
),
// `.digits` with no integer part
literal(".").ignore_then(digits()).map(|decimal_digits| {
let mut d = vec!["."];
d.extend(decimal_digits.into_iter());
d.into_iter().collect::<String>()
}),
))
// The string consists only of digits and at most one `.` at this point.
.map(|digits| digits.parse::<f64>().unwrap());
// Optional sign: negate the value when a leading `-` was present.
literal("-")
.optional()
.then(json_number_inner)
.map(|(maybe_sign, mut val)| {
if maybe_sign.is_some() {
val *= -1.0;
}
JsonValue::Num(val)
})
}
/// Parses a double-quoted string and returns its contents as a `String`.
///
/// The contents are every character up to the next `"`; escape sequences are
/// not interpreted.
fn json_string_raw<'a>() -> impl JsonParser<'a, String> {
    let contents = repeated(pred(any_char, |ch| *ch != '"'));
    seq((literal_char('"'), contents, literal_char('"')))
        .map(|(_, chars, _)| chars.into_iter().collect::<String>())
}
/// Parses a double-quoted string into `JsonValue::Str`.
fn json_string<'a>() -> impl JsonParser<'a, JsonValue> {
    let raw = json_string_raw();
    raw.map(JsonValue::Str)
}
/// Consumes any run (possibly empty) of tabs, newlines and spaces.
fn whitespace<'a>() -> impl JsonParser<'a, ()> {
    let blank = choice((
        literal_char('\t'),
        literal_char('\n'),
        literal_char(' '),
    ));
    repeated(blank).to(())
}
/// Parses a JSON array: comma-separated values between `[` and `]`, with
/// optional whitespace around each value. A trailing comma is not accepted.
fn json_array<'a>() -> impl JsonParser<'a, JsonValue> {
// The parser is assembled inside the closure, i.e. only when input is
// actually parsed: `json_value` in turn calls `json_array`, so building it
// eagerly here would recurse without end.
move |input| {
let val = json_value().surrounded_by(whitespace());
repeated(val)
.separated_by(literal(","), false)
.delimited(literal_char('['), literal_char(']'))
.map(JsonValue::Array)
.parse(input)
}
}
/// Parses a JSON object: comma-separated `"key": value` pairs between `{` and
/// `}`, with optional whitespace around keys and values. A trailing comma is
/// not accepted.
fn json_object<'a>() -> impl JsonParser<'a, JsonValue> {
// As in `json_array`, construction is deferred to parse time because
// `json_value` refers back to `json_object`.
move |input| {
// One `"key": value` pair; the colon itself is dropped.
let kv = json_string_raw()
.surrounded_by(whitespace())
.then_ignore(literal_char(':'))
.then(json_value().surrounded_by(whitespace()));
repeated(kv)
.separated_by(literal_char(','), false)
.delimited(literal_char('{'), literal_char('}'))
.map(JsonValue::Object)
.parse(input)
}
}
/// Parses any JSON value by trying, in order: null, boolean, number, string,
/// array, object.
fn json_value<'a>() -> impl JsonParser<'a, JsonValue> {
    let null = json_null();
    let boolean = json_bool();
    let number = json_number();
    let string = json_string();
    let array = json_array();
    let object = json_object();
    choice((null, boolean, number, string, array, object))
}
// Strings and the supported number shapes: negative integer, negative and
// positive fraction without an integer part, and a full negative decimal.
#[test]
fn parse_json_primitives() {
assert_eq!(
json_string().parse(r#""yolo swagg""#).unwrap(),
(JsonValue::Str("yolo swagg".into()), "")
);
assert_eq!(
json_number().parse("-383").unwrap().0,
JsonValue::Num(-383f64)
);
assert_eq!(
json_number().parse("-.383").unwrap().0,
JsonValue::Num(-0.383)
);
assert_eq!(
json_number().parse(".383").unwrap().0,
JsonValue::Num(0.383)
);
assert_eq!(
json_number().parse("-1.383").unwrap().0,
JsonValue::Num(-1.383)
);
}
// Inputs the array parser must reject; the case here has a trailing comma.
#[rstest]
#[case(r#"[ 4, 9, "ara",]"#)]
fn parse_json_array_err(#[case] input: &str) {
assert!(json_array().parse(input).is_err());
}
// Successful array parses: nested empty arrays, mixed element types with
// surrounding whitespace, and an array followed by input that is left unparsed.
#[rstest]
#[case("[[],[]]", (JsonValue::Array(vec![JsonValue::Array(vec![]), JsonValue::Array(vec![])]), ""))]
#[case(r#"[ 4, 9, "foo" ]"#, (
JsonValue::Array(vec![
JsonValue::Num(4.),
JsonValue::Num(9.0),
JsonValue::Str("foo".to_string())
]),
""
))]
#[case(r#"[8,null,[],5],{}"#,
(
JsonValue::Array(vec![
JsonValue::Num(8.),
JsonValue::Null,
JsonValue::Array(vec![]),
JsonValue::Num(5.),
]),
",{}"
))]
fn parse_json_array(#[case] input: &str, #[case] expected: (JsonValue, &str)) {
assert_eq!(json_array().parse(input).unwrap(), expected);
}
// A single-entry object and the empty object.
#[test]
fn parse_json_object() {
assert_eq!(
json_object().parse(r#"{ "a": 23}"#).unwrap().0,
JsonValue::Object(vec![("a".into(), JsonValue::Num(23.))])
);
assert_eq!(
json_object().parse(r#"{}"#).unwrap().0,
JsonValue::Object(vec![])
);
}
// Parses the `joplin-cfg.json` fixture (included at compile time) and only
// checks that parsing succeeds, not what the parsed value is.
#[test]
fn parse_json_document() {
let test_json = include_str!("joplin-cfg.json");
let parsed_json = json_object().parse(test_json);
assert!(parsed_json.is_ok());
}
// Pins the exact representation strings produced for a few parsers. Note that
// the digit alternatives end with a trailing " | " before the closing paren.
#[rstest]
#[case(json_null().representation(), Representation::new("null"))]
#[case(json_bool().representation(), Representation::new("true | false"))]
#[case(json_number().representation(), Representation::new("- | ε (1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 0 | )+ . (1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 0 | )+ | ε | . (1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 0 | )+"))]
fn representations_test(
#[case] parser_representation: Representation,
#[case] expected: Representation,
) {
assert_eq!(parser_representation, expected);
}