{-# OPTIONS --without-K --safe #-}
open import Level
open import Data.Unit
open import Data.Bool
open import Data.Char as Char
open import Data.List as List using (List; []; _∷_)
open import Data.List.NonEmpty as List⁺ using (List⁺; _∷_)
open import Data.Maybe as Maybe
open import Data.Product as Prod
open import Data.String.Base as String using (String)
open import Data.These as These
open import Function
open import Text.Parser.Position as Pos using (Position)
module Text.Lexer
{t} {Tok : Set t}
(keywords : List⁺ (String × Tok))
(breaking : Char → ∃ λ b → if b then Maybe Tok else Lift _ ⊤)
(default : String → Tok)
where
Result : Set _
Result = List (Position × Tok)
tokenize : String → Result
tokenize = start Pos.start ∘′ String.toList where
mutual
Keywords : Set _
Keywords = List (List Char × Tok)
init : Keywords
init = List.map (Prod.map₁ String.toList) $ List⁺.toList $ keywords
read : Char → Keywords → Keywords
read t = List.mapMaybe $ λ where
(c ∷ cs , tok) → if c == t then just (cs , tok) else nothing
_ → nothing
value : Keywords → Maybe Tok
value ks = List.head $ flip List.mapMaybe ks $ λ where
([] , k) → just k
_ → nothing
start : Position → List Char → Result
start p = loop (p , []) init p
loop :
(acc : Position × List Char) →
(toks : Keywords) →
(pos : Position) →
(input : List Char) →
Result
loop acc toks pos [] = push acc []
loop acc toks pos (c ∷ cs) = case breaking c of λ where
(true , m) → push acc $ break pos m $ start (Pos.update c pos) cs
(false , _) → let toks' = read c toks in case value toks' of λ where
(just k) → (proj₁ acc , k) ∷ start (Pos.update c pos) cs
nothing → loop (Prod.map₂ (c ∷_) acc) toks' (Pos.update c pos) cs
push : (Position × List Char) → Result → Result
push (pos , []) ts = ts
push (pos , cs) ts = (pos , default (String.fromList (List.reverse cs))) ∷ ts
break : Position → Maybe Tok → Result → Result
break pos (just tok) rs = (pos , tok) ∷ rs
break pos nothing rs = rs