package expr

import (
	"bytes"
	"fmt"
	"strconv"
	"strings"
	"text/scanner"
	"unicode"
)

// Token kinds produced by the lexer. They are negative so they can never
// collide with ordinary single-character tokens, which the scanner reports
// as non-negative runes.
const (
	TokenEOF = -(iota + 1)
	TokenIdent
	TokenInt
	TokenFloat
	TokenString
	TokenOperator
)

type lexer struct {
	scan  scanner.Scanner
	token rune
	text  string
}

func (lex *lexer) getToken() rune {
	return lex.token
}

func (lex *lexer) getText() string {
	return lex.text
}

// next advances the lexer to the following token, storing its kind in
// lex.token and its text in lex.text.
func (lex *lexer) next() {
	token := lex.scan.Scan()
	text := lex.scan.TokenText()
	switch token {
	case scanner.EOF:
		lex.token = TokenEOF
		lex.text = text
	case scanner.Ident:
		lex.token = TokenIdent
		lex.text = text
	case scanner.Int:
		lex.token = TokenInt
		lex.text = text
	case scanner.Float:
		lex.token = TokenFloat
		lex.text = text
	case scanner.RawString:
		fallthrough
	case scanner.String:
		lex.token = TokenString
		if len(text) >= 2 && ((text[0] == '`' && text[len(text)-1] == '`') || (text[0] == '"' && text[len(text)-1] == '"')) {
			lex.text = text[1 : len(text)-1]
		} else {
			msg := fmt.Sprintf("got illegal string:%s", text)
			panic(lexPanic(msg))
		}
	case '+', '-', '*', '/', '%', '~':
		lex.token = TokenOperator
		lex.text = text
	case '&', '|', '=':
		var buffer bytes.Buffer
		lex.token = TokenOperator
		buffer.WriteRune(token)
		next := lex.scan.Peek()
		if next == token {
			buffer.WriteRune(next)
			lex.scan.Scan()
		}
		lex.text = buffer.String()
	case '>', '<', '!':
		var buffer bytes.Buffer
		lex.token = TokenOperator
		buffer.WriteRune(token)
		next := lex.scan.Peek()
		if next == '=' {
			buffer.WriteRune(next)
			lex.scan.Scan()
		}
		lex.text = buffer.String()
	default:
		if token >= 0 {
			lex.token = token
			lex.text = text
		} else {
			msg := fmt.Sprintf("got unknown token:%d, text:%s", token, text)
			panic(lexPanic(msg))
		}
	}
}
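
// Note on the two multi-rune operator cases above: when next sees '&', '|',
// or '=' it peeks one rune ahead and, if the same rune follows, folds both
// into a single operator token ("&&", "||", "=="); '>', '<', and '!' are
// folded with a following '=' the same way, yielding ">=", "<=", and "!=".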

// lexPanic is the panic value used to report lexing and parsing errors;
// Parse recovers it and converts it into an error.
type lexPanic string

// describe returns a string describing the current token, for use in errors.
func (lex *lexer) describe() string {
	switch lex.token {
	case TokenEOF:
		return "end of file"
	case TokenIdent:
		return fmt.Sprintf("identifier:%s", lex.getText())
	case TokenInt, TokenFloat:
		return fmt.Sprintf("number:%s", lex.getText())
	case TokenString:
		return fmt.Sprintf("string:%s", lex.getText())
	case TokenOperator:
		return fmt.Sprintf("operator:%s", lex.getText())
	}
	return fmt.Sprintf("token:%q", lex.getToken()) // any other rune
}

// precedence returns the binding strength of the given operator token;
// a larger value binds more tightly. Non-operator tokens return 0, which
// stops parseBinary.
func precedence(token rune, text string) int {
	if token == TokenOperator {
		switch text {
		case "~", "!":
			return 9
		case "*", "/", "%":
			return 8
		case "+", "-":
			return 7
		case ">", ">=", "<", "<=":
			return 6
		case "!=", "==", "=":
			return 5
		case "&":
			return 4
		case "|":
			return 3
		case "&&":
			return 2
		case "||":
			return 1
		default:
			msg := fmt.Sprintf("unknown operator:%s", text)
			panic(lexPanic(msg))
		}
	}
	return 0
}
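
// Two points worth noting about the table above: a lone "=" is parsed at the
// same precedence as "==" and "!=" (how it is evaluated is left to the Expr
// implementation elsewhere in the package), and, as in Go, "&" and "|" bind
// more tightly than "&&" and "||".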

// ---- parser ----

// ExpressionParser parses a single expression string and records the
// variable names it references.
type ExpressionParser struct {
	expression Expr
	variable   map[string]struct{}
}

// NewExpressionParser returns a parser with an empty variable set.
func NewExpressionParser() *ExpressionParser {
	return &ExpressionParser{
		expression: nil,
		variable:   make(map[string]struct{}),
	}
}

// Parse parses the input string as an arithmetic expression.
//
//	expr = num                       a literal number, e.g., 3.14159
//	     | id                        a variable name, e.g., x
//	     | id '(' expr ',' ... ')'   a function call
//	     | '-' expr                  a unary operator  ( + - ~ ! )
//	     | expr '+' expr             a binary operator ( + - * / % > >= < <= == != = & | && || )
func (parser *ExpressionParser) Parse(input string) (err error) {
	defer func() {
		switch x := recover().(type) {
		case nil:
			// no panic
		case lexPanic:
			err = fmt.Errorf("%s", x)
		default:
			// unexpected panic: re-raise it.
			panic(x)
		}
	}()
	lex := new(lexer)
	lex.scan.Init(strings.NewReader(input))
	lex.scan.Mode = scanner.ScanIdents | scanner.ScanInts | scanner.ScanFloats | scanner.ScanStrings | scanner.ScanRawStrings
	lex.scan.IsIdentRune = parser.isIdentRune
	lex.next() // initial lookahead
	parser.expression = nil
	parser.variable = make(map[string]struct{})
	e := parser.parseExpr(lex)
	if lex.token != TokenEOF {
		return fmt.Errorf("unexpected %s", lex.describe())
	}
	parser.expression = e
	return nil
}

// GetExpr returns the expression tree built by the most recent successful
// call to Parse.
func (parser *ExpressionParser) GetExpr() Expr {
	return parser.expression
}

// GetVariable returns the variable names referenced by the parsed expression,
// excluding the identifiers "true" and "false". The order is unspecified.
func (parser *ExpressionParser) GetVariable() []string {
	variable := make([]string, 0, len(parser.variable))
	for v := range parser.variable {
		if v != "true" && v != "false" {
			variable = append(variable, v)
		}
	}
	return variable
}
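
// exampleParseUsage is an illustrative sketch, not part of the parsing logic:
// it shows one way the exported API above can be driven. The function name
// and the input string are made up for illustration; the body relies only on
// Parse, GetVariable, and GetExpr defined in this file.
func exampleParseUsage() {
	parser := NewExpressionParser()
	if err := parser.Parse("price*quantity >= 100 && discount != 0"); err != nil {
		fmt.Println("parse error:", err)
		return
	}
	// Variables are collected during parsing; map iteration makes the order unspecified.
	fmt.Println("variables:", parser.GetVariable()) // e.g. [price quantity discount]
	_ = parser.GetExpr()                            // the parsed Expr tree, ready for evaluation elsewhere
}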

// isIdentRune reports whether ch may appear in an identifier at position i:
// letters, '$', '_', and '?' are allowed anywhere, digits only after the
// first rune.
func (parser *ExpressionParser) isIdentRune(ch rune, i int) bool {
	return ch == '$' || ch == '_' || ch == '?' || unicode.IsLetter(ch) || (unicode.IsDigit(ch) && i > 0)
}

func (parser *ExpressionParser) parseExpr(lex *lexer) Expr {
	return parser.parseBinary(lex, 1)
}

// binary = unary (op binary)*
// parseBinary stops when it encounters an
// operator of lower precedence than prec1.
func (parser *ExpressionParser) parseBinary(lex *lexer, prec1 int) Expr {
	lhs := parser.parseUnary(lex)
	for prec := precedence(lex.getToken(), lex.getText()); prec >= prec1; prec-- {
		for precedence(lex.getToken(), lex.getText()) == prec {
			op := lex.getText()
			lex.next() // consume operator
			rhs := parser.parseBinary(lex, prec+1)
			lhs = binary{op, lhs, rhs}
		}
	}
	return lhs
}
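
// For example, on "1 + 2 * 3", parseBinary(lex, 1) reads "1", sees '+'
// (precedence 7) and recurses with prec1 = 8 for the right-hand side; that
// recursive call consumes "2 * 3" as a unit, so the tree groups as
// 1 + (2 * 3) rather than (1 + 2) * 3.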

// unary = ('+' | '-' | '~' | '!') unary | primary
func (parser *ExpressionParser) parseUnary(lex *lexer) Expr {
	if lex.getToken() == TokenOperator {
		op := lex.getText()
		if op == "+" || op == "-" || op == "~" || op == "!" {
			lex.next()
			return unary{op, parser.parseUnary(lex)}
		}
		msg := fmt.Sprintf("unary got unknown operator:%s", lex.getText())
		panic(lexPanic(msg))
	}
	return parser.parsePrimary(lex)
}

// primary = id
//         | id '(' expr ',' ... ',' expr ')'
//         | num
//         | '(' expr ')'
func (parser *ExpressionParser) parsePrimary(lex *lexer) Expr {
	switch lex.token {
	case TokenIdent:
		id := lex.getText()
		lex.next()
		if lex.token != '(' {
			parser.variable[id] = struct{}{}
			return Var(id)
		}
		lex.next() // consume '('
		var args []Expr
		if lex.token != ')' {
			for {
				args = append(args, parser.parseExpr(lex))
				if lex.token != ',' {
					break
				}
				lex.next() // consume ','
			}
			if lex.token != ')' {
				msg := fmt.Sprintf("got %s, want ')'", lex.describe())
				panic(lexPanic(msg))
			}
		}
		lex.next() // consume ')'
		return call{id, args}

	case TokenFloat:
		f, err := strconv.ParseFloat(lex.getText(), 64)
		if err != nil {
			panic(lexPanic(err.Error()))
		}
		lex.next()
		return literal{value: f}

	case TokenInt:
		i, err := strconv.ParseInt(lex.getText(), 10, 64)
		if err != nil {
			panic(lexPanic(err.Error()))
		}
		lex.next()
		return literal{value: i}

	case TokenString:
		s := lex.getText()
		lex.next()
		return literal{value: s}

	case '(':
		lex.next() // consume '('
		e := parser.parseExpr(lex)
		if lex.token != ')' {
			msg := fmt.Sprintf("got %s, want ')'", lex.describe())
			panic(lexPanic(msg))
		}
		lex.next() // consume ')'
		return e
	}
	msg := fmt.Sprintf("unexpected %s", lex.describe())
	panic(lexPanic(msg))
}
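
// As a concrete illustration of the call branch above: parsing "min(x, 2)"
// consumes the identifier "min", sees '(', parses the arguments x and 2, and
// returns a call node whose function name is "min". Only the argument x is
// recorded in parser.variable, since "min" names a function, not a variable.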